Retrieve the selected seasons for the selected leagues.

Parameters
----------
split_up_big5: bool
    If True, it will load the "Big 5 European Leagues Combined" instead of
    each league individually.

Returns
-------
pd.DataFrame
(self, split_up_big5: bool = False)
| 188 | return df[df.index.isin(leagues)] |
| 189 | |
def read_seasons(self, split_up_big5: bool = False) -> pd.DataFrame:
    """Retrieve the selected seasons for the selected leagues.

    For every league returned by ``self.read_leagues(split_up_big5)``, the
    league's page is requested through ``self.get`` (with a file path under
    ``self.data_dir``) and the HTML table with ``id="seasons"`` is parsed.

    Parameters
    ----------
    split_up_big5: bool
        If True, it will load the "Big 5 European Leagues Combined" instead of
        each league individually. The flag is forwarded unchanged to
        ``read_leagues``.

    Returns
    -------
    pd.DataFrame
        Indexed by ``(league, season)`` and restricted to ``self.seasons``,
        with the columns ``format`` (``"elimination"`` or ``"round-robin"``)
        and ``url`` (the season link as found in the table).

    Raises
    ------
    ValueError
        If a league page does not contain exactly one ``seasons`` table.
    """
    filemask = "seasons_{}.html"
    df_leagues = self.read_leagues(split_up_big5)

    seasons = []
    for lkey, league in df_leagues.iterrows():
        url = FBREF_API + league.url
        filepath = self.data_dir / filemask.format(lkey)
        reader = self.get(url, filepath)

        # extract season links
        tree = html.parse(reader)
        # Tuple-unpacking asserts that the page has exactly one such table.
        (html_table,) = tree.xpath("//table[@id='seasons']")
        df_table = _parse_table(html_table)
        # The leading "." keeps the query relative to the seasons table. A
        # bare "//th" is an absolute path and would also match header cells
        # of any other table in the document, misaligning links and rows.
        df_table["url"] = html_table.xpath(
            ".//th[@data-stat='year_id' or @data-stat='year']/a/@href"
        )
        # Override the competition name or add if missing
        df_table["Competition Name"] = lkey
        # Some tournaments have a "year" column instead of "season"
        if "Year" in df_table.columns:
            df_table = df_table.rename(columns={"Year": "Season"})
        # Get the competition format: a "Final" column marks a knock-out
        # tournament, everything else is treated as a league format.
        df_table["Format"] = (
            "elimination" if "Final" in df_table.columns else "round-robin"
        )
        seasons.append(df_table)

    df = pd.concat(seasons).pipe(standardize_colnames)
    df = df.rename(columns={"competition_name": "league"})
    df["season"] = df["season"].apply(self._season_code.parse)
    # If both a 20xx and a 19xx season end up with the same season code, only
    # the first occurrence is kept (presumably the 20xx one, assuming the
    # tables list the most recent season first -- TODO confirm).
    df = df.drop_duplicates(subset=["league", "season"], keep="first")
    df = df.set_index(["league", "season"]).sort_index()
    return df.loc[(slice(None), self.seasons), ["format", "url"]]
| 238 | |
| 239 | def read_team_season_stats( |
| 240 | self, stat_type: str = "standard", opponent_stats: bool = False |