Retrieve all players for the selected leagues. Parameters ---------- team: str or list of str, optional Team(s) to retrieve. If None, will retrieve all teams. Raises ------ ValueError If no data is found for the given team(s) in the selected leagues.
(self, team: Optional[Union[str, list[str]]] = None)
| 219 | return pd.DataFrame(teams).replace({"team": TEAMNAME_REPLACEMENTS}).set_index(["team_id"]) |
| 220 | |
| 221 | def read_players(self, team: Optional[Union[str, list[str]]] = None) -> pd.DataFrame: |
| 222 | """Retrieve all players for the selected leagues. |
| 223 | |
| 224 | Parameters |
| 225 | ---------- |
| 226 | team: str or list of str, optional |
| 227 | Team(s) to retrieve. If None, will retrieve all teams. |
| 228 | |
| 229 | Raises |
| 230 | ------ |
| 231 | ValueError |
| 232 | If no data is found for the given team(s) in the selected leagues. |
| 233 | |
| 234 | Returns |
| 235 | ------- |
| 236 | pd.DataFrame |
| 237 | """ |
| 238 | # build url |
| 239 | urlmask = SO_FIFA_API + "/team/{}/?r={}&set=true" |
| 240 | filemask = "players_{}_{}.html" |
| 241 | |
| 242 | # get list of teams |
| 243 | df_teams = self.read_teams() |
| 244 | |
| 245 | if team is not None: |
| 246 | teams_to_check = add_standardized_team_name(team) |
| 247 | |
| 248 | # select requested teams |
| 249 | iterator = df_teams.loc[df_teams.team.isin(teams_to_check), :] |
| 250 | if len(iterator) == 0: |
| 251 | raise ValueError("No data found for the given teams in the selected seasons.") |
| 252 | else: |
| 253 | iterator = df_teams |
| 254 | |
| 255 | # collect players |
| 256 | players = [] |
| 257 | iterator = list(product(self.versions.iterrows(), iterator.iterrows())) |
| 258 | for i, ((version_id, version), (team_id, df_team)) in enumerate(iterator): |
| 259 | logger.info( |
| 260 | "[%s/%s] Retrieving list of players for %s in %s edition", |
| 261 | i + 1, |
| 262 | len(iterator), |
| 263 | df_team["team"], |
| 264 | version["update"], |
| 265 | ) |
| 266 | |
| 267 | # read html page (team overview) |
| 268 | filepath = self.data_dir / filemask.format(team_id, version_id) |
| 269 | url = urlmask.format(team_id, version_id) |
| 270 | reader = self.get(url, filepath) |
| 271 | |
| 272 | # extract player links |
| 273 | tree = html.parse(reader) |
| 274 | pat_player = re.compile(r"\/player\/(\d+)\/[\w-]+\/") |
| 275 | table_squad = tree.xpath("//article/table") |
| 276 | for node in table_squad[0].xpath(".//td[2]/a[contains(@href,'/player/')]"): |
| 277 | # extract player IDs from links |
| 278 | # extract player names from links |