MCPcopy
hub / github.com/probberechts/soccerdata / read_teams

Method read_teams

soccerdata/sofifa.py:170–219  ·  view source on GitHub ↗

Retrieve all teams for the selected leagues. Returns: pd.DataFrame

(self)

Source from the content-addressed store, hash-verified

168 return pd.DataFrame(versions).set_index("version_id").sort_index()
169
def read_teams(self) -> pd.DataFrame:
    """Retrieve all teams for the selected leagues.

    One league overview page is requested for every combination of a league
    returned by ``read_leagues`` and a row of ``self.versions``. Each row of
    the page's table contributes one team, identified by the link in its
    second cell.

    Returns
    -------
    pd.DataFrame
        One row per team link found, indexed by ``team_id``. Columns are
        ``team`` (after applying ``TEAMNAME_REPLACEMENTS``), ``league`` and
        every column of ``self.versions``. When no team is found at all, an
        empty frame with that same index and columns is returned.
    """
    # build url
    urlmask = SO_FIFA_API + "/teams?lg={}&r={}&set=true"
    filemask = "teams_{}_{}.html"
    # compiled once up front: the pattern is identical for every page and row
    pat_team = re.compile(r"\/team\/(\d+)\/[\w-]+\/")

    # get league IDs
    leagues = self.read_leagues()
    versions = self.versions

    # collect teams
    teams = []
    # materialised as a list so the total is known for the progress log
    iterator = list(product(leagues.iterrows(), versions.iterrows()))
    for i, ((lkey, league), (version_id, version)) in enumerate(iterator):
        logger.info(
            "[%s/%s] Retrieving teams for %s in %s edition",
            i + 1,
            len(iterator),
            lkey,
            version["update"],
        )
        league_id = league["league_id"]
        # read html page (league overview)
        filepath = self.data_dir / filemask.format(league_id, version_id)
        url = urlmask.format(league_id, version_id)
        reader = self.get(url, filepath)

        # extract team links
        tree = html.parse(reader)
        # the version fields are the same for every team on this page
        version_fields = version.to_dict()
        for node in tree.xpath("//table/tbody/tr"):
            # extract team IDs from links
            team_link = node.xpath(".//td[2]//a")[0]
            teams.append(
                {
                    "team_id": int(
                        pat_team.search(team_link.get("href")).group(1)  # type: ignore
                    ),
                    "team": team_link.text,
                    "league": lkey,
                    **version_fields,
                }
            )

    if not teams:
        # pd.DataFrame([]) has no columns, so set_index("team_id") would raise
        # KeyError; return an empty frame with the regular layout instead.
        columns = ["team_id", "team", "league", *versions.columns]
        return pd.DataFrame(columns=columns).set_index(["team_id"])

    # return data frame
    return pd.DataFrame(teams).replace({"team": TEAMNAME_REPLACEMENTS}).set_index(["team_id"])
220
221 def read_players(self, team: Optional[Union[str, list[str]]] = None) -> pd.DataFrame:
222 """Retrieve all players for the selected leagues.

Callers 1

read_playersMethod · 0.95

Calls 3

read_leaguesMethod · 0.95
getMethod · 0.80
parseMethod · 0.80

Tested by

no test coverage detected