Parses all the parts of the URL string to a dictionary. URL format: protocol://username:password@domain:port/path/page?querystring#anchor For example: http://user:pass@example.com:992/animal/bird?species=seagull&q#wings This is a cached method that is only invoked when necessary, and only once.
(self)
| 275 | self.parts.update(kwargs) |
| 276 | |
def _parse(self):
    """ Parses all the parts of the URL string to a dictionary.
        URL format: protocol://username:password@domain:port/path/page?querystring#anchor
        For example: http://user:pass@example.com:992/animal/bird?species=seagull&q#wings
        The dictionary is stored as self._parts (nothing is returned).
        PORT is an int if it consists of digits, otherwise the raw string (u"" if absent).
        PATH is a list of non-empty directory names, PAGE is the text after the last "/".
        This is a cached method that is only invoked when necessary, and only once.
    """
    p = urlparse.urlsplit(self._string)
    P = {PROTOCOL: p[0],            # http
         USERNAME: u"",             # user
         PASSWORD: u"",             # pass
           DOMAIN: p[1],            # example.com
             PORT: u"",             # 992
             PATH: p[2],            # [animal]
             PAGE: u"",             # bird
            QUERY: urldecode(p[3]), # {"species": "seagull", "q": None}
           ANCHOR: p[4]             # wings
    }
    netloc = p[1]
    # Split the username and password from the domain.
    # The last "@" separates the credentials from the host (a password may contain "@"),
    # the first ":" separates the username from the password (a password may contain ":").
    if "@" in netloc:
        credentials, netloc = netloc.rsplit("@", 1)
        P[USERNAME], \
        P[PASSWORD] = (credentials.split(":", 1) + [u""])[:2]
        P[DOMAIN] = netloc
    # Split the port number from the domain.
    # Only the last ":" can precede a port. A domain ending in "]" is a bracketed
    # IPv6 address without a port (e.g., [::1]): its colons belong to the address.
    if ":" in netloc and not netloc.endswith("]"):
        P[DOMAIN], port = netloc.rsplit(":", 1)
        # A conditional expression also converts port "0" to an int,
        # which the "x and int(x) or x" idiom does not (0 is falsy).
        P[PORT] = int(port) if port.isdigit() else port
    # Split the base page from the path.
    path = p[2]
    if "/" in path:
        folders, _, P[PAGE] = path.rpartition("/")
        # A list comprehension yields a real list on Python 2 and 3 (filter() is lazy on 3).
        # Empty names (from leading, trailing or doubled slashes) are discarded.
        P[PATH] = [name for name in folders.split("/") if name != ""]
    else:
        # No "/" in the path: all of it is the page, and there are no directories.
        P[PAGE] = path
        P[PATH] = []
    # Written straight into the instance dictionary
    # (presumably to bypass the class's attribute hooks -- confirm against the class).
    self.__dict__["_parts"] = P
| 313 | |
| 314 | # URL.string yields unicode(URL) by joining the different parts, |
| 315 | # if the URL parts have been modified. |