Returns a list of URLs parsed from the string. Works on http://, https://, www. links or domain names ending in .com, .org, .net. Links can be preceded by leading punctuation (open parens) and followed by trailing punctuation (period, comma, close parens).
find_urls(string, unique=True)
| 594 | re.compile(RE_URL3, re.I)) |
| 595 | |
def find_urls(string, unique=True):
    """ Returns a list of URLs parsed from the string.
        Works on http://, https://, www. links or domain names ending in .com, .org, .net.
        Links can be preceded by leading punctuation (open parens)
        and followed by trailing punctuation (period, comma, close parens).
        With unique=True (default), a URL that was already collected is skipped;
        with unique=False, every match is returned.
        Results are ordered pattern by pattern (RE_URL1, then RE_URL2, then RE_URL3),
        and within one pattern in the order the matches occur in the string.
    """
    string = u(string)
    # U+2024 (ONE DOT LEADER) is normalized to a plain period before matching.
    string = string.replace(u"\u2024", ".")
    # NOTE(review): as rendered here both literals look like a plain space, which
    # makes this call a no-op; the first one was presumably a non-breaking space.
    # Confirm against the upstream source before changing it.
    string = string.replace(" ", " ")
    # The text is padded with one space on each side before matching (presumably
    # so the patterns can rely on surrounding context at the string edges).
    # Built once, since it does not change between patterns.
    padded = " %s " % string
    matches = []
    seen = set()  # URLs already collected; O(1) membership test when unique=True.
    for p in (RE_URL1, RE_URL2, RE_URL3):
        for m in p.finditer(padded):
            s = m.group(1)
            s = s.split("\">")[0].split("'>")[0] # google.com">Google => google.com
            if unique:
                if s in seen:
                    continue
                seen.add(s)
            matches.append(s)
    return matches

# Alias: links() is another name for find_urls().
links = find_urls
| 615 |