# Cookie entries declared by this plugin. Each entry looks like a
# (domain, cookie name, value) tuple; presumably this pins the multiup.org
# locale to English so the scraped markup is predictable -- TODO confirm
# against whatever consumes COOKIES.
| 39 | COOKIES = [("multiup.org", "_locale", "en")] |
| 40 | |
def get_links(self):
    """
    Collect the links found on the currently loaded multiup.org page.

    The page type is read from ``self.info['pattern']['TYPE']``:

    * ``"project"``: every multiup.org download URL found in ``self.data``
      is returned as-is.
    * ``"download"`` or ``None``: the page form (if any) is submitted, the
      per-host mirror anchors are scraped from the response and a selection
      of their URLs is returned according to the plugin configuration.
    * anything else: an empty list.

    Configuration keys read from ``self.config``:

    * ``hosts_priority``: ``|``-separated host names, most preferred first.
    * ``ignored_hosts``: ``|``-separated host names that are never returned.
    * ``grab_all``: when true return every usable host (priority hosts
      first), otherwise return only the single best host.

    :return: list of URL strings (possibly empty).
    """
    m_type = self.info['pattern']['TYPE']

    # ``or ""`` keeps an unset (None) option from crashing ``split``.
    hosts_priority = [_h for _h in (self.config.get('hosts_priority') or "").split('|') if _h]
    ignored_hosts = set(_h for _h in (self.config.get('ignored_hosts') or "").split('|') if _h)
    grab_all = self.config.get('grab_all')

    if m_type == "project":
        return re.findall(r'(https?://www\.multiup\.org/(?:en/|fr/)?download/.*)', self.data)

    if m_type not in ("download", None):
        # Unrecognised page type: return an empty list so the result is
        # always iterable instead of silently falling off with None.
        return []

    form_action, inputs = self.parse_html_form()
    if inputs is not None:
        self.data = self.load(urlparse.urljoin("http://www.multiup.org/", form_action),
                              post=inputs)

    # host name -> mirror URL, plus the order in which hosts first appear on
    # the page so the fallback choice below is deterministic on every
    # interpreter version.
    hosts_data = {}
    page_order = []
    for _a in re.findall(r'<a (.+?) class="host btn btn-md btn-default btn-block btn-3d hvr-bounce-to-right">', self.data, re.M):
        validity = re.search(r'validity="(\w+)"', _a)
        if validity is None or validity.group(1) not in ("valid", "unknown"):
            continue

        host = re.search(r'nameHost="(.+?)"', _a)
        link = re.search(r'href="(.+?)"', _a)
        if host is None or link is None:
            # Malformed anchor: skip it rather than abort the whole page.
            continue

        if host.group(1) not in hosts_data:
            page_order.append(host.group(1))
        hosts_data[host.group(1)] = link.group(1)

    chosen_hosts = []

    # Priority hosts go first, in the configured order.
    for _h in hosts_priority:
        if _h in hosts_data and _h not in ignored_hosts and _h not in chosen_hosts:
            self.log_debug("Adding '%s' link" % _h)
            chosen_hosts.append(_h)
            if not grab_all:
                break

    # Then the remaining hosts: all of them when grabbing everything, or
    # just the first usable one when no priority host was available.
    if grab_all or not chosen_hosts:
        for _h in page_order:
            if _h not in ignored_hosts and _h not in chosen_hosts:
                self.log_debug("Adding '%s' link" % _h)
                chosen_hosts.append(_h)
                if not grab_all:
                    break

    return [hosts_data[_h] for _h in chosen_hosts]