(self)
| 102 | return item.name is None or item.form is None or item.rig is None |
| 103 | |
def crawl(self):
    """Crawl the oratory1990 preset list on Reddit and build a fresh name index.

    Loads the previously known index with ``self.read_name_index()``, fetches
    the wiki page, and walks the rows of the table that follows the
    "full list of EQ settings" heading. Each usable row becomes a ``NameItem``
    keyed by its link URL; ``name``, ``form`` and ``rig`` are copied from the
    known index whenever an item with the same URL exists there and the
    respective field is not None.

    Rows are skipped when they have fewer than four ``<td>`` cells, have no
    link in the third cell, or whose notes mention "preliminary" or " EQ".

    Returns:
        The newly built ``NameIndex`` (also stored in ``self.crawl_index``).

    Raises:
        TypeError: If ``self.driver`` is None.
        RedditCrawlFailed: If the expected heading, table or table body
            cannot be found on the page.
    """
    if self.driver is None:
        raise TypeError('self.driver cannot be None')
    self.name_index = self.read_name_index()
    document = self.get_beautiful_soup('https://www.reddit.com/r/oratory1990/wiki/index/list_of_presets')
    table_header = document.find(id='wiki_full_list_of_eq_settings.3A')
    if table_header is None:
        raise RedditCrawlFailed('Failed to parse Reddit page. Maybe try again?')
    self.crawl_index = NameIndex()

    # find() returns None when nothing matches; report that as the same parse
    # failure as a missing heading instead of an opaque AttributeError.
    table = table_header.parent.find('table')
    table_body = table.find('tbody') if table is not None else None
    if table_body is None:
        raise RedditCrawlFailed('Failed to parse Reddit page. Maybe try again?')

    manufacturer, model = None, None
    for row in table_body.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) < 4:
            # Not a data row (e.g. header or separator row); nothing to parse
            continue

        # A '-' placeholder in the first cell means "same manufacturer as the previous row"
        manufacturer_text = cells[0].text.strip()
        if manufacturer_text != '-':
            manufacturer = manufacturer_text
        # A '-' placeholder in the second cell means "same model as the previous row"
        model_text = cells[1].text.strip()
        if model_text != '-':
            model = model_text
        source_name = f'{manufacturer} {model}'

        # Third cell contains hyperlink, where the anchor is the PDF and text is target name
        link = cells[2].find('a')
        href = link.get('href') if link is not None else None
        if href is None:
            # No link means there is nothing to download for this row
            continue
        # NOTE(review): presumably switches a share link to direct download - confirm
        url = href.replace('?dl=0', '?dl=1')
        form = 'over-ear' if 'over-ear' in cells[2].text.strip().lower() else 'in-ear'

        # Fourth cell is notes
        notes = cells[3].text.strip()
        if 'preliminary' in notes.lower() or ' EQ' in notes:
            continue  # Skip various EQ settings and preliminary measurements
        if notes and notes.lower() != 'standard':
            source_name += f' ({notes})'

        item = NameItem(url=url, source_name=source_name, form=form)
        # Carry over fields already resolved for this URL in the known index
        known_item = self.name_index.find_one(url=url)
        if known_item is not None:
            if known_item.name is not None:
                item.name = known_item.name
            if known_item.form is not None:
                item.form = known_item.form
            if known_item.rig is not None:
                item.rig = known_item.rig

        # Only the first row for a given source name is kept
        if not self.crawl_index.find(source_name=source_name):
            self.crawl_index.add(item)
    return self.crawl_index
| 143 | |
| 144 | @staticmethod |
| 145 | def parse_image(path, px_top=800, px_bottom=4400, px_left=0, px_right=2500): |
nothing calls this directly
no test coverage detected