(
self,
existing_urls: Optional[set] = None,
target_gap: int = 0,
screened_sources: Optional[List[Dict]] = None,
)
| 182 | return self.refresh_candidate_pool(merged, save=save) |
| 183 | |
def select_screened_validation_batch(
    self,
    existing_urls: Optional[set] = None,
    target_gap: int = 0,
    screened_sources: Optional[List[Dict]] = None,
) -> List[Dict]:
    """Pick the next batch of screened sources to hand to validation.

    Sources whose ``bookSourceUrl`` (stringified and stripped) is already
    present in ``existing_urls`` are dropped. The remainder is deep-copied,
    ordered by ``self._sort_sources`` and truncated to the batch size.

    Args:
        existing_urls: URLs to exclude. ``None`` means nothing is excluded.
        target_gap: Multiplied by ``self.validation_oversample_factor`` to
            size the batch; the result never drops below
            ``self.screened_validation_batch``.
        screened_sources: Sources to choose from. Only ``None`` triggers
            ``self.load_screened_sources()``; an explicitly supplied empty
            list is honoured and yields an empty batch.

    Returns:
        At most ``batch_size`` deep copies of the eligible sources, in the
        order produced by ``self._sort_sources``.
    """
    from math import ceil

    excluded = set() if existing_urls is None else existing_urls

    # Distinguish "not supplied" from "supplied but empty": a caller that
    # passes [] must not silently get the stored sources instead.
    if screened_sources is None:
        screened = self.load_screened_sources() or []
    else:
        screened = screened_sources

    wanted = max(
        self.screened_validation_batch,
        target_gap * self.validation_oversample_factor,
    )
    # A fractional oversample factor would make the slice bound a float
    # (TypeError), and a negative bound would trim from the tail instead of
    # returning nothing; normalise to a non-negative integer.
    batch_size = max(0, ceil(wanted))

    available = [
        deepcopy(source)
        for source in screened
        if str(source.get("bookSourceUrl", "")).strip() not in excluded
    ]
    return self._sort_sources(available)[:batch_size]
| 200 | |
| 201 | def _apply_existing_bonus(self, current_sources: List[Dict], candidate_sources: List[Dict]) -> List[Dict]: |
| 202 | current_urls = {str(item.get("bookSourceUrl", "")).strip() for item in current_sources} |
no test coverage detected