MCPcopy
hub / github.com/alirezamika/autoscraper / _get_result_by_func

Method _get_result_by_func

autoscraper/auto_scraper.py:404–443  ·  view source on GitHub ↗
(
        self,
        func,
        url,
        html,
        soup,
        request_args,
        grouped,
        group_by_alias,
        unique,
        attr_fuzz_ratio,
        **kwargs
    )

Source from the content-addressed store, hash-verified

402 return result
403
def _get_result_by_func(
    self,
    func,
    url,
    html,
    soup,
    request_args,
    grouped,
    group_by_alias,
    unique,
    attr_fuzz_ratio,
    **kwargs
):
    """Run ``func`` once per stack in ``self.stack_list`` and collect the results.

    Args:
        func: Callable invoked as
            ``func(stack, soup, url, attr_fuzz_ratio, **kwargs)``. Its return
            value is concatenated onto a list, so it must be list-like.
        url: URL used to build ``soup`` when none is supplied, and passed to
            ``func``. When falsy, each stack's own ``"url"`` entry (default
            ``""``) is passed to ``func`` instead.
        html: Forwarded to ``self._get_soup`` when ``soup`` is falsy.
        soup: Parsed document. Built via ``self._get_soup`` when falsy.
        request_args: Forwarded to ``self._get_soup`` when ``soup`` is falsy.
        grouped: When truthy (and ``group_by_alias`` is falsy), results are
            keyed by ``stack["stack_id"]``.
        group_by_alias: When truthy, results are keyed by the stack's
            ``"alias"`` entry (default ``""``); takes precedence over
            ``grouped`` for choosing the key.
        unique: Forwarded unchanged to ``self._clean_result``.
        attr_fuzz_ratio: Forwarded unchanged to ``func``.
        **kwargs: Forwarded to ``func``. ``keep_order`` (default ``False``)
            is also read here.

    Returns:
        Whatever ``self._clean_result`` returns for the collected flat list
        and grouped mapping.
    """
    if not soup:
        soup = self._get_soup(url=url, html=html, request_args=request_args)

    keep_order = kwargs.get("keep_order", False)

    if group_by_alias or (keep_order and not grouped):
        # Tag every element with its position in findChildren() order.
        # NOTE(review): presumably read downstream to restore document
        # order -- confirm against func / _clean_result.
        for index, child in enumerate(soup.findChildren()):
            child.child_index = index

    result_list = []
    grouped_result = defaultdict(list)
    for stack in self.stack_list:
        # Resolve the fallback URL per stack. Reassigning ``url`` here would
        # make the first stack's URL stick for every later stack.
        stack_url = url or stack.get("url", "")

        result = func(stack, soup, stack_url, attr_fuzz_ratio, **kwargs)

        if not grouped and not group_by_alias:
            result_list += result
            continue

        group_id = stack.get("alias", "") if group_by_alias else stack["stack_id"]
        grouped_result[group_id] += result

    return self._clean_result(
        result_list, grouped_result, grouped, group_by_alias, unique, keep_order
    )
444
445 @staticmethod
446 def _clean_result(

Callers 2

get_result_similar · Method · 0.95
get_result_exact · Method · 0.95

Calls 2

_get_soup · Method · 0.95
_clean_result · Method · 0.95

Tested by

no test coverage detected