(img, url, index, total_images)
| 527 | return None |
| 528 | |
| 529 | def process_image(img, url, index, total_images): |
| 530 | #Check that an image is displayed and is not inside undesired HTML elements |
def is_valid_image(img, parent, parent_classes):
    """Return True when ``img`` passes the visibility and context checks.

    An image is rejected when any of the following holds:

    * its inline ``style`` hides it with ``display:none`` (matched
      ignoring case and whitespace, so ``display: none`` is caught too);
    * it has no ``src``;
    * its ``src``, its ``alt`` text or one of ``parent_classes`` contains
      ``button``, ``icon`` or ``logo``;
    * its parent tag is a ``button`` or an ``input``.

    Args:
        img: Element exposing ``.get(attr, default)`` for its attributes.
        parent: Parent element; only its ``.name`` attribute is read.
        parent_classes: Iterable of class-name strings of the parent.

    Returns:
        bool: True if the image passes every check, False otherwise.
    """
    classes_to_check = ['button', 'icon', 'logo']
    tags_to_check = ['button', 'input']

    # CSS property names/keywords are case-insensitive and whitespace around
    # ':' is optional, so normalise before looking for the hidden marker.
    style = img.get('style', '') or ''
    normalized_style = ''.join(style.split()).lower()
    if 'display:none' in normalized_style:
        return False

    src = img.get('src', '')
    if not src:
        return False

    candidates = [src, img.get('alt', ''), *parent_classes]
    if any(marker in text for text in candidates for marker in classes_to_check):
        return False

    return parent.name not in tags_to_check
| 542 | |
| 543 | #Score an image for its usefulness |
| 544 | def score_image_for_usefulness(img, base_url, index, images_count): |
| 545 | # Function to parse image height/width value and units |
def parse_dimension(dimension):
    """Split an image height/width attribute into a number and a unit.

    Leading/trailing whitespace is ignored, a fractional part is dropped
    (``"100.5px"`` -> ``100``), and the unit is lower-cased so callers can
    compare it against ``'px'``, ``'%'``, ``'vh'`` and similar directly.

    Args:
        dimension: Attribute value such as ``"150"``, ``"150px"`` or
            ``"30%"``; may be None or empty.

    Returns:
        tuple: ``(number, unit)`` where ``number`` is an int and ``unit`` a
        string defaulting to ``'px'``, or ``(None, None)`` when the value
        is missing or does not start with digits.
    """
    if not dimension:
        return None, None

    # Integer part, optional ignored fraction, optional whitespace, then an
    # alphabetic or '%' unit. Anything after the unit is ignored.
    match = re.match(r"\s*(\d+)(?:\.\d+)?\s*([a-zA-Z%]*)", str(dimension))
    if not match:
        return None, None

    number = int(match.group(1))
    unit = match.group(2).lower() or 'px'  # Default unit is 'px' if not specified
    return number, unit
| 554 | |
| 555 | # Fetch image file metadata (HTTP HEAD) to read the file size from Content-Length |
def fetch_image_file_size(img, base_url):
    """Return the image's ``Content-Length`` header via an HTTP HEAD request.

    The original version ended with ``finally: return``, which overrode
    every earlier ``return`` and made the function always yield None.
    That clause is removed; request failures are handled explicitly.

    Args:
        img: Element exposing ``.get('src')``.
        base_url: URL of the page, used to resolve a relative ``src``.

    Returns:
        The ``Content-Length`` header value (a string) when the server
        answers 200 and sends the header; None when ``src`` is missing,
        the request fails, the status is not 200, or the header is absent.
    """
    src = img.get('src')
    if not src:
        # urljoin(base_url, None) would return base_url itself, so we would
        # end up measuring the page instead of an image.
        return None

    # If src is a relative path this builds the full URL; an absolute
    # (e.g. CDN) URL is returned unchanged by urljoin.
    img_url = urljoin(base_url, src)
    try:
        # A timeout keeps one unresponsive host from stalling the caller.
        response = requests.head(img_url, timeout=10)
    except requests.RequestException:
        # Covers InvalidSchema (e.g. data: URIs) plus connection errors and
        # timeouts; file size is best-effort, so report "unknown".
        return None

    if response.status_code == 200:
        return response.headers.get('Content-Length')

    print(f"Failed to retrieve file size for {img_url}")
    return None
| 570 | |
| 571 | image_height = img.get('height') |
| 572 | height_value, height_unit = parse_dimension(image_height) |
| 573 | image_width = img.get('width') |
| 574 | width_value, width_unit = parse_dimension(image_width) |
| 575 | image_size = 0 #int(fetch_image_file_size(img,base_url) or 0) |
| 576 | image_format = os.path.splitext(img.get('src',''))[1].lower() |
| 577 | # Remove . from format |
| 578 | image_format = image_format.strip('.') |
| 579 | score = 0 |
| 580 | if height_value: |
| 581 | if height_unit == 'px' and height_value > 150: |
| 582 | score += 1 |
| 583 | if height_unit in ['%','vh','vmin','vmax'] and height_value >30: |
| 584 | score += 1 |
| 585 | if width_value: |
| 586 | if width_unit == 'px' and width_value > 150: |
no test coverage detected
searching dependent graphs…