Computes the md5 for a given example.
(example)
| 604 | |
| 605 | |
| 606 | def checksum(example): |
| 607 | """Computes the md5 for a given example.""" |
| 608 | |
| 609 | def _bytes_flatten(flat_str, element): |
| 610 | """Recursively flatten an element to its byte representation.""" |
| 611 | if isinstance(element, numbers.Number): |
| 612 | # In python3, bytes(-3) is not allowed (or large numbers), |
| 613 | # so convert to str to avoid problems. |
| 614 | element = str(element) |
| 615 | if isinstance(element, dict): |
| 616 | for k, v in sorted(element.items()): |
| 617 | flat_str.append(k) |
| 618 | _bytes_flatten(flat_str, v) |
| 619 | elif isinstance(element, str): |
| 620 | if hasattr(element, "decode"): |
| 621 | # Python2 considers bytes to be str, but are almost always latin-1 |
| 622 | # encoded bytes here. Extra step needed to avoid DecodeError. |
| 623 | element = element.decode("latin-1") |
| 624 | flat_str.append(element) |
| 625 | elif isinstance( |
| 626 | element, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue) |
| 627 | ): |
| 628 | flat_str.append(str(element.to_list())) |
| 629 | elif isinstance(element, (np.ndarray, np.generic)): |
| 630 | # tf.Tensor() returns np.array of dtype object, which don't work |
| 631 | # with x.to_bytes(). So instead convert numpy into list. |
| 632 | if element.dtype.type is np.object_: |
| 633 | flat_str.append(str(tuple(element.shape))) |
| 634 | flat_str.append(str(list(element.ravel()))) |
| 635 | else: |
| 636 | flat_str.append(element.tobytes()) |
| 637 | elif isinstance(element, dataset_utils._IterableDataset): # pylint: disable=protected-access |
| 638 | for nested_e in element: |
| 639 | _bytes_flatten(flat_str, nested_e) |
| 640 | else: |
| 641 | flat_str.append(bytes(element)) |
| 642 | return flat_str |
| 643 | |
| 644 | flat_str = _bytes_flatten([], example) |
| 645 | flat_bytes = [ |
| 646 | s.encode("utf-8") if not isinstance(s, bytes) else s for s in flat_str |
| 647 | ] |
| 648 | flat_bytes = b"".join(flat_bytes) |
| 649 | |
| 650 | hash_ = hashlib.md5() |
| 651 | hash_.update(flat_bytes) |
| 652 | return hash_.hexdigest() |
| 653 | |
| 654 | |
| 655 | def compare_shapes_and_types(tensor_info, element_spec): |
no test coverage detected