MCPcopy Index your code
hub / github.com/tensorflow/datasets / checksum

Function checksum

tensorflow_datasets/testing/dataset_builder_testing.py:606–652  ·  view source on GitHub ↗

Computes the md5 for a given example.

(example)

Source from the content-addressed store, hash-verified

604
605
def checksum(example):
  """Returns the hexadecimal MD5 digest of a (possibly nested) example.

  The example is walked recursively and flattened into an ordered list of
  text/bytes fragments. Text fragments are UTF-8 encoded, everything is
  concatenated, and the result is hashed.

  Args:
    example: The value to hash. Numbers, dicts, strings, ragged tensors,
      numpy arrays/scalars and `dataset_utils._IterableDataset` get dedicated
      handling; any other value is passed through `bytes()`.

  Returns:
    The MD5 hex digest (a `str`) of the flattened example.
  """

  def _collect(fragments, item):
    """Appends the flattened pieces of `item` to `fragments`; returns it."""
    if isinstance(item, numbers.Number):
      # bytes(-3) is rejected in Python 3 (and large numbers are a problem
      # too), so numbers are hashed through their string form instead.
      item = str(item)

    if isinstance(item, dict):
      # Sorted items make the result independent of insertion order.
      for key, value in sorted(item.items()):
        fragments.append(key)
        _collect(fragments, value)
      return fragments

    if isinstance(item, str):
      if hasattr(item, "decode"):
        # Python 2 legacy: `str` was bytes there, almost always latin-1
        # encoded; decoding first avoided a DecodeError later on.
        item = item.decode("latin-1")
      fragments.append(item)
      return fragments

    if isinstance(
        item, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)
    ):
      fragments.append(str(item.to_list()))
      return fragments

    if isinstance(item, (np.ndarray, np.generic)):
      if element_is_object := (item.dtype.type is np.object_):
        # Object-dtype arrays are hashed through the string form of their
        # shape and of their flattened contents rather than a raw buffer.
        fragments.append(str(tuple(item.shape)))
        fragments.append(str(list(item.ravel())))
      if not element_is_object:
        fragments.append(item.tobytes())
      return fragments

    if isinstance(item, dataset_utils._IterableDataset):  # pylint: disable=protected-access
      for nested in item:
        _collect(fragments, nested)
      return fragments

    # Fallback for every other type.
    fragments.append(bytes(item))
    return fragments

  fragments = _collect([], example)
  payload = b"".join(
      piece if isinstance(piece, bytes) else piece.encode("utf-8")
      for piece in fragments
  )
  return hashlib.md5(payload).hexdigest()
653
654
655def compare_shapes_and_types(tensor_info, element_spec):

Callers 1

_assertAsDatasetMethod · 0.85

Calls 4

_bytes_flattenFunction · 0.85
joinMethod · 0.80
encodeMethod · 0.45
updateMethod · 0.45

Tested by

no test coverage detected