Encode a nested example. This is used since some features (in particular ClassLabel) have some logic during encoding. To avoid iterating over possibly long lists, it first checks (recursively) if the first element that is not None or empty (if it is a sequence) has to be encoded. If the first element needs to be encoded, then all the elements of the list will be encoded; otherwise they'll stay the same.
(schema, obj, level=0)
| 1432 | |
| 1433 | |
def encode_nested_example(schema, obj, level=0):
    """Recursively encode ``obj`` by walking it alongside ``schema``.

    Some features (ClassLabel in particular) run logic of their own when an
    example is encoded, so the value has to be visited together with its schema.

    For list-like schemas, encoding every item up front is avoided: a single
    probe item is encoded and compared with its original value. The probe is
    the first item accepted by ``_check_non_null_non_empty_recursive``, or the
    last item of ``obj`` when no item is accepted. If the encoded probe differs
    from the original, every item of the list is encoded; otherwise the items
    are returned as they are, in a new ``list``.

    Args:
        schema: One of: a ``dict`` mapping keys to sub-schemas, a
            ``LargeList``/``List`` instance (its ``feature`` attribute is used
            as the item schema), an object exposing ``encode_example``, or any
            other object.
        obj: The value to encode. May be ``None``.
        level: Nesting depth of this call; ``0`` for the outermost call. It is
            incremented by one on every recursive call.

    Returns:
        * dict schema: a new dict with one entry per key of ``schema``, each
          value being the encoded ``obj.get(key)``; ``None`` if ``obj`` is
          ``None`` at a nested level.
        * ``LargeList``/``List`` schema: ``None`` if ``obj`` is ``None``,
          otherwise a new list (encoded items, or the original items).
        * schema with ``encode_example``: ``schema.encode_example(obj)``, or
          ``None`` if ``obj`` is ``None``.
        * anything else: ``obj`` unchanged.

    Raises:
        ValueError: If ``schema`` is a dict and ``obj`` is ``None`` at
            ``level == 0``.
    """
    # Nested structures: dict schemas are encoded key by key.
    if isinstance(schema, dict):
        if obj is None:
            # A missing dictionary is only tolerated below the top level.
            if level == 0:
                raise ValueError("Got None but expected a dictionary instead")
            return None
        encoded = {}
        for key in schema:
            encoded[key] = encode_nested_example(schema[key], obj.get(key), level=level + 1)
        return encoded

    # Nested structures: list-like schemas share one item schema.
    if isinstance(schema, (LargeList, List)):
        if obj is None:
            return None
        # len() rather than truthiness: obj may be an array-like whose truth
        # value is ambiguous.
        if len(obj) > 0:
            item_schema = schema.feature
            # Select the probe: first item that is non-null / non-empty, or the
            # last item if the loop runs to completion without a match.
            for probe in obj:
                if _check_non_null_non_empty_recursive(probe, item_schema):
                    break
            try:
                encoded_probe = encode_nested_example(item_schema, probe, level=level + 1)
                needs_encoding = bool(encoded_probe != probe)
            except ValueError:  # can happen when comparing arrays
                # NOTE(review): this handler also covers a ValueError raised by
                # the probe encoding itself, which is then treated the same as
                # "nothing changed".
                needs_encoding = False
            if needs_encoding:
                return [encode_nested_example(item_schema, item, level=level + 1) for item in obj]
        return list(obj)

    # Object with special encoding:
    # ClassLabel will convert from string to int, TranslationVariableLanguages does some checks
    if hasattr(schema, "encode_example"):
        if obj is None:
            return None
        return schema.encode_example(obj)

    # Other object should be directly convertible to a native Arrow type (like Translation)
    return obj
| 1472 | |
| 1473 | |
| 1474 | def decode_nested_example(schema, obj, token_per_repo_id: Optional[dict[str, Union[str, bool, None]]] = None): |