MCPcopy Index your code
hub / github.com/pydata/xarray / EncodedStringCoder

Class EncodedStringCoder

xarray/coding/strings.py:50–98  ·  view source on GitHub ↗

Transforms between unicode strings and fixed-width UTF-8 bytes.

Source from the content-addressed store, hash-verified

48
49
class EncodedStringCoder(VariableCoder):
    """Transforms between unicode strings and fixed-width UTF-8 bytes."""

    def __init__(self, allows_unicode=True):
        # When falsy, ``encode`` converts unicode data to bytes even if the
        # variable's encoding does not request ``dtype == "S1"``.
        self.allows_unicode = allows_unicode

    def encode(self, variable: Variable, name=None) -> Variable:
        """Encode ``variable``, turning unicode data into bytes when required.

        Unicode data is converted with ``encode_string_array`` when the
        encoding requests ``dtype == "S1"`` or when this coder was created
        with ``allows_unicode`` falsy. The string encoding is taken from the
        ``_Encoding`` entry of the variable's encoding (default ``"utf-8"``)
        and stored under ``_Encoding`` in the attributes via ``safe_setitem``.
        In every other case the variable is returned with its ``encoding``
        replaced by the unpacked encoding dict.

        Raises
        ------
        NotImplementedError
            If unicode data has to be converted to bytes and the variable
            has a ``_FillValue`` attribute.
        """
        dims, data, attrs, encoding = unpack_for_encoding(variable)

        if data.dtype.kind == "T":
            # StringDType: replace nulls and convert to fixed-width unicode
            # (U), which all backends support natively (GH11199)
            as_objects = np.asarray(data, dtype=object)
            null_mask = as_objects == None  # noqa: E711
            as_objects[null_mask] = ""
            data = np.asarray(as_objects, dtype="U")
            variable = Variable(dims, data, attrs, encoding)

        contains_unicode = is_unicode_dtype(data.dtype)
        encode_as_char = encoding.get("dtype") == "S1"
        if encode_as_char:
            # The "S1" request has been noted; the entry is no longer relevant.
            del encoding["dtype"]

        must_convert_to_bytes = contains_unicode and (
            encode_as_char or not self.allows_unicode
        )
        if not must_convert_to_bytes:
            # Nothing to convert: hand back the variable, carrying the
            # (possibly trimmed) encoding dict.
            variable.encoding = encoding
            return variable

        if "_FillValue" in attrs:
            raise NotImplementedError(
                f"variable {name!r} has a _FillValue specified, but "
                "_FillValue is not yet supported on unicode strings: "
                "https://github.com/pydata/xarray/issues/1647"
            )

        string_encoding = encoding.pop("_Encoding", "utf-8")
        safe_setitem(attrs, "_Encoding", string_encoding, name=name)
        # TODO: figure out how to handle this in a lazy way with dask
        encoded = encode_string_array(data, string_encoding)
        return Variable(dims, encoded, attrs, encoding)

    def decode(self, variable: Variable, name=None) -> Variable:
        """Decode bytes data when the attributes carry an ``_Encoding`` entry.

        When ``_Encoding`` is present it is handed to ``pop_to`` (which, by
        its arguments, presumably moves the entry from the attributes to the
        encoding dict -- confirm against its definition), and
        ``decode_bytes_array`` with that encoding is applied through
        ``lazy_elemwise_func`` with an ``object`` result dtype. Without
        ``_Encoding`` the data is passed through unchanged. In both cases a
        new ``Variable`` is built from the unpacked parts.
        """
        dims, data, attrs, encoding = unpack_for_decoding(variable)

        if "_Encoding" not in attrs:
            return Variable(dims, data, attrs, encoding)

        string_encoding = pop_to(attrs, encoding, "_Encoding")
        decoder = partial(decode_bytes_array, encoding=string_encoding)
        decoded = lazy_elemwise_func(data, decoder, np.dtype(object))
        return Variable(dims, decoded, attrs, encoding)
99
100
101def decode_bytes_array(bytes_array, encoding="utf-8"):

Callers 1

_encode_nc4_variable — Function · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild: real call sites across dependent graphs

searching dependent graphs…