| 54 | self.allows_unicode = allows_unicode |
| 55 | |
def encode(self, variable: Variable, name=None) -> Variable:
    """Encode the string data of ``variable`` as bytes when that is required.

    StringDType data (dtype kind ``"T"``) is first converted to a
    fixed-width unicode array, with null entries replaced by empty strings.

    Unicode data is then passed through ``encode_string_array`` when either
    the variable's encoding requests ``dtype == "S1"`` or this coder was
    configured with a falsy ``allows_unicode``. The codec is taken from the
    ``"_Encoding"`` entry of the encoding (``"utf-8"`` when absent) and is
    stored on the attributes under ``"_Encoding"``.

    Parameters
    ----------
    variable : Variable
        The variable to encode.
    name : optional
        Name of the variable; used in the error message and forwarded to
        ``safe_setitem``.

    Returns
    -------
    Variable
        A new variable holding the encoded data, or ``variable`` itself
        (with its ``encoding`` reassigned) when no byte encoding is needed.

    Raises
    ------
    NotImplementedError
        If the data has to be encoded and the attributes contain
        ``"_FillValue"``.
    """
    dims, data, attrs, encoding = unpack_for_encoding(variable)

    if data.dtype.kind == "T":
        # StringDType: go through an object array so nulls can be swapped
        # for "" before converting to fixed-width unicode (U), which all
        # backends support natively (GH11199)
        as_objects = np.asarray(data, dtype=object)
        as_objects[as_objects == None] = ""  # noqa: E711
        data = np.asarray(as_objects, dtype="U")
        variable = Variable(dims, data, attrs, encoding)

    contains_unicode = is_unicode_dtype(data.dtype)
    encode_as_char = encoding.get("dtype") == "S1"
    if encode_as_char:
        # the "S1" request has been noted; the entry is no longer relevant
        encoding.pop("dtype")

    # Pass-through: nothing to encode unless the data is unicode and either
    # a char encoding was requested or unicode is not allowed.
    if not contains_unicode or (not encode_as_char and self.allows_unicode):
        variable.encoding = encoding
        return variable

    if "_FillValue" in attrs:
        raise NotImplementedError(
            f"variable {name!r} has a _FillValue specified, but "
            "_FillValue is not yet supported on unicode strings: "
            "https://github.com/pydata/xarray/issues/1647"
        )

    string_encoding = encoding.pop("_Encoding", "utf-8")
    safe_setitem(attrs, "_Encoding", string_encoding, name=name)
    # TODO: figure out how to handle this in a lazy way with dask
    encoded = encode_string_array(data, string_encoding)
    return Variable(dims, encoded, attrs, encoding)
| 89 | |
| 90 | def decode(self, variable: Variable, name=None) -> Variable: |
| 91 | dims, data, attrs, encoding = unpack_for_decoding(variable) |