Returns a table from the data in the given text file. Rows are expected to be separated by a newline. Columns are expected to be separated by the given separator (by default, comma). Strings will be converted to int, float, bool, date or None if headers are parsed. For other data types, a custom string decoder can be given.
(cls, path, separator=",", decoder=lambda v: v, headers=False, preprocess=lambda s: s)
| 1597 | |
@classmethod
def load(cls, path, separator=",", decoder=lambda v: v, headers=False, preprocess=lambda s: s):
    """ Returns a table from the data in the given text file.
        Rows are expected to be separated by a newline.
        Columns are expected to be separated by the given separator (by default, comma).
        Strings will be converted to int, float, bool, date or None if headers are parsed.
        For other data types, a custom string decoder can be given.
        The given preprocess function is applied to the raw file contents
        (after the UTF-8 byte order mark is removed) before the contents are parsed.
        If headers=True and the file contains no rows, an empty table is returned.
    """
    # Date objects are saved and loaded as strings, but it is easy to convert these back to dates:
    # - set a DATE field type for the column,
    # - or do Table.columns[x].map(lambda s: date(s))
    # Read inside a with-block so the file handle is always closed.
    with open(path, "rb") as f:
        data = f.read().replace(BOM_UTF8, "")
    data = preprocess(data)
    data = "\n".join(data.splitlines()) # Excel \r => \n
    data = list(csv.reader(StringIO(data), delimiter=separator))
    if headers and data:
        # The first row holds the field definitions.
        fields = [csv_header_decode(field) for field in data.pop(0)]
        # Pad with untyped fields if some row has more columns than the header
        # (a negative difference multiplies to an empty list).
        fields += [(None, None)] * (max([0] + [len(row) for row in data]) - len(fields))
    else:
        # No headers requested, or nothing to pop a header row from.
        fields = []
    if not fields:
        # Cast fields using the given decoder (by default, all strings + None).
        data = [[decoder(decode_utf8(v) if v != "None" else None) for v in row] for row in data]
    else:
        # Cast fields to their defined field type (STRING, INTEGER, ...)
        for row in data:
            for j, v in enumerate(row):
                field_type = fields[j][1]
                if v == "None":
                    row[j] = decoder(None)
                elif field_type is None:
                    row[j] = decoder(decode_utf8(v))
                elif field_type in (STRING, TEXT):
                    row[j] = decode_utf8(v)
                elif field_type == INTEGER:
                    row[j] = int(v)
                elif field_type == FLOAT:
                    row[j] = float(v)
                elif field_type == BOOLEAN:
                    # bool("False") is True for any non-empty string,
                    # so interpret the text instead of its truthiness.
                    row[j] = v.strip().lower() not in ("", "0", "false")
                elif field_type == DATE:
                    row[j] = date(v)
                elif field_type == BLOB:
                    # Kept exactly as read from the file.
                    row[j] = v
                else:
                    # Unknown field type: fall back to the custom decoder.
                    row[j] = decoder(decode_utf8(v))
    return cls(rows=data, fields=fields)
| 1646 | |
| 1647 | #--- DATASHEET ------------------------------------------------------------------------------------- |
| 1648 |
no test coverage detected