Returns a new Document from the given text file path. The given text file must be generated with Document.save().
(cls, path)
| 385 | |
@classmethod
def load(cls, path):
    """ Returns a new Document from the given text file path.
        The given text file must be generated with Document.save().

        Lines in the file are interpreted as follows:
        - lines starting with "#" are appended (one per line) to the description,
        - "@name: ...", "@type: ..." and "@language: ..." (or "@lang: ...") lines
          set the corresponding property; a literal backslash-n in the value
          is unescaped to a newline,
        - blank lines are ignored,
        - any other line is "<term> <frequency>", where the frequency is the last
          space-separated field (an int if it is all digits, otherwise a float).

        Raises ValueError if a frequency field can not be parsed as a number.
    """
    # Read the raw bytes; the context manager closes the file even if reading fails.
    with open(path, "rb") as fp:
        data = fp.read()
    # Remove one leading BOM. bytes.lstrip() treats its argument as a set of
    # bytes and could also strip the first byte of a real character after it.
    if data.startswith(codecs.BOM_UTF8):
        data = data[len(codecs.BOM_UTF8):]
    text = decode_utf8(data)
    a = {}  # Document properties: name, type, lang, description.
    v = {}  # Term => frequency.
    # The original matched "@language:" but sliced by len("@lang:"), which
    # corrupted the value. Both spellings are accepted and each is sliced by its
    # own length (NOTE(review): confirm which tag Document.save() writes).
    tags = (
        ("@name:", "name"),
        ("@type:", "type"),
        ("@language:", "lang"),
        ("@lang:", "lang"),
    )
    for line in text.splitlines():
        if not line.strip():
            # A blank line has no frequency field; float("") would raise.
            continue
        if line.startswith("#"):  # comment
            a["description"] = a.get("description", "") + line.lstrip("#").strip() + "\n"
            continue
        for tag, key in tags:
            if line.startswith(tag):
                # +1 skips the separator character that follows the tag.
                a[key] = line[len(tag) + 1:].replace("\\n", "\n")
                break
        else:
            # Term and frequency: the term itself may contain spaces,
            # so only the last field is the frequency.
            fields = line.split(" ")
            w, n = " ".join(fields[:-1]), fields[-1]
            v[w] = int(n) if n.isdigit() else float(n)
    # Default to "" so a file without comment lines does not fail on None.rstrip().
    return cls(v,
        name=a.get("name"),
        type=a.get("type"),
        language=a.get("lang"),
        description=a.get("description", "").rstrip("\n"))
| 419 | |
| 420 | def save(self, path): |
| 421 | """ Saves the document as a text file at the given path. |