| 81 | self.comments.update(set(remap.keys())) |
| 82 | |
| 83 | def migrate(self): |
| 84 | tree = ElementTree.parse(self.xmlfile) |
| 85 | res = defaultdict(list) |
| 86 | |
| 87 | for post in tree.findall(Disqus.ns + "post"): |
| 88 | email = post.find("{0}author/{0}email".format(Disqus.ns)) |
| 89 | ip = post.find(Disqus.ns + "ipAddress") |
| 90 | comment_text = post.find(Disqus.ns + "message").text or "" |
| 91 | |
| 92 | item = { |
| 93 | "dsq:id": post.attrib.get(Disqus.internals + "id"), |
| 94 | "text": comment_text, |
| 95 | "author": post.find("{0}author/{0}name".format(Disqus.ns)).text, |
| 96 | "email": email.text if email is not None else "", |
| 97 | "created": mktime(strptime(post.find(Disqus.ns + "createdAt").text, "%Y-%m-%dT%H:%M:%SZ")), |
| 98 | "remote_addr": anonymize(ip.text if ip is not None else "0.0.0.0"), |
| 99 | "mode": 1 if post.find(Disqus.ns + "isDeleted").text == "false" else 4, |
| 100 | } |
| 101 | |
| 102 | if post.find(Disqus.ns + "parent") is not None: |
| 103 | item["dsq:parent"] = post.find(Disqus.ns + "parent").attrib.get(Disqus.internals + "id") |
| 104 | |
| 105 | res[post.find("%sthread" % Disqus.ns).attrib.get(Disqus.internals + "id")].append(item) |
| 106 | |
| 107 | progress = Progress(len(tree.findall(Disqus.ns + "thread"))) |
| 108 | for i, thread in enumerate(tree.findall(Disqus.ns + "thread")): |
| 109 | # Workaround for not crashing with empty thread ids: |
| 110 | thread_id = thread.find(Disqus.ns + "id") |
| 111 | if not thread_id: |
| 112 | thread_id = dict(text="<empty thread id>", empty=True) |
| 113 | |
| 114 | progress.update(i, thread_id.get("text")) |
| 115 | |
| 116 | # skip (possibly?) duplicate, but empty thread elements |
| 117 | if thread_id.get("empty") and not self.empty_id: |
| 118 | continue |
| 119 | |
| 120 | id = thread.attrib.get(Disqus.internals + "id") |
| 121 | if id in res: |
| 122 | self.threads.add(id) |
| 123 | self.insert(thread, res[id]) |
| 124 | |
| 125 | # in case a comment has been deleted (and no further childs) |
| 126 | self.db.comments._remove_stale() |
| 127 | |
| 128 | progress.finish("{0} threads, {1} comments".format(len(self.threads), len(self.comments))) |
| 129 | |
| 130 | orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments |
| 131 | if orphans and not self.threads: |
| 132 | print("Isso couldn't import any thread, try again with --empty-id") |
| 133 | elif orphans: |
| 134 | print("Found %i orphans:" % len(orphans)) |
| 135 | for post in tree.findall(Disqus.ns + "post"): |
| 136 | if post.attrib.get(Disqus.internals + "id") not in orphans: |
| 137 | continue |
| 138 | |
| 139 | email = post.find("{0}author/{0}email".format(Disqus.ns)) |
| 140 | comment_text = post.find(Disqus.ns + "message").text or "" |