MCPcopy
hub / github.com/DedSecInside/TorBot / LinkTree

Class LinkTree

torbot/modules/linktree.py:35–140  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

33
34
35class LinkTree(Tree):
def __init__(self, url: str, depth: int, client: httpx.Client) -> None:
    """
    Sets up an empty tree and records the crawl settings.

    url is used as the identifier of the root node when the tree is loaded,
    depth is how many levels of links are followed below that root, and
    client is the HTTP client used for every page request.
    """
    super().__init__()
    self._client = client
    self._depth = depth
    self._url = url
41
def load(self) -> None:
    """
    Populates the tree: the starting URL becomes the root node and its
    links are then expanded down to the configured depth.
    """
    root_url = self._url
    # A parent_id of None marks the node as the root.
    self._append_node(id=root_url, parent_id=None)
    self._build_tree(url=root_url, depth=self._depth)
45
def _append_node(self, id: str, parent_id: str | None) -> None:
    """
    Creates a node for a tree using the given ID which corresponds to a URL.
    If the parent_id is None, this will be considered a root node.

    The page at the URL is requested with the tree's HTTP client. The node's
    tag is the page title, falling back to parse_hostname(id) when the page
    has no <title>. The status code, classification, accuracy and the parsed
    phone numbers and emails are stored on the node as a LinkNode.

    A URL that is already present in the tree is skipped before any request
    is made, so duplicate links no longer cost a fetch, a parse and a
    classification only to be rejected afterwards.
    """
    # Node identifiers are URLs and must be unique within the tree, so a
    # known URL can be rejected up front without touching the network.
    if self.contains(id):
        logging.debug("found a duplicate URL %s", id)
        return

    resp = self._client.get(id)
    soup = BeautifulSoup(resp.text, 'html.parser')
    title = soup.title.text.strip() if soup.title is not None else parse_hostname(id)
    [classification, accuracy] = classify(resp.text)
    numbers = parse_phone_numbers(soup)
    emails = parse_emails(soup)
    data = LinkNode(title, id, resp.status_code, classification, accuracy, numbers, emails)
    try:
        # Only node creation can report a duplicate identifier; the guard
        # above makes this unlikely, but the handler is kept as a safety net.
        self.create_node(title, identifier=id, parent=parent_id, data=data)
    except exceptions.DuplicatedNodeIdError:
        logging.debug("found a duplicate URL %s", id)
62
def _build_tree(self, url: str, depth: int) -> None:
    """
    Recursively expands the tree below the given URL.

    The page at url is requested, every link parsed from it is appended as a
    child of url, and each child is then expanded with one less level of
    depth. Nothing happens once depth is zero or negative.
    """
    if depth <= 0:
        return

    remaining = depth - 1
    page = self._client.get(url)
    for link in parse_links(page.text):
        self._append_node(id=link, parent_id=url)
        self._build_tree(url=link, depth=remaining)
74
def _get_tree_file_name(self) -> str:
    """
    Builds the extension-less path used when saving the tree.

    The path is project_root_directory joined with
    '<root tag> - Depth <depth>'. Path separators in the root tag are
    replaced with underscores so the tag always stays a single file-name
    component.

    Raises an Exception when the tree has no root node.
    """
    root_node = self.get_node(self.root)
    if root_node is None:
        raise Exception('no root node can be found.')

    # The tag is a title taken from a remote page, so it may contain path
    # separators (e.g. "Home / News" or "../x"). Left as-is, os.path.join
    # would point into a nonexistent or unintended directory.
    safe_tag = str(root_node.tag)
    for separator in (os.sep, os.altsep):
        if separator:  # os.altsep is None on platforms with one separator
            safe_tag = safe_tag.replace(separator, '_')

    return os.path.join(project_root_directory, f'{safe_tag} - Depth {self._depth}')
82
def save(self) -> None:
    """
    Writes the tree as text to the path returned by _get_tree_file_name,
    with a '.txt' extension appended.
    """
    self.save2file(f'{self._get_tree_file_name()}.txt')
89
90 def saveJSON(self) -> None:
91 """
92 Saves the tree to the current working directory under the given file name in JSON.

Callers 1

runFunction · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected