MCPcopy
hub / github.com/SqueezeAILab/LLMCompiler / post_process

Method post_process

src/docstore/wikipedia.py:78–109  ·  view source on GitHub ↗
(
        self, response_text: str, entity: str, skip_retry_when_postprocess: bool = False
    )

Source from the content-addressed store, hash-verified

76 return alternative
77
def post_process(
    self, response_text: str, entity: str, skip_retry_when_postprocess: bool = False
) -> str:
    """Convert the HTML of a search response into an observation string.

    The parsed HTML is classified into one of three cases:

    * Search-result headings (``div.mw-search-result-heading``) exist:
      their cleaned titles are stored on ``self.result_titles`` and the
      first five are offered as alternatives in a "Could not find" message.
    * Some ``<p>``/``<ul>`` text contains "may refer to:": either a plain
      "Could not find" message is produced (when retrying is disabled by
      the argument or by ``self.skip_retry_when_postprocess``), or
      ``self.search`` is called again with the entity in square brackets.
    * Otherwise: the kept text pieces are stored on ``self.page``, the
      lookup attributes are reset to ``None``, and the observation is
      whatever ``self._get_page_obs`` returns for that page.

    Args:
        response_text: HTML text to parse.
        entity: Name that was searched for; used in the messages and in
            the retry query.
        skip_retry_when_postprocess: Per-call switch that disables the
            retry search; the instance-level flag has the same effect.

    Returns:
        The observation with every literal backslash-followed-by-``n``
        two-character sequence removed (real newlines are left alone).
    """
    parsed = BeautifulSoup(response_text, features="html.parser")
    headings = parsed.find_all("div", {"class": "mw-search-result-heading"})

    if headings:  # mismatch
        self.result_titles = [
            clean_str(heading.get_text().strip()) for heading in headings
        ]
        suggestions = self.result_titles[:5]
        observation = f"Could not find {entity}. Similar: {suggestions}."
    else:
        elements = parsed.find_all("p") + parsed.find_all("ul")
        texts = [element.get_text().strip() for element in elements]

        if not any("may refer to:" in text for text in texts):
            # Keep only pieces with more than two space-separated tokens,
            # each terminated by exactly one newline.
            pieces = []
            for text in texts:
                if len(text.split(" ")) <= 2:
                    continue
                terminator = "" if text.endswith("\n") else "\n"
                pieces.append(clean_str(text) + terminator)
            self.page = "".join(pieces)
            observation = self._get_page_obs(self.page)
            # A new page invalidates any lookup state from the previous one.
            self.lookup_keyword = self.lookup_list = self.lookup_cnt = None
        elif skip_retry_when_postprocess or self.skip_retry_when_postprocess:
            observation = "Could not find " + entity + "."
        else:
            observation = self.search("[" + entity + "]", is_retry=True)

    return observation.replace("\\n", "")
110
111 async def apost_process(
112 self, response_text: str, entity: str, skip_retry_when_postprocess: bool = False

Callers 1

search (Method) · 0.95

Calls 3

search (Method) · 0.95
_get_page_obs (Method) · 0.95
clean_str (Function) · 0.85

Tested by

no test coverage detected