DuckDuckGo search engine class. Methods are inherited from the duckduckgo_search package; do not change the methods. Currently, the package is not working with our current setup.
| 84 | |
| 85 | |
| 86 | class DuckDuckGoSearch: |
| 87 | """DuckDuckGo search engine class. |
| 88 | methods are inherited from the duckduckgo_search package. |
| 89 | do not change the methods. |
| 90 | |
| 91 | currently, the package is not working with our current setup. |
| 92 | """ |
def __init__(self):
    """Set up an empty result slot and the HTTP session used for requests.

    The session is a ``curl_cffi`` session created with
    ``impersonate="chrome"`` and ``allow_redirects=False``; every request
    made through it carries a DuckDuckGo ``Referer`` header.
    """
    # Function-scope import: curl_cffi is only looked up when an instance
    # is actually created.
    from curl_cffi import requests as cffi_requests

    # Holds the list of hits produced by the most recent duck() call.
    self.query_result = None

    session = cffi_requests.Session(impersonate="chrome", allow_redirects=False)
    session.headers["Referer"] = "https://duckduckgo.com/"
    self.asession = session
| 98 | |
| 99 | def _get_url(self, method, url, data): |
| 100 | try: |
| 101 | resp = self.asession.request(method, url, data=data) |
| 102 | if resp.status_code == 200: |
| 103 | return resp.content |
| 104 | if resp.status_code == (202, 301, 403): |
| 105 | raise Exception(f"Error: {resp.status_code} rate limit error") |
| 106 | if not resp: |
| 107 | return None |
| 108 | except Exception as error: |
| 109 | if "timeout" in str(error).lower(): |
| 110 | raise TimeoutError("Duckduckgo timed out error") |
| 111 | |
def duck(self, query):
    """Run a DuckDuckGo text search and store the hits in ``self.query_result``.

    Steps, in order:
      1. POST the query to ``https://duckduckgo.com/`` and pull the ``vqd``
         value out of the response with ``self.extract_vqd``.
      2. GET ``https://links.duckduckgo.com/d.js`` with the query and ``vqd``
         and decode the response with ``self.text_extract_json``.
      3. Keep each row that has a link (key ``"u"``) different from the
         google.com search URL for the same query and a non-empty
         normalized body (key ``"a"``).

    Args:
        query: Search text.

    Returns:
        None. ``self.query_result`` is set to a list of dicts with the keys
        ``"title"``, ``"href"`` and ``"body"``.
    """
    from urllib.parse import urlencode

    resp = self._get_url("POST", "https://duckduckgo.com/", data={"q": query})
    vqd = self.extract_vqd(resp)

    params = {"q": query, "kl": "en-us", "p": "1", "s": "0", "df": "", "vqd": vqd, "ex": ""}
    # _get_url only forwards a request body (``data``). Parameters of a GET
    # belong in the query string, so encode them into the URL instead.
    links_url = "https://links.duckduckgo.com/d.js?" + urlencode(params)
    resp = self._get_url("GET", links_url, None)
    page_data = self.text_extract_json(resp)

    # Loop-invariant: rows pointing at this URL are not kept as results.
    google_search_url = f"http://www.google.com/search?q={query}"

    results = []
    for row in page_data:
        href = row.get("u")
        if not href or href == google_search_url:
            continue
        body = self.normalize(row["a"])
        if not body:
            continue
        results.append(
            {
                "title": self.normalize(row["t"]),
                "href": self.normalize_url(href),
                # Reuse the value computed above instead of normalizing twice.
                "body": body,
            }
        )

    self.query_result = results
| 134 | |
def search(self, query):
    """Search DuckDuckGo for ``query``.

    Delegates to ``self.duck``; nothing is returned from this method.

    Args:
        query: Search text, passed through unchanged.
    """
    self.duck(query)
    return None
| 137 | |
def get_first_link(self):
    """Return the ``"href"`` of the first entry in ``self.query_result``.

    No guarding is done here: indexing fails if ``self.query_result`` is
    ``None`` or an empty list.
    """
    top_hit = self.query_result[0]
    return top_hit["href"]
| 140 | |
| 141 | @staticmethod |
| 142 | def extract_vqd(html_bytes: bytes) -> str: |
| 143 | patterns = [(b'vqd="', 5, b'"'), (b"vqd=", 4, b"&"), (b"vqd='", 5, b"'")] |