MCPcopy Index your code
hub / github.com/geekcomputers/Python / Phonearena

Class Phonearena

mobilePhoneSpecsScrapper.py:11–95  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

9
10
11class Phonearena:
def __init__(self):
    """Initialise the scraper's containers, target URL and output location."""
    # Listing URL of the PhoneArena phone catalogue that the crawl starts from.
    self.url = "https://www.phonearena.com/phones/"
    # Initial feature names.
    self.features = ["Brand", "Model Name", "Model Image"]
    # Working lists; each one starts out empty.
    self.phones = []
    self.temp1 = []
    self.phones_brands = []
    # Name of the folder in which the output files are going to be saved.
    self.new_folder_name = "GSMArenaDataset"
    # Absolute path of that folder, located under the current working directory.
    self.absolute_path = "/".join((os.getcwd().strip(), self.new_folder_name))
22
def crawl_html_page(self, sub_url):
    """Download ``sub_url`` and return its HTML parsed with BeautifulSoup.

    Args:
        sub_url: Full URL of the page to fetch.

    Returns:
        The ``BeautifulSoup`` tree built from the response text using the
        "html.parser" backend.

    Raises:
        SystemExit: With status 1, after printing a hint, when the request
            fails (connection error, timeout, invalid URL, ...).
    """
    try:
        # Without a timeout a single stalled connection would block the
        # whole crawl forever.
        page = requests.get(sub_url, timeout=30)
    except requests.exceptions.RequestException:
        # requests raises its own exception hierarchy; the builtin
        # ConnectionError does not cover it.
        print("Please check your network connection and re-run the script.")
        # Raise SystemExit directly rather than relying on the `exit()`
        # helper injected by the `site` module, and signal the failure
        # with a non-zero status.
        raise SystemExit(1) from None

    # Parse the HTML data of the requested URL.
    return BeautifulSoup(page.text, "html.parser")
40
def crawl_phone_urls(self, last_page=237):
    """Collect phone page links from the paginated phone listing.

    Args:
        last_page: Number of the final listing page to visit; pages
            ``1..last_page`` are crawled in order. Defaults to 237, the
            page count the listing had when the script was written.

    Returns:
        list: The ``href`` of the first link found in every
        ``div.stream-item`` element, in crawl order. Items without a link
        (or whose link has no ``href``) are skipped.
    """
    phones_urls = []
    for page_number in range(1, last_page + 1):
        page_url = self.url + "page/" + str(page_number)
        print(page_url)
        soup = self.crawl_html_page(page_url)
        for item in soup.find_all("div", {"class": "stream-item"}):
            anchor = item.find("a")
            # An item may contain no <a> tag at all; indexing None would
            # abort the entire crawl, so such items are ignored.
            if anchor is None:
                continue
            href = anchor.get("href")
            if href:
                phones_urls.append(href)
    return phones_urls
52
53 def crawl_phones_models_specification(self, li):
54 phone_data = {}
55 for link in li:
56 print(link)
57 try:
58 soup = self.crawl_html_page(link)
59 model = soup.find(class_="page__section page__section_quickSpecs")
60 model_name = model.find("header").h1.text
61 model_img_html = model.find(class_="head-image")
62 model_img = model_img_html.find("img")["data-src"]
63 specs_html = model.find(
64 class_="phone__section phone__section_widget_quickSpecs"
65 )
66 release_date = specs_html.find(class_="calendar")
67 release_date = release_date.find(class_="title").p.text
68 display = specs_html.find(class_="display")

Callers 1

Calls

no outgoing calls

Tested by

no test coverage detected