Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b78fac2

Browse files
Merge pull request avinashkranjan#1146 from RohiniRG/RohiniRG-apps
Playstore scraper
2 parents d9b252e + 6813f5d commit b78fac2

File tree

4 files changed

+210
-0
lines changed

4 files changed

+210
-0
lines changed

‎PlaystoreScraper/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Google Playstore Scraper
2+
3+
- This script scrapes the Google Play Store for a given search query and collects all the relevant data about each app in the results.
4+
5+
- In `fetch_apps.py`, we take the user's search query, then fetch the information for every app returned for that query and store it in a database file.
6+
7+
- In `display_apps.py`, we display the desired results from the database to the user.
8+
9+
## Setup instructions
10+
11+
- The requirements can be installed as follows:
12+
13+
```shell
14+
$ pip install -r requirements.txt
15+
```
16+
17+
## Working screenshots
18+
19+
![Image](https://i.imgur.com/BYKNvFR.png)
20+
21+
## Author
22+
[Rohini Rao](https://github.com/RohiniRG)
23+

‎PlaystoreScraper/display_apps.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import sqlite3
2+
import os
3+
4+
5+
def sql_connection():
    """
    Opens the SQLite database file used to store the scraped apps
    :return: sqlite3 connection to PlaystoreDatabase.db, resolved to an
             absolute path from the current working directory
    """
    return sqlite3.connect(os.path.abspath('PlaystoreDatabase.db'))
13+
14+
15+
def sql_fetcher(con):
    """
    Prompts for a search query and prints every app stored in the
    database for exactly that query
    :param con: open sqlite3 connection to the database holding the apps table
    :return: None (the matching rows and a summary line are printed)
    """
    query = input("\nEnter query to search: ")
    cur = con.cursor()
    # Filter on the QUERY column in SQL instead of scanning every column of
    # every row in Python: a row whose NAME (or any other field) merely
    # equals the input must not be reported as a hit for that query.
    cur.execute('SELECT * FROM apps WHERE QUERY = ?', (query,))
    rows = cur.fetchall()

    for r in rows:
        # Column 0 is the query itself, so the app data starts at index 1.
        print(f'\nURL: {r[1]}\nNAME: {r[2]}\nRATING: {r[3]}\n'
              f'REVIEWS: {r[4]}\nINSTALLS: {r[5]}\nVERSION: {r[6]}'
              f'\nLASTUPDATE: {r[7]}\nCOMPANY: {r[8]}\nCONTACT: {r[9]}')

    if rows:
        print(f'{len(rows)} apps fetched from database\n')
    else:
        print('\nNo apps stored for this query\n')
38+
39+
40+
con = sql_connection()

# Keep serving lookups until the user answers anything other than "y".
keep_going = True
while keep_going:
    sql_fetcher(con)

    reply = input('\nPress (y) to continue or any other key to exit: ')
    keep_going = reply.lower() == 'y'
    if not keep_going:
        print('\nExiting..\n')
51+

‎PlaystoreScraper/fetch_apps.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import time
2+
from selenium import webdriver
3+
import sqlite3
4+
5+
6+
def sql_connection():
    """
    Opens the SQLite database file used to store the scraped apps
    :return: sqlite3 connection to PlaystoreDatabase.db (path is relative
             to the current working directory)
    """
    db_file = 'PlaystoreDatabase.db'
    return sqlite3.connect(db_file)
13+
14+
15+
def sql_table(con):
    """
    Makes sure the apps table exists, creating it on first use, and
    commits the change
    :param con: open sqlite3 connection
    :return: None
    """
    create_stmt = (
        "CREATE TABLE IF NOT EXISTS apps(QUERY text, URL text, NAME text, RATING text, "
        " REVIEWS text, INSTALLS text, VERSION text, LASTUPDATE text, "
        " COMPANY text, CONTACT text)"
    )
    cursor = con.cursor()
    cursor.execute(create_stmt)
    con.commit()
27+
28+
29+
def sql_insert_table(con, entities):
    """
    Inserts one app record into the apps table and commits it
    :param con: open sqlite3 connection whose database has the apps table
    :param entities: 10-item sequence in column order (query, url, name,
                     rating, reviews, installs, version, lastupdate,
                     company, contact)
    :return: None
    """
    cur = con.cursor()
    # The column list takes bare column names only; the stray type keyword
    # after QUERY made SQLite reject every insert with a syntax error.
    cur.execute('INSERT INTO apps(QUERY, URL, NAME, RATING, REVIEWS, '
                'INSTALLS, VERSION, LASTUPDATE, COMPANY, CONTACT) '
                'VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', entities)
    con.commit()
41+
42+
43+
# Imported here because this script block is its only user.
from urllib.parse import quote_plus


def _scroll_to_bottom(driver, pause=5):
    """Scroll down repeatedly until the page height stops growing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    time.sleep(pause)

    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Give the page time to append more results before measuring again.
        time.sleep(pause)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def _collect_app_urls(driver):
    """Return the unique app-detail links on the current page, in page order."""
    hrefs = (elem.get_attribute("href")
             for elem in driver.find_elements_by_xpath("//a[@href]"))
    # dict.fromkeys de-duplicates while keeping first-seen order; a missing
    # href (None) is skipped instead of raising on the substring test.
    return list(dict.fromkeys(
        href for href in hrefs if href and "details?id" in href))


def _scrape_app(driver, query, url):
    """Open one app page and return its row for the apps table."""
    driver.get(url)
    time.sleep(3)

    # NOTE(review): the CSS class names below are tied to the Play Store page
    # markup this script was written against - confirm they still match.
    name = driver.find_element_by_tag_name("h1").text
    rating = driver.find_element_by_class_name("BHMmbe").text
    reviews = driver.find_element_by_class_name("EymY4b").text.split()[0]

    # Only every second "htlgb" element is used as a value, paired by
    # position with the "BgcNfc" header elements.
    stats = [elem.text
             for elem in driver.find_elements_by_class_name("htlgb")[::2]]
    headers = [elem.text
               for elem in driver.find_elements_by_class_name("BgcNfc")]

    # Start every app from empty fields so that a page lacking a section is
    # stored as NULL rather than inheriting the previous app's value.
    details = dict.fromkeys(
        ("Installs", "Current Version", "Updated", "Offered By"))
    contact = None
    for header, value in zip(headers, stats):
        if header in details:
            details[header] = value
        elif header == "Developer":
            # The first line containing "@" is taken as the contact address.
            contact = next(
                (line for line in value.split("\n") if "@" in line), contact)

    return (query, url, name, rating, reviews, details["Installs"],
            details["Current Version"], details["Updated"],
            details["Offered By"], contact)


driver = webdriver.Chrome()

con = sql_connection()
sql_table(con)

try:
    while True:
        query = input("\nEnter search query: ")

        # Encode the query so spaces, "&", "#" etc. cannot break the URL; the
        # raw query text is what gets stored in the database.
        driver.get('https://play.google.com/store/search'
                   f'?q={quote_plus(query)}&c=apps')

        print('\nGetting all the desired info...\n')
        time.sleep(5)

        _scroll_to_bottom(driver)

        for app_url in _collect_app_urls(driver):
            try:
                sql_insert_table(con, _scrape_app(driver, query, app_url))
            except Exception as exc:
                # Best effort: one unreadable app page must not abort the
                # whole run, but report it instead of hiding the failure.
                print(f'Skipping {app_url}: {exc}')
                continue

        print('\nAll info collected successfully!!\n')

        ans = input('Press (y) to continue or any other key to exit: ').lower()
        if ans != 'y':
            print('Exiting..')
            break
finally:
    # Always release the browser and the database, even after an error.
    driver.quit()
    con.close()
135+

‎PlaystoreScraper/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
selenium==3.141.0

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /