Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6ee448f

Browse files
create file for writing cleaned data in csv format
1 parent b21cfa8 commit 6ee448f

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed

‎write_csv_data.py‎

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,81 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
4+
base_url = "https://www.yelp.com/search?find_desc=Restaurants&find_loc={}&start={}"
city = "los+angeles"
start = 0
file_path = f'yelp-{city}-clean.txt'

# Paging: result offsets begin at ``start``, advance by PAGE_SIZE and stop
# before MAX_START.
PAGE_SIZE = 30
MAX_START = 990
# Seconds to wait for the server before giving up on a page; requests applies
# no timeout unless one is passed explicitly.
REQUEST_TIMEOUT = 30
ERROR_LOG_PATH = 'errors.log'


def log_error(error) -> None:
    """Append ``str(error)`` as a single line to the error log file."""
    with open(ERROR_LOG_PATH, 'a', encoding='utf-8') as logs:
        logs.write(str(error) + '\n')


def collapse_whitespace(text: str) -> str:
    """Return *text* with each run of whitespace replaced by one space.

    Leading and trailing whitespace is removed as well.
    """
    return " ".join(text.split())


def element_text(element) -> str:
    """Return the cleaned text of a parsed element, or ``""`` if it is None.

    ``get_text`` flattens nested tags such as ``<br>``, so no per-child type
    checks are needed; the separator keeps text on either side of a tag from
    being glued together.
    """
    if element is None:
        return ""
    return collapse_whitespace(element.get_text(" "))


def format_record(title: str, location: str, phone_number: str) -> str:
    """Build the three-line text record written for one business."""
    return f"{title}\n{location}\n{phone_number}\n"


def extract_listing(biz):
    """Pull the name, location and phone number out of one listing element.

    Args:
        biz: A parsed ``div.biz-listing-large`` element.

    Returns:
        A ``(title, location, phone_number)`` tuple of strings, or ``None``
        when the listing has no ``a.biz-name`` link. ``location`` is the
        address text followed by the neighbourhood text; a field that is
        absent from the markup is returned as an empty string and noted in
        the error log.
    """
    name_link = biz.find('a', {'class': 'biz-name'})
    if name_link is None:
        return None
    title = name_link.text

    address = element_text(biz.find('address'))
    region = element_text(biz.find('span', {'class': 'neighborhood-str-list'}))
    phone_number = element_text(biz.find('span', {'class': 'biz-phone'}))

    # A missing optional field must not discard the fields that were found,
    # so each one is checked (and logged) independently.
    fields = (('address', address), ('neighborhood', region), ('phone', phone_number))
    for label, value in fields:
        if not value:
            log_error(f"missing {label} for {title!r}")

    location = " ".join(part for part in (address, region) if part)
    return title, location, phone_number


def main() -> None:
    """Fetch each search-result page and append its listings to ``file_path``.

    Progress and every record are echoed to stdout. A page that cannot be
    fetched, a listing without a name, or a failed write is logged to the
    error log and skipped so the rest of the run continues.
    """
    for offset in range(start, MAX_START, PAGE_SIZE):
        print(offset)
        url = base_url.format(city, offset)
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            log_error(e)
            continue
        print(f"STATUS CODE: {response.status_code} FOR {response.url}")

        soup = BeautifulSoup(response.text, 'html.parser')
        businesses = soup.find_all('div', {'class': 'biz-listing-large'})

        # Explicit UTF-8 so non-ASCII business names are written the same way
        # regardless of the platform's default encoding.
        with open(file_path, 'a', encoding='utf-8') as text_file:
            for biz in businesses:
                listing = extract_listing(biz)
                if listing is None:
                    log_error("listing without a biz-name link skipped")
                    continue

                detail = format_record(*listing)
                print(detail)

                try:
                    text_file.write(detail + '\n\n')
                except (OSError, UnicodeError) as e:
                    log_error(e)


if __name__ == "__main__":
    main()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /