|
1 | 1 | import requests
|
| 2 | +import csv |
2 | 3 | from bs4 import BeautifulSoup
|
3 | 4 |
|
4 | 5 | base_url = "https://www.yelp.com/search?find_desc=Restaurants&find_loc={}&start={}"
|
5 | 6 | city = "los+angeles"
|
6 | 7 | start = 0
|
7 | | -file_path = f'yelp-{city}-clean.txt' |
| 8 | +file_path = f'yelp-{city}-clean.csv' |
8 | 9 |
|
9 | | -while start < 990: |
def get_length(file_path):
    """Return the number of CSV rows currently stored in *file_path*.

    Every row yielded by ``csv.reader`` is counted, so a header line (if the
    file has one) is included in the total.

    Args:
        file_path: Path of the CSV file to inspect.

    Returns:
        The row count as an ``int``; ``0`` for an empty file.

    Raises:
        OSError: If the file cannot be opened (for example, it does not
            exist yet).
    """
    # newline='' is what the csv module asks for, so that line breaks inside
    # quoted fields reach the parser untouched.
    with open(file_path, newline='') as csvfile:
        # Count lazily instead of materialising every row in a list.
        return sum(1 for _ in csv.reader(csvfile))
| 15 | + |
| 16 | + |
| 17 | +while start < 60: |
10 | 18 | print(start)
|
11 | 19 | url = base_url.format(city, start)
|
12 | 20 | response = requests.get(url)
|
13 | 21 | print(f"STATUS CODE: {response.status_code} FOR {response.url}")
|
14 | 22 | soup = BeautifulSoup(response.text, 'html.parser')
|
15 | 23 | businesses = soup.findAll('div', {'class': 'biz-listing-large'})
|
16 | 24 |
|
17 | | - with open(file_path, 'a') as textFile: |
| 25 | + with open(file_path, 'a', newline='') as csvfile: |
| 26 | + fieldnames = ['id', 'title', 'address', 'phone'] |
| 27 | + reader = csv.DictReader(csvfile, fieldnames=fieldnames) |
| 28 | + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) |
18 | 29 | count = 0
|
| 30 | + if start == 0: |
| 31 | + writer.writeheader() |
19 | 32 | for biz in businesses:
|
20 | 33 | first_line = ""
|
21 | 34 | second_line = ""
|
|
36 | 49 | print(e)
|
37 | 50 | address = None
|
38 | 51 | logs = open('errors.log', 'a')
|
| 52 | + logs.write(str(__file__) + '-- STARTS' + '\n') |
39 | 53 | logs.write(str(e) + '\n')
|
| 54 | + logs.write(str(__file__) + '-- ENDS' + '\n\n') |
40 | 55 | logs.close()
|
41 | 56 |
|
42 | 57 | try:
|
|
51 | 66 | first_line = None
|
52 | 67 | second_line = None
|
53 | 68 | logs = open('errors.log', 'a')
|
| 69 | + logs.write(str(__file__) + '-- STARTS' + '\n') |
54 | 70 | logs.write(str(e) + '\n')
|
| 71 | + logs.write(str(__file__) + '-- ENDS' + '\n\n') |
55 | 72 | logs.close()
|
56 | 73 |
|
57 | 74 | try:
|
|
65 | 82 | print(e)
|
66 | 83 | phone_number = None
|
67 | 84 | logs = open('errors.log', 'a')
|
| 85 | + logs.write(str(__file__) + '-- STARTS' + '\n') |
68 | 86 | logs.write(str(e) + '\n')
|
| 87 | + logs.write(str(__file__) + '-- ENDS' + '\n\n') |
69 | 88 | logs.close()
|
70 | 89 |
|
71 | 90 | detail = f"{title}\n{second_line}\n{phone_number}\n"
|
72 | 91 | print(detail)
|
73 | 92 |
|
| 93 | + next_id = get_length(file_path) |
| 94 | + |
74 | 95 | try:
|
75 | | - textFile.write(str(detail) + '\n\n') |
| 96 | + writer.writerow({ |
| 97 | + 'id': next_id, |
| 98 | + 'title': title, |
| 99 | + 'address': second_line, |
| 100 | + 'phone': phone_number |
| 101 | + }) |
76 | 102 | except Exception as e:
|
77 | 103 | logs = open('errors.log', 'a')
|
| 104 | + logs.write(str(__file__) + '-- STARTS' + '\n') |
78 | 105 | logs.write(str(e) + '\n')
|
| 106 | + logs.write(str(__file__) + '-- ENDS' + '\n\n') |
79 | 107 | logs.close()
|
80 | 108 |
|
81 | 109 | start += 30
|
0 commit comments