
Commit dd960f3

avinashkranjan#1103 JobScraper Added
1 parent bd9f3c2 commit dd960f3

3 files changed: 142 additions, 0 deletions

Job Scraper/JobScraper.py

Lines changed: 111 additions & 0 deletions
import csv
import time

import requests
from bs4 import BeautifulSoup


def write_csv(loc, info):
    """Write the collected job openings to a .csv file."""
    headers = ['Title', 'Company Name', 'Location', 'Date', 'Summary', 'Url']

    # 'w' mode avoids duplicated headers on re-runs; newline='' avoids
    # blank rows on Windows
    with open(loc + '_openings.csv', 'w', newline='', encoding='utf-8') as csv_f:
        csv_p = csv.writer(csv_f, delimiter=',')
        csv_p.writerow(headers)
        csv_p.writerows(info)

    print(f'\n{loc}_openings.csv has been saved to your directory!\n')


def job_scraper():
    """
    Scrape the requested number of job openings posted for a given job title
    and location, and store all the associated information in a .csv file.
    """
    title = input("\nEnter job title: ").replace(" ", "+")
    loc = input("Enter job location: ").replace(" ", "+")
    num = int(input("Enter the number of job openings to obtain: "))

    url = f'https://in.indeed.com/jobs?q={title}&l={loc}'
    req_page = requests.get(url)

    job_array = []

    if req_page.status_code == 200:
        soup = BeautifulSoup(req_page.text, "html.parser")
        job_table = soup.find("td", id="resultsCol")
        count = 0
        scraping = True

        while scraping:
            for job_card in job_table.find_all("div", class_="jobsearch-SerpJobCard"):
                # Job title
                title_elem = job_card.find('a', class_='jobtitle turnstileLink')
                job_title = title_elem.text.strip()

                # Company name
                company_details = job_card.find('div', class_='sjcl')
                company_name = company_details.find('span', class_='company').text.strip()

                # Company location; fall back to the searched location
                company_loc = company_details.find('span', class_='location')
                company_loc = company_loc.text.strip() if company_loc is not None else loc

                # URL of the post
                link = 'https://in.indeed.com' + job_card.find('a')['href']

                # Date of the post
                date = job_card.find('span', class_='date').text.strip()

                # Job summary (some cards have none)
                summary_elem = job_card.find('div', class_='summary')
                summary = summary_elem.text.strip() if summary_elem is not None else ''

                count += 1
                job_array.append([job_title, company_name, company_loc, date, summary, link])
                if count == num:
                    scraping = False
                    break

            if scraping:
                # Go to the next results page, if there is one
                pagination = soup.find("ul", class_="pagination-list")
                next_anchor = None
                for anchor in pagination.find_all('a'):
                    if anchor.get('aria-label') == 'Next':
                        next_anchor = anchor
                        break

                if next_anchor is not None:
                    next_page_link = 'https://in.indeed.com' + next_anchor['href']

                    time.sleep(2)  # be polite between requests

                    req_page = requests.get(next_page_link)
                    soup = BeautifulSoup(req_page.text, "html.parser")
                    job_table = soup.find("td", id="resultsCol")
                else:
                    scraping = False

        write_csv(loc, job_array)

    else:
        print('There seems to be a problem fetching the results. '
              'Check your inputs and connection, and try again.')


if __name__ == '__main__':
    job_scraper()
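
For context, a minimal sketch of the card-parsing step the script relies on, run against a static HTML snippet invented here to mimic the markup the script assumes (`jobsearch-SerpJobCard`, `jobtitle turnstileLink`, `company`, and so on). Indeed's live markup may differ, so treat the snippet and its class names as illustrative only:

```python
# Sketch: parse one job card from a static snippet that mimics the
# markup JobScraper.py assumes. The HTML below is invented for
# illustration; Indeed's real pages may use different structure.
from bs4 import BeautifulSoup

SAMPLE = """
<td id="resultsCol">
  <div class="jobsearch-SerpJobCard">
    <a class="jobtitle turnstileLink" href="/rc/clk?jk=123">Data Analyst</a>
    <div class="sjcl">
      <span class="company">Acme Corp</span>
      <span class="location">Pune</span>
    </div>
    <span class="date">3 days ago</span>
    <div class="summary">Analyse dashboards and reports.</div>
  </div>
</td>
"""

soup = BeautifulSoup(SAMPLE, "html.parser")
card = soup.find("div", class_="jobsearch-SerpJobCard")
print(card.find("a", class_="jobtitle turnstileLink").text.strip())  # Data Analyst
print(card.find("span", class_="company").text.strip())              # Acme Corp
print(card.find("span", class_="date").text.strip())                 # 3 days ago
```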

Job Scraper/README.md

Lines changed: 24 additions & 0 deletions
# Job Scraper

- The Python script uses BeautifulSoup to scrape job-opening postings on indeed.com.
- The script takes the required job title, the location, and the number of posts to collect as user input, and collects all the associated information.
- The information collected is stored in a `.csv` file, which can be read back as sketched below.
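
A minimal sketch for reading the generated file back with the standard library. The filename here assumes `Pune` was entered as the location; substitute whatever location you searched for:

```python
# Sketch: read the generated CSV back with the standard library.
# 'Pune_openings.csv' is an assumed example filename.
import csv

with open('Pune_openings.csv', newline='', encoding='utf-8') as f:
    for row in csv.DictReader(f):
        print(row['Title'], '|', row['Company Name'], '|', row['Url'])
```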
## Setup instructions

- The requirements can be installed as follows:

```shell
$ pip install -r requirements.txt
```

## Screenshots

![Image](https://i.imgur.com/ZJZDKSP.png)

![Image](https://i.imgur.com/jsPXeJH.png)

## Author

[Akhil Bhalerao](https://www.github.com/iamakkkhil)

Job Scraper/requirements.txt

Lines changed: 7 additions & 0 deletions
beautifulsoup4==4.9.3
certifi==2020.12.5
chardet==4.0.0
idna==2.10
requests==2.25.1
soupsieve==2.2.1
urllib3==1.26.4
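
A quick, optional way to confirm the pinned versions are actually installed (a sketch assuming Python 3.8+, where `importlib.metadata` is in the standard library):

```python
# Sketch: check installed versions against the pins above.
# Assumes Python 3.8+ for importlib.metadata.
from importlib.metadata import version, PackageNotFoundError

for pkg in ['beautifulsoup4', 'certifi', 'chardet', 'idna',
            'requests', 'soupsieve', 'urllib3']:
    try:
        print(pkg, version(pkg))
    except PackageNotFoundError:
        print(pkg, 'not installed')
```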

