Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ac38a1f

Browse files
Merge pull request avinashkranjan#2326 from Juhibhojani/master
GeeksforGeeks Scrapper
2 parents d496483 + 11e3dac commit ac38a1f

File tree

3 files changed

+219
-0
lines changed

3 files changed

+219
-0
lines changed

‎GeeksforGeeks-Scrapper/gfg.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
from bs4 import BeautifulSoup
2+
from selenium import webdriver
3+
from selenium.common.exceptions import WebDriverException, NoSuchElementException
4+
from webdriver_manager.chrome import ChromeDriverManager
5+
from selenium.webdriver.chrome.service import Service
6+
from selenium.webdriver.support.ui import WebDriverWait
7+
8+
class geeksforgeeks:
    """Scraper for the course listing page of the GeeksforGeeks practice portal.

    The page is loaded once through Selenium while this class body executes,
    and the resulting HTML is parsed into ``soup``. Every public method reads
    that shared, already-parsed snapshot; nothing is re-fetched per call.
    """

    # NOTE(review): the browser is launched when the class is defined (i.e. at
    # import time) and is never quit anywhere in this file. The attributes are
    # kept as-is because callers may rely on them; consider calling
    # ``geeksforgeeks.driver.quit()`` once scraping is finished.
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)
    wait = WebDriverWait(driver, 100)
    # using selenium to access html content
    url = "https://practice.geeksforgeeks.org/courses?utm_source=geeksforgeeks&utm_medium=main_header&utm_campaign=courses"
    driver.get(url)
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # Placeholder stored when a card lacks the requested piece of information.
    _NOT_AVAILABLE = "Information not available"

    # CSS class strings shared by the cards of all three course sections.
    _CARD_CLASS = "ui card courseListingPage_courseCardContainer__lLZiS"
    _TITLE_CLASS = "ui left aligned header courseListingPage_myAuto__i6GdI sofia-pro course_heading"
    _INTERESTED_CLASS = "courseListingPage_descriptionText__zN_K1 sofia-pro g-opacity-50 g-mb-0 grid_with__meta"
    _PRICE_CLASS = "sofia-pro g-mb-0 courseListingPage_batchFee__0NlbJ"

    @staticmethod
    def _collect_cards(container, find_rating, missing_price):
        """Extract name, rating, interest count and price from every course card.

        :param container: parsed element that holds the course cards
        :param find_rating: callable taking one card and returning the element
            whose text is the rating, or ``None`` when the card has none
        :param missing_price: value recorded when a card shows no price
        :return: dictionary with the keys ``Name``, ``Rating``, ``Interested``
            and ``Price``, each mapping to a list with one entry per card
        :raises AttributeError: if ``container`` is ``None`` or is not an
            element that can be searched
        """
        if container is None:
            raise AttributeError("course section not found in the page")

        unavailable = geeksforgeeks._NOT_AVAILABLE
        names, ratings, interested, prices = [], [], [], []
        for card in container.find_all("a", {"class": geeksforgeeks._CARD_CLASS}):
            title = card.find("h4", {"class": geeksforgeeks._TITLE_CLASS})
            names.append(unavailable if title is None else title.text)

            rating = find_rating(card)
            ratings.append(unavailable if rating is None else rating.text)

            # Only the first space-separated token of the description is kept.
            blurb = card.find("div", {"class": geeksforgeeks._INTERESTED_CLASS})
            interested.append(
                unavailable if blurb is None else blurb.text.split(" ")[0]
            )

            fee = card.find("p", {"class": geeksforgeeks._PRICE_CLASS})
            prices.append(missing_price if fee is None else fee.text)

        return {
            "Name": names,
            "Rating": ratings,
            "Interested": interested,
            "Price": prices,
        }

    def get_popular_now(self):
        """
        Fetches popular now courses and related information from gfg portal

        :return: datatype : dictionary with the keys ``data`` and ``message``;
            ``data`` is a dictionary containing:
            -> Name : Name of courses
            -> Rating : Rating of courses
            -> Interested : Number of people interested
            -> Price : Price of given course
        :raises Exception: if the section cannot be located or parsed
        """
        try:
            popular_now = geeksforgeeks.soup.find(
                "div",
                {
                    "class": "ui cards courseListingPage_cardLayout__multW courseListingPage_toggleCourseCards__pWBVA"
                },
            )
            course_popular_now = geeksforgeeks._collect_cards(
                popular_now,
                lambda card: card.find("span", {"class": "urw-din"}),
                geeksforgeeks._NOT_AVAILABLE,
            )
            return {
                "data": course_popular_now,
                "message": "Popular Courses are now fetched",
            }
        # AttributeError is what a changed page layout actually produces here
        # (a lookup returning None); the Selenium types are kept for callers
        # that already expect them to be wrapped.
        except (WebDriverException, NoSuchElementException, AttributeError) as e:
            raise Exception(
                f"An error occurred while scraping popular courses: {str(e)}"
            ) from e

    def get_self_paced(self):
        """
        Fetches self-paced courses and related information from gfg portal

        :return: datatype : dictionary with the keys ``data`` and ``message``;
            ``data`` is a dictionary containing:
            -> Name : Name of courses
            -> Rating : Rating of courses
            -> Interested : Number of people interested
            -> Price : Price of given course
        :raises Exception: if the section cannot be located or parsed
        """

        def find_rating(card):
            # The rating span is looked up inside the card's contents grid.
            grid = card.find(
                "div", {"class": "courseListingPage_courseCardContentsGrid__jk3VM"}
            )
            if grid is None:
                return None
            return grid.find("span", {"class": "urw-din"})

        try:
            self_paced = geeksforgeeks.soup.find(
                "div",
                {
                    "class": "ui cards courseListingPage_cardLayout__multW courseListingPage_courseCardsGrid__VYBzZ"
                },
            )
            course_self_paced = geeksforgeeks._collect_cards(
                self_paced,
                find_rating,
                geeksforgeeks._NOT_AVAILABLE,
            )
            return {
                "data": course_self_paced,
                "message": "Self paced Courses are now fetched",
            }
        except (WebDriverException, NoSuchElementException, AttributeError) as e:
            raise Exception(
                f"An error occurred while scraping self paced courses: {str(e)}"
            ) from e

    def get_live_course(self):
        """
        Fetches live courses and related information from gfg portal

        :return: datatype : dictionary with the keys ``data`` and ``message``;
            ``data`` is a dictionary containing:
            -> Name : Name of courses
            -> Rating : Rating of courses
            -> Interested : Number of people interested
            -> Price : Price of given course ("0" when no price is shown)
        :raises Exception: if the section cannot be located or parsed
        """
        try:
            # The live section is reached as the third sibling after the first
            # "g-mt-8" div. NOTE(review): this depends on the exact sibling
            # order of the page markup - confirm against the current page.
            live = geeksforgeeks.soup.find(
                "div", {"class": "g-mt-8"}
            ).next_sibling.next_sibling.next_sibling
            course_live = geeksforgeeks._collect_cards(
                live,
                lambda card: card.find("div", {"class": "meta"}),
                "0",
            )
            return {
                "data": course_live,
                "message": "Live Courses are now fetched",
            }
        except (WebDriverException, NoSuchElementException, AttributeError) as e:
            raise Exception(
                f"An error occurred while scraping live courses: {str(e)}"
            ) from e
211+

‎GeeksforGeeks-Scrapper/readme.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Geeks for Geeks Scraper
2+
3+
Scrapes GeeksforGeeks courses in three categories: popular, self-paced, and live. Each method returns its data as a JSON-style dictionary.

‎GeeksforGeeks-Scrapper/requirements.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
requests==2.28.2
2+
beautifulsoup4==4.11.1
3+
selenium==4.9.1
4+
undetected-chromedriver==3.5.0
5+
webdriver_manager

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /