Commit ac38a1f

authored

Merge pull request avinashkranjan#2326 from Juhibhojani/master

GeeksforGeeks Scrapper

2 parents d496483 + 11e3dac commit ac38a1fCopy full SHA for ac38a1f

File tree

3 files changed

+219

-0

lines changed

GeeksforGeeks-Scrapper

3 files changed

+219

-0

lines changed

`‎GeeksforGeeks-Scrapper/gfg.py`

Lines changed: 211 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,211 @@`
	`1`	`+from bs4 import BeautifulSoup`
	`2`	`+from selenium import webdriver`
	`3`	`+from selenium.common.exceptions import WebDriverException, NoSuchElementException`
	`4`	`+from webdriver_manager.chrome import ChromeDriverManager`
	`5`	`+from selenium.webdriver.chrome.service import Service`
	`6`	`+from selenium.webdriver.support.ui import WebDriverWait`
	`7`	`+`
	`8`	`+class geeksforgeeks:`
	`9`	`+ service = Service(ChromeDriverManager().install())`
	`10`	`+ driver = webdriver.Chrome(service=service)`
	`11`	`+ wait = WebDriverWait(driver, 100)`
	`12`	`+ # using seleinum to access html content`
	`13`	`+ url = f"https://practice.geeksforgeeks.org/courses?utm_source=geeksforgeeks&utm_medium=main_header&utm_campaign=courses"`
	`14`	`+ driver.get(url)`
	`15`	`+ html = driver.page_source`
	`16`	`+ soup = BeautifulSoup(html, "html.parser")`
	`17`	`+`
	`18`	`+ def get_popular_now(self):`
	`19`	`+ """`
	`20`	`+ Fetches popular now courses and related information from gfg portal`
	`21`	`+`
	`22`	`+ :return: datatype : dictionary containing:`
	`23`	`+ -> Name : Name of courses`
	`24`	`+ -> Rating : Rating of courses`
	`25`	`+ -> Interested : Number of people interested`
	`26`	`+ -> Price : Price of given course`
	`27`	`+ """`
	`28`	`+ try:`
	`29`	`+ popular_now = geeksforgeeks.soup.find(`
	`30`	`+ "div",`
	`31`	`+ {`
	`32`	`+ "class": "ui cards courseListingPage_cardLayout__multW courseListingPage_toggleCourseCards__pWBVA"`
	`33`	`+ },`
	`34`	`+ )`
	`35`	`+ name = []`
	`36`	`+ rating = []`
	`37`	`+ interested = []`
	`38`	`+ price = []`
	`39`	`+`
	`40`	`+ for items in popular_now.find_all(`
	`41`	`+ "a", {"class": "ui card courseListingPage_courseCardContainer__lLZiS"}`
	`42`	`+ ):`
	`43`	`+ course_name = items.find(`
	`44`	`+ "h4",`
	`45`	`+ {`
	`46`	`+ "class": "ui left aligned header courseListingPage_myAuto__i6GdI sofia-pro course_heading"`
	`47`	`+ },`
	`48`	`+ )`
	`49`	`+ name.append(course_name.text)`
	`50`	`+ rating_geek = items.find("span", {"class": "urw-din"})`
	`51`	`+ if not rating_geek:`
	`52`	`+ rating_geek = "Information not available"`
	`53`	`+ else:`
	`54`	`+ rating_geek = rating_geek.text`
	`55`	`+ rating.append(rating_geek)`
	`56`	`+ interseted_geeks = items.find(`
	`57`	`+ "div",`
	`58`	`+ {`
	`59`	`+ "class": "courseListingPage_descriptionText__zN_K1 sofia-pro g-opacity-50 g-mb-0 grid_with__meta"`
	`60`	`+ },`
	`61`	`+ )`
	`62`	`+ interested.append(interseted_geeks.text.split(" ")[0])`
	`63`	`+ course_price = items.find(`
	`64`	`+ "p", {"class": "sofia-pro g-mb-0 courseListingPage_batchFee__0NlbJ"}`
	`65`	`+ )`
	`66`	`+ price.append(course_price.text)`
	`67`	`+`
	`68`	`+ course_popular_now = dict(`
	`69`	`+ {`
	`70`	`+ "Name": name,`
	`71`	`+ "Rating": rating,`
	`72`	`+ "Interested": interested,`
	`73`	`+ "Price": price,`
	`74`	`+ }`
	`75`	`+ )`
	`76`	`+ return {`
	`77`	`+ "data": course_popular_now,`
	`78`	`+ "message": "Popular Courses are now fetched",`
	`79`	`+ }`
	`80`	`+ except (WebDriverException, NoSuchElementException) as e:`
	`81`	`+ raise Exception(f"An error occurred while scraping popular courses: {str(e)}")`
	`82`	`+`
	`83`	`+ def get_self_paced(self):`
	`84`	`+ """`
	`85`	`+ Fetches self-paced courses and related information from gfg portal`
	`86`	`+`
	`87`	`+ :return: datatype : dictionary containing:`
	`88`	`+ -> Name : Name of courses`
	`89`	`+ -> Rating : Rating of courses`
	`90`	`+ -> Interested : Number of people interested`
	`91`	`+ -> Price : Price of given course`
	`92`	`+ """`
	`93`	`+ try:`
	`94`	`+ self_paced = geeksforgeeks.soup.find(`
	`95`	`+ "div",`
	`96`	`+ {`
	`97`	`+ "class": "ui cards courseListingPage_cardLayout__multW courseListingPage_courseCardsGrid__VYBzZ"`
	`98`	`+ },`
	`99`	`+ )`
	`100`	`+ name = []`
	`101`	`+ rating = []`
	`102`	`+ interested = []`
	`103`	`+ price = []`
	`104`	`+ for items in self_paced.find_all(`
	`105`	`+ "a", {"class": "ui card courseListingPage_courseCardContainer__lLZiS"}`
	`106`	`+ ):`
	`107`	`+ course_name = items.find(`
	`108`	`+ "h4",`
	`109`	`+ {`
	`110`	`+ "class": "ui left aligned header courseListingPage_myAuto__i6GdI sofia-pro course_heading"`
	`111`	`+ },`
	`112`	`+ )`
	`113`	`+ name.append(course_name.text)`
	`114`	`+ course_rating = items.find("div", {"class": "courseListingPage_courseCardContentsGrid__jk3VM"}).find("span",{"class":"urw-din"})`
	`115`	`+ if not course_rating:`
	`116`	`+ course_rating = "Information not available"`
	`117`	`+ else:`
	`118`	`+ course_rating = course_rating.text`
	`119`	`+ rating.append(course_rating)`
	`120`	`+ course_interseted = items.find(`
	`121`	`+ "div",`
	`122`	`+ {`
	`123`	`+ "class": "courseListingPage_descriptionText__zN_K1 sofia-pro g-opacity-50 g-mb-0 grid_with__meta"`
	`124`	`+ },`
	`125`	`+ )`
	`126`	`+ interested.append(course_interseted.text.split(" ")[0])`
	`127`	`+ course_price = items.find(`
	`128`	`+ "p", {"class": "sofia-pro g-mb-0 courseListingPage_batchFee__0NlbJ"}`
	`129`	`+ )`
	`130`	`+ price.append(course_price.text)`
	`131`	`+`
	`132`	`+ course_self_paced = dict(`
	`133`	`+ {`
	`134`	`+ "Name": name,`
	`135`	`+ "Rating": rating,`
	`136`	`+ "Interested": interested,`
	`137`	`+ "Price": price,`
	`138`	`+ }`
	`139`	`+ )`
	`140`	`+ return {`
	`141`	`+ "data": course_self_paced,`
	`142`	`+ "message": "Self paced Courses are now fetched",`
	`143`	`+ }`
	`144`	`+ except (WebDriverException, NoSuchElementException) as e:`
	`145`	`+ raise Exception(f"An error occurred while scraping popular courses: {str(e)}")`
	`146`	`+`
	`147`	`+ def get_live_course(self):`
	`148`	`+ """`
	`149`	`+ Fetches self-paced courses and related information from gfg portal`
	`150`	`+`
	`151`	`+ :return: datatype : dictionary containing:`
	`152`	`+ -> Name : Name of courses`
	`153`	`+ -> Rating : Rating of courses`
	`154`	`+ -> Interested : Number of people interested`
	`155`	`+ -> Price : Price of given course`
	`156`	`+ """`
	`157`	`+ try:`
	`158`	`+ live = geeksforgeeks.soup.find(`
	`159`	`+ "div", {"class": "g-mt-8"}`
	`160`	`+ ).next_sibling.next_sibling.next_sibling`
	`161`	`+ name = []`
	`162`	`+ rating = []`
	`163`	`+ interested = []`
	`164`	`+ price = []`
	`165`	`+ for item in live.find_all(`
	`166`	`+ "a", {"class": "ui card courseListingPage_courseCardContainer__lLZiS"}`
	`167`	`+ ):`
	`168`	`+ course_name = item.find(`
	`169`	`+ "h4",`
	`170`	`+ {`
	`171`	`+ "class": "ui left aligned header courseListingPage_myAuto__i6GdI sofia-pro course_heading"`
	`172`	`+ },`
	`173`	`+ )`
	`174`	`+ name.append(course_name.text)`
	`175`	`+ course_rating = item.find("div", {"class": "meta"})`
	`176`	`+ if not course_rating:`
	`177`	`+ course_rating = "Information not available"`
	`178`	`+ else:`
	`179`	`+ course_rating = course_rating.text`
	`180`	`+ rating.append(course_rating)`
	`181`	`+ course_interseted = item.find(`
	`182`	`+ "div",`
	`183`	`+ {`
	`184`	`+ "class": "courseListingPage_descriptionText__zN_K1 sofia-pro g-opacity-50 g-mb-0 grid_with__meta"`
	`185`	`+ },`
	`186`	`+ )`
	`187`	`+ interested.append(course_interseted.text.split(" ")[0])`
	`188`	`+ course_price = item.find(`
	`189`	`+ "p", {"class": "sofia-pro g-mb-0 courseListingPage_batchFee__0NlbJ"}`
	`190`	`+ )`
	`191`	`+ if not course_price:`
	`192`	`+ course_price = "0"`
	`193`	`+ else:`
	`194`	`+ course_price = course_price.text`
	`195`	`+ price.append(course_price)`
	`196`	`+`
	`197`	`+ course_live = dict(`
	`198`	`+ {`
	`199`	`+ "Name": name,`
	`200`	`+ "Rating": rating,`
	`201`	`+ "Interested": interested,`
	`202`	`+ "Price": price,`
	`203`	`+ }`
	`204`	`+ )`
	`205`	`+ return {`
	`206`	`+ "data": course_live,`
	`207`	`+ "message": "Live Courses are now fetched",`
	`208`	`+ }`
	`209`	`+ except (WebDriverException, NoSuchElementException) as e:`
	`210`	`+ raise Exception(f"An error occurred while scraping popular courses: {str(e)}")`
	`211`	`+`

`‎GeeksforGeeks-Scrapper/readme.md`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# GeeksforGeeks Scraper`
	`2`	`+`
	`3`	`+Scrapes GeeksforGeeks courses in three categories (popular now, self-paced and live), one method per category. Each method returns its data as a JSON-style dictionary.`

`‎GeeksforGeeks-Scrapper/requirements.txt`

Lines changed: 5 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,5 @@`
	`1`	`+requests==2.28.2`
	`2`	`+beautifulsoup4==4.11.1`
	`3`	`+selenium==4.9.1`
	`4`	`+undetected-chromedriver==3.5.0`
	`5`	`+webdriver_manager`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit ac38a1f

File tree

3 files changed

3 files changed

`‎GeeksforGeeks-Scrapper/gfg.py`

`‎GeeksforGeeks-Scrapper/readme.md`

`‎GeeksforGeeks-Scrapper/requirements.txt`

0 commit comments