Commit 4a217d0

authored

Merge pull request avinashkranjan#2076 from Swapnil-Singh-99/book_scraper

added a book scraper script

2 parents ec21bc1 + 89abbd3 commit 4a217d0Copy full SHA for 4a217d0

File tree

5 files changed

+181

-0

lines changed

Book_Scraper
- README.md
- assests
  - image-1.png
  - image.png
- book.py
- requirements.txt

5 files changed

+181

-0

lines changed

`‎Book_Scraper/README.md`

Lines changed: 13 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,13 @@`
	`1`	`+# Book Scraper`
	`2`	`+It is a Book Scraper Python script which allows the user to download books using the console.`
	`3`	`+Ex: when a user provides the name of a book`
	`4`	`+The script returns book name, size of book, author, extension type, language of book, book cover image, direct download.`
	`5`	`+`
	`6`	`+# Installation & Run`
	`7`	+`pip install -r requirements.txt`
	`8`	`+`
	`9`	+`python book.py`
	`10`	`+`
	`11`	`+# Screenshots`
	`12`	`+![Alt text](assests/image.png)`
	`13`	`+![Alt text](assests/image-1.png)`

`‎Book_Scraper/assests/image-1.png`

12.5 KB

Loading[フレーム]

`‎Book_Scraper/assests/image.png`

68.4 KB

Loading[フレーム]

`‎Book_Scraper/book.py`

Lines changed: 164 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,164 @@`
	`1`	`+# for scraping books`
	`2`	`+from bs4 import BeautifulSoup as bs`
	`3`	`+import requests`
	`4`	`+# to identify emoji unicode characters`
	`5`	`+import emoji`
	`6`	`+import pyfiglet`
	`7`	`+import itertools`
	`8`	`+import threading`
	`9`	`+import time`
	`10`	`+import sys`
	`11`	`+`
	`12`	`+`
	`13`	`+`
	`14`	`+def is_emoji(text):`
	`15`	`+ """This function returns True if there is an emoji in the given string else False"""`
	`16`	`+ return bool(emoji.get_emoji_regexp().search(text))`
	`17`	`+`
	`18`	`+def link_to_get(link):`
	`19`	`+ """This function will get the url of the image & book download direct link using the given link for book download"""`
	`20`	`+ response = requests.get(link)`
	`21`	`+ th_html = bs(response.text , "html.parser")`
	`22`	`+ td_all = th_html.find_all("td" ,id ="info")`
	`23`	`+ td_all = td_all[0]`
	`24`	`+ td_a = td_all.find_all("a")`
	`25`	`+ link_href = td_a[1].get("href")`
	`26`	`+ img_link_td = td_all.find("img" ,alt="cover")`
	`27`	`+ img_link_src = img_link_td.get("src")`
	`28`	`+ img_link = f"http://library.lol{img_link_src}"`
	`29`	`+ return [link_href, img_link]`
	`30`	`+`
	`31`	`+def book_get(name, mainres=100, results=5):`
	`32`	`+ """This function returns the list of books for the given name`
	`33`	`+`
	`34`	`+ You can give in name :`
	`35`	`+ 1. title of book`
	`36`	`+ 2. isbn of book`
	`37`	`+ 3. author of book`
	`38`	`+ 4. publisher of book`
	`39`	`+`
	`40`	`+ mainres :`
	`41`	`+ 1. 25`
	`42`	`+ 2. 50`
	`43`	`+ 3. 100`
	`44`	`+`
	`45`	`+ Results:`
	`46`	`+ [ 0.Book Name,`
	`47`	`+ 1.Author,`
	`48`	`+ 2.Size,`
	`49`	`+ 3.Book Type,`
	`50`	`+ 4.Book Link,`
	`51`	`+ 5.Book Image Link`
	`52`	`+ 6.Language]"""`
	`53`	`+ Books = []`
	`54`	`+ if is_emoji(name) == True:`
	`55`	`+ return "Error: emoji"`
	`56`	`+ if name == "":`
	`57`	`+ return "Error: enter name"`
	`58`	`+ name = name.replace(" ", "+")`
	`59`	`+ # getting request and response`
	`60`	`+ url = f"http://libgen.is/search.php?req={name}&lg_topic=libgen&open=0&view=simple&res={mainres}&phrase=1&column=def"`
	`61`	`+ # print(url)`
	`62`	`+ response = requests.get(url)`
	`63`	`+ bs_html = bs(response.text , "html.parser")`
	`64`	`+`
	`65`	`+ if "Search string must contain minimum 3 characters.." in bs_html.body:`
	`66`	`+ return "Error: Title Too Short"`
	`67`	`+`
	`68`	`+ # scraping the site for response`
	`69`	`+ table = bs_html.find_all("table")`
	`70`	`+ table = table[2]`
	`71`	`+ table_rows = table.find_all("tr")`
	`72`	`+ a = len(table_rows)`
	`73`	`+ table_rows.pop(0)`
	`74`	`+ # print(url, "\n\n")`
	`75`	`+ if a > 1 :`
	`76`	`+ counter = 1`
	`77`	`+ for i in table_rows :`
	`78`	`+ if counter <= results:`
	`79`	`+ # make book list`
	`80`	`+ book_lst = []`
	`81`	`+ # getting all table datas`
	`82`	`+ table_datas = i.find_all("td")`
	`83`	`+ # book name`
	`84`	`+ book_name = table_datas[2].get_text()`
	`85`	`+ # author name`
	`86`	`+ author = table_datas[1].get_text()`
	`87`	`+ # getting link to book`
	`88`	`+ link_row = table_datas[9]`
	`89`	`+ a = link_row.find("a" , href = True)`
	`90`	`+ link = a.get("href")`
	`91`	`+ # getting image url & direct book download link`
	`92`	`+ link_all = link_to_get(link)`
	`93`	`+ # getting language`
	`94`	`+ language_row = table_datas[6]`
	`95`	`+ language = language_row.get_text()`
	`96`	`+ # getting size of book`
	`97`	`+ size_row = table_datas[7]`
	`98`	`+ size = size_row.get_text()`
	`99`	`+ # getting type of book`
	`100`	`+ type_row = table_datas[8]`
	`101`	`+ type_ofit = type_row.get_text()`
	`102`	`+ # this will only take pdfs in English Language`
	`103`	`+ if (type_ofit != "pdf" and type_ofit != "epub") or language != "English":`
	`104`	`+ continue`
	`105`	`+ book_lst.append(book_name)`
	`106`	`+ book_lst.append(author)`
	`107`	`+ book_lst.append(size)`
	`108`	`+ book_lst.append(type_ofit)`
	`109`	`+ book_lst.append(link_all[0])`
	`110`	`+ book_lst.append(link_all[1])`
	`111`	`+ book_lst.append(language)`
	`112`	`+ Books.append(book_lst)`
	`113`	`+ # print(f"\n\n\n{book_lst}\n\n\n")`
	`114`	`+ counter+=1`
	`115`	`+ if len(Books) >=1 :`
	`116`	`+ return Books`
	`117`	`+ else :`
	`118`	`+ return "Error: no results found"`
	`119`	`+ else:`
	`120`	`+ return "Error: no results found"`
	`121`	`+`
	`122`	`+# a = book_get("Harry Potter",25,5)`
	`123`	`+# print(a)`
	`124`	`+# for i in a :`
	`125`	`+# print(f"\n\nName : {i[0]}\nAuthor : {i[1]}\nSize : {i[2]}\nFormat : {i[3]}\nLink : {i[4]}\nImage : {i[5]}\n\n")`
	`126`	`+`
	`127`	`+def animate():`
	`128`	`+ for c in itertools.cycle(['\|', '/', '-', '\\']):`
	`129`	`+ if done:`
	`130`	`+ break`
	`131`	`+ sys.stdout.write('\r...Searching Book ' + c)`
	`132`	`+ sys.stdout.flush()`
	`133`	`+ time.sleep(0.1)`
	`134`	`+`
	`135`	`+if __name__ == "__main__":`
	`136`	`+ print(pyfiglet.figlet_format("Book Scraper"))`
	`137`	`+ print("---------------------------------------------------------------")`
	`138`	`+ print("---------------------------------------------------------------")`
	`139`	`+ while(True):`
	`140`	`+ print("\nEnter your Choice: \n1 - Search Book\n2 - Exit")`
	`141`	`+ entry = int(input())`
	`142`	`+ if(entry == 1):`
	`143`	`+ print("Enter name of book : ")`
	`144`	`+ book_name = input()`
	`145`	`+ # loading`
	`146`	`+ done = False`
	`147`	`+ #here is the animation`
	`148`	`+ t = threading.Thread(target=animate)`
	`149`	`+ t.start()`
	`150`	`+ books = book_get(book_name, 25, 5)`
	`151`	`+ done = True`
	`152`	`+ try:`
	`153`	`+ for i in books :`
	`154`	`+ print(f"\n\nName : {i[0]}\nAuthor : {i[1]}\nSize : {i[2]}\nFormat : {i[3]}\nLink : {i[4]}\nImage : {i[5]}\n")`
	`155`	`+ except:`
	`156`	`+ if(book_get == "Error: no results found"):`
	`157`	`+ print("Book not Found/n")`
	`158`	`+ elif(book_get == "Error: Title Too Short"):`
	`159`	`+ print("Title too short/n")`
	`160`	`+ elif(entry == 2):`
	`161`	`+ print(pyfiglet.figlet_format("Thank You for Using"))`
	`162`	`+ print("---------------------------------------------------------------")`
	`163`	`+ print("---------------------------------------------------------------")`
	`164`	`+ break`

`‎Book_Scraper/requirements.txt`

Lines changed: 4 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,4 @@`
	`1`	`+beautifulsoup4==4.11.1`
	`2`	`+emoji==1.6.3`
	`3`	`+pyfiglet==0.8.post1`
	`4`	`+Requests==2.31.0`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 4a217d0

File tree

5 files changed

5 files changed

`‎Book_Scraper/README.md`

`‎Book_Scraper/assests/image-1.png`

`‎Book_Scraper/assests/image.png`

`‎Book_Scraper/book.py`

`‎Book_Scraper/requirements.txt`

0 commit comments