Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4a217d0

Browse files
Merge pull request avinashkranjan#2076 from Swapnil-Singh-99/book_scraper
added a book scraper script
2 parents ec21bc1 + 89abbd3 commit 4a217d0

File tree

5 files changed

+181
-0
lines changed

5 files changed

+181
-0
lines changed

‎Book_Scraper/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Book Scraper
2+
Book Scraper is a Python script which allows users to search for and download books from the console.
3+
Example: when a user provides the name of a book,
4+
The script returns book name, size of book, author, extension type, language of book, book cover image, direct download.
5+
6+
# Installation & Run
7+
`pip install -r requirements.txt`
8+
9+
`python book.py`
10+
11+
# Screenshots
12+
![Alt text](assests/image.png)
13+
![Alt text](assests/image-1.png)

‎Book_Scraper/assests/image-1.png

12.5 KB
Loading[フレーム]

‎Book_Scraper/assests/image.png

68.4 KB
Loading[フレーム]

‎Book_Scraper/book.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# for scraping books
2+
from bs4 import BeautifulSoup as bs
3+
import requests
4+
# to identify emoji unicode characters
5+
import emoji
6+
import pyfiglet
7+
import itertools
8+
import threading
9+
import time
10+
import sys
11+
12+
13+
14+
def is_emoji(text):
    """Return True if ``text`` contains at least one emoji, else False.

    Uses ``emoji.emoji_count`` rather than ``emoji.get_emoji_regexp``: the
    regexp helper was deprecated and then removed in emoji 2.0.0, whereas
    ``emoji_count`` is available in both the 1.x and 2.x series.
    """
    return emoji.emoji_count(text) > 0
17+
18+
def link_to_get(link, timeout=30):
    """Fetch a book's download page and extract its direct links.

    Args:
        link: URL of the book's download page (taken from the search
            results table).
        timeout: Seconds to wait for the server before giving up, so a dead
            mirror cannot hang the program forever.

    Returns:
        A two-item list ``[download_link, image_link]``. ``image_link`` is
        ``None`` when the page has no cover image.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
        ValueError: if the page does not contain the expected
            ``<td id="info">`` cell with a download anchor.
    """
    from urllib.parse import urljoin

    response = requests.get(link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    page = bs(response.text, "html.parser")

    info_cell = page.find("td", id="info")
    if info_cell is None:
        raise ValueError(f"no book info found at {link}")

    anchors = info_cell.find_all("a")
    # The second anchor in the info cell is the one the script treats as the
    # direct download link.
    if len(anchors) < 2:
        raise ValueError(f"no download link found at {link}")
    link_href = anchors[1].get("href")

    cover = info_cell.find("img", alt="cover")
    cover_src = cover.get("src") if cover is not None else None
    # urljoin copes with absolute URLs and with paths lacking a leading "/",
    # both of which plain string concatenation would mangle.
    img_link = urljoin("http://library.lol/", cover_src) if cover_src else None

    return [link_href, img_link]
30+
31+
def book_get(name, mainres=100, results=5):
    """Search libgen for ``name`` and return matching English pdf/epub books.

    You can give in name :
        1. title of book
        2. isbn of book
        3. author of book
        4. publisher of book

    mainres (number of rows requested from the site):
        1. 25
        2. 50
        3. 100

    results:
        Maximum number of books to return.

    Returns:
        On success, a list of books, each a list of
        [ 0.Book Name,
          1.Author,
          2.Size,
          3.Book Type,
          4.Book Link,
          5.Book Image Link,
          6.Language ]

        On failure, one of the strings "Error: emoji", "Error: enter name",
        "Error: Title Too Short" or "Error: no results found".
    """
    if is_emoji(name):
        return "Error: emoji"
    if name == "":
        return "Error: enter name"

    # Let requests build the query string so characters such as "&" or "#"
    # in the search text are escaped (spaces are still sent as "+").
    params = {
        "req": name,
        "lg_topic": "libgen",
        "open": 0,
        "view": "simple",
        "res": mainres,
        "phrase": 1,
        "column": "def",
    }
    response = requests.get("http://libgen.is/search.php", params=params, timeout=30)
    bs_html = bs(response.text, "html.parser")

    # Search the page *text*: `in` on a Tag only compares against its direct
    # children, so the site's error message would never be detected.
    if "Search string must contain minimum 3 characters.." in bs_html.get_text():
        return "Error: Title Too Short"

    # The result rows live in the third table of the page; its first row is
    # the column header.
    tables = bs_html.find_all("table")
    if len(tables) < 3:
        return "Error: no results found"
    table_rows = tables[2].find_all("tr")[1:]

    books = []
    for row in table_rows:
        if len(books) >= results:
            break

        table_datas = row.find_all("td")
        if len(table_datas) < 10:
            # Not a regular result row; nothing to extract.
            continue

        author = table_datas[1].get_text()
        book_name = table_datas[2].get_text()
        language = table_datas[6].get_text()
        size = table_datas[7].get_text()
        type_ofit = table_datas[8].get_text()

        # Keep only English pdf/epub books. Filter *before* fetching the
        # detail page so rejected rows do not cost a network request.
        if type_ofit not in ("pdf", "epub") or language != "English":
            continue

        anchor = table_datas[9].find("a", href=True)
        if anchor is None:
            continue

        # Image url & direct book download link come from the detail page.
        link_all = link_to_get(anchor.get("href"))

        books.append(
            [book_name, author, size, type_ofit, link_all[0], link_all[1], language]
        )

    if books:
        return books
    return "Error: no results found"
121+
122+
# a = book_get("Harry Potter",25,5)
123+
# print(a)
124+
# for i in a :
125+
# print(f"\n\nName : {i[0]}\nAuthor : {i[1]}\nSize : {i[2]}\nFormat : {i[3]}\nLink : {i[4]}\nImage : {i[5]}\n\n")
126+
127+
def animate():
    """Draw a console spinner until the module-level ``done`` flag is truthy.

    Intended to run in a background thread while a search is in progress;
    the frame is redrawn in place every 0.1 seconds.
    """
    frames = itertools.cycle(['|', '/', '-', '\\'])
    while not done:
        frame = next(frames)
        # "\r" returns to the start of the line so each frame overwrites
        # the previous one.
        sys.stdout.write('\r...Searching Book ' + frame)
        sys.stdout.flush()
        time.sleep(0.1)
134+
135+
# Interactive console entry point: repeatedly ask the user whether to search
# for a book or exit, and print the search results.
if __name__ == "__main__":
    print(pyfiglet.figlet_format("Book Scraper"))
    print("---------------------------------------------------------------")
    print("---------------------------------------------------------------")
    while True:
        print("\nEnter your Choice: \n1 - Search Book\n2 - Exit")
        try:
            entry = int(input())
        except ValueError:
            # Non-numeric input should re-prompt, not crash the program.
            print("Invalid choice, please enter 1 or 2")
            continue

        if entry == 1:
            print("Enter name of book : ")
            book_name = input()
            # `done` is the flag polled by animate() to stop the spinner.
            done = False
            t = threading.Thread(target=animate)
            t.start()
            try:
                books = book_get(book_name, 25, 5)
            except requests.RequestException as err:
                books = f"Error: network request failed ({err})"
            finally:
                # Always stop the spinner, even if the search raised;
                # otherwise the thread would spin forever.
                done = True
                t.join()

            # book_get reports failures as "Error: ..." strings and
            # successes as a list of book records.
            if isinstance(books, str):
                if books == "Error: no results found":
                    print("Book not Found\n")
                elif books == "Error: Title Too Short":
                    print("Title too short\n")
                else:
                    print(f"{books}\n")
                continue

            for i in books:
                print(f"\n\nName : {i[0]}\nAuthor : {i[1]}\nSize : {i[2]}\nFormat : {i[3]}\nLink : {i[4]}\nImage : {i[5]}\n")
        elif entry == 2:
            print(pyfiglet.figlet_format("Thank You for Using"))
            print("---------------------------------------------------------------")
            print("---------------------------------------------------------------")
            break

‎Book_Scraper/requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
beautifulsoup4==4.11.1
2+
emoji==1.6.3
3+
pyfiglet==0.8.post1
4+
Requests==2.31.0

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /