Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 78ecb22

Browse files
Added creation of a PDF and addition of article content to it
1 parent 79306d6 commit 78ecb22

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

‎Dev.to Scraper/scraper.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from selenium import webdriver
44
from selenium.webdriver.common.keys import Keys
55
import time
6+
from fpdf import FPDF
67

78
# Get input for category and number of articles
89
category = input("Enter category: ")
@@ -50,14 +51,29 @@
5051
article_content_div = article_content.find('div',class_='crayons-article__main')
5152
article_content_body = article_content_div.find('div',class_='crayons-article__body')
5253
p_tags = article_content_body.find_all('p')
53-
article_content=""
54-
for p_tag in p_tags:
55-
article_content += (p_tag.text.strip()+'\n')
5654

55+
title_string = (title_content.text.strip()).encode('latin-1', 'replace').decode('latin-1')
56+
author_string = ("By - {}".format(author_name.text.strip())).encode('latin-1', 'replace').decode('latin-1')
57+
58+
# Add a page
59+
pdf = FPDF()
60+
pdf.add_page()
61+
# set style and size of font
62+
pdf.set_font("Arial", size = 12)
63+
64+
# Title cell
65+
pdf.cell(200, 5, txt = title_string,ln = 1, align = 'C')
66+
# Author cell
67+
pdf.cell(200, 10, txt = author_string,ln = 2, align = 'C')
5768

58-
print("Title : " + title_content.text.strip())
59-
print("Author : "+ author_name.text.strip())
60-
print("Body : "+ article_content)
69+
for p_tag in p_tags:
70+
article_part = (p_tag.text.strip()).encode('latin-1', 'replace').decode('latin-1')
71+
# Add part of article to pdf
72+
pdf.multi_cell(0, 5, txt = article_part, align = 'L')
73+
74+
# save the pdf as <title>.pdf, where <title> is the article title with non-alphanumeric characters removed
75+
pdf_title = ''.join(e for e in title_string if e.isalnum())
76+
pdf.output("{}.pdf".format(pdf_title))
6177

6278
count = count + 1
6379
if(count == number_articles) :

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /