|
3 | 3 | from selenium import webdriver
|
4 | 4 | from selenium.webdriver.common.keys import Keys
|
5 | 5 | import time
|
| 6 | +from fpdf import FPDF |
6 | 7 |
|
7 | 8 | # Get input for category and number of articles
|
8 | 9 | category = input("Enter category: ")
|
|
50 | 51 | article_content_div = article_content.find('div',class_='crayons-article__main')
|
51 | 52 | article_content_body = article_content_div.find('div',class_='crayons-article__body')
|
52 | 53 | p_tags = article_content_body.find_all('p')
|
53 | | - article_content="" |
54 | | - for p_tag in p_tags: |
55 | | - article_content += (p_tag.text.strip()+'\n') |
56 | 54 |
|
| 55 | + title_string = (title_content.text.strip()).encode('latin-1', 'replace').decode('latin-1') |
| 56 | + author_string = ("By - {}".format(author_name.text.strip())).encode('latin-1', 'replace').decode('latin-1') |
| 57 | + |
| 58 | + # Create a new PDF document and add its first page |
| 59 | + pdf = FPDF() |
| 60 | + pdf.add_page() |
| 61 | + # Set the font family and size |
| 62 | + pdf.set_font("Arial", size = 12) |
| 63 | + |
| 64 | + # Title cell |
| 65 | + pdf.cell(200, 5, txt = title_string,ln = 1, align = 'C') |
| 66 | + # Author cell |
| 67 | + pdf.cell(200, 10, txt = author_string,ln = 2, align = 'C') |
57 | 68 |
|
58 | | - print("Title : " + title_content.text.strip()) |
59 | | - print("Author : "+ author_name.text.strip()) |
60 | | - print("Body : "+ article_content) |
| 69 | + for p_tag in p_tags: |
| 70 | + article_part = (p_tag.text.strip()).encode('latin-1', 'replace').decode('latin-1') |
| 71 | + # Add this paragraph of the article to the PDF |
| 72 | + pdf.multi_cell(0, 5, txt = article_part, align = 'L') |
| 73 | + |
| 74 | + # Save the PDF as <title>.pdf, keeping only the alphanumeric characters of the title |
| 75 | + pdf_title = ''.join(e for e in title_string if e.isalnum()) |
| 76 | + pdf.output("{}.pdf".format(pdf_title)) |
61 | 77 |
|
62 | 78 | count = count + 1
|
63 | 79 | if(count == number_articles) :
|
|
0 commit comments