Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ec99e1e

Browse files
Merge pull request #11 from P0Saurabh/master
summrazation pdf
2 parents 132434a + d24c35d commit ec99e1e

File tree

2 files changed

+118
-0
lines changed

2 files changed

+118
-0
lines changed

PDF summrazation/main.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import tkinter as tk
2+
from tkinter import filedialog, scrolledtext, messagebox, simpledialog, font
3+
from tkinter import PhotoImage # Import PhotoImage for handling images
4+
import torch
5+
from transformers import T5Tokenizer, T5ForConditionalGeneration
6+
from pdfminer.high_level import extract_text
7+
import requests
8+
from io import BytesIO
9+
from fpdf import FPDF # Import FPDF for PDF generation
10+
11+
# Tensors are placed on the CPU; the tokenizer and the model are both loaded
# from the pretrained 't5-small' checkpoint.
device = torch.device('cpu')
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')
14+
15+
def pdf_to_text(pdf_path):
    """Extract the text of a PDF.

    Args:
        pdf_path: Passed unchanged to ``extract_text``.

    Returns:
        The extracted text, or ``None`` if extraction raised an exception
        (the error is printed to stdout in that case).
    """
    try:
        return extract_text(pdf_path)
    except Exception as exc:
        print(f"Error extracting text from PDF: {exc}")
    return None
22+
23+
def summarize_text(text):
    """Summarize ``text`` with the module-level T5 model and tokenizer.

    Args:
        text: The text to summarize. ``None`` and empty or whitespace-only
            strings are accepted and produce an empty summary.

    Returns:
        The decoded summary string, or ``""`` when there is nothing to
        summarize.
    """
    # pdf_to_text() returns None on failure; guard so callers that forward
    # its result do not crash with AttributeError on ``None.strip()``.
    if not text or not text.strip():
        return ""

    preprocess_text = text.strip().replace("\n", " ")
    t5_prepared_text = "summarize: " + preprocess_text
    # Cap the encoder input so whole-document text cannot produce an
    # arbitrarily long token sequence.
    tokenized_text = tokenizer.encode(
        t5_prepared_text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    ).to(device)
    summary_ids = model.generate(
        tokenized_text,
        num_beams=4,
        no_repeat_ngram_size=2,
        min_length=30,
        max_length=100,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
35+
36+
def download_pdf(url, timeout=30):
    """Download a PDF into memory.

    Args:
        url: Address of the PDF to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever, which would freeze
            the Tk event loop.

    Returns:
        A ``BytesIO`` holding the response body, or ``None`` if the request
        failed (an error dialog is shown in that case).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        messagebox.showerror("Download Error", f"Failed to download PDF: {e}")
        return None
    return BytesIO(response.content)
44+
45+
def select_files():
    """Ask the user for PDF files, summarize each one and show the results.

    Files whose text cannot be extracted (``pdf_to_text`` returns ``None``)
    are skipped. The summaries replace the contents of the output widget,
    separated by blank lines.
    """
    file_paths = filedialog.askopenfilenames(filetypes=[("PDF files", "*.pdf")])
    if not file_paths:
        messagebox.showinfo("No Files", "No PDF files were selected.")
        return

    summaries = []
    for pdf_file in file_paths:
        # Extract once per file; extraction is expensive and was previously
        # run twice (once to filter, once to summarize).
        text = pdf_to_text(pdf_file)
        if text is not None:
            summaries.append(summarize_text(text))

    output_text.delete('1.0', tk.END)
    output_text.insert(tk.END, "\n\n".join(summaries))
    messagebox.showinfo("Completion", "Summarization completed!")
55+
56+
def fetch_from_url():
    """Prompt for a PDF URL, download it, and display its summary.

    Does nothing if the prompt is cancelled or left empty. Download failures
    are reported by ``download_pdf``; extraction failures are reported here.
    """
    url = simpledialog.askstring("Enter URL", "Please enter the PDF URL:")
    if not url:
        return

    pdf_file = download_pdf(url)
    if pdf_file is None:
        # download_pdf has already shown an error dialog.
        return

    text = pdf_to_text(pdf_file)
    if text is None:
        # pdf_to_text returns None on failure; summarizing None would crash.
        messagebox.showerror("Extraction Error", "Could not extract text from the downloaded PDF.")
        return

    summary = summarize_text(text)
    output_text.delete('1.0', tk.END)
    output_text.insert(tk.END, summary)
    messagebox.showinfo("Completion", "Summarization completed!")
65+
66+
def save_summary_as_pdf():
    """Write the text in the output widget to a PDF chosen by the user.

    The page has a centered logo (when ``p.png`` is present next to this
    script), a centered title, a separator line and then the summary text.
    Shows an error dialog if there is no summary or the file cannot be
    written; does nothing if the save dialog is cancelled.
    """
    import os  # local import: only this function needs it

    text_to_save = output_text.get('1.0', tk.END)
    if not text_to_save.strip():
        messagebox.showerror("No Data", "There is no summary to save.")
        return

    file_path = filedialog.asksaveasfilename(defaultextension=".pdf", filetypes=[("PDF files", "*.pdf")])
    if not file_path:
        return

    pdf = FPDF()
    pdf.add_page()

    # Resolve the logo relative to this script rather than the current
    # working directory, and skip it instead of failing when it is missing.
    logo_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "p.png")
    if os.path.isfile(logo_path):
        logo_x = (pdf.w - 30) / 2  # center the 30-unit-wide logo horizontally
        pdf.image(logo_path, x=logo_x, y=10, w=30)
    pdf.ln(28)

    # Header title, centered.
    pdf.set_font("Arial", size=12)
    pdf.cell(0, 10, "MGM's College Of Engineering", ln=True, align='C')

    # Separator line across the page.
    pdf.set_line_width(0.5)
    pdf.line(10, 50, pdf.w - 10, 50)

    # Summary text. The built-in Arial font only covers Latin-1, so replace
    # anything outside that range rather than letting the export fail.
    pdf.set_font("Arial", size=10)
    printable_text = text_to_save.encode("latin-1", "replace").decode("latin-1")
    pdf.multi_cell(0, 10, printable_text)

    try:
        pdf.output(file_path)
    except OSError as e:
        messagebox.showerror("Save Error", f"Could not write PDF: {e}")
        return

    messagebox.showinfo("Saved as PDF", "The summary has been saved as PDF!")
96+
97+
# Main window.
app = tk.Tk()
app.title("PDF Summarizer")
app.geometry("800x600")
app.config(bg="light blue")
customFont = font.Font(family="Helvetica", size=12)

# Appearance options shared by the three action buttons.
_button_options = dict(font=customFont, bg="navy", fg="white", padx=10, pady=5)

btn_load = tk.Button(app, text="Load PDFs", command=select_files, **_button_options)
btn_load.pack(pady=10)

btn_fetch = tk.Button(app, text="Fetch PDF from URL", command=fetch_from_url, **_button_options)
btn_fetch.pack(pady=10)

btn_save_pdf = tk.Button(app, text="Save Summary as PDF", command=save_summary_as_pdf, **_button_options)
btn_save_pdf.pack(pady=10)

# Scrollable text area that displays the generated summaries.
output_text = scrolledtext.ScrolledText(
    app,
    width=70,
    height=20,
    font=customFont,
    padx=10,
    pady=10,
    wrap=tk.WORD,
)
output_text.pack(pady=20)

app.mainloop()

PDF summrazation/p.png

38.1 KB

0 commit comments

Comments
(0)

AltStyle γ«γ‚ˆγ£γ¦ε€‰ζ›γ•γ‚ŒγŸγƒšγƒΌγ‚Έ (->γ‚ͺγƒͺγ‚ΈγƒŠγƒ«) /