1
+ import PyPDF2
2
+ import fitz # PyMuPDF
3
+ from docx import Document
4
+ from PIL import Image
5
+ from docx .shared import Pt
6
+ from docx .enum .text import WD_PARAGRAPH_ALIGNMENT
7
+
8
def pdf_to_image(pdf_path, image_path):
    """Render every page of a PDF to its own PNG file.

    Args:
        pdf_path: Path of the PDF file to render.
        image_path: Either an existing directory, in which case the images
            are written inside it, or a file-name prefix to which
            ``page<N>.png`` is appended (``N`` starts at 1).
    """
    # Local import keeps this function self-contained; the module's
    # top-level imports do not include ``os``.
    import os

    prefix = os.fspath(image_path)
    if os.path.isdir(prefix):
        # Joining with "" appends a trailing separator when one is missing,
        # so a directory argument yields "<dir>/page1.png" rather than a
        # sibling file named "<dir>page1.png".
        prefix = os.path.join(prefix, "")

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            pixmap = page.get_pixmap()
            pixmap.save(f"{prefix}page{page_number}.png")
14
+
15
def pdf_to_text(pdf_path, text_path):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    The per-page texts are concatenated in page order with no separator
    added between pages, then written to ``text_path`` (overwriting any
    existing file).

    Args:
        pdf_path: Path of the PDF file to read.
        text_path: Path of the text file to create.
    """
    with open(pdf_path, "rb") as pdf_stream:
        reader = PyPDF2.PdfReader(pdf_stream)
        # Extract while the source file is still open; a single join avoids
        # rebuilding the accumulated string once per page.
        text = "".join(page.extract_text() for page in reader.pages)

    with open(text_path, "w", encoding="utf-8") as text_file:
        text_file.write(text)
24
+
25
def text_to_document(text_path, doc_path, *, font_size=12):
    """Convert a UTF-8 text file into a .docx file, one paragraph per line.

    Each line is stripped of surrounding whitespace, added as a
    left-aligned paragraph, and its text is set to ``font_size`` points.
    Blank lines become empty paragraphs.

    Args:
        text_path: Path of the text file to read.
        doc_path: Path of the .docx file to write.
        font_size: Font size in points applied to every paragraph
            (defaults to 12).
    """
    document = Document()
    size = Pt(font_size)

    with open(text_path, "r", encoding="utf-8") as text_file:
        for line in text_file:
            paragraph = document.add_paragraph(line.strip())
            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
            # A paragraph created from empty text has no runs, so indexing
            # runs[0] would raise IndexError on blank lines; iterating
            # handles both the empty and the non-empty case.
            for run in paragraph.runs:
                run.font.size = size

    document.save(doc_path)
36
+
37
# Example usage: render a PDF to PNG images, extract its text, and turn that
# text into a Word document. Adjust the paths below for your machine.
pdf_file = r"C:\Users\Acer\Downloads\Resume.pdf"
image_output_path = r"C:\Users\Acer\Downloads"
text_path = r"C:\Users\Acer\Downloads\textfile.txt"

doc_output_path = r"C:\Users\Acer\Downloads\document.docx"

# Run the conversions only when executed as a script, so importing this
# module for its functions does not touch the file system.
if __name__ == "__main__":
    pdf_to_image(pdf_file, image_output_path)
    pdf_to_text(pdf_file, text_path)
    text_to_document(text_path, doc_output_path)
0 commit comments