@@ -9,12 +9,12 @@ def convert_pdf_to_text(pdf_path, text_path):
9
9
:param text_path: Path to save the output text file
10
10
"""
11
11
try :
12
- doc = fitz .open (pdf_path )
12
+ doc = fitz .open (pdf_path )# Open the PDF document
13
13
text = ""
14
14
for page in doc :
15
- text += page .get_text ("text" )
15
+ text += page .get_text ("text" )# Extract text from each page
16
16
with open (text_path , "w" , encoding = "utf-8" ) as txt_file :
17
- txt_file .write (text )
17
+ txt_file .write (text )# Save extracted text to a text file
18
18
print (f"Text extracted and saved to { text_path } " )
19
19
except Exception as e :
20
20
print (f"Error converting PDF to text: { e } " )
@@ -26,13 +26,13 @@ def convert_pdf_to_images(pdf_path, images_folder):
26
26
:param images_folder: Folder to save the output images
27
27
"""
28
28
try :
29
- doc = fitz .open (pdf_path )
30
- os .makedirs (images_folder , exist_ok = True )
29
+ doc = fitz .open (pdf_path )# Open the PDF document
30
+ os .makedirs (images_folder , exist_ok = True )# Create the output folder if it doesn't exist
31
31
for page_num , page in enumerate (doc ):
32
32
image_path = os .path .join (images_folder , f"page_{ page_num + 1 } .png" )
33
33
pix = page .get_pixmap (matrix = fitz .Matrix (2 , 2 )) # Increase resolution for better quality
34
34
img = Image .frombytes ("RGB" , (pix .width , pix .height ), pix .samples )
35
- img .save (image_path , format = "PNG" )
35
+ img .save (image_path , format = "PNG" )# Save each page as a PNG image
36
36
print (f"Images saved to { images_folder } " )
37
37
except Exception as e :
38
38
print (f"Error converting PDF to images: { e } " )
0 commit comments