Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
This repository was archived by the owner on Jun 29, 2024. It is now read-only.

Commit 5231d31

Browse files
pdf_converter.py
1 parent 0287cb2 commit 5231d31

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

‎pdf_converter.py‎

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,12 @@ def convert_pdf_to_text(pdf_path, text_path):
99
:param text_path: Path to save the output text file
1010
"""
1111
try:
12-
doc = fitz.open(pdf_path)
12+
doc = fitz.open(pdf_path)# Open the PDF document
1313
text = ""
1414
for page in doc:
15-
text += page.get_text("text")
15+
text += page.get_text("text")# Extract text from each page
1616
with open(text_path, "w", encoding="utf-8") as txt_file:
17-
txt_file.write(text)
17+
txt_file.write(text)# Save extracted text to a text file
1818
print(f"Text extracted and saved to {text_path}")
1919
except Exception as e:
2020
print(f"Error converting PDF to text: {e}")
@@ -26,13 +26,13 @@ def convert_pdf_to_images(pdf_path, images_folder):
2626
:param images_folder: Folder to save the output images
2727
"""
2828
try:
29-
doc = fitz.open(pdf_path)
30-
os.makedirs(images_folder, exist_ok=True)
29+
doc = fitz.open(pdf_path)# Open the PDF document
30+
os.makedirs(images_folder, exist_ok=True)# Create the output folder if it doesn't exist
3131
for page_num, page in enumerate(doc):
3232
image_path = os.path.join(images_folder, f"page_{page_num + 1}.png")
3333
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) # Increase resolution for better quality
3434
img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
35-
img.save(image_path, format="PNG")
35+
img.save(image_path, format="PNG")# Save each page as a PNG image
3636
print(f"Images saved to {images_folder}")
3737
except Exception as e:
3838
print(f"Error converting PDF to images: {e}")

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /