1
+ from skimage import io
2
+ from PyPDF2 import PdfFileReader
3
+ from pdf2image import convert_from_path
4
+ import numpy as np
5
+ import os
6
+ from PIL import Image
7
+ from fpdf import FPDF
8
+ import shutil
9
+
10
# Ask the user for the PDF to clean, then derive the folder that contains it
# and its file name without the extension (used later to name the output).
pdfFile = input('PDF file location: ')
dirname = os.path.split(os.path.normpath(pdfFile))[0]
outputFile = os.path.splitext(os.path.basename(pdfFile))[0]

# Page count of the source document; `rang` is that count plus one so it can
# serve as the exclusive upper bound of a page loop that starts at 1.
pdf_reader = PdfFileReader(pdfFile)
pages = pdf_reader.getNumPages()
rang = 1 + int(pages)
17
+
18
+ # Select the pixel from the extracted images of pdf pages
19
def select_pixel(r, g, b, low=120, high=254):
    """Return True when every channel of an RGB pixel lies strictly inside a band.

    With the default bounds this selects light, non-white pixels: each of
    ``r``, ``g`` and ``b`` must be greater than 120 and less than 254.

    Args:
        r: Red channel value.
        g: Green channel value.
        b: Blue channel value.
        low: Exclusive lower bound applied to each channel.
        high: Exclusive upper bound applied to each channel.

    Returns:
        bool: True if all three channels satisfy ``low < value < high``,
        otherwise False.
    """
    # bool() keeps the result a plain Python bool even when the channel
    # values are NumPy scalars, matching the original True/False returns.
    return bool(low < r < high and low < g < high and low < b < high)
24
+
25
+ # Handling of images for removing the watermark
26
def handle(imgs, low=120, high=254):
    """Whiten every pixel whose R, G and B values all lie strictly in a band.

    A pixel is overwritten with 255 in its first three channels when each of
    those channels is greater than ``low`` and less than ``high``. The
    defaults (120 and 254) are the thresholds used by ``select_pixel``.
    The work is done with a NumPy mask instead of a Python loop over every
    pixel; the resulting array is the same.

    Args:
        imgs: NumPy array indexed as ``[row, column, channel]`` with at least
            three channels. It is modified in place.
        low: Exclusive lower bound applied to each of the first three channels.
        high: Exclusive upper bound applied to each of the first three channels.

    Returns:
        The same array object that was passed in, after modification.

    Raises:
        ValueError: If ``imgs`` is not 3-dimensional or has fewer than three
            channels.
    """
    if imgs.ndim != 3 or imgs.shape[2] < 3:
        raise ValueError(
            "expected an array shaped (rows, cols, >=3 channels), got %r"
            % (imgs.shape,)
        )

    # Basic slicing yields a view, so writes through `rgb` land in `imgs`;
    # any channel beyond the first three (e.g. alpha) is left untouched.
    rgb = imgs[..., :3]
    mask = ((rgb > low) & (rgb < high)).all(axis=-1)
    rgb[mask] = 255
    return imgs
32
+
33
# Render the pages of the PDF to images.
images = convert_from_path(pdfFile)

# Folder that receives one cleaned JPEG per page. os.path.join supplies the
# platform's separator (the old '\img' literal was an invalid escape sequence
# and only formed a real sub-folder path on Windows) and also copes with an
# empty `dirname`, i.e. a PDF given relative to the current directory.
img_dir = os.path.join(dirname, 'img')
try:
    os.mkdir(img_dir)
except FileExistsError:
    print('Folder exist')

# Pages are numbered from 1, producing img1.jpg, img2.jpg, ...
index = 0  # stays 0 when the PDF yields no pages
for index, img in enumerate(images, start=1):
    img = np.array(img)
    print(img.shape)
    img = handle(img)
    io.imsave(os.path.join(img_dir, 'img' + str(index) + '.jpg'), img)
    print(index)
47
+
48
+ # Merging images to a single PDF
49
# Rebuild one PDF from the per-page JPEGs stored in the 'img' sub-folder.
pdf = FPDF()
# Join with a real path separator: plain concatenation (dirname + "img/")
# pointed at a folder that is never created, so no page image was found.
sdir = os.path.join(dirname, "img")
w, h = 0, 0

for i in range(1, rang):
    fname = os.path.join(sdir, "img%d.jpg" % i)
    if os.path.exists(fname):
        if i == 1:
            # The first page's pixel size becomes the page format (in points)
            # for the whole document. The context manager closes the file
            # handle as soon as the size has been read.
            with Image.open(fname) as cover:
                w, h = cover.size
            pdf = FPDF(unit="pt", format=[w, h])
        pdf.add_page()
        pdf.image(fname, 0, 0, w, h)
    else:
        print("File not found:", fname)

# Write "<name>_rw.pdf" inside the source PDF's folder.
pdf.output(os.path.join(dirname, outputFile + '_rw.pdf'), "F")
print("done")
0 commit comments