Commit fdbd1d2

committed

Ready to push

PDF-watermark-remover.py - the original script; sample.pdf - a sample file that has a watermark; sample_rw.pdf - the output file of the script

1 parent b6c6188 commit fdbd1d2Copy full SHA for fdbd1d2

File tree

17 files changed

+107

-0

lines changed

PDF Watermark Remover

17 files changed

+107

-0

lines changed

`‎PDF Watermark Remover/PDF-Watermark-Remover.py`

Lines changed: 67 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,67 @@`
	`1`	`+from skimage import io`
	`2`	`+from PyPDF2 import PdfFileReader`
	`3`	`+from pdf2image import convert_from_path`
	`4`	`+import numpy as np`
	`5`	`+import os`
	`6`	`+from PIL import Image`
	`7`	`+from fpdf import FPDF`
	`8`	`+import shutil`
	`9`	`+`
	`10`	`+pdfFile = input('PDF file location: ')`
	`11`	`+dirname = os.path.dirname(os.path.normpath(pdfFile))`
	`12`	`+outputFile = os.path.basename(pdfFile)`
	`13`	`+outputFile = os.path.splitext(outputFile)[0]`
	`14`	`+pdf_reader = PdfFileReader(pdfFile)`
	`15`	`+pages = pdf_reader.getNumPages()`
	`16`	`+rang = int(pages) + 1`
	`17`	`+`
	`18`	`+# Select the pixel from the extracted images of pdf pages`
	`19`	`+def select_pixel(r,g,b):`
	`20`	`+ if r > 120 and r < 254 and g > 120 and g < 254 and b > 120 and b < 254:`
	`21`	`+ return True`
	`22`	`+ else:`
	`23`	`+ return False`
	`24`	`+`
	`25`	`+# Handling of images for removing the watermark`
	`26`	`+def handle(imgs):`
	`27`	`+ for i in range(imgs.shape[0]):`
	`28`	`+ for j in range(imgs.shape[1]):`
	`29`	`+ if select_pixel(imgs[i][j][0],imgs[i][j][1],imgs[i][j][2]):`
	`30`	`+ imgs[i][j][0] = imgs[i][j][1] = imgs[i][j][2] = 255`
	`31`	`+ return imgs`
	`32`	`+`
	`33`	`+images = convert_from_path(pdfFile)`
	`34`	`+`
	`35`	`+try:`
	`36`	`+ os.mkdir(dirname + '\img')`
	`37`	`+except FileExistsError:`
	`38`	`+ print('Folder exist')`
	`39`	`+index = 0`
	`40`	`+for img in images:`
	`41`	`+ index += 1`
	`42`	`+ img = np.array(img)`
	`43`	`+ print(img.shape)`
	`44`	`+ img = handle(img)`
	`45`	`+ io.imsave(dirname + '\img\img' + str(index) + '.jpg', img)`
	`46`	`+ print(index)`
	`47`	`+`
	`48`	`+# Merging images to a sigle PDF`
	`49`	`+pdf = FPDF()`
	`50`	`+sdir = dirname + "img/"`
	`51`	`+w,h = 0,0`
	`52`	`+`
	`53`	`+for i in range(1, rang):`
	`54`	`+ fname = sdir + "img%.0d.jpg" % i`
	`55`	`+ if os.path.exists(fname):`
	`56`	`+ if i == 1:`
	`57`	`+ cover = Image.open(fname)`
	`58`	`+ w,h = cover.size`
	`59`	`+ pdf = FPDF(unit = "pt", format = [w,h])`
	`60`	`+ image = fname`
	`61`	`+ pdf.add_page()`
	`62`	`+ pdf.image(image, 0, 0, w, h)`
	`63`	`+ else:`
	`64`	`+ print("File not found:", fname)`
	`65`	`+ # print("processed %d" % i)`
	`66`	`+pdf.output(dirname + outputFile + '_rw.pdf', "F")`
	`67`	`+print("done")`