Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit fdbd1d2

Browse files
Ready to push
PDF-watermark-remover.py - original script; sample.pdf - sample file which has a watermark; sample_rw.pdf - output file of the script
1 parent b6c6188 commit fdbd1d2

File tree

17 files changed

+107
-0
lines changed

17 files changed

+107
-0
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
from skimage import io
2+
from PyPDF2 import PdfFileReader
3+
from pdf2image import convert_from_path
4+
import numpy as np
5+
import os
6+
from PIL import Image
7+
from fpdf import FPDF
8+
import shutil
9+
10+
# Ask the user which PDF to process and split its path into the pieces
# the rest of the script reuses.
pdfFile = input('PDF file location: ')
dirname = os.path.dirname(os.path.normpath(pdfFile))
# File name of the input without directory or extension.
outputFile = os.path.splitext(os.path.basename(pdfFile))[0]

# Page count of the source PDF; `rang` is the exclusive upper bound for
# iterating 1-based page numbers with range(1, rang).
pdf_reader = PdfFileReader(pdfFile)
pages = pdf_reader.getNumPages()
rang = 1 + int(pages)
17+
18+
# Select the pixel from the extracted images of pdf pages
19+
def select_pixel(r, g, b, low=120, high=254):
    """Tell whether an RGB pixel lies inside the tone band to be whitened.

    A pixel is selected when each of its three channels is strictly
    greater than ``low`` and strictly less than ``high``.

    Args:
        r: Red channel value.
        g: Green channel value.
        b: Blue channel value.
        low: Exclusive lower bound applied to every channel (default 120).
        high: Exclusive upper bound applied to every channel (default 254).

    Returns:
        True if all three channels fall inside the open interval
        (low, high), otherwise False.
    """
    # bool() keeps the return a plain Python bool even when the channel
    # values are NumPy scalars, whose comparisons yield numpy.bool_.
    return bool(low < r < high and low < g < high and low < b < high)
24+
25+
# Handling of images for removing the watermark
26+
def handle(imgs, low=120, high=254):
    """Whiten, in place, every pixel whose RGB channels all lie in a band.

    A pixel is whitened when each of its first three channels is strictly
    between ``low`` and ``high``; with the defaults this is the same rule
    that ``select_pixel`` applies to a single pixel.  The test is done
    with one vectorised mask instead of a per-pixel Python loop, which is
    far too slow for full-page renders.

    Args:
        imgs: NumPy array of shape (height, width, channels) with at
            least three channels.  It is modified in place.
        low: Exclusive lower bound applied to every channel (default 120).
        high: Exclusive upper bound applied to every channel (default 254).

    Returns:
        The same array object that was passed in, after modification.

    Raises:
        ValueError: If ``imgs`` is not a 3-D array with >= 3 channels.
    """
    if imgs.ndim != 3 or imgs.shape[2] < 3:
        raise ValueError(
            "expected an image array of shape (height, width, >=3), got %r"
            % (imgs.shape,)
        )

    rgb = imgs[..., :3]
    # True where all three colour channels fall inside (low, high).
    mask = ((rgb > low) & (rgb < high)).all(axis=-1)
    # Only the first three channels are overwritten, so any extra channel
    # (e.g. alpha) keeps its value.
    imgs[mask, :3] = 255
    return imgs
32+
33+
# Render every page of the input PDF to an image.
images = convert_from_path(pdfFile)

# Directory holding the cleaned page images.  It is built once with
# os.path.join so the save step and the merge step below agree on the
# location, and so no hand-written '\' or '/' separators are needed.
sdir = os.path.join(dirname, 'img')
try:
    os.mkdir(sdir)
except FileExistsError:
    print('Folder exist')

# Clean each page and save it as img<N>.jpg, with N counted from 1.
index = 0
for index, img in enumerate(images, start=1):
    img = np.array(img)
    print(img.shape)
    img = handle(img)
    io.imsave(os.path.join(sdir, 'img' + str(index) + '.jpg'), img)
    print(index)

# Merging images to a single PDF
pdf = FPDF()
w, h = 0, 0

for i in range(1, rang):
    fname = os.path.join(sdir, "img%d.jpg" % i)
    if os.path.exists(fname):
        if i == 1:
            # The first page's pixel size sets the page format for the
            # whole document; close the file as soon as the size is read.
            with Image.open(fname) as cover:
                w, h = cover.size
            pdf = FPDF(unit="pt", format=[w, h])
        pdf.add_page()
        pdf.image(fname, 0, 0, w, h)
    else:
        print("File not found:", fname)

# Write <input name>_rw.pdf next to the input file.
pdf.output(os.path.join(dirname, outputFile + '_rw.pdf'), "F")
print("done")

‎PDF Watermark Remover/Results/img1.jpg

480 KB
Loading[フレーム]

‎PDF Watermark Remover/Results/img2.jpg

480 KB
Loading[フレーム]

‎PDF Watermark Remover/Results/img3.jpg

479 KB
Loading[フレーム]

‎PDF Watermark Remover/Results/img4.jpg

480 KB
Loading[フレーム]

‎PDF Watermark Remover/Results/img5.jpg

480 KB
Loading[フレーム]

‎PDF Watermark Remover/Results/img6.jpg

480 KB
Loading[フレーム]
220 KB
Loading[フレーム]
220 KB
Loading[フレーム]
219 KB
Loading[フレーム]

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /