Python 批量合并 pdf

"""
将多个 PDF 合并成一个文件。使用方式:把本脚本放到 PDF 所在的文件夹下运行。注意:pyPdf 库已经比较老旧。
"""
import os.path

from pyPdf import PdfFileReader, PdfFileWriter  # pip install pyPdf
def get_pdf_files(dst_dir):
    """Recursively collect the paths of all PDF files below ``dst_dir``.

    Args:
        dst_dir: Directory to walk. A missing directory yields an empty list
            because ``os.walk`` ignores listing errors by default.

    Returns:
        A list of paths built by joining each walked directory with the file
        name. They are absolute only if ``dst_dir`` itself is absolute, and
        they come back in ``os.walk`` order (callers sort if they need to).
    """
    paths = []
    for root, _dirs, files in os.walk(dst_dir):
        for name in files:
            # Match the extension case-insensitively so "SCAN.PDF" is found too.
            if name.lower().endswith('.pdf'):
                paths.append(os.path.join(root, name))
    return paths
##########################合并同一个文件夹下所有PDF文件########################
def merge_pdf(dst_dir, outfile, sort=True, password="map"):
    """Merge every PDF found under ``dst_dir`` into one file using pyPdf.

    Args:
        dst_dir: Directory searched recursively through ``get_pdf_files``.
        outfile: Name of the merged PDF; it is written inside ``dst_dir``.
        sort: When true, inputs are merged in sorted path order; otherwise
            in the order ``get_pdf_files`` returns them.
        password: Password tried on encrypted inputs. Defaults to ``"map"``,
            the value that used to be hard-coded.
    """
    # os.path.join instead of string concatenation, so a dst_dir without a
    # trailing separator ("docs") no longer yields "docsall.pdf".
    out_path = os.path.join(dst_dir, outfile)
    out_abs = os.path.abspath(out_path)

    # Skip the output of a previous run, otherwise it would be merged into
    # itself the next time the script is executed.
    pdf_paths = [
        path for path in get_pdf_files(dst_dir)
        if os.path.abspath(path) != out_abs
    ]
    if sort:
        pdf_paths.sort()

    output = PdfFileWriter()
    curpage = 0
    handles = []
    try:
        for each in pdf_paths:
            print(each)
            # open() works on Python 2 and 3; file() exists only on Python 2.
            handle = open(each, "rb")
            # Keep the stream open until the merged file is written: the
            # reader may still need it when the writer serialises the pages.
            handles.append(handle)
            reader = PdfFileReader(handle)
            # An encrypted PDF has to be decrypted before pyPdf can use it.
            if reader.isEncrypted:
                reader.decrypt(password)
            # Number of pages in this source PDF.
            page_count = reader.getNumPages()
            curpage += page_count
            print(page_count)
            for i_page in range(page_count):
                output.addPage(reader.getPage(i_page))
        print("All Pages Number:" + str(curpage))
        with open(out_path, "wb") as output_stream:
            output.write(output_stream)
    finally:
        # Release every input handle even if reading or writing failed.
        for handle in handles:
            handle.close()
def main():
    """Merge all PDFs under the current directory into ``all.pdf``."""
    merged = "all.pdf"
    merge_pdf("./", merged)
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

推荐使用下面这个基于 pikepdf 库的版本来批量操作 PDF,因为 pyPdf 库已经很久没有更新了。

# -*- coding: utf-8 -*-
import os.path

from pikepdf import Pdf, OutlineItem  # pip install pikepdf
def get_pdf_files(dst_dir):
    """Return the paths of all PDF files found recursively under ``dst_dir``.

    Args:
        dst_dir: Directory to walk. A missing directory yields an empty list
            because ``os.walk`` ignores listing errors by default.

    Returns:
        A list of ``os.path.join(directory, filename)`` paths in ``os.walk``
        order. The paths are absolute only when ``dst_dir`` is absolute.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(dst_dir)
        for name in files
        # Case-insensitive suffix test so that "SCAN.PDF" is included as well.
        if name.lower().endswith('.pdf')
    ]
def merge_pdf(dst_dir, outfile, sort=True):
    """Concatenate every PDF under ``dst_dir`` into ``outfile`` using pikepdf.

    Each source file gets one outline (bookmark) entry, titled with its base
    name and pointing at the first page it contributes.

    Args:
        dst_dir: Directory searched recursively through ``get_pdf_files``.
        outfile: Path the merged PDF is saved to. It is passed to
            ``Pdf.save`` unchanged, so a relative path is resolved against
            the current working directory, not against ``dst_dir``.
        sort: When true, inputs are merged in sorted path order; otherwise
            in the order ``get_pdf_files`` returns them.
    """
    # https://pikepdf.readthedocs.io/en/latest/topics/pages.html#merge-concatenate-pdf-from-several-pdfs
    out_abs = os.path.abspath(outfile)
    # Skip the output of a previous run so it is neither merged into itself
    # nor opened as an input while it is being overwritten.
    pdf_paths = [
        path for path in get_pdf_files(dst_dir)
        if os.path.abspath(path) != out_abs
    ]
    if sort:
        pdf_paths.sort()

    pdf = Pdf.new()
    page_count = 0
    sources = []
    try:
        with pdf.open_outline() as outline:
            for path in pdf_paths:
                src = Pdf.open(path)
                # Keep the source open until the merged file has been saved.
                sources.append(src)
                print("merging:" + src.filename)  # progress output
                # Outline entry pointing at this file's first merged page.
                oi = OutlineItem(os.path.basename(src.filename), page_count)
                outline.root.append(oi)
                page_count += len(src.pages)
                pdf.pages.extend(src.pages)
        pdf.save(outfile)
    finally:
        # Close every opened source even if merging or saving failed.
        for src in sources:
            src.close()
def main():
    """Merge all PDFs under the current directory into ``all.pdf``."""
    merged = "all.pdf"
    merge_pdf("./", merged)
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()