|
| 1 | +from pdfrw import PdfReader, PdfWriter |
| 2 | +from pathlib import Path |
| 3 | +import os |
| 4 | +import ntpath |
| 5 | + |
def path_leaf(path):
    """Return the last component (the file name) of *path* as a string.

    ``ntpath`` is used so that both backslashes and forward slashes are
    treated as separators regardless of the platform the script runs on.

    Parameters:
        path : Path of a file, absolute or relative.
    Returns:
        The final path component; for a path ending in a separator, the
        component just before that separator.
    """
    head, tail = ntpath.split(path)
    # A trailing separator leaves ``tail`` empty, so fall back to the last
    # component of ``head``.
    return tail or ntpath.basename(head)
| 10 | + |
def input_and_parse(n):
    """Read *n* ``current,new`` page pairs from standard input.

    Each line holds the page's current number and the position it should
    end up at, separated by a comma. The pairs are returned keyed by the
    new position, in ascending key order.

    Parameters:
        n (int): Number of lines to read (the number of pages in the PDF).
    Returns:
        dict: Maps each new position to the current page number that
        should be placed there, with keys in ascending order.
    Raises:
        ValueError: If a line is not two comma-separated integers, or if
        the same new position is given more than once.
    """

    print("enter the current page and the page you want it to be on seperate values by a comma ',' \n")
    new_to_current = {}
    for _ in range(n):
        line = input()
        fields = line.split(',')
        if len(fields) != 2:
            raise ValueError(f"expected 'current,new' but got {line!r}")
        current, new = int(fields[0]), int(fields[1])
        # Two pages aimed at the same position would silently overwrite
        # each other in the dictionary and drop a page from the output.
        if new in new_to_current:
            raise ValueError(f"new position {new} was given more than once")
        new_to_current[new] = current
    # Dicts keep insertion order, so sorting the items here fixes the order
    # in which the mapping is later iterated.
    return dict(sorted(new_to_current.items()))
| 32 | + |
| 33 | + |
def re_arrange(file_path, output_file_name, dic):
    """Write a copy of a PDF with its pages placed in a new order.

    Pages are appended to the output in the iteration order of *dic*, and
    the result is saved next to the source file.

    Parameters:
        file_path : Path of the pdf file to be modified.
        output_file_name : File name (not a full path) for the new pdf; it
            is created in the same directory as *file_path*.
        dic : A dictionary mapping each new position to the 1-based number
            of the source page to place there.
    Returns:
        None
    Raises:
        IndexError: If a source page number is not between 1 and the number
        of pages in the pdf.
    """
    file_path = Path(file_path)
    # Create a pdf object using PdfReader that can be read
    pdf_obj = PdfReader(file_path)
    total_pages = len(pdf_obj.pages)
    print("Total Pages in PDF are:", total_pages)

    # Check every source page before adding anything: 0 or a negative number
    # would otherwise wrap around through negative indexing and silently
    # pick a page counted from the end.
    for old in dic.values():
        if not 1 <= old <= total_pages:
            raise IndexError(
                f"source page {old} is outside the range 1..{total_pages}")

    # The writer collects the pages that make up the new, modified pdf
    writer = PdfWriter()

    # "new" is the new position of the page currently at position "old"
    for new, old in dic.items():
        # Page numbers are 1-based while the pages list is 0-based
        writer.addpage(pdf_obj.pages[old - 1])
        print(f"page{new} added from {old}")

    # Join with pathlib rather than a hard-coded "\\" so the output lands
    # beside the source file on every platform, including when file_path
    # has no directory part.
    writer.write(file_path.parent / output_file_name)
| 62 | + |
| 63 | + |
if __name__ == "__main__":
    # Ask for the source pdf, read the page pairs, then write the reordered
    # copy next to the source file.
    file_path = input("Enter the path of the pdf file:")
    print("\n")
    # Strip the real extension, whatever its length, instead of blindly
    # cutting the last four characters of the file name.
    output_file_name = ntpath.splitext(path_leaf(file_path))[0] + "_modified.pdf"
    # One input line is requested per page in the pdf
    dic = input_and_parse(len(PdfReader(file_path).pages))
    re_arrange(file_path, output_file_name, dic)
    print("New modified pdf file created succesfully!")
| 71 | + |
"""
A wrong.pdf file, whose pages are in the wrong order, has been added for testing purposes.

Pairs of pages in right,wrong format; you can use these with the pdf_reorder.py script:
1,5
2,2
3,3
4,1
5,4
"""
0 commit comments