Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a633552

Browse files
Merge pull request avinashkranjan#142 from Siddhant-K-code/master
PDF To CSV Converter
2 parents 0e20c53 + 99bba10 commit a633552

File tree

4 files changed

+104
-0
lines changed

4 files changed

+104
-0
lines changed

‎PDF To CSV Converter/main.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
print("[-+-] starting pdf_csv.py...")
2+
print("[-+-] import a pdf and convert it to a csv")
3+
# -----------------------------------------------------------------------------
4+
print("[-+-] importing required packages for pdf_csv.py...")
5+
import os
6+
import tabula # simple wrapper for tabula-java, read tables from PDF into csv
7+
#from modules.defaults import df # local module
8+
print("[-+-] pdf_csv.py packages imported! \n")
9+
#-----------------------------------------------------------------------------
10+
11+
# -----------------------------------------------------------------------------
12+
def pdf_csv():  # convert pdf to csv
    """Convert a PDF in the current working directory into ``sample1.csv``.

    The function looks for ``sample1.pdf`` in the current working directory.
    If that file is missing it falls back to the only ``*.pdf`` file in the
    directory; when there are zero or several candidates it reports the
    problem and returns without converting anything.

    Before converting, an empty ``sample1.csv`` is created if none exists.
    The conversion itself is delegated to ``tabula.convert_into`` with
    ``output_format="csv"`` and ``pages="all"``.

    Progress is reported on stdout. The success messages are printed only
    when the conversion actually ran without raising ``IOError``.

    Returns:
        None
    """
    print("[-+-] default filenames:")
    filename = "sample1"
    pdf = filename + ".pdf"
    csv = filename + ".csv"
    print(pdf)
    print(csv + "\n")

    print("[-+-] default directory:")
    print("[-+-] (based on current working directory of python file)")
    defaultdir = os.getcwd()
    print(defaultdir + "\n")

    print("[-+-] default file paths:")
    pdf_path = os.path.join(defaultdir, pdf)
    csv_path = os.path.join(defaultdir, csv)
    print(pdf_path)
    print(csv_path + "\n")

    print("[-+-] looking for default pdf...")
    if os.path.exists(pdf_path):  # check if the default pdf exists
        print("[-+-] pdf found: " + pdf + "\n")
    else:
        print("[-+-] looking for another pdf...")
        # Use a dedicated loop name so the directory variable is not shadowed.
        candidates = [
            name for name in os.listdir(defaultdir) if name.endswith(".pdf")
        ]
        if len(candidates) == 1:  # there has to be only 1 pdf in the directory
            print("[-+-] pdf found: " + candidates[0] + "\n")
            pdf_path = os.path.join(defaultdir, candidates[0])
        elif len(candidates) > 1:  # there are more than 1 pdf in the directory
            print("[-+-] more than 1 pdf found, exiting script!")
            # TODO add option to select from available pdfs
            return
        else:
            print("[-+-] pdf cannot be found, exiting script!")
            return

    # Check if the csv exists at the default file path; if it does not,
    # create a blank file there. Context managers close the handles that the
    # probe/creation would otherwise leak.
    print("[-+-] looking for default csv...")
    try:
        with open(csv_path, "r"):
            pass
    except IOError:
        print("[-+-] did not find csv at default file path!")
        print("[-+-] creating a blank csv file: " + csv + "... \n")
        with open(csv_path, "w"):
            pass
    else:
        print("[-+-] csv found: " + csv + "\n")

    print("[-+-] converting pdf to csv...")
    # print("[-+-] pdf to csv conversion suppressed! \n")
    try:
        tabula.convert_into(pdf_path, csv_path, output_format="csv", pages="all")
    except IOError:
        # Stop here so a failed run is not followed by success messages.
        print("[-+-] pdf to csv conversion failed!")
        return

    print("[-+-] pdf to csv conversion complete!\n")
    print("[-+-] converted csv file can be found here: " + csv_path + "\n")
    print("[-+-] finished pdf_csv.py successfully!")
73+
# -----------------------------------------------------------------------------
74+
75+
# -----------------------------------------------------------------------------
76+
# Script entry point: the conversion is invoked unconditionally here (no
# `__main__` guard is visible), so it runs whenever this file is executed.
pdf_csv() # run the program
77+
# -----------------------------------------------------------------------------
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tabula-py

‎PDF To CSV Converter/sample1.csv

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Date,Open,High,Low,Close / Last,Volume
2+
01/04/2017,62.48,62.75,62.12,62.3,"21,325,140"
3+
01/03/2017,62.79,62.84,62.125,62.58,"20,655,190"
4+
12/30/2016,62.96,62.99,62.03,62.14,"25,575,720"
5+
12/29/2016,62.86,63.2,62.73,62.9,"10,248,460"
6+
12/28/2016,63.4,63.4,62.83,62.99,"14,348,340"
7+
12/27/2016,63.21,64.07,63.21,63.28,"11,743,650"
8+
12/23/2016,63.45,63.54,62.8,63.24,"12,399,540"
9+
12/22/2016,63.84,64.1,63.405,63.55,"22,175,270"
10+
12/21/2016,63.43,63.7,63.12,63.54,"17,084,370"
11+
12/20/2016,63.69,63.8,63.025,63.54,"26,017,470"
12+
12/19/2016,62.56,63.77,62.42,63.62,"34,318,500"
13+
12/16/2016,62.95,62.95,62.115,62.3,"42,452,660"
14+
Date,Open,High,Low,Close / Last,Volume
15+
01/04/2017,117.55,119.66,117.29,118.69,"19,594,560"
16+
01/03/2017,116.03,117.84,115.51,116.86,"20,635,600"
17+
12/30/2016,116.595,116.83,114.7739,115.05,"18,668,290"
18+
12/29/2016,117,117.531,116.06,116.35,"9,925,082"
19+
12/28/2016,118.19,118.25,116.65,116.92,"11,985,740"
20+
12/27/2016,116.96,118.68,116.864,118.01,"12,034,590"
21+
12/23/2016,117,117.56,116.3,117.27,"10,885,030"
22+
12/22/2016,118.86,118.99,116.93,117.4,"16,226,770"
23+
12/21/2016,118.92,119.2,118.48,119.04,"10,747,610"
24+
12/20/2016,119.5,119.77,118.8,119.09,"13,673,570"
25+
12/19/2016,119.85,120.36,118.51,119.24,"15,871,360"
26+
12/16/2016,120.9,121.5,119.27,119.87,"25,316,220"

‎PDF To CSV Converter/sample1.pdf

23.9 KB
Binary file not shown.

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /