Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
This repository was archived by the owner on May 25, 2022. It is now read-only.

Commit 29ae97f

Browse files
Merge pull request #69 from phileinSophos/master
split text and csv files using no of lines
2 parents 9ca7a0e + 189bb8d commit 29ae97f

File tree

2 files changed

+58
-0
lines changed

2 files changed

+58
-0
lines changed

‎projects/Split_File/README.md‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Split Files
2+
##### Execute
3+
`python split_files.py <csv/text_file> <split/line_number>`

‎projects/Split_File/split_files.py‎

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import sys
2+
import os
3+
import shutil
4+
import pandas as pd
5+
6+
class Split_Files:
    '''
    Split a csv/txt file into numbered files of at most ``split`` rows each.

    Output files are written into the ``file_split`` directory and are named
    ``split_file1``, ``split_file2``, ... followed by the detected extension.
    '''

    def __init__(self, filename, split_number):
        '''
        Store the input file name and the chunk size, and prepare the output
        directory.

        filename: path of the file to split. A name ending in ``.txt`` selects
            the ``.txt`` output extension; anything else is treated as ``.csv``.
        split_number: number of rows per output file; any value accepted by
            ``int()`` (the command line passes a string).

        Raises ValueError when split_number is not an integer or is smaller
        than 1.

        An existing ``file_split`` directory is removed and recreated empty.
        '''
        self.file_name = filename
        self.directory = "file_split"
        self.split = int(split_number)
        # Validate before touching the disk so a bad chunk size never wipes
        # the output of a previous run.
        if self.split < 1:
            raise ValueError("split_number must be a positive integer")
        if os.path.exists(self.directory):
            shutil.rmtree(self.directory)
        os.mkdir(self.directory)
        if self.file_name.endswith('.txt'):
            self.file_extension = '.txt'
        else:
            self.file_extension = '.csv'
        # Number used for the next output file.
        self.file_number = 1

    def split_data(self):
        '''
        Split the input csv/txt file into chunks of ``self.split`` rows.

        The input is parsed with ``pd.read_csv(..., header=None)``, so every
        line is treated as data. Each chunk is written without header or
        index; ``.txt`` output is space separated and ``.csv`` output is comma
        separated. The last file holds the remaining rows when the row count
        is not a multiple of the chunk size. ``self.file_number`` is advanced
        after every file written.
        '''
        data = pd.read_csv(self.file_name, header=None)
        # One separator for every chunk, including the trailing partial one.
        separator = ' ' if self.file_extension == '.txt' else ','

        # Positional slicing replaces the row-by-row DataFrame.append loop:
        # append is gone in pandas 2.0, and slicing keeps column dtypes intact.
        for start in range(0, len(data), self.split):
            output_file = f"{self.directory}/split_file{self.file_number}{self.file_extension}"
            chunk = data.iloc[start:start + self.split]
            chunk.to_csv(output_file, header=False, index=False, sep=separator)
            self.file_number += 1
51+
52+
if __name__ == '__main__':
    # Both the input file and the rows-per-file count are required; fail with
    # a usage message instead of an IndexError traceback when one is missing.
    if len(sys.argv) < 3:
        sys.exit("Usage: python split_files.py <csv/text_file> <split/line_number>")
    file, split_number = sys.argv[1], sys.argv[2]
    sp = Split_Files(file, split_number)
    sp.split_data()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /