|
| 1 | +import sys |
| 2 | +import os |
| 3 | +import shutil |
| 4 | +import pandas as pd |
| 5 | + |
class Split_Files:
    """Split a CSV/TXT file into numbered files holding a fixed number of rows.

    Output goes to the ``file_split`` directory (relative to the current
    working directory) as ``split_file<N>.csv`` or ``split_file<N>.txt``.
    """

    def __init__(self, filename, split_number):
        """Store the input path and chunk size, and reset the output directory.

        Args:
            filename: Path of the file to split. A name ending in ``.txt``
                yields ``.txt`` output; any other name yields ``.csv``.
            split_number: Rows per output file. Anything ``int()`` accepts,
                for example a command-line string.

        Raises:
            ValueError: If ``split_number`` cannot be parsed as an integer
                or is less than 1.
        """
        self.file_name = filename
        self.directory = "file_split"
        self.split = int(split_number)
        # Validate before touching the filesystem so a bad argument does not
        # wipe the results of a previous run.
        if self.split < 1:
            raise ValueError(
                f"split_number must be a positive integer, got {split_number!r}"
            )
        # Destructive on purpose: any existing output directory is removed so
        # files from an earlier run never mix with the new ones.
        if os.path.exists(self.directory):
            shutil.rmtree(self.directory)
        os.mkdir(self.directory)
        self.file_extension = '.txt' if self.file_name.endswith('.txt') else '.csv'
        self.file_number = 1

    def split_data(self):
        """Write the input rows to consecutive files of ``self.split`` rows.

        The input is read with ``pandas.read_csv`` and ``header=None``, so
        every line is treated as data. ``.txt`` output is space-separated and
        ``.csv`` output is comma-separated; a final partial chunk uses the
        same separator as the full chunks.

        NOTE(review): the input is parsed as comma-delimited regardless of
        its extension -- confirm that is intended for ``.txt`` inputs.
        """
        data = pd.read_csv(self.file_name, header=None)
        separator = ' ' if self.file_extension == '.txt' else ','

        # Slice whole chunks instead of appending row by row:
        # DataFrame.append no longer exists in pandas >= 2.0, and slicing
        # keeps the column dtypes exactly as they were parsed.
        for start in range(0, len(data), self.split):
            chunk = data.iloc[start:start + self.split]
            output_file = os.path.join(
                self.directory,
                f"split_file{self.file_number}{self.file_extension}",
            )
            chunk.to_csv(output_file, header=False, index=False, sep=separator)
            # The counter advances only after a full chunk, so its final
            # value matches what the row-by-row implementation left behind.
            if len(chunk) == self.split:
                self.file_number += 1
| 51 | + |
if __name__ == '__main__':
    # Command line: <script> <input file> <rows per output file>.
    # Exit with a usage message rather than an IndexError traceback when
    # arguments are missing; extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <file> <rows_per_split>")
    input_path, split_number = sys.argv[1], sys.argv[2]
    sp = Split_Files(input_path, split_number)
    sp.split_data()
0 commit comments