Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 73ce81f

Browse files
committed
Add script to generate large CSV files for performance tests
1 parent 76475ee commit 73ce81f

File tree

2 files changed

+58
-0
lines changed

2 files changed

+58
-0
lines changed

‎source-code/polars/.gitignore‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
large_data*.csv
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#!/usr/bin/env python
2+
3+
from argparse import ArgumentParser
4+
from concurrent.futures import ProcessPoolExecutor
5+
import csv
6+
from datetime import datetime, timedelta
7+
import random
8+
import sys
9+
10+
11+
def write_file(args):
    """Write one CSV file containing a mirrored random walk per column.

    The file has a ``timestamp`` column followed by columns ``C1`` .. ``Cn``
    (one per entry of ``curr_vals``).  The first part of the file is a random
    walk starting at ``curr_vals``; the second part replays the rows of the
    first part in reverse order, with timestamps that keep increasing.

    Parameters
    ----------
    args : tuple
        ``(file_name, rows, curr_time, delta_time, curr_vals, delta_val)``,
        packed into a single tuple so the function can be used directly with
        ``Executor.map``:

        * ``file_name``  -- path of the CSV file to create (overwritten).
        * ``rows``       -- number of data rows to write (header excluded).
        * ``curr_time``  -- timestamp of the first row.
        * ``delta_time`` -- increment added to the timestamp for each row.
        * ``curr_vals``  -- initial value for each data column.
        * ``delta_val``  -- maximum absolute change of a value per step.

    Returns
    -------
    The ``file_name`` that was written.
    """
    file_name, rows, curr_time, delta_time, curr_vals, delta_val = args
    col_names = ['C{0:d}'.format(i + 1) for i in range(len(curr_vals))]
    fieldnames = ['timestamp'] + col_names
    # Split the requested row count into a forward random walk and a mirrored
    # replay.  For an odd count the forward part gets the extra row, so that
    # exactly `rows` rows are written (using rows//2 for both halves silently
    # dropped one row).  A non-positive count yields a header-only file.
    rows = max(rows, 0)
    mirrored_rows = rows//2
    forward_rows = rows - mirrored_rows
    with open(file_name, 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        values = []
        for _ in range(forward_rows):
            data = dict(zip(col_names, curr_vals))
            data['timestamp'] = curr_time
            writer.writerow(data)
            # Keep the row so it can be replayed in the mirrored part.
            values.append(data)
            curr_time += delta_time
            curr_vals = [x + random.uniform(-delta_val, delta_val)
                         for x in curr_vals]
        # Replay the forward rows backwards; only the timestamp is refreshed.
        # With an odd row count the last forward row is the turning point and
        # is not repeated.
        for data in reversed(values[:mirrored_rows]):
            data['timestamp'] = curr_time
            writer.writerow(data)
            curr_time += delta_time
    return file_name
34+
35+
36+
if __name__ == '__main__':
    # Command-line entry point: generate `--files` CSV files named
    # <base_name>_0001.csv, <base_name>_0002.csv, ... in parallel.
    arg_parser = ArgumentParser(description='create a set of CSV files')
    arg_parser.add_argument('--files', type=int, default=1,
                            help='number of files to create')
    arg_parser.add_argument('base_name', help='base file name to use')
    arg_parser.add_argument('--cols', type=int, default=1,
                            help='number of columns to generate')
    arg_parser.add_argument('--rows', type=int, default=100,
                            help='number of rows to generate per file')
    # default=None is passed straight through as max_workers, leaving the
    # pool size up to ProcessPoolExecutor.
    arg_parser.add_argument('--workers', type=int, default=None,
                            help='number of workers to use')
    options = arg_parser.parse_args()
    curr_time = datetime.now()
    delta_time = timedelta(seconds=1)
    # Every column of every file starts at 1.0 and changes by at most
    # delta_val per row.
    curr_vals = [1.0]*options.cols
    delta_val = 0.01
    with ProcessPoolExecutor(max_workers=options.workers) as executor:
        # One argument tuple per file; each file's first timestamp is offset
        # by `rows` time steps per preceding file, so timestamps continue
        # from one file to the next.
        args = [('{0}_{1:04d}.csv'.format(options.base_name, i + 1),
                 options.rows, curr_time + i*options.rows*delta_time,
                 delta_time, curr_vals, delta_val)
                for i in range(options.files)]
        # map() yields results in submission order, so completion messages
        # are printed in file-number order.
        for file_name in executor.map(write_file, args):
            print('{0} done'.format(file_name))

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /