I wrote a parser to extract data from a log file. The file format is a bit irregular, and so is also the parser code, as it turned out. It is a clutter of different loop types, different ways to iterate, and different ways of treating strings:
import re
import pandas as pd
import os
import sys
logfilename = sys.argv[1]
csvfilename = os.path.splitext(logfilename)[0] + '.csv'
step = 0
data = []
logfile = open(logfilename,'r')
def match_step(line):
return re.match(r'\.step (.*)', line)
# find first .step definition
for line in logfile:
match = match_step(line)
if match:
break
# iterate through all steps with parameters
while match:
step += 1
row = { 'step': int(step) }
parameters = match.group(1).split()
for p in parameters:
[key, value] = p.split('=')
row[key] = float(value)
data.append(row)
match = match_step(next(logfile))
# iterate through measurement definitions
for line in logfile:
match = re.match(r'Measurement: (.*)', line)
if match:
name = match.group(1)
next(logfile) # skip row with column details
# iterate through measurement results for each step
while True:
measurement = next(logfile).split()
if not measurement:
break
row = { 'step': int(measurement[0]), name: float(measurement[1]) }
data.append(row)
logfile.close()
frame = pd.DataFrame(data).set_index('step').groupby('step').first()
frame.to_csv(csvfilename)
What could I do to improve code readability and consistency, especially in the parsing portion of the code?
I do not really care about performance since the process to generate the original log file already takes some minutes.
This is an example log file:
Circuit: * D:\LTSpice\loss.asc
m1:1:v_sm: Missing value, assumed 0V @ DC
Per .tran options, skipping operating point for transient analysis.
.step iload=10 vdc=36 rg=1
.step iload=20 vdc=36 rg=1
.step iload=30 vdc=36 rg=1
.step iload=40 vdc=36 rg=1
.step iload=50 vdc=36 rg=1
.step iload=60 vdc=36 rg=1
.step iload=70 vdc=36 rg=1
.step iload=80 vdc=36 rg=1
.step iload=90 vdc=36 rg=1
.step iload=100 vdc=36 rg=1
.step iload=110 vdc=36 rg=1
.step iload=120 vdc=36 rg=1
.step iload=130 vdc=36 rg=1
.step iload=140 vdc=36 rg=1
.step iload=150 vdc=36 rg=1
.step iload=160 vdc=36 rg=1
.step iload=170 vdc=36 rg=1
.step iload=180 vdc=36 rg=1
.step iload=190 vdc=36 rg=1
.step iload=200 vdc=36 rg=1
.step iload=10 vdc=40 rg=1
.step iload=20 vdc=40 rg=1
.step iload=30 vdc=40 rg=1
.step iload=40 vdc=40 rg=1
.step iload=50 vdc=40 rg=1
.step iload=60 vdc=40 rg=1
.step iload=70 vdc=40 rg=1
.step iload=80 vdc=40 rg=1
.step iload=90 vdc=40 rg=1
.step iload=100 vdc=40 rg=1
.step iload=110 vdc=40 rg=1
.step iload=120 vdc=40 rg=1
.step iload=130 vdc=40 rg=1
.step iload=140 vdc=40 rg=1
.step iload=150 vdc=40 rg=1
.step iload=160 vdc=40 rg=1
.step iload=170 vdc=40 rg=1
.step iload=180 vdc=40 rg=1
.step iload=190 vdc=40 rg=1
.step iload=200 vdc=40 rg=1
.step iload=10 vdc=44 rg=1
.step iload=20 vdc=44 rg=1
.step iload=30 vdc=44 rg=1
.step iload=40 vdc=44 rg=1
.step iload=50 vdc=44 rg=1
.step iload=60 vdc=44 rg=1
.step iload=70 vdc=44 rg=1
.step iload=80 vdc=44 rg=1
.step iload=90 vdc=44 rg=1
.step iload=100 vdc=44 rg=1
.step iload=110 vdc=44 rg=1
.step iload=120 vdc=44 rg=1
.step iload=130 vdc=44 rg=1
.step iload=140 vdc=44 rg=1
.step iload=150 vdc=44 rg=1
.step iload=160 vdc=44 rg=1
.step iload=170 vdc=44 rg=1
.step iload=180 vdc=44 rg=1
.step iload=190 vdc=44 rg=1
.step iload=200 vdc=44 rg=1
.step iload=10 vdc=48 rg=1
.step iload=20 vdc=48 rg=1
.step iload=30 vdc=48 rg=1
.step iload=40 vdc=48 rg=1
.step iload=50 vdc=48 rg=1
.step iload=60 vdc=48 rg=1
.step iload=70 vdc=48 rg=1
.step iload=80 vdc=48 rg=1
.step iload=90 vdc=48 rg=1
.step iload=100 vdc=48 rg=1
.step iload=110 vdc=48 rg=1
.step iload=120 vdc=48 rg=1
.step iload=130 vdc=48 rg=1
.step iload=140 vdc=48 rg=1
.step iload=150 vdc=48 rg=1
.step iload=160 vdc=48 rg=1
.step iload=170 vdc=48 rg=1
.step iload=180 vdc=48 rg=1
.step iload=190 vdc=48 rg=1
.step iload=200 vdc=48 rg=1
.step iload=10 vdc=52 rg=1
.step iload=20 vdc=52 rg=1
.step iload=30 vdc=52 rg=1
.step iload=40 vdc=52 rg=1
.step iload=50 vdc=52 rg=1
.step iload=60 vdc=52 rg=1
.step iload=70 vdc=52 rg=1
.step iload=80 vdc=52 rg=1
.step iload=90 vdc=52 rg=1
.step iload=100 vdc=52 rg=1
.step iload=110 vdc=52 rg=1
.step iload=120 vdc=52 rg=1
.step iload=130 vdc=52 rg=1
.step iload=140 vdc=52 rg=1
.step iload=150 vdc=52 rg=1
.step iload=160 vdc=52 rg=1
.step iload=170 vdc=52 rg=1
.step iload=180 vdc=52 rg=1
.step iload=190 vdc=52 rg=1
.step iload=200 vdc=52 rg=1
.step iload=10 vdc=56 rg=1
.step iload=20 vdc=56 rg=1
.step iload=30 vdc=56 rg=1
.step iload=40 vdc=56 rg=1
.step iload=50 vdc=56 rg=1
.step iload=60 vdc=56 rg=1
.step iload=70 vdc=56 rg=1
.step iload=80 vdc=56 rg=1
.step iload=90 vdc=56 rg=1
.step iload=100 vdc=56 rg=1
.step iload=110 vdc=56 rg=1
.step iload=120 vdc=56 rg=1
.step iload=130 vdc=56 rg=1
.step iload=140 vdc=56 rg=1
.step iload=150 vdc=56 rg=1
.step iload=160 vdc=56 rg=1
.step iload=170 vdc=56 rg=1
.step iload=180 vdc=56 rg=1
.step iload=190 vdc=56 rg=1
.step iload=200 vdc=56 rg=1
.step iload=10 vdc=36 rg=2
.step iload=20 vdc=36 rg=2
.step iload=30 vdc=36 rg=2
.step iload=40 vdc=36 rg=2
.step iload=50 vdc=36 rg=2
.step iload=60 vdc=36 rg=2
.step iload=70 vdc=36 rg=2
.step iload=80 vdc=36 rg=2
.step iload=90 vdc=36 rg=2
.step iload=100 vdc=36 rg=2
.step iload=110 vdc=36 rg=2
.step iload=120 vdc=36 rg=2
.step iload=130 vdc=36 rg=2
.step iload=140 vdc=36 rg=2
.step iload=150 vdc=36 rg=2
.step iload=160 vdc=36 rg=2
.step iload=170 vdc=36 rg=2
.step iload=180 vdc=36 rg=2
.step iload=190 vdc=36 rg=2
.step iload=200 vdc=36 rg=2
.step iload=10 vdc=40 rg=2
.step iload=20 vdc=40 rg=2
.step iload=30 vdc=40 rg=2
.step iload=40 vdc=40 rg=2
.step iload=50 vdc=40 rg=2
.step iload=60 vdc=40 rg=2
.step iload=70 vdc=40 rg=2
.step iload=80 vdc=40 rg=2
.step iload=90 vdc=40 rg=2
.step iload=100 vdc=40 rg=2
.step iload=110 vdc=40 rg=2
.step iload=120 vdc=40 rg=2
.step iload=130 vdc=40 rg=2
.step iload=140 vdc=40 rg=2
.step iload=150 vdc=40 rg=2
.step iload=160 vdc=40 rg=2
.step iload=170 vdc=40 rg=2
.step iload=180 vdc=40 rg=2
.step iload=190 vdc=40 rg=2
.step iload=200 vdc=40 rg=2
.step iload=10 vdc=44 rg=2
.step iload=20 vdc=44 rg=2
.step iload=30 vdc=44 rg=2
.step iload=40 vdc=44 rg=2
.step iload=50 vdc=44 rg=2
.step iload=60 vdc=44 rg=2
.step iload=70 vdc=44 rg=2
.step iload=80 vdc=44 rg=2
.step iload=90 vdc=44 rg=2
.step iload=100 vdc=44 rg=2
.step iload=110 vdc=44 rg=2
.step iload=120 vdc=44 rg=2
.step iload=130 vdc=44 rg=2
.step iload=140 vdc=44 rg=2
.step iload=150 vdc=44 rg=2
.step iload=160 vdc=44 rg=2
.step iload=170 vdc=44 rg=2
.step iload=180 vdc=44 rg=2
.step iload=190 vdc=44 rg=2
.step iload=200 vdc=44 rg=2
.step iload=10 vdc=48 rg=2
.step iload=20 vdc=48 rg=2
.step iload=30 vdc=48 rg=2
.step iload=40 vdc=48 rg=2
.step iload=50 vdc=48 rg=2
.step iload=60 vdc=48 rg=2
.step iload=70 vdc=48 rg=2
.step iload=80 vdc=48 rg=2
.step iload=90 vdc=48 rg=2
.step iload=100 vdc=48 rg=2
.step iload=110 vdc=48 rg=2
.step iload=120 vdc=48 rg=2
.step iload=130 vdc=48 rg=2
.step iload=140 vdc=48 rg=2
.step iload=150 vdc=48 rg=2
.step iload=160 vdc=48 rg=2
.step iload=170 vdc=48 rg=2
.step iload=180 vdc=48 rg=2
.step iload=190 vdc=48 rg=2
.step iload=200 vdc=48 rg=2
.step iload=10 vdc=52 rg=2
.step iload=20 vdc=52 rg=2
.step iload=30 vdc=52 rg=2
.step iload=40 vdc=52 rg=2
.step iload=50 vdc=52 rg=2
.step iload=60 vdc=52 rg=2
.step iload=70 vdc=52 rg=2
.step iload=80 vdc=52 rg=2
.step iload=90 vdc=52 rg=2
.step iload=100 vdc=52 rg=2
.step iload=110 vdc=52 rg=2
.step iload=120 vdc=52 rg=2
.step iload=130 vdc=52 rg=2
.step iload=140 vdc=52 rg=2
.step iload=150 vdc=52 rg=2
.step iload=160 vdc=52 rg=2
.step iload=170 vdc=52 rg=2
.step iload=180 vdc=52 rg=2
.step iload=190 vdc=52 rg=2
.step iload=200 vdc=52 rg=2
.step iload=10 vdc=56 rg=2
.step iload=20 vdc=56 rg=2
.step iload=30 vdc=56 rg=2
.step iload=40 vdc=56 rg=2
.step iload=50 vdc=56 rg=2
.step iload=60 vdc=56 rg=2
.step iload=70 vdc=56 rg=2
.step iload=80 vdc=56 rg=2
.step iload=90 vdc=56 rg=2
.step iload=100 vdc=56 rg=2
.step iload=110 vdc=56 rg=2
.step iload=120 vdc=56 rg=2
.step iload=130 vdc=56 rg=2
.step iload=140 vdc=56 rg=2
.step iload=150 vdc=56 rg=2
.step iload=160 vdc=56 rg=2
.step iload=170 vdc=56 rg=2
.step iload=180 vdc=56 rg=2
.step iload=190 vdc=56 rg=2
.step iload=200 vdc=56 rg=2
Measurement: eon
step INTEG(v(drain)*ix(m1:d)) FROM TO
1 1.82588e-006 1e-005 1.02e-005
2 4.17134e-006 1e-005 1.02e-005
3 7.00321e-006 1e-005 1.02e-005
4 1.03301e-005 1e-005 1.02e-005
5 1.41369e-005 1e-005 1.02e-005
6 1.84238e-005 1e-005 1.02e-005
7 2.32117e-005 1e-005 1.02e-005
8 2.84883e-005 1e-005 1.02e-005
9 3.42491e-005 1e-005 1.02e-005
10 4.04575e-005 1e-005 1.02e-005
11 4.71822e-005 1e-005 1.02e-005
12 5.43457e-005 1e-005 1.02e-005
13 6.20163e-005 1e-005 1.02e-005
14 7.01661e-005 1e-005 1.02e-005
15 7.87419e-005 1e-005 1.02e-005
16 8.7822e-005 1e-005 1.02e-005
17 9.73496e-005 1e-005 1.02e-005
18 0.000107334 1e-005 1.02e-005
19 0.00011775 1e-005 1.02e-005
20 0.000128672 1e-005 1.02e-005
21 2.01474e-006 1e-005 1.02e-005
22 4.58268e-006 1e-005 1.02e-005
23 7.67373e-006 1e-005 1.02e-005
24 1.13006e-005 1e-005 1.02e-005
25 1.54471e-005 1e-005 1.02e-005
26 2.01132e-005 1e-005 1.02e-005
27 2.53309e-005 1e-005 1.02e-005
28 3.10536e-005 1e-005 1.02e-005
29 3.73023e-005 1e-005 1.02e-005
30 4.41231e-005 1e-005 1.02e-005
31 5.14188e-005 1e-005 1.02e-005
32 5.92874e-005 1e-005 1.02e-005
33 6.75954e-005 1e-005 1.02e-005
34 7.64433e-005 1e-005 1.02e-005
35 8.58157e-005 1e-005 1.02e-005
36 9.57088e-005 1e-005 1.02e-005
37 0.000106059 1e-005 1.02e-005
38 0.000116983 1e-005 1.02e-005
39 0.000128391 1e-005 1.02e-005
40 0.000140257 1e-005 1.02e-005
41 2.19203e-006 1e-005 1.02e-005
42 4.97675e-006 1e-005 1.02e-005
43 8.3144e-006 1e-005 1.02e-005
44 1.22347e-005 1e-005 1.02e-005
45 1.66951e-005 1e-005 1.02e-005
46 2.17464e-005 1e-005 1.02e-005
47 2.73532e-005 1e-005 1.02e-005
48 3.35368e-005 1e-005 1.02e-005
49 4.02896e-005 1e-005 1.02e-005
50 4.75927e-005 1e-005 1.02e-005
51 5.55096e-005 1e-005 1.02e-005
52 6.39629e-005 1e-005 1.02e-005
53 7.29618e-005 1e-005 1.02e-005
54 8.25448e-005 1e-005 1.02e-005
55 9.26558e-005 1e-005 1.02e-005
56 0.000103305 1e-005 1.02e-005
57 0.000114593 1e-005 1.02e-005
58 0.000126368 1e-005 1.02e-005
59 0.000138698 1e-005 1.02e-005
60 0.000151569 1e-005 1.02e-005
61 2.36219e-006 1e-005 1.02e-005
62 5.35277e-006 1e-005 1.02e-005
63 8.93444e-006 1e-005 1.02e-005
64 1.31182e-005 1e-005 1.02e-005
65 1.79136e-005 1e-005 1.02e-005
66 2.3314e-005 1e-005 1.02e-005
67 2.9333e-005 1e-005 1.02e-005
68 3.59266e-005 1e-005 1.02e-005
69 4.31761e-005 1e-005 1.02e-005
70 5.09889e-005 1e-005 1.02e-005
71 5.94634e-005 1e-005 1.02e-005
72 6.84967e-005 1e-005 1.02e-005
73 7.8198e-005 1e-005 1.02e-005
74 8.84392e-005 1e-005 1.02e-005
75 9.93694e-005 1e-005 1.02e-005
76 0.000110811 1e-005 1.02e-005
77 0.00012289 1e-005 1.02e-005
78 0.000135515 1e-005 1.02e-005
79 0.000148799 1e-005 1.02e-005
80 0.000162664 1e-005 1.02e-005
81 2.53127e-006 1e-005 1.02e-005
82 5.72998e-006 1e-005 1.02e-005
83 9.54195e-006 1e-005 1.02e-005
84 1.40013e-005 1e-005 1.02e-005
85 1.91076e-005 1e-005 1.02e-005
86 2.48313e-005 1e-005 1.02e-005
87 3.12246e-005 1e-005 1.02e-005
88 3.82451e-005 1e-005 1.02e-005
89 4.59383e-005 1e-005 1.02e-005
90 5.43192e-005 1e-005 1.02e-005
91 6.32779e-005 1e-005 1.02e-005
92 7.29517e-005 1e-005 1.02e-005
93 8.32275e-005 1e-005 1.02e-005
94 9.42133e-005 1e-005 1.02e-005
95 0.000105792 1e-005 1.02e-005
96 0.000118051 1e-005 1.02e-005
97 0.00013091 1e-005 1.02e-005
98 0.000144435 1e-005 1.02e-005
99 0.000158566 1e-005 1.02e-005
100 0.000173324 1e-005 1.02e-005
101 2.70084e-006 1e-005 1.02e-005
102 6.08804e-006 1e-005 1.02e-005
103 1.01309e-005 1e-005 1.02e-005
104 1.48588e-005 1e-005 1.02e-005
105 2.02551e-005 1e-005 1.02e-005
106 2.63259e-005 1e-005 1.02e-005
107 3.31072e-005 1e-005 1.02e-005
108 4.05473e-005 1e-005 1.02e-005
109 4.87006e-005 1e-005 1.02e-005
110 5.75245e-005 1e-005 1.02e-005
111 6.70787e-005 1e-005 1.02e-005
112 7.7313e-005 1e-005 1.02e-005
113 8.82438e-005 1e-005 1.02e-005
114 9.98267e-005 1e-005 1.02e-005
115 0.000112141 1e-005 1.02e-005
116 0.000125152 1e-005 1.02e-005
117 0.000138777 1e-005 1.02e-005
118 0.000153199 1e-005 1.02e-005
119 0.000168253 1e-005 1.02e-005
120 0.000183974 1e-005 1.02e-005
121 1.82588e-006 1e-005 1.02e-005
122 4.17134e-006 1e-005 1.02e-005
123 7.00321e-006 1e-005 1.02e-005
124 1.03301e-005 1e-005 1.02e-005
125 1.41369e-005 1e-005 1.02e-005
126 1.84238e-005 1e-005 1.02e-005
127 2.32117e-005 1e-005 1.02e-005
128 2.84883e-005 1e-005 1.02e-005
129 3.42491e-005 1e-005 1.02e-005
130 4.04575e-005 1e-005 1.02e-005
131 4.71822e-005 1e-005 1.02e-005
132 5.43457e-005 1e-005 1.02e-005
133 6.20163e-005 1e-005 1.02e-005
134 7.01661e-005 1e-005 1.02e-005
135 7.87419e-005 1e-005 1.02e-005
136 8.7822e-005 1e-005 1.02e-005
137 9.73496e-005 1e-005 1.02e-005
138 0.000107334 1e-005 1.02e-005
139 0.00011775 1e-005 1.02e-005
140 0.000128672 1e-005 1.02e-005
141 2.01474e-006 1e-005 1.02e-005
142 4.58268e-006 1e-005 1.02e-005
143 7.67373e-006 1e-005 1.02e-005
144 1.13006e-005 1e-005 1.02e-005
145 1.54471e-005 1e-005 1.02e-005
146 2.01132e-005 1e-005 1.02e-005
147 2.53309e-005 1e-005 1.02e-005
148 3.10536e-005 1e-005 1.02e-005
149 3.73023e-005 1e-005 1.02e-005
150 4.41231e-005 1e-005 1.02e-005
151 5.14188e-005 1e-005 1.02e-005
152 5.92874e-005 1e-005 1.02e-005
153 6.75954e-005 1e-005 1.02e-005
154 7.64433e-005 1e-005 1.02e-005
155 8.58157e-005 1e-005 1.02e-005
156 9.57088e-005 1e-005 1.02e-005
157 0.000106059 1e-005 1.02e-005
158 0.000116983 1e-005 1.02e-005
159 0.000128391 1e-005 1.02e-005
160 0.000140257 1e-005 1.02e-005
161 2.19203e-006 1e-005 1.02e-005
162 4.97675e-006 1e-005 1.02e-005
163 8.3144e-006 1e-005 1.02e-005
164 1.22347e-005 1e-005 1.02e-005
165 1.66951e-005 1e-005 1.02e-005
166 2.17464e-005 1e-005 1.02e-005
167 2.73532e-005 1e-005 1.02e-005
168 3.35368e-005 1e-005 1.02e-005
169 4.02896e-005 1e-005 1.02e-005
170 4.75927e-005 1e-005 1.02e-005
171 5.55096e-005 1e-005 1.02e-005
172 6.39629e-005 1e-005 1.02e-005
173 7.29618e-005 1e-005 1.02e-005
174 8.25448e-005 1e-005 1.02e-005
175 9.26558e-005 1e-005 1.02e-005
176 0.000103305 1e-005 1.02e-005
177 0.000114593 1e-005 1.02e-005
178 0.000126368 1e-005 1.02e-005
179 0.000138698 1e-005 1.02e-005
180 0.000151569 1e-005 1.02e-005
181 2.36219e-006 1e-005 1.02e-005
182 5.35277e-006 1e-005 1.02e-005
183 8.93444e-006 1e-005 1.02e-005
184 1.31182e-005 1e-005 1.02e-005
185 1.79136e-005 1e-005 1.02e-005
186 2.3314e-005 1e-005 1.02e-005
187 2.9333e-005 1e-005 1.02e-005
188 3.59266e-005 1e-005 1.02e-005
189 4.31761e-005 1e-005 1.02e-005
190 5.09889e-005 1e-005 1.02e-005
191 5.94634e-005 1e-005 1.02e-005
192 6.84967e-005 1e-005 1.02e-005
193 7.8198e-005 1e-005 1.02e-005
194 8.84392e-005 1e-005 1.02e-005
195 9.93694e-005 1e-005 1.02e-005
196 0.000110811 1e-005 1.02e-005
197 0.00012289 1e-005 1.02e-005
198 0.000135515 1e-005 1.02e-005
199 0.000148799 1e-005 1.02e-005
200 0.000162664 1e-005 1.02e-005
201 2.53127e-006 1e-005 1.02e-005
202 5.72998e-006 1e-005 1.02e-005
203 9.54195e-006 1e-005 1.02e-005
204 1.40013e-005 1e-005 1.02e-005
205 1.91076e-005 1e-005 1.02e-005
206 2.48313e-005 1e-005 1.02e-005
207 3.12246e-005 1e-005 1.02e-005
208 3.82451e-005 1e-005 1.02e-005
209 4.59383e-005 1e-005 1.02e-005
210 5.43192e-005 1e-005 1.02e-005
211 6.32779e-005 1e-005 1.02e-005
212 7.29517e-005 1e-005 1.02e-005
213 8.32275e-005 1e-005 1.02e-005
214 9.42133e-005 1e-005 1.02e-005
215 0.000105792 1e-005 1.02e-005
216 0.000118051 1e-005 1.02e-005
217 0.00013091 1e-005 1.02e-005
218 0.000144435 1e-005 1.02e-005
219 0.000158566 1e-005 1.02e-005
220 0.000173324 1e-005 1.02e-005
221 2.70084e-006 1e-005 1.02e-005
222 6.08804e-006 1e-005 1.02e-005
223 1.01309e-005 1e-005 1.02e-005
224 1.48588e-005 1e-005 1.02e-005
225 2.02551e-005 1e-005 1.02e-005
226 2.63259e-005 1e-005 1.02e-005
227 3.31072e-005 1e-005 1.02e-005
228 4.05473e-005 1e-005 1.02e-005
229 4.87006e-005 1e-005 1.02e-005
230 5.75245e-005 1e-005 1.02e-005
231 6.70787e-005 1e-005 1.02e-005
232 7.7313e-005 1e-005 1.02e-005
233 8.82438e-005 1e-005 1.02e-005
234 9.98267e-005 1e-005 1.02e-005
235 0.000112141 1e-005 1.02e-005
236 0.000125152 1e-005 1.02e-005
237 0.000138777 1e-005 1.02e-005
238 0.000153199 1e-005 1.02e-005
239 0.000168253 1e-005 1.02e-005
240 0.000183974 1e-005 1.02e-005
Measurement: eoff
step INTEG(v(drain)*ix(m1:d)) FROM TO
1 4.23893e-006 2e-005 2.02e-005
2 6.95585e-006 2e-005 2.02e-005
3 1.0193e-005 2e-005 2.02e-005
4 1.3824e-005 2e-005 2.02e-005
5 1.78051e-005 2e-005 2.02e-005
6 2.21101e-005 2e-005 2.02e-005
7 2.67549e-005 2e-005 2.02e-005
8 3.17371e-005 2e-005 2.02e-005
9 3.70578e-005 2e-005 2.02e-005
10 4.277e-005 2e-005 2.02e-005
11 4.87986e-005 2e-005 2.02e-005
12 5.51665e-005 2e-005 2.02e-005
13 6.19453e-005 2e-005 2.02e-005
14 6.915e-005 2e-005 2.02e-005
15 7.66868e-005 2e-005 2.02e-005
16 8.46352e-005 2e-005 2.02e-005
17 9.29818e-005 2e-005 2.02e-005
18 0.000101657 2e-005 2.02e-005
19 0.000110735 2e-005 2.02e-005
20 0.000120178 2e-005 2.02e-005
21 4.69635e-006 2e-005 2.02e-005
22 7.69407e-006 2e-005 2.02e-005
23 1.12591e-005 2e-005 2.02e-005
24 1.52438e-005 2e-005 2.02e-005
25 1.9606e-005 2e-005 2.02e-005
26 2.43249e-005 2e-005 2.02e-005
27 2.94065e-005 2e-005 2.02e-005
28 3.48604e-005 2e-005 2.02e-005
29 4.06825e-005 2e-005 2.02e-005
30 4.69548e-005 2e-005 2.02e-005
31 5.35431e-005 2e-005 2.02e-005
32 6.06112e-005 2e-005 2.02e-005
33 6.81013e-005 2e-005 2.02e-005
34 7.60042e-005 2e-005 2.02e-005
35 8.43295e-005 2e-005 2.02e-005
36 9.31359e-005 2e-005 2.02e-005
37 0.00010231 2e-005 2.02e-005
38 0.000111976 2e-005 2.02e-005
39 0.000121963 2e-005 2.02e-005
40 0.000132423 2e-005 2.02e-005
41 5.1599e-006 2e-005 2.02e-005
42 8.41701e-006 2e-005 2.02e-005
43 1.22973e-005 2e-005 2.02e-005
44 1.66192e-005 2e-005 2.02e-005
45 2.13463e-005 2e-005 2.02e-005
46 2.64597e-005 2e-005 2.02e-005
47 3.19739e-005 2e-005 2.02e-005
48 3.78825e-005 2e-005 2.02e-005
49 4.41972e-005 2e-005 2.02e-005
50 5.09872e-005 2e-005 2.02e-005
51 5.82033e-005 2e-005 2.02e-005
52 6.59461e-005 2e-005 2.02e-005
53 7.406e-005 2e-005 2.02e-005
54 8.27488e-005 2e-005 2.02e-005
55 9.18643e-005 2e-005 2.02e-005
56 0.000101476 2e-005 2.02e-005
57 0.000111573 2e-005 2.02e-005
58 0.000122229 2e-005 2.02e-005
59 0.000133124 2e-005 2.02e-005
60 0.000144596 2e-005 2.02e-005
61 5.63236e-006 2e-005 2.02e-005
62 9.13522e-006 2e-005 2.02e-005
63 1.33072e-005 2e-005 2.02e-005
64 1.79548e-005 2e-005 2.02e-005
65 2.30477e-005 2e-005 2.02e-005
66 2.85415e-005 2e-005 2.02e-005
67 3.44904e-005 2e-005 2.02e-005
68 4.0866e-005 2e-005 2.02e-005
69 4.7659e-005 2e-005 2.02e-005
70 5.50591e-005 2e-005 2.02e-005
71 6.27947e-005 2e-005 2.02e-005
72 7.11432e-005 2e-005 2.02e-005
73 8.00001e-005 2e-005 2.02e-005
74 8.94251e-005 2e-005 2.02e-005
75 9.93542e-005 2e-005 2.02e-005
76 0.000109774 2e-005 2.02e-005
77 0.00012074 2e-005 2.02e-005
78 0.000132208 2e-005 2.02e-005
79 0.000144181 2e-005 2.02e-005
80 0.00015664 2e-005 2.02e-005
81 6.10254e-006 2e-005 2.02e-005
82 9.83685e-006 2e-005 2.02e-005
83 1.42959e-005 2e-005 2.02e-005
84 1.92432e-005 2e-005 2.02e-005
85 2.4707e-005 2e-005 2.02e-005
86 3.05967e-005 2e-005 2.02e-005
87 3.69216e-005 2e-005 2.02e-005
88 4.37559e-005 2e-005 2.02e-005
89 5.11195e-005 2e-005 2.02e-005
90 5.89263e-005 2e-005 2.02e-005
91 6.734e-005 2e-005 2.02e-005
92 7.63461e-005 2e-005 2.02e-005
93 8.58748e-005 2e-005 2.02e-005
94 9.60459e-005 2e-005 2.02e-005
95 0.000106726 2e-005 2.02e-005
96 0.00011802 2e-005 2.02e-005
97 0.000129852 2e-005 2.02e-005
98 0.000142253 2e-005 2.02e-005
99 0.000155198 2e-005 2.02e-005
100 0.000168663 2e-005 2.02e-005
101 6.5849e-006 2e-005 2.02e-005
102 1.05374e-005 2e-005 2.02e-005
103 1.52673e-005 2e-005 2.02e-005
104 2.05492e-005 2e-005 2.02e-005
105 2.63589e-005 2e-005 2.02e-005
106 3.26249e-005 2e-005 2.02e-005
107 3.93226e-005 2e-005 2.02e-005
108 4.66382e-005 2e-005 2.02e-005
109 5.44663e-005 2e-005 2.02e-005
110 6.28883e-005 2e-005 2.02e-005
111 7.18675e-005 2e-005 2.02e-005
112 8.15138e-005 2e-005 2.02e-005
113 9.17877e-005 2e-005 2.02e-005
114 0.000102613 2e-005 2.02e-005
115 0.000114123 2e-005 2.02e-005
116 0.000126246 2e-005 2.02e-005
117 0.000138925 2e-005 2.02e-005
118 0.000152236 2e-005 2.02e-005
119 0.000166172 2e-005 2.02e-005
120 0.000180598 2e-005 2.02e-005
121 4.23893e-006 2e-005 2.02e-005
122 6.95585e-006 2e-005 2.02e-005
123 1.0193e-005 2e-005 2.02e-005
124 1.3824e-005 2e-005 2.02e-005
125 1.78051e-005 2e-005 2.02e-005
126 2.21101e-005 2e-005 2.02e-005
127 2.67549e-005 2e-005 2.02e-005
128 3.17371e-005 2e-005 2.02e-005
129 3.70578e-005 2e-005 2.02e-005
130 4.277e-005 2e-005 2.02e-005
131 4.87986e-005 2e-005 2.02e-005
132 5.51665e-005 2e-005 2.02e-005
133 6.19453e-005 2e-005 2.02e-005
134 6.915e-005 2e-005 2.02e-005
135 7.66868e-005 2e-005 2.02e-005
136 8.46352e-005 2e-005 2.02e-005
137 9.29818e-005 2e-005 2.02e-005
138 0.000101657 2e-005 2.02e-005
139 0.000110735 2e-005 2.02e-005
140 0.000120178 2e-005 2.02e-005
141 4.69635e-006 2e-005 2.02e-005
142 7.69407e-006 2e-005 2.02e-005
143 1.12591e-005 2e-005 2.02e-005
144 1.52438e-005 2e-005 2.02e-005
145 1.9606e-005 2e-005 2.02e-005
146 2.43249e-005 2e-005 2.02e-005
147 2.94065e-005 2e-005 2.02e-005
148 3.48604e-005 2e-005 2.02e-005
149 4.06825e-005 2e-005 2.02e-005
150 4.69548e-005 2e-005 2.02e-005
151 5.35431e-005 2e-005 2.02e-005
152 6.06112e-005 2e-005 2.02e-005
153 6.81013e-005 2e-005 2.02e-005
154 7.60042e-005 2e-005 2.02e-005
155 8.43295e-005 2e-005 2.02e-005
156 9.31359e-005 2e-005 2.02e-005
157 0.00010231 2e-005 2.02e-005
158 0.000111976 2e-005 2.02e-005
159 0.000121963 2e-005 2.02e-005
160 0.000132423 2e-005 2.02e-005
161 5.1599e-006 2e-005 2.02e-005
162 8.41701e-006 2e-005 2.02e-005
163 1.22973e-005 2e-005 2.02e-005
164 1.66192e-005 2e-005 2.02e-005
165 2.13463e-005 2e-005 2.02e-005
166 2.64597e-005 2e-005 2.02e-005
167 3.19739e-005 2e-005 2.02e-005
168 3.78825e-005 2e-005 2.02e-005
169 4.41972e-005 2e-005 2.02e-005
170 5.09872e-005 2e-005 2.02e-005
171 5.82033e-005 2e-005 2.02e-005
172 6.59461e-005 2e-005 2.02e-005
173 7.406e-005 2e-005 2.02e-005
174 8.27488e-005 2e-005 2.02e-005
175 9.18643e-005 2e-005 2.02e-005
176 0.000101476 2e-005 2.02e-005
177 0.000111573 2e-005 2.02e-005
178 0.000122229 2e-005 2.02e-005
179 0.000133124 2e-005 2.02e-005
180 0.000144596 2e-005 2.02e-005
181 5.63236e-006 2e-005 2.02e-005
182 9.13522e-006 2e-005 2.02e-005
183 1.33072e-005 2e-005 2.02e-005
184 1.79548e-005 2e-005 2.02e-005
185 2.30477e-005 2e-005 2.02e-005
186 2.85415e-005 2e-005 2.02e-005
187 3.44904e-005 2e-005 2.02e-005
188 4.0866e-005 2e-005 2.02e-005
189 4.7659e-005 2e-005 2.02e-005
190 5.50591e-005 2e-005 2.02e-005
191 6.27947e-005 2e-005 2.02e-005
192 7.11432e-005 2e-005 2.02e-005
193 8.00001e-005 2e-005 2.02e-005
194 8.94251e-005 2e-005 2.02e-005
195 9.93542e-005 2e-005 2.02e-005
196 0.000109774 2e-005 2.02e-005
197 0.00012074 2e-005 2.02e-005
198 0.000132208 2e-005 2.02e-005
199 0.000144181 2e-005 2.02e-005
200 0.00015664 2e-005 2.02e-005
201 6.10254e-006 2e-005 2.02e-005
202 9.83685e-006 2e-005 2.02e-005
203 1.42959e-005 2e-005 2.02e-005
204 1.92432e-005 2e-005 2.02e-005
205 2.4707e-005 2e-005 2.02e-005
206 3.05967e-005 2e-005 2.02e-005
207 3.69216e-005 2e-005 2.02e-005
208 4.37559e-005 2e-005 2.02e-005
209 5.11195e-005 2e-005 2.02e-005
210 5.89263e-005 2e-005 2.02e-005
211 6.734e-005 2e-005 2.02e-005
212 7.63461e-005 2e-005 2.02e-005
213 8.58748e-005 2e-005 2.02e-005
214 9.60459e-005 2e-005 2.02e-005
215 0.000106726 2e-005 2.02e-005
216 0.00011802 2e-005 2.02e-005
217 0.000129852 2e-005 2.02e-005
218 0.000142253 2e-005 2.02e-005
219 0.000155198 2e-005 2.02e-005
220 0.000168663 2e-005 2.02e-005
221 6.5849e-006 2e-005 2.02e-005
222 1.05374e-005 2e-005 2.02e-005
223 1.52673e-005 2e-005 2.02e-005
224 2.05492e-005 2e-005 2.02e-005
225 2.63589e-005 2e-005 2.02e-005
226 3.26249e-005 2e-005 2.02e-005
227 3.93226e-005 2e-005 2.02e-005
228 4.66382e-005 2e-005 2.02e-005
229 5.44663e-005 2e-005 2.02e-005
230 6.28883e-005 2e-005 2.02e-005
231 7.18675e-005 2e-005 2.02e-005
232 8.15138e-005 2e-005 2.02e-005
233 9.17877e-005 2e-005 2.02e-005
234 0.000102613 2e-005 2.02e-005
235 0.000114123 2e-005 2.02e-005
236 0.000126246 2e-005 2.02e-005
237 0.000138925 2e-005 2.02e-005
238 0.000152236 2e-005 2.02e-005
239 0.000166172 2e-005 2.02e-005
240 0.000180598 2e-005 2.02e-005
Date: Sun Sep 22 18:48:00 2019
Total elapsed time: 186.382 seconds.
tnom = 27
temp = 27
method = modified trap
totiter = 4603
traniter = 4603
tranpoints = 1964
accept = 1663
rejected = 301
matrix size = 43
fillins = 99
solver = Normal
Matrix Compiler1: 9.73 KB object code size 5.9/3.3/[0.7]
Matrix Compiler2: 5.38 KB object code size 1.4/3.0/[0.5]
The example log file can also be found here, as well as the code above, both on my GitHub.
Please note: I am not a professional programmer, I only write code to support my main activity which is electronics engineering.
-
6\$\begingroup\$ To whoever is voting to close this question: don't forget to leave a comment. \$\endgroup\$Mast– Mast ♦2019年09月23日 18:46:06 +00:00Commented Sep 23, 2019 at 18:46
4 Answers 4
I started reviewing this last night, and got way off track from what you need. You've been clear that this is hobbyist code that already works; the only reason you're hear asking how to improve it is (I guess) so it'll be easier for you to resume work on a year from now.
First, some responses to your actual code:
- Breaking your code up into functions is often good, even if the function is only used once. This can make things more verbose, but it's worth it if it helps us understand the flow of the code, or helps us consider pieces of it in isolation.
- It's my opinion that strong type signatures are always a good idea. They clarify what you code does, and unlike a comment they can't be wrong.
- The example log file has less than a thousand lines. For a small enough file, it will make sense to read the whole thing into a list of strings before trying to work with it. It's more performant to work with streams than lists, but realizing that performance benefit here would be hard because we have to hold all the data from the first n-1 sections in memory until we get to the last section. Until you start having performance problems, load all your data right at the beginning.
- List comprehensions are wonderful and cover all the basic python data structures. That said, in order for them to work for this situation you'll need some helper functions.
- You're writing this as a CLI tool. Using the main-function pattern will make the code easier to play with in other contexts.
What I wrote last night:
import re
import pandas as pd
import os
import sys
from itertools import chain, dropwhile, groupby
from typing import *
def read_lines(file_name:str) -> Iterable[str]:
with open(file_name, 'r') as f: # This is generally preferable to closing the file manually.
return [line.strip() for line in f]
def parse_lines(lines: Iterable[str]) -> Dict[int, Dict[str, float]]:
runs = [
run
for run
in map(lambda g: list(g[1]), groupby(lines, bool))
# filter out the headers, footers, and empty lines:
if any(l.startswith('.step') or l.startswith('Measurement:') for l in run)
]
steps = {
step_number: read_step(line)
for (step_number, line)
in enumerate(
dropwhile(lambda l: not l.startswith('.step'),
runs[0]),
1)
}
metrics = {
read_measurment_key(measurment_batch[0]): {
int(step_number): float(measurement)
for [step_number, measurement]
in map(lambda m: m.split()[0:2], measurment_batch[2:])
}
for measurment_batch in runs[1:]
}
return {
step_number: dict(chain(
step.items(),
(
(measurment_key, measurments[step_number])
for (measurment_key, measurments)
in metrics.items()
)
))
for (step_number, step) in steps.items()
}
def read_step(line: str) -> Dict[str, float]:
match = re.match(r'\.step (.*)', line) # For performance we could compile the regex.
return {
key: float(value)
for [key, value]
in map(
lambda parameter: parameter.split('='),
match.group(1).split()
)
}
def read_measurment_key(line: str) -> str:
match = re.match(r'Measurement: (.*)', line) # For performance we could compile the regex.
return match.group(1)
def main():
logfilename = 'loss.log' # Or read the argument as you did.
csvfilename = os.path.splitext(logfilename)[0] + '.csv'
log_file_lines = read_lines(logfilename)
data = parse_lines(log_file_lines)
frame = pd.DataFrame(
dict(step = step_number, **values)
for (step_number, values) in data.items()
).set_index('step')
frame.to_csv(csvfilename)
if __name__== "__main__":
main()
Is that better?
I don't know.
- Is it readable to you? It sounds like you probably don't already know most of the syntax used, and there's probably only so much new stuff you'll want to learn for this project.
- When you need to update it a year from now, will it be clear what the current
runs = [...]
section is doing? Will you be able to make the needed changes with minimal trial and error? - Does it actually work? I know it runs, and the output looks the same, but how do we know I didn't introduce some small bug such that 5% of your data is now wrong?
- How will it handle bad data? I didn't put in any kind of error detection.
What's good about it?
- The use of typed functions will help us be sure the code is doing what we intend it to do. I could actually have taken this further by moving more stuff out into individual functions.
- The use of list comprehensions and other "functional programming" styles means that we're never mutating the state of a variable. This also helps us be sure everything is working as intended.
- The use of pure functions also helps us be sure everything is working as intended.
- Basically every "line" can be read as "build a value", and the nature of those values is clearly (?) indicated by the names of the variables we assigning to or the functions we're returning from.
-
\$\begingroup\$ Thank you very much for your contribution. I will see how I can benefit from it and then get back. Some remarks: You write it is "hobbyist code". This is almost certainly true regarding my skill level, but nevertheless this code was created for a professional purpose, just in a different domain. Also, looking at your code (and that of @Reinderien) I realize that there is one feature in my original code I like: It follows the sequence of the original log file. In 80% of usage I actually read the log file and do not use the parser. That makes it easy to retrace what the parser is doing. \$\endgroup\$realtime– realtime2019年09月25日 18:36:07 +00:00Commented Sep 25, 2019 at 18:36
-
\$\begingroup\$ Interesting. I guess the (structured, explicit) way to implement that way of thinking would be a "Builder" pattern: A class that sets up an empty data structure (possibly a data-frame, possibly something custom) on
__init__()
, and then exposes a (stateful)ingest(self, line:str)
function and some kind of final-output function. But really it's not clear what would make one implementation better than another for you. \$\endgroup\$ShapeOfMatter– ShapeOfMatter2019年09月25日 18:46:01 +00:00Commented Sep 25, 2019 at 18:46
I wrote an example of what you can do to clean this up:
import re
from collections import OrderedDict
from csv import DictWriter
from os.path import splitext
from sys import argv
from typing import Iterable
def get_measure_matches(log: str) -> tuple:
return tuple(re.finditer(r'Measurement: (.*)$', log, re.M))
def parse_main(log: str, measure_matches: tuple) -> (list, OrderedDict):
all_cols = OrderedDict((('step', None),))
rows = []
main_row_re = re.compile(r'(\S+)=(\S+)')
main_lines = log[:measure_matches[0].start()].splitlines()
step = 1
for line in main_lines:
row = {}
for match in main_row_re.finditer(line):
k, v = match.groups()
all_cols[k] = None
row[k] = v
if row:
row['step'] = step
rows.append(row)
step += 1
for m in measure_matches:
all_cols[m[1]] = None
return rows, all_cols
def parse_measures(log: str, measure_matches: tuple, rows: Iterable[dict]):
measure_ends = (*(m.start() for m in measure_matches[1:]), -1)
measure_re = re.compile(r'^\s*(\S+)\s+(\S+)', re.M)
for measure, measure_end in zip(measure_matches, measure_ends):
measure_name = measure[1]
blob = log[measure.end(): measure_end]
for match in measure_re.finditer(blob):
try:
step, val = match.groups()
rows[int(step) - 1][measure_name] = float(val)
except ValueError:
pass
def write_csv(rows: Iterable[dict], cols: Iterable[str], csv_filename: str):
"""
step,iload,vdc,rg,eon,eoff
1,10.0,36.0,1.0,1.82588e-06,4.23893e-06
2,20.0,36.0,1.0,4.17134e-06,6.95585e-06
(etc)
"""
with open(csv_filename, 'w') as csv_file:
writer = DictWriter(csv_file, cols)
writer.writeheader()
writer.writerows(rows)
def main():
log_filename = argv[1]
with open(log_filename) as log_file:
log = log_file.read()
measure_matches = get_measure_matches(log)
rows, all_cols = parse_main(log, measure_matches)
parse_measures(log, measure_matches, rows)
csv_filename = splitext(log_filename)[0] + '.csv'
write_csv(rows, all_cols.keys(), csv_filename)
if __name__ == '__main__':
main()
Notes:
- There are methods
- No need to use pandas - this runs on base Python 3
- Better use of
finditer
- Type hints
- No need to call
group(n)
- Implicitly close files in a context manager
The log file looks like it is made up of bunch of sections separated by blank lines. So, an approach is to code functions to parse each section. The main driver can scan the log file to find the sections and then pass the parsing off to the parsing functions. The parsing functions have a common interface, to make it easy to add new parsing functions.
The code below is mostly comments and doc strings.
This is the driver. It iterates over the lines in the log file, trying to identify what kind of section it's reading. For example, if a line starts with '.step' it is in the 'step' section, etc.
def parse_log(lines):
"""
Parses a log file in which sections are delimited by blank lines.
Input 'lines' is an iterable over the lines of the log file.
Output is a defaultdict of dictionaries. The defaultdict is keyed by
step number. The dict for each step is a dict of the parameters and
measurements associated with that step:
defaultdict(dict,
{1: {'step': 1, 'iload': '10', 'vdc': '36', 'rg': '1',
'eon': '1.82588e-006', 'eoff': '4.23893e-006'},
2: {'step': 2, 'iload': '20', 'vdc': '36', 'rg': '1',
'eon': '4.17134e-006', 'eoff': '6.95585e-006'},
3: {'step': 3, 'iload': '30', 'vdc': '36', 'rg': '1',
'eon': '7.00321e-006', 'eoff': '1.0193e-005'},
...
})
"""
# Lines in the input are checked to see if the section can be identified. If a section is
# identified a section specific parsing function is called. If a section cannot be identified
# from a line, the line is appended to 'leadin'.
#
# Some sections are easier to identify after reading a few lines. For example, the
# 'step' section contains lines that start with '.step'.
#
# m1:1:v_sm: Missing value, assumed 0V @ DC
# Per .tran options, skipping operating point for transient analysis.
# .step iload=10 vdc=36 rg=1
# .step iload=20 vdc=36 rg=1
# ...
#
# The list of the 'leadin' lines are passed to 'section()' so that the section specific
# parsing function gets all of the lines in the section (e.g., the m1:1 ..., and Per ... lines
# in the example above).
#
# Section specific parsing functions are expected to return a dictionary, keyed by step number,
# of dictionaries containing parameter or measurement names and their values. The section
# specific dictionaries are merged and returned.
data = defaultdict(dict)
leadin = []
section_data = None
for line in lines:
line = line.lstrip()
if not line:
if leadin:
leadin = []
# If 'leadin' is is not empty, there was an unknown or unrecognized section.
# For debuging, it might be useful to print (or log) the first few lines.
#print(f"unknown section: {leadin[:5]}")
continue
leadin.append(line)
if line.startswith('.step'):
section_data = parse_step_section(section(lines, leadin))
elif line.startswith('Measurement: '):
section_data = parse_measurement_section(section(lines, leadin))
if section_data:
for step,fields in section_data.items():
data[step].update(fields)
leadin = []
section_data = None
return data
These are the section specific parsing routines:
def parse_step_section(lines):
"""
Section specific parse function for the 'step' section of a log file.
Input 'lines' is an iterable (e.g., a file, list, etc.) over lines of the section.
The section looks like:
m1:1:v_sm: Missing value, assumed 0V @ DC
Per .tran options, skipping operating point for transient analysis.
.step iload=10 vdc=36 rg=1
.step iload=20 vdc=36 rg=1
.step iload=30 vdc=36 rg=1
.step iload=40 vdc=36 rg=1
...
The '.step' lines are implicitly numbered, starting at 1.
Output is a dict of dicts. The outer dict is keyed by step. The inner dicts are
keyed by parameter name. Like this:
{1: {'step': 1, 'iload': '10', 'vdc': '36', 'rg': '1'},
2: {'step': 2, 'iload': '20', 'vdc': '36', 'rg': '1'},
3: {'step': 3, 'iload': '30', 'vdc': '36', 'rg': '1'},
4: {'step': 4, 'iload': '40', 'vdc': '36', 'rg': '1'},
...
}
"""
# The first two lines are skipped, because they aren't being used.
next(lines)
next(lines)
pattern = re.compile(r"(\S+)=(\S+)")
data = {}
for step,line in enumerate(lines, 1):
d = dict([('step',step)] + pattern.findall(line))
data[step] = d
return data
def parse_measurement_section(lines):
"""
Section specific parse function for a measurement section.
Input 'lines' is an iterable (e.g., a file, list, etc.) over lines of the section.
The section looks like:
Measurement: eon
step INTEG(v(drain)*ix(m1:d)) FROM TO
1 1.82588e-006 1e-005 1.02e-005
2 4.17134e-006 1e-005 1.02e-005
3 7.00321e-006 1e-005 1.02e-005
4 1.03301e-005 1e-005 1.02e-005
...
The first line contains the name of the measurement (e.g., 'eon').
The second line contains the header for the following table.
The table is terminated with a blank line.
This function only parses the first two columns and returns a dictionary like:
{1: {'eon': '1.82588e-006'},
2: {'eon': '4.17134e-006'},
3: {'eon': '7.00321e-006'},
4: {'eon': '1.03301e-005'},
....
}
"""
_, name = next(lines).strip().split()
# skip header line
next(lines)
step_meas_rest = (line.split(maxsplit=2) for line in lines)
data = {int(step):{name:measurement} for step,measurement,_ in step_meas_rest}
return data
This is a helper function.
def section(lines, leadin=None):
"""
Generator that yields lines from 'leadin' then from 'lines'. Leading blank lines are skipped.
It terminates when a trailing blank line is read.
"""
if leadin:
lines = it.chain(leadin, lines)
line = next(lines).lstrip()
while not line:
line = next(lines).lstrip()
while line:
yield line
line = next(lines).lstrip()
The main program:
def main(log, output):
logdata = parse_log(log)
fieldnames = logdata[1].keys()
writer = csv.DictWriter(output, fieldnames)
writer.writeheader()
for step, stepdata in logdata.items():
writer.writerow(stepdata)
The contributed answers each had some valuable contributions, but they did not really achieve the clarity (as perceived by me) I was hoping for. I guess this was mainly due to the fact that the answers were focused very much on how to express code elegantly, but in contrast my thinking is focused on the data.
So I picked some advice from those answers, i.e.
- follow the main function convention for CLI programs
- split everything into functions
- read the file to memory at the beginning
But I also kept much of my code, which follows along very much the data as it is presented in the log file. For me, everything I take from the log file is a kind of "observation", so storing them as a list of dictionaries makes perfect sense to me and using pandas to transform all those observation in a usable shape also seems straightforward to me.
So in summary, I implemented the advice above and cleaned up my existing code a bit to make it more uniform:
import re
import pandas as pd
import os
import sys
def read_log(filename):
with open(filename,'r') as logfile:
return logfile.readlines()
def parse_steps(log):
steps_data = []
step_number = 0
for line in log:
step_definition = re.match(r'\.step (.*)', line)
if step_definition:
step_number += 1
row = { 'step': step_number }
for parameter_match in re.finditer(r'(\S+)=(\S+)', step_definition[1]):
parameter, value = parameter_match.groups()
row[parameter] = float(value)
steps_data.append(row)
return steps_data
def parse_measurements(log):
measurements_data = []
log_iterator = iter(log)
while True:
try:
line = next(log_iterator)
except StopIteration:
break
measurement_definition = re.match(r'Measurement: (\S+)', line)
if measurement_definition:
measurement_name = measurement_definition[1]
next(log_iterator) # skip one line
while True:
line = next(log_iterator)
if re.match(r'^\s*\n', line): # empty line
break
measurement_observation = re.match(r'^\s*(\S+)\s+(\S+)', line)
if measurement_observation:
step, value = measurement_observation.groups()
row = { 'step': int(step), measurement_name: float(value) }
measurements_data.append(row)
return measurements_data
def main():
logfilename = sys.argv[1]
log = read_log(logfilename)
steps = parse_steps(log)
measurements = parse_measurements(log)
csvfilename = os.path.splitext(logfilename)[0] + '.csv'
frame = pd.DataFrame(steps + measurements).set_index('step').groupby('step').first()
frame.to_csv(csvfilename)
if __name__== "__main__":
main()
```
-
\$\begingroup\$ Are you happy with this revision or would you like a review of this code? \$\endgroup\$dfhwze– dfhwze2019年10月06日 19:34:00 +00:00Commented Oct 6, 2019 at 19:34
-
\$\begingroup\$ Nice rewrite. Clear and readable. I would add two things: 1) in main,
if sys.argc != 1 or argv[1] in ('-h', '--help')
: print a basic help/usage message and 2) a docstring or comment for the two parsing routines to document the format of the lines they are parsing. \$\endgroup\$RootTwo– RootTwo2019年10月07日 02:39:34 +00:00Commented Oct 7, 2019 at 2:39 -
\$\begingroup\$ @dfhwze I am fine with it, but if you spot some major problem I might have introduced, let me know. \$\endgroup\$realtime– realtime2019年10月07日 23:02:09 +00:00Commented Oct 7, 2019 at 23:02