7
\$\begingroup\$

I wrote a parser to extract data from a log file. The file format is a bit irregular, and so is also the parser code, as it turned out. It is a clutter of different loop types, different ways to iterate, and different ways of treating strings:

import re
import pandas as pd
import os
import sys
logfilename = sys.argv[1]
csvfilename = os.path.splitext(logfilename)[0] + '.csv'
step = 0
data = []
logfile = open(logfilename,'r')
def match_step(line):
 return re.match(r'\.step (.*)', line)
# find first .step definition
for line in logfile: 
 match = match_step(line)
 if match:
 break
# iterate through all steps with parameters
while match:
 step += 1
 row = { 'step': int(step) }
 parameters = match.group(1).split()
 for p in parameters:
 [key, value] = p.split('=')
 row[key] = float(value)
 data.append(row)
 match = match_step(next(logfile))
# iterate through measurement definitions
for line in logfile: 
 match = re.match(r'Measurement: (.*)', line)
 if match: 
 name = match.group(1)
 next(logfile) # skip row with column details
 # iterate through measurement results for each step
 while True:
 measurement = next(logfile).split()
 if not measurement:
 break
 row = { 'step': int(measurement[0]), name: float(measurement[1]) }
 data.append(row)
logfile.close()
frame = pd.DataFrame(data).set_index('step').groupby('step').first()
frame.to_csv(csvfilename)

What could I do to improve code readability and consistency, especially in the parsing portion of the code?

I do not really care about performance since the process to generate the original log file already takes some minutes.

This is an example log file:

Circuit: * D:\LTSpice\loss.asc
m1:1:v_sm: Missing value, assumed 0V @ DC
Per .tran options, skipping operating point for transient analysis.
.step iload=10 vdc=36 rg=1
.step iload=20 vdc=36 rg=1
.step iload=30 vdc=36 rg=1
.step iload=40 vdc=36 rg=1
.step iload=50 vdc=36 rg=1
.step iload=60 vdc=36 rg=1
.step iload=70 vdc=36 rg=1
.step iload=80 vdc=36 rg=1
.step iload=90 vdc=36 rg=1
.step iload=100 vdc=36 rg=1
.step iload=110 vdc=36 rg=1
.step iload=120 vdc=36 rg=1
.step iload=130 vdc=36 rg=1
.step iload=140 vdc=36 rg=1
.step iload=150 vdc=36 rg=1
.step iload=160 vdc=36 rg=1
.step iload=170 vdc=36 rg=1
.step iload=180 vdc=36 rg=1
.step iload=190 vdc=36 rg=1
.step iload=200 vdc=36 rg=1
.step iload=10 vdc=40 rg=1
.step iload=20 vdc=40 rg=1
.step iload=30 vdc=40 rg=1
.step iload=40 vdc=40 rg=1
.step iload=50 vdc=40 rg=1
.step iload=60 vdc=40 rg=1
.step iload=70 vdc=40 rg=1
.step iload=80 vdc=40 rg=1
.step iload=90 vdc=40 rg=1
.step iload=100 vdc=40 rg=1
.step iload=110 vdc=40 rg=1
.step iload=120 vdc=40 rg=1
.step iload=130 vdc=40 rg=1
.step iload=140 vdc=40 rg=1
.step iload=150 vdc=40 rg=1
.step iload=160 vdc=40 rg=1
.step iload=170 vdc=40 rg=1
.step iload=180 vdc=40 rg=1
.step iload=190 vdc=40 rg=1
.step iload=200 vdc=40 rg=1
.step iload=10 vdc=44 rg=1
.step iload=20 vdc=44 rg=1
.step iload=30 vdc=44 rg=1
.step iload=40 vdc=44 rg=1
.step iload=50 vdc=44 rg=1
.step iload=60 vdc=44 rg=1
.step iload=70 vdc=44 rg=1
.step iload=80 vdc=44 rg=1
.step iload=90 vdc=44 rg=1
.step iload=100 vdc=44 rg=1
.step iload=110 vdc=44 rg=1
.step iload=120 vdc=44 rg=1
.step iload=130 vdc=44 rg=1
.step iload=140 vdc=44 rg=1
.step iload=150 vdc=44 rg=1
.step iload=160 vdc=44 rg=1
.step iload=170 vdc=44 rg=1
.step iload=180 vdc=44 rg=1
.step iload=190 vdc=44 rg=1
.step iload=200 vdc=44 rg=1
.step iload=10 vdc=48 rg=1
.step iload=20 vdc=48 rg=1
.step iload=30 vdc=48 rg=1
.step iload=40 vdc=48 rg=1
.step iload=50 vdc=48 rg=1
.step iload=60 vdc=48 rg=1
.step iload=70 vdc=48 rg=1
.step iload=80 vdc=48 rg=1
.step iload=90 vdc=48 rg=1
.step iload=100 vdc=48 rg=1
.step iload=110 vdc=48 rg=1
.step iload=120 vdc=48 rg=1
.step iload=130 vdc=48 rg=1
.step iload=140 vdc=48 rg=1
.step iload=150 vdc=48 rg=1
.step iload=160 vdc=48 rg=1
.step iload=170 vdc=48 rg=1
.step iload=180 vdc=48 rg=1
.step iload=190 vdc=48 rg=1
.step iload=200 vdc=48 rg=1
.step iload=10 vdc=52 rg=1
.step iload=20 vdc=52 rg=1
.step iload=30 vdc=52 rg=1
.step iload=40 vdc=52 rg=1
.step iload=50 vdc=52 rg=1
.step iload=60 vdc=52 rg=1
.step iload=70 vdc=52 rg=1
.step iload=80 vdc=52 rg=1
.step iload=90 vdc=52 rg=1
.step iload=100 vdc=52 rg=1
.step iload=110 vdc=52 rg=1
.step iload=120 vdc=52 rg=1
.step iload=130 vdc=52 rg=1
.step iload=140 vdc=52 rg=1
.step iload=150 vdc=52 rg=1
.step iload=160 vdc=52 rg=1
.step iload=170 vdc=52 rg=1
.step iload=180 vdc=52 rg=1
.step iload=190 vdc=52 rg=1
.step iload=200 vdc=52 rg=1
.step iload=10 vdc=56 rg=1
.step iload=20 vdc=56 rg=1
.step iload=30 vdc=56 rg=1
.step iload=40 vdc=56 rg=1
.step iload=50 vdc=56 rg=1
.step iload=60 vdc=56 rg=1
.step iload=70 vdc=56 rg=1
.step iload=80 vdc=56 rg=1
.step iload=90 vdc=56 rg=1
.step iload=100 vdc=56 rg=1
.step iload=110 vdc=56 rg=1
.step iload=120 vdc=56 rg=1
.step iload=130 vdc=56 rg=1
.step iload=140 vdc=56 rg=1
.step iload=150 vdc=56 rg=1
.step iload=160 vdc=56 rg=1
.step iload=170 vdc=56 rg=1
.step iload=180 vdc=56 rg=1
.step iload=190 vdc=56 rg=1
.step iload=200 vdc=56 rg=1
.step iload=10 vdc=36 rg=2
.step iload=20 vdc=36 rg=2
.step iload=30 vdc=36 rg=2
.step iload=40 vdc=36 rg=2
.step iload=50 vdc=36 rg=2
.step iload=60 vdc=36 rg=2
.step iload=70 vdc=36 rg=2
.step iload=80 vdc=36 rg=2
.step iload=90 vdc=36 rg=2
.step iload=100 vdc=36 rg=2
.step iload=110 vdc=36 rg=2
.step iload=120 vdc=36 rg=2
.step iload=130 vdc=36 rg=2
.step iload=140 vdc=36 rg=2
.step iload=150 vdc=36 rg=2
.step iload=160 vdc=36 rg=2
.step iload=170 vdc=36 rg=2
.step iload=180 vdc=36 rg=2
.step iload=190 vdc=36 rg=2
.step iload=200 vdc=36 rg=2
.step iload=10 vdc=40 rg=2
.step iload=20 vdc=40 rg=2
.step iload=30 vdc=40 rg=2
.step iload=40 vdc=40 rg=2
.step iload=50 vdc=40 rg=2
.step iload=60 vdc=40 rg=2
.step iload=70 vdc=40 rg=2
.step iload=80 vdc=40 rg=2
.step iload=90 vdc=40 rg=2
.step iload=100 vdc=40 rg=2
.step iload=110 vdc=40 rg=2
.step iload=120 vdc=40 rg=2
.step iload=130 vdc=40 rg=2
.step iload=140 vdc=40 rg=2
.step iload=150 vdc=40 rg=2
.step iload=160 vdc=40 rg=2
.step iload=170 vdc=40 rg=2
.step iload=180 vdc=40 rg=2
.step iload=190 vdc=40 rg=2
.step iload=200 vdc=40 rg=2
.step iload=10 vdc=44 rg=2
.step iload=20 vdc=44 rg=2
.step iload=30 vdc=44 rg=2
.step iload=40 vdc=44 rg=2
.step iload=50 vdc=44 rg=2
.step iload=60 vdc=44 rg=2
.step iload=70 vdc=44 rg=2
.step iload=80 vdc=44 rg=2
.step iload=90 vdc=44 rg=2
.step iload=100 vdc=44 rg=2
.step iload=110 vdc=44 rg=2
.step iload=120 vdc=44 rg=2
.step iload=130 vdc=44 rg=2
.step iload=140 vdc=44 rg=2
.step iload=150 vdc=44 rg=2
.step iload=160 vdc=44 rg=2
.step iload=170 vdc=44 rg=2
.step iload=180 vdc=44 rg=2
.step iload=190 vdc=44 rg=2
.step iload=200 vdc=44 rg=2
.step iload=10 vdc=48 rg=2
.step iload=20 vdc=48 rg=2
.step iload=30 vdc=48 rg=2
.step iload=40 vdc=48 rg=2
.step iload=50 vdc=48 rg=2
.step iload=60 vdc=48 rg=2
.step iload=70 vdc=48 rg=2
.step iload=80 vdc=48 rg=2
.step iload=90 vdc=48 rg=2
.step iload=100 vdc=48 rg=2
.step iload=110 vdc=48 rg=2
.step iload=120 vdc=48 rg=2
.step iload=130 vdc=48 rg=2
.step iload=140 vdc=48 rg=2
.step iload=150 vdc=48 rg=2
.step iload=160 vdc=48 rg=2
.step iload=170 vdc=48 rg=2
.step iload=180 vdc=48 rg=2
.step iload=190 vdc=48 rg=2
.step iload=200 vdc=48 rg=2
.step iload=10 vdc=52 rg=2
.step iload=20 vdc=52 rg=2
.step iload=30 vdc=52 rg=2
.step iload=40 vdc=52 rg=2
.step iload=50 vdc=52 rg=2
.step iload=60 vdc=52 rg=2
.step iload=70 vdc=52 rg=2
.step iload=80 vdc=52 rg=2
.step iload=90 vdc=52 rg=2
.step iload=100 vdc=52 rg=2
.step iload=110 vdc=52 rg=2
.step iload=120 vdc=52 rg=2
.step iload=130 vdc=52 rg=2
.step iload=140 vdc=52 rg=2
.step iload=150 vdc=52 rg=2
.step iload=160 vdc=52 rg=2
.step iload=170 vdc=52 rg=2
.step iload=180 vdc=52 rg=2
.step iload=190 vdc=52 rg=2
.step iload=200 vdc=52 rg=2
.step iload=10 vdc=56 rg=2
.step iload=20 vdc=56 rg=2
.step iload=30 vdc=56 rg=2
.step iload=40 vdc=56 rg=2
.step iload=50 vdc=56 rg=2
.step iload=60 vdc=56 rg=2
.step iload=70 vdc=56 rg=2
.step iload=80 vdc=56 rg=2
.step iload=90 vdc=56 rg=2
.step iload=100 vdc=56 rg=2
.step iload=110 vdc=56 rg=2
.step iload=120 vdc=56 rg=2
.step iload=130 vdc=56 rg=2
.step iload=140 vdc=56 rg=2
.step iload=150 vdc=56 rg=2
.step iload=160 vdc=56 rg=2
.step iload=170 vdc=56 rg=2
.step iload=180 vdc=56 rg=2
.step iload=190 vdc=56 rg=2
.step iload=200 vdc=56 rg=2
Measurement: eon
 step INTEG(v(drain)*ix(m1:d)) FROM TO
 1 1.82588e-006 1e-005 1.02e-005
 2 4.17134e-006 1e-005 1.02e-005
 3 7.00321e-006 1e-005 1.02e-005
 4 1.03301e-005 1e-005 1.02e-005
 5 1.41369e-005 1e-005 1.02e-005
 6 1.84238e-005 1e-005 1.02e-005
 7 2.32117e-005 1e-005 1.02e-005
 8 2.84883e-005 1e-005 1.02e-005
 9 3.42491e-005 1e-005 1.02e-005
 10 4.04575e-005 1e-005 1.02e-005
 11 4.71822e-005 1e-005 1.02e-005
 12 5.43457e-005 1e-005 1.02e-005
 13 6.20163e-005 1e-005 1.02e-005
 14 7.01661e-005 1e-005 1.02e-005
 15 7.87419e-005 1e-005 1.02e-005
 16 8.7822e-005 1e-005 1.02e-005
 17 9.73496e-005 1e-005 1.02e-005
 18 0.000107334 1e-005 1.02e-005
 19 0.00011775 1e-005 1.02e-005
 20 0.000128672 1e-005 1.02e-005
 21 2.01474e-006 1e-005 1.02e-005
 22 4.58268e-006 1e-005 1.02e-005
 23 7.67373e-006 1e-005 1.02e-005
 24 1.13006e-005 1e-005 1.02e-005
 25 1.54471e-005 1e-005 1.02e-005
 26 2.01132e-005 1e-005 1.02e-005
 27 2.53309e-005 1e-005 1.02e-005
 28 3.10536e-005 1e-005 1.02e-005
 29 3.73023e-005 1e-005 1.02e-005
 30 4.41231e-005 1e-005 1.02e-005
 31 5.14188e-005 1e-005 1.02e-005
 32 5.92874e-005 1e-005 1.02e-005
 33 6.75954e-005 1e-005 1.02e-005
 34 7.64433e-005 1e-005 1.02e-005
 35 8.58157e-005 1e-005 1.02e-005
 36 9.57088e-005 1e-005 1.02e-005
 37 0.000106059 1e-005 1.02e-005
 38 0.000116983 1e-005 1.02e-005
 39 0.000128391 1e-005 1.02e-005
 40 0.000140257 1e-005 1.02e-005
 41 2.19203e-006 1e-005 1.02e-005
 42 4.97675e-006 1e-005 1.02e-005
 43 8.3144e-006 1e-005 1.02e-005
 44 1.22347e-005 1e-005 1.02e-005
 45 1.66951e-005 1e-005 1.02e-005
 46 2.17464e-005 1e-005 1.02e-005
 47 2.73532e-005 1e-005 1.02e-005
 48 3.35368e-005 1e-005 1.02e-005
 49 4.02896e-005 1e-005 1.02e-005
 50 4.75927e-005 1e-005 1.02e-005
 51 5.55096e-005 1e-005 1.02e-005
 52 6.39629e-005 1e-005 1.02e-005
 53 7.29618e-005 1e-005 1.02e-005
 54 8.25448e-005 1e-005 1.02e-005
 55 9.26558e-005 1e-005 1.02e-005
 56 0.000103305 1e-005 1.02e-005
 57 0.000114593 1e-005 1.02e-005
 58 0.000126368 1e-005 1.02e-005
 59 0.000138698 1e-005 1.02e-005
 60 0.000151569 1e-005 1.02e-005
 61 2.36219e-006 1e-005 1.02e-005
 62 5.35277e-006 1e-005 1.02e-005
 63 8.93444e-006 1e-005 1.02e-005
 64 1.31182e-005 1e-005 1.02e-005
 65 1.79136e-005 1e-005 1.02e-005
 66 2.3314e-005 1e-005 1.02e-005
 67 2.9333e-005 1e-005 1.02e-005
 68 3.59266e-005 1e-005 1.02e-005
 69 4.31761e-005 1e-005 1.02e-005
 70 5.09889e-005 1e-005 1.02e-005
 71 5.94634e-005 1e-005 1.02e-005
 72 6.84967e-005 1e-005 1.02e-005
 73 7.8198e-005 1e-005 1.02e-005
 74 8.84392e-005 1e-005 1.02e-005
 75 9.93694e-005 1e-005 1.02e-005
 76 0.000110811 1e-005 1.02e-005
 77 0.00012289 1e-005 1.02e-005
 78 0.000135515 1e-005 1.02e-005
 79 0.000148799 1e-005 1.02e-005
 80 0.000162664 1e-005 1.02e-005
 81 2.53127e-006 1e-005 1.02e-005
 82 5.72998e-006 1e-005 1.02e-005
 83 9.54195e-006 1e-005 1.02e-005
 84 1.40013e-005 1e-005 1.02e-005
 85 1.91076e-005 1e-005 1.02e-005
 86 2.48313e-005 1e-005 1.02e-005
 87 3.12246e-005 1e-005 1.02e-005
 88 3.82451e-005 1e-005 1.02e-005
 89 4.59383e-005 1e-005 1.02e-005
 90 5.43192e-005 1e-005 1.02e-005
 91 6.32779e-005 1e-005 1.02e-005
 92 7.29517e-005 1e-005 1.02e-005
 93 8.32275e-005 1e-005 1.02e-005
 94 9.42133e-005 1e-005 1.02e-005
 95 0.000105792 1e-005 1.02e-005
 96 0.000118051 1e-005 1.02e-005
 97 0.00013091 1e-005 1.02e-005
 98 0.000144435 1e-005 1.02e-005
 99 0.000158566 1e-005 1.02e-005
 100 0.000173324 1e-005 1.02e-005
 101 2.70084e-006 1e-005 1.02e-005
 102 6.08804e-006 1e-005 1.02e-005
 103 1.01309e-005 1e-005 1.02e-005
 104 1.48588e-005 1e-005 1.02e-005
 105 2.02551e-005 1e-005 1.02e-005
 106 2.63259e-005 1e-005 1.02e-005
 107 3.31072e-005 1e-005 1.02e-005
 108 4.05473e-005 1e-005 1.02e-005
 109 4.87006e-005 1e-005 1.02e-005
 110 5.75245e-005 1e-005 1.02e-005
 111 6.70787e-005 1e-005 1.02e-005
 112 7.7313e-005 1e-005 1.02e-005
 113 8.82438e-005 1e-005 1.02e-005
 114 9.98267e-005 1e-005 1.02e-005
 115 0.000112141 1e-005 1.02e-005
 116 0.000125152 1e-005 1.02e-005
 117 0.000138777 1e-005 1.02e-005
 118 0.000153199 1e-005 1.02e-005
 119 0.000168253 1e-005 1.02e-005
 120 0.000183974 1e-005 1.02e-005
 121 1.82588e-006 1e-005 1.02e-005
 122 4.17134e-006 1e-005 1.02e-005
 123 7.00321e-006 1e-005 1.02e-005
 124 1.03301e-005 1e-005 1.02e-005
 125 1.41369e-005 1e-005 1.02e-005
 126 1.84238e-005 1e-005 1.02e-005
 127 2.32117e-005 1e-005 1.02e-005
 128 2.84883e-005 1e-005 1.02e-005
 129 3.42491e-005 1e-005 1.02e-005
 130 4.04575e-005 1e-005 1.02e-005
 131 4.71822e-005 1e-005 1.02e-005
 132 5.43457e-005 1e-005 1.02e-005
 133 6.20163e-005 1e-005 1.02e-005
 134 7.01661e-005 1e-005 1.02e-005
 135 7.87419e-005 1e-005 1.02e-005
 136 8.7822e-005 1e-005 1.02e-005
 137 9.73496e-005 1e-005 1.02e-005
 138 0.000107334 1e-005 1.02e-005
 139 0.00011775 1e-005 1.02e-005
 140 0.000128672 1e-005 1.02e-005
 141 2.01474e-006 1e-005 1.02e-005
 142 4.58268e-006 1e-005 1.02e-005
 143 7.67373e-006 1e-005 1.02e-005
 144 1.13006e-005 1e-005 1.02e-005
 145 1.54471e-005 1e-005 1.02e-005
 146 2.01132e-005 1e-005 1.02e-005
 147 2.53309e-005 1e-005 1.02e-005
 148 3.10536e-005 1e-005 1.02e-005
 149 3.73023e-005 1e-005 1.02e-005
 150 4.41231e-005 1e-005 1.02e-005
 151 5.14188e-005 1e-005 1.02e-005
 152 5.92874e-005 1e-005 1.02e-005
 153 6.75954e-005 1e-005 1.02e-005
 154 7.64433e-005 1e-005 1.02e-005
 155 8.58157e-005 1e-005 1.02e-005
 156 9.57088e-005 1e-005 1.02e-005
 157 0.000106059 1e-005 1.02e-005
 158 0.000116983 1e-005 1.02e-005
 159 0.000128391 1e-005 1.02e-005
 160 0.000140257 1e-005 1.02e-005
 161 2.19203e-006 1e-005 1.02e-005
 162 4.97675e-006 1e-005 1.02e-005
 163 8.3144e-006 1e-005 1.02e-005
 164 1.22347e-005 1e-005 1.02e-005
 165 1.66951e-005 1e-005 1.02e-005
 166 2.17464e-005 1e-005 1.02e-005
 167 2.73532e-005 1e-005 1.02e-005
 168 3.35368e-005 1e-005 1.02e-005
 169 4.02896e-005 1e-005 1.02e-005
 170 4.75927e-005 1e-005 1.02e-005
 171 5.55096e-005 1e-005 1.02e-005
 172 6.39629e-005 1e-005 1.02e-005
 173 7.29618e-005 1e-005 1.02e-005
 174 8.25448e-005 1e-005 1.02e-005
 175 9.26558e-005 1e-005 1.02e-005
 176 0.000103305 1e-005 1.02e-005
 177 0.000114593 1e-005 1.02e-005
 178 0.000126368 1e-005 1.02e-005
 179 0.000138698 1e-005 1.02e-005
 180 0.000151569 1e-005 1.02e-005
 181 2.36219e-006 1e-005 1.02e-005
 182 5.35277e-006 1e-005 1.02e-005
 183 8.93444e-006 1e-005 1.02e-005
 184 1.31182e-005 1e-005 1.02e-005
 185 1.79136e-005 1e-005 1.02e-005
 186 2.3314e-005 1e-005 1.02e-005
 187 2.9333e-005 1e-005 1.02e-005
 188 3.59266e-005 1e-005 1.02e-005
 189 4.31761e-005 1e-005 1.02e-005
 190 5.09889e-005 1e-005 1.02e-005
 191 5.94634e-005 1e-005 1.02e-005
 192 6.84967e-005 1e-005 1.02e-005
 193 7.8198e-005 1e-005 1.02e-005
 194 8.84392e-005 1e-005 1.02e-005
 195 9.93694e-005 1e-005 1.02e-005
 196 0.000110811 1e-005 1.02e-005
 197 0.00012289 1e-005 1.02e-005
 198 0.000135515 1e-005 1.02e-005
 199 0.000148799 1e-005 1.02e-005
 200 0.000162664 1e-005 1.02e-005
 201 2.53127e-006 1e-005 1.02e-005
 202 5.72998e-006 1e-005 1.02e-005
 203 9.54195e-006 1e-005 1.02e-005
 204 1.40013e-005 1e-005 1.02e-005
 205 1.91076e-005 1e-005 1.02e-005
 206 2.48313e-005 1e-005 1.02e-005
 207 3.12246e-005 1e-005 1.02e-005
 208 3.82451e-005 1e-005 1.02e-005
 209 4.59383e-005 1e-005 1.02e-005
 210 5.43192e-005 1e-005 1.02e-005
 211 6.32779e-005 1e-005 1.02e-005
 212 7.29517e-005 1e-005 1.02e-005
 213 8.32275e-005 1e-005 1.02e-005
 214 9.42133e-005 1e-005 1.02e-005
 215 0.000105792 1e-005 1.02e-005
 216 0.000118051 1e-005 1.02e-005
 217 0.00013091 1e-005 1.02e-005
 218 0.000144435 1e-005 1.02e-005
 219 0.000158566 1e-005 1.02e-005
 220 0.000173324 1e-005 1.02e-005
 221 2.70084e-006 1e-005 1.02e-005
 222 6.08804e-006 1e-005 1.02e-005
 223 1.01309e-005 1e-005 1.02e-005
 224 1.48588e-005 1e-005 1.02e-005
 225 2.02551e-005 1e-005 1.02e-005
 226 2.63259e-005 1e-005 1.02e-005
 227 3.31072e-005 1e-005 1.02e-005
 228 4.05473e-005 1e-005 1.02e-005
 229 4.87006e-005 1e-005 1.02e-005
 230 5.75245e-005 1e-005 1.02e-005
 231 6.70787e-005 1e-005 1.02e-005
 232 7.7313e-005 1e-005 1.02e-005
 233 8.82438e-005 1e-005 1.02e-005
 234 9.98267e-005 1e-005 1.02e-005
 235 0.000112141 1e-005 1.02e-005
 236 0.000125152 1e-005 1.02e-005
 237 0.000138777 1e-005 1.02e-005
 238 0.000153199 1e-005 1.02e-005
 239 0.000168253 1e-005 1.02e-005
 240 0.000183974 1e-005 1.02e-005
Measurement: eoff
 step INTEG(v(drain)*ix(m1:d)) FROM TO
 1 4.23893e-006 2e-005 2.02e-005
 2 6.95585e-006 2e-005 2.02e-005
 3 1.0193e-005 2e-005 2.02e-005
 4 1.3824e-005 2e-005 2.02e-005
 5 1.78051e-005 2e-005 2.02e-005
 6 2.21101e-005 2e-005 2.02e-005
 7 2.67549e-005 2e-005 2.02e-005
 8 3.17371e-005 2e-005 2.02e-005
 9 3.70578e-005 2e-005 2.02e-005
 10 4.277e-005 2e-005 2.02e-005
 11 4.87986e-005 2e-005 2.02e-005
 12 5.51665e-005 2e-005 2.02e-005
 13 6.19453e-005 2e-005 2.02e-005
 14 6.915e-005 2e-005 2.02e-005
 15 7.66868e-005 2e-005 2.02e-005
 16 8.46352e-005 2e-005 2.02e-005
 17 9.29818e-005 2e-005 2.02e-005
 18 0.000101657 2e-005 2.02e-005
 19 0.000110735 2e-005 2.02e-005
 20 0.000120178 2e-005 2.02e-005
 21 4.69635e-006 2e-005 2.02e-005
 22 7.69407e-006 2e-005 2.02e-005
 23 1.12591e-005 2e-005 2.02e-005
 24 1.52438e-005 2e-005 2.02e-005
 25 1.9606e-005 2e-005 2.02e-005
 26 2.43249e-005 2e-005 2.02e-005
 27 2.94065e-005 2e-005 2.02e-005
 28 3.48604e-005 2e-005 2.02e-005
 29 4.06825e-005 2e-005 2.02e-005
 30 4.69548e-005 2e-005 2.02e-005
 31 5.35431e-005 2e-005 2.02e-005
 32 6.06112e-005 2e-005 2.02e-005
 33 6.81013e-005 2e-005 2.02e-005
 34 7.60042e-005 2e-005 2.02e-005
 35 8.43295e-005 2e-005 2.02e-005
 36 9.31359e-005 2e-005 2.02e-005
 37 0.00010231 2e-005 2.02e-005
 38 0.000111976 2e-005 2.02e-005
 39 0.000121963 2e-005 2.02e-005
 40 0.000132423 2e-005 2.02e-005
 41 5.1599e-006 2e-005 2.02e-005
 42 8.41701e-006 2e-005 2.02e-005
 43 1.22973e-005 2e-005 2.02e-005
 44 1.66192e-005 2e-005 2.02e-005
 45 2.13463e-005 2e-005 2.02e-005
 46 2.64597e-005 2e-005 2.02e-005
 47 3.19739e-005 2e-005 2.02e-005
 48 3.78825e-005 2e-005 2.02e-005
 49 4.41972e-005 2e-005 2.02e-005
 50 5.09872e-005 2e-005 2.02e-005
 51 5.82033e-005 2e-005 2.02e-005
 52 6.59461e-005 2e-005 2.02e-005
 53 7.406e-005 2e-005 2.02e-005
 54 8.27488e-005 2e-005 2.02e-005
 55 9.18643e-005 2e-005 2.02e-005
 56 0.000101476 2e-005 2.02e-005
 57 0.000111573 2e-005 2.02e-005
 58 0.000122229 2e-005 2.02e-005
 59 0.000133124 2e-005 2.02e-005
 60 0.000144596 2e-005 2.02e-005
 61 5.63236e-006 2e-005 2.02e-005
 62 9.13522e-006 2e-005 2.02e-005
 63 1.33072e-005 2e-005 2.02e-005
 64 1.79548e-005 2e-005 2.02e-005
 65 2.30477e-005 2e-005 2.02e-005
 66 2.85415e-005 2e-005 2.02e-005
 67 3.44904e-005 2e-005 2.02e-005
 68 4.0866e-005 2e-005 2.02e-005
 69 4.7659e-005 2e-005 2.02e-005
 70 5.50591e-005 2e-005 2.02e-005
 71 6.27947e-005 2e-005 2.02e-005
 72 7.11432e-005 2e-005 2.02e-005
 73 8.00001e-005 2e-005 2.02e-005
 74 8.94251e-005 2e-005 2.02e-005
 75 9.93542e-005 2e-005 2.02e-005
 76 0.000109774 2e-005 2.02e-005
 77 0.00012074 2e-005 2.02e-005
 78 0.000132208 2e-005 2.02e-005
 79 0.000144181 2e-005 2.02e-005
 80 0.00015664 2e-005 2.02e-005
 81 6.10254e-006 2e-005 2.02e-005
 82 9.83685e-006 2e-005 2.02e-005
 83 1.42959e-005 2e-005 2.02e-005
 84 1.92432e-005 2e-005 2.02e-005
 85 2.4707e-005 2e-005 2.02e-005
 86 3.05967e-005 2e-005 2.02e-005
 87 3.69216e-005 2e-005 2.02e-005
 88 4.37559e-005 2e-005 2.02e-005
 89 5.11195e-005 2e-005 2.02e-005
 90 5.89263e-005 2e-005 2.02e-005
 91 6.734e-005 2e-005 2.02e-005
 92 7.63461e-005 2e-005 2.02e-005
 93 8.58748e-005 2e-005 2.02e-005
 94 9.60459e-005 2e-005 2.02e-005
 95 0.000106726 2e-005 2.02e-005
 96 0.00011802 2e-005 2.02e-005
 97 0.000129852 2e-005 2.02e-005
 98 0.000142253 2e-005 2.02e-005
 99 0.000155198 2e-005 2.02e-005
 100 0.000168663 2e-005 2.02e-005
 101 6.5849e-006 2e-005 2.02e-005
 102 1.05374e-005 2e-005 2.02e-005
 103 1.52673e-005 2e-005 2.02e-005
 104 2.05492e-005 2e-005 2.02e-005
 105 2.63589e-005 2e-005 2.02e-005
 106 3.26249e-005 2e-005 2.02e-005
 107 3.93226e-005 2e-005 2.02e-005
 108 4.66382e-005 2e-005 2.02e-005
 109 5.44663e-005 2e-005 2.02e-005
 110 6.28883e-005 2e-005 2.02e-005
 111 7.18675e-005 2e-005 2.02e-005
 112 8.15138e-005 2e-005 2.02e-005
 113 9.17877e-005 2e-005 2.02e-005
 114 0.000102613 2e-005 2.02e-005
 115 0.000114123 2e-005 2.02e-005
 116 0.000126246 2e-005 2.02e-005
 117 0.000138925 2e-005 2.02e-005
 118 0.000152236 2e-005 2.02e-005
 119 0.000166172 2e-005 2.02e-005
 120 0.000180598 2e-005 2.02e-005
 121 4.23893e-006 2e-005 2.02e-005
 122 6.95585e-006 2e-005 2.02e-005
 123 1.0193e-005 2e-005 2.02e-005
 124 1.3824e-005 2e-005 2.02e-005
 125 1.78051e-005 2e-005 2.02e-005
 126 2.21101e-005 2e-005 2.02e-005
 127 2.67549e-005 2e-005 2.02e-005
 128 3.17371e-005 2e-005 2.02e-005
 129 3.70578e-005 2e-005 2.02e-005
 130 4.277e-005 2e-005 2.02e-005
 131 4.87986e-005 2e-005 2.02e-005
 132 5.51665e-005 2e-005 2.02e-005
 133 6.19453e-005 2e-005 2.02e-005
 134 6.915e-005 2e-005 2.02e-005
 135 7.66868e-005 2e-005 2.02e-005
 136 8.46352e-005 2e-005 2.02e-005
 137 9.29818e-005 2e-005 2.02e-005
 138 0.000101657 2e-005 2.02e-005
 139 0.000110735 2e-005 2.02e-005
 140 0.000120178 2e-005 2.02e-005
 141 4.69635e-006 2e-005 2.02e-005
 142 7.69407e-006 2e-005 2.02e-005
 143 1.12591e-005 2e-005 2.02e-005
 144 1.52438e-005 2e-005 2.02e-005
 145 1.9606e-005 2e-005 2.02e-005
 146 2.43249e-005 2e-005 2.02e-005
 147 2.94065e-005 2e-005 2.02e-005
 148 3.48604e-005 2e-005 2.02e-005
 149 4.06825e-005 2e-005 2.02e-005
 150 4.69548e-005 2e-005 2.02e-005
 151 5.35431e-005 2e-005 2.02e-005
 152 6.06112e-005 2e-005 2.02e-005
 153 6.81013e-005 2e-005 2.02e-005
 154 7.60042e-005 2e-005 2.02e-005
 155 8.43295e-005 2e-005 2.02e-005
 156 9.31359e-005 2e-005 2.02e-005
 157 0.00010231 2e-005 2.02e-005
 158 0.000111976 2e-005 2.02e-005
 159 0.000121963 2e-005 2.02e-005
 160 0.000132423 2e-005 2.02e-005
 161 5.1599e-006 2e-005 2.02e-005
 162 8.41701e-006 2e-005 2.02e-005
 163 1.22973e-005 2e-005 2.02e-005
 164 1.66192e-005 2e-005 2.02e-005
 165 2.13463e-005 2e-005 2.02e-005
 166 2.64597e-005 2e-005 2.02e-005
 167 3.19739e-005 2e-005 2.02e-005
 168 3.78825e-005 2e-005 2.02e-005
 169 4.41972e-005 2e-005 2.02e-005
 170 5.09872e-005 2e-005 2.02e-005
 171 5.82033e-005 2e-005 2.02e-005
 172 6.59461e-005 2e-005 2.02e-005
 173 7.406e-005 2e-005 2.02e-005
 174 8.27488e-005 2e-005 2.02e-005
 175 9.18643e-005 2e-005 2.02e-005
 176 0.000101476 2e-005 2.02e-005
 177 0.000111573 2e-005 2.02e-005
 178 0.000122229 2e-005 2.02e-005
 179 0.000133124 2e-005 2.02e-005
 180 0.000144596 2e-005 2.02e-005
 181 5.63236e-006 2e-005 2.02e-005
 182 9.13522e-006 2e-005 2.02e-005
 183 1.33072e-005 2e-005 2.02e-005
 184 1.79548e-005 2e-005 2.02e-005
 185 2.30477e-005 2e-005 2.02e-005
 186 2.85415e-005 2e-005 2.02e-005
 187 3.44904e-005 2e-005 2.02e-005
 188 4.0866e-005 2e-005 2.02e-005
 189 4.7659e-005 2e-005 2.02e-005
 190 5.50591e-005 2e-005 2.02e-005
 191 6.27947e-005 2e-005 2.02e-005
 192 7.11432e-005 2e-005 2.02e-005
 193 8.00001e-005 2e-005 2.02e-005
 194 8.94251e-005 2e-005 2.02e-005
 195 9.93542e-005 2e-005 2.02e-005
 196 0.000109774 2e-005 2.02e-005
 197 0.00012074 2e-005 2.02e-005
 198 0.000132208 2e-005 2.02e-005
 199 0.000144181 2e-005 2.02e-005
 200 0.00015664 2e-005 2.02e-005
 201 6.10254e-006 2e-005 2.02e-005
 202 9.83685e-006 2e-005 2.02e-005
 203 1.42959e-005 2e-005 2.02e-005
 204 1.92432e-005 2e-005 2.02e-005
 205 2.4707e-005 2e-005 2.02e-005
 206 3.05967e-005 2e-005 2.02e-005
 207 3.69216e-005 2e-005 2.02e-005
 208 4.37559e-005 2e-005 2.02e-005
 209 5.11195e-005 2e-005 2.02e-005
 210 5.89263e-005 2e-005 2.02e-005
 211 6.734e-005 2e-005 2.02e-005
 212 7.63461e-005 2e-005 2.02e-005
 213 8.58748e-005 2e-005 2.02e-005
 214 9.60459e-005 2e-005 2.02e-005
 215 0.000106726 2e-005 2.02e-005
 216 0.00011802 2e-005 2.02e-005
 217 0.000129852 2e-005 2.02e-005
 218 0.000142253 2e-005 2.02e-005
 219 0.000155198 2e-005 2.02e-005
 220 0.000168663 2e-005 2.02e-005
 221 6.5849e-006 2e-005 2.02e-005
 222 1.05374e-005 2e-005 2.02e-005
 223 1.52673e-005 2e-005 2.02e-005
 224 2.05492e-005 2e-005 2.02e-005
 225 2.63589e-005 2e-005 2.02e-005
 226 3.26249e-005 2e-005 2.02e-005
 227 3.93226e-005 2e-005 2.02e-005
 228 4.66382e-005 2e-005 2.02e-005
 229 5.44663e-005 2e-005 2.02e-005
 230 6.28883e-005 2e-005 2.02e-005
 231 7.18675e-005 2e-005 2.02e-005
 232 8.15138e-005 2e-005 2.02e-005
 233 9.17877e-005 2e-005 2.02e-005
 234 0.000102613 2e-005 2.02e-005
 235 0.000114123 2e-005 2.02e-005
 236 0.000126246 2e-005 2.02e-005
 237 0.000138925 2e-005 2.02e-005
 238 0.000152236 2e-005 2.02e-005
 239 0.000166172 2e-005 2.02e-005
 240 0.000180598 2e-005 2.02e-005
Date: Sun Sep 22 18:48:00 2019
Total elapsed time: 186.382 seconds.
tnom = 27
temp = 27
method = modified trap
totiter = 4603
traniter = 4603
tranpoints = 1964
accept = 1663
rejected = 301
matrix size = 43
fillins = 99
solver = Normal
Matrix Compiler1: 9.73 KB object code size 5.9/3.3/[0.7]
Matrix Compiler2: 5.38 KB object code size 1.4/3.0/[0.5]

The example log file can also be found here, as well as the code above, both on my GitHub.

Please note: I am not a professional programmer, I only write code to support my main activity which is electronics engineering.

asked Sep 23, 2019 at 18:41
\$\endgroup\$
1
  • 6
    \$\begingroup\$ To whoever is voting to close this question: don't forget to leave a comment. \$\endgroup\$ Commented Sep 23, 2019 at 18:46

4 Answers 4

3
\$\begingroup\$

I started reviewing this last night, and got way off track from what you need. You've been clear that this is hobbyist code that already works; the only reason you're hear asking how to improve it is (I guess) so it'll be easier for you to resume work on a year from now.

First, some responses to your actual code:

  • Breaking your code up into functions is often good, even if the function is only used once. This can make things more verbose, but it's worth it if it helps us understand the flow of the code, or helps us consider pieces of it in isolation.
  • It's my opinion that strong type signatures are always a good idea. They clarify what you code does, and unlike a comment they can't be wrong.
  • The example log file has less than a thousand lines. For a small enough file, it will make sense to read the whole thing into a list of strings before trying to work with it. It's more performant to work with streams than lists, but realizing that performance benefit here would be hard because we have to hold all the data from the first n-1 sections in memory until we get to the last section. Until you start having performance problems, load all your data right at the beginning.
  • List comprehensions are wonderful and cover all the basic python data structures. That said, in order for them to work for this situation you'll need some helper functions.
  • You're writing this as a CLI tool. Using the main-function pattern will make the code easier to play with in other contexts.

What I wrote last night:

import re
import pandas as pd
import os
import sys
from itertools import chain, dropwhile, groupby
from typing import *
def read_lines(file_name:str) -> Iterable[str]:
 with open(file_name, 'r') as f: # This is generally preferable to closing the file manually.
 return [line.strip() for line in f]
def parse_lines(lines: Iterable[str]) -> Dict[int, Dict[str, float]]:
 runs = [
 run
 for run
 in map(lambda g: list(g[1]), groupby(lines, bool))
 # filter out the headers, footers, and empty lines:
 if any(l.startswith('.step') or l.startswith('Measurement:') for l in run)
 ]
 steps = {
 step_number: read_step(line)
 for (step_number, line)
 in enumerate(
 dropwhile(lambda l: not l.startswith('.step'),
 runs[0]),
 1)
 }
 metrics = {
 read_measurment_key(measurment_batch[0]): {
 int(step_number): float(measurement)
 for [step_number, measurement]
 in map(lambda m: m.split()[0:2], measurment_batch[2:])
 }
 for measurment_batch in runs[1:]
 }
 return {
 step_number: dict(chain(
 step.items(),
 (
 (measurment_key, measurments[step_number])
 for (measurment_key, measurments)
 in metrics.items()
 )
 ))
 for (step_number, step) in steps.items()
 }
def read_step(line: str) -> Dict[str, float]:
 match = re.match(r'\.step (.*)', line) # For performance we could compile the regex.
 return {
 key: float(value)
 for [key, value]
 in map(
 lambda parameter: parameter.split('='),
 match.group(1).split()
 )
 }
def read_measurment_key(line: str) -> str:
 match = re.match(r'Measurement: (.*)', line) # For performance we could compile the regex.
 return match.group(1)
def main():
 logfilename = 'loss.log' # Or read the argument as you did.
 csvfilename = os.path.splitext(logfilename)[0] + '.csv'
 log_file_lines = read_lines(logfilename)
 data = parse_lines(log_file_lines)
 frame = pd.DataFrame(
 dict(step = step_number, **values)
 for (step_number, values) in data.items()
 ).set_index('step')
 frame.to_csv(csvfilename)
if __name__== "__main__":
 main()

Is that better?

I don't know.

  • Is it readable to you? It sounds like you probably don't already know most of the syntax used, and there's probably only so much new stuff you'll want to learn for this project.
  • When you need to update it a year from now, will it be clear what the current runs = [...] section is doing? Will you be able to make the needed changes with minimal trial and error?
  • Does it actually work? I know it runs, and the output looks the same, but how do we know I didn't introduce some small bug such that 5% of your data is now wrong?
  • How will it handle bad data? I didn't put in any kind of error detection.

What's good about it?

  • The use of typed functions will help us be sure the code is doing what we intend it to do. I could actually have taken this further by moving more stuff out into individual functions.
  • The use of list comprehensions and other "functional programming" styles means that we're never mutating the state of a variable. This also helps us be sure everything is working as intended.
  • The use of pure functions also helps us be sure everything is working as intended.
  • Basically every "line" can be read as "build a value", and the nature of those values is clearly (?) indicated by the names of the variables we assigning to or the functions we're returning from.
answered Sep 24, 2019 at 13:46
\$\endgroup\$
2
  • \$\begingroup\$ Thank you very much for your contribution. I will see how I can benefit from it and then get back. Some remarks: You write it is "hobbyist code". This is almost certainly true regarding my skill level, but nevertheless this code was created for a professional purpose, just in a different domain. Also, looking at your code (and that of @Reinderien) I realize that there is one feature in my original code I like: It follows the sequence of the original log file. In 80% of usage I actually read the log file and do not use the parser. That makes it easy to retrace what the parser is doing. \$\endgroup\$ Commented Sep 25, 2019 at 18:36
  • \$\begingroup\$ Interesting. I guess the (structured, explicit) way to implement that way of thinking would be a "Builder" pattern: A class that sets up an empty data structure (possibly a data-frame, possibly something custom) on __init__(), and then exposes a (stateful) ingest(self, line:str) function and some kind of final-output function. But really it's not clear what would make one implementation better than another for you. \$\endgroup\$ Commented Sep 25, 2019 at 18:46
4
\$\begingroup\$

I wrote an example of what you can do to clean this up:

import re
from collections import OrderedDict
from csv import DictWriter
from os.path import splitext
from sys import argv
from typing import Iterable
def get_measure_matches(log: str) -> tuple:
 return tuple(re.finditer(r'Measurement: (.*)$', log, re.M))
def parse_main(log: str, measure_matches: tuple) -> (list, OrderedDict):
 all_cols = OrderedDict((('step', None),))
 rows = []
 main_row_re = re.compile(r'(\S+)=(\S+)')
 main_lines = log[:measure_matches[0].start()].splitlines()
 step = 1
 for line in main_lines:
 row = {}
 for match in main_row_re.finditer(line):
 k, v = match.groups()
 all_cols[k] = None
 row[k] = v
 if row:
 row['step'] = step
 rows.append(row)
 step += 1
 for m in measure_matches:
 all_cols[m[1]] = None
 return rows, all_cols
def parse_measures(log: str, measure_matches: tuple, rows: Iterable[dict]):
 measure_ends = (*(m.start() for m in measure_matches[1:]), -1)
 measure_re = re.compile(r'^\s*(\S+)\s+(\S+)', re.M)
 for measure, measure_end in zip(measure_matches, measure_ends):
 measure_name = measure[1]
 blob = log[measure.end(): measure_end]
 for match in measure_re.finditer(blob):
 try:
 step, val = match.groups()
 rows[int(step) - 1][measure_name] = float(val)
 except ValueError:
 pass
def write_csv(rows: Iterable[dict], cols: Iterable[str], csv_filename: str):
 """
 step,iload,vdc,rg,eon,eoff
 1,10.0,36.0,1.0,1.82588e-06,4.23893e-06
 2,20.0,36.0,1.0,4.17134e-06,6.95585e-06
 (etc)
 """
 with open(csv_filename, 'w') as csv_file:
 writer = DictWriter(csv_file, cols)
 writer.writeheader()
 writer.writerows(rows)
def main():
 log_filename = argv[1]
 with open(log_filename) as log_file:
 log = log_file.read()
 measure_matches = get_measure_matches(log)
 rows, all_cols = parse_main(log, measure_matches)
 parse_measures(log, measure_matches, rows)
 csv_filename = splitext(log_filename)[0] + '.csv'
 write_csv(rows, all_cols.keys(), csv_filename)
if __name__ == '__main__':
 main()

Notes:

  • There are methods
  • No need to use pandas - this runs on base Python 3
  • Better use of finditer
  • Type hints
  • No need to call group(n)
  • Implicitly close files in a context manager
answered Sep 24, 2019 at 3:11
\$\endgroup\$
3
\$\begingroup\$

The log file looks like it is made up of bunch of sections separated by blank lines. So, an approach is to code functions to parse each section. The main driver can scan the log file to find the sections and then pass the parsing off to the parsing functions. The parsing functions have a common interface, to make it easy to add new parsing functions.

The code below is mostly comments and doc strings.

This is the driver. It iterates over the lines in the log file, trying to identify what kind of section it's reading. For example, if a line starts with '.step' it is in the 'step' section, etc.

def parse_log(lines):
 """
 Parses a log file in which sections are delimited by blank lines.
 Input 'lines' is an iterable over the lines of the log file.
 Output is a defaultdict of dictionaries. The defaultdict is keyed by
 step number. The dict for each step is a dict of the parameters and 
 measurements associated with that step:
 defaultdict(dict,
 {1: {'step': 1, 'iload': '10', 'vdc': '36', 'rg': '1',
 'eon': '1.82588e-006', 'eoff': '4.23893e-006'},
 2: {'step': 2, 'iload': '20', 'vdc': '36', 'rg': '1',
 'eon': '4.17134e-006', 'eoff': '6.95585e-006'},
 3: {'step': 3, 'iload': '30', 'vdc': '36', 'rg': '1',
 'eon': '7.00321e-006', 'eoff': '1.0193e-005'},
 ...
 })
 """
 # Lines in the input are checked to see if the section can be identified. If a section is
 # identified a section specific parsing function is called. If a section cannot be identified
 # from a line, the line is appended to 'leadin'.
 #
 # Some sections are easier to identify after reading a few lines. For example, the
 # 'step' section contains lines that start with '.step'. 
 #
 # m1:1:v_sm: Missing value, assumed 0V @ DC
 # Per .tran options, skipping operating point for transient analysis.
 # .step iload=10 vdc=36 rg=1
 # .step iload=20 vdc=36 rg=1
 # ...
 # 
 # The list of the 'leadin' lines are passed to 'section()' so that the section specific 
 # parsing function gets all of the lines in the section (e.g., the m1:1 ..., and Per ... lines
 # in the example above). 
 #
 # Section specific parsing functions are expected to return a dictionary, keyed by step number,
 # of dictionaries containing parameter or measurement names and their values. The section
 # specific dictionaries are merged and returned.
 data = defaultdict(dict)
 leadin = []
 section_data = None
 for line in lines:
 line = line.lstrip()
 if not line:
 if leadin:
 leadin = []
 # If 'leadin' is is not empty, there was an unknown or unrecognized section.
 # For debuging, it might be useful to print (or log) the first few lines.
 #print(f"unknown section: {leadin[:5]}")
 continue
 leadin.append(line)
 if line.startswith('.step'):
 section_data = parse_step_section(section(lines, leadin))
 elif line.startswith('Measurement: '):
 section_data = parse_measurement_section(section(lines, leadin))
 if section_data:
 for step,fields in section_data.items():
 data[step].update(fields)
 leadin = []
 section_data = None
 return data

These are the section specific parsing routines:

def parse_step_section(lines):
 """
 Section specific parse function for the 'step' section of a log file.
 Input 'lines' is an iterable (e.g., a file, list, etc.) over lines of the section. 
 The section looks like:
 m1:1:v_sm: Missing value, assumed 0V @ DC
 Per .tran options, skipping operating point for transient analysis.
 .step iload=10 vdc=36 rg=1
 .step iload=20 vdc=36 rg=1
 .step iload=30 vdc=36 rg=1
 .step iload=40 vdc=36 rg=1
 ...
 The '.step' lines are implicitly numbered, starting at 1.
 Output is a dict of dicts. The outer dict is keyed by step. The inner dicts are 
 keyed by parameter name. Like this:
 {1: {'step': 1, 'iload': '10', 'vdc': '36', 'rg': '1'},
 2: {'step': 2, 'iload': '20', 'vdc': '36', 'rg': '1'},
 3: {'step': 3, 'iload': '30', 'vdc': '36', 'rg': '1'},
 4: {'step': 4, 'iload': '40', 'vdc': '36', 'rg': '1'},
 ...
 }
 """
 # The first two lines are skipped, because they aren't being used.
 next(lines)
 next(lines)
 pattern = re.compile(r"(\S+)=(\S+)")
 data = {}
 for step,line in enumerate(lines, 1):
 d = dict([('step',step)] + pattern.findall(line))
 data[step] = d
 return data
def parse_measurement_section(lines):
 """
 Section specific parse function for a measurement section.
 Input 'lines' is an iterable (e.g., a file, list, etc.) over lines of the section. 
 The section looks like:
 Measurement: eon
 step INTEG(v(drain)*ix(m1:d)) FROM TO
 1 1.82588e-006 1e-005 1.02e-005
 2 4.17134e-006 1e-005 1.02e-005
 3 7.00321e-006 1e-005 1.02e-005
 4 1.03301e-005 1e-005 1.02e-005
 ...
 The first line contains the name of the measurement (e.g., 'eon').
 The second line contains the header for the following table.
 The table is terminated with a blank line.
 This function only parses the first two columns and returns a dictionary like:
 {1: {'eon': '1.82588e-006'},
 2: {'eon': '4.17134e-006'},
 3: {'eon': '7.00321e-006'},
 4: {'eon': '1.03301e-005'},
 ....
 }
 """
 _, name = next(lines).strip().split()
 # skip header line
 next(lines) 
 step_meas_rest = (line.split(maxsplit=2) for line in lines)
 data = {int(step):{name:measurement} for step,measurement,_ in step_meas_rest}
 return data

This is a helper function.

def section(lines, leadin=None):
 """
 Generator that yields lines from 'leadin' then from 'lines'. Leading blank lines are skipped.
 It terminates when a trailing blank line is read.
 """
 if leadin:
 lines = it.chain(leadin, lines)
 line = next(lines).lstrip()
 while not line:
 line = next(lines).lstrip()
 while line:
 yield line
 line = next(lines).lstrip()

The main program:

def main(log, output):
 logdata = parse_log(log)
 fieldnames = logdata[1].keys()
 writer = csv.DictWriter(output, fieldnames)
 writer.writeheader()
 for step, stepdata in logdata.items():
 writer.writerow(stepdata)
answered Sep 26, 2019 at 4:04
\$\endgroup\$
2
\$\begingroup\$

The contributed answers each had some valuable contributions, but they did not really achieve the clarity (as perceived by me) I was hoping for. I guess this was mainly due to the fact that the answers were focused very much on how to express code elegantly, but in contrast my thinking is focused on the data.

So I picked some advice from those answers, i.e.

  • follow the main function convention for CLI programs
  • split everything into functions
  • read the file to memory at the beginning

But I also kept much of my code, which follows along very much the data as it is presented in the log file. For me, everything I take from the log file is a kind of "observation", so storing them as a list of dictionaries makes perfect sense to me and using pandas to transform all those observation in a usable shape also seems straightforward to me.

So in summary, I implemented the advice above and cleaned up my existing code a bit to make it more uniform:

import re
import pandas as pd
import os
import sys
def read_log(filename):
 with open(filename,'r') as logfile:
 return logfile.readlines()
def parse_steps(log):
 steps_data = []
 step_number = 0
 for line in log:
 step_definition = re.match(r'\.step (.*)', line)
 if step_definition:
 step_number += 1
 row = { 'step': step_number }
 for parameter_match in re.finditer(r'(\S+)=(\S+)', step_definition[1]):
 parameter, value = parameter_match.groups()
 row[parameter] = float(value)
 steps_data.append(row)
 return steps_data
def parse_measurements(log):
 measurements_data = []
 log_iterator = iter(log)
 while True:
 try:
 line = next(log_iterator)
 except StopIteration:
 break
 measurement_definition = re.match(r'Measurement: (\S+)', line)
 if measurement_definition:
 measurement_name = measurement_definition[1]
 next(log_iterator) # skip one line
 while True:
 line = next(log_iterator)
 if re.match(r'^\s*\n', line): # empty line
 break
 measurement_observation = re.match(r'^\s*(\S+)\s+(\S+)', line)
 if measurement_observation:
 step, value = measurement_observation.groups()
 row = { 'step': int(step), measurement_name: float(value) }
 measurements_data.append(row)
 return measurements_data
def main():
 logfilename = sys.argv[1]
 log = read_log(logfilename)
 steps = parse_steps(log)
 measurements = parse_measurements(log)
 csvfilename = os.path.splitext(logfilename)[0] + '.csv'
 frame = pd.DataFrame(steps + measurements).set_index('step').groupby('step').first()
 frame.to_csv(csvfilename)
if __name__== "__main__":
 main()
```
answered Oct 6, 2019 at 19:31
\$\endgroup\$
3
  • \$\begingroup\$ Are you happy with this revision or would you like a review of this code? \$\endgroup\$ Commented Oct 6, 2019 at 19:34
  • \$\begingroup\$ Nice rewrite. Clear and readable. I would add two things: 1) in main, if sys.argc != 1 or argv[1] in ('-h', '--help'): print a basic help/usage message and 2) a docstring or comment for the two parsing routines to document the format of the lines they are parsing. \$\endgroup\$ Commented Oct 7, 2019 at 2:39
  • \$\begingroup\$ @dfhwze I am fine with it, but if you spot some major problem I might have introduced, let me know. \$\endgroup\$ Commented Oct 7, 2019 at 23:02

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.