I have written a small program in Python that reads the following file and stores the result in a dictionary. I am getting the expected output, but I think it can be done in a better way. I am using Python 3.
Input
check_name : xa25
not run:
del_l6w_
dl_l4w_
dl_l22w_
de_l3w_
ckt_pw_
ckt_pw_
run:
inv_w_
buf_w_
End
Code
import collections
def main():
    """Read ``input.txt`` and print its entries grouped by check name.

    The result is a ``defaultdict`` keyed by check name; each value is a
    dict that may hold a ``'not_run'`` list and a ``'run'`` list with the
    entries found under the matching heading.
    """
    source_path = "input.txt"
    results = collections.defaultdict(dict)

    with open(source_path, "r") as handle:
        current_check = ""
        in_not_run = False
        in_run = False

        for raw_line in handle.readlines():
            entry = raw_line.strip()
            if not entry:
                continue

            if entry.startswith('check_name'):
                # Heading of the form "check_name : <name>".
                current_check = entry.split(":")[1].strip()
            elif entry.startswith('End') or '-' in entry:
                # End of a record: forget the current check and section.
                current_check = ""
                in_not_run = in_run = False
            elif entry.startswith('not run'):
                results[current_check]['not_run'] = []
                in_not_run = True
            elif entry.startswith('run'):
                results[current_check]['run'] = []
                in_not_run = False
                in_run = True
            elif in_not_run:
                results[current_check]['not_run'].append(entry)
            elif in_run:
                results[current_check]['run'].append(entry)

    print(results)


if __name__ == "__main__":
    main()
-
Can you show an example of the output? – Reinderien, commented Feb 15, 2021 at 14:02
1 Answer 1
Type strength
Unless you have a really (really) good reason to use a `dict` - such as planning to immediately serialize to JSON for some network operation - `dict` is a poor choice for an internal representation. Use a `@dataclass`.
Parsing strictness
You should be a little more strict about your parsing; your current implementation considers `check_name_for_something_that_makes_no_sense` to be a `check_name` heading, and a data entry under the `run:` heading called `check_name` to be a heading instead of a data entry. Instead, just compare the whole line.
Your example data do not show any reason for your `-` check to be there, so I see no reason for it to exist.
Parse state
Instead of hanging onto flags to remember parse state, you can simply assign a reference to the list currently being populated.
Explicit iteration
No need to call `readlines`. Just iterate over the file object itself.
Example implementation
from dataclasses import dataclass
from typing import TextIO, List
@dataclass
class RunData:
    """Parsed contents of one check record.

    Attributes:
        name: Value taken from the ``check_name :`` line.
        run: Entries listed under the ``run:`` heading, in file order.
        not_run: Entries listed under the ``not run:`` heading, in file order.
    """

    name: str
    run: List[str]
    not_run: List[str]

    @classmethod
    def from_file(cls, f: TextIO) -> 'RunData':
        """Parse a single check record from an open text stream.

        Reading stops at the first ``End`` line, or at end of input if no
        ``End`` line is present. Blank lines are ignored and surrounding
        whitespace is stripped from every line.

        Args:
            f: Text stream positioned at the start of a record.

        Returns:
            The populated ``RunData`` instance.

        Raises:
            ValueError: If an entry appears before any ``run:`` or
                ``not run:`` heading, or if no ``check_name :`` line was
                seen before the record ended.
        """
        run: List[str] = []
        not_run: List[str] = []
        current_list = None
        name = None

        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no data and must not become entries.
                continue
            if line == 'End':
                break
            if line == 'run:':
                current_list = run
            elif line == 'not run:':
                current_list = not_run
            elif line.startswith('check_name :'):
                # partition() tolerates a missing space after the colon,
                # where split(': ', 1)[1] would raise IndexError.
                name = line.partition(':')[2].strip()
            elif current_list is None:
                raise ValueError(
                    f"entry {line!r} appears before a 'run:' or "
                    "'not run:' heading"
                )
            else:
                current_list.append(line)

        if name is None:
            raise ValueError("no 'check_name :' line found in record")
        # Also reached at end of input, so a missing 'End' line no longer
        # makes this method return None.
        return cls(name, run, not_run)
def main():
    """Parse ``input.txt`` into a ``RunData`` record and print it."""
    input_path = "input.txt"
    with open(input_path, "r") as source:
        record = RunData.from_file(source)
    print(record)


if __name__ == "__main__":
    main()
prints
RunData(name='xa25', run=['inv_w_', 'buf_w_'], not_run=['del_l6w_', 'dl_l4w_', 'dl_l22w_', 'de_l3w_', 'ckt_pw_', 'ckt_pw_'])