This Python script extracts data from txt files containing raw numerical data, notably the average increase, the relative evolution and the standard deviation of the values. Each of these values is calculated over a period, which is set via the command line.
#! /usr/bin/env python3
import statistics
import sys
# Command-line handling: the script takes exactly one argument, the period.
if len(sys.argv) != 2:
    print("""SYNOPSIS
./%s period
DESCRIPTION
period the number of days defining a period""" % (sys.argv[0]))
    # sys.exit rather than the bare exit(): the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(1)

# Positive increases between consecutive input values (0 when the value did
# not rise); filled by calculate_g().
increases = []

try:
    period = int(sys.argv[1])
except ValueError:
    # Leave with a readable message instead of a traceback. A string passed
    # to sys.exit is printed to stderr and the exit status is 1.
    sys.exit("period must be an integer, got %r" % (sys.argv[1],))
if period < 1:
    # With a period of zero or less there is nothing to average over and the
    # statistics below would divide by zero.
    sys.exit("period must be at least 1, got %d" % (period,))

# Input values read so far; filled by main().
values = []
def check_length(array):
    """Drop the oldest item of array once it outgrows the period.

    If array holds more than ``period`` items (the module-level setting),
    its first element is removed in place. At most one element is removed
    per call and nothing is returned.
    """
    if len(array) <= period:
        return
    del array[0]
def mean(data):
    """Return the arithmetic mean of data, rounded to two decimal places.

    Raises ZeroDivisionError when data is empty.
    """
    total = sum(data)
    count = len(data)
    return round(total / count, 2)
def calculate_g():
    """Record the latest increase and print the ``g`` (average increase) field.

    The difference between the two most recent entries of ``values`` is
    appended to ``increases`` when it is positive; otherwise 0 is appended
    (also when fewer than two values exist).

    Prints "g=nan " while ``values`` holds ``period`` items or fewer,
    otherwise the mean of ``increases`` without its first entry, formatted
    with two decimals. Nothing is returned.
    """
    if len(values) >= 2:
        # Both operands are scaled by 10000 before subtracting and scaled
        # back afterwards -- presumably to limit float error; kept as is.
        latest_change = (values[-1] * 10000 - values[-2] * 10000) / 10000
    else:
        latest_change = 0
    increases.append(latest_change if latest_change > 0 else 0)

    if len(values) <= period:
        print("g=nan ", end="")
        return
    print("g=%.2f " % (mean(increases[1:])), end="")
def calculate_r():
    """Print the ``r`` (relative evolution) field and return its sign.

    The evolution is the change from ``values[0]`` to ``values[period]``,
    expressed as a percentage of ``values[0]`` and rounded to an integer.

    Prints "r=nan% " when ``values`` holds fewer than ``period + 1`` items,
    or when the reference value ``values[0]`` is 0 (a relative change from
    zero is undefined).

    Returns:
        The string "nan" when no percentage was printed, otherwise -1 if the
        rounded evolution is negative and 1 if it is zero or positive.
    """
    if len(values) < period + 1:
        print("r=nan% ", end="")
        return "nan"
    reference = values[0]
    if reference == 0:
        # Avoid the ZeroDivisionError and report it like a missing value.
        print("r=nan% ", end="")
        return "nan"
    evolution = round((values[period] - reference) * 100 / reference)
    print("r=%.0f%% " % (evolution), end="")
    return -1 if evolution < 0 else 1
def _ss(data):
"""Return sum of square deviations of sequence data."""
average = mean(data)
ss = sum((x-average)**2 for x in data)
return ss
def stddev():
    """Print the ``s`` field: the standard deviation of the last values.

    Prints "s=nan" while ``values`` holds fewer than ``period`` items.
    Otherwise the sum of squared deviations of the last ``period`` values is
    divided by ``period`` (population form), and its square root is printed
    with two decimals. Nothing is returned.
    """
    if len(values) < period:
        print("s=nan", end="")
        return
    window = values[-period:]
    variance = _ss(window) / period
    print("s=%.2f" % (round(variance ** 0.5, 2)), end="")
def main():
    """Read values from standard input and print one statistics line each.

    Every input line is converted to a float, appended to ``values`` and
    followed by the g, r and s fields on a single output line. When the sign
    returned by calculate_r() is the opposite of the previous sign,
    " a switch occurs" is appended to the line and the switch counter is
    incremented. After each line, ``values`` and ``increases`` are trimmed
    with check_length().

    Reading stops at a line equal to "STOP" or at end of input; the number of
    switches is then printed. A line that is not a number ends the program
    with an error message on stderr and exit status 1 instead of a traceback.
    """
    prev_sign = 0
    switch = 0
    while True:
        try:
            line = input()
        except EOFError:
            # Input closed without the STOP marker: finish normally.
            break
        if line == "STOP":
            break
        try:
            value = float(line)
        except ValueError:
            sys.exit("invalid value %r: expected a number or STOP" % (line,))
        values.append(value)
        calculate_g()
        sign = calculate_r()
        stddev()
        # Reset on every line so that a line without a computed sign can
        # never repeat the message of an earlier line.
        say = ""
        if sign != "nan":
            if sign == prev_sign * -1:
                say = " a switch occurs"
                switch += 1
            prev_sign = sign
        print("%s" % (say))
        check_length(values)
        check_length(increases)
    print("STOP\nGlobal tendency switched %i times" % (switch))


if __name__ == "__main__":
    main()
and here is sample data:
27.7
31.0
32.7
34.7
35.9
37.4
38.2
39.5
40.3
42.2
41.3
40.4
39.8
38.7
36.5
35.7
33.4
29.8
27.5
25.2
24.7
23.1
22.8
22.7
23.6
24.3
24.5
26.7
27.0
27.4
29.8
29.4
31.5
29.6
29.8
28.9
28.7
27.2
25.7
26.0
25.2
21.6
20.3
21.1
20.4
19.8
19.1
19.6
21.2
21.0
21.4
24.0
25.5
25.5
26.4
29.4
32.1
31.4
32.3
35.2
38.3
36.6
38.4
39.9
40.5
39.4
39.0
40.5
42.1
38.7
37.5
38.1
36.5
35.4
STOP
I'm pretty sure my code isn't that clean since I'm pretty new to Python and I would like to know good practices.
1 Answer 1
Header comments
Add comments at the top of your file to briefly describe the purpose of your code. Describe the inputs and outputs and how to run the code. Your question states that the code "extracts data from `txt` files", but the code does not read a file. It takes input from the standard input (stdin).
When I run the code, I see output such as:
g=1.00 r=25% s=0.00
You should explain in the comments what `g`, `r` and `s` mean.
It is good that the code has usage information, but you should add more information about the required `period` argument and how it is used in the code.
Add input prompt instructions
The code uses `input` to accept user input from stdin while running, but the user does not know what input is valid.
Input checking
If the user inputs a string instead of a number, the code dies. Consider exiting more gracefully. For example, the special string `STOP` ends the run gracefully. Perhaps allow it to end when the user enters any non-numeric input.
Names
You gave meaningful names to some of the variables and functions. However, some others could be improved. Consider:
def check_length(array):
The name `array` is too generic. `check` is good, but `length` is again too generic.
`calculate_g`: replace `g` with something more meaningful. The same for `r` in `calculate_r`.
Lint check
I used pylint on the code, and I got this result (among others):
W0611: Unused import statistics (unused-import)
It is a good idea to remove unused code.
Parsing command line
Consider using `argparse` instead of `sys.argv` for parsing the command line.
Here is the code with some of the above suggestions:
'''
Calculate statistics over a period.
The period is given on the command line; the numbers are read from standard input.
'''
import sys
# Command-line handling: the script takes exactly one argument, the period.
if len(sys.argv) != 2:
    print("""SYNOPSIS
./%s period
DESCRIPTION
period the number of days defining a period""" % (sys.argv[0]))
    # sys.exit rather than the bare exit(): the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(1)

# Positive increases between consecutive input values (0 when the value did
# not rise); filled by calculate_g().
increases = []

try:
    period = int(sys.argv[1])
except ValueError:
    # Leave with a readable message instead of a traceback. A string passed
    # to sys.exit is printed to stderr and the exit status is 1.
    sys.exit("period must be an integer, got %r" % (sys.argv[1],))
if period < 1:
    # With a period of zero or less there is nothing to average over and the
    # statistics below would divide by zero.
    sys.exit("period must be at least 1, got %d" % (period,))

# Input values read so far; filled by main().
values = []
def check_length(array):
    """Drop the oldest item of array once it outgrows the period.

    If array holds more than ``period`` items (the module-level setting),
    its first element is removed in place. At most one element is removed
    per call and nothing is returned.
    """
    if len(array) <= period:
        return
    del array[0]
def mean(data):
    """Return the arithmetic mean of data, rounded to two decimal places.

    Raises ZeroDivisionError when data is empty.
    """
    total = sum(data)
    count = len(data)
    return round(total / count, 2)
def calculate_g():
    """Record the latest increase and print the ``g`` (average increase) field.

    The difference between the two most recent entries of ``values`` is
    appended to ``increases`` when it is positive; otherwise 0 is appended
    (also when fewer than two values exist).

    Prints "g=nan " while ``values`` holds ``period`` items or fewer,
    otherwise the mean of ``increases`` without its first entry, formatted
    with two decimals. Nothing is returned.
    """
    if len(values) >= 2:
        # Both operands are scaled by 10000 before subtracting and scaled
        # back afterwards -- presumably to limit float error; kept as is.
        latest_change = (values[-1] * 10000 - values[-2] * 10000) / 10000
    else:
        latest_change = 0
    increases.append(latest_change if latest_change > 0 else 0)

    if len(values) <= period:
        print("g=nan ", end="")
        return
    print("g=%.2f " % (mean(increases[1:])), end="")
def calculate_r():
    """Print the ``r`` (relative evolution) field and return its sign.

    The evolution is the change from ``values[0]`` to ``values[period]``,
    expressed as a percentage of ``values[0]`` and rounded to an integer.

    Prints "r=nan% " when ``values`` holds fewer than ``period + 1`` items,
    or when the reference value ``values[0]`` is 0 (a relative change from
    zero is undefined).

    Returns:
        The string "nan" when no percentage was printed, otherwise -1 if the
        rounded evolution is negative and 1 if it is zero or positive.
    """
    if len(values) < period + 1:
        print("r=nan% ", end="")
        return "nan"
    reference = values[0]
    if reference == 0:
        # Avoid the ZeroDivisionError and report it like a missing value.
        print("r=nan% ", end="")
        return "nan"
    evolution = round((values[period] - reference) * 100 / reference)
    print("r=%.0f%% " % (evolution), end="")
    return -1 if evolution < 0 else 1
def _ss(data):
"""Return sum of square deviations of sequence data."""
average = mean(data)
ss = sum((x-average)**2 for x in data)
return ss
def stddev():
    """Print the ``s`` field: the standard deviation of the last values.

    Prints "s=nan" while ``values`` holds fewer than ``period`` items.
    Otherwise the sum of squared deviations of the last ``period`` values is
    divided by ``period`` (population form), and its square root is printed
    with two decimals. Nothing is returned.
    """
    if len(values) < period:
        print("s=nan", end="")
        return
    window = values[-period:]
    variance = _ss(window) / period
    print("s=%.2f" % (round(variance ** 0.5, 2)), end="")
def main():
    """Prompt for values on standard input and print one statistics line each.

    Every input line is converted to a float, appended to ``values`` and
    followed by the g, r and s fields on a single output line. When the sign
    returned by calculate_r() is the opposite of the previous sign,
    " a switch occurs" is appended to the line and the switch counter is
    incremented. After each line, ``values`` and ``increases`` are trimmed
    with check_length().

    Reading stops at a line equal to "STOP" or at end of input; the number of
    switches is then printed. A line that is not a number ends the program
    with an error message on stderr and exit status 1 instead of a traceback.
    """
    prev_sign = 0
    switch = 0
    while True:
        try:
            line = input('Enter a number or STOP to finish: ')
        except EOFError:
            # Input closed without the STOP marker: finish normally.
            break
        if line == "STOP":
            break
        try:
            value = float(line)
        except ValueError:
            sys.exit("invalid value %r: expected a number or STOP" % (line,))
        values.append(value)
        calculate_g()
        sign = calculate_r()
        stddev()
        # Reset on every line so that a line without a computed sign can
        # never repeat the message of an earlier line.
        say = ""
        if sign != "nan":
            if sign == prev_sign * -1:
                say = " a switch occurs"
                switch += 1
            prev_sign = sign
        print("%s" % (say))
        check_length(values)
        check_length(increases)
    print("STOP\nGlobal tendency switched %i times" % (switch))


if __name__ == "__main__":
    main()
Explore related questions
See similar questions with these tags.