Calculate the statistical mean of different files and save it into a CSV file

Question 1

In this code I calculate several statistical functions of values from different files and save the results into CSV files (one file for each statistical function).

def mean(data):
    """
    Placeholder for the mean of one column.

    The implementation is not shown here; the function currently
    ignores its argument and returns None.

    :param data: values of one column
    :return: none (not implemented)
    """
    return None
def standard_deviation(data):
    """
    Placeholder for the standard deviation of one column.

    The implementation is not shown here; the function currently
    ignores its argument and returns None.

    :param data: values of one column
    :return: none (not implemented)
    """
    return None
def afd(data):
    """
    Placeholder for the "afd" statistic of one column.

    The implementation is not shown here; the function currently
    ignores its argument and returns None.

    :param data: values of one column
    :return: none (not implemented)
    """
    return None
def norma_afd(data):
    """
    Placeholder for the "norm_afd" statistic of one column.

    The implementation is not shown here; the function currently
    ignores its argument and returns None.

    :param data: values of one column
    :return: none (not implemented)
    """
    return None
def asd(data):
    """
    Placeholder for the "asd" statistic of one column.

    The implementation is not shown here; the function currently
    ignores its argument and returns None.

    :param data: values of one column
    :return: none (not implemented)
    """
    return None
def norma_asd(data):
    """
    Placeholder for the "norm_asd" statistic of one column.

    The implementation is not shown here; the function currently
    ignores its argument and returns None.

    :param data: values of one column
    :return: none (not implemented)
    """
    return None
def calculation(file):
    """
    Compute every statistic for each column of a DataFrame.

    :param file: DataFrame whose columns are processed one by one
        (originally documented as 56x7681 -- TODO confirm orientation,
        since the results hold one entry per column)
    :return: tuple of 6 lists (mean, std, afd, norm_afd, asd, norm_asd),
        each with one value per column, in column order
    """
    # Order matters: it defines the order of the returned lists.
    statistics = (mean, standard_deviation, afd, norma_afd, asd, norma_asd)
    results = tuple([] for _ in statistics)
    for column in file:
        # Convert the column once and reuse the array for all statistics.
        data = file[column].to_numpy()
        for statistic, values in zip(statistics, results):
            values.append(statistic(data))
    return results
def run(load_path, save_path):
    """
    Compute the statistics of every file in a folder and save them as CSV.

    Each DataFrame yielded by yield_data() is passed to calculation();
    the per-file results are collected and written to one CSV file per
    statistic (one row per input file, no index and no header).

    :param load_path: the folder path to load all the different files
    :param save_path: the folder save path (created if it does not exist)
    :return: none
    """
    # Output file names (without extension), in the same order as the
    # lists returned by calculation().
    names = ("mean", "std", "afd", "norm_afd", "asd", "norm_asd")
    collected = {name: [] for name in names}
    for _current_path, file in yield_data(load_path, data_type="data"):
        for name, values in zip(names, calculation(file)):
            collected[name].append(values)
    # exist_ok avoids the race between checking for the folder and
    # creating it.
    os.makedirs(save_path, exist_ok=True)
    for name, rows in collected.items():
        target = os.path.join(save_path, name + ".csv")
        pd.DataFrame(rows).to_csv(target, index=False, header=False)

Is there a better and more efficient way to write this code? This may be a stupid question, but I would be really interested to know if there is a better way.

Question 2

Are you missing some import statements? Please edit to complete the program.

Question 3

As the code cannot be run, here is a wild guess on a way to restructure it:

def mean(fileResults, data):
    """
    Record the mean of one column in the shared results dictionary.

    The computation itself is still a placeholder, so 0 is stored.

    :param fileResults: dict whose "mean" list receives the result
    :param data: values of one column (not used yet)
    :return: none
    """
    # do computation
    computed = 0
    target = fileResults["mean"]
    target.append(computed)
def standard_deviation(fileResults, data):
    """
    Placeholder for the standard deviation of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def afd(fileResults, data):
    """
    Placeholder for the "afd" statistic of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def norma_afd(fileResults, data):
    """
    Placeholder for the "norm_afd" statistic of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def asd(fileResults, data):
    """
    Placeholder for the "asd" statistic of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def norma_asd(fileResults, data):
    """
    Placeholder for the "norm_asd" statistic of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def calculation(file):
    """
    Run every statistic callback on each column of a DataFrame.

    Each callback receives the shared results dictionary and the column
    values, and is responsible for appending its own result.

    :param file: DataFrame whose columns are processed one by one
        (originally documented as 56x7681 -- TODO confirm orientation)
    :return: dict with the keys "mean", "std", "afd", "norm_afd", "asd"
        and "norm_asd", each mapping to the list filled by the callbacks
    """
    file_results = {
        "mean": [],
        "std": [],
        "afd": [],
        "norm_afd": [],
        "asd": [],
        "norm_asd": [],
    }
    # All callbacks share the signature (results_dict, column_values).
    callbacks = (
        mean,
        standard_deviation,
        afd,
        norma_afd,
        asd,
        norma_asd,
    )
    for column in file:
        # Convert the column once and reuse the array for all callbacks.
        data = file[column].to_numpy()
        for callback in callbacks:
            callback(file_results, data)
    return file_results
def run(load_path, save_path):
    """
    Compute the statistics of every file in a folder and save them as CSV.

    Each DataFrame yielded by yield_data() is passed to calculation();
    the per-file result lists are grouped by statistic name and written
    to "<name>.csv" in save_path (one row per input file, no index and
    no header).

    :param load_path: the folder path to load all the different files
    :param save_path: the folder save path (created if it does not exist)
    :return: none
    """
    results = {}
    for _current_path, file in yield_data(load_path, data_type="data"):
        for key, value in calculation(file).items():
            # Create the per-statistic list on first use, then extend it.
            results.setdefault(key, []).append(value)
    # exist_ok avoids the race between checking for the folder and
    # creating it.
    os.makedirs(save_path, exist_ok=True)
    for key, value in results.items():
        target = os.path.join(save_path, key + ".csv")
        pd.DataFrame(value).to_csv(target, index=False, header=False)

So, instead of repeating the function calls, especially when the function prototypes are the same, you can use a list of function callbacks and simply iterate over it.

You can also use a dictionary to store your data, instead of n lists. It's a little more scalable, and clearer than returning a 6-tuple. It also avoids a lot of copy-paste when you save the CSV files.

Question 4

Agreed, a list of functions is the way to go.

Question 5

@VincentRG To work correctly for my purpose, I initialize the dictionary fileResults in calculation with {"mean": [], "std": [], "afd": [], "norm_afd": [], "asd": [], "norm_asd": []}, and the calculation methods append to the lists. But many thanks for your solution.

Question 6

Yes, you're right: several columns per file are parsed, and for each column you do all the computations. Hence the need to append to each list for each column. I hadn't seen that.

VincentRG VincentRG 1814 bronze badges · Accepted Answer · 2020-02-11 17:16:35Z

As the code cannot be run, here is a wild guess on a way to restructure it:

def mean(fileResults, data):
    """
    Record the mean of one column in the shared results dictionary.

    The computation itself is still a placeholder, so 0 is stored.

    :param fileResults: dict whose "mean" list receives the result
    :param data: values of one column (not used yet)
    :return: none
    """
    # do computation
    computed = 0
    target = fileResults["mean"]
    target.append(computed)
def standard_deviation(fileResults, data):
    """
    Placeholder for the standard deviation of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def afd(fileResults, data):
    """
    Placeholder for the "afd" statistic of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def norma_afd(fileResults, data):
    """
    Placeholder for the "norm_afd" statistic of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def asd(fileResults, data):
    """
    Placeholder for the "asd" statistic of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def norma_asd(fileResults, data):
    """
    Placeholder for the "norm_asd" statistic of one column.

    Not implemented: both arguments are ignored and nothing is stored.

    :param fileResults: results dictionary (left untouched for now)
    :param data: values of one column
    :return: none
    """
    return None
def calculation(file):
    """
    Run every statistic callback on each column of a DataFrame.

    Each callback receives the shared results dictionary and the column
    values, and is responsible for appending its own result.

    :param file: DataFrame whose columns are processed one by one
        (originally documented as 56x7681 -- TODO confirm orientation)
    :return: dict with the keys "mean", "std", "afd", "norm_afd", "asd"
        and "norm_asd", each mapping to the list filled by the callbacks
    """
    file_results = {
        "mean": [],
        "std": [],
        "afd": [],
        "norm_afd": [],
        "asd": [],
        "norm_asd": [],
    }
    # All callbacks share the signature (results_dict, column_values).
    callbacks = (
        mean,
        standard_deviation,
        afd,
        norma_afd,
        asd,
        norma_asd,
    )
    for column in file:
        # Convert the column once and reuse the array for all callbacks.
        data = file[column].to_numpy()
        for callback in callbacks:
            callback(file_results, data)
    return file_results
def run(load_path, save_path):
    """
    Compute the statistics of every file in a folder and save them as CSV.

    Each DataFrame yielded by yield_data() is passed to calculation();
    the per-file result lists are grouped by statistic name and written
    to "<name>.csv" in save_path (one row per input file, no index and
    no header).

    :param load_path: the folder path to load all the different files
    :param save_path: the folder save path (created if it does not exist)
    :return: none
    """
    results = {}
    for _current_path, file in yield_data(load_path, data_type="data"):
        for key, value in calculation(file).items():
            # Create the per-statistic list on first use, then extend it.
            results.setdefault(key, []).append(value)
    # exist_ok avoids the race between checking for the folder and
    # creating it.
    os.makedirs(save_path, exist_ok=True)
    for key, value in results.items():
        target = os.path.join(save_path, key + ".csv")
        pd.DataFrame(value).to_csv(target, index=False, header=False)

So, instead of repeating the function calls, especially when the function prototypes are the same, you can use a list of function callbacks and simply iterate over it.

You can also use a dictionary to store your data, instead of n lists. It's a little more scalable, and clearer than returning a 6-tuple. It also avoids a lot of copy-paste when you save the CSV files.

@VincentRG To work correctly for my purpose, I initialize the dictionary fileResults in calculation with {"mean": [], "std": [], "afd": [], "norm_afd": [], "asd": [], "norm_asd": []}, and the calculation methods append to the lists. But many thanks for your solution.
Yes, you're right: several columns per file are parsed, and for each column you do all the computations. Hence the need to append to each list for each column. I hadn't seen that.

Stack Exchange Network

Calculate the statistical mean of different files and save it into a CSV file

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Calculate the statistical mean of different files and save it into a CSV file

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions