I've written a script to automate the entry of laboratory instrument data into an Excel spreadsheet using pandas and win32com.
I've got the script functioning correctly, but it is painfully slow. In an attempt to profile the code, my acfmp_ToExcel
function seems to be the culprit. I've pasted the profiling data for this function at the bottom. Is there any way to get this code running faster? It takes anywhere from 20-30 seconds each time I run it.
What the function does is take a list of queries (strings within a column of a dataframe, df_acfmp
), then using those queries pull data from the other dataframe columns and put those values into an Excel spreadsheet at specific locations.
The function is essentially one bundle of code (if any():
) repeated 3 times within the main for
loop.
def acfmp_ToExcel(queries):
order_list = {'one':['_410-', '_510-'], 'two':['_420-', '_530-'], 'three': ['_430-', '_590-']}
queerz = Series(queries)
fronts = queerz[queerz.str.endswith("_F")]
fronts_plus = queerz[queerz.str.endswith("F+7")]
backs = queerz[queerz.str.endswith("_B")]
for each_queer in queerz:
if any(q in each_queer for q in order_list['one']):
locale_front = np.where(df_acfmp['Name'].str.contains(fronts.iloc[0]+'$'))
positions_front = locale_front[0]
fnd_f = 'F + 0 mm'
x = xsheet1.Range('b1:b1000').Find(fnd_f)
x_two = xsheet1.Range('b1:b1000').FindNext(x)
x_three = xsheet1.Range('b1:b1000').FindNext(x_two)
x_four = xsheet1.Range('b1:b1000').FindNext(x_three)
x_five = xsheet1.Range('b1:b1000').FindNext(x_four)
x_six = xsheet1.Range('b1:b1000').FindNext(x_five)
x_seven = xsheet1.Range('b1:b1000').FindNext(x_six)
front_queer = fronts_plus.iloc(0)
locale_fronts_plus = np.where(df_acfmp['Name'].str.contains(front_queer, regex = False))
positions_fronts_plus = locale_fronts_plus[0]
fnd_p = 'F + 7 mm'
y_ = xsheet1.Range('b1:b1000').Find(fnd_p)
y_two = xsheet1.Range('b1:b1000').FindNext(y_)
y_three = xsheet1.Range('b1:b1000').FindNext(y_two)
y_four = xsheet1.Range('b1:b1000').FindNext(y_three)
y_five = xsheet1.Range('b1:b1000').FindNext(y_four)
y_six = xsheet1.Range('b1:b1000').FindNext(y_five)
try:
y_seven = xsheet1.Range('b1:b1000').FindNext(y_six)
except: pass
locale_backs = np.where(df_acfmp['Name'].str.contains(backs.iloc[0]))
positions_backs = locale_backs[0]
fnd_b = 'Back'
z_ = xsheet1.Range('b1:b1000').find(fnd_b)
z_two = xsheet1.Range('b1:b1000').FindNext(z_)
z_three = xsheet1.Range('b1:b1000').FindNext(z_two)
z_four = xsheet1.Range('b1:b1000').FindNext(z_three)
z_five = xsheet1.Range('b1:b1000').FindNext(z_four)
z_six = xsheet1.Range('b1:b1000').FindNext(z_five)
try:
z_seven = xsheet1.Range('b1:b1000').FindNext(z_six)
except: pass
if 1 in df_acfmp['Stage_Number']:
for nums in range(5):
x_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 2 in df_acfmp['Stage_Number']:
for nums in range(5):
x_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 3 in df_acfmp['Stage_Number']:
for nums in range(5):
x_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 4 in df_acfmp['Stage_Number']:
for nums in range(5):
x_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if any(r in each_queer for r in order_list['two']):
locale_front = np.where(df_acfmp['Name'].str.contains(fronts.iloc[1] + '$'))
positions_front = locale_front[0]
fnd_f = 'F + 0 mm'
x = xsheet2.Range('b1:b1000').Find(fnd_f)
x_two = xsheet2.Range('b1:b1000').FindNext(x)
x_three = xsheet2.Range('b1:b1000').FindNext(x_two)
x_four = xsheet2.Range('b1:b1000').FindNext(x_three)
x_five = xsheet2.Range('b1:b1000').FindNext(x_four)
x_six = xsheet2.Range('b1:b1000').FindNext(x_five)
x_seven = xsheet2.Range('b1:b1000').FindNext(x_six)
front_queer = fronts_plus.iloc(1)
locale_fronts_plus = np.where(df_acfmp['Name'].str.contains(front_queer, regex = False))
positions_fronts_plus = locale_fronts_plus[0]
fnd_p = 'F + 7 mm'
y_ = xsheet2.Range('b1:b1000').Find(fnd_p)
y_two = xsheet2.Range('b1:b1000').FindNext(y_)
y_three = xsheet2.Range('b1:b1000').FindNext(y_two)
y_four = xsheet2.Range('b1:b1000').FindNext(y_three)
y_five = xsheet2.Range('b1:b1000').FindNext(y_four)
y_six = xsheet2.Range('b1:b1000').FindNext(y_five)
try:
y_seven = xsheet2.Range('b1:b1000').FindNext(y_six)
except: pass
locale_backs = np.where(df_acfmp['Name'].str.contains(backs.iloc[1]))
positions_backs = locale_backs[0]
fnd_b = 'Back'
z_ = xsheet2.Range('b1:b1000').find(fnd_b)
z_two = xsheet2.Range('b1:b1000').FindNext(z_)
z_three = xsheet2.Range('b1:b1000').FindNext(z_two)
z_four = xsheet2.Range('b1:b1000').FindNext(z_three)
z_five = xsheet2.Range('b1:b1000').FindNext(z_four)
z_six = xsheet2.Range('b1:b1000').FindNext(z_five)
try:
z_seven = xsheet2.Range('b1:b1000').FindNext(z_six)
except: pass
if 1 in df_acfmp['Stage_Number'].values:
for nums in range(5):
x_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 2 in df_acfmp['Stage_Number'].values:
for nums in range(5):
x_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 3 in df_acfmp['Stage_Number'].values:
for nums in range(5):
x_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 4 in df_acfmp['Stage_Number'].values:
for nums in range(5):
x_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if any(s in each_queer for s in order_list['three']):
#query_front = fronts.ix[1, 'filter'] + '$'
locale_front = np.where(df_acfmp['Name'].str.contains(fronts.iloc[2] + '$'))
positions_front = locale_front[0]
fnd_f = 'F + 0 mm'
x = xsheet3.Range('b1:b1000').Find(fnd_f)
x_two = xsheet3.Range('b1:b1000').FindNext(x)
x_three = xsheet3.Range('b1:b1000').FindNext(x_two)
x_four = xsheet3.Range('b1:b1000').FindNext(x_three)
x_five = xsheet3.Range('b1:b1000').FindNext(x_four)
x_six = xsheet3.Range('b1:b1000').FindNext(x_five)
x_seven = xsheet3.Range('b1:b1000').FindNext(x_six)
front_queer = fronts_plus.iloc(2)
locale_fronts_plus = np.where(df_acfmp['Name'].str.contains(front_queer, regex = False))
positions_fronts_plus = locale_fronts_plus[0]
fnd_p = 'F + 7 mm'
y_ = xsheet3.Range('b1:b1000').Find(fnd_p)
y_two = xsheet3.Range('b1:b1000').FindNext(y_)
y_three = xsheet3.Range('b1:b1000').FindNext(y_two)
y_four = xsheet3.Range('b1:b1000').FindNext(y_three)
y_five = xsheet3.Range('b1:b1000').FindNext(y_four)
y_six = xsheet3.Range('b1:b1000').FindNext(y_five)
try:
y_seven = xsheet1.Range('b1:b1000').FindNext(y_six)
except: pass
locale_backs = np.where(df_acfmp['Name'].str.contains(backs.iloc[2]))
positions_backs = locale_backs[0]
fnd_b = 'Back'
z_ = xsheet3.Range('b1:b1000').find(fnd_b)
z_two = xsheet3.Range('b1:b1000').FindNext(z_)
z_three = xsheet3.Range('b1:b1000').FindNext(z_two)
z_four = xsheet3.Range('b1:b1000').FindNext(z_three)
z_five = xsheet3.Range('b1:b1000').FindNext(z_four)
z_six = xsheet3.Range('b1:b1000').FindNext(z_five)
try:
z_seven = xsheet3.Range('b1:b1000').FindNext(z_six)
except: pass
if 1 in df_acfmp['Stage_Number'].values:
for nums in range(5):
x_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 2 in df_acfmp['Stage_Number'].values:
for nums in range(5):
x_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 3 in df_acfmp['Stage_Number'].values:
for nums in range(5):
x_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 4 in df_acfmp['Stage_Number'].values:
for nums in range(5):
x_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
Some profiling data that has led me to believe this is the culprit function. I am very new to profiling code so I'm not entirely sure what this is telling me.
ncalls tottime percall cumtime percall filename:lineno(function) 3 2.138 0.713 24.834 8.278 grab_enter.py:688(acfmp_ToExcel) Function called... ncalls tottime cumtime grab_enter.py:688(acfmp_ToExcel) -> 180 0.002 0.626 <COMObject <unknown>>:1(Range) 1 0.000 0.004 <COMObject Range>:1(FindNext) 189 0.003 0.007 C:\Python27\lib\site-packages\pandas\core\frame.py:1757(__getitem__) 36 0.000 0.001 C:\Python27\lib\site-packages\pandas\core\generic.py:686(__contains__) 891 0.003 0.005 C:\Python27\lib\site-packages\pandas\core\generic.py:1030(_indexer) 6 0.000 0.000 C:\Python27\lib\site-packages\pandas\core\generic.py:1932(__getattr__) 6 0.000 0.000 C:\Python27\lib\site-packages\pandas\core\generic.py:1949(__setattr__) 27 0.000 0.001 C:\Python27\lib\site-packages\pandas\core\indexing.py:49(__call__) 864 0.004 0.528 C:\Python27\lib\site-packages\pandas\core\indexing.py:1198(__getitem__) 3 0.000 0.001 C:\Python27\lib\site-packages\pandas\core\series.py:114(__init__) 72 0.000 0.001 C:\Python27\lib\site-packages\pandas\core\series.py:296(values) 9 0.000 0.003 C:\Python27\lib\site-packages\pandas\core\series.py:507(__getitem__) 3 0.000 0.000 C:\Python27\lib\site-packages\pandas\core\series.py:1011(__iter__) 6 0.000 0.000 C:\Python27\lib\site-packages\pandas\core\series.py:2454(str) 9 0.000 0.002 C:\Python27\lib\site-packages\pandas\core\strings.py:879(wrapper3) 81 0.001 0.025 C:\Python27\lib\site-packages\pandas\core\strings.py:963(contains) 810 0.010 3.217 C:\Python27\lib\site-packages\win32com\client\dynamic.py:184(__call__) 1944 0.053 9.291 C:\Python27\lib\site-packages\win32com\client\dynamic.py:444(__getattr__) 810 0.029 5.190 C:\Python27\lib\site-packages\win32com\client\dynamic.py:524(__setattr__) 9 0.000 0.000 grab_enter.py:697(<genexpr>) 9 0.000 0.000 grab_enter.py:765(<genexpr>) 9 0.000 0.000 grab_enter.py:834(<genexpr>) 81 0.000 0.000 {any} 81 0.001 0.003 {numpy.core.multiarray.where} 54 0.000 0.000 {range}
1 Answer 1
Do not bare except
try:
z_seven = xsheet1.Range('b1:b1000').FindNext(z_six)
except: pass
Should be avoided as any kind of error will be expected, instead use:
try:
z_seven = xsheet1.Range('b1:b1000').FindNext(z_six)
except TheExceptioIExpect:
pass
Remove the massive code duplication
if 1 in df_acfmp['Stage_Number']:
for nums in range(5):
x_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 2 in df_acfmp['Stage_Number']:
for nums in range(5):
x_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_five.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 3 in df_acfmp['Stage_Number']:
for nums in range(5):
x_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_six.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
if 4 in df_acfmp['Stage_Number']:
for nums in range(5):
x_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_seven.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
becomes:
def contains_any(items, lst):
return any(i in lst for i in items)
if contains_any([1,2,3,4], df_acfmp['Stage_Number']):
for nums in range(5):
x_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_front[0], nums]
y_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_fronts_plus[0], nums]
z_four.Offset(1, nums+2).Value = df_acfmp.iloc[positions_backs[0], nums]
-
1\$\begingroup\$ Thank you very much for the response. Reducing the large code duplication in the manner you mentioned helped reduce the run time significantly. I will look out for more repetitive portions of code as red flags for optimization. \$\endgroup\$big_ligands– big_ligands2015年04月18日 22:26:41 +00:00Commented Apr 18, 2015 at 22:26