In my project I have to make curve-fitting with a lots of parameters, so scipy curve_fit struggles to find the answer.
For example: \$\ c_0 + c_1 \cdot cos (b_0 + b_1\cdot x + b_2\cdot x^2+ b_3\cdot x^3)\$ ,where \$ c_i, b_i \$ are the params to determine.
That's why I made a method which first tries to fit the desired function to only a little part of the data, then extends the area of fitting and uses the last optimal parameters as initial values for the next cycle. I wrote this object oriented, however I'm not sure it's nicely written or I don't know it's necessary to make it OO. I've never been taught OOP, this is what I've understood so far. The class has an attribute called obj. That's because it's embedded in a PyQt5 project, so I need the plot there. I modified the code to work on it's own. Here is an example dataset you can try.
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
########## these functions are actually imported from another file ###########
def find_nearest(array, value):
array = np.asarray(array)
idx = (np.abs(value - array)).argmin()
return array[idx], idx
def cos_fit1(x,c0, c1, b0, b1):
return c0 + c1*np.cos(b0 + b1*x)
def cos_fit2(x,c0, c1, b0, b1, b2):
return c0 + c1*np.cos(b0 + b1*x + b2*x**2)
def cos_fit3(x,c0, c1, b0, b1, b2, b3):
return c0 + c1*np.cos(b0 + b1*x + b2*x**2 + b3*x**3)
def cos_fit4(x,c0, c1, b0, b1, b2, b3, b4):
return c0 + c1*np.cos(b0 + b1*x + b2*x**2 + b3*x**3 + b4*x**4)
def cos_fit5(x,c0, c1, b0, b1, b2, b3, b4, b5):
return c0 + c1*np.cos(b0 + b1*x + b2*x**2 + b3*x**3 + b4*x**4 + b5*x**5)
##############################################################################
class FitOptimizer(object):
"""Class to help achieve better fitting results."""
def __init__(self, x, y, ref, sam, func=None, p0=None):
self.x = x
self.y = y
self.ref = ref
self.sam = sam
if not isinstance(self.x, np.ndarray):
try:
self.x = np.asarray(self.x)
except:
raise
if not isinstance(self.y, np.ndarray):
try:
self.y = np.asarray(self.y)
except:
raise
if not isinstance(self.ref, np.ndarray):
try:
self.ref = np.asarray(self.ref)
except:
pass
if not isinstance(self.sam, np.ndarray):
try:
self.sam = np.asarray(self.sam)
except:
pass
if len(self.ref) == 0 or len(self.sam) == 0:
self._y_norm = self.y
else:
self._y_norm = (self.y - self.ref - self.sam) / (2 *
np.sqrt(self.sam * self.ref))
self.func = func
if p0 is None:
self.p0 = []
else:
self.p0 = p0
self.popt = self.p0
self._init_set = False
self.obj = None
self.counter = 0
def set_initial_region(self, percent, center):
""" Determines the initial region to fit"""
self._init_set = True
_, idx = find_nearest(self.x, center)
self._upper_bound = np.floor(idx + (percent/2)*(len(self.x) + 1))
self._lower_bound = np.floor(idx - (percent/2)*(len(self.x) + 1))
self._upper_bound = self._upper_bound.astype(int)
self._lower_bound = self._lower_bound.astype(int)
if self._lower_bound < 0:
self._lower_bound = 0
if self._upper_bound > len(self.x):
self._upper_bound = len(self.x)
self._x_curr = self.x[self._lower_bound:self._upper_bound]
self._y_curr = self._y_norm[self._lower_bound:self._upper_bound]
def _extend_region(self, extend_by=0.2):
""" Extends region of fit"""
self._new_lower = np.floor(self._lower_bound - extend_by*len(self.x))
self._new_upper = np.floor(self._upper_bound + extend_by*len(self.x))
self._new_lower = self._new_lower.astype(int)
self._new_upper = self._new_upper.astype(int)
self._lower_bound = self._new_lower
self._upper_bound = self._new_upper
if self._new_lower < 0:
self._new_lower = 0
if self._new_upper > len(self.x):
self._new_upper = len(self.x)
self._x_curr = self.x[self._new_lower:self._new_upper]
self._y_curr = self._y_norm[self._new_lower:self._new_upper]
def _make_fit(self):
""" Makes fit """
try:
if len(self._x_curr) == len(self.x):
return True
self.popt, self.pcov = curve_fit(self.func, self._x_curr, self._y_curr,
maxfev = 200000, p0 = self.p0)
self.p0 = self.popt
except RuntimeError:
if len(self.popt) > 4:
self.p0[:3] = self.popt[:3] + np.random.normal(0, 100, len(self.popt)-3)
else:
self.p0 = self.popt + np.random.normal(0,100, len(self.popt))
self.popt, self.pcov = curve_fit(self.func, self._x_curr, self._y_curr,
maxfev = 200000, p0 = self.p0)
def _fit_goodness(self):
""" r^2 value"""
residuals = self._y_curr - self.func(self._x_curr, *self.popt)
ss_res = np.sum(residuals**2)
ss_tot = np.sum((self._y_curr - np.mean(self._y_curr))**2)
return 1 - (ss_res / ss_tot)
def _show_fit(self):
""" Shows fit on self.obj """
try:
self.obj.plot(self._x_curr, self._y_curr, 'k-', label = 'Affected data')
self.obj.plot(self._x_curr, self.func(self._x_curr, *self.popt),
'r--', label = 'Fit')
self.obj.show()
except Exception as e:
print(e)
def run(self, r_extend_by, r_threshold=0.9, max_tries=10000):
if self._init_set == False:
raise ValueError('Set the initial conditions.')
self._make_fit()
while self._fit_goodness() > r_threshold:
self._extend_region(r_extend_by)
self._make_fit()
self.counter +=1
if self._make_fit() == True:
self._show_fit()
return self.popt
break
if self.counter == max_tries:
self._show_fit()
return np.zeros_like(self.popt)
break
while self._fit_goodness() < r_threshold:
self._make_fit()
self.counter +=1
if self.counter == max_tries:
self._show_fit()
return np.zeros_like(self.popt)
break
""" EXAMPLE """
a, b, c, d = np.loadtxt('test.txt', delimiter = ',', unpack = True )
f = FitOptimizer(a, b, c, d, func = cos_fit3)
f.obj = plt
f.p0 = [1,1,1,1,1,1]
f.set_initial_region(0.2, 2.4)
f.run(r_extend_by = 0.1, r_threshold = 0.85)
I appreciate any improvements in the code.
1 Answer 1
Don't repeat yourself
Your family of cos_fit
functions can call the full-form function, e.g.
def cos_fit1(x,c0, c1, b0, b1):
return cos_fit5(x, c0, c1, b0, b1, 0, 0, 0, 0)
You may also want to consider using built-in polynomial support, e.g. https://docs.scipy.org/doc/numpy/reference/generated/numpy.polynomial.polynomial.polyval.html#numpy.polynomial.polynomial.polyval
As for these expressions:
self._upper_bound = np.floor(idx + (percent/2)*(len(self.x) + 1))
self._lower_bound = np.floor(idx - (percent/2)*(len(self.x) + 1))
reuse the inner term:
delta = percent * (1 + len(self.x)) / 2
self._upper_bound = np.floor(idx + delta)
self._lower_bound = np.floor(idx - delta)
No-op except
Delete this:
except:
raise
It doesn't do anything helpful.
As for your exception-swallowing
except:
pass
this is a deeply bad idea. You need to narrow the exception type caught to the one you're actually expecting. If the program spends any length of time in the try
block, this form of except
will prevent Ctrl+C break from working.
Order of operations
(self.y - self.ref - self.sam) / (2 *
np.sqrt(self.sam * self.ref))
can be
(self.y - self.ref - self.sam) / 2 / np.sqrt(self.sam * self.ref)
Similarly, 1 - (ss_res / ss_tot)
doesn't need parens.
This is a comment
def _make_fit(self):
""" Makes fit """
No kidding! Either your comments should add more than the method name, or you should just delete them.
No-op break
return np.zeros_like(self.popt)
break
The break
is never executed due to the return
, so you can delete it.