DMelt:DataAnalysis/6 Advanced Data Fitting
Advanced data fitting
This section deals with advanced techniques of fitting data using data in arbitrary dimension, arbitrary function. In this section we will discuss how to perform more interactive fit using scripting.
Chi2 fit in 1D
Let us perform a fit of data points using a function defined by a Python script:
from java.lang.Math import * from jhplot import * class PowerLawFunc(): # function describing a power law def __init__(self, p0,p1): self.p0=p0; self.p1=p1; def valueOf(self, x): return self.p0*pow(x,self.p1); # power law def getPlot(self,min,max): # plotting part f=P1D("p0*x^p1") step=(max-min)/100.0 for i in range(100): x=min+step*i y=self.valueOf(x) f.add(x,y) return f
As you can see, this is a simple power-law function of one variable "x" defined in the method "valueOf". One interesting method of this function is getPlot(min,max) that allows to plot this function between min and max. This method returns jhplot.P1D plottable on jhplot.HPlot
Now let us perform a fit with this function using Minuit minimisation program and the Chi2 method.
[math]\displaystyle{ \chi^2 = \sum_{i} \frac{(y_i - e_i)^2}{ (\sigma_i)^2} }[/math]
where "y" is the data point, "e" is the expectation, $\sigma$ is “variance” related to the measurement error for "y"
Let us make a Jython script which:
- Performs Chi2 fit with the above function
- Prints evolution of chi2 for each iteration and print final chi2
- Calculates Minuit convergence steps and change Migrad/Minuit strategy in case of a failed fit
- Calculates calculates Migrad parameter correlations
- Calculate uncertainty on the fit parameters
- Plot data and the minimized function
For this task, we will use org.freehep.math.minuit.MnMigrad class.
The script that does all the above using this class is shown bellow:
from org.freehep.math.minuit import FCNBase,MnMigrad,MnUserParameters from java.lang.Math import * from jhplot import * from java.awt import Color data=P1D("data") # input data data.add(5,4,2); data.add(10,8,3) # each point has X, Y, standard error on Y data.add(20,10,3); data.add(30,12,3) data.add(40,18,4); data.add(70,20,4) data.add(90,26,5); data.add(150,37,6) data.add(200,50,6); data.add(300,60,7) data.add(500,90,9); data.add(700,120,10) class PowerLawFunc(FNon): # function describing a power law def value(self, x): return self.p[0]*pow(x[0],self.p[1]) class PowerLawChi2FCN(FCNBase): # define chi2 function def __init__(self, meas, pos, mvar): self.meas=meas; self.pos=pos; self.mvar=mvar; def valueOf(self, par): pl = PowerLawFunc("PowerLaw",1,2) pl.setParameters(par) chi2 = 0 for n in range(len(self.meas)): delta = pl.value([self.pos[n]]) - self.meas[n] sigma2=self.mvar[n]*self.mvar[n] if (sigma2>0): chi2=chi2+(delta*delta)/sigma2 print "Chi2=",chi2 return chi2; theFCN = PowerLawChi2FCN(data.getArrayY(), data.getArrayX(), data.getArrayErr()) upar = MnUserParameters() upar.add("p0", 1.0, 0.1) # initial value and expected error upar.add("p1", 0.2, 0.1); migrad =MnMigrad(theFCN, upar) print "start Migrad " vmin = migrad.minimize() if vmin.isValid()==False: print "Alternative strategy" migrad = MnMigrad(theFCN, upar, 2) vmin = migrad.minimize() state=vmin.userState() output=state.params() # output parameters print vmin # print details of the fit c1 = HPlot() # plot data and the fit function c1.visible() min=0; max=800 # min and max values for plotting c1.setRangeX(min,max) c1.setRangeY(0,150) c1.draw(data) # plot data pl = PowerLawFunc("PowerLaw",1,2) # plot fit result pl.setParameters(output) ff=F1D(pl,min,max) ff.setPenWidth(2); ff.setColor(Color.blue) c1.draw(ff)
The fit result of the data is shown below, where the blue result is the fit:
The fit parameters are:
p0=1.14152 +/- 0.276695 p1=0.704415 +/- 0.0431544
Here is the complete output of the script:
start Migrad Chi2= 470.358172895 Chi2= 151.491535555 Chi2= 152.13739461 Chi2= 151.491535555 Chi2= 150.199077966 Chi2= 146.963922788 Chi2= 137.231555692 Chi2= 108.020960351 Chi2= 29.5665676492 Chi2= 224.944087841 Chi2= 11.1675109612 Chi2= 3.09063261351 Chi2= 2.83286407423 Chi2= 2.83089798953 Chi2= 2.60969185498 Chi2= 2.58038208008 Chi2= 2.37208952261 Chi2= 2.37208949274 Chi2= 2.37209280753 Minuit did successfully converge. # of function calls: 77 minimum function value: 2.37209 minimum edm: 2.72672e-09 minimum internal state vector: LAVector parameters: 1.14152 0.704415 minimum internal covariance matrix: LASymMatrix parameters: 0.0765603 -0.0117485 -0.0117485 0.00186230 # ext. || name || type || value || error +/- 0 || p0 || free || 1.14152 || 0.276695 1 || p1 || free || 0.704415 || 0.0431544 MnUserCovariance: 0.0765603 -0.0117485 -0.0117485 0.00186230 MnUserCovariance parameter correlations: 1.00000 -0.983907 -0.983907 1.00000 MnGlobalCorrelationCoeff: 0.983907 0.983907
Maximum Likelihood fit in 1D
Alternatively, one can fit data using the Maximum Likelihood method. Again, we will use MnMigrad class. This approach is shown in this script:
from org.freehep.math.minuit import FCNBase,MnMigrad,MnUserParameters from java.lang.Math import * from jhplot import * from java.awt import Color data=P1D("data") data.add(5,4,2); data.add(10,8,3) # each point has X, Y, standard error on Y data.add(20,10,3); data.add(30,12,3) data.add(40,18,4); data.add(70,20,4) data.add(90,26,5); data.add(150,37,6) data.add(200,50,6); data.add(300,60,7) data.add(500,90,9); data.add(700,120,10) class PowerLawFunc(FNon): # function describing a power law def value(self, x): return self.p[0]*pow(x[0],self.p[1]) class PowerLawLogLikeFCN(FCNBase): # define log-likehood. def __init__(self, meas, pos, mvar): self.meas=meas; self.pos=pos; self.mvar=mvar; def valueOf(self, par): pl = PowerLawFunc("PowerLaw",1,2) pl.setParameters(par) logsum = 0 for n in range(len(self.meas)): mu = pl.value([self.pos[n]]) k=self.meas[n] logsum = logsum+(k*log(mu) - mu); print "Log likehood=",-1*logsum return -1*logsum; theFCN = PowerLawLogLikeFCN(data.getArrayY(), data.getArrayX(), data.getArrayErr()) upar = MnUserParameters() upar.add("p0", 1.0, 0.1) # initial value and expected error upar.add("p1", 0.2, 0.1); migrad =MnMigrad(theFCN, upar) print "start Migrad " vmin = migrad.minimize() if vmin.isValid()==False: print "Alternative strategy" migrad = MnMigrad(theFCN, upar, 2) vmin = migrad.minimize() state=vmin.userState() output=state.params() # output parameters print vmin # print details of the fit c1 = HPlot() # plot data and the fit function c1.visible() min=0; max=800 # min and max values for plotting c1.setRangeX(min,max) c1.setRangeY(0,150) c1.draw(data) pl = PowerLawFunc("PowerLaw",1,2) # plot fit result pl.setParameters(output) ff=F1D(pl,min,max) ff.setPenWidth(2); ff.setColor(Color.blue) c1.draw(ff)
Note the difference: we replaced "PowerLawChi2FCN" that implements a Chi2 minimization function with "PowerLawLogLikeFCN" which calculates the sum of $y_i*\log(e_i)-e_i$. The Minuit minimizes this function (note the minus in the front, which means that the likelihood sum should be maximal). As before, the script prints the value of the maximum likelihood. The above examples can be written in Java.
Fitting data using GROOT
GROOT provides another Java API to fit data. Here is a simple example that shows how to fit data using a custom function:
from javax.swing import JFrame from org.jlab.groot.data import H1F from org.jlab.groot.fitter import DataFitter from org.jlab.groot.graphics import * from org.jlab.groot.math import * from java.lang import Math # define a function class CustomFunction(Func1D): # function describing a power law def evaluate(self, x): xsum = 0.0 for i in range(self.getNPars()): xsum = xsum + self.getParameter(i)*Math.pow(x,i) return xsum frame = JFrame("Basic GROOT Demo") canvas =EmbeddedCanvas() frame.setSize(800,500) histogram = H1F("histogram",100,-5.0,5.0) f1 = CustomFunction("CustomFunction", -5.1, 5.1) f1.addParameter("p0") f1.setParameter(0, 30.0) f1.addParameter("p1") f1.setParameter(1, -6.0) f1.addParameter("p2") f1.setParameter(2, 1.8) f1.addParameter("p3") f1.setParameter(3, .0001) rndm = RandomFunc(f1) for j in range(64000): histogram.fill(rndm.random()) histogram.setTitleX("Randomly Generated Function") histogram.setTitleY("Counts") canvas.getPad(0).setTitle("CustomFunction Example") histogram.setLineWidth(2) histogram.setLineColor(21) histogram.setFillColor(34) histogram.setOptStat(10) f1.setLineColor(5) f1.setLineWidth(7) f1.setOptStat(111110) DataFitter.fit(f1, histogram, "Q") canvas.draw(histogram) canvas.draw(f1,"same") canvas.setFont("Avenir") canvas.setTitleSize(32) canvas.setAxisTitleSize(24) canvas.setAxisLabelSize(18) canvas.setStatBoxFontSize(18) frame.add(canvas) frame.setLocationRelativeTo(None) frame.setVisible(True)
which leads to this image:
Fitting data in multiple dimensions
The org.freehep.math.minuit.MnMigrad class can also perform fit in multiple dimensions. Now we will show how to fit data in a fully controlled way in several dimensions. As example, we will consider 2D data fitted with 2D function.
from org.freehep.math.minuit import FCNBase,MnMigrad,MnUserParameters from java.lang.Math import * from jhplot import * from java.awt import Color from java.util import Random h1 = H2D("Test",16,-5.0, 5.0, 16, -5.0, 5.0) rand = Random(); for i in range(10000): h1.fill(2*rand.nextGaussian(),rand.nextGaussian()) class Gauss2D(FNon): # 2D Gaussian def value(self, x): if self.p[2]>0 and self.p[4]>0: d1=(x[0]-self.p[1])/self.p[2] d2=(x[1]-self.p[3])/self.p[4] return self.p[0]*exp( -d1*d1-d2*d2 ) else: return 1.0e10 class GaussChi2(FCNBase): # define chi2 function def __init__(self, meas, posX, posY, errors): self.meas=meas; self.posX=posX; self.posY=posY; self.errX=0.5*(self.posX[1]-self.posX[0]) self.errY=0.5*(self.posY[1]-self.posY[0]) self.errors=errors; def valueOf(self, par): pl = Gauss2D("2D",2,5) # 2 varibles, 5 parameters pl.setParameters(par) chi2 = 0 for n1 in range(len(self.posX)): for n2 in range(len(self.posY)): delta = pl.value([self.posX[n1],self.posY[n2]])-self.meas[n1][n2] sigma2=self.errors[n1][n2]*self.errors[n1][n2] if (sigma2>0): chi2=chi2+(delta*delta)/sigma2; print "Chi2=",chi2 return chi2; theFCN = GaussChi2(h1.binHeights(), h1.getLowerEdgesX(), h1.getLowerEdgesY(), h1.binErrors()) upar = MnUserParameters() upar.add("p0", 1000.0, 1) # initial value and expected error upar.add("p1", 0.0, 0.1); upar.add("p2", 1.0, 0.1) # initial value and expected error upar.add("p3", 0.0, 0.1); upar.add("p4", 1.0, 0.1); migrad =MnMigrad(theFCN, upar) vmin = migrad.minimize() if vmin.isValid()==False: print "Alternative strategy" migrad = MnMigrad(theFCN, upar, 2) vmin = migrad.minimize() state=vmin.userState() output=state.params() # output parameters print vmin # print details of the minimization c1 = HPlot3D("Plot",600,400,2,1) c1.visible(1) c1.setRange(-5,5,-5,5,0,500) c1.draw(h1) c1.cd(2,1) # plot fit function c1.setRange(-5,5,-5,5,0,500) pl =Gauss2D("Gauss2D",2,5) # 2 varibles, 5 parameters pl.setParameters(output) # set fitted parameters ff=F2D(pl,-5,5,-5,5) c1.draw(ff)
The result of this script is shown below. The data are plotted on the left canvas, while the function with fitted parameters after the chi2 minimisation is on the right figure.
Similarly, one can implement the likelihood fit as it was discussed inn 1D case.