Source code for pkpd.protocols.protocol_pkpd_regression_labels

# **************************************************************************
# *
# * Authors:     Carlos Oscar Sorzano (info@kinestat.com)
# *
# * Kinestat Pharma
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'info@kinestat.com'
# *
# **************************************************************************

import numpy as np
import math

import pyworkflow.protocol.params as params
from .protocol_pkpd import ProtPKPD
from pkpd.objects import PKPDVariable


[docs]class ProtPKPDRegressionLabel(ProtPKPD): """ Perform a regression between two labels\n Protocol created by http://www.kinestatpharma.com\n """ _label = 'regression labels' #--------------------------- DEFINE param functions -------------------------------------------- def _defineParams(self, form): form.addSection('Input') form.addParam('inputExperiment', params.PointerParam, label="Input experiment", important=True, pointerClass='PKPDExperiment', help='Select an experiment with samples') form.addParam('labelY', params.StringParam, label="Y Label", default="", help='Name of a numerical label') form.addParam('labelX', params.StringParam, label="X Label", default="", help='Name of a numerical label') form.addParam('regressionType', params.EnumParam, choices=["Y=f(X)","Y=f(log(X))","log(Y)=f(X)","log(Y)=f(log(X))"], label="Regression type", default=0, help='f is always a polynomial of a given degree. log is the natural logarithm in base e\n') form.addParam('degree', params.IntParam, label="Degree", default=1, help='Degree of the polynomial f') #--------------------------- INSERT steps functions -------------------------------------------- def _insertAllSteps(self): self._insertFunctionStep('runRegression',self.inputExperiment.get().getObjId(), self.labelY.get(), self.labelX.get(), self.regressionType.get(), self.degree.get()) #--------------------------- STEPS functions --------------------------------------------
[docs] def runRegression(self, objId, labelY, labelX, regressionType, degree): X, Y, XtoUse, YtoUse = self.getXYValues() self.printSection("Performing regressions") p,V = np.polyfit(XtoUse,YtoUse,self.degree.get(),cov=True) mu = p Ypredicted = np.polyval(p, XtoUse) fhOut = open(self._getPath("results.txt"),'w') degree = self.degree.get() if self.regressionType.get()==0: modelToPrint = "%s="%labelY for i in range(0,degree+1): modelToPrint+="+(%f)*%s^(%d)"%(p[i],labelX,degree-i) self.doublePrint(fhOut,"Model: %s"%modelToPrint) self.doublePrint(fhOut," ") self.doublePrint(fhOut,"==========================================") self.doublePrint(fhOut,"%s %s %spredicted Error=%s-%spredicted "%(labelX,labelY,labelY,labelY,labelY)) self.doublePrint(fhOut,"==========================================") for i in range(0,len(X)): self.doublePrint(fhOut,"%f %f %f"%(X[i],Y[i],Ypredicted[i])) elif self.regressionType.get()==1: modelToPrint = "%s="%labelY for i in range(0,degree+1): modelToPrint+="+(%f)*(log(%s))^(%d)"%(p[i],labelX,degree-i) self.doublePrint(fhOut,"Model: %s"%modelToPrint) self.doublePrint(fhOut," ") self.doublePrint(fhOut,"=============================================") self.doublePrint(fhOut,"%s log(%s) %s %spredicted Error=%s-%spredicted"%(labelX,labelX,labelY,labelY,labelY,labelY)) self.doublePrint(fhOut,"=============================================") for i in range(0,len(X)): self.doublePrint(fhOut,"%f %f %f %f"%(X[i],math.log(X[i]),Y[i],Ypredicted[i])) elif self.regressionType.get()==2: modelToPrint = "log(%s)="%labelY for i in range(0,degree+1): modelToPrint+="+(%f)*%s^(%d)"%(p[i],labelX,degree-i) self.doublePrint(fhOut,"Model: %s"%modelToPrint) self.doublePrint(fhOut," ") self.doublePrint(fhOut,"============================================================================") self.doublePrint(fhOut,"%s %s log(%s) log(%spredicted) %spredicted Error=log(%s)-log(%spredicted)"%\ (labelX,labelY,labelY,labelY,labelY,labelY,labelY,labelY)) self.doublePrint(fhOut,"============================================================================") for i in range(0,len(X)): self.doublePrint(fhOut,"%f %f %f %f %f"%(X[i],Y[i],math.log(Y[i]), Ypredicted[i], math.exp(Ypredicted[i]))) elif self.regressionType.get()==3: modelToPrint = "log(%s)="%labelY for i in range(0,degree+1): modelToPrint+="+(%f)*(log(%s))^(%d)"%(p[i],labelX,degree-i) self.doublePrint(fhOut,"Model: %s"%modelToPrint) self.doublePrint(fhOut," ") self.doublePrint(fhOut,"======================================================================================") self.doublePrint(fhOut,"%s log(%s) %s log(%s) log(%spredicted) %spredicted Error=log(%s)-log(%spredicted)"%\ (labelX,labelX,labelY,labelY,labelY,labelY,labelY,labelY)) self.doublePrint(fhOut,"======================================================================================") for i in range(0,len(X)): self.doublePrint(fhOut,"%f %f %f %f %f %f"%(X[i],math.log(X[i]),Y[i],math.log(Y[i]), Ypredicted[i], math.exp(Ypredicted[i]))) e = YtoUse-Ypredicted R2 = (1-np.var(e)/np.var(YtoUse)) n=XtoUse.shape[0] # Number of samples p=degree if n-p>0: R2adj = 1-R2*(n-1)/(n-p)*(1-R2) else: R2adj = -1 logL = 0.5*(-n*(math.log(2*math.pi)+1-math.log(n)+math.log(np.sum(np.multiply(e,e))))) AIC = 2*p-2*logL if n-p-1>0: AICc = AIC+2*p*(p+1)/(n-p-1) else: AICc = -1 BIC = p*math.log(n)-2*logL self.doublePrint(fhOut,"------------------------") self.doublePrint(fhOut,"Mean error = %f"%np.mean(e)) self.doublePrint(fhOut,"Std error = %f"%np.std(e)) self.doublePrint(fhOut,"R2 = %f"%R2) self.doublePrint(fhOut,"R2adj = %f"%R2adj) self.doublePrint(fhOut,"AIC = %f"%AIC) self.doublePrint(fhOut,"AICc(Recommended) = %f"%AICc) self.doublePrint(fhOut,"BIC = %f"%BIC) self.doublePrint(fhOut,"------------------------") from scipy.stats import norm nstd = norm.ppf(0.975) perr = np.sqrt(np.diag(V)) lowerBound = mu-nstd*perr upperBound = mu+nstd*perr self.doublePrint(fhOut,"------------------------") for i in range(0,degree+1): self.doublePrint(fhOut,"Term of degree %d = %f Confidence interval 95%% [%f,%f]"%(degree-i,mu[i],lowerBound[i],upperBound[i])) if lowerBound[i]<0 and upperBound[i]>0: self.doublePrint(fhOut," This term may not be significant") self.doublePrint(fhOut,"------------------------") fhOut.close()
#--------------------------- INFO functions -------------------------------------------- def _summary(self): msg=["%s=f(%s) where f is a polynomial of degree %d"%(self.labelY.get(),self.labelX.get(),self.degree.get())] self.addFileContentToMessage(msg,self._getPath("results.txt")) return msg def _validate(self): msg = [] experiment = self.loadInputExperiment() label = self.labelY.get() if not label in experiment.variables: msg.append("%s is not a label of the experiment"%label) else: variable = experiment.variables[label] if variable.role != PKPDVariable.ROLE_LABEL: msg.append('%s is not a label'%label) else: if variable.varType!=PKPDVariable.TYPE_NUMERIC: msg.append("%s is not numeric"%label) label = self.labelX.get() if not label in experiment.variables: msg.append("%s is not a label of the experiment"%label) else: variable = experiment.variables[label] if variable.role != PKPDVariable.ROLE_LABEL: msg.append('%s is not a label'%label) else: if variable.varType!=PKPDVariable.TYPE_NUMERIC: msg.append("%s is not numeric"%label) return msg #--------------------------- UTILS functions --------------------------------------------
[docs] def getXYValues(self, printExperiment=True): self.experiment = self.readExperiment(self.inputExperiment.get().fnPKPD, printExperiment) X = [] Y = [] labelX = self.labelX.get() labelY = self.labelY.get() for sampleName, sample in self.experiment.samples.items(): X.append(float(sample.descriptors[labelX])) Y.append(float(sample.descriptors[labelY])) X = np.asarray(X, np.double) Y = np.asarray(Y, np.double) logX = np.log(X) logY = np.log(Y) if self.regressionType.get() == 0 or self.regressionType.get() == 2: XtoUse = X else: XtoUse = logX if self.regressionType.get() == 0 or self.regressionType.get() == 1: YtoUse = Y else: YtoUse = logY return X, Y, XtoUse, YtoUse
[docs] def getFunction(self): """ Parse the function string from the given result file. Replace the '^' char by the Python '**' operator. """ result = None f = open(self._getPath('results.txt')) for line in f: if line.strip().startswith('Model:'): result = line.split('=')[1].replace('^', '**') break f.close() return result
[docs] def evalFunction(self, xValues, func=None): """ Evaluate the function with the given values and return the list of resulting values. """ func = func or self.getFunction() xLabel = self.labelX.get() evalDict = {'log': np.log} yValues = [] for x in xValues: evalDict[xLabel] = x yValues.append(eval(func, globals(), evalDict)) return yValues
[docs] def filterVarForWizard(self, v): """ Define the type of variables required (used in wizard). """ return v.isNumeric() and v.isLabel()