Source code for pkpd.protocols.protocol_pkpd_regression_labels

# **************************************************************************
# *
# * Authors:     Carlos Oscar Sorzano (info@kinestat.com)
# *
# * Kinestat Pharma
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 2 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address 'info@kinestat.com'
# *
# **************************************************************************

import numpy as np
import math

import pyworkflow.protocol.params as params
from .protocol_pkpd import ProtPKPD
from pkpd.objects import PKPDVariable


[docs]class ProtPKPDRegressionLabel(ProtPKPD):
    """ Perform a regression between two labels\n
        Protocol created by http://www.kinestatpharma.com\n """
    _label = 'regression labels'

    #--------------------------- DEFINE param functions --------------------------------------------

    def _defineParams(self, form):
        form.addSection('Input')
        form.addParam('inputExperiment', params.PointerParam, label="Input experiment", important=True,
                      pointerClass='PKPDExperiment',
                      help='Select an experiment with samples')
        form.addParam('labelY', params.StringParam, label="Y Label", default="", help='Name of a numerical label')
        form.addParam('labelX', params.StringParam, label="X Label", default="", help='Name of a numerical label')
        form.addParam('regressionType', params.EnumParam, choices=["Y=f(X)","Y=f(log(X))","log(Y)=f(X)","log(Y)=f(log(X))"],
                      label="Regression type", default=0,
                      help='f is always a polynomial of a given degree. log is the natural logarithm in base e\n')
        form.addParam('degree', params.IntParam, label="Degree", default=1, help='Degree of the polynomial f')

    #--------------------------- INSERT steps functions --------------------------------------------

    def _insertAllSteps(self):
        self._insertFunctionStep('runRegression',self.inputExperiment.get().getObjId(), self.labelY.get(), self.labelX.get(),
                                 self.regressionType.get(), self.degree.get())

    #--------------------------- STEPS functions --------------------------------------------

[docs]    def runRegression(self, objId, labelY, labelX, regressionType, degree):
        X, Y, XtoUse, YtoUse = self.getXYValues()

        self.printSection("Performing regressions")

        p,V = np.polyfit(XtoUse,YtoUse,self.degree.get(),cov=True)
        mu = p
        Ypredicted = np.polyval(p, XtoUse)

        fhOut = open(self._getPath("results.txt"),'w')

        degree = self.degree.get()
        if self.regressionType.get()==0:
            modelToPrint = "%s="%labelY
            for i in range(0,degree+1):
                modelToPrint+="+(%f)*%s^(%d)"%(p[i],labelX,degree-i)
            self.doublePrint(fhOut,"Model: %s"%modelToPrint)
            self.doublePrint(fhOut," ")
            self.doublePrint(fhOut,"==========================================")
            self.doublePrint(fhOut,"%s    %s    %spredicted   Error=%s-%spredicted "%(labelX,labelY,labelY,labelY,labelY))
            self.doublePrint(fhOut,"==========================================")
            for i in range(0,len(X)):
                self.doublePrint(fhOut,"%f %f %f"%(X[i],Y[i],Ypredicted[i]))
        elif self.regressionType.get()==1:
            modelToPrint = "%s="%labelY
            for i in range(0,degree+1):
                modelToPrint+="+(%f)*(log(%s))^(%d)"%(p[i],labelX,degree-i)
            self.doublePrint(fhOut,"Model: %s"%modelToPrint)
            self.doublePrint(fhOut," ")
            self.doublePrint(fhOut,"=============================================")
            self.doublePrint(fhOut,"%s  log(%s)  %s    %spredicted Error=%s-%spredicted"%(labelX,labelX,labelY,labelY,labelY,labelY))
            self.doublePrint(fhOut,"=============================================")
            for i in range(0,len(X)):
                self.doublePrint(fhOut,"%f %f %f %f"%(X[i],math.log(X[i]),Y[i],Ypredicted[i]))
        elif self.regressionType.get()==2:
            modelToPrint = "log(%s)="%labelY
            for i in range(0,degree+1):
                modelToPrint+="+(%f)*%s^(%d)"%(p[i],labelX,degree-i)
            self.doublePrint(fhOut,"Model: %s"%modelToPrint)
            self.doublePrint(fhOut," ")
            self.doublePrint(fhOut,"============================================================================")
            self.doublePrint(fhOut,"%s     %s    log(%s)  log(%spredicted)  %spredicted  Error=log(%s)-log(%spredicted)"%\
                             (labelX,labelY,labelY,labelY,labelY,labelY,labelY,labelY))
            self.doublePrint(fhOut,"============================================================================")
            for i in range(0,len(X)):
                self.doublePrint(fhOut,"%f %f %f %f %f"%(X[i],Y[i],math.log(Y[i]), Ypredicted[i], math.exp(Ypredicted[i])))
        elif self.regressionType.get()==3:
            modelToPrint = "log(%s)="%labelY
            for i in range(0,degree+1):
                modelToPrint+="+(%f)*(log(%s))^(%d)"%(p[i],labelX,degree-i)
            self.doublePrint(fhOut,"Model: %s"%modelToPrint)
            self.doublePrint(fhOut," ")
            self.doublePrint(fhOut,"======================================================================================")
            self.doublePrint(fhOut,"%s     log(%s)     %s    log(%s)  log(%spredicted)  %spredicted Error=log(%s)-log(%spredicted)"%\
                             (labelX,labelX,labelY,labelY,labelY,labelY,labelY,labelY))
            self.doublePrint(fhOut,"======================================================================================")
            for i in range(0,len(X)):
                self.doublePrint(fhOut,"%f %f %f %f %f %f"%(X[i],math.log(X[i]),Y[i],math.log(Y[i]), Ypredicted[i], math.exp(Ypredicted[i])))

        e = YtoUse-Ypredicted
        R2 = (1-np.var(e)/np.var(YtoUse))
        n=XtoUse.shape[0] # Number of samples
        p=degree
        if n-p>0:
            R2adj = 1-R2*(n-1)/(n-p)*(1-R2)
        else:
            R2adj = -1
        logL = 0.5*(-n*(math.log(2*math.pi)+1-math.log(n)+math.log(np.sum(np.multiply(e,e)))))
        AIC = 2*p-2*logL
        if n-p-1>0:
            AICc = AIC+2*p*(p+1)/(n-p-1)
        else:
            AICc = -1
        BIC = p*math.log(n)-2*logL

        self.doublePrint(fhOut,"------------------------")
        self.doublePrint(fhOut,"Mean error = %f"%np.mean(e))
        self.doublePrint(fhOut,"Std error = %f"%np.std(e))
        self.doublePrint(fhOut,"R2 = %f"%R2)
        self.doublePrint(fhOut,"R2adj = %f"%R2adj)
        self.doublePrint(fhOut,"AIC = %f"%AIC)
        self.doublePrint(fhOut,"AICc(Recommended) = %f"%AICc)
        self.doublePrint(fhOut,"BIC = %f"%BIC)
        self.doublePrint(fhOut,"------------------------")

        from scipy.stats import norm
        nstd = norm.ppf(0.975)
        perr = np.sqrt(np.diag(V))
        lowerBound = mu-nstd*perr
        upperBound = mu+nstd*perr

        self.doublePrint(fhOut,"------------------------")
        for i in range(0,degree+1):
            self.doublePrint(fhOut,"Term of degree %d = %f Confidence interval 95%% [%f,%f]"%(degree-i,mu[i],lowerBound[i],upperBound[i]))
            if lowerBound[i]<0 and upperBound[i]>0:
                self.doublePrint(fhOut,"   This term may not be significant")
        self.doublePrint(fhOut,"------------------------")
        fhOut.close()

    #--------------------------- INFO functions --------------------------------------------
    def _summary(self):
        msg=["%s=f(%s) where f is a polynomial of degree %d"%(self.labelY.get(),self.labelX.get(),self.degree.get())]
        self.addFileContentToMessage(msg,self._getPath("results.txt"))
        return msg

    def _validate(self):
        msg = []
        experiment = self.loadInputExperiment()

        label = self.labelY.get()
        if not label in experiment.variables:
            msg.append("%s is not a label of the experiment"%label)
        else:
            variable = experiment.variables[label]
            if variable.role != PKPDVariable.ROLE_LABEL:
                msg.append('%s is not a label'%label)
            else:
                if variable.varType!=PKPDVariable.TYPE_NUMERIC:
                    msg.append("%s is not numeric"%label)

        label = self.labelX.get()
        if not label in experiment.variables:
            msg.append("%s is not a label of the experiment"%label)
        else:
            variable = experiment.variables[label]
            if variable.role != PKPDVariable.ROLE_LABEL:
                msg.append('%s is not a label'%label)
            else:
                if variable.varType!=PKPDVariable.TYPE_NUMERIC:
                    msg.append("%s is not numeric"%label)
        return msg

    #--------------------------- UTILS functions --------------------------------------------
[docs]    def getXYValues(self, printExperiment=True):
        self.experiment = self.readExperiment(self.inputExperiment.get().fnPKPD,
                                         printExperiment)
        X = []
        Y = []
        labelX = self.labelX.get()
        labelY = self.labelY.get()
        for sampleName, sample in self.experiment.samples.items():
            X.append(float(sample.descriptors[labelX]))
            Y.append(float(sample.descriptors[labelY]))
        X = np.asarray(X, np.double)
        Y = np.asarray(Y, np.double)
        logX = np.log(X)
        logY = np.log(Y)

        if self.regressionType.get() == 0 or self.regressionType.get() == 2:
            XtoUse = X
        else:
            XtoUse = logX

        if self.regressionType.get() == 0 or self.regressionType.get() == 1:
            YtoUse = Y
        else:
            YtoUse = logY

        return X, Y, XtoUse, YtoUse

[docs]    def getFunction(self):
        """ Parse the function string from the given result file.
        Replace the '^' char by the Python '**' operator. """
        result = None
        f = open(self._getPath('results.txt'))
        for line in f:
            if line.strip().startswith('Model:'):
                result = line.split('=')[1].replace('^', '**')
                break
        f.close()
        return result

[docs]    def evalFunction(self, xValues, func=None):
        """ Evaluate the function with the given values
        and return the list of resulting values. """
        func = func or self.getFunction()
        xLabel = self.labelX.get()
        evalDict = {'log': np.log}

        yValues = []
        for x in xValues:
            evalDict[xLabel] = x
            yValues.append(eval(func, globals(), evalDict))

        return yValues

[docs]    def filterVarForWizard(self, v):
        """ Define the type of variables required (used in wizard). """
        return v.isNumeric() and v.isLabel()