1- import numpy
1+ import copy
2+
3+ import numpy
24from matplotlib import pyplot
35
46from .probscale import _minimal_norm
@@ -387,17 +389,21 @@ def fit_line(x, y, xhat=None, fitprobs=None, fitlogs=None, dist=None,
387389 -------
388390 xhat, yhat : numpy arrays
389391 Linear model estimates of ``x`` and ``y``.
390- results : a statmodels result object
391- The object returned by numpy.polyfit
392+ results : dict
393+ Dictionary of linear fit results. Keys include:
392394
393- """
395+ - slope
396+ - intersept
397+ - yhat_lo (lower confidence interval of the estimated y-vals)
398+ - yhat_hi (upper confidence interval of the estimated y-vals)
394399
400+ """
395401 fitprobs = validate .fit_argument (fitprobs , "fitprobs" )
396402 fitlogs = validate .fit_argument (fitlogs , "fitlogs" )
397403
398404 # maybe set xhat to default values
399405 if xhat is None :
400- xhat = numpy . array ([ numpy . min ( x ), numpy . max ( x )] )
406+ xhat = copy . copy ( x )
401407
402408 # maybe set dist to default value
403409 if dist is None :
@@ -420,23 +426,109 @@ def fit_line(x, y, xhat=None, fitprobs=None, fitlogs=None, dist=None,
420426 if fitlogs in ['y' , 'both' ]:
421427 y = numpy .log (y )
422428
423- # do the best-fit
424- coeffs = numpy .polyfit (x , y , 1 )
429+ yhat , results = _fit_simple (x , y , xhat , fitlogs = fitlogs )
425430
426- # estimate y values
427- yhat = _estimate_from_fit (xhat , coeffs [0 ], coeffs [1 ],
428- xlog = fitlogs in ['x' , 'both' ],
429- ylog = fitlogs in ['y' , 'both' ])
431+ if estimate_ci :
432+ yhat_lo , yhat_hi = _fit_ci (x , y , xhat , fitlogs = fitlogs ,
433+ niter = niter , alpha = alpha )
434+ else :
435+ yhat_lo , yhat_hi = None , None
430436
431437 # maybe undo the ppf transform
432438 if fitprobs in ['y' , 'both' ]:
433- yhat = 100. * dist .cdf (yhat )
439+ yhat = 100. * dist .cdf (yhat )
440+ if yhat_lo is not None :
441+ yhat_lo = 100. * dist .cdf (yhat_lo )
442+ yhat_hi = 100. * dist .cdf (yhat_hi )
434443
435444 # maybe undo ppf transform
436445 if fitprobs in ['x' , 'both' ]:
437- xhat = 100. * dist .cdf (xhat )
446+ xhat = 100. * dist .cdf (xhat )
447+
448+ results ['yhat_lo' ] = yhat_lo
449+ results ['yhat_hi' ] = yhat_hi
450+
451+ return xhat , yhat , results
452+
453+
454+ def _fit_simple (x , y , xhat , fitlogs = None ):
455+ """
456+ Simple linear fit of x and y data using ``numpy.polyfit``.
457+
458+ Parameters
459+ ----------
460+ x, y : array-like
461+ fitlogs : str, optional.
462+ Defines which data should be log-transformed. Valid values are
463+ 'x', 'y', or 'both'.
464+
465+ Returns
466+ -------
467+ xhat, yhat : array-like
468+ Estimates of x and y based on the linear fit
469+ results : dict
470+ Dictionary of the fit coefficients
471+
472+ See also
473+ --------
474+ numpy.polyfit
475+
476+ """
477+
478+ # do the best-fit
479+ coeffs = numpy .polyfit (x , y , 1 )
480+
481+ results = {
482+ 'slope' : coeffs [0 ],
483+ 'intercept' : coeffs [1 ]
484+ }
485+
486+ # estimate y values
487+ yhat = _estimate_from_fit (xhat , coeffs [0 ], coeffs [1 ],
488+ xlog = fitlogs in ['x' , 'both' ],
489+ ylog = fitlogs in ['y' , 'both' ])
490+
491+ return yhat , results
492+
493+
494+ def _fit_ci (x , y , xhat , fitlogs = None , niter = 10000 , alpha = 0.05 ):
495+ """
496+ Percentile method bootstrapping of linear fit of x and y data using
497+ ``numpy.polyfit``.
498+
499+ Parameters
500+ ----------
501+ x, y : array-like
502+ fitlogs : str, optional.
503+ Defines which data should be log-transformed. Valid values are
504+ 'x', 'y', or 'both'.
505+ niter : int, optional (default is 10000)
506+ Number of bootstrap iterations to use
507+ alpha : float, optional
508+ Confidence level of the estimate.
509+
510+ Returns
511+ -------
512+ xhat, yhat : array-like
513+ Estimates of x and y based on the linear fit
514+ results : dict
515+ Dictionary of the fit coefficients
516+
517+ See also
518+ --------
519+ numpy.polyfit
520+
521+ """
522+
523+ index = _make_boot_index (len (x ), niter )
524+ yhat_array = numpy .array ([
525+ _fit_simple (x [ii ], y [ii ], xhat , fitlogs = fitlogs )[0 ]
526+ for ii in index
527+ ])
438528
439- return xhat , yhat , coeffs
529+ percentiles = 100 * numpy .array ([alpha * 0.5 , 1 - alpha * 0.5 ])
530+ yhat_lo , yhat_hi = numpy .percentile (yhat_array , percentiles , axis = 0 )
531+ return yhat_lo , yhat_hi
440532
441533
442534def _estimate_from_fit (xdata , slope , intercept , xlog = False , ylog = False ):
0 commit comments