{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\nOutlier detection via leave-one-out\n===================================\n\nOutliers can sometimes be identified by assessing the influence of each\ndatapoint. To assess the influence of one point, we fit the dataset while the\npoint and compare the result with the fit of the full dataset. The code below\nshows how to do this with lmfit. Note that the presented method is very basic.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from collections import defaultdict\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nimport lmfit\n\nplt.rcParams['figure.dpi'] = 130\nplt.rcParams['figure.autolayout'] = True"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Generate test data and model. Apply the model to the data\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "x = np.linspace(0.3, 10, 100)\nnp.random.seed(1)\ny = 1.0 / (0.1 * x) + 2.0 + 3 * np.random.randn(x.size)\n\nparams = lmfit.Parameters()\nparams.add_many(('a', 0.1), ('b', 1))\n\n\ndef func(x, a, b):\n    return 1.0 / (a * x) + b\n\n\n# Make 5 points outliers\nidx = np.random.randint(0, x.size, 5)\ny[idx] += 10 * np.random.randn(idx.size)\n\n# Fit the data\nmodel = lmfit.Model(func, independent_vars=['x'])\nfit_result = model.fit(y, x=x, a=0.1, b=2)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "and gives the plot and fitting results below:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "fit_result.plot_fit()\nplt.plot(x[idx], y[idx], 'o', color='r', label='outliers')\nplt.show()\nprint(fit_result.fit_report())"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Fit the dataset while omitting one data point\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "best_vals = defaultdict(lambda: np.zeros(x.size))\nstderrs = defaultdict(lambda: np.zeros(x.size))\nchi_sq = np.zeros_like(x)\nfor i in range(x.size):\n    idx2 = np.arange(0, x.size)\n    idx2 = np.delete(idx2, i)\n    tmp_x = x[idx2]\n    tmp = model.fit(y[idx2],\n                    x=tmp_x,\n                    a=fit_result.params['a'],\n                    b=fit_result.params['b'])\n    chi_sq[i] = tmp.chisqr\n    for p in tmp.params:\n        tpar = tmp.params[p]\n        best_vals[p][i] = tpar.value\n        stderrs[p][i] = (tpar.stderr / fit_result.params[p].stderr)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Plot the influence on the red. chisqr of each point\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "fig, ax = plt.subplots()\nax.plot(x, (fit_result.chisqr - chi_sq) / chi_sq)\nax.scatter(x[idx],\n           fit_result.chisqr / chi_sq[idx] - 1,\n           color='r',\n           label='outlier')\nax.set_ylabel(r'Relative red. $\\chi^2$ change')\nax.set_xlabel('x')\nax.legend()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Plot the influence on the parameter value and error of each point\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "fig, axs = plt.subplots(4, figsize=(4, 7), sharex='col')\naxs[0].plot(x, best_vals['a'])\naxs[0].scatter(x[idx], best_vals['a'][idx], color='r', label='outlier')\naxs[0].set_ylabel('best a')\n\naxs[1].plot(x, best_vals['b'])\naxs[1].scatter(x[idx], best_vals['b'][idx], color='r', label='outlier')\naxs[1].set_ylabel('best b')\n\naxs[2].plot(x, stderrs['a'])\naxs[2].scatter(x[idx], stderrs['a'][idx], color='r', label='outlier')\naxs[2].set_ylabel('err a change')\n\naxs[3].plot(x, stderrs['b'])\naxs[3].scatter(x[idx], stderrs['b'][idx], color='r', label='outlier')\naxs[3].set_ylabel('err b change')\n\naxs[3].set_xlabel('x')"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.6"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}