{
 "metadata": {
  "name": "predicting_with_trees"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas.rpy.common as com"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "iris = com.load_data('iris')\n",
      "iris.columns = map(lambda x: x.lower().replace('.', '_'), iris.columns)\n",
      "iris.columns"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 26,
       "text": [
        "Index([sepal_length, sepal_width, petal_length, petal_width, species], dtype=object)"
       ]
      }
     ],
     "prompt_number": 26
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "iris['species'].value_counts()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 27,
       "text": [
        "setosa        50\n",
        "versicolor    50\n",
        "virginica     50"
       ]
      }
     ],
     "prompt_number": 27
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "spec = iris.groupby('species')\n",
      "cols = ['k', 'r', 'g']\n",
      "\n",
      "i = 0\n",
      "for s, df in spec:\n",
      "    plot(df['petal_width'], df['sepal_width'], 'o', color=cols[i], label=s)\n",
      "    i += 1\n",
      "    \n",
      "legend()\n",
      "xlabel('Petal Width')\n",
      "ylabel('Sepal Width');"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "display_data",
       "png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAEKCAYAAAAW8vJGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4E+XaP/Bv2oTWQm2hYS2UJUVsQboAFiq2US4bAevh\naFlaZRE9FqQU1OMGYoMsBxQXClXhHA8urywHlCM0BUGkIJZFD5XDCy8/oVAooAXZly5pc//+qA1M\nmzTJJDNJmvtzXbmuZjLPPM9MJvN05pl7bgURERhjjLE/+Lm7AYwxxjwLdwyMMcYEuGNgjDEmwB0D\nY4wxAe4YGGOMCXDHwBhjTEDyjqG2thZxcXFITU1t9FlhYSFCQkIQFxeHuLg4zJ07V+rmMMYYs0Ep\ndQWLFy9GdHQ0rl27ZvHz5ORkbNiwQepmMMYYs5OkZwynT59GQUEBnnnmGViLo+P4OsYY8yySnjE8\n//zzePvtt3H16lWLnysUChQVFSEmJgbh4eFYtGgRoqOjG83DGGPMcWL/8ZbsjCE/Px/t2rVDXFyc\n1cbFx8ejrKwMBw4cwNSpUzFixAiL8xERv4iQk5Pj9jZ4you3BW8L3hZNv5whWcdQVFSEDRs2oHv3\n7khPT8d3332HcePGCeYJDg5GUFAQAGDo0KEwGo24ePGiVE1ijDFmB8k6hvnz56OsrAwnTpzA6tWr\n8eCDD+Kzzz4TzFNeXm7u2fbt2wciQps2baRqEmOMMTtIfldSvfqxgmXLlgEAMjMzsW7dOnz44YdQ\nKpUICgrC6tWr5WqOV9Jqte5ugsfgbXELb4tbeFu4hoKcvRglMYVC4fT1MsYY8zXOHDtlO2NgdQwG\nA3Jzc1FVVYWAgABkZ2dj+PDh7m4WYza1adMGly5dcnczWAOtW7d2+dgsdwwyMhgMmDZtGkpKSszT\n6v/mzoF5ukuXLvHZuweS4pZ+flaSjHJzcwWdAlDXMSxZssRNLWKMsca4Y5BRVVWVxemVlZUyt4Qx\nxqzjjkFGAQEBFqcHBgbK3BLGGLOOOwYZZWdnQ6PRCKZpNBpMnTrVTS1ijLHGePBZRvUDzEuWLEFl\nZSUCAwMxdepUHnhmzA1KS0vRo0cP1NTUwM+P/0e+HccxMMbs0tRv0RW3Yct9K3d9x2A0GuHv7y9Z\nPVKz9r04dewkD+cFTWTMJ1j7Lebn55NGoyEA5pdGo6H8/Hy7l+2KZSxYsIDCw8MpODiYevXqRdu2\nbSOTyUR/+9vfSKPRUFhYGI0aNYouXrxIRERdunQhhUJBrVq1olatWtGePXvIZDLRnDlzqGvXrtSu\nXTsaN24cXblyhYiIKioq6IknnqCwsDAKDQ2lAQMGUHl5ORER/fOf/6SoqCgKDg6mHj160LJly+xu\nt7OsfS/OHDs9/qjLHQNjnsHabzElJUVwQK9/6XQ6u5ft7DKOHDlCXbp0oV9//ZWIiE6ePEklJSX0\n/vvv06BBg+jMmTNUXV1NmZmZlJ6eTkREpaWlpFAoqLa21rycjz/+mCIjI+nEiRN0/fp1euyxx2js\n2LFERPTRRx9RamoqVVRUkMlkov3799PVq1eJiMhgMNDx48eJiGjHjh0UFBRE+/fvt3v9nSFFx8AX\n1hhjTnHFbdjOLsPf3x9VVVU4dOgQjEYjIiIi0KNHDyxbtgxz585Fp06doFKpkJOTg3Xr1sFkMlm8\nzPLFF1/gxRdfRLdu3dCyZUv87W9/w+rVq1FbW4sWLVrgwoULOHr0KBQKBeLi4hAcHAwAGDZsGLp3\n7w4ASEpKQkpKCr7//nu719/TcMfAGHOKK27DdnYZkZGReP/996HX69G+fXukp6fj7NmzKC0txZ//\n/Ge0bt0arVu3RnR0NJRKJcrLyy0u59dff0XXrl3N7yMiIlBTU4Nz585h7Nix0Ol0GDNmDMLDw/HK\nK6+gpqYGALBp0yYMHDgQYWFhaN26NQoKCnD
hwgW719/TcMfAGHOKK27DdsUy0tPT8f333+PkyZNQ\nKBR45ZVXEBERgc2bN+PSpUvm182bN9GxY0eLj5Lo1KkTSktLze9PnToFpVKJ9u3bQ6lU4o033sCh\nQ4dQVFSE/Px8fPbZZ6iqqsLjjz+Ol19+GefOncOlS5cwbNgwr75phm9XZYw5xRW3YTu7jF9++QWn\nT5/Gfffdh4CAAAQGBoKIMGnSJMyYMQOffvopIiIicP78eezevRuPPvoo2rZtCz8/P5SUlKBnz54A\n6jqXhQsXYujQoVCr1ZgxYwbGjBkDPz8/FBYWIiwsDNHR0QgODoZKpYK/vz+qq6tRXV0NtVoNPz8/\nbNq0CVu2bME999zj4Jb0IKJHJ2TiBU1kzCd48m/xv//9L917770UHBxMbdq0odTUVPr111/JZDLR\nu+++S7169aLg4GDSaDQ0c+ZMc7k33niD2rZtS6GhobR3714ymUz05ptvUpcuXaht27Y0duxYunz5\nMhERrVq1inr16kUtW7ak9u3b07Rp08wD13l5edS+fXsKDQ2lsWPHUnp6Os2aNUuWdbf2vTjzfXEc\nA2PMLvxb9ExSxDHwGANjjDEB7hhQF3Gp0+mg1Wqh0+lgMBjc3STGGHMbnx985uQ5jDEm5PNnDJw8\nhzHGhHy+Y+DkOYwxJuTzHQMnz2GMMSGf7xg4eQ5jjAn5/OAzJ89hjDEhDnBjjNmFf4tAcHAwDh48\niG7duoleRrdu3fDxxx9jyJAhLmmTFAFuPn/GwBhj9rp27ZrTy1AoFBYf4OdJmlXHIHdqQMZYnZ0G\nA7bk5kJZVYWagACkZGcjycHfniuW4aza2lqPTfNZU1MDpVKmQ7bopyzJxN4muiI1IGPMOmu/xR35\n+TRDoyECzK8ZGg3tcOC35+wyFixYQGlpaYJp2dnZlJ2dTVeuXKGJEydSx44dKTw8nF5//XXzw+9W\nrFhBiYmJ9Pzzz1NYWBjNmjWLjh49SklJSRQSEkJqtZpGjx5tXqZCoaCSkhIiIrp58ya98MIL1LVr\nVwoJCaHBgwdTRUUFERF9/fXXFB0dTaGhoaTVaun//u//zMvo1q0bbdu2jYiIKisradq0adSpUyfq\n1KkTTZ8+naqqqoiIaPv27RQeHk4LFy6kDh060Lhx4yyuu7XvxZnDe7PpGFyRXpAxZp213+LMlBTB\nAb3+9boDvz1nl3Hy5EkKCgqia9euERFRTU0NdezYkfbu3UsjRoygSZMm0c2bN+ncuXN07733mnMy\nr1ixgpRKJS1dupRqa2upoqKCxowZQ/PnzycioqqqKvrhhx/M9dzeMTz33HP0wAMP0NmzZ6m2tpZ2\n795NVVVV9P/+3/+jli1b0rfffks1NTX01ltvUWRkJBmNRiISdgyzZs2iQYMG0fnz5+n8+fOUmJho\nfirr9u3bSalU0quvvkrV1dXmTqchKTqGZnO7KgeqMeYeSiu/PX8HfnvOLiMiIgLx8fFYv349AOC7\n775Dy5Yt0a1bN2zatAnvvfce7rjjDrRt2xbTp0/H6tWrzWU7deqEKVOmwM/PD4GBgWjRogVKS0tx\n5swZtGjRAomJiY3qM5lMWLFiBRYvXoyOHTvCz88PAwcORIsWLbBmzRo88sgjGDJkCPz9/fHXv/4V\nFRUVKCoqarSclStX4o033oBarYZarUZOTg4+//xz8+d+fn6YPXs2VCqVrLFVzaZj4EA1xtyjxspv\nr9aB354rlpGRkYFVq1YBqDvgZmRk4OTJkzAajejYsaM5veekSZNw/vx5c7kuXboIlvPWW2+BiHDv\nvfeiT58+WLFiRaO6fv/9d1RWVjaKgQLq0oNGRESY3ysUCnTp0gVnzpxpNO/Zs2cbpRI9e/as+X3b\ntm3RokULu7eBqzSbjoED1Rhzj5TsbMxs8NubodHgIQd+e65YRlpaGgoLC3HmzBn8+9//RkZGBjp3\n7oyAgAB
cuHDBnNrzypUrOHjwoLlcwzuE2rdvj+XLl+PMmTNYtmwZnnvuORw/flwwj1qtRmBgII4d\nO9aoHZ06dcLJkyfN74kIZWVlCA8Ptzhvw1SinTp1sto2uTSbu5I4UI0x96i/c2jWkiXwr6xEbWAg\nHp461aE7ilyxjLZt20Kr1WLChAno0aMHevXqBQBISUnBCy+8gDlz5qBly5Y4ceIEzpw5g6SkJIvL\nWbt2LQYNGoTOnTsjNDQUCoUCfn7C/6H9/PwwceJEvPDCC/j888/Rrl077Nu3D/369cOoUaOwYMEC\nfPfdd7j//vuxePFiBAYGWrwklZ6ejrlz52LAgAEAgDfffBNjx461e50lI3p0QiZe0ETGfII3/BY/\n//xzUigUtGjRIvO0K1eu0OTJk6lz584UEhJCcXFxtGbNGiIi+uSTT+j+++8XLOPll1+m8PBwatWq\nFWk0Gvr73/9u/szPz888+FxRUUHTp0+n8PBwCgkJoeTkZPMA8fr16yk6OppCQkJIq9XS4cOHzcto\neFdSdnY2dezYkTp27EjTpk0T3JXUpUsXm+ts7Xtx5vviyGfGmF34t+iZvDK1Z21tLeLi4pCammrx\n8+zsbPTs2RMxMTEoLi6WujkWcQY3xhi7RfIxhsWLFyM6OtpiKHlBQQGOHTuGo0ePYu/evZg8eTL2\n7NkjdZMEOIMbY4wJSXrGcPr0aRQUFOCZZ56xeEqzYcMGjB8/HgCQkJCAy5cvo7y8XMomNcIZ3Bhj\nTEjSM4bnn38eb7/9Nq5evWrx8zNnzgjuIe7cuTNOnz6N9u3bC+bT6/Xmv7VaLbRarcvayIFxjLHm\noLCwEIWFhS5ZlmQdQ35+Ptq1a4e4uLgmG9vwTMLSfbu3dwyuxoFxjLHmoOE/zbNnzxa9LMkuJRUV\nFWHDhg3o3r070tPT8d1332HcuHGCecLDw1FWVmZ+f/r0aYtBIFLiwDjGGBOS5XbVHTt2YNGiRdi4\ncaNgekFBAZYuXYqCggLs2bMH06dPbzT4LMctcgaDgQPjGLOBb1f1TF6dqKf+EtGyZcsAAJmZmRg2\nbBgKCgoQGRmJli1bWnwmiRyGDx/OHQFjjP2BA9wYY3bx1t/isGHDkJ6eLvpRE46k83RF6k9HSXHG\nwB2DzPR6PZYuXWrOxpSVlSXp4DpjrtLcfovNhVdfSmJ1ncK8efNQU1NjnjZv3jzzZ4x5K8NWA3JX\n5qKKqhCgCEB2RjaGP+TY5VlXLMNRnpzK061EP2VJJl7QRLuFhYVZzDKnVqvd3TTGbLL2W8zfkk+a\nP2kIephfmj9pKH+L/ak9nV1GU6k9tVot/eMf/yAiy6k8L1y4QI888gjdeeedNGDAAJo5cyYNHjzY\nvJzbs7aNHz+ennvuORo+fDgFBwdTQkKC+bOG81pK/VlZWUlERGlpadShQwcKCQmhpKQkOnTokN3b\nqiFr34szx85mk4/BG9x+pnA7o9Eoc0sYc53clbkoiWvw9IC4EixZbf/TA5xdRnp6OgoKCnD9+nUA\ndWcCa9euxRNPPAFAGB+1b98+aDQanDt3DjNmzMBzzz2H4OBglJeX49NPP8Vnn33WZB6ENWvWQK/X\n49KlS4iMjMTMmTMtzvfXv/4VxcXF2L17Ny5evIi3337bvNzhw4fj2LFjOH/+POLj483t9BTcMchI\nqbR85U6lUsncEsZcp4qsPD2g1v6nBzi7DGupPe+9995G896eylOlUuGrr77C7NmzERgYiKioKIwf\nP97qtXmFQoHHHnsM/fv3h7+/P5544gn8/PPPjeZrKvUnAEyYMAEtW7aESqVCTk4ODhw4YPF5cu7C\nHYOMsrKyGnUOSqUSU6ZMcVOLGHNegMLK0wP87X96gCuWYSm1pyW3P4bn/PnzqKmpafRonqbc/sie\nO+64w3yWcrumUn+aTCa8+uqriIyMREhICLp37w6FQoHff/+96RWUEXcMM
tLr9Zg5cybUajVCQkKg\nVqsxc+ZMHnhmXi07Ixua4gZPD9ivwdQx9j89wBXLsJTa05LbLxO1bdsWSqVS8ASG2/8Wq6nUn198\n8QU2bNiAbdu24cqVKzhx4gSIyKPu+OK7kmSm1+u5I2DNSv2dQ0tWL0FlbSUC/QMxNWuqQ3cUuWIZ\n1lJ7NsXf3x+PPfYY9Ho9/vGPf+DkyZP4/PPP0bVrV4vz23vwbir15/Xr1xEQEIA2bdrgxo0bmDFj\nht3rKBfuGBhjThv+0HCnby11xTIyMjIwbtw4vP322xY/VygUjQaWly5digkTJqBDhw64++67kZ6e\njp9++klQpqnyDT+vt2jRIrz22msYMGAArl+/jtjYWHzzzTcYN24cvvnmG4SHhyMsLAxvvvmm+YkQ\nnoID3FD3rKTc3FxUVVUhICAA2dnZNh+RITZQTUxdjHkCXwlwe+WVV3Du3Dm3PaLHUVIEuHl8kIDU\nTczPzyeNRiOIK9BoNJSfb/3+6ZycHFIqlYIySqWScnJyXF4XY57CCw4Xohw5coQOHDhAJpOJ9u7d\nS2q1mr7++mt3N8tu1r4XZ74vj/+mpd4ZU1JSLAad6XQ6q2XEBqqJqYsxT9FcO4Yff/yRIiMjKSgo\niLp3704LFixwd5McIkXH4PNjDGIyuIkNVONscYx5nv79++Po0aPuboZH8fnbVcVkcBMbqMbZ4hhj\n3sDnOwYxGdzEBqpxtjjGmDfw+UtJ9XcEOZLBrf7uo7y8PBiNRqhUKkyZMsXmXUli6mKMMbnx7aqM\nMbu0adMGly5dcnczWAOtW7fGxYsXG03nRD2MMcYEnDl2+vwYA1B3aUitViM0NBRqtVrSR1YYDAbo\ndDpotVrodDoYDAbJ6mKMMTF8foxBzqxqBoMB06ZNQ0nJrefO1//N4wyMMU/h85eS1Go1Lly4YHH6\n+fPnXVqXTqfDli1bLE7fvHmzS+tijPk2vpTkBDmzqnGAG2PMG/h8xyBnVjUOcGOMeQOf7xjkzKrG\nAW6MMW/Ag88ig9XE4AA3xpg38PnBZ8YYa4548JkxxpjLNKuOQWzwmJwBbowxJiXDVgN0T+mcWkaz\nGWMQGzwmZ4AbY4xJybDVgGl501ASV2J75iY0mzEGscFjcga4McaYlHRP6bCl2x/HQT14jEFs8Jic\nAW6MMSalKrJ8HHRUs+kYxAaPyRngxhhjUgpQWD4OOqrZdAxig8fkDHBjjDEpZWdkQ1OssT2jDc1m\n8Fls8JicAW6MMSal4Q/9cRxcvQTf4BvRy7E5+Pzll1/i1VdfRXl5uXkgQ6FQ4OrVq6IrdaiBHODG\nGGMOkzSDm0ajQX5+PqKiokRV4CzuGBhjzHGSRj536NBBVKdQWVmJhIQExMbGIjo6Gq+99lqjeQoL\nCxESEoK4uDjExcVh7ty5DtfjChkZGVCpVFAqlVCpVMjIyLBZxtOD4jhTHGNMLKtjDF9++SUAoH//\n/hg9ejRGjBiBFi1aAKjriR577LEmFxwYGIjt27cjKCgINTU1GDx4MHbt2oXBgwcL5ktOTsaGDRuc\nXQ/RMjIysGrVKsG0+vcrV660WMbTg+I4UxxjzBlWLyVNmDABCoUCQF2QRP3f9VasWGF3JTdv3kRy\ncjI+/fRTREdHm6cXFhbinXfewcaNG603UOJLSSqVymIsg0qlQnV1tcUynh4Ux5niGGPOHDutnjF8\n8sknAGDxv/xdu3bZtXCTyYT4+HiUlJRg8uTJgk4BqGt4UVERYmJiEB4ejkWLFjWaBxD+F67VaqHV\nau2q3x7WNpzJZLJaxtOD4jhTHGO+p7CwEIWFha5ZGNkQFxdn17SmXL58mRISEmj79u2C6VevXqUb\nN24QEVFBQQH17NmzUVk7mugUpVJJABq9VCqV1TJhYWEWy6jVaknbaq+UlBSL7dPpdO5uGmNMJs4c\nO60OPu/evRvvvPMOzp07h3fffRfvv
PMO3nnnHej1etTW1jrU+YSEhGD48OH46aefBNODg4MRFBQE\nABg6dCiMRiMuXrzo0LKdNXLkSIvT09LSrJbx9KA4zhTHGHOG1UtJ1dXVuHbtGmpra3Ht2jXz9Dvv\nvBPr1q2zueDff/8dSqUSoaGhqKiowNatW5GTkyOYp7y8HO3atYNCocC+fftARGjTpo0Tq+O4+gHm\ndevWwWQywc/PD2lpaVYHngHPD4rjTHGMMWfYjGM4efIkunbt6vCCDx48iPHjx8NkMsFkMmHs2LF4\n6aWXsGzZMgBAZmYm8vLy8OGHH0KpVCIoKAjvvvsuBg4cKGwgxzEwxpjDJAlwS01NtVqBQqGQ7RZT\n7hgYY8xxkgS4vfjii3jxxRfRo0cP3HHHHXj22Wfxl7/8Ba1atUKPHj1EN9YTde3aFQqFwvyy5wyJ\ns8Uxb7TTYMDrOh30Wi1e1+mwkwMfZVGfVU07QQvdUzoYttre7mLKuIyt0en4+Hi7pknFjiY6JSIi\nwuIdPBEREVbL5Ofnk0ajEcyv0WgoPz+/ybpycnIa3QWlVCopJyfHxWvFWGM78vNphkZDBJhfMzQa\n2mFjv2XOyd+ST5o/aQh6mF+aP2kof4v17S6mTEPOHDttPhLj5s2bggja48eP4+bNm67un9zm1KlT\nDk0HgNzcXME2Aeoii5csWdJkXUuXLm0UA1FTU4O8vDw7W8uYeFtyczGvwX47r6QEW23st8w5uStz\nG6XaLIkrwZLV1re7mDKuZPOx2++99x4eeOABdO/eHQBQWlqK5cuXS94wT8bZ4pg3UlrZb/058FFS\n1rKqVdZa3+5iyriSzY7h4Ycfxi+//IIjR45AoVDg7rvvtpotzVdwtjjmjWqs7Le1NvZb5hxrWdUC\n/a1vdzFlXMnqpaRt27YBqHuYXkFBAUpKSnDs2DEYDAZ89dVXsjRODhEREQ5NBzhbHPNOKdnZmNlg\nv52h0eAhDnyUlKWsapr9GkwdY327iynjSlbPGHbu3IkhQ4Zg48aNjR6gB8Dm01W9RX2cxu1jChER\nETh58qTVMpwtjnmjpD/2z1lLlsC/shK1gYF4eOpU83QmjduzqlXWViLQPxBTs6aap7uqjCtZjWMo\nLi5GbGysxU5BThzHwBhjjpMkwK1fv344fvw4+vfvj8TERNx3330YNGgQgoODnWqsww3kjoExxhwm\nSYDbf/7zH5w+fRozZsxAixYtkJubC41Gg5iYGEyePFl0Yz2Rp2c78/T2Me/BAW63uDWATEL6t/RQ\nD1A7txB7gh2uXbtG3377Len1eurRowd169ZNdOCEo+xsomhig9Xk4untY96DA9xucUUAmSfKWZhD\nynuVdevkxLHT6qWkL774AkVFRfj5558REBCAAQMGYODAgRg0aBA6dOjgXG/kAKkvJXl6tjNPbx/z\nHq/rdJhrYV+apdNhjo/tS7qndNjSzcLv6pQOmz/23m2hHqDGhUf+yC6pt56IzBardyVlZmaiV69e\nmDRpEpKSktCrVy9RFXg6T8925untY96DA9xucXcAmVRq/C0H0TrKasdw+fJlHDhwALt378bs2bNx\n5MgRdOzYEYmJiRg0aBAefPBBlzTA3cQGq8nF09vHvAcHuN3i7gAyqShrbcYs28Xq4LNSqUS/fv2Q\nlZWFlStXYtOmTXj44Yfxz3/+Ew899JBLKvcEnp7tzNPbx7wHB7jd4u4AMqlkjcyCssD5zsHqGMOB\nAwdQVFRkflVXVyMxMdH8GjBggNOV29VAGW5XNRgMHp3tzNPbx7zHToMBW28LcHvIhwPcDFsNwgCy\nMfIFkElJ/5Yeeevy8PuPv7s+jiEuLg6DBw82dwRisri5AscxMMaY4yQJcPMU3DEwxpjjJAlw8yV9\n+vQRZHDr06ePu5vEfIyYwDNPD1ZbOF+PHn3V6BYbih591Vg4Xy9ZXZ4erJYxKQOqKBWUfZVQRamQ\nM
SnD3U1qkmuGsL1Ynz59cOjQIcG0Q4cOoU+fPvjf//1fN7WK+ZKdBgO+mTZNkERn5h9/W7v+L6aM\nnBbO12PB2nm4/Pit2ycXrJ0HAHhlht6ldRm2GjAtb5ogsU1JXt3fnjBmkDEpA6t+XAWMuTVt1YZV\nwCRg5Ucr3dewJvj8paSmHhLo4ZuGNRNiAs88PVitR181Tjx+ofH0r9QoOXDepXV5erCaKkqFmjGN\n4wtUa1SoPlwtWb3OHDutnjGkpqY2WeGGDRtEVcgYExITeObpwWomP8uBVrUK12cr9PRgNVJZPjib\nlCaZW2I/qx3Diy++KGc7GPNZYgLPPD1Yzc9k+dDiT67PVujpwWoKo+WrEn41njvEa7Vj0Gq1MjbD\nfXr37t1ojKF+OmNySMnOxsySEsF4wQyNBg83EXgmpoycMsdk1Y0xjLh15hC6XolnR7s+W2F2RjZK\n8koEYwya/RpMzfKMbTEyeWTdmMKjt03cAKQlpbmtTbbYHGP45ZdfMGPGDBw6dMj8fB6FQoHjx4/L\n00AZbldtOADdu3dvHnhmshITeObpwWoL5+uxfE0eahVG+JMKz46e4vKB53qeHqyWMSkD63aug0lp\ngl+NH9KS0iQfeJY0juG+++7D7Nmz8cILL2Djxo1YsWIFamtrMWfOHFEVOtxAjmNgjDGHSdoxxMfH\nY//+/bjnnntw8OBBwTQ5cMfAGGOOkzTALTAwELW1tYiMjMTSpUvx1Vdf4caNG6Iqk5rYTGcZGRlQ\nqVRQKpVQqVTIyPDs4BPW/IgJVvtAr8dotRoTQkMxWq3GB3q99A11gKcHncnZPjF1uXX72crks3fv\nXrp69SqdOnWKxo8fT3/+859p9+7dojMDOcqOJhKR+Exn6enpgjL1r/T0dFc0nzGbxGRWy8vJoUyl\nUlAmU6mkvJwc+RreBE/PkCZn+8TU5Yr22XvstMTuALerV68CAO68804Juifr7D0dEpvpTKVSoabG\nQvCJSoXqaumCTxirJyZYbbRajTUXGgeQjVGrsfq8awPIxPD0oDM52yemLle0T9JLST/++CPuuece\n8ysmJgY//fSTqMqkJDbTmbUNZzJ5bvAJa17EBKvdYeGfGQAINLo+gEwMTw86k7N9Yupy9/az+ayk\niRMn4oMPPsD9998PANi1axcmTpyI//73v5I3zhFiM51ZeySGn5/nBp+w5kVMsFqF0vJPt1Ll+gAy\nMTw96EzO9ompy93bz+bRT6lUmjsFABg8eDCUVnZKdxKb6WzkyJEWp6eleW7wCWtexGRWS87KwqQG\nv8NMpRK1fKYzAAAY5klEQVRJU1wfQCaGp2dIk7N9Yupy9/azOcYwffp0VFRUID09HQCwZs0aBAYG\nYuzYsQDqbl2VtIEOXCcTm+ksIyMD69atg8lkgp+fH9LS0rBypWc+9ZA1T2KC1T7Q67EzLw+BRiMq\nVSokTZmC5zzoziRPDzqTs31i6nK2fZLGMWi12iafQLp9+3ZRFduL4xgYY8xxnMGNMcaYgKR3Jf32\n2294+umn8fDDDwMADh8+jI8//tjmgisrK5GQkIDY2FhER0fjtddeszhfdnY2evbsiZiYGBQXFzvY\nfCG9Xg+1Wo3Q0FCo1Wro7TytFhPgJjaYjnkPObOqiQlWeyUjA6kqFcYolUhVqfCKPfutyKApMeuV\n/KdkKO5SQNFHAcVdCiT/KdmuusQQs15is6qJqUv/lh7qAWqEDgyFeoAa+rf0dtXlNrYCHXQ6Ha1e\nvZruueceIiKqrq6m3r172xUkcePGDSIiMhqNlJCQQN9//73gc4PBQEOHDiUioj179lBCQkKjZdjR\nRCIiysnJIaVSKQhSUyqVlGMj4EdMgJvYYDrmPcQEnYkpQyQuWO3l9HR65rb5CaBnAHq5qf1WZNCU\nmPVKejSJEA9BXYgHJT2a1GRdYohZr/TMdIvtS89sOrBVTF05C3N
Iea9SUEZ5r5JyFuaIXWW72Hvs\ntFjW1gz9+vUjIqLY2FjztJiYGIcquXHjBvXv358OHTokmJ6ZmUmrV682v+/Vqxf99ttvwgbauXJh\nYWEWD/BqtbrJcg07k/qXSqWyWiYlJcViGZ1OZ1dbmeebmZIiOBDWv15v4jsWU4aIaFRYmMVyo5vY\ndx9p0JHUvx5par+dkCI8EP7x0k1sun1i1gs9G9cDPQg9xR+srBGzXsq7lRbLqKKsbz+xdYX1D7NY\nRj2g6WOTs5zpGGzed9qqVStcuC3Ccs+ePQgJCbHrbMRkMiE+Ph4lJSWYPHkyoqOjBZ+fOXMGXbp0\nMb/v3LkzTp8+jfbt2wvmu/2SkFartZgrwlL0MgAYbQT8kIgAN7HBdMx7yJlVTUywWksr+23LpvZb\nkUFTotarhYPTnSBmvcRmVRNTV42/lWOTn2uDEQsLC1FYWOiSZdnsGN555x2kpqbi+PHjSExMxPnz\n57Fu3Tq7Fu7n54eff/4ZV65cgU6nQ2FhYaODesMDs6U7oOwZK7AWW6GyEfAjJsBNbDAd8x5yZlUT\nE6x2w8p+e6Op/VZk0JSo9bL2NBkJnjIjZr3EZlUTU5ey1sqxyeTaYMSG/zTPnj1b9LJsDj7369cP\nO3bswA8//IDly5fj8OHDiImJcaiSkJAQDB8+vNGjNMLDw1FWVmZ+f/r0aYSHhzu07HpZWVmNOgel\nUokpNgJ+xAS4iQ2mY95DTNCZmDKAuGC16JEj8ZcG0/4CILqp/VZk0JSY9UqKSgIapoXf8Md0FxOz\nXiOTR1psn62samLqyhqZBWVBg2OTQYkpaZ4RjGiRtWtMe/fupbNnz5rff/LJJ5SamkpTp06lCxcu\n2LxGdf78ebp06RIREd28eZPuv/9++vbbbwXz3D74vHv3bqcGn4nqBqDVajWFhISQWq22OfBcLz09\nnVQqFfn7+5NKpbLryar5+fmk0+koOTmZdDodDzw3Qzvy8+l1nY5ykpPpdZ3O5iCy2DJEdQPQo9Vq\nGh8SQqPVaruekvpyejo9olLRaH9/ekSlanLguV7+lnzSTdRR8vhk0k3U2f20TjHrlfRoUt1YQ++6\nsQUpBp7riVmv9Mx0UkWpyP8ef1JFqWwOPDtTV87CHFIPUFNIQgipB6glH3gmkujpqnFxcdi2bRva\ntGmDnTt3YvTo0Vi6dCmKi4tx5MgRm5eTDh48iPHjx8NkMsFkMmHs2LF46aWXsGzZMgBAZmYmgLr/\n9Ddv3oyWLVtixYoVjSKpOY6BMcYcJ0mAW0xMDA4cOAAAmDJlCtq2bWu+1n/7Z1KTo2MwGAzIzc1F\nVVUVAgICkJ2dbdejNBhr6M99+qDy0CEEA7gGILB3b6y3I3/4ToMBW3JzoayqQk1AAFKys+3K+exo\nGTnp39Jj6dqlqPGvgbJWiayRWdC/rHd3s3yGU8dOa6cSvXv3purqaiIiuuuuu6iwsND8WXR0tOhT\nFEc10USX4JgE5iojeve2GFswwkbcj5wxE3Jx17377BZnjp1WB5/T09ORnJyMRx99FEFBQeYnrB49\nehShoaHieiEPlJubi5KSEsG0kpISLFmyxE0tYt6q8tAh/L3BtL//Mb0pW3JzMa/BPjivpARbm9gH\nxZSR09K1S1EzTHibZs2wGuSty3NTi5gjrN6uOnPmTDz44IP47bffkJKSYr59k4ia1UGTYxKYqwQ7\nOL2enDETcpHr3n0mjSbjGAYNGtRo2l133SVZY9yBYxKYq1xzcHo9OWMm5CLXvftMGj6fpoxjEpir\nBPbu3Si24Jk/pjdFzpgJuXjlvfvMjB+7DfEJfhhryJm7khxN1COmjJz0b+mRty4PRj8jVCYVpqRN\n4buSZMT5GBhjjAlImo+BMcaYb+GOgTErxCSnEZM8R2xdC+fr0aOvGt1iQ9GjrxoL5+vtqksuYhLa\niE0kJCYRjti6xJZzlFz1WGL
z6aqM+aKdBgO+mTZNECsw84+/rV3HfyUjAxdXrcLG26b9ZdUqvAJg\n4cqVLq1r4Xw9Fqydh8uP37otdMHaeXXtmKFvatVkYdhqwLS8aSiJu7VOJXl1f1tLaC+mDFDXKcz7\nch5qHrm1LeZ9WbctrI1piK1LbDlHyVWPNTzGwJgFr+t0mLtlS6Pps3Q6zNm82WKZVJUKGy3kVkhV\nqbCx2vrzpsXU1aOvGicev9B4+ldqlBw4b7Uuueie0mFLt8brpDulw+aPLa+TmDIAoB6gxoVHGm8L\ntUGN8/ssbwuxdYkt5yhX1MNjDIy5mJgAMjHJc8TWZfKzHEBWq/CMADIxCW3EJhISE0wnti6x5Rwl\nVz3WcMfAmAViAsjEJM8RW5efyfJVYH/yjAAyMQltxCYSEhNMJ7YuseUcJVc91nDHwJgFYgLIxCTP\nEVtX5pgshP5beEAMXa/Es6M9I4BMTEIbsYmExATTia1LbDlHyVWPNTzGwJgVYgLIXsnIwOF169DS\nZMINPz9Ep6U1OfDsTF0L5+uxfE0eahVG+JMKz46e4hEDz/UMWw1YsnoJKmsrEegfiKljptocOBVT\nBhAXTCe2LrHlHOVsPRzgxhhjTIAHnxljjLkMxzEwryJn1jI5s6rJtV5ybj/DVgNyV+aiiqoQoAhA\ndka2zUshGZMysHbHWpCKoDAqMDJ5JFZ+ZPtSHHMx0Sl+ZOIFTWQykTNrmZxZ1eRaLzm3X/6WfNL8\nSSPI4Kb5k4byt1ivKz0znRAPQRnEg9Iz013ePl/gzLHT44+63DGwejNTUgQHtfrX6zqdR9Qltn1y\nrZec2y9lQorwAP/HSzfRel3Ku5UWy6iiVC5vny9w5tjJYwzMa8iZtUzOrGpyrZec209MgBapLA+U\nmpRNBwgy1+OOgXkNObOWyZlVTa71knP7iQnQUhgtBwj61fBhSm68xZnXkDNrmZxZ1eRaLzm3n5gA\nrZHJI4ENDSZuANKSmg4QZK7HcQzMq8iZtUzOrGpyrZec209MgFbGpAys27kOJqUJfjV+SEtK47uS\nROIAN8YYYwIc4MYYY8xlOMCNeRU5A7Q+0OuxY+lS3FFTgwqlEslZWXhOr5ekLjHBYMx7eNv3yx0D\n8xpiMp2J9YFej//Om4c1tyXemTRvHj4AXN45uDtbF5OWN36/PMbAvIaYTGdijVarseZC46xgY9Rq\nrD7v2gxpcmUFY+7hru+XxxiYT5AzQOsOCyk6ASDQ6PoMae7O1sWk5Y3fL3cMzGvIGaBVobR8lbVS\n5foMae7O1sWk5Y3fL3cMzGvIGaCVnJWFSQ06h0ylEklTXJ8hzd3Zupi0vPH75TEG5lXkDND6QK/H\nzrw8BBqNqFSpkDRliqR3JcmRFYy5hzu+Xw5wY4wxJsCDz4wxxlxGso6hrKwMDzzwAHr37o0+ffog\nNze30TyFhYUICQlBXFwc4uLiMHfuXKmaw3zYB3o9RqvVmBAaitFqNT6Q6HIQUHep63WdDnqtFq/r\ndNhpMDSLugxbDdA9pYN2gha6p3QwbLVdl1xlnCnHrBCdycGGX3/9lYqLi4mI6Nq1a3TXXXfR4cOH\nBfNs376dUlNTm1yOhE1kPiAvJ4cylUpBYppMpZLycnJcXpenZ5gTS0w2NrnKOFOuuXPm2CnZGUOH\nDh0QGxsLAGjVqhWioqJw9uxZSx2TVE1gDDuWLsVHDWISPqqpwc68PJfXtSU3VxCVDQDzSkqwdckS\nr64rd2WuIGoXAEriSrBktfW65CrjTDlmnSyPxCgtLUVxcTESEhIE0xUKBYqKihATE4Pw8HAsWrQI\n0dHRjcrrbzv112q10Gq1EreYNRdyBqp5eoY5scQEaMlVxplyzU1hYSEKCwtdsizJO4br168jLS0N\nixcvRqtWrQSfxcfHo6ysDEFBQdi0aRNGjBiBX375pdEy9BJeE2bNm5yBap6eYU4sMQFacpVxp
lxz\n0/Cf5tmzZ4telqR3JRmNRjz++ON48sknMWLEiEafBwcHIygoCAAwdOhQGI1GXLx4UcomMR8jZ6Ca\np2eYE0tMgJZcZZwpx6yTLI6BiDB+/HiEhYXhvffeszhPeXk52rVrB4VCgX379mHUqFEoLS0VNpDj\nGJiT5AxU8/QMc2KJCdCSq4wz5Zozjwxw27VrF5KSktC3b18oFHVJvufPn49Tp04BADIzM5GXl4cP\nP/wQSqUSQUFBePfddzFw4EBhA7ljYIwxh3lkx+Aq3DEwxpjjOPKZ+QwxwWpyBoIx1hxwBjfmNcRk\nVZMz6xtjzQVfSmJeQ0xWNTmzvjHmSfhSEvMJYoLV5AwEY6y54I6BeQ0xwWpyBoIx1lxwx8C8hphg\nNTkDwRhrLniMgXkVMcFqcgaCMeYpOI6BMcaYAA8+M8YYcxnuGEQyGAzQ6XTQarXQ6XQwcNCULDw9\nWE3ObHGMSYUD3EQwGAyYNm0aSm4Lmqr/ezhfu5aMpweriQnAY8wT8RiDCDqdDlssBE3pdDps5qAp\nyXh6sJqYADzGpMJjDDKrshI0VclBU5Ly9GA1ObPFMSYl7hhECLASNBXIQVOS8vRgNTmzxTEmJe4Y\nRMjOzoamQdCURqPBVA6akpSnB6vJmS2OMSnxGINIBoMBS5YsQWVlJQIDAzF16lQeeJaBpweryZkt\njrGmcIAbY4wxAR58Zowx5jLcMTCv4ukBbnJaOF+PHn3V6BYbih591Vg4X+/uJrFmggPcmNfw9AA3\nOS2cr8eCtfNw+fFbt8guWDsPAPDKDL2bWsWaCx5jYF7D0wPc5NSjrxonHm8cTNfjKzVKDnAwHeMx\nBuYjPD3ATU4mP8vBdLUKDqZjzuOOgXkNTw9wk5OfyfJVYH/iYDrmPO4YmNfw9AA3OWWOyULov4Wd\nQ+h6JZ4dzcF0zHk8xsC8iqcHuMlp4Xw9lq/JQ63CCH9S4dnRU3jgmZlxgBtjjDEBHnxmjDHmMtwx\nMMYYE+COgTHGmAB3DIwxxgS4Y2CMMSbAHQNjjDEB7hgYY4wJcMfAGGNMgDsGxhhjAtwxeJHCwkJ3\nN8Fj8La4hbfFLbwtXEOyjqGsrAwPPPAAevfujT59+iA3N9fifNnZ2ejZsydiYmJQXFwsVXOaBd7p\nb8vgNmGCz2dwq8f7xS28LVxDsgxuKpUK7733HmJjY3H9+nX069cPDz30EKKioszzFBQU4NixYzh6\n9Cj27t2LyZMnY8+ePVI1iXm52zO46QHoT5702QxujElJsjOGDh06IDY2FgDQqlUrREVF4ezZs4J5\nNmzYgPHjxwMAEhIScPnyZZSXl0vVJObltuTmCtJ6AsC8khJsXbLETS1irJkiGZw4cYIiIiLo2rVr\ngumPPPII/fDDD+b3Q4YMoZ9++kkwDwB+8Ytf/OKXiJdYkl1Kqnf9+nWkpaVh8eLFaNWqVaPPqcFj\nYRUKRZOfM8YYk5akdyUZjUY8/vjjePLJJzFixIhGn4eHh6OsrMz8/vTp0wgPD5eySYwxxmyQrGMg\nIjz99NOIjo7G9OnTLc7z6KOP4rPPPgMA7NmzB6GhoWjfvr1UTWKMMWYHyTK47dq1C0lJSejbt6/5\n8tD8+fNx6tQpAEBmZiYAICsrC5s3b0bLli2xYsUKxMfHS9Ecxhhj9hI9OuFimzZtol69elFkZCQt\nWLDA4jxTp06lyMhI6tu3L+3fv1/mFsrH1rbYvn073XnnnRQbG0uxsbE0Z84cN7RSek899RS1a9eO\n+vTpY3UeX9knbG0LX9kniIhOnTpFWq2WoqOjqXfv3rR48WKL8/nCvmHPthCzb3hEx1BTU0MajYZO\nnDhB1dXVFBMTQ4cPHxbMYzAYaOjQoUREtGfPHkpISHBHUyVnz7bYvn07paamuqmF8tm5cyft37/f\n6sHQV/YJItvbwlf2CSKiX3/9lYqLi4mI6Nq1a3TXXXf57
PHCnm0hZt/wiEdi7Nu3D5GRkejWrRtU\nKhXGjBmDr7/+WjCPr8Q82LMtAN+4W+v+++9H69atrX7uK/sEYHtbAL6xTwAcI3U7e7YF4Pi+4REd\nw5kzZ9ClSxfz+86dO+PMmTM25zl9+rRsbZSLPdtCoVCgqKgIMTExGDZsGA4fPix3Mz2Cr+wT9vDV\nfaK0tBTFxcVISEgQTPfFfcPathCzb0gex2CPhrEL1jTs9ewt503sWaf4+HiUlZUhKCgImzZtwogR\nI/DLL7/I0DrP4wv7hD18cZ9wNkaqOWlqW4jZNzzijKFhPENZWRk6d+7c5DzNNebBnm0RHByMoKAg\nAMDQoUNhNBpx8eJFWdvpCXxln7CHr+0THCN1i61tIWbf8IiOoX///jh69ChKS0tRXV2NNWvW4NFH\nHxXM4ysxD/Zsi/LycvN/Q/v27QMRoU2bNu5orlv5yj5hD1/aJ4hjpMzs2RZi9g2PuJSkVCqxdOlS\n6HQ61NbW4umnn0ZUVBSWLVsGoC7mYdiwYSgoKEBkZKQ55qE5smdbrFu3Dh9++CGUSiWCgoKwevVq\nN7daGunp6dixYwd+//13dOnSBbNnz4bRaATgW/sEYHtb+Mo+AQA//PAD/ud//gd9+/ZFXFwcgMYx\nUr6yb9izLcTsG5IFuDHGGPNOHnEpiTHGmOfgjoExxpgAdwyMMcYEuGNgjDEmwB0Da3b8/f0RFxeH\ne+65B6NGjUJFRYXVeQ8cOIBNmzbZXGZhYSFSU1MbTY+Li8OBAwcAADU1NWjVqhW++OIL8+f9+vVD\ncXExcnJysG3btiaXu2PHDuzevdv82YQJE/Dll1/abBtjrsYdA2t2goKCUFxcjIMHD6JFixb46KOP\nrM5bXFyMgoIC0XUNHjwYRUVFAOo6mV69epnf37hxA8ePH0dsbCxmz56NIUOGNLms7du3m8sCzTtS\nl3k27hhYszZ48GAcO3YMN2/exMSJE5GQkID4+Hhs2LABRqMRb7zxBtasWYO4uDj861//wo8//ojE\nxETEx8fjvvvus/nogMTERPPBfPfu3Zg0aRJ+/vlnAHXBRP3794dCoRD8979582ZERUWhX79+WL9+\nPQDg5MmTWLZsGd577z3Ex8dj165dAICdO3fivvvug0aj4bMHJhvuGFizVVNTg82bN6Nv376YO3cu\nhgwZgr179+K7777DSy+9BKPRiDlz5mDMmDEoLi7GqFGjcPfdd+P777/H/v37MXv2bMyYMaPJOm7v\nGIqKipCUlISAgABcv34dRUVFSExMBFD3379CoUBlZSWeffZZ5Ofn4z//+Q9+++03KBQKdO3aFZMm\nTcILL7yA/fv3Y/DgwSAi/Pbbb/jhhx+Qn5+PV199VfJtxhjgIZHPjLlSRUWFOQo0KSkJEydOxKBB\ng7Bx40YsWrQIAFBVVYVTp06B6nKSmMtevnwZ48aNw7Fjx6BQKMzRxdZ07doV1dXVKC8vx5EjR9Cr\nVy8MGDAAe/fuxe7du5GdnW2el4hw5MgRdO/eHRqNBgDw5JNPYvny5YJ56ikUCvOzb6KioprlY6OZ\nZ+KOgTU7d9xxB4qLixtN/+qrr9CzZ0/BtL179wrez5o1C0OGDMH69etx8uRJaLVam/UlJibiX//6\nFzp27AgAGDhwIHbt2oV9+/Zh0KBBgnkbjhvYevBAixYt7J6XMVfhS0nMJ+h0OuTm5prf13ccwcHB\nuHbtmnn61atX0alTJwCw+/k6iYmJeP/9982XjQYNGoTPPvsMHTt2RHBwsHk+hUKBu+++G6WlpTh+\n/DgAYNWqVebPG7aFMXfhjoE1O5bu5pk1axaMRiP69u2LPn36ICcnBwDwwAMP4PDhw+bB55dffhmv\nvfYa4uPjUVtbK1iWtbuEEhMTUVpaaj476NChA0wmk7mjuF1AQACWL1+O4cOHo1+/fmjfvr15uamp\nqVi/fr1g8Nme+hlzN
X6IHmOMMQE+Y2CMMSbAHQNjjDEB7hgYY4wJcMfAGGNMgDsGxhhjAtwxMMYY\nE/j/auRZlXr4uqMAAAAASUVORK5CYII=\n"
      }
     ],
     "prompt_number": 28
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import patsy as pt\n",
      "from sklearn import tree\n",
      "\n",
      "y, X = pt.dmatrices('species ~ sepal_width + petal_width - 1', iris)\n",
      "\n",
      "clf = tree.DecisionTreeClassifier(max_depth=3).fit(X, y)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 258
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import StringIO, pydot\n",
      "from IPython.core.display import HTML\n",
      "\n",
      "dot_data = StringIO.StringIO()\n",
      "tree.export_graphviz(clf, out_file=dot_data)\n",
      "graph = pydot.graph_from_dot_data(dot_data.getvalue())\n",
      "graph.write_png('tree.png')\n",
      "HTML('<img src=\"files/tree.png\" width=600 height=500/>')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<img src=\"files/tree.png\" width=600 height=500/>"
       ],
       "output_type": "pyout",
       "prompt_number": 259,
       "text": [
        "<IPython.core.display.HTML at 0x112fcb950>"
       ]
      }
     ],
     "prompt_number": 259
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clf.tree_.threshold"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 260,
       "text": [
        "array([  8.00000012e-001,  -2.32035018e+077,   1.75000000e+000,\n",
        "         1.34999996e+000,   2.23443806e-314,   2.28130340e-314,\n",
        "         1.84999996e+000,   6.93069750e-310,   2.28041841e-314])"
       ]
      }
     ],
     "prompt_number": 260
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%load_ext rmagic"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 261
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%R -o rnewdata\n",
      "set.seed(32313)\n",
      "rnewdata <- data.frame(sepal_width = runif(20,2,4.5),petal_width = runif(20,0,2.5))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 262
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "\n",
      "newdata = pd.DataFrame(rnewdata.T, columns=['sepal_width', 'petal_width'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 263
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pred1 = clf.predict_proba(newdata)\n",
      "\n",
      "species = iris['species'].unique()\n",
      "idx = ['0', '1']\n",
      "cols = [x + '_' + y for x in species for y in idx]\n",
      "\n",
      "pd.DataFrame(np.hstack(pred1), columns=cols)\n",
      "\n",
      "# I don't understand what these probabilities mean"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>setosa_0</th>\n",
        "      <th>setosa_1</th>\n",
        "      <th>versicolor_0</th>\n",
        "      <th>versicolor_1</th>\n",
        "      <th>virginica_0</th>\n",
        "      <th>virginica_1</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <td><strong>0 </strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>1 </strong></td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>2 </strong></td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>3 </strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>4 </strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>5 </strong></td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>6 </strong></td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>7 </strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>8 </strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>9 </strong></td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>10</strong></td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>11</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>12</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>13</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0.192308</td>\n",
        "      <td> 0.807692</td>\n",
        "      <td> 0.807692</td>\n",
        "      <td> 0.192308</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>14</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>15</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>16</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>17</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0.192308</td>\n",
        "      <td> 0.807692</td>\n",
        "      <td> 0.807692</td>\n",
        "      <td> 0.192308</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>18</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0.192308</td>\n",
        "      <td> 0.807692</td>\n",
        "      <td> 0.807692</td>\n",
        "      <td> 0.192308</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>19</strong></td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 1.000000</td>\n",
        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "output_type": "pyout",
       "prompt_number": 264,
       "text": [
        "    setosa_0  setosa_1  versicolor_0  versicolor_1  virginica_0  virginica_1\n",
        "0          1         0      1.000000      0.000000     0.000000     1.000000\n",
        "1          0         1      1.000000      0.000000     1.000000     0.000000\n",
        "2          0         1      1.000000      0.000000     1.000000     0.000000\n",
        "3          1         0      0.000000      1.000000     1.000000     0.000000\n",
        "4          1         0      0.000000      1.000000     1.000000     0.000000\n",
        "5          0         1      1.000000      0.000000     1.000000     0.000000\n",
        "6          0         1      1.000000      0.000000     1.000000     0.000000\n",
        "7          1         0      1.000000      0.000000     0.000000     1.000000\n",
        "8          1         0      1.000000      0.000000     0.000000     1.000000\n",
        "9          0         1      1.000000      0.000000     1.000000     0.000000\n",
        "10         0         1      1.000000      0.000000     1.000000     0.000000\n",
        "11         1         0      0.000000      1.000000     1.000000     0.000000\n",
        "12         1         0      1.000000      0.000000     0.000000     1.000000\n",
        "13         1         0      0.192308      0.807692     0.807692     0.192308\n",
        "14         1         0      1.000000      0.000000     0.000000     1.000000\n",
        "15         1         0      1.000000      0.000000     0.000000     1.000000\n",
        "16         1         0      0.000000      1.000000     1.000000     0.000000\n",
        "17         1         0      0.192308      0.807692     0.807692     0.192308\n",
        "18         1         0      0.192308      0.807692     0.807692     0.192308\n",
        "19         1         0      0.000000      1.000000     1.000000     0.000000"
       ]
      }
     ],
     "prompt_number": 264
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Cars93 = com.load_data('Cars93', package='MASS')\n",
      "Cars93.columns = map(lambda x: x.lower().replace('.', '_'), Cars93.columns)\n",
      "Cars93.ix[:6, :15]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>manufacturer</th>\n",
        "      <th>model</th>\n",
        "      <th>type</th>\n",
        "      <th>min_price</th>\n",
        "      <th>price</th>\n",
        "      <th>max_price</th>\n",
        "      <th>mpg_city</th>\n",
        "      <th>mpg_highway</th>\n",
        "      <th>airbags</th>\n",
        "      <th>drivetrain</th>\n",
        "      <th>cylinders</th>\n",
        "      <th>enginesize</th>\n",
        "      <th>horsepower</th>\n",
        "      <th>rpm</th>\n",
        "      <th>rev_per_mile</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <td><strong>1</strong></td>\n",
        "      <td> Acura</td>\n",
        "      <td> Integra</td>\n",
        "      <td>   Small</td>\n",
        "      <td> 12.9</td>\n",
        "      <td> 15.9</td>\n",
        "      <td> 18.8</td>\n",
        "      <td> 25</td>\n",
        "      <td> 31</td>\n",
        "      <td>               None</td>\n",
        "      <td> Front</td>\n",
        "      <td> 4</td>\n",
        "      <td> 1.8</td>\n",
        "      <td> 140</td>\n",
        "      <td> 6300</td>\n",
        "      <td> 2890</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>2</strong></td>\n",
        "      <td> Acura</td>\n",
        "      <td>  Legend</td>\n",
        "      <td> Midsize</td>\n",
        "      <td> 29.2</td>\n",
        "      <td> 33.9</td>\n",
        "      <td> 38.7</td>\n",
        "      <td> 18</td>\n",
        "      <td> 25</td>\n",
        "      <td> Driver & Passenger</td>\n",
        "      <td> Front</td>\n",
        "      <td> 6</td>\n",
        "      <td> 3.2</td>\n",
        "      <td> 200</td>\n",
        "      <td> 5500</td>\n",
        "      <td> 2335</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>3</strong></td>\n",
        "      <td>  Audi</td>\n",
        "      <td>      90</td>\n",
        "      <td> Compact</td>\n",
        "      <td> 25.9</td>\n",
        "      <td> 29.1</td>\n",
        "      <td> 32.3</td>\n",
        "      <td> 20</td>\n",
        "      <td> 26</td>\n",
        "      <td>        Driver only</td>\n",
        "      <td> Front</td>\n",
        "      <td> 6</td>\n",
        "      <td> 2.8</td>\n",
        "      <td> 172</td>\n",
        "      <td> 5500</td>\n",
        "      <td> 2280</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>4</strong></td>\n",
        "      <td>  Audi</td>\n",
        "      <td>     100</td>\n",
        "      <td> Midsize</td>\n",
        "      <td> 30.8</td>\n",
        "      <td> 37.7</td>\n",
        "      <td> 44.6</td>\n",
        "      <td> 19</td>\n",
        "      <td> 26</td>\n",
        "      <td> Driver & Passenger</td>\n",
        "      <td> Front</td>\n",
        "      <td> 6</td>\n",
        "      <td> 2.8</td>\n",
        "      <td> 172</td>\n",
        "      <td> 5500</td>\n",
        "      <td> 2535</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>5</strong></td>\n",
        "      <td>   BMW</td>\n",
        "      <td>    535i</td>\n",
        "      <td> Midsize</td>\n",
        "      <td> 23.7</td>\n",
        "      <td> 30.0</td>\n",
        "      <td> 36.2</td>\n",
        "      <td> 22</td>\n",
        "      <td> 30</td>\n",
        "      <td>        Driver only</td>\n",
        "      <td>  Rear</td>\n",
        "      <td> 4</td>\n",
        "      <td> 3.5</td>\n",
        "      <td> 208</td>\n",
        "      <td> 5700</td>\n",
        "      <td> 2545</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td><strong>6</strong></td>\n",
        "      <td> Buick</td>\n",
        "      <td> Century</td>\n",
        "      <td> Midsize</td>\n",
        "      <td> 14.2</td>\n",
        "      <td> 15.7</td>\n",
        "      <td> 17.3</td>\n",
        "      <td> 22</td>\n",
        "      <td> 31</td>\n",
        "      <td>        Driver only</td>\n",
        "      <td> Front</td>\n",
        "      <td> 4</td>\n",
        "      <td> 2.2</td>\n",
        "      <td> 110</td>\n",
        "      <td> 5200</td>\n",
        "      <td> 2565</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "output_type": "pyout",
       "prompt_number": 276,
       "text": [
        "  manufacturer    model     type  min_price  price  max_price  mpg_city  mpg_highway  \\\n",
        "1        Acura  Integra    Small       12.9   15.9       18.8        25           31   \n",
        "2        Acura   Legend  Midsize       29.2   33.9       38.7        18           25   \n",
        "3         Audi       90  Compact       25.9   29.1       32.3        20           26   \n",
        "4         Audi      100  Midsize       30.8   37.7       44.6        19           26   \n",
        "5          BMW     535i  Midsize       23.7   30.0       36.2        22           30   \n",
        "6        Buick  Century  Midsize       14.2   15.7       17.3        22           31   \n",
        "\n",
        "              airbags drivetrain cylinders  enginesize  horsepower   rpm  \\\n",
        "1                None      Front         4         1.8         140  6300   \n",
        "2  Driver & Passenger      Front         6         3.2         200  5500   \n",
        "3         Driver only      Front         6         2.8         172  5500   \n",
        "4  Driver & Passenger      Front         6         2.8         172  5500   \n",
        "5         Driver only       Rear         4         3.5         208  5700   \n",
        "6         Driver only      Front         4         2.2         110  5200   \n",
        "\n",
        "   rev_per_mile  \n",
        "1          2890  \n",
        "2          2335  \n",
        "3          2280  \n",
        "4          2535  \n",
        "5          2545  \n",
        "6          2565  "
       ]
      }
     ],
     "prompt_number": 276
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "y, X = pt.dmatrices('drivetrain ~ mpg_city + mpg_highway + airbags + \\\n",
      "                                  enginesize + width + length + weight + price + \\\n",
      "                                  cylinders + horsepower + wheelbase - 1', Cars93)\n",
      "\n",
      "clf = tree.DecisionTreeClassifier().fit(X, y)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 277
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dot_data = StringIO.StringIO()\n",
      "tree.export_graphviz(clf, out_file=dot_data)\n",
      "graph = pydot.graph_from_dot_data(dot_data.getvalue())\n",
      "graph.write_png('tree2.png')\n",
      "HTML('<img src=\"files/tree2.png\" width=1000 height=1500/>')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<img src=\"files/tree2.png\" width=1000 height=1500/>"
       ],
       "output_type": "pyout",
       "prompt_number": 280,
       "text": [
        "<IPython.core.display.HTML at 0x113b4a590>"
       ]
      }
     ],
     "prompt_number": 280
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Pruning the fitted tree is not currently supported, so the full tree is shown above."
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}