From 847cf17367bc29a6ce0c94fc8ac17b712659e73c Mon Sep 17 00:00:00 2001 From: Adam Comella Date: Wed, 21 Jun 2023 21:50:31 -0700 Subject: [PATCH] 09_tabular: Add note about bug when splitting data As reported in #325, there's a mistake in the predicate that splits the data into the training and validation sets. Jeremy commented that it won't be fixed in this edition: https://github.com/fastai/fastbook/pull/337#issuecomment-735401046 This PR adds an errata comment to the notebook so that readers are aware of the mistake. This can save them time if they run into the bug on their own. --- 09_tabular.ipynb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/09_tabular.ipynb b/09_tabular.ipynb index e3ed14c..62c7008 100644 --- a/09_tabular.ipynb +++ b/09_tabular.ipynb @@ -726,6 +726,13 @@ "metadata": {}, "outputs": [], "source": [ + "# Errata:\n", + "# This line should have been:\n", + "# cond = (df.saleYear<2011) | ((df.saleYear==2011) & (df.saleMonth<10))\n", + "#\n", + "# Correcting this line is postponed to a future edition of the book because\n", + "# it requires a re-analysis of the data. For discussion see:\n", + "# https://github.com/fastai/fastbook/issues/325.\n", "cond = (df.saleYear<2011) | (df.saleMonth<10)\n", "train_idx = np.where( cond)[0]\n", "valid_idx = np.where(~cond)[0]\n",