Update and add clean

Sylvain Gugger 2020-03-06 10:19:03 -08:00
parent e87f1d54e7
commit b2f1c12d4c
196 changed files with 33475 additions and 306 deletions

File diff suppressed because one or more lines are too long


@ -1629,7 +1629,8 @@
],
"source": [
"#hide_output\n",
"VBox([widgets.Label('Select your bear!'), btn_upload, btn_run, out_pl, lbl_pred])"
"VBox([widgets.Label('Select your bear!'), \n",
" btn_upload, btn_run, out_pl, lbl_pred])"
]
},
{
@ -1946,20 +1947,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
"version": "3.7.4"
}
},
"nbformat": 4,


@ -1041,34 +1041,6 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {
"height": "600px",
"width": "365px"
},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,


@ -2543,20 +2543,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
"version": "3.7.4"
}
},
"nbformat": 4,


@ -1445,11 +1445,14 @@
"metadata": {},
"outputs": [],
"source": [
"biwi = DataBlock(blocks=(ImageBlock, PointBlock),\n",
" get_items=get_image_files,\n",
" get_y=get_ctr,\n",
" splitter=FuncSplitter(lambda o: o.parent.name=='13'),\n",
" batch_tfms=[*aug_transforms(size=(240,320)), Normalize.from_stats(*imagenet_stats)])"
"biwi = DataBlock(\n",
" blocks=(ImageBlock, PointBlock),\n",
" get_items=get_image_files,\n",
" get_y=get_ctr,\n",
" splitter=FuncSplitter(lambda o: o.parent.name=='13'),\n",
" batch_tfms=[*aug_transforms(size=(240,320)), \n",
" Normalize.from_stats(*imagenet_stats)]\n",
")"
]
},
{
@ -1925,6 +1928,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -460,7 +460,8 @@
],
"source": [
"dls = get_dls(128, 128)\n",
"learn = Learner(dls, xresnet50(), loss_func=CrossEntropyLossFlat(), metrics=accuracy)\n",
"learn = Learner(dls, xresnet50(), loss_func=CrossEntropyLossFlat(), \n",
" metrics=accuracy)\n",
"learn.fit_one_cycle(4, 3e-3)"
]
},
@ -1011,6 +1012,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -2296,6 +2296,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -9740,31 +9740,6 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,


@ -1708,7 +1708,8 @@
"TEXT = \"I liked this movie because\"\n",
"N_WORDS = 40\n",
"N_SENTENCES = 2\n",
"preds = [learn.predict(TEXT, N_WORDS, temperature=0.75) for _ in range(N_SENTENCES)]"
"preds = [learn.predict(TEXT, N_WORDS, temperature=0.75) \n",
" for _ in range(N_SENTENCES)]"
]
},
{
@ -1891,7 +1892,8 @@
"metadata": {},
"outputs": [],
"source": [
"learn = text_classifier_learner(dls_clas, AWD_LSTM, drop_mult=0.5, metrics=accuracy).to_fp16()"
"learn = text_classifier_learner(dls_clas, AWD_LSTM, drop_mult=0.5, \n",
" metrics=accuracy).to_fp16()"
]
},
{
@ -2260,20 +2262,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
"version": "3.7.4"
}
},
"nbformat": 4,


@ -602,7 +602,8 @@
"source": [
"cut = int(len(files)*0.8)\n",
"splits = [list(range(cut)), list(range(cut,len(files)))]\n",
"tls = TfmdLists(files, [Tokenizer.from_folder(path), Numericalize], splits=splits)"
"tls = TfmdLists(files, [Tokenizer.from_folder(path), Numericalize], \n",
" splits=splits)"
]
},
{
@ -1083,7 +1084,8 @@
"class SiameseTransform(Transform):\n",
" def __init__(self, files, label_func, splits):\n",
" self.labels = files.map(label_func).unique()\n",
" self.lbl2files = {l: L(f for f in files if label_func(f) == l) for l in self.labels}\n",
" self.lbl2files = {l: L(f for f in files if label_func(f) == l) \n",
" for l in self.labels}\n",
" self.label_func = label_func\n",
" self.valid = {f: self._draw(f) for f in files[splits[1]]}\n",
" \n",
@ -1095,7 +1097,8 @@
" def _draw(self, f):\n",
" same = random.random() < 0.5\n",
" cls = self.label_func(f)\n",
" if not same: cls = random.choice(L(l for l in self.labels if l != cls)) \n",
" if not same: \n",
" cls = random.choice(L(l for l in self.labels if l != cls)) \n",
" return random.choice(self.lbl2files[cls]),same"
]
},
@ -1246,20 +1249,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
"version": "3.7.4"
}
},
"nbformat": 4,


@ -497,7 +497,8 @@
}
],
"source": [
"learn = Learner(dls, LMModel1(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy)\n",
"learn = Learner(dls, LMModel1(len(vocab), 64), loss_func=F.cross_entropy, \n",
" metrics=accuracy)\n",
"learn.fit_one_cycle(4, 1e-3)"
]
},
@ -654,7 +655,8 @@
}
],
"source": [
"learn = Learner(dls, LMModel2(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy)\n",
"learn = Learner(dls, LMModel2(len(vocab), 64), loss_func=F.cross_entropy, \n",
" metrics=accuracy)\n",
"learn.fit_one_cycle(4, 1e-3)"
]
},
@ -1474,7 +1476,8 @@
}
],
"source": [
"learn = Learner(dls, LMModel5(len(vocab), 64, 2), loss_func=CrossEntropyLossFlat(), \n",
"learn = Learner(dls, LMModel5(len(vocab), 64, 2), \n",
" loss_func=CrossEntropyLossFlat(), \n",
" metrics=accuracy, cbs=ModelReseter)\n",
"learn.fit_one_cycle(15, 3e-3)"
]
@ -1867,7 +1870,8 @@
}
],
"source": [
"learn = Learner(dls, LMModel6(len(vocab), 64, 2), loss_func=CrossEntropyLossFlat(), \n",
"learn = Learner(dls, LMModel6(len(vocab), 64, 2), \n",
" loss_func=CrossEntropyLossFlat(), \n",
" metrics=accuracy, cbs=ModelReseter)\n",
"learn.fit_one_cycle(15, 1e-2)"
]
@ -2354,20 +2358,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
"version": "3.7.4"
}
},
"nbformat": 4,


@ -3757,6 +3757,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -1268,6 +1268,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -233,7 +233,341 @@
"source": [
"This picture shows the CNN body on the left (in this case, it's a regular CNN, not a ResNet, and they're using 2x2 max pooling instead of stride 2 convolutions, since this paper was written before ResNets came along) and it shows the transposed convolutional layers on the right (they're called \"up-conv\" in this picture). Then then extra skip connections are shown as grey arrows crossing from left to right (these are sometimes called *cross connections*). You can see why it's called a \"U-net\" when you see this picture!\n",
"\n",
"With this architecture, the input to the transposed convolutions is not just the lower resolution grid in the preceding layer, but also the higher resolution grid in the resnet head. This allows the U-Net to use all of the information of the original image, as it is needed. One challenge with U-Nets is that the exact architecture depends on the image size. fastai has a unique `DynamicUnet` class which auto-generates an architecture of the right size based on the data provided."
"With this architecture, the input to the transposed convolutions is not just the lower resolution grid in the preceding layer, but also the higher resolution grid in the resnet head. This allows the U-Net to use all of the information of the original image, as it is needed. One challenge with U-Nets is that the exact architecture depends on the image size. fastai has a unique `DynamicUnet` class which auto-generates an architecture of the right size based on the data provided.\n",
"\n",
"Let's focus now on an example where we leverage the fastai library to write a custom model:"
]
},
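Editor's note: the `DynamicUnet` mentioned above is what `unet_learner` builds for you. A minimal sketch of how it is used (assuming a segmentation `DataLoaders` named `dls`, which is not part of this diff):

learn = unet_learner(dls, resnet34)  # auto-generates a DynamicUnet sized to dls
learn.fine_tune(8)                   # fit the head for one epoch, then unfreeze and fit 8 more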
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### A Siamese network"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#hide\n",
"from fastai2.vision.all import *\n",
"path = untar_data(URLs.PETS)\n",
"files = get_image_files(path/\"images\")\n",
"\n",
"class SiameseImage(Tuple):\n",
" def show(self, ctx=None, **kwargs): \n",
" img1,img2,same_breed = self\n",
" if not isinstance(img1, Tensor):\n",
" if img2.size != img1.size: img2 = img2.resize(img1.size)\n",
" t1,t2 = tensor(img1),tensor(img2)\n",
" t1,t2 = t1.permute(2,0,1),t2.permute(2,0,1)\n",
" else: t1,t2 = img1,img2\n",
" line = t1.new_zeros(t1.shape[0], t1.shape[1], 10)\n",
" return show_image(torch.cat([t1,line,t2], dim=2), \n",
" title=same_breed, ctx=ctx)\n",
" \n",
"def label_func(fname):\n",
" return re.match(r'^(.*)_\\d+.jpg$', fname.name).groups()[0]\n",
"\n",
"class SiameseTransform(Transform):\n",
" def __init__(self, files, label_func, splits):\n",
" self.labels = files.map(label_func).unique()\n",
" self.lbl2files = {l: L(f for f in files if label_func(f) == l) for l in self.labels}\n",
" self.label_func = label_func\n",
" self.valid = {f: self._draw(f) for f in files[splits[1]]}\n",
" \n",
" def encodes(self, f):\n",
" f2,t = self.valid.get(f, self._draw(f))\n",
" img1,img2 = PILImage.create(f),PILImage.create(f2)\n",
" return SiameseImage(img1, img2, t)\n",
" \n",
" def _draw(self, f):\n",
" same = random.random() < 0.5\n",
" cls = self.label_func(f)\n",
" if not same: cls = random.choice(L(l for l in self.labels if l != cls)) \n",
" return random.choice(self.lbl2files[cls]),same\n",
" \n",
"splits = RandomSplitter()(files)\n",
"tfm = SiameseTransform(files, label_func, splits)\n",
"tls = TfmdLists(files, tfm, splits=splits)\n",
"dls = tls.dataloaders(after_item=[Resize(224), ToTensor], \n",
" after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's go back to the input pipeline we set up in <<chapter_midlevel_data>> for a Siamese network. If your remember, it consisted of pair of images with the label being `True` or `False`, depending on if they were in the same class or not.\n",
"\n",
"Using what we just saw, let's build a custom model for this task and train it. How? We will use a pretrained architecture and pass our two images throught it. Then we can concatenate the results and send them to a custom head that will return two predictions. In terms of modules, this looks like this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class SiameseModel(Module):\n",
" def __init__(self, encoder, head):\n",
" self.encoder,self.head = encoder,head\n",
" \n",
" def forward(self, x1, x2):\n",
" ftrs = torch.cat([self.encoder(x1), self.encoder(x2)], dim=1)\n",
" return self.head(ftrs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To create our encoder, we just need to take a pretrained model and cut it, as we explained before. The function `create_body` does that for us, we just have to pass it the place we want to cut. If we remember our look in the dictionary of metadata for pretrained models, the cut value for a resnet is -2:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"encoder = create_body(resnet34, cut=-2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then we can create our head. A look at the encoder tells us the last layer has 512 features, so this head will need to receive `512*4`. Why 4? First we have to multiply by 2 because we have two images. Then we need a second multiplication by 2 because of our concat-pool trick."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"head = create_head(512*4, 2, ps=0.5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"With our encoder and head, we can now build our model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = SiameseModel(encoder, head)"
]
},
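Editor's note: a quick sanity check of the assembled model (a hypothetical snippet, assuming the `dls` built in the hidden cell above; a batch is a tuple of two image batches plus the boolean targets):

x1,x2,targ = dls.one_batch()
out = model.to(x1.device)(x1, x2)  # move the model to the batch's device first
out.shape                          # expected: torch.Size([64, 2]) with the default batch size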
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before using `Learner`, we have two more things to define. First, we must define the loss function we want to use. It's regular cross entropy, but since our targets are booleans, we need to convert them to integers or PyTorch will throw an error."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def loss_func(out, targ):\n",
" return nn.CrossEntropyLoss()(out, targ.long())"
]
},
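Editor's note: for example (hypothetical values, purely to illustrate the `targ.long()` conversion):

out  = torch.randn(4, 2)                  # fake predictions for a batch of 4
targ = tensor([True, False, True, True])  # boolean targets, as in SiameseImage
loss_func(out, targ)                      # targ.long() turns them into tensor([1, 0, 1, 1])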
{
"cell_type": "markdown",
"metadata": {},
"source": [
"More importantly, to take full advantage of transfer learning, we have to define a custom *splitter*. A splitter is a function that tells the fastai library how to split the model in several parameter groups. This is what is used behind the scenes not only train the head of a model when we do transfert learning. \n",
"\n",
"Here we want two parameter groups: one for the encoder and one for the head. We can thus define the following splitter (`params` is jsut a function that returns all parameters of a given module):"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def siamese_splitter(model):\n",
" return [params(model.encoder), params(model.head)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then we can define our `Learner` by passing the data, model, loss function, spliiter and any metric we want. Since we are not using a convenience function from fastai for transfer learning (like `cnn_learner`), we have to call `learn.freeze` manually. This will make sure only the last parameter groups (in this case, the head) is trained. "
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"learn = Learner(dls, model, loss_func=loss_func, \n",
" splitter=siamese_splitter, metrics=accuracy)\n",
"learn.freeze()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then we can directly train our model with the usual methods:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>0.367015</td>\n",
" <td>0.281242</td>\n",
" <td>0.885656</td>\n",
" <td>00:26</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.307688</td>\n",
" <td>0.214721</td>\n",
" <td>0.915426</td>\n",
" <td>00:26</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.275221</td>\n",
" <td>0.170615</td>\n",
" <td>0.936401</td>\n",
" <td>00:26</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.223771</td>\n",
" <td>0.159633</td>\n",
" <td>0.943843</td>\n",
" <td>00:26</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit_one_cycle(4, 3e-3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before unfreezing and training a bit more with discriminative learning rates..."
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>0.212744</td>\n",
" <td>0.159033</td>\n",
" <td>0.944520</td>\n",
" <td>00:35</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.201893</td>\n",
" <td>0.159615</td>\n",
" <td>0.942490</td>\n",
" <td>00:35</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.204606</td>\n",
" <td>0.152338</td>\n",
" <td>0.945196</td>\n",
" <td>00:36</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.213203</td>\n",
" <td>0.148346</td>\n",
" <td>0.947903</td>\n",
" <td>00:36</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.unfreeze()\n",
"learn.fit_one_cycle(4, slice(1e-6,1e-4))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"94.8% is very good when we remember a classifier trained the same way (with no data augmentation) had an arror rate of 7%."
]
},
{
@ -476,6 +810,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -1307,6 +1307,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -2037,7 +2037,8 @@
" return self.out\n",
" \n",
" def backward(self):\n",
" self.inp.g = 2. * (self.inp.squeeze() - self.targ).unsqueeze(-1) / self.targ.shape[0]"
" x = (self.inp.squeeze()-self.targ).unsqueeze(-1)\n",
" self.inp.g = 2.*x/self.targ.shape[0]"
]
},
{
@ -2189,7 +2190,8 @@
"source": [
"class Mse(LayerFunction):\n",
" def forward (self, inp, targ): return (inp.squeeze() - targ).pow(2).mean()\n",
" def bwd(self, out, inp, targ): inp.g = 2*(inp.squeeze()-targ).unsqueeze(-1) / targ.shape[0]"
" def bwd(self, out, inp, targ): \n",
" inp.g = 2*(inp.squeeze()-targ).unsqueeze(-1) / targ.shape[0]"
]
},
{
@ -2446,6 +2448,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -653,6 +653,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -26,6 +26,13 @@
"The end of chapter questionnaire is particularly important for this chapter. This is where we will be getting you started on the many interesting directions that you could take, using this chapter as your starting out point. What we are really saying is: follow through with this chapter on your computer, not on paper, and do lots of experiments, web searches, and whatever else you need to understand what's going on. You've built up the skills and expertise to do this in the rest of this book, so we think you are going to go great!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First let's start with gathering (manually) the data."
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -42,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -58,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -67,7 +74,7 @@
"Path('/home/jhoward/.fastai/data/imagenette2-160/val/n03417042/n03417042_3752.JPEG')"
]
},
"execution_count": 3,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -86,7 +93,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -95,7 +102,7 @@
"Path('/home/jhoward/.fastai/data/imagenette2-160/val/n03417042/n03417042_3752.JPEG')"
]
},
"execution_count": 4,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -117,7 +124,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -127,7 +134,7 @@
"<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=213x160 at 0x7FA45AC27D50>"
]
},
"execution_count": 5,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -139,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -148,7 +155,7 @@
"torch.Size([160, 213, 3])"
]
},
"execution_count": 6,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -167,7 +174,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -176,7 +183,7 @@
"(#10) ['n03417042','n03445777','n03888257','n03394916','n02979186','n03000684','n03425413','n01440764','n03028079','n02102040']"
]
},
"execution_count": 7,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -194,7 +201,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -212,7 +219,7 @@
" 'n02102040': 9}"
]
},
"execution_count": 8,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -225,14 +232,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"That's all the pieces we need to put together our `Dataset`:"
"That's all the pieces we need to put together our `Dataset`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataset"
"### Dataset"
]
},
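Editor's note: the cell that defines the `Dataset` is elided from this hunk; a hedged reconstruction, assuming the `v2i` vocab-to-index mapping built above, looks roughly like this:

class Dataset:
    def __init__(self, fns): self.fns = fns
    def __len__(self): return len(self.fns)
    def __getitem__(self, i):
        im = Image.open(self.fns[i]).resize((64,64)).convert('RGB')
        y = v2i[self.fns[i].parent.name]          # label index from the parent directory name
        return tensor(im).float()/255, tensor(y)  # matches the (64, 64, 3) float tensor shown below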
{
@ -244,7 +251,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -266,7 +273,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -275,7 +282,7 @@
"(9469, 3925)"
]
},
"execution_count": 10,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -295,7 +302,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -304,7 +311,7 @@
"(torch.Size([64, 64, 3]), tensor(0))"
]
},
"execution_count": 11,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -317,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -346,7 +353,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -364,7 +371,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -373,7 +380,7 @@
"(torch.Size([2, 64, 64, 3]), tensor([0, 0]))"
]
},
"execution_count": 14,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -392,7 +399,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -419,7 +426,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -428,7 +435,7 @@
"(torch.Size([128, 64, 64, 3]), torch.Size([128]), 74)"
]
},
"execution_count": 16,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -452,7 +459,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -461,7 +468,7 @@
"[tensor([0.4544, 0.4453, 0.4141]), tensor([0.2812, 0.2766, 0.2981])]"
]
},
"execution_count": 17,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -480,7 +487,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -501,7 +508,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -511,7 +518,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -520,7 +527,7 @@
"(tensor([0.3732, 0.4907, 0.5633]), tensor([1.0212, 1.0311, 1.0131]))"
]
},
"execution_count": 20,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -537,6 +544,13 @@
"Here `tfm_x` isn't just applying `Normalize`, but is also permuting the axis order from `NHWC` to `NCHW` (see <<chapter_convolutions>> if you need a reminder what these acronyms refer to). PIL uses `HWC` axis order, which we can't use with PyTorch, hence the need for this `permute`."
]
},
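Editor's note: a tiny illustration of that permute (hypothetical tensor, not from the notebook):

x = torch.rand(128, 64, 64, 3)  # NHWC, the layout of PIL-derived tensors
x.permute(0, 3, 1, 2).shape     # NCHW: torch.Size([128, 3, 64, 64])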
{
"cell_type": "markdown",
"metadata": {},
"source": [
"That's all we need for the data for our model. So now we need the model itself!"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -548,12 +562,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"That's all we need for the data for our model. So now we need the model itself! To create a model, we'll need `Module`. To create `Module`, we'll need `Parameter`, so let's start there. Recall that in <<chapter_collab>> we said that `Parameter` \"this class doesn't actually add any functionality (other than automatically calling `requires_grad_()` for us). It's only used as a 'marker' to show what to include in `parameters()`\". Here's a definition which does exactly that:"
"To create a model, we'll need `Module`. To create `Module`, we'll need `Parameter`, so let's start there. Recall that in <<chapter_collab>> we said that `Parameter` \"this class doesn't actually add any functionality (other than automatically calling `requires_grad_()` for us). It's only used as a 'marker' to show what to include in `parameters()`\". Here's a definition which does exactly that:"
]
},
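Editor's note: the cell containing that definition is elided from this diff; a minimal sketch of a `Parameter` that just marks a tensor and switches on gradients might look like this:

class Parameter(Tensor):
    def __new__(self, x): return Tensor._make_subclass(Parameter, x, True)
    def __init__(self, x): self.requires_grad_()  # a 'marker' subclass with gradients on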
{
"cell_type": "code",
"execution_count": 21,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -573,7 +587,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -582,7 +596,7 @@
"tensor(3., requires_grad=True)"
]
},
"execution_count": 22,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -600,7 +614,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -658,7 +672,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -686,7 +700,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -695,7 +709,7 @@
"2"
]
},
"execution_count": 25,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -714,7 +728,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -723,7 +737,7 @@
"torch.Size([128, 4, 64, 64])"
]
},
"execution_count": 26,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -743,7 +757,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -766,7 +780,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -775,7 +789,7 @@
"torch.Size([3, 2])"
]
},
"execution_count": 28,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -795,7 +809,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -814,7 +828,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -823,7 +837,7 @@
"4"
]
},
"execution_count": 30,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -842,7 +856,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -851,7 +865,7 @@
"device(type='cuda', index=5)"
]
},
"execution_count": 31,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -865,7 +879,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Simple CNN"
"We can now use those pieces to create a CNN."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Simple CNN"
]
},
{
@ -877,7 +898,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -915,7 +936,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -932,7 +953,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -956,7 +977,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -965,7 +986,7 @@
"10"
]
},
"execution_count": 35,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -984,7 +1005,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1003,7 +1024,7 @@
"torch.Size([128, 10])"
]
},
"execution_count": 36,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -1016,6 +1037,13 @@
"r.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We have data and model. Now we need a loss function."
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -1027,12 +1055,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We have data and model. Now we need a loss function. We've already seen how to define \"negative log likelihood\":"
"We've already seen how to define \"negative log likelihood\":"
]
},
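Editor's note: the definitions themselves are elided from this hunk; they are along these lines (a sketch consistent with the `cross_entropy` shown further down, which takes the mean itself):

def log_softmax(x): return x - x.exp().sum(-1, keepdim=True).log()
def nll(input, target): return -input[range(target.shape[0]), target]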
{
"cell_type": "code",
"execution_count": 37,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1048,7 +1076,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1057,7 +1085,7 @@
"tensor(-1.2790, grad_fn=<SelectBackward>)"
]
},
"execution_count": 38,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -1077,7 +1105,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1086,7 +1114,7 @@
"tensor(2.5666, grad_fn=<NegBackward>)"
]
},
"execution_count": 39,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -1109,7 +1137,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1118,7 +1146,7 @@
"tensor(-1.2790, grad_fn=<SelectBackward>)"
]
},
"execution_count": 40,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -1144,7 +1172,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1153,7 +1181,7 @@
"tensor(True)"
]
},
"execution_count": 41,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -1173,7 +1201,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1182,7 +1210,7 @@
"tensor(3.9784, grad_fn=<SelectBackward>)"
]
},
"execution_count": 42,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -1204,7 +1232,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1220,7 +1248,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1229,7 +1257,7 @@
"tensor(-1.2790, grad_fn=<SelectBackward>)"
]
},
"execution_count": 44,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@ -1247,13 +1275,20 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def cross_entropy(preds, yb): return nll(log_softmax(preds), yb).mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's now combine all those pieces together to create a `Learner`."
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -1270,7 +1305,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1291,7 +1326,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1310,12 +1345,10 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#id class_learner\n",
"#caption The Learner class\n",
"class Learner:\n",
" def __init__(self, model, dls, loss_func, lr, cbs, opt_func=SGD):\n",
" store_attr(self, 'model,dls,loss_func,lr,cbs,opt_func')\n",
@ -1375,7 +1408,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Callbacks"
"### Callbacks"
]
},
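Editor's note: the callback cells themselves are elided from this diff; the pattern they implement is roughly the following sketch (hedged; it assumes fastcore's `GetAttr` so a callback can read attributes off the `learner` it is attached to):

class Callback(GetAttr): _default='learner'

class PrintLossCallback(Callback):           # hypothetical example callback
    def after_batch(self): print(self.loss)  # proxies to self.learner.loss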
{
@ -1391,7 +1424,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1407,7 +1440,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1430,7 +1463,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1460,7 +1493,7 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1508,14 +1541,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"It's quite amazing to realize that we can implement all the key ideas from fastai's `Learner` in so little code!"
"It's quite amazing to realize that we can implement all the key ideas from fastai's `Learner` in so little code! Let's now add some learning rate scheduling."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Annealing"
"### Scheduling the learning rate"
]
},
{
@ -1527,7 +1560,7 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1556,7 +1589,7 @@
},
{
"cell_type": "code",
"execution_count": 63,
"execution_count": null,
"metadata": {},
"outputs": [
{
@ -1719,7 +1752,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1756,6 +1789,13 @@
"plt.plot(onecyc.lrs);"
]
},
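Editor's note: as a reminder of what such a schedule computes, here is an illustrative cosine-interpolation helper (not the notebook's exact code):

import math
def sched_cos(start, end, pct):
    # cosine interpolation from `start` to `end` as `pct` goes 0 -> 1
    return end + (start-end)/2 * (1 + math.cos(math.pi*pct))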
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TK conclusion"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -1841,6 +1881,9 @@
}
],
"metadata": {
"jupytext": {
"split_at_heading": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
@ -1856,20 +1899,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
"version": "3.7.4"
}
},
"nbformat": 4,


@ -69,6 +69,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -292,6 +292,18 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,


@ -533,31 +533,6 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,

clean/01_intro.ipynb (new file, 1504 lines): diff suppressed because one or more lines are too long

clean/02_production.ipynb (new file, 1029 lines): diff suppressed because one or more lines are too long

clean/03_ethics.ipynb (new file, 261 lines):

@ -0,0 +1,261 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Ethics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sidebar: Acknowledgement: Dr Rachel Thomas"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### End sidebar"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Key examples for data ethics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bugs and recourse: Buggy algorithm used for healthcare benefits"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Feedback loops: YouTube's recommendation system"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bias: Professor Lantanya Sweeney \"arrested\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Why does this matter?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Integrating machine learning with product design"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Topics in Data Ethics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Recourse and accountability"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Feedback loops"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bias"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Historical bias"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Measurement bias"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Aggregation Bias"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Representation Bias"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Addressing different types of bias"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Humans are biased, so does algorithmic bias matter?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Disinformation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Identifying and addressing ethical issues"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Analyze a project you are working on"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Processes to implement"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Ethical Lenses"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### The power of diversity"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fairness, accountability, and transparency"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Role of Policy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### The effectiveness of regulation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Rights and policy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Cars: a historical precedent"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Conclusion"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Questionnaire"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Further research:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Section 1: that's a wrap!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"split_at_heading": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

clean/04_mnist_basics.ipynb (new file, 3966 lines): diff suppressed because one or more lines are too long

clean/05_pet_breeds.ipynb (new file, 1732 lines): diff suppressed because one or more lines are too long

clean/06_multicat.ipynb (new file, 1332 lines): diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

clean/08_collab.ipynb (new file, 1685 lines): diff suppressed because one or more lines are too long

clean/09_tabular.ipynb (new file, 8301 lines): diff suppressed because one or more lines are too long

clean/10_nlp.ipynb (new file, 1531 lines): diff suppressed because it is too large

File diff suppressed because one or more lines are too long

clean/12_nlp_dive.ipynb (new file, 1590 lines): diff suppressed because it is too large

clean/13_convolutions.ipynb (new file, 2626 lines): diff suppressed because one or more lines are too long

clean/14_resnet.ipynb (new file, 862 lines): diff suppressed because one or more lines are too long

clean/15_arch_details.ipynb (new file, 421 lines):

@ -0,0 +1,421 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#hide\n",
"from utils import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Application architectures deep dive"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Computer vision"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### cnn_learner"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'cut': -2,\n",
" 'split': <function fastai2.vision.learner._resnet_split(m)>,\n",
" 'stats': ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model_meta[resnet50]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Sequential(\n",
" (0): AdaptiveConcatPool2d(\n",
" (ap): AdaptiveAvgPool2d(output_size=1)\n",
" (mp): AdaptiveMaxPool2d(output_size=1)\n",
" )\n",
" (1): full: False\n",
" (2): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (3): Dropout(p=0.25, inplace=False)\n",
" (4): Linear(in_features=20, out_features=512, bias=False)\n",
" (5): ReLU(inplace=True)\n",
" (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (7): Dropout(p=0.5, inplace=False)\n",
" (8): Linear(in_features=512, out_features=2, bias=False)\n",
")"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"create_head(20,2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### unet_learner"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### A Siamese network"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#hide\n",
"from fastai2.vision.all import *\n",
"path = untar_data(URLs.PETS)\n",
"files = get_image_files(path/\"images\")\n",
"\n",
"class SiameseImage(Tuple):\n",
" def show(self, ctx=None, **kwargs): \n",
" img1,img2,same_breed = self\n",
" if not isinstance(img1, Tensor):\n",
" if img2.size != img1.size: img2 = img2.resize(img1.size)\n",
" t1,t2 = tensor(img1),tensor(img2)\n",
" t1,t2 = t1.permute(2,0,1),t2.permute(2,0,1)\n",
" else: t1,t2 = img1,img2\n",
" line = t1.new_zeros(t1.shape[0], t1.shape[1], 10)\n",
" return show_image(torch.cat([t1,line,t2], dim=2), \n",
" title=same_breed, ctx=ctx)\n",
" \n",
"def label_func(fname):\n",
" return re.match(r'^(.*)_\\d+.jpg$', fname.name).groups()[0]\n",
"\n",
"class SiameseTransform(Transform):\n",
" def __init__(self, files, label_func, splits):\n",
" self.labels = files.map(label_func).unique()\n",
" self.lbl2files = {l: L(f for f in files if label_func(f) == l) for l in self.labels}\n",
" self.label_func = label_func\n",
" self.valid = {f: self._draw(f) for f in files[splits[1]]}\n",
" \n",
" def encodes(self, f):\n",
" f2,t = self.valid.get(f, self._draw(f))\n",
" img1,img2 = PILImage.create(f),PILImage.create(f2)\n",
" return SiameseImage(img1, img2, t)\n",
" \n",
" def _draw(self, f):\n",
" same = random.random() < 0.5\n",
" cls = self.label_func(f)\n",
" if not same: cls = random.choice(L(l for l in self.labels if l != cls)) \n",
" return random.choice(self.lbl2files[cls]),same\n",
" \n",
"splits = RandomSplitter()(files)\n",
"tfm = SiameseTransform(files, label_func, splits)\n",
"tls = TfmdLists(files, tfm, splits=splits)\n",
"dls = tls.dataloaders(after_item=[Resize(224), ToTensor], \n",
" after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class SiameseModel(Module):\n",
" def __init__(self, encoder, head):\n",
" self.encoder,self.head = encoder,head\n",
" \n",
" def forward(self, x1, x2):\n",
" ftrs = torch.cat([self.encoder(x1), self.encoder(x2)], dim=1)\n",
" return self.head(ftrs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"encoder = create_body(resnet34, cut=-2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"head = create_head(512*4, 2, ps=0.5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = SiameseModel(encoder, head)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def loss_func(out, targ):\n",
" return nn.CrossEntropyLoss()(out, targ.long())"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def siamese_splitter(model):\n",
" return [params(model.encoder), params(model.head)]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"learn = Learner(dls, model, loss_func=loss_func, \n",
" splitter=siamese_splitter, metrics=accuracy)\n",
"learn.freeze()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>0.367015</td>\n",
" <td>0.281242</td>\n",
" <td>0.885656</td>\n",
" <td>00:26</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.307688</td>\n",
" <td>0.214721</td>\n",
" <td>0.915426</td>\n",
" <td>00:26</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.275221</td>\n",
" <td>0.170615</td>\n",
" <td>0.936401</td>\n",
" <td>00:26</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.223771</td>\n",
" <td>0.159633</td>\n",
" <td>0.943843</td>\n",
" <td>00:26</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit_one_cycle(4, 3e-3)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>accuracy</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>0.212744</td>\n",
" <td>0.159033</td>\n",
" <td>0.944520</td>\n",
" <td>00:35</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.201893</td>\n",
" <td>0.159615</td>\n",
" <td>0.942490</td>\n",
" <td>00:35</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.204606</td>\n",
" <td>0.152338</td>\n",
" <td>0.945196</td>\n",
" <td>00:36</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.213203</td>\n",
" <td>0.148346</td>\n",
" <td>0.947903</td>\n",
" <td>00:36</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.unfreeze()\n",
"learn.fit_one_cycle(4, slice(1e-6,1e-4))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Natural language processing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tabular"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Wrapping up architectures"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Questionnaire"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Further research"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"split_at_heading": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

clean/16_accel_sgd.ipynb (new file, 719 lines): diff suppressed because one or more lines are too long

clean/17_foundations.ipynb (new file, 1565 lines): diff suppressed because it is too large

clean/18_CAM.ipynb (new file, 462 lines): diff suppressed because one or more lines are too long

clean/19_learner.ipynb (new file, 1319 lines): diff suppressed because one or more lines are too long

clean/20_conclusion.ipynb (new file, 42 lines):

@ -0,0 +1,42 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Concluding thoughts"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"split_at_heading": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

clean/app_blog.ipynb (new file, 95 lines):

@ -0,0 +1,95 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#hide\n",
"from utils import *\n",
"from fastai2.vision.widgets import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Creating a blog"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Blogging with GitHub Pages"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating the repository"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Setting up your homepage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating posts"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Synchronizing GitHub and your computer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Jupyter for blogging"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"split_at_heading": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

clean/app_jupyter.ipynb (new file, 273 lines): diff suppressed because one or more lines are too long

clean/images (new symbolic link):

@ -0,0 +1 @@
../images

Several dozen binary image files were also updated; previews are not shown, and the original diff lists only before/after file sizes. Some files were not shown because too many files have changed in this diff.