{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SSD300 Inference Tutorial\n",
"\n",
"This is a brief tutorial that shows how to use a trained SSD300 for inference on the Pascal VOC datasets. If you'd like more detailed explanations, please refer to [`ssd300_training.ipynb`](https://github.com/pierluigiferrari/ssd_keras/blob/master/ssd300_training.ipynb)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/dlsaavedra/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n"
]
}
],
"source": [
"from keras import backend as K\n",
"from keras.models import load_model\n",
"from keras.preprocessing import image\n",
"from keras.optimizers import Adam\n",
"from imageio import imread\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"\n",
"from models.keras_ssd300 import ssd_300\n",
"from keras_loss_function.keras_ssd_loss import SSDLoss\n",
"from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes\n",
"from keras_layers.keras_layer_DecodeDetections import DecodeDetections\n",
"from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast\n",
"from keras_layers.keras_layer_L2Normalization import L2Normalization\n",
"\n",
"from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast\n",
"\n",
"from data_generator.object_detection_2d_data_generator import DataGenerator\n",
"from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels\n",
"from data_generator.object_detection_2d_geometric_ops import Resize\n",
"from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Set the image size.\n",
"img_height = 300\n",
"img_width = 300"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Load a trained SSD\n",
"\n",
"Either load a trained model or build a model and load trained weights into it. Since the HDF5 files I'm providing contain only the weights for the various SSD versions, not the complete models, you'll have to go with the latter option when using this implementation for the first time. You can then of course save the model and next time load the full model directly, without having to build it.\n",
"\n",
"You can find the download links to all the trained model weights in the README."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.1. Build the model and load trained weights into it"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ok\n"
]
}
],
"source": [
"# 1: Build the Keras model\n",
"\n",
"K.clear_session() # Clear previous models from memory.\n",
"\n",
"model = ssd_300(image_size=(img_height, img_width, 3),\n",
" n_classes=20,\n",
" mode='inference',\n",
" l2_regularization=0.0005,\n",
" scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]\n",
" aspect_ratios_per_layer=[[1.0, 2.0, 0.5],\n",
" [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
" [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
" [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
" [1.0, 2.0, 0.5],\n",
" [1.0, 2.0, 0.5]],\n",
" two_boxes_for_ar1=True,\n",
" steps=[8, 16, 32, 64, 100, 300],\n",
" offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
" clip_boxes=False,\n",
" variances=[0.1, 0.1, 0.2, 0.2],\n",
" normalize_coords=True,\n",
" subtract_mean=[123, 117, 104],\n",
" swap_channels=[2, 1, 0],\n",
" confidence_thresh=0.5,\n",
" iou_threshold=0.5,\n",
" top_k=200,\n",
" nms_max_output_size=400)\n",
"\n",
"# 2: Load the trained weights into the model.\n",
"\n",
"# TODO: Set the path of the trained weights.\n",
"weights_path = 'VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5'\n",
"\n",
"model.load_weights(weights_path, by_name=True)\n",
"\n",
"# 3: Compile the model so that Keras won't complain the next time you load it.\n",
"\n",
"adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)\n",
"\n",
"ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)\n",
"\n",
"model.compile(optimizer=adam, loss=ssd_loss.compute_loss)\n",
"print('ok')"
]
},
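{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, you can now save the complete model (architecture, weights and optimizer state) so that section 1.2 below can load it directly with `load_model()` instead of rebuilding it. The cell below is just a sketch; the output filename is an arbitrary example, not a file provided with the repository."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: save the full model so that it can later be loaded with `load_model()`.\n",
"# The filename is an arbitrary example.\n",
"model.save('ssd300_pascal_voc_inference.h5')"
]
},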
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.2. Load a trained model"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Cannot create group in read only mode.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-d1cfbcce9ba6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m'L2Normalization'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mL2Normalization\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m'DecodeDetections'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mDecodeDetections\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m 'compute_loss': ssd_loss.compute_loss})\n\u001b[0m",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/engine/saving.py\u001b[0m in \u001b[0;36mload_model\u001b[0;34m(filepath, custom_objects, compile)\u001b[0m\n\u001b[1;32m 417\u001b[0m \u001b[0mf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh5dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'r'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 419\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_deserialize_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcustom_objects\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 420\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mopened_new_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/engine/saving.py\u001b[0m in \u001b[0;36m_deserialize_model\u001b[0;34m(f, custom_objects, compile)\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 221\u001b[0;31m \u001b[0mmodel_config\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'model_config'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 222\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmodel_config\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'No model found in config.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/keras/utils/io_utils.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 300\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 301\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_only\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 302\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Cannot create group in read only mode.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 303\u001b[0m \u001b[0mval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mH5Dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_group\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 304\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Cannot create group in read only mode."
]
}
],
"source": [
"# TODO: Set the path to the `.h5` file of the model to be loaded.\n",
"model_path = 'VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5'\n",
"\n",
"# We need to create an SSDLoss object in order to pass that to the model loader.\n",
"ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)\n",
"\n",
"K.clear_session() # Clear previous models from memory.\n",
"\n",
"model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,\n",
" 'L2Normalization': L2Normalization,\n",
" 'DecodeDetections': DecodeDetections,\n",
" 'compute_loss': ssd_loss.compute_loss})"
]
},
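{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the `ValueError: Cannot create group in read only mode.` above is what you get when the `.h5` file contains only weights rather than a complete saved model, so `load_model()` finds no model configuration inside it. For weights-only files use section 1.1; section 1.2 only works with a model you saved yourself via `model.save()`. If in doubt, a quick check like the sketch below (assuming `h5py` is available, which Keras needs anyway) tells you which kind of file you have."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A full saved Keras model stores its architecture in the 'model_config'\n",
"# attribute of the HDF5 file; a weights-only file does not.\n",
"import h5py\n",
"\n",
"with h5py.File(model_path, 'r') as f:\n",
"    print('Full model file' if 'model_config' in f.attrs else 'Weights-only file')"
]
},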
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Load some images\n",
"\n",
"Load some images for which you'd like the model to make predictions."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"orig_images = [] # Store the images here.\n",
"input_images = [] # Store resized versions of the images here.\n",
"\n",
"# We'll only load one image in this example.\n",
"img_path = 'Prueba_1.png'\n",
"\n",
"orig_images.append(imread(img_path))\n",
"img = image.load_img(img_path, target_size=(img_height, img_width))\n",
"img = image.img_to_array(img) \n",
"input_images.append(img)\n",
"input_images = np.array(input_images)"
]
},
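{
"cell_type": "markdown",
"metadata": {},
"source": [
"To run inference on several images at once, you can fill `input_images` in a loop before stacking it into a single batch array. A minimal sketch, assuming the listed file names are placeholders for images that actually exist on disk:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: batch several images together. The file names below are placeholders.\n",
"img_paths = ['example_1.jpg', 'example_2.jpg', 'example_3.jpg']\n",
"\n",
"orig_images = []\n",
"input_images = []\n",
"\n",
"for img_path in img_paths:\n",
"    orig_images.append(imread(img_path))\n",
"    img = image.load_img(img_path, target_size=(img_height, img_width))\n",
"    input_images.append(image.img_to_array(img))\n",
"\n",
"input_images = np.array(input_images)  # Shape: (batch_size, 300, 300, 3)"
]
},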
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Make predictions"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(input_images)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`y_pred` contains a fixed number of predictions per batch item (200 if you use the original model configuration), many of which are low-confidence predictions or dummy entries. We therefore need to apply a confidence threshold to filter out the bad predictions. Set this confidence threshold value how you see fit."
]
},
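{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can verify this by looking at the shape of `y_pred`: with the 'inference' model built in section 1.1 it should be `(batch_size, top_k, 6)`, where each of the `top_k = 200` rows holds `[class_id, confidence, xmin, ymin, xmax, ymax]`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity check: (batch_size, top_k, 6) for the 'inference' model built above.\n",
"print(y_pred.shape)"
]
},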
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted boxes:\n",
"\n",
" class conf xmin ymin xmax ymax\n",
"[[ 8. 0.7 98.11 15.88 192.36 139.94]]\n"
]
}
],
"source": [
"confidence_threshold = 0.5\n",
"\n",
"y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])]\n",
"\n",
"np.set_printoptions(precision=2, suppress=True, linewidth=90)\n",
"print(\"Predicted boxes:\\n\")\n",
"print(' class conf xmin ymin xmax ymax')\n",
"print(y_pred_thresh[0])"
]
},
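{
"cell_type": "markdown",
"metadata": {},
"source": [
"Side note: the simple thresholding above is possible because the model was built in 'inference' mode, so decoding and non-maximum suppression already happen inside the `DecodeDetections` layer. For a model built in 'training' mode, the raw output would have to be decoded explicitly with the imported `decode_detections` function, roughly as in the sketch below (`y_pred_raw` is a hypothetical raw-output variable and the argument values are only illustrative)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Only needed if the model had been built in 'training' mode, whose raw output\n",
"# (here called `y_pred_raw`, a hypothetical variable) is not yet decoded.\n",
"y_pred_decoded = decode_detections(y_pred_raw,\n",
"                                   confidence_thresh=0.5,\n",
"                                   iou_threshold=0.45,\n",
"                                   top_k=200,\n",
"                                   normalize_coords=True,\n",
"                                   img_height=img_height,\n",
"                                   img_width=img_width)"
]
},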
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Visualize the predictions\n",
"\n",
"We just resized the input image above and made predictions on the distorted image. We'd like to visualize the predictions on the image in its original size though, so below we'll transform the coordinates of the predicted boxes accordingly."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0wAAAKvCAYAAABZOk8vAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzsvUuPJEmSJvaJqtrDPSIyq7Je3dPTvTO7HGCH5GJBgCABHngjQeyFB15Iguc98QeQBx75J/bAEw/8BcvHgQBPHGBOS2KHM9zd2e7pR3V1V2VmZIS720NVhQdRUVMzN/fwyKrO7eK6AFWR7m6mpqr2kk8/kU+ImXG1q13tale72tWudrWrXe1qVzs286+6A1e72tWudrWrXe1qV7va1a72+2pXwHS1q13tale72tWudrWrXe1qJ+wKmK52tatd7WpXu9rVrna1q13thF0B09WudrWrXe1qV7va1a52taudsCtgutrVrna1q13tale72tWudrUTdgVMV7va1a52tatd7WpXu9rVrnbCPjhgIqL/hIj+ioj+ORH9Nx/6+Fe72tWudrWrXe1qV7va1a52qdGHrMNERBbA/wvgPwLwCwB/DuC/YOa/+GCduNrVrna1q13tale72tWudrUL7UMzTP8egH/OzH/NzAOA/wnAf/qB+3C1q13tale72tWudrWrXe1qF5n7wMf7EYCfF59/AeDfP7fDJ68M/+TH824yAFpspzzZ8vvn2Fq7a22f2u5SIxAYfPLzpf36trZ2XD3W8pgMwBTbfxfzfWkfASCCT55zc2L+nprXS7f5UMbFX8J0Hk5tO/udZRT6OYLSZ8Iv+o8xdBUQAaojKhdyO6+qXdoSMMQyyyz/jiBEJvme9bg0uw/0OwYQ2eS2KLUVOZ0/Nrk/IX3HIDAjn9npumMQARP5TdOxmEDE+d/aJhi5DyVpzkzFv/Ufsn3+CwDEIMfSbybwaADDIMv5d6Rjx2BAhmFMPjoik+zHBITUNqa286QZGZsxEY0NaIzP82Ugc17227OsaVmKKK2cWwB5v8AGBhFE03wacJ5jQxEWjFAcp7yXdW6P733pN9JYtU+W5EoLbGQe5OzBIsKzgaMIRwEVBeQrgJDuutSndAeW1/Olz5WynfK7WFynBEZITwmdi3I//Xf5zKPFdjoHZbt5Lmi6Qsvfyn11rk9ZeZ/rPGuH5kEgNNuWQYggeLYy93q/pX8bighs8lj13BFxnm9macOAYfL553Rukc+fRZzmKPVL50hHPj27ivml/PA4act7Ol8jyzlbmUKZn+I+P32Y2bWyds7OtTGNVf5tirfS8pm4vH9KW/tN5137tdZPPX55P5dXAxfnQM2zne0fQQjpmTKyhVs8V/Re7mKFxvjpeQh5tgYQbLo2pH0DIuCl3aPKd/98rETHM/yUTzO/F1fedxfYc7Y/5Vtc2vbaZwCIOB7HJe2tfX/umvp9sEvn+7s4j0/N/3Pt5z/3+OZ1fPK186EB00XPJiL6hwD+IQD84Y8s/vf/+fPZ75EZFRmExa4WdPQdAFSwsh/kJh95ekiY4maOzLPP5fcA8jHL7WwxpJHj6v7LtgxRblPbBTDruwVBXJ94st1T86Bj1vEuLYBz+yPEeW7JoWOf2xw55n7p8bVN3b9sT/cr2yxNt9e+Lvdffrf8TX8/dc7L+dL9KtjVvqztv+znWh/O/abjr8iiY5+/A+bX2MgRFoSGHEYEjBzRkp31QefegmbXannMCKGH9dfAjACgT83s2KFjue7/23/xn+Ff/tUPgQC4zw/4W5+9gY8Gj32Dz24e8R988tf4tze/gEmtdVzBgtFxhRvTI6QX5cgOY2rTIKLjGn2sMLKV36NDzw6PvkEfK5kDEBwF9NHhftzAgDFEmx0EHw0iGzgTYGhy2CIThuDwtt9gP1bwQa5FdaxiNLLN6MAsDlaMBI6E8VABvQGNhOqdgRkJbgeYAJgxTZoByDPUT/Ebwrs/HWHvRtz+2Ra3vwrYf2rw7u8AH/8F4G8IoQG2X8kcvf63CLFiNK8NqgfAdQwzArECKF1yux8Sui8CYABW4GUZCATUEXbj8YNX7/C3X36NP95+AwPGPtawiOijS3NrsLEjLGIGV3KNWEQm9LHCO9/AR4suONQ2IDKhCxVaO8IQozYePopD/areow8ON65HZJOBliMBb4YYkSmfZwAYosPGDHjhOvRxemX00eHH7Ws8hhaBTXaiAhv8drjDjeuxNQP+/vZv8Jl9h11sUFNARR4jO5i0fUsjIhtUFBBA+d9PWQChpYCxcOT0ewvGyBaGIur8TJDvB5i8LYAMIDt2+XsFlrrfyBYVBYxs0ZKfrvk0jnrlOTPAokv3gS3am23DFjWFPBelRTYwFGfnQud3hM336NuwxduwxcgWfazwENq80KHnaecbAMAhVKhMgKMIQxGRDTwb7HyNGzdgY0cAyOe7ooBPqwf8wL3FC9PlOdR+6dyFAtgHNojpaXzK6sX51fHrWPX8W0SM7I62OzVXuo9auW95jehnnU8AiE8E3AxsZwsXLY25/Yp8bmuERVVcDzaPzaEin48HTPNQXgcWEaHoi9wPHi352TUZFu+jGvHou1+HF6gQEGHwNmzRcYVdbHDvt7AU8al7SNeyycf6m/4VDDFeuZ2c5+jwGBq89Vs4CtjYEXe2AwD8ZrhDRQH/4KN/gh+7d6jAqCg/YmEBVAvfxYIwglEV7159t47gtN+0jyHK78T4jBQS9bOW/srSAiafp+zj0k9b7qNtlv1b+jk9xzzOc21e2kc1bWMsrufqLAT97mx5zsqx6RyeG8ul52XpN43M2BqLfQxo1TdN10tLFh0//c44Zf/xP/j6ou0+NGD6BYAfF5//EMCvlhsx8z8C8I8A4N/5+zUDyM5jRQZ2AVZCfoisX9zqNK85sKUpKACAnv1sO/13BYvxxMt8CaK0v3oRVWQyZHwKWAHIYKm8UWZGQMchO9zqtJdjVtBgVx5OcfZikW10bstjal9PgQ8LWWVVwHLuJlj7bfldRRYjhyeBDDB/0CwB4rK/5VzovgYGodiuPOYS3Noz58yQzG9gP/tO+748P3lMK4A3pmPbxfU3A/qY7gndbmQdgzgGN+TxTdwAAJhkfTJGgzFYjNGAiPFXv/wCB1/h9g86/N3mS+xig8gGe66wNT12URytirw4L7AYCuDUscMYHd74LXZBtu2Dw8gGlhh9cOiCS2Ng7H2N26rHEBw8G/TBwUeDEA0MCcOjTvthrNCPDpEJtQtwNiBEg9FbjMFiGCzGty3szsA9GrgOMB7Y9oAZGMYDFIFYCf0UK4Lx6RlwkN/sKL9tfxvx4mcGQIPmTYfoCMNtjRd/DYQW8K0ArugIt78aQdHBHRi2G4uLAHj8gQNFwHiG6wjkCVwxyFNelTeDAXtCGA1+jRfw0aCiiI+qPRrjsY91Ai3yfZVW+OUcy7wfQo1P6kfAAz5u4UzAS+vxpt+ith6tHRFZWSiZf0cRfXBorMfGjjiECjd2RB8dPq8e0rWn9
39AFyvsY40hOjgTsY91cW8Qbl2PN/4GBix9NB770OBvDq/gTMDWDGjNiC5W2FEDSxEDWwxsxeFnRkU+O7sDW1isAwu5XyZmTP92yWmPbGAXz+WWPAaY2fal06zgRwHUDY0zh1OPJyvqHgGUz4P0t8qOcr4EEgh5G7cCDGHR0oilaVv10S9l/1N/SBzZHdfZEde/u9jgIWwwssUYXQGkHW5tj32scQgVDEX4aDFEi8oEVCbgEATM6d8+ONTGT8AZBJuAUccVWh6zg69gSeepS+/NG+qx42bqezIFFgYxA4gSDIYCFOp8jewQYXJb4QSgiTz/XkFsCZyEaVMw5hN40cXCMS0SxQIUCxhSMKhAP7ePmIGPXtN1Bnmc5s2l56ay1GkhDWYGMgEkADxBNgXghiLqEyDRgmHB2HF1dO2q3dCAgS0MBCC3GPFVeIkIwhfuIc+pnofIBiNbfO4ehBWmiB4V+ugQmHDnRrRmTKBYmM2KAjquEJhQJZCu4whIoKnwP0Jyctcc5mm/EgjIsRR8XGI
"text/plain": [
"<Figure size 1440x864 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Display the image and draw the predicted boxes onto it.\n",
"\n",
"# Set the colors for the bounding boxes\n",
"colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()\n",
"classes = ['background',\n",
" 'aeroplane', 'bicycle', 'bird', 'boat',\n",
" 'bottle', 'bus', 'car', 'cat',\n",
" 'chair', 'cow', 'diningtable', 'dog',\n",
" 'horse', 'motorbike', 'person', 'pottedplant',\n",
" 'sheep', 'sofa', 'train', 'tvmonitor']\n",
"\n",
"plt.figure(figsize=(20,12))\n",
"plt.imshow(orig_images[0])\n",
"\n",
"current_axis = plt.gca()\n",
"\n",
"for box in y_pred_thresh[0]:\n",
" # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions.\n",
" xmin = box[2] * orig_images[0].shape[1] / img_width\n",
" ymin = box[3] * orig_images[0].shape[0] / img_height\n",
" xmax = box[4] * orig_images[0].shape[1] / img_width\n",
" ymax = box[5] * orig_images[0].shape[0] / img_height\n",
" color = colors[int(box[0])]\n",
" label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])\n",
" current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color=color, fill=False, linewidth=2)) \n",
" current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Make predictions on Pascal VOC 2007 Test\n",
"\n",
"Let's use a `DataGenerator` to make predictions on the Pascal VOC 2007 test dataset and visualize the predicted boxes alongside the ground truth boxes for comparison. Everything here is preset already, but if you'd like to learn more about the data generator and its capabilities, take a look at the detailed tutorial in [this](https://github.com/pierluigiferrari/data_generator_object_detection_2d) repository."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: '../../datasets/VOCdevkit/VOC2007/ImageSets/Main/test.txt'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-24-7ce751a5a826>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mexclude_truncated\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mexclude_difficult\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m ret=False)\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mconvert_to_3_channels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mConvertTo3Channels\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Desktop/Tesis/8.-Object_Detection/keras-ssd-master/data_generator/object_detection_2d_data_generator.py\u001b[0m in \u001b[0;36mparse_xml\u001b[0;34m(self, images_dirs, image_set_filenames, annotations_dirs, classes, include_classes, exclude_truncated, exclude_difficult, ret, verbose)\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mimages_dir\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimage_set_filename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mannotations_dir\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimages_dirs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimage_set_filenames\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mannotations_dirs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 464\u001b[0m \u001b[0;31m# Read the image set file that so that we know all the IDs of all the images to be included in the dataset.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 465\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimage_set_filename\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 466\u001b[0m \u001b[0mimage_ids\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;31m# Note: These are strings, not integers.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 467\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimage_ids\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mimage_ids\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../../datasets/VOCdevkit/VOC2007/ImageSets/Main/test.txt'"
]
}
],
"source": [
"# Create a `BatchGenerator` instance and parse the Pascal VOC labels.\n",
"\n",
"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the datasets here.\n",
"\n",
"VOC_2007_images_dir = '../../datasets/VOCdevkit/VOC2007/JPEGImages/'\n",
"VOC_2007_annotations_dir = '../../datasets/VOCdevkit/VOC2007/Annotations/'\n",
"VOC_2007_test_image_set_filename = '../../datasets/VOCdevkit/VOC2007/ImageSets/Main/test.txt'\n",
"\n",
"# The XML parser needs to now what object class names to look for and in which order to map them to integers.\n",
"classes = ['background',\n",
" 'aeroplane', 'bicycle', 'bird', 'boat',\n",
" 'bottle', 'bus', 'car', 'cat',\n",
" 'chair', 'cow', 'diningtable', 'dog',\n",
" 'horse', 'motorbike', 'person', 'pottedplant',\n",
" 'sheep', 'sofa', 'train', 'tvmonitor']\n",
"\n",
"dataset.parse_xml(images_dirs=[VOC_2007_images_dir],\n",
" image_set_filenames=[VOC_2007_test_image_set_filename],\n",
" annotations_dirs=[VOC_2007_annotations_dir],\n",
" classes=classes,\n",
" include_classes='all',\n",
" exclude_truncated=False,\n",
" exclude_difficult=True,\n",
" ret=False)\n",
"\n",
"convert_to_3_channels = ConvertTo3Channels()\n",
"resize = Resize(height=img_height, width=img_width)\n",
"\n",
"generator = dataset.generate(batch_size=1,\n",
" shuffle=True,\n",
" transformations=[convert_to_3_channels,\n",
" resize],\n",
" returns={'processed_images',\n",
" 'filenames',\n",
" 'inverse_transform',\n",
" 'original_images',\n",
" 'original_labels'},\n",
" keep_images_without_gt=False)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Image: ../../datasets/VOCdevkit/VOC2007/JPEGImages/004927.jpg\n",
"\n",
"Ground truth boxes:\n",
"\n",
"[[ 7 58 26 433 303]\n",
" [ 15 409 52 439 149]\n",
" [ 15 369 60 394 114]\n",
" [ 15 31 65 45 111]\n",
" [ 15 48 67 65 110]\n",
" [ 15 67 65 81 107]]\n"
]
}
],
"source": [
"# Generate a batch and make predictions.\n",
"\n",
"batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(generator)\n",
"\n",
"i = 0 # Which batch item to look at\n",
"\n",
"print(\"Image:\", batch_filenames[i])\n",
"print()\n",
"print(\"Ground truth boxes:\\n\")\n",
"print(np.array(batch_original_labels[i]))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Predict.\n",
"\n",
"y_pred = model.predict(batch_images)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted boxes:\n",
"\n",
" class conf xmin ymin xmax ymax\n",
"[[ 7. 1. 59.19 20.12 429.33 307.77]\n",
" [ 15. 0.89 361.66 55.22 394.27 122.28]\n",
" [ 15. 0.7 89.83 65.21 108.34 116.95]\n",
" [ 15. 0.57 345.61 57.24 368.72 108.1 ]\n",
" [ 15. 0.55 430.29 61.72 462.75 140.24]\n",
" [ 15. 0.53 406.14 56.13 436.42 145.34]\n",
" [ 15. 0.52 40.03 67.8 55.35 109.8 ]]\n"
]
}
],
"source": [
"confidence_threshold = 0.5\n",
"\n",
"# Perform confidence thresholding.\n",
"y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])]\n",
"\n",
"# Convert the predictions for the original image.\n",
"y_pred_thresh_inv = apply_inverse_transforms(y_pred_thresh, batch_inverse_transforms)\n",
"\n",
"np.set_printoptions(precision=2, suppress=True, linewidth=90)\n",
"print(\"Predicted boxes:\\n\")\n",
"print(' class conf xmin ymin xmax ymax')\n",
"print(y_pred_thresh_inv[i])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA5EAAAKvCAYAAADz1UvWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvXe0Z8ddJ/ipG37pvX79OkepW6FbkiUnSU6MGYNBBswM\nOSwYFotgvDb2YXdhgOUYHBjs2VkdMAtDmgN4h2UwXo+9u4NZbDzCActJWMJWsNRSd0udu19+75du\nqP2j6ltVt27d+7u/191yA/U5p8/v9b0VvlW34jcyzjk8PDw8PDw8PDw8PDw8PJog+FoT4OHh4eHh\n4eHh4eHh4fGPB/4S6eHh4eHh4eHh4eHh4dEY/hLp4eHh4eHh4eHh4eHh0Rj+Eunh4eHh4eHh4eHh\n4eHRGP4S6eHh4eHh4eHh4eHh4dEY/hLp4eHh4eHh4eHh4eHh0Rj+Eunh4eHh4eHh4eHh4eHRGFft\nEskY+1bG2FcZY8cYY794terx8PDw8PDw8PDw8PDweO7AOOdXvlDGQgBPALgHwCkAXwDwQ5zzR694\nZR4eHh4eHh4eHh4eHh7PGaKrVO5LARzjnD8NAIyxPwfwnQCcl8j5+W183779KF5oWSkdKz0SDzi4\nkYYVcnOu39rZWblAwJGaSmCMuV5X0lsuczqwKhqoSPpT9ptKX9csUYjjpZt+zjkCZvVzZR/Ugzky\ncEff2OmKpLPi06Zd27zJlwVmjUkG5mij+H8Yhuj3+wCAU6dOAQBmel2RjwFhIMcyCwEAO3bslvki\n5HkmSlLdYdRL44Fx45l8r76d+H8yHmFh8QIAIIqEYsJoNFJlUnfTuyAQtLTiLubntwMAsjyX7wJV\ntupaNV558f+6GxAEmjZmjekCXGXx4rxwrSF2tqpnNnl2OQB3NqMJppkyV2loKlgzqDHMNky1BDSo\nsFAmK/e3qyjXlLbLqiOhPJz0eHKRyqwMBXod+aro2ywmLetXFlU9WF0rc8xtZ8rNTqJNoul4t9fu\nOjTZt/5Jw7GAbupYYMydK4KK+VvYh636cuOMyHOZn8rJM6ytLgEAsiwBAKRZqtPIvXk4FO8OHToE\nAOj1eqqM1bV1kS9JMDe3BYDeK7m1f9lUU2NYUOzvs2fPEMXgPC+82xiNAQDbt2/Hrp27AABJIp4F\nLFD16W6o6//J897jyuEqyPRKhTet4stf/odLnPNdk9JdrUvkAQDPGv8/BeBlZgLG2BsAvAEA9uzZ\nhz/6oz9DkiRqMoVhDKA4ydTEo4ui/M2yTJXbarUK79JsjNw65AayF8MwVOkoDZCrd7mcP/QuiiJV\nF5WVy08Sha1SPepAy7OKRaIejLFSG6MoUmXb76heunSYMA/XOp1uu00f/T9JErRaUaGewsIXWPmc\nZ39W+lWLtPFrp1O0G1rXURgW8unvZubTz+w2E/I8BwuDQhmhLDvLskpazLIDUP/pt/R90lSXyXkm\n06WSUEH7/PwcHnzwCwCAX/xFofF91wtfAABotVJsmRVjOWZbAQA//mNvBQBs2bod66M1QSsT9eQ8\nlvVmABcbRhSKjY3zDGki2prKQc0CQefZU0/hfX/62wCA3btnAADHjj0t6MwitGPRJ/PbxOV2ZkZs\ngocO3o7v+FevAwCsbQwBAN2ZjuiXKFT9ANnvmWx7wOLSnOu1O5L2FGGkx53uPy6LKn6vIAhK88Jc\nCxjCQj7XHKCy7DEAlMdtnue1mz4LyoPfHudmmXb55vwiOsrrk9GeLHfSabfVBl3azTIJtJ6Z85Hq\nS7muj8qP5KHEXIPstpp9TXPGps/Mp9qHTNFI39JkFpTWCaNeu9/NMaCeyU+tvn0Ql9YVM789jswx\nY9dn0mrT56LdTmOiei2q/86FdMgqy3CV6fo++r1jH7DSgBfno1mv/SwIgtLeaf/frK+u/8w89rfP\nssy5FhAtjBXrdvWt+vY1e0wAVpq3VFQYhqXy9fctt8XFHGvyzaktJnK42izpNMq2c5r9rtpo9WOW\nZaX+Nr+dvda4vr29nonyy2tc1X5gjts8L6+3mSwrSOVlUP5/lGZIJJN12BcM1HwsftPBCj7+0Q8B\nAFbXzgIAFpcvAgAGSQ4EYj984thpAMDv/e5/BAC8+MUvRpaKffhj/+1TAIBLF87jnnu+CQAwOyP2\n01SmEWdLapfcOzm1j6Pdbou/M5H+3f/2HQCALB0hSQeQ/wEAfPHpEwCAH/7hH8Eb7v0pAMC5s4K+\nbkvUG4e6PtDFuWb8CTrSUp/Sr/19q+atXeZmYI4Bs466NbVq3ptwjVHu6hQHPWbZk9Ynk67Ss7x6\nja/bG+ra5TprV6U3nx86fOBkZaEGrtYlciI4538A4A8A4LbbbueMsdoDnJW3Mk2aFgc6mD4YqvKz\n8qJDUAuTUa1Zvn0ZIZiH19IiZ7xzldkEdr3mRHJNarses62uTb/q0GVW66KhtDE5aK9bdJrQwgJW\n2e+utro2UOchXpJOB1vk5UXO7g+gfEA167MPP6Ktdt+K8vv9Pm6++WYAemzS+M3zIbbISxnxSc+e\nFZvYzJZ5nS4g7pIxxjnNI7pE8tJiMxqKi99NN92E6687DABYXBTS0F5XXCZHgzGSRGymw6Eoc3ZG\n/J44cQLnzwt65rfvKbQ5yzLHpY4ur+XvZS7e5f4uw3URs9/JTimUZR7+7fFgXtrsQ67r4uK8RCpm\ncfkiUbfYN1nrXBuAi2HTBOrQ5To0uLQFatZbgjne69pctSGa7VLvmFEn6vq9vInXoerQ7rqom/S5\nLj92fXXret1lv64Ms32hxURresmYhtHg2u9cdLouFGqO1ex55j5Pz23Gpms82XtGFe2KwWGtQeYF\nzr6AVLW/6p1r31HIqw9pJsO2zGQor0uuy1MT+oBinxmP3HNIMbw5AoOZKh6Jd26mO9S7uu9TRXOh\nHNK8KaxB1et71WVc/F1Mn8P8JsTYpLoZ8rTYVpr+J048jY2+kCSOx+PCb6c7hyeeFrKSl7705QCA\nl7/8pQCAhx/+Mp469gQA4G/u/zQA4Pu/93vUmFxbE0zgrVvFJXQwGCgGLV3u6EIRBEwJR545eaqQ\nv9trGeO7OKZfcMfz1cUvjuNif+S5OpdEkptm9lHd3udCk/Ns7V49Jex173Jpq4JrH7jcepoy/qrK\nnfYyXjcfXevftLhal8jTAK4z/n9QPnOCNuhiY2mxj9T/1BIg09FkjuO4tDFpzqseBDTZYpo0Lu5+\nUN6MXROpfFEsv9MT0SUZRCFNFZhSPyjSJDj3eeGdyUltciA1F2F7UyVVSJPL7izPWqybXiKbLTr1\nElWzzOLfkyepS/KhDhjGhs2t9gEh8jwpVFO4t6g+dl2MIlm+eN7v93HokJgm8/PzAID1/gYAYG5O\nT82hvPAtLl2S7+aw0l+ubJsah+pSHBqHMmujZzle8hKhJPDBDx4DAOzZIy6FTz/1FFpyXo1HYjNK\nEilN5QmePi42ybt3ivSjkeiXKAqQkcq
PHJOdDkkby2Mzkaq5AXfPC/tw0oR5FIahesZtNW+mWR+l\nQzK0hEqPI+pH45mL0cCKB/y6S6c5jjPHgbtqnBcO+HWXT+uRmYa+DUOzeaiZObqv1CZkSQ/MtcR1\nEAlch1u453HTrbZuTXA9qzyoo3wBNmmv4rZPWm9dtNRt4nWX9SabvuuwVlK/5IGebM4y6uqhPdXB\nSZ+C4WDvQ1Xp7XeTLslVh6w6Drx4ry8Vdj67PTmvv9BWMWzr2lp3tmzKGDFylN4ps5QJU76KsT6J\njro1zv52rrOHykfLNa9nlrjWTV23+DU1K+xLsfkt1P4h96JMSitPPnNc5RuPpTZNKKSCHJrheO+9\n9wIAvvSlhwEAH/voR3HPPfcAAN50y+2qjl6vJ8pvaY0yAGi323q/ItOTnNoMdNpSc0gykiltqzWL\n0bDYHx0ptTx69Kg6O9hrMs/1WqzHZr3G3GYvgXXz9nLRdN1tUvc0F+FJkr/LubjatEx7adxMPWYd\n036nq+Wd9QsAjjDGbmC
"text/plain": [
"<matplotlib.figure.Figure at 0x7fb4989c5da0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Display the image and draw the predicted boxes onto it.\n",
"\n",
"# Set the colors for the bounding boxes\n",
"colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()\n",
"\n",
"plt.figure(figsize=(20,12))\n",
"plt.imshow(batch_original_images[i])\n",
"\n",
"current_axis = plt.gca()\n",
"\n",
"for box in batch_original_labels[i]:\n",
" xmin = box[1]\n",
" ymin = box[2]\n",
" xmax = box[3]\n",
" ymax = box[4]\n",
" label = '{}'.format(classes[int(box[0])])\n",
" current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color='green', fill=False, linewidth=2)) \n",
" current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':'green', 'alpha':1.0})\n",
"\n",
"for box in y_pred_thresh_inv[i]:\n",
" xmin = box[2]\n",
" ymin = box[3]\n",
" xmax = box[4]\n",
" ymax = box[5]\n",
" color = colors[int(box[0])]\n",
" label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])\n",
" current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color=color, fill=False, linewidth=2)) \n",
" current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}