Files

980 lines
1.5 MiB
Plaintext
Raw Permalink Normal View History

2020-02-06 16:47:03 -03:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SSD512 Inference Tutorial\n",
"\n",
"This is a brief tutorial that shows how to use a trained SSD512 for inference on the Pascal VOC datasets. It is the same as the SSD300 inference tutorial, but with all parameters preset for SSD512 on Pascal VOC. If you'd like more detailed explanations of how to use the model in general, please refer to [`ssd300_training.ipynb`](https://github.com/pierluigiferrari/ssd_keras/blob/master/ssd300_training.ipynb)."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/dl-desktop/Desktop/Tesis/8.-Object_Detection/keras-ssd-master/data_generator/object_detection_2d_data_generator.py:43: UserWarning: 'BeautifulSoup' module is missing. The XML-parser will be unavailable.\n",
" warnings.warn(\"'BeautifulSoup' module is missing. The XML-parser will be unavailable.\")\n"
]
}
],
"source": [
"from keras import backend as K\n",
"from keras.models import load_model\n",
"from keras.preprocessing import image\n",
"from keras.optimizers import Adam\n",
"from imageio import imread\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"\n",
"from models.keras_ssd512 import ssd_512\n",
"from keras_loss_function.keras_ssd_loss import SSDLoss\n",
"from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes\n",
"from keras_layers.keras_layer_DecodeDetections import DecodeDetections\n",
"from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast\n",
"from keras_layers.keras_layer_L2Normalization import L2Normalization\n",
"\n",
"from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast\n",
"\n",
"from data_generator.object_detection_2d_data_generator import DataGenerator\n",
"from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels\n",
"from data_generator.object_detection_2d_geometric_ops import Resize\n",
"from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Height and width of the model's input images (passed as `image_size` when the model is built).\n",
"img_height, img_width = 512, 512"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Load a trained SSD\n",
"\n",
"Either load a trained model or build a model and load trained weights into it. Since the HDF5 files I'm providing contain only the weights for the various SSD versions, not the complete models, you'll have to go with the latter option when using this implementation for the first time. You can then of course save the model and next time load the full model directly, without having to build it.\n",
"\n",
"You can find the download links to all the trained model weights in the README."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.1. Build the model and load trained weights into it"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ok\n"
]
}
],
"source": [
"# 1: Build the Keras model.\n",
"\n",
"# Drop any previously built models from memory first.\n",
"K.clear_session()\n",
"\n",
"model = ssd_512(\n",
"    image_size=(img_height, img_width, 3),\n",
"    n_classes=20,\n",
"    mode='inference',\n",
"    l2_regularization=5e-4,\n",
"    # Pascal VOC preset. The scales for MS COCO are\n",
"    # [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06].\n",
"    scales=[0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05],\n",
"    aspect_ratios_per_layer=[\n",
"        [1.0, 2.0, 0.5],\n",
"        [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
"        [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
"        [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
"        [1.0, 2.0, 0.5, 3.0, 1.0/3.0],\n",
"        [1.0, 2.0, 0.5],\n",
"        [1.0, 2.0, 0.5],\n",
"    ],\n",
"    two_boxes_for_ar1=True,\n",
"    steps=[8, 16, 32, 64, 128, 256, 512],\n",
"    offsets=[0.5] * 7,\n",
"    clip_boxes=False,\n",
"    variances=[0.1, 0.1, 0.2, 0.2],\n",
"    normalize_coords=True,\n",
"    subtract_mean=[123, 117, 104],\n",
"    swap_channels=[2, 1, 0],\n",
"    confidence_thresh=0.5,\n",
"    iou_threshold=0.45,\n",
"    top_k=200,\n",
"    nms_max_output_size=400,\n",
")\n",
"\n",
"# 2: Load the trained weights into the model.\n",
"\n",
"# TODO: Set the path of the trained weights.\n",
"weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'\n",
"model.load_weights(weights_path, by_name=True)\n",
"\n",
"# 3: Compile the model so that Keras won't complain the next time you load it.\n",
"\n",
"adam = Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0.0)\n",
"ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)\n",
"model.compile(optimizer=adam, loss=ssd_loss.compute_loss)\n",
"\n",
"print('ok')"
]
},
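{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or, once the model has been built and compiled as above, save it to disk so that next time you can load the full model directly (see section 1.2):"
]
},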
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"import keras\n",
"\n",
"# Save the full model so that it can be loaded directly next time,\n",
"# without having to build it first.\n",
"model.save(filepath='prueba.h5')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.2. Load a trained model"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"__________________________________________________________________________________________________\n",
"Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
"input_1 (InputLayer) (None, 512, 512, 3) 0 \n",
"__________________________________________________________________________________________________\n",
"identity_layer (Lambda) (None, 512, 512, 3) 0 input_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"input_mean_normalization (Lambd (None, 512, 512, 3) 0 identity_layer[0][0] \n",
"__________________________________________________________________________________________________\n",
"input_channel_swap (Lambda) (None, 512, 512, 3) 0 input_mean_normalization[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv1_1 (Conv2D) (None, 512, 512, 64) 1792 input_channel_swap[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv1_2 (Conv2D) (None, 512, 512, 64) 36928 conv1_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool1 (MaxPooling2D) (None, 256, 256, 64) 0 conv1_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2_1 (Conv2D) (None, 256, 256, 128 73856 pool1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2_2 (Conv2D) (None, 256, 256, 128 147584 conv2_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool2 (MaxPooling2D) (None, 128, 128, 128 0 conv2_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv3_1 (Conv2D) (None, 128, 128, 256 295168 pool2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv3_2 (Conv2D) (None, 128, 128, 256 590080 conv3_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv3_3 (Conv2D) (None, 128, 128, 256 590080 conv3_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool3 (MaxPooling2D) (None, 64, 64, 256) 0 conv3_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_1 (Conv2D) (None, 64, 64, 512) 1180160 pool3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_2 (Conv2D) (None, 64, 64, 512) 2359808 conv4_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3 (Conv2D) (None, 64, 64, 512) 2359808 conv4_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool4 (MaxPooling2D) (None, 32, 32, 512) 0 conv4_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv5_1 (Conv2D) (None, 32, 32, 512) 2359808 pool4[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv5_2 (Conv2D) (None, 32, 32, 512) 2359808 conv5_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv5_3 (Conv2D) (None, 32, 32, 512) 2359808 conv5_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool5 (MaxPooling2D) (None, 32, 32, 512) 0 conv5_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc6 (Conv2D) (None, 32, 32, 1024) 4719616 pool5[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7 (Conv2D) (None, 32, 32, 1024) 1049600 fc6[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_1 (Conv2D) (None, 32, 32, 256) 262400 fc7[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_padding (ZeroPadding2D) (None, 34, 34, 256) 0 conv6_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2 (Conv2D) (None, 16, 16, 512) 1180160 conv6_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_1 (Conv2D) (None, 16, 16, 128) 65664 conv6_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_padding (ZeroPadding2D) (None, 18, 18, 128) 0 conv7_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2 (Conv2D) (None, 8, 8, 256) 295168 conv7_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_1 (Conv2D) (None, 8, 8, 128) 32896 conv7_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_padding (ZeroPadding2D) (None, 10, 10, 128) 0 conv8_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2 (Conv2D) (None, 4, 4, 256) 295168 conv8_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_1 (Conv2D) (None, 4, 4, 128) 32896 conv8_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_padding (ZeroPadding2D) (None, 6, 6, 128) 0 conv9_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2 (Conv2D) (None, 2, 2, 256) 295168 conv9_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_1 (Conv2D) (None, 2, 2, 128) 32896 conv9_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_padding (ZeroPadding2D) (None, 4, 4, 128) 0 conv10_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm (L2Normalization) (None, 64, 64, 512) 512 conv4_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2 (Conv2D) (None, 1, 1, 256) 524544 conv10_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_conf (Conv2D) (None, 64, 64, 84) 387156 conv4_3_norm[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_conf (Conv2D) (None, 32, 32, 126) 1161342 fc7[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_conf (Conv2D) (None, 16, 16, 126) 580734 conv6_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_conf (Conv2D) (None, 8, 8, 126) 290430 conv7_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_conf (Conv2D) (None, 4, 4, 126) 290430 conv8_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_conf (Conv2D) (None, 2, 2, 84) 193620 conv9_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_conf (Conv2D) (None, 1, 1, 84) 193620 conv10_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_loc (Conv2D) (None, 64, 64, 16) 73744 conv4_3_norm[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_loc (Conv2D) (None, 32, 32, 24) 221208 fc7[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_loc (Conv2D) (None, 16, 16, 24) 110616 conv6_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_loc (Conv2D) (None, 8, 8, 24) 55320 conv7_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_loc (Conv2D) (None, 4, 4, 24) 55320 conv8_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_loc (Conv2D) (None, 2, 2, 16) 36880 conv9_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_loc (Conv2D) (None, 1, 1, 16) 36880 conv10_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_conf_reshape (None, 16384, 21) 0 conv4_3_norm_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_conf_reshape (Reshape) (None, 6144, 21) 0 fc7_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_conf_reshape (Resh (None, 1536, 21) 0 conv6_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_conf_reshape (Resh (None, 384, 21) 0 conv7_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_conf_reshape (Resh (None, 96, 21) 0 conv8_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_conf_reshape (Resh (None, 16, 21) 0 conv9_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_conf_reshape (Res (None, 4, 21) 0 conv10_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_priorbox (Anc (None, 64, 64, 4, 8) 0 conv4_3_norm_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_priorbox (AnchorBoxes) (None, 32, 32, 6, 8) 0 fc7_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_priorbox (AnchorBo (None, 16, 16, 6, 8) 0 conv6_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_priorbox (AnchorBo (None, 8, 8, 6, 8) 0 conv7_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_priorbox (AnchorBo (None, 4, 4, 6, 8) 0 conv8_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_priorbox (AnchorBo (None, 2, 2, 4, 8) 0 conv9_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_priorbox (AnchorB (None, 1, 1, 4, 8) 0 conv10_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"mbox_conf (Concatenate) (None, 24564, 21) 0 conv4_3_norm_mbox_conf_reshape[0]\n",
" fc7_mbox_conf_reshape[0][0] \n",
" conv6_2_mbox_conf_reshape[0][0] \n",
" conv7_2_mbox_conf_reshape[0][0] \n",
" conv8_2_mbox_conf_reshape[0][0] \n",
" conv9_2_mbox_conf_reshape[0][0] \n",
" conv10_2_mbox_conf_reshape[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_loc_reshape ( (None, 16384, 4) 0 conv4_3_norm_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_loc_reshape (Reshape) (None, 6144, 4) 0 fc7_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_loc_reshape (Resha (None, 1536, 4) 0 conv6_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_loc_reshape (Resha (None, 384, 4) 0 conv7_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_loc_reshape (Resha (None, 96, 4) 0 conv8_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_loc_reshape (Resha (None, 16, 4) 0 conv9_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_loc_reshape (Resh (None, 4, 4) 0 conv10_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_priorbox_resh (None, 16384, 8) 0 conv4_3_norm_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_priorbox_reshape (Resh (None, 6144, 8) 0 fc7_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_priorbox_reshape ( (None, 1536, 8) 0 conv6_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_priorbox_reshape ( (None, 384, 8) 0 conv7_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_priorbox_reshape ( (None, 96, 8) 0 conv8_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_priorbox_reshape ( (None, 16, 8) 0 conv9_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_priorbox_reshape (None, 4, 8) 0 conv10_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"mbox_conf_softmax (Activation) (None, 24564, 21) 0 mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"mbox_loc (Concatenate) (None, 24564, 4) 0 conv4_3_norm_mbox_loc_reshape[0][\n",
" fc7_mbox_loc_reshape[0][0] \n",
" conv6_2_mbox_loc_reshape[0][0] \n",
" conv7_2_mbox_loc_reshape[0][0] \n",
" conv8_2_mbox_loc_reshape[0][0] \n",
" conv9_2_mbox_loc_reshape[0][0] \n",
" conv10_2_mbox_loc_reshape[0][0] \n",
"__________________________________________________________________________________________________\n",
"mbox_priorbox (Concatenate) (None, 24564, 8) 0 conv4_3_norm_mbox_priorbox_reshap\n",
" fc7_mbox_priorbox_reshape[0][0] \n",
" conv6_2_mbox_priorbox_reshape[0][\n",
" conv7_2_mbox_priorbox_reshape[0][\n",
" conv8_2_mbox_priorbox_reshape[0][\n",
" conv9_2_mbox_priorbox_reshape[0][\n",
" conv10_2_mbox_priorbox_reshape[0]\n",
"__________________________________________________________________________________________________\n",
"predictions (Concatenate) (None, 24564, 33) 0 mbox_conf_softmax[0][0] \n",
" mbox_loc[0][0] \n",
" mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"decoded_predictions (DecodeDete (None, <tf.Tensor 't 0 predictions[0][0] \n",
"==================================================================================================\n",
"Total params: 27,188,676\n",
"Trainable params: 27,188,676\n",
"Non-trainable params: 0\n",
"__________________________________________________________________________________________________\n",
"__________________________________________________________________________________________________\n",
"Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
"input_1 (InputLayer) (None, 512, 512, 3) 0 \n",
"__________________________________________________________________________________________________\n",
"identity_layer (Lambda) (None, 512, 512, 3) 0 input_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"input_mean_normalization (Lambd (None, 512, 512, 3) 0 identity_layer[0][0] \n",
"__________________________________________________________________________________________________\n",
"input_channel_swap (Lambda) (None, 512, 512, 3) 0 input_mean_normalization[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv1_1 (Conv2D) (None, 512, 512, 64) 1792 input_channel_swap[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv1_2 (Conv2D) (None, 512, 512, 64) 36928 conv1_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool1 (MaxPooling2D) (None, 256, 256, 64) 0 conv1_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2_1 (Conv2D) (None, 256, 256, 128 73856 pool1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2_2 (Conv2D) (None, 256, 256, 128 147584 conv2_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool2 (MaxPooling2D) (None, 128, 128, 128 0 conv2_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv3_1 (Conv2D) (None, 128, 128, 256 295168 pool2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv3_2 (Conv2D) (None, 128, 128, 256 590080 conv3_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv3_3 (Conv2D) (None, 128, 128, 256 590080 conv3_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool3 (MaxPooling2D) (None, 64, 64, 256) 0 conv3_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_1 (Conv2D) (None, 64, 64, 512) 1180160 pool3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_2 (Conv2D) (None, 64, 64, 512) 2359808 conv4_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3 (Conv2D) (None, 64, 64, 512) 2359808 conv4_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool4 (MaxPooling2D) (None, 32, 32, 512) 0 conv4_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv5_1 (Conv2D) (None, 32, 32, 512) 2359808 pool4[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv5_2 (Conv2D) (None, 32, 32, 512) 2359808 conv5_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv5_3 (Conv2D) (None, 32, 32, 512) 2359808 conv5_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"pool5 (MaxPooling2D) (None, 32, 32, 512) 0 conv5_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc6 (Conv2D) (None, 32, 32, 1024) 4719616 pool5[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7 (Conv2D) (None, 32, 32, 1024) 1049600 fc6[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_1 (Conv2D) (None, 32, 32, 256) 262400 fc7[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_padding (ZeroPadding2D) (None, 34, 34, 256) 0 conv6_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2 (Conv2D) (None, 16, 16, 512) 1180160 conv6_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_1 (Conv2D) (None, 16, 16, 128) 65664 conv6_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_padding (ZeroPadding2D) (None, 18, 18, 128) 0 conv7_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2 (Conv2D) (None, 8, 8, 256) 295168 conv7_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_1 (Conv2D) (None, 8, 8, 128) 32896 conv7_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_padding (ZeroPadding2D) (None, 10, 10, 128) 0 conv8_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2 (Conv2D) (None, 4, 4, 256) 295168 conv8_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_1 (Conv2D) (None, 4, 4, 128) 32896 conv8_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_padding (ZeroPadding2D) (None, 6, 6, 128) 0 conv9_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2 (Conv2D) (None, 2, 2, 256) 295168 conv9_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_1 (Conv2D) (None, 2, 2, 128) 32896 conv9_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_padding (ZeroPadding2D) (None, 4, 4, 128) 0 conv10_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm (L2Normalization) (None, 64, 64, 512) 512 conv4_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2 (Conv2D) (None, 1, 1, 256) 524544 conv10_padding[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_conf (Conv2D) (None, 64, 64, 84) 387156 conv4_3_norm[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_conf (Conv2D) (None, 32, 32, 126) 1161342 fc7[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_conf (Conv2D) (None, 16, 16, 126) 580734 conv6_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_conf (Conv2D) (None, 8, 8, 126) 290430 conv7_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_conf (Conv2D) (None, 4, 4, 126) 290430 conv8_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_conf (Conv2D) (None, 2, 2, 84) 193620 conv9_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_conf (Conv2D) (None, 1, 1, 84) 193620 conv10_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_loc (Conv2D) (None, 64, 64, 16) 73744 conv4_3_norm[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_loc (Conv2D) (None, 32, 32, 24) 221208 fc7[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_loc (Conv2D) (None, 16, 16, 24) 110616 conv6_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_loc (Conv2D) (None, 8, 8, 24) 55320 conv7_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_loc (Conv2D) (None, 4, 4, 24) 55320 conv8_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_loc (Conv2D) (None, 2, 2, 16) 36880 conv9_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_loc (Conv2D) (None, 1, 1, 16) 36880 conv10_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_conf_reshape (None, 16384, 21) 0 conv4_3_norm_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_conf_reshape (Reshape) (None, 6144, 21) 0 fc7_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_conf_reshape (Resh (None, 1536, 21) 0 conv6_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_conf_reshape (Resh (None, 384, 21) 0 conv7_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_conf_reshape (Resh (None, 96, 21) 0 conv8_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_conf_reshape (Resh (None, 16, 21) 0 conv9_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_conf_reshape (Res (None, 4, 21) 0 conv10_2_mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_priorbox (Anc (None, 64, 64, 4, 8) 0 conv4_3_norm_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_priorbox (AnchorBoxes) (None, 32, 32, 6, 8) 0 fc7_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_priorbox (AnchorBo (None, 16, 16, 6, 8) 0 conv6_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_priorbox (AnchorBo (None, 8, 8, 6, 8) 0 conv7_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_priorbox (AnchorBo (None, 4, 4, 6, 8) 0 conv8_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_priorbox (AnchorBo (None, 2, 2, 4, 8) 0 conv9_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_priorbox (AnchorB (None, 1, 1, 4, 8) 0 conv10_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"mbox_conf (Concatenate) (None, 24564, 21) 0 conv4_3_norm_mbox_conf_reshape[0]\n",
" fc7_mbox_conf_reshape[0][0] \n",
" conv6_2_mbox_conf_reshape[0][0] \n",
" conv7_2_mbox_conf_reshape[0][0] \n",
" conv8_2_mbox_conf_reshape[0][0] \n",
" conv9_2_mbox_conf_reshape[0][0] \n",
" conv10_2_mbox_conf_reshape[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_loc_reshape ( (None, 16384, 4) 0 conv4_3_norm_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_loc_reshape (Reshape) (None, 6144, 4) 0 fc7_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_loc_reshape (Resha (None, 1536, 4) 0 conv6_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_loc_reshape (Resha (None, 384, 4) 0 conv7_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_loc_reshape (Resha (None, 96, 4) 0 conv8_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_loc_reshape (Resha (None, 16, 4) 0 conv9_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_loc_reshape (Resh (None, 4, 4) 0 conv10_2_mbox_loc[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv4_3_norm_mbox_priorbox_resh (None, 16384, 8) 0 conv4_3_norm_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"fc7_mbox_priorbox_reshape (Resh (None, 6144, 8) 0 fc7_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv6_2_mbox_priorbox_reshape ( (None, 1536, 8) 0 conv6_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv7_2_mbox_priorbox_reshape ( (None, 384, 8) 0 conv7_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv8_2_mbox_priorbox_reshape ( (None, 96, 8) 0 conv8_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv9_2_mbox_priorbox_reshape ( (None, 16, 8) 0 conv9_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv10_2_mbox_priorbox_reshape (None, 4, 8) 0 conv10_2_mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"mbox_conf_softmax (Activation) (None, 24564, 21) 0 mbox_conf[0][0] \n",
"__________________________________________________________________________________________________\n",
"mbox_loc (Concatenate) (None, 24564, 4) 0 conv4_3_norm_mbox_loc_reshape[0][\n",
" fc7_mbox_loc_reshape[0][0] \n",
" conv6_2_mbox_loc_reshape[0][0] \n",
" conv7_2_mbox_loc_reshape[0][0] \n",
" conv8_2_mbox_loc_reshape[0][0] \n",
" conv9_2_mbox_loc_reshape[0][0] \n",
" conv10_2_mbox_loc_reshape[0][0] \n",
"__________________________________________________________________________________________________\n",
"mbox_priorbox (Concatenate) (None, 24564, 8) 0 conv4_3_norm_mbox_priorbox_reshap\n",
" fc7_mbox_priorbox_reshape[0][0] \n",
" conv6_2_mbox_priorbox_reshape[0][\n",
" conv7_2_mbox_priorbox_reshape[0][\n",
" conv8_2_mbox_priorbox_reshape[0][\n",
" conv9_2_mbox_priorbox_reshape[0][\n",
" conv10_2_mbox_priorbox_reshape[0]\n",
"__________________________________________________________________________________________________\n",
"predictions (Concatenate) (None, 24564, 33) 0 mbox_conf_softmax[0][0] \n",
" mbox_loc[0][0] \n",
" mbox_priorbox[0][0] \n",
"__________________________________________________________________________________________________\n",
"decoded_predictions (DecodeDete (None, <tf.Tensor 't 0 predictions[0][0] \n",
"==================================================================================================\n",
"Total params: 27,188,676\n",
"Trainable params: 27,188,676\n",
"Non-trainable params: 0\n",
"__________________________________________________________________________________________________\n"
]
}
],
"source": [
"# TODO: Set the path to the `.h5` file of the model to be loaded.\n",
"model_path = 'prueba.h5'\n",
"\n",
"# We need to create an SSDLoss object in order to pass that to the model loader.\n",
"ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)\n",
"\n",
"#K.clear_session() # Clear previous models from memory.\n",
"\n",
"model1 = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,\n",
" 'L2Normalization': L2Normalization,\n",
" 'DecodeDetections': DecodeDetections,\n",
" 'compute_loss': ssd_loss.compute_loss})\n",
"model.summary()\n",
"model1.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Load some images\n",
"\n",
"Load some images for which you'd like the model to make predictions."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"orig_images = [] # Store the images here.\n",
"input_images = [] # Store resized versions of the images here.\n",
"\n",
"# We'll only load one image in this example.\n",
"img_path = 'Prueba_2.jpg'\n",
"\n",
"orig_images.append(imread(img_path))\n",
"img = image.load_img(img_path, target_size=(img_height, img_width))\n",
"img = image.img_to_array(img)\n",
"input_images.append(img)\n",
"input_images = np.array(input_images)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Make predictions"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(input_images)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`y_pred` contains a fixed number of predictions per batch item (200 if you use the original model configuration), many of which are low-confidence predictions or dummy entries. We therefore need to apply a confidence threshold to filter out the bad predictions. Set this confidence threshold value as you see fit."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted boxes:\n",
"\n",
" class conf xmin ymin xmax ymax\n",
"[[ 15. 1. 286.69 101.61 456.36 486.13]\n",
" [ 15. 0.99 238.73 109.55 352.84 480.42]\n",
" [ 15. 0.98 77.15 113.1 238.19 337.83]\n",
" [ 15. 0.91 422.4 111.09 509.53 483.1 ]\n",
" [ 15. 0.78 16.28 203.04 52.34 318.41]\n",
" [ 7. 0.66 55.09 214.51 137.62 285.31]]\n"
]
}
],
"source": [
"confidence_threshold = 0.5\n",
"\n",
"y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])]\n",
"\n",
"np.set_printoptions(precision=2, suppress=True, linewidth=90)\n",
"print(\"Predicted boxes:\\n\")\n",
"print(' class conf xmin ymin xmax ymax')\n",
"print(y_pred_thresh[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Visualize the predictions\n",
"\n",
"We just resized the input image above and made predictions on the distorted image. We'd like to visualize the predictions on the image in its original size though, so below we'll transform the coordinates of the predicted boxes accordingly."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArgAAAKvCAYAAACBE7wMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzsvcmSHMmWpvfpYINPMQBIIBOZt+rerlvVLcKe2BSST0AR7rgl+QC14gNQuOYzkCK94JoPQGkhl1w3RVrYZLOGruqqujcrbwLIBBARPtigAxdq5mHhYeZuFu4eCOD6EQEs3Ez16FE1M7Vffz2qR3jvOclJTnKSk5zkJCc5yUm+FJGf2oCTnOQkJznJSU5ykpOc5JByArgnOclJTnKSk5zkJCf5ouQEcE9ykpOc5CQnOclJTvJFyQngnuQkJznJSU5ykpOc5IuSE8A9yUlOcpKTnOQkJznJFyUngHuSk5zkJCc5yUlOcpIvSo4CcIUQ/6UQ4i+EEH8lhPjvj1HGSU5ykpOc5CQnOclJTtIm4tD74AohFPCXwH8BfA/8a+C/8d7/fwct6CQnOclJTnKSk5zkJCdpkWMwuP8Z8Ffe+//gvS+A/xX4r45QzklOcpKTnOQkJznJSU5yT/QRdH4L/Lbx+3vgP9+W4fx85l99/RUA4qCm7KdNDMg+jAi/n7irrFpvuD68PkPq0JJ7yyVfpdj3jm3m9+tTe+kWdw4PtOVBp4fp7pt7UPauxG0PaXvavuX5LaUdQ0SLYceLxritZptl7tsKTzSi5PoGi9sTXaY+5oPwiNL1eLW9I11pfVuf33nmftr9H/F2Ba1nD/IoHrIO/t6fhzHRV7pavkFAuB/9S7pfv/797RBp7++G9PmPKYfvFH788R1XV9e9FB8D4PYSIcSfAn8K8PLlc/6n//l/rM/vyte7DLUfuhtUVttD5waUpcT2B7FpS5dd9fnmddnB0ddparu99wgh7unobAPh+qf13RMF9/IK16nz7rnu9rpvV/+03vesEyBrkN8jbdu1Ie3VmVber5vsuOlD3oa+z76nrb1F6/twiPLb6jYE4A7p7uUWML153PaMP1hE/x6kq732Bf/eBb11uzvXbZNoeRbrfmXTxs8pRLy1tvV8W5sb2wEk296HAU2wrd23yb3ndEOsv6+389649vvbJkJ6vPd3rnfWoeXd6XzPdlxrWNBeVFteodZ/t+na1YbbbOsr9XenS29Thj0KD3tuDieH7xf/9E//h95pjwFw/x74ReP3d9W5O+K9/5fAvwT4k3/4Dz6f3u4TyxDQvQsINl+gIXqHShfr1gqMOvL1BdNDOpbhbXmvtN7Add/27R5o7KV252BplxwCqByjvY4lNXhvO+4jrQDoAG1wCMZ78z3c9x0b+u4eo22HyP79REfa6tjHvqFt0LfOQ56PYTbcvhO1vu773l5W23t2bHlIOY9l20mGyzEA7r8G/lgI8SsCsP2vgf/2COV8ttIFlralfQiDuw3gdoHb/gxuf7Z3m6tBkzXeVv79Y6fKvdLWn50+dRNst32XdKcbAAoGcJKPCSTb26t/2n3LOoTsGpg1f+/vqtNWfv+0xwKHbDzT28D8rqKGvh+b+Q4t+4DArvxdsydt7GVTa80eeu87dfSVh4DsXvmHDMKGDPwGvGd9pdutpEXHAQZQn5oQOAHsdjk4wPXeGyHEfwf8H4AC/hfv/b87dDm/r3Kozr7tpdz1+6Fp26Zab5P7O2DZ+4d/CLvt6vfxDyPx7utDyh16/aHyyfUeqWN9ygwutLBSX/D3ZR8G9yRBugZ8bdPbffM/tgwFVvuwr5t5mr/3eQZb9T7Ahs10cJ80Or0nn16O4oPrvf9XwL96YN47v/swaX18Yzb1dclmOW0+O/szdP3LfwjIah5di99Un3JqNmHbaL7vCLvZls1Oqq1sKcW96yFPrf9uh9mLad1h1906yN73QIp2pm
VXp7/7nj4eozmEnWpL2+UTO+Q+dLmr7GPX0LR983fr2o9VaWXtOnxwW9nAI93zzfP189vetv3KPxrbvEV3X9mfHevyeWzz7bzb19XPwDY/0HultTwLUt7vw7r8X4cwuPXZLl/VbfYOBXxtz1lbXWrGe/M73Yec2WVPX7/nrtnQh/pNt+m6W95eah8kzXuulOpMs48M7Zf6yCdbZHaSL0/6fKCao/pmntu8w/0ZD8luDGKme+oYquuTs7KfqKynwFJ9DnJqp255zOnbpzBVfIxnYRup0UxzKMZ5nzrsm3cff/RahgzYPmdWt+/A4CnJCeA+IenLYh2i89i3rPp0P0ayHlU382zqaabdZGvdnd/heJfx2s7gtlvVZ5S/Le2QvnUQM9+x6KJd8YC0A2QIA7uPzqFp9n72jwYOj9A2R2Kmh4in/7P7FHD3MdrmWDMEh2iwff2I285uA2x9F1a27mtwIEayq2/qbdu9dP0Xjg65521MZ9eOHENmgIbNQvVO2pH/dnel+t+QXUU+tZwA7hORQQDoSGXvm69bz/1po676tvngNo990u0jh9DZd1AxNP++8lSfqU/xzH/O8inu4+dyjz45g0sXgGmzoX2qd18bjqZzwK4G+w5Eh7kHtYHW7Xk369AGbvuC2WO63JxkPzkB3CcoR2exGtfaQGef/E029iE2bZZ7+/v+B7XPgrhd5Q053z/tcbYkG5T/SH3oYzK4Q/z19v3ADIE6w+r7+TO4Q0HFMWzYV4b6efbJP4TBHVLWIe7vp/Q5/lQM7qa+PsB0m446+WbefZ+PbTa3XOlV1tDy9h3bNcv6nFwTajkB3Cci3Yzm47AP+zCVu0Bu24KwLja3Wd1tTG6zU9ptw8P8rHbV6yHAebPurWmOtMjsU7NbfdIfGyQN0v9EO/RHZfgHMbifvr2GffwPDw673t1jAO8haTv75taomrsH0/sOhnal3bbQriv9XTJkd/mb+jaJlH0Z3M8REHbJYywyO4Y8CYAr6A+4RNtDd0BbunZLaHbwfddGCr/dV6UL8DXtaBPnTAfoq/PU+u7uhNCe9va8a23I9t0KfCPAYX30lW+O3GBhZRXh6Dby1yZ54deH5rOw1r/5LHiP2PDTbfrrNuu1yQBva/Ngglr/dVtc4z42VrZ7F7W2TTt94VrLa5O2iFCHGPm3Pbf7giU1YM/OPh/poauzh6Zti07W1QZuwGp22qamu3DCwEHY5grx/jq7FA6IdDcgWl/97rWlHbLKfR9peXXouhGmJ7gb9OG27R//1jsuur4PLWm7QFTrLgqi85lp/u7S2faOANiWb0bzW3L32PLuDdkjWNbfLm4Hmu0fqHv2NKX73Qm6vfdQ9+91mvoZbbO1JeJYVxu01XeTuLo9tvXO232h2+u1Ya+o3glhQl4PWmvyvAQviVWK9x6rw31wziGkR4hgv7Ue7xRaRxSZRUqNI0MIUV23pGlKnudsrouB7miyTfvrv2MkvrLZC7Drz6lHeFA+1KV7Y7f78iQA7pcqu5i8XWn65t3GOA5Jq7YFZKhBKzUYbgGMm2m3dDq7WMYuML6Zbtv5PnXuK+s8zbwtfsA78zd+PzZ7uu/0/LHZ1U9V1mPb0Fdvl3/gU5au9+Ep1mHf+7u/q8j2j38vDVv6wS6/1IcOMoaU9Zh3eN9ByWbbbILQvjr6+AEPKesQ/c/6u9wgjYwxaK3x7tZW4R1KCJRSgdDxAuEE0nviOCHLckxu+Lf/9v/i13/yC168eIEtS6IoYnFzQ5qmW1n3B9nuA9AdEtJ6U04A94nLULZnM28be9m/nAeC5Q3g99AXdYjd2+wc5svU91zNtG4yydul2R6PDeKeKmh9CmD2KcqQwd0hgFWbdM1+tKdtKWngAHVveUTw3AqsOvYubpUOU1vvb5eKjrbdBrYeY5AxBHR2pe0LGvu2QZ86N9O134fueh26rIPeIy+pZx8DuywQUuJN2JHI2fA9C3sK37a/EAIhPdYarCv58ccf+Y
NfvlrbrpTCWotSCmPMfia2nBN+P8enE8A9ohyCwd0GwraxJduOQ+zo1MV9oFZrkBvn2zbi7pKhbOu2utZ/11Ngh2Z
"text/plain": [
"<Figure size 1440x864 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Display the image and draw the predicted boxes onto it.\n",
"\n",
"# Set the colors for the bounding boxes\n",
"colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()\n",
"classes = ['background',\n",
" 'aeroplane', 'bicycle', 'bird', 'boat',\n",
" 'bottle', 'bus', 'car', 'cat',\n",
" 'chair', 'cow', 'diningtable', 'dog',\n",
" 'horse', 'motorbike', 'person', 'pottedplant',\n",
" 'sheep', 'sofa', 'train', 'tvmonitor']\n",
"\n",
"plt.figure(figsize=(20,12))\n",
"plt.imshow(orig_images[0])\n",
"\n",
"current_axis = plt.gca()\n",
"\n",
"for box in y_pred_thresh[0]:\n",
" # Transform the predicted bounding boxes for the 512x512 image to the original image dimensions.\n",
" xmin = box[-4] * orig_images[0].shape[1] / img_width\n",
" ymin = box[-3] * orig_images[0].shape[0] / img_height\n",
" xmax = box[-2] * orig_images[0].shape[1] / img_width\n",
" ymax = box[-1] * orig_images[0].shape[0] / img_height\n",
" color = colors[int(box[0])]\n",
" label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])\n",
" current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color=color, fill=False, linewidth=2)) \n",
" current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Make predictions on Pascal VOC 2007 Test\n",
"\n",
"Let's use a `DataGenerator` to make predictions on the Pascal VOC 2007 test dataset and visualize the predicted boxes alongside the ground truth boxes for comparison. Everything here is preset already, but if you'd like to learn more about the data generator and its capabilities, take a look at the detailed tutorial in [this](https://github.com/pierluigiferrari/data_generator_object_detection_2d) repository."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"test.txt: 100%|██████████| 4952/4952 [00:14<00:00, 344.23it/s]\n"
]
}
],
"source": [
"# Create a `DataGenerator` instance and parse the Pascal VOC labels.\n",
"\n",
"dataset = DataGenerator()\n",
"\n",
"# TODO: Set the paths to the datasets here.\n",
"\n",
"VOC_2007_images_dir = '../../datasets/VOCdevkit/VOC2007/JPEGImages/'\n",
"VOC_2007_annotations_dir = '../../datasets/VOCdevkit/VOC2007/Annotations/'\n",
"VOC_2007_test_image_set_filename = '../../datasets/VOCdevkit/VOC2007/ImageSets/Main/test.txt'\n",
"\n",
"# The XML parser needs to know what object class names to look for and in which order to map them to integers.\n",
"classes = ['background',\n",
" 'aeroplane', 'bicycle', 'bird', 'boat',\n",
" 'bottle', 'bus', 'car', 'cat',\n",
" 'chair', 'cow', 'diningtable', 'dog',\n",
" 'horse', 'motorbike', 'person', 'pottedplant',\n",
" 'sheep', 'sofa', 'train', 'tvmonitor']\n",
"\n",
"dataset.parse_xml(images_dirs=[VOC_2007_images_dir],\n",
" image_set_filenames=[VOC_2007_test_image_set_filename],\n",
" annotations_dirs=[VOC_2007_annotations_dir],\n",
" classes=classes,\n",
" include_classes='all',\n",
" exclude_truncated=False,\n",
" exclude_difficult=True,\n",
" ret=False)\n",
"\n",
"convert_to_3_channels = ConvertTo3Channels()\n",
"resize = Resize(height=img_height, width=img_width)\n",
"\n",
"generator = dataset.generate(batch_size=1,\n",
" shuffle=True,\n",
" transformations=[convert_to_3_channels,\n",
" resize],\n",
" returns={'processed_images',\n",
" 'filenames',\n",
" 'inverse_transform',\n",
" 'original_images',\n",
" 'original_labels'},\n",
" keep_images_without_gt=False)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Image: ../../datasets/VOCdevkit/VOC2007/JPEGImages/002168.jpg\n",
"\n",
"Ground truth boxes:\n",
"\n",
"[[ 15 114 174 164 307]\n",
" [ 15 231 174 280 302]\n",
" [ 15 298 179 342 301]\n",
" [ 15 367 179 403 294]\n",
" [ 15 461 177 500 307]\n",
" [ 15 168 188 193 252]\n",
" [ 15 326 181 353 274]\n",
" [ 15 262 185 290 273]\n",
" [ 2 430 230 500 310]\n",
" [ 2 358 227 429 299]\n",
" [ 2 295 233 351 305]\n",
" [ 2 153 223 185 281]\n",
" [ 2 121 230 155 321]]\n"
]
}
],
"source": [
"# Generate a batch and print its ground truth boxes.\n",
"\n",
"batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next(generator)\n",
"\n",
"i = 0 # Which batch item to look at\n",
"\n",
"print(\"Image:\", batch_filenames[i])\n",
"print()\n",
"print(\"Ground truth boxes:\\n\")\n",
"print(np.array(batch_original_labels[i]))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Predict.\n",
"\n",
"y_pred = model.predict(batch_images)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted boxes:\n",
"\n",
" class conf xmin ymin xmax ymax\n",
"[[ 2. 0.99 369.02 230.42 424.52 297.93]\n",
" [ 15. 0.99 300.39 182.55 341.59 282.61]\n",
" [ 2. 0.99 108.17 230.5 161.82 326.89]\n",
" [ 15. 0.98 111.66 167.27 160.03 303.8 ]\n",
" [ 2. 0.98 221.35 232.19 282.26 308.72]\n",
" [ 15. 0.98 453.38 190.71 496.67 309.35]\n",
" [ 15. 0.97 227.5 175.29 275.51 286.6 ]\n",
" [ 15. 0.97 366.8 180.56 409.09 285.06]\n",
" [ 2. 0.96 428.15 233.78 501.21 312.65]\n",
" [ 15. 0.93 317.28 183.11 354.99 285.52]\n",
" [ 2. 0.91 297.79 229.87 351.55 303.34]\n",
" [ 2. 0.79 146.91 221.45 190.54 287.08]]\n"
]
}
],
"source": [
"confidence_threshold = 0.5\n",
"\n",
"# Perform confidence thresholding.\n",
"y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])]\n",
"\n",
"# Convert the predictions for the original image.\n",
"y_pred_thresh_inv = apply_inverse_transforms(y_pred_thresh, batch_inverse_transforms)\n",
"\n",
"np.set_printoptions(precision=2, suppress=True, linewidth=90)\n",
"print(\"Predicted boxes:\\n\")\n",
"print(' class conf xmin ymin xmax ymax')\n",
"print(y_pred_thresh_inv[i])"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA5gAAAKvCAYAAAAPwGAcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvc+vbUuSHhSZa+1z7r3vZ9V7RfUPu+2uxt2y2hKiQbZk\nqSUmSJ4hJghGDJA8MVOEh/4zUA+MhGSJASMGjRjiCTLGkkFgY+i2utvdVrld5ap6r96795y9ViaD\nlREZGfFl7nXuO08+gvyudPfZe+XKjPydGV9kZMg508TExMTExMTExMTExMTEN0X81y3AxMTExMTE\nxMTExMTExP83MDeYExMTExMTExMTExMTE8+CucGcmJiYmJiYmJiYmJiYeBbMDebExMTExMTExMTE\nxMTEs2BuMCcmJiYmJiYmJiYmJiaeBXODOTExMTExMTExMTExMfEsmBvMiYmJiYmJiYmJiYmJiWfB\nt7bBDCH8tRDCPwkh/F4I4W9+W+lMTExMTExMTExMTExMvAyEnPPzRxrCQkT/NxH9+0T0x0T094no\nP8k5/6NnT2xiYmJiYmJiYmJiYmLiRWD9luL9y0T0eznnf0pEFEL4b4noPyAiuMG8vPkk3338C81v\nIQQiIkIb4PKIiPiPTJTdw4rcxrnF4zOkTGshcfO+t1FGolReDDGKLCHLY5eEiOCle3aEEGrZ2IRg\nGWT12L1A4I36KoczGRy9lZs6MbJTgHFyfHlYYjbNrJ60z2Dbaf5oywGnGmCqFlbmRhbXEHTBlLSl\njZaWpWVHWe4Xg6sfWJ4l/tyIeSuX+vWs2tH7geunaY+2DY/KofnR5rG2sVP50nVj85WbRnoirtqe\nUF87goDya97riNDEeQa1n3EUMbIMPty5dFBPUWFtv2vKYDzS6NcDiLIRyY139X0rux4LeuV+PIQi\ntXXJTTNl904Af7nxRRVDAHmwea39mHzZnkBu4le/uvHcSkydMG3fbIYe295V/qS/2zRUt8ojmZq2\nSeRDlEA9Ebzo7XxgwmvY5MaK+W8yLuJJU4mH33HDX3/+0XOv6yeUb0jfH29tAer61aI+KUr03qm5\nzzy6UV1nwqMpqYdmpDvzgqqHOi8qASXebH5RawiduIu2XU+H4H8zEbu4eiK/P0c1WsiA8fNm/8fr\nVP0Twqi9JzNmoX2J/AYTQw33vQvsvZCdYKjBe7z70e//KOf8vW+S9re1wfxlIvpn6vsfE9Ff0QFC\nCH+diP46EdHdx9+nv/Sf/k6z6OLPlDYiOio0lo0eP4tq45f3VH5bm2cpJQplIbAsFyIi+tHr4/PV\n250+p9dERPTwxRfHe3elMb8O9HW+EhHR+uYVERE9Pl7pVT7ivUslH6X2riHTtewNVg6zH5/7miml\nJLLqPIQQZJFhB/t931Wej994Q7CuK12vV1cOR5harfxs2zZ5vixLeZok3ZGttMRrZI8xEiXcQhNl\nsIGo+URx6jrrIrZNltsHEdFqy2HX8fhyJ1lo7817KSiZ81FWXD4hBJeflJJbcOgyqgv6I8xelBm6\nHPi3+/V1853jaPIF+omOk8PH0OYPyZAXeeT6lw5vsW27tCN+T7/TK48lBKlfloHj0e+t61rS2Vwc\nKF6bXoyRtpTkbyKiCCZU7ntS9ymp/kHyG6cldRHaZ8fz9reUEl0uFxcH553zKIqvklckw55rOvxM\n2pjKly0H3Wb47/v7e5HB9kOdH5HBtF9UflpezgfLxWW25+TaLUOPFyzDEmq7SqYutRy2f8DyU/3J\nlp9+1tuYJsryHn++e/fO5R3lz5Zb039L+ek82H7B7SRteyNrLz1GIpXuxbdpnhfPyKzbWp2bU/MZ\nQmj6sv6MMTZjFFEt97guLk4kk26bXF5S9ludX7lvojzYvqbbAuoD9j15P12phyOMH7Ot7BZ6HnFr\nHQpuDcFA/V7SiMH1D857zlnaFmPf96ECy463
eu3ixhIz5yLZUZz2d/vsjPLIxqnrFK1jbPhm7RDb\ncUzXg1uzke/j8erXMzavegy57luT3rIsdc7Mvn/InM7lnaoMdo5+fHwkomNMsX2ht7axMstcXuLW\n7RbVTTBVjsZBlIYdE2Tsi0HmhjNjFsW6/t7J95PFrgtU3TyWNbYd+/W6hPtQk6/yDM07dg2mnyVq\nx8gGbq3t50fUl6QPgLLN7XTSpPt//M5/+IdeiKfh29pg3kTO+XeI6HeIiD74hd/I5Tc3QNiFAofT\nCCFQlImibXjHJ1f8UZqv0gdERPQ6fEUf3P2ciIj+rd/6FSIi+oM/+WMiIvrZY6IcjoXYV18dC4nL\n/YVimb0id2LZpBHFWGRI3Fi4QT26TTHsGKntIHoQ2IvsurP1JpUcYrNYdc/lvbrhlMmePHqL1n3f\n3eChtfy2k+iJtbegsH8zZDEji5T+IFXTQVowNMFxGytlENVAm9syC+QHsmYy4cV0kfOoh85ma1lc\ne2elwbquLi69yEbhLXiwSjm5xb4MML57DSdQhu6XaOFj+29dUNcwtm+jSQUt7kbsgQ5r49f9q7cw\n15tIGycRyWKDQF51f7oln6576XslXd3v5b1cF/N2AcxzRCTfhxolXJGZFxlaVrQx4AUi97m7uzsn\nQ7aTv/5bysFvcm0Zx6V+l2epv2G2f+s862duIwLi0mVly1YUHWl39cQb9aFCjHyfCyFIXNx/KdVN\npe0zMvZRprD4+aOXLx52QgiyuNDlYRVrDDQuiUJq98q04ZwG+okoVlRerPJz9L7ehPPnqjd08XbZ\nyGahFFIMQZhiKwOSBynFWmWO36TyZ2/u0+0PLbztOCuKM7Bh1ZYptn32Nricrq3PVolm5ALjtKRz\nQiF4azM7Gut7Mun4kexIYesQa32hjaVNB80jNq7mudnUZLVeitmPF9xX4mCoqf2rtk27OUMbOK3o\nRJsfG57jtASHBpp3eK2I5vYRbDvPwSuGdf7duLlnkmUpmPfdxlTFxwpihN5mPOdMi5rL+dlIOVg3\nz35Os3KdgV4jMvRG045jNsxz4dvaYP4JEf1Z9f3PlN+GQItJAosmNMHVBsc9sE7S0UzYHxTWctl+\nRHfLT4mI6G/8Z3+ViIj+h//pO0RE9Lf/zu/SZ7/0AyKqjeTd/igLS7e4iZlW6Welw8ZjQbYsfjNo\n800kexkZfJIyV+GNM7NhxwIBL9YIdFxUtkktuO2gG827PaxlU51NEKRVZSTwdyA1IfEiHG06ZYD0\ngxqSs9fJcs7NAuz4LHkRicD71G6SOIxdqLcb5lYmXW+ujFhbutaFppRtqhOC1lDrOJsNEudrVwJw\nW0tV62n7VZKCqe3NlsN+rZpuy5jqsLywlU2KGvg4f5rZ4GeaYei189ECMITQLH6IajkGxcJIGbFF\nghrsXd3fmNgkfl7Ex9BVwCCtuW47olSgNu96ETpie6xCQaczZOzqgOMYX9kMmXzYdGz9MpZlabTl\nWpbLZfV9IXj5dDq9/MN+xfkDk6vWQNt2VNnv3F2oIwsGBJ0X3qzbxStcLKe6WLast25DvbGkxNKE\nP9JtF2KjRbn0qyU2zLIOrxdRtl2gBao23R9tOE4tCtXRArtBQu0dWSDYjQArk88qNWpZBTfm9xRa\n9jdbbrpdWKYElkfgrNSy7jFPepzpriWMnL7d+TzUF+qzUbmNAMvLdjU1R3VZLPKKEN3ukbKJMdqI\nDjeWBcuijlgNNiU2DtS2RptjtHm3cfGGCc0jEczNzXxqykjLZDdPSPkeSj2tSs7uJkiPQYuvN3vE\narxZrWOC1TNqSw5pH5xuCLLWGynhUJ+RMgVzEprvRfbY79tSjiJKdP22VXLhNh10nCYNGqzb3wff\nlhfZv09EfyGE8KshhDsi+o+J6L//ltKamJiYmJiYmJiYmJiYeAH4VhjMnPMWQvjPieh/pINK/Ns5\n5//z1nvH
eTXe87bUPmKJtNbEa3QUo2M0LteHQxP/hjZ6c3f8/fBQZKDjLOabN5Hevn1LRET3bw6T\n2pCq1nVnLQRr6XKqTh9
"text/plain": [
"<matplotlib.figure.Figure at 0x7f87f9904d68>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Display the image and draw the ground truth boxes (green) and the predicted boxes onto it.\n",
"\n",
"# Set the colors for the bounding boxes\n",
"colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()\n",
"\n",
"plt.figure(figsize=(20,12))\n",
"plt.imshow(batch_original_images[i])\n",
"\n",
"current_axis = plt.gca()\n",
"\n",
"for box in batch_original_labels[i]:\n",
" xmin = box[1]\n",
" ymin = box[2]\n",
" xmax = box[3]\n",
" ymax = box[4]\n",
" label = '{}'.format(classes[int(box[0])])\n",
" current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color='green', fill=False, linewidth=2)) \n",
" current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':'green', 'alpha':1.0})\n",
"\n",
"for box in y_pred_thresh_inv[i]:\n",
" xmin = box[2]\n",
" ymin = box[3]\n",
" xmax = box[4]\n",
" ymax = box[5]\n",
" color = colors[int(box[0])]\n",
" label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])\n",
" current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color=color, fill=False, linewidth=2)) \n",
" current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}