This commit is contained in:
dl-desktop
2020-02-06 16:47:03 -03:00
parent 6328265287
commit b586f22bf0
318 changed files with 25111 additions and 664 deletions

@@ -0,0 +1,278 @@
'''
A custom Keras layer to generate anchor boxes.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
import keras.backend as K
from keras.engine.topology import InputSpec
from keras.engine.topology import Layer
from bounding_box_utils.bounding_box_utils import convert_coordinates
class AnchorBoxes(Layer):
'''
A Keras layer to create an output tensor containing anchor box coordinates
and variances based on the input tensor and the passed arguments.
A set of 2D anchor boxes of different aspect ratios is created for each spatial unit of
the input tensor. The number of anchor boxes created per unit depends on the arguments
`aspect_ratios` and `two_boxes_for_ar1`; in the default case it is 4. The boxes
are parameterized by four coordinates whose format is determined by the `coords` argument.
The logic implemented by this layer is identical to the logic in the module
`ssd_box_encode_decode_utils.py`.
The purpose of having this layer in the network is to make the model self-sufficient
at inference time. Since the model is predicting offsets to the anchor boxes
(rather than predicting absolute box coordinates directly), one needs to know the anchor
box coordinates in order to construct the final prediction boxes from the predicted offsets.
If the model's output tensor did not contain the anchor box coordinates, the necessary
information to convert the predicted offsets back to absolute coordinates would be missing
in the model output. The reason why it is necessary to predict offsets to the anchor boxes
rather than to predict absolute box coordinates directly is explained in `README.md`.
Input shape:
4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
or `(batch, height, width, channels)` if `dim_ordering = 'tf'`.
Output shape:
5D tensor of shape `(batch, height, width, n_boxes, 8)`. The last axis contains
the four anchor box coordinates and the four variance values for each box.
'''
def __init__(self,
img_height,
img_width,
this_scale,
next_scale,
aspect_ratios=[0.5, 1.0, 2.0],
two_boxes_for_ar1=True,
this_steps=None,
this_offsets=None,
clip_boxes=False,
variances=[0.1, 0.1, 0.2, 0.2],
coords='centroids',
normalize_coords=False,
**kwargs):
'''
All arguments need to be set to the same values as in the box encoding process, otherwise the behavior is undefined.
Some of these arguments are explained in more detail in the documentation of the `SSDBoxEncoder` class.
Arguments:
img_height (int): The height of the input images.
img_width (int): The width of the input images.
this_scale (float): A float in [0, 1], the scaling factor for the size of the generated anchor boxes
as a fraction of the shorter side of the input image.
next_scale (float): A float in [0, 1], the next larger scaling factor. Only relevant if
`self.two_boxes_for_ar1 == True`.
aspect_ratios (list, optional): The list of aspect ratios for which default boxes are to be
generated for this layer.
two_boxes_for_ar1 (bool, optional): Only relevant if `aspect_ratios` contains 1.
If `True`, two default boxes will be generated for aspect ratio 1. The first will be generated
using the scaling factor for the respective layer, the second one will be generated using the
geometric mean of that scaling factor and the next larger scaling factor.
clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
its respective variance value.
coords (str, optional): The box coordinate format to be used internally in the model (i.e. this is not the input format
of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height),
'corners' for the format `(xmin, ymin, xmax, ymax)`, or 'minmax' for the format `(xmin, xmax, ymin, ymax)`.
normalize_coords (bool, optional): Set to `True` if the model uses relative instead of absolute coordinates,
i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
'''
if K.backend() != 'tensorflow':
raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))
if (this_scale < 0) or (next_scale < 0) or (this_scale > 1):
raise ValueError("`this_scale` must be in [0, 1] and `next_scale` must be >0, but `this_scale` == {}, `next_scale` == {}".format(this_scale, next_scale))
if len(variances) != 4:
raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances)))
variances = np.array(variances)
if np.any(variances <= 0):
raise ValueError("All variances must be >0, but the variances given are {}".format(variances))
self.img_height = img_height
self.img_width = img_width
self.this_scale = this_scale
self.next_scale = next_scale
self.aspect_ratios = aspect_ratios
self.two_boxes_for_ar1 = two_boxes_for_ar1
self.this_steps = this_steps
self.this_offsets = this_offsets
self.clip_boxes = clip_boxes
self.variances = variances
self.coords = coords
self.normalize_coords = normalize_coords
# Compute the number of boxes per cell
if (1 in aspect_ratios) and two_boxes_for_ar1:
self.n_boxes = len(aspect_ratios) + 1
else:
self.n_boxes = len(aspect_ratios)
super(AnchorBoxes, self).__init__(**kwargs)
def build(self, input_shape):
self.input_spec = [InputSpec(shape=input_shape)]
super(AnchorBoxes, self).build(input_shape)
def call(self, x, mask=None):
'''
Return an anchor box tensor based on the shape of the input tensor.
The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`.
Note that this tensor does not participate in any graph computations at runtime. It is created
as a constant once during graph creation and is simply output along with the rest of the model
output at runtime. Because of this, all logic is implemented as Numpy array operations and it is sufficient
to convert the resulting Numpy array into a Keras tensor at the very end before outputting it.
Arguments:
x (tensor): 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. The input for this
layer must be the output of the localization predictor layer.
'''
# Compute box width and height for each aspect ratio
# The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
size = min(self.img_height, self.img_width)
# Compute the box widths and heights for all aspect ratios
wh_list = []
for ar in self.aspect_ratios:
if (ar == 1):
# Compute the regular anchor box for aspect ratio 1.
box_height = box_width = self.this_scale * size
wh_list.append((box_width, box_height))
if self.two_boxes_for_ar1:
# Compute one slightly larger version using the geometric mean of this scale value and the next.
box_height = box_width = np.sqrt(self.this_scale * self.next_scale) * size
wh_list.append((box_width, box_height))
else:
box_height = self.this_scale * size / np.sqrt(ar)
box_width = self.this_scale * size * np.sqrt(ar)
wh_list.append((box_width, box_height))
wh_list = np.array(wh_list)
# We need the shape of the input tensor
if K.image_dim_ordering() == 'tf':
batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
else: # Not yet relevant since TensorFlow is the only supported backend right now, but it does no harm to keep this here for the future
batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape
# Compute the grid of box center points. They are identical for all aspect ratios.
# Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally.
if (self.this_steps is None):
step_height = self.img_height / feature_map_height
step_width = self.img_width / feature_map_width
else:
if isinstance(self.this_steps, (list, tuple)) and (len(self.this_steps) == 2):
step_height = self.this_steps[0]
step_width = self.this_steps[1]
elif isinstance(self.this_steps, (int, float)):
step_height = self.this_steps
step_width = self.this_steps
# Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image.
if (self.this_offsets is None):
offset_height = 0.5
offset_width = 0.5
else:
if isinstance(self.this_offsets, (list, tuple)) and (len(self.this_offsets) == 2):
offset_height = self.this_offsets[0]
offset_width = self.this_offsets[1]
elif isinstance(self.this_offsets, (int, float)):
offset_height = self.this_offsets
offset_width = self.this_offsets
# Now that we have the offsets and step sizes, compute the grid of anchor box center points.
cy = np.linspace(offset_height * step_height, (offset_height + feature_map_height - 1) * step_height, feature_map_height)
cx = np.linspace(offset_width * step_width, (offset_width + feature_map_width - 1) * step_width, feature_map_width)
cx_grid, cy_grid = np.meshgrid(cx, cy)
cx_grid = np.expand_dims(cx_grid, -1) # This is necessary for np.tile() to do what we want further down
cy_grid = np.expand_dims(cy_grid, -1) # This is necessary for np.tile() to do what we want further down
# Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
# where the last dimension will contain `(cx, cy, w, h)`
boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))
boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes)) # Set cx
boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes)) # Set cy
boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w
boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h
# Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners')
# If `clip_boxes` is enabled, clip the coordinates to lie within the image boundaries
if self.clip_boxes:
x_coords = boxes_tensor[:,:,:,[0, 2]]
x_coords[x_coords >= self.img_width] = self.img_width - 1
x_coords[x_coords < 0] = 0
boxes_tensor[:,:,:,[0, 2]] = x_coords
y_coords = boxes_tensor[:,:,:,[1, 3]]
y_coords[y_coords >= self.img_height] = self.img_height - 1
y_coords[y_coords < 0] = 0
boxes_tensor[:,:,:,[1, 3]] = y_coords
# If `normalize_coords` is enabled, normalize the coordinates to be within [0,1]
if self.normalize_coords:
boxes_tensor[:, :, :, [0, 2]] /= self.img_width
boxes_tensor[:, :, :, [1, 3]] /= self.img_height
# TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
if self.coords == 'centroids':
# Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids', border_pixels='half')
elif self.coords == 'minmax':
# Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax', border_pixels='half')
# Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
# as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
variances_tensor = np.zeros_like(boxes_tensor) # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
variances_tensor += self.variances # Long live broadcasting
# Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`
boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)
# Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along the batch dimension.
# The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))
return boxes_tensor
def compute_output_shape(self, input_shape):
if K.image_dim_ordering() == 'tf':
batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape
else: # Not yet relevant since TensorFlow is the only supported backend right now, but it does no harm to keep this here for the future
batch_size, feature_map_channels, feature_map_height, feature_map_width = input_shape
return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)
def get_config(self):
config = {
'img_height': self.img_height,
'img_width': self.img_width,
'this_scale': self.this_scale,
'next_scale': self.next_scale,
'aspect_ratios': list(self.aspect_ratios),
'two_boxes_for_ar1': self.two_boxes_for_ar1,
'clip_boxes': self.clip_boxes,
'variances': list(self.variances),
'coords': self.coords,
'normalize_coords': self.normalize_coords
}
base_config = super(AnchorBoxes, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
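
A minimal usage sketch for this layer (illustrative only, not part of this commit): the input size, scales, and the Conv2D stand-in for a localization predictor below are assumptions chosen to mirror an SSD300-style setup.

# Sketch under assumed values: image size, scales, and the predictor stand-in
# are not taken from this commit.
from keras.layers import Input, Conv2D

img_height, img_width = 300, 300
x = Input(shape=(img_height, img_width, 3))
loc = Conv2D(4 * 4, (3, 3), padding='same')(x)  # stand-in predictor: 4 boxes * 4 offsets per cell
anchors = AnchorBoxes(img_height, img_width,
                      this_scale=0.2, next_scale=0.37,
                      aspect_ratios=[0.5, 1.0, 2.0],
                      two_boxes_for_ar1=True)(loc)
# With aspect ratios {0.5, 1.0, 2.0} and `two_boxes_for_ar1=True`, n_boxes == 4,
# so `anchors` has shape (batch, 300, 300, 4, 8): 4 coordinates + 4 variances per box.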

@@ -0,0 +1,283 @@
'''
A custom Keras layer to decode the raw SSD prediction output. Corresponds to the
`DetectionOutput` layer type in the original Caffe implementation of SSD.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
import tensorflow as tf
import keras.backend as K
from keras.engine.topology import InputSpec
from keras.engine.topology import Layer
class DecodeDetections(Layer):
'''
A Keras layer to decode the raw SSD prediction output.
Input shape:
3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`.
Output shape:
3D tensor of shape `(batch_size, top_k, 6)`.
'''
def __init__(self,
confidence_thresh=0.01,
iou_threshold=0.45,
top_k=200,
nms_max_output_size=400,
coords='centroids',
normalize_coords=True,
img_height=None,
img_width=None,
**kwargs):
'''
All default argument values follow the Caffe implementation.
Arguments:
confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
positive class in order to be considered for the non-maximum suppression stage for the respective class.
A lower value will result in a larger part of the selection process being done by the non-maximum suppression
stage, while a larger value will result in a larger part of the selection process happening in the confidence
thresholding stage.
iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
to the box score.
top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
non-maximum suppression stage.
nms_max_output_size (int, optional): The maximum number of predictions that will be left after performing non-maximum
suppression.
coords (str, optional): The box coordinate format that the model outputs. Must be 'centroids'
i.e. the format `(cx, cy, w, h)` (box center coordinates, width, and height). Other coordinate formats are
currently not supported.
normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
coordinates. Requires `img_height` and `img_width` if set to `True`.
img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`.
img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`.
'''
if K.backend() != 'tensorflow':
raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))
if normalize_coords and ((img_height is None) or (img_width is None)):
raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width))
if coords != 'centroids':
raise ValueError("The DetectionOutput layer currently only supports the 'centroids' coordinate format.")
# We need these members for the config.
self.confidence_thresh = confidence_thresh
self.iou_threshold = iou_threshold
self.top_k = top_k
self.normalize_coords = normalize_coords
self.img_height = img_height
self.img_width = img_width
self.coords = coords
self.nms_max_output_size = nms_max_output_size
# We need these members for TensorFlow.
self.tf_confidence_thresh = tf.constant(self.confidence_thresh, name='confidence_thresh')
self.tf_iou_threshold = tf.constant(self.iou_threshold, name='iou_threshold')
self.tf_top_k = tf.constant(self.top_k, name='top_k')
self.tf_normalize_coords = tf.constant(self.normalize_coords, name='normalize_coords')
self.tf_img_height = tf.constant(self.img_height, dtype=tf.float32, name='img_height')
self.tf_img_width = tf.constant(self.img_width, dtype=tf.float32, name='img_width')
self.tf_nms_max_output_size = tf.constant(self.nms_max_output_size, name='nms_max_output_size')
super(DecodeDetections, self).__init__(**kwargs)
def build(self, input_shape):
self.input_spec = [InputSpec(shape=input_shape)]
super(DecodeDetections, self).build(input_shape)
def call(self, y_pred, mask=None):
'''
Returns:
3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded
to always yield `top_k` predictions per batch item. The last axis contains
the class ID, confidence, and coordinates for each predicted box in the format
`[class_id, confidence, xmin, ymin, xmax, ymax]`.
'''
#####################################################################################
# 1. Convert the box coordinates from predicted anchor box offsets to predicted
# absolute coordinates
#####################################################################################
# Convert anchor box offsets to image offsets.
cx = y_pred[...,-12] * y_pred[...,-4] * y_pred[...,-6] + y_pred[...,-8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor
cy = y_pred[...,-11] * y_pred[...,-3] * y_pred[...,-5] + y_pred[...,-7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor
w = tf.exp(y_pred[...,-10] * y_pred[...,-2]) * y_pred[...,-6] # w = exp(w_pred * variance_w) * w_anchor
h = tf.exp(y_pred[...,-9] * y_pred[...,-1]) * y_pred[...,-5] # h = exp(h_pred * variance_h) * h_anchor
# Convert 'centroids' to 'corners'.
xmin = cx - 0.5 * w
ymin = cy - 0.5 * h
xmax = cx + 0.5 * w
ymax = cy + 0.5 * h
# If the model predicts box coordinates relative to the image dimensions and they are supposed
# to be converted back to absolute coordinates, do that.
def normalized_coords():
xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1)
ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1)
xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1)
ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1)
return xmin1, ymin1, xmax1, ymax1
def non_normalized_coords():
return tf.expand_dims(xmin, axis=-1), tf.expand_dims(ymin, axis=-1), tf.expand_dims(xmax, axis=-1), tf.expand_dims(ymax, axis=-1)
xmin, ymin, xmax, ymax = tf.cond(self.tf_normalize_coords, normalized_coords, non_normalized_coords)
# Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor.
y_pred = tf.concat(values=[y_pred[...,:-12], xmin, ymin, xmax, ymax], axis=-1)
#####################################################################################
# 2. Perform confidence thresholding, per-class non-maximum suppression, and
# top-k filtering.
#####################################################################################
batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32
n_boxes = tf.shape(y_pred)[1]
n_classes = y_pred.shape[2] - 4
class_indices = tf.range(1, n_classes)
# Create a function that filters the predictions for the given batch item. Specifically, it performs:
# - confidence thresholding
# - non-maximum suppression (NMS)
# - top-k filtering
def filter_predictions(batch_item):
# Create a function that filters the predictions for one single class.
def filter_single_class(index):
# From a tensor of shape (n_boxes, n_classes + 4 coordinates) extract
# a tensor of shape (n_boxes, 1 + 4 coordinates) that contains the
# confidence values for just one class, determined by `index`.
confidences = tf.expand_dims(batch_item[..., index], axis=-1)
class_id = tf.fill(dims=tf.shape(confidences), value=tf.to_float(index))
box_coordinates = batch_item[...,-4:]
single_class = tf.concat([class_id, confidences, box_coordinates], axis=-1)
# Apply confidence thresholding with respect to the class defined by `index`.
threshold_met = single_class[:,1] > self.tf_confidence_thresh
single_class = tf.boolean_mask(tensor=single_class,
mask=threshold_met)
# If any boxes made the threshold, perform NMS.
def perform_nms():
scores = single_class[...,1]
# `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`.
xmin = tf.expand_dims(single_class[...,-4], axis=-1)
ymin = tf.expand_dims(single_class[...,-3], axis=-1)
xmax = tf.expand_dims(single_class[...,-2], axis=-1)
ymax = tf.expand_dims(single_class[...,-1], axis=-1)
boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1)
maxima_indices = tf.image.non_max_suppression(boxes=boxes,
scores=scores,
max_output_size=self.tf_nms_max_output_size,
iou_threshold=self.iou_threshold,
name='non_maximum_suppression')
maxima = tf.gather(params=single_class,
indices=maxima_indices,
axis=0)
return maxima
def no_confident_predictions():
return tf.constant(value=0.0, shape=(1,6))
single_class_nms = tf.cond(tf.equal(tf.size(single_class), 0), no_confident_predictions, perform_nms)
# Make sure `single_class_nms` is exactly `self.nms_max_output_size` elements long.
padded_single_class = tf.pad(tensor=single_class_nms,
paddings=[[0, self.tf_nms_max_output_size - tf.shape(single_class_nms)[0]], [0, 0]],
mode='CONSTANT',
constant_values=0.0)
return padded_single_class
# Iterate `filter_single_class()` over all class indices.
filtered_single_classes = tf.map_fn(fn=lambda i: filter_single_class(i),
elems=tf.range(1,n_classes),
dtype=tf.float32,
parallel_iterations=128,
back_prop=False,
swap_memory=False,
infer_shape=True,
name='loop_over_classes')
# Concatenate the filtered results for all individual classes to one tensor.
filtered_predictions = tf.reshape(tensor=filtered_single_classes, shape=(-1,6))
# Perform top-k filtering for this batch item or pad it in case there are
# fewer than `self.top_k` boxes left at this point. Either way, produce a
# tensor of length `self.top_k`. By the time we return the final results tensor
# for the whole batch, all batch items must have the same number of predicted
boxes so that the tensor dimensions are homogeneous. If fewer than `self.top_k`
# predictions are left after the filtering process above, we pad the missing
# predictions with zeros as dummy entries.
def top_k():
return tf.gather(params=filtered_predictions,
indices=tf.nn.top_k(filtered_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
axis=0)
def pad_and_top_k():
padded_predictions = tf.pad(tensor=filtered_predictions,
paddings=[[0, self.tf_top_k - tf.shape(filtered_predictions)[0]], [0, 0]],
mode='CONSTANT',
constant_values=0.0)
return tf.gather(params=padded_predictions,
indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
axis=0)
top_k_boxes = tf.cond(tf.greater_equal(tf.shape(filtered_predictions)[0], self.tf_top_k), top_k, pad_and_top_k)
return top_k_boxes
# Iterate `filter_predictions()` over all batch items.
output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x),
elems=y_pred,
dtype=None,
parallel_iterations=128,
back_prop=False,
swap_memory=False,
infer_shape=True,
name='loop_over_batch')
return output_tensor
def compute_output_shape(self, input_shape):
batch_size, n_boxes, last_axis = input_shape
return (batch_size, self.top_k, 6) # Last axis: (class_ID, confidence, 4 box coordinates)
def get_config(self):
config = {
'confidence_thresh': self.confidence_thresh,
'iou_threshold': self.iou_threshold,
'top_k': self.top_k,
'nms_max_output_size': self.nms_max_output_size,
'coords': self.coords,
'normalize_coords': self.normalize_coords,
'img_height': self.img_height,
'img_width': self.img_width,
}
base_config = super(DecodeDetections, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
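
A hedged sketch of how this decoder is typically attached (not part of this commit; the box count and class count below are assumed SSD300/Pascal VOC values):

# Sketch under assumed sizes: 8732 boxes and 21 classes are illustrative.
from keras.layers import Input
from keras.models import Model

n_boxes, n_classes = 8732, 21
y_pred_raw = Input(shape=(n_boxes, n_classes + 12))
decoded = DecodeDetections(confidence_thresh=0.5,
                           iou_threshold=0.45,
                           top_k=200,
                           normalize_coords=True,
                           img_height=300, img_width=300)(y_pred_raw)
decoder = Model(inputs=y_pred_raw, outputs=decoded)
# `decoder.predict(...)` returns a (batch, 200, 6) array of
# [class_id, confidence, xmin, ymin, xmax, ymax], zero-padded up to `top_k`.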

@@ -0,0 +1,266 @@
'''
A custom Keras layer to decode the raw SSD prediction output. This is a modified
and more efficient version of the `DetectionOutput` layer type in the original Caffe
implementation of SSD. For a faithful replication of the original layer, please
refer to the `DecodeDetections` layer.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
import tensorflow as tf
import keras.backend as K
from keras.engine.topology import InputSpec
from keras.engine.topology import Layer
class DecodeDetectionsFast(Layer):
'''
A Keras layer to decode the raw SSD prediction output.
Input shape:
3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`.
Output shape:
3D tensor of shape `(batch_size, top_k, 6)`.
'''
def __init__(self,
confidence_thresh=0.01,
iou_threshold=0.45,
top_k=200,
nms_max_output_size=400,
coords='centroids',
normalize_coords=True,
img_height=None,
img_width=None,
**kwargs):
'''
All default argument values follow the Caffe implementation.
Arguments:
confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
positive class in order to be considered for the non-maximum suppression stage for the respective class.
A lower value will result in a larger part of the selection process being done by the non-maximum suppression
stage, while a larger value will result in a larger part of the selection process happening in the confidence
thresholding stage.
iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
to the box score.
top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
non-maximum suppression stage.
nms_max_output_size (int, optional): The maximum number of predictions that will be left after performing non-maximum
suppression.
coords (str, optional): The box coordinate format that the model outputs. Must be 'centroids'
i.e. the format `(cx, cy, w, h)` (box center coordinates, width, and height). Other coordinate formats are
currently not supported.
normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
coordinates. Requires `img_height` and `img_width` if set to `True`.
img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`.
img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`.
'''
if K.backend() != 'tensorflow':
raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))
if normalize_coords and ((img_height is None) or (img_width is None)):
raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width))
if coords != 'centroids':
raise ValueError("The DetectionOutput layer currently only supports the 'centroids' coordinate format.")
# We need these members for the config.
self.confidence_thresh = confidence_thresh
self.iou_threshold = iou_threshold
self.top_k = top_k
self.normalize_coords = normalize_coords
self.img_height = img_height
self.img_width = img_width
self.coords = coords
self.nms_max_output_size = nms_max_output_size
# We need these members for TensorFlow.
self.tf_confidence_thresh = tf.constant(self.confidence_thresh, name='confidence_thresh')
self.tf_iou_threshold = tf.constant(self.iou_threshold, name='iou_threshold')
self.tf_top_k = tf.constant(self.top_k, name='top_k')
self.tf_normalize_coords = tf.constant(self.normalize_coords, name='normalize_coords')
self.tf_img_height = tf.constant(self.img_height, dtype=tf.float32, name='img_height')
self.tf_img_width = tf.constant(self.img_width, dtype=tf.float32, name='img_width')
self.tf_nms_max_output_size = tf.constant(self.nms_max_output_size, name='nms_max_output_size')
super(DecodeDetectionsFast, self).__init__(**kwargs)
def build(self, input_shape):
self.input_spec = [InputSpec(shape=input_shape)]
super(DecodeDetectionsFast, self).build(input_shape)
def call(self, y_pred, mask=None):
'''
Returns:
3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded
to always yield `top_k` predictions per batch item. The last axis contains
the class ID, confidence, and coordinates for each predicted box in the format
`[class_id, confidence, xmin, ymin, xmax, ymax]`.
'''
#####################################################################################
# 1. Convert the box coordinates from predicted anchor box offsets to predicted
# absolute coordinates
#####################################################################################
# Extract the predicted class IDs as the indices of the highest confidence values.
class_ids = tf.expand_dims(tf.to_float(tf.argmax(y_pred[...,:-12], axis=-1)), axis=-1)
# Extract the confidences of the maximal classes.
confidences = tf.reduce_max(y_pred[...,:-12], axis=-1, keep_dims=True)
# Convert anchor box offsets to image offsets.
cx = y_pred[...,-12] * y_pred[...,-4] * y_pred[...,-6] + y_pred[...,-8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor
cy = y_pred[...,-11] * y_pred[...,-3] * y_pred[...,-5] + y_pred[...,-7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor
w = tf.exp(y_pred[...,-10] * y_pred[...,-2]) * y_pred[...,-6] # w = exp(w_pred * variance_w) * w_anchor
h = tf.exp(y_pred[...,-9] * y_pred[...,-1]) * y_pred[...,-5] # h = exp(h_pred * variance_h) * h_anchor
# Convert 'centroids' to 'corners'.
xmin = cx - 0.5 * w
ymin = cy - 0.5 * h
xmax = cx + 0.5 * w
ymax = cy + 0.5 * h
# If the model predicts box coordinates relative to the image dimensions and they are supposed
# to be converted back to absolute coordinates, do that.
def normalized_coords():
xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1)
ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1)
xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1)
ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1)
return xmin1, ymin1, xmax1, ymax1
def non_normalized_coords():
return tf.expand_dims(xmin, axis=-1), tf.expand_dims(ymin, axis=-1), tf.expand_dims(xmax, axis=-1), tf.expand_dims(ymax, axis=-1)
xmin, ymin, xmax, ymax = tf.cond(self.tf_normalize_coords, normalized_coords, non_normalized_coords)
# Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor.
y_pred = tf.concat(values=[class_ids, confidences, xmin, ymin, xmax, ymax], axis=-1)
#####################################################################################
# 2. Perform confidence thresholding, non-maximum suppression, and top-k filtering.
#####################################################################################
batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32
n_boxes = tf.shape(y_pred)[1]
n_classes = y_pred.shape[2] - 4
class_indices = tf.range(1, n_classes)
# Create a function that filters the predictions for the given batch item. Specifically, it performs:
# - confidence thresholding
# - non-maximum suppression (NMS)
# - top-k filtering
def filter_predictions(batch_item):
# Keep only the non-background boxes.
positive_boxes = tf.not_equal(batch_item[...,0], 0.0)
predictions = tf.boolean_mask(tensor=batch_item,
mask=positive_boxes)
def perform_confidence_thresholding():
# Apply confidence thresholding.
threshold_met = predictions[:,1] > self.tf_confidence_thresh
return tf.boolean_mask(tensor=predictions,
mask=threshold_met)
def no_positive_boxes():
return tf.constant(value=0.0, shape=(1,6))
# If there are any positive predictions, perform confidence thresholding.
predictions_conf_thresh = tf.cond(tf.equal(tf.size(predictions), 0), no_positive_boxes, perform_confidence_thresholding)
def perform_nms():
scores = predictions_conf_thresh[...,1]
# `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`.
xmin = tf.expand_dims(predictions_conf_thresh[...,-4], axis=-1)
ymin = tf.expand_dims(predictions_conf_thresh[...,-3], axis=-1)
xmax = tf.expand_dims(predictions_conf_thresh[...,-2], axis=-1)
ymax = tf.expand_dims(predictions_conf_thresh[...,-1], axis=-1)
boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1)
maxima_indices = tf.image.non_max_suppression(boxes=boxes,
scores=scores,
max_output_size=self.tf_nms_max_output_size,
iou_threshold=self.iou_threshold,
name='non_maximum_suppression')
maxima = tf.gather(params=predictions_conf_thresh,
indices=maxima_indices,
axis=0)
return maxima
def no_confident_predictions():
return tf.constant(value=0.0, shape=(1,6))
# If any boxes made the threshold, perform NMS.
predictions_nms = tf.cond(tf.equal(tf.size(predictions_conf_thresh), 0), no_confident_predictions, perform_nms)
# Perform top-k filtering for this batch item or pad it in case there are
# fewer than `self.top_k` boxes left at this point. Either way, produce a
# tensor of length `self.top_k`. By the time we return the final results tensor
# for the whole batch, all batch items must have the same number of predicted
boxes so that the tensor dimensions are homogeneous. If fewer than `self.top_k`
# predictions are left after the filtering process above, we pad the missing
# predictions with zeros as dummy entries.
def top_k():
return tf.gather(params=predictions_nms,
indices=tf.nn.top_k(predictions_nms[:, 1], k=self.tf_top_k, sorted=True).indices,
axis=0)
def pad_and_top_k():
padded_predictions = tf.pad(tensor=predictions_nms,
paddings=[[0, self.tf_top_k - tf.shape(predictions_nms)[0]], [0, 0]],
mode='CONSTANT',
constant_values=0.0)
return tf.gather(params=padded_predictions,
indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
axis=0)
top_k_boxes = tf.cond(tf.greater_equal(tf.shape(predictions_nms)[0], self.tf_top_k), top_k, pad_and_top_k)
return top_k_boxes
# Iterate `filter_predictions()` over all batch items.
output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x),
elems=y_pred,
dtype=None,
parallel_iterations=128,
back_prop=False,
swap_memory=False,
infer_shape=True,
name='loop_over_batch')
return output_tensor
def compute_output_shape(self, input_shape):
batch_size, n_boxes, last_axis = input_shape
return (batch_size, self.top_k, 6) # Last axis: (class_ID, confidence, 4 box coordinates)
def get_config(self):
config = {
'confidence_thresh': self.confidence_thresh,
'iou_threshold': self.iou_threshold,
'top_k': self.top_k,
'nms_max_output_size': self.nms_max_output_size,
'coords': self.coords,
'normalize_coords': self.normalize_coords,
'img_height': self.img_height,
'img_width': self.img_width,
}
base_config = super(DecodeDetectionsFast, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
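
The offset decoding in step 1 of both decoders can be sanity-checked in isolation. The following NumPy round trip (illustrative, with made-up numbers) encodes a box the way the training-time encoder divides anchor offsets by the variances, then applies the decoding formulas from `call()`:

import numpy as np

# All values below are made up for illustration.
cx_a, cy_a, w_a, h_a = 150.0, 100.0, 60.0, 40.0      # an anchor box in centroids format
var = np.array([0.1, 0.1, 0.2, 0.2])                 # the default variances
gt = np.array([160.0, 95.0, 80.0, 50.0])             # a ground truth box (cx, cy, w, h)

# Encode: offsets relative to the anchor, divided by the variances.
offsets = np.array([(gt[0] - cx_a) / w_a,
                    (gt[1] - cy_a) / h_a,
                    np.log(gt[2] / w_a),
                    np.log(gt[3] / h_a)]) / var

# Decode with the formulas used above, e.g. cx = cx_pred * cx_variance * w_anchor + cx_anchor.
cx = offsets[0] * var[0] * w_a + cx_a
cy = offsets[1] * var[1] * h_a + cy_a
w = np.exp(offsets[2] * var[2]) * w_a
h = np.exp(offsets[3] * var[3]) * h_a
assert np.allclose([cx, cy, w, h], gt)               # the round trip recovers the box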

@@ -0,0 +1,70 @@
'''
A custom Keras layer to perform L2-normalization.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
import keras.backend as K
from keras.engine.topology import InputSpec
from keras.engine.topology import Layer
class L2Normalization(Layer):
'''
Performs L2 normalization on the input tensor with a learnable scaling parameter
as described in the paper "ParseNet: Looking Wider to See Better" (see references)
and as used in the original SSD model.
Arguments:
gamma_init (int): The initial scaling parameter. Defaults to 20 following the
SSD paper.
Input shape:
4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
or `(batch, height, width, channels)` if `dim_ordering = 'tf'`.
Returns:
The scaled tensor. Same shape as the input tensor.
References:
http://cs.unc.edu/~wliu/papers/parsenet.pdf
'''
def __init__(self, gamma_init=20, **kwargs):
if K.image_dim_ordering() == 'tf':
self.axis = 3
else:
self.axis = 1
self.gamma_init = gamma_init
super(L2Normalization, self).__init__(**kwargs)
def build(self, input_shape):
self.input_spec = [InputSpec(shape=input_shape)]
gamma = self.gamma_init * np.ones((input_shape[self.axis],))
self.gamma = K.variable(gamma, name='{}_gamma'.format(self.name))
self.trainable_weights = [self.gamma]
super(L2Normalization, self).build(input_shape)
def call(self, x, mask=None):
output = K.l2_normalize(x, self.axis)
return output * self.gamma
def get_config(self):
config = {
'gamma_init': self.gamma_init
}
base_config = super(L2Normalization, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
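
A short sketch of the layer's effect (illustrative, not part of this commit; the 38x38x512 shape is an assumed conv4_3-sized feature map, which is where the original SSD applies this layer):

from keras.layers import Input
from keras.models import Model
import numpy as np

x = Input(shape=(38, 38, 512))            # assumed conv4_3-sized feature map
m = Model(inputs=x, outputs=L2Normalization(gamma_init=20)(x))

feat = np.random.randn(1, 38, 38, 512).astype('float32')
out = m.predict(feat)
# Every spatial position is normalized to unit length along the channel axis and
# then scaled by gamma, so the channel-wise L2 norm at each position equals gamma_init.
assert np.allclose(np.linalg.norm(out, axis=-1), 20.0, atol=1e-3)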