Summary
This commit is contained in:
0
ssd_keras-master/keras_layers/__init__.py
Normal file
0
ssd_keras-master/keras_layers/__init__.py
Normal file
BIN
ssd_keras-master/keras_layers/__init__.pyc
Normal file
BIN
ssd_keras-master/keras_layers/__init__.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
278
ssd_keras-master/keras_layers/keras_layer_AnchorBoxes.py
Normal file
278
ssd_keras-master/keras_layers/keras_layer_AnchorBoxes.py
Normal file
@@ -0,0 +1,278 @@
|
||||
'''
|
||||
A custom Keras layer to generate anchor boxes.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
import keras.backend as K
|
||||
from keras.engine.topology import InputSpec
|
||||
from keras.engine.topology import Layer
|
||||
|
||||
from bounding_box_utils.bounding_box_utils import convert_coordinates
|
||||
|
||||
class AnchorBoxes(Layer):
    '''
    A Keras layer to create an output tensor containing anchor box coordinates
    and variances based on the input tensor and the passed arguments.

    A set of 2D anchor boxes of different aspect ratios is created for each spatial unit of
    the input tensor. The number of anchor boxes created per unit depends on the arguments
    `aspect_ratios` and `two_boxes_for_ar1`, in the default case it is 4. The format of the
    box coordinates in the output tensor is selected by the `coords` argument.

    The logic implemented by this layer is identical to the logic in the module
    `ssd_box_encode_decode_utils.py`.

    The purpose of having this layer in the network is to make the model self-sufficient
    at inference time. Since the model is predicting offsets to the anchor boxes
    (rather than predicting absolute box coordinates directly), one needs to know the anchor
    box coordinates in order to construct the final prediction boxes from the predicted offsets.
    If the model's output tensor did not contain the anchor box coordinates, the necessary
    information to convert the predicted offsets back to absolute coordinates would be missing
    in the model output. The reason why it is necessary to predict offsets to the anchor boxes
    rather than to predict absolute box coordinates directly is explained in `README.md`.

    Input shape:
        4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
        or `(batch, height, width, channels)` if `dim_ordering = 'tf'`.

    Output shape:
        5D tensor of shape `(batch, height, width, n_boxes, 8)`. The last axis contains
        the four anchor box coordinates and the four variance values for each box.
    '''

    def __init__(self,
                 img_height,
                 img_width,
                 this_scale,
                 next_scale,
                 aspect_ratios=[0.5, 1.0, 2.0],
                 two_boxes_for_ar1=True,
                 this_steps=None,
                 this_offsets=None,
                 clip_boxes=False,
                 variances=[0.1, 0.1, 0.2, 0.2],
                 coords='centroids',
                 normalize_coords=False,
                 **kwargs):
        '''
        All arguments need to be set to the same values as in the box encoding process, otherwise the behavior is undefined.
        Some of these arguments are explained in more detail in the documentation of the `SSDBoxEncoder` class.

        Arguments:
            img_height (int): The height of the input images.
            img_width (int): The width of the input images.
            this_scale (float): A float in [0, 1], the scaling factor for the size of the generated anchor boxes
                as a fraction of the shorter side of the input image.
            next_scale (float): A float in [0, 1], the next larger scaling factor. Only relevant if
                `self.two_boxes_for_ar1 == True`.
            aspect_ratios (list, optional): The list of aspect ratios for which default boxes are to be
                generated for this layer.
            two_boxes_for_ar1 (bool, optional): Only relevant if `aspect_ratios` contains 1.
                If `True`, two default boxes will be generated for aspect ratio 1. The first will be generated
                using the scaling factor for the respective layer, the second one will be generated using
                geometric mean of said scaling factor and next bigger scaling factor.
            this_steps (number or list/tuple of two numbers, optional): The distance in pixels between two
                adjacent anchor box center points. A list/tuple is read as `(step_height, step_width)`, a single
                number is used for both axes. If `None`, the steps are computed as the image size divided
                by the feature map size.
            this_offsets (number or list/tuple of two numbers, optional): The position of the first anchor box
                center point from the top and left of the image, expressed as a multiple of the step size.
                A list/tuple is read as `(offset_height, offset_width)`, a single number is used for both axes.
                If `None`, 0.5 is used for both axes.
            clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
            variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
                its respective variance value.
            coords (str, optional): The box coordinate format to be used internally in the model (i.e. this is not the input format
                of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height),
                'corners' for the format `(xmin, ymin, xmax, ymax)`, or 'minmax' for the format `(xmin, xmax, ymin, ymax)`.
            normalize_coords (bool, optional): Set to `True` if the model uses relative instead of absolute coordinates,
                i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.

        Raises:
            TypeError: If the Keras backend is not TensorFlow.
            ValueError: If the scales are out of range, if `variances` does not contain
                exactly 4 values, or if any variance is not strictly positive.
        '''
        if K.backend() != 'tensorflow':
            raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))

        if (this_scale < 0) or (next_scale < 0) or (this_scale > 1):
            raise ValueError("`this_scale` must be in [0, 1] and `next_scale` must be >0, but `this_scale` == {}, `next_scale` == {}".format(this_scale, next_scale))

        if len(variances) != 4:
            raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances)))
        variances = np.array(variances)
        if np.any(variances <= 0):
            raise ValueError("All variances must be >0, but the variances given are {}".format(variances))

        self.img_height = img_height
        self.img_width = img_width
        self.this_scale = this_scale
        self.next_scale = next_scale
        # Store a private copy so that neither the shared default list object nor a
        # list owned by the caller can be modified through (or behind) this layer.
        self.aspect_ratios = list(aspect_ratios)
        self.two_boxes_for_ar1 = two_boxes_for_ar1
        self.this_steps = this_steps
        self.this_offsets = this_offsets
        self.clip_boxes = clip_boxes
        self.variances = variances
        self.coords = coords
        self.normalize_coords = normalize_coords
        # Compute the number of boxes per cell
        if (1 in self.aspect_ratios) and two_boxes_for_ar1:
            self.n_boxes = len(self.aspect_ratios) + 1
        else:
            self.n_boxes = len(self.aspect_ratios)
        super(AnchorBoxes, self).__init__(**kwargs)

    @staticmethod
    def _as_height_width_pair(value, name):
        '''
        Expand `value` into a `(height, width)` pair.

        Arguments:
            value: Either a list/tuple of exactly two entries, which is read as
                `(height, width)`, or a single int/float, which is used for both axes.
            name (str): The argument name to report in the error message.

        Returns:
            A 2-tuple `(height_value, width_value)`.

        Raises:
            ValueError: If `value` is neither of the supported forms.
        '''
        if isinstance(value, (list, tuple)) and (len(value) == 2):
            return value[0], value[1]
        if isinstance(value, (int, float)):
            return value, value
        raise ValueError("`{}` must be `None`, a number, or a list/tuple of two numbers, but got {}".format(name, value))

    def build(self, input_shape):
        '''
        Pin the layer's input spec to the given input shape and mark the layer as built.
        '''
        self.input_spec = [InputSpec(shape=input_shape)]
        super(AnchorBoxes, self).build(input_shape)

    def call(self, x, mask=None):
        '''
        Return an anchor box tensor based on the shape of the input tensor.

        The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`.

        Note that this tensor does not participate in any graph computations at runtime. It is being created
        as a constant once during graph creation and is just being output along with the rest of the model output
        during runtime. Because of this, all logic is implemented as Numpy array operations and it is sufficient
        to convert the resulting Numpy array into a Keras tensor at the very end before outputting it.

        Arguments:
            x (tensor): 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
                or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. The input for this
                layer must be the output of the localization predictor layer.
            mask: Unused.

        Returns:
            A 5D float32 tensor of shape `(batch, feature_map_height, feature_map_width, n_boxes, 8)`.

        Raises:
            ValueError: If `this_steps` or `this_offsets` is neither `None`, a number,
                nor a list/tuple of two numbers.
        '''

        # Compute box width and height for each aspect ratio
        # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
        size = min(self.img_height, self.img_width)
        # Compute the box widths and heights for all aspect ratios
        wh_list = []
        for ar in self.aspect_ratios:
            if (ar == 1):
                # Compute the regular anchor box for aspect ratio 1.
                box_height = box_width = self.this_scale * size
                wh_list.append((box_width, box_height))
                if self.two_boxes_for_ar1:
                    # Compute one slightly larger version using the geometric mean of this scale value and the next.
                    box_height = box_width = np.sqrt(self.this_scale * self.next_scale) * size
                    wh_list.append((box_width, box_height))
            else:
                box_height = self.this_scale * size / np.sqrt(ar)
                box_width = self.this_scale * size * np.sqrt(ar)
                wh_list.append((box_width, box_height))
        wh_list = np.array(wh_list)

        # We need the shape of the input tensor
        if K.image_dim_ordering() == 'tf':
            batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
        else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
            batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape

        # Compute the grid of box center points. They are identical for all aspect ratios.

        # Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally.
        if self.this_steps is None:
            step_height = self.img_height / feature_map_height
            step_width = self.img_width / feature_map_width
        else:
            step_height, step_width = self._as_height_width_pair(self.this_steps, 'this_steps')
        # Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image.
        if self.this_offsets is None:
            offset_height = 0.5
            offset_width = 0.5
        else:
            offset_height, offset_width = self._as_height_width_pair(self.this_offsets, 'this_offsets')
        # Now that we have the offsets and step sizes, compute the grid of anchor box center points.
        cy = np.linspace(offset_height * step_height, (offset_height + feature_map_height - 1) * step_height, feature_map_height)
        cx = np.linspace(offset_width * step_width, (offset_width + feature_map_width - 1) * step_width, feature_map_width)
        cx_grid, cy_grid = np.meshgrid(cx, cy)
        cx_grid = np.expand_dims(cx_grid, -1) # This is necessary for np.tile() to do what we want further down
        cy_grid = np.expand_dims(cy_grid, -1) # This is necessary for np.tile() to do what we want further down

        # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
        # where the last dimension will contain `(cx, cy, w, h)`
        boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))

        boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes)) # Set cx
        boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes)) # Set cy
        boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w
        boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h

        # Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`
        boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners')

        # If `clip_boxes` is enabled, clip the coordinates to lie within the image boundaries
        if self.clip_boxes:
            x_coords = boxes_tensor[:,:,:,[0, 2]]
            x_coords[x_coords >= self.img_width] = self.img_width - 1
            x_coords[x_coords < 0] = 0
            boxes_tensor[:,:,:,[0, 2]] = x_coords
            y_coords = boxes_tensor[:,:,:,[1, 3]]
            y_coords[y_coords >= self.img_height] = self.img_height - 1
            y_coords[y_coords < 0] = 0
            boxes_tensor[:,:,:,[1, 3]] = y_coords

        # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1]
        if self.normalize_coords:
            boxes_tensor[:, :, :, [0, 2]] /= self.img_width
            boxes_tensor[:, :, :, [1, 3]] /= self.img_height

        # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
        if self.coords == 'centroids':
            # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
            boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids', border_pixels='half')
        elif self.coords == 'minmax':
            # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
            boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax', border_pixels='half')

        # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
        # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
        variances_tensor = np.zeros_like(boxes_tensor) # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
        variances_tensor += self.variances # Long live broadcasting
        # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`
        boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)

        # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along that axis.
        # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
        boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
        boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))

        return boxes_tensor

    def compute_output_shape(self, input_shape):
        '''
        Return the output shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
        for the given 4D input shape.
        '''
        if K.image_dim_ordering() == 'tf':
            batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape
        else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
            batch_size, feature_map_channels, feature_map_height, feature_map_width = input_shape
        return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)

    def get_config(self):
        '''
        Return the layer configuration, i.e. the base layer config plus every
        constructor argument of this layer, so that the layer can be re-created from it.
        '''
        config = {
            'img_height': self.img_height,
            'img_width': self.img_width,
            'this_scale': self.this_scale,
            'next_scale': self.next_scale,
            'aspect_ratios': list(self.aspect_ratios),
            'two_boxes_for_ar1': self.two_boxes_for_ar1,
            # `this_steps` and `this_offsets` determine the anchor grid, so they must be
            # part of the config; otherwise a re-created layer would silently fall back
            # to the default steps and offsets.
            'this_steps': self.this_steps,
            'this_offsets': self.this_offsets,
            'clip_boxes': self.clip_boxes,
            'variances': list(self.variances),
            'coords': self.coords,
            'normalize_coords': self.normalize_coords
        }
        base_config = super(AnchorBoxes, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
BIN
ssd_keras-master/keras_layers/keras_layer_AnchorBoxes.pyc
Normal file
BIN
ssd_keras-master/keras_layers/keras_layer_AnchorBoxes.pyc
Normal file
Binary file not shown.
283
ssd_keras-master/keras_layers/keras_layer_DecodeDetections.py
Normal file
283
ssd_keras-master/keras_layers/keras_layer_DecodeDetections.py
Normal file
@@ -0,0 +1,283 @@
|
||||
'''
|
||||
A custom Keras layer to decode the raw SSD prediction output. Corresponds to the
|
||||
`DetectionOutput` layer type in the original Caffe implementation of SSD.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import keras.backend as K
|
||||
from keras.engine.topology import InputSpec
|
||||
from keras.engine.topology import Layer
|
||||
|
||||
class DecodeDetections(Layer):
    '''
    A Keras layer to decode the raw SSD prediction output.

    Input shape:
        3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`.

    Output shape:
        3D tensor of shape `(batch_size, top_k, 6)`.
    '''

    def __init__(self,
                 confidence_thresh=0.01,
                 iou_threshold=0.45,
                 top_k=200,
                 nms_max_output_size=400,
                 coords='centroids',
                 normalize_coords=True,
                 img_height=None,
                 img_width=None,
                 **kwargs):
        '''
        All default argument values follow the Caffe implementation.

        Arguments:
            confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
                positive class in order to be considered for the non-maximum suppression stage for the respective class.
                A lower value will result in a larger part of the selection process being done by the non-maximum suppression
                stage, while a larger value will result in a larger part of the selection process happening in the confidence
                thresholding stage.
            iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
                with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
                to the box score.
            top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
                non-maximum suppression stage.
            nms_max_output_size (int, optional): The maximum number of predictions that will be left after performing non-maximum
                suppression.
            coords (str, optional): The box coordinate format that the model outputs. Must be 'centroids'
                i.e. the format `(cx, cy, w, h)` (box center coordinates, width, and height). Other coordinate formats are
                currently not supported.
            normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
                and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
                relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
                Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
                coordinates. Requires `img_height` and `img_width` if set to `True`.
            img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`.
            img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`.

        Raises:
            TypeError: If the Keras backend is not TensorFlow.
            ValueError: If `normalize_coords` is `True` but the image size is missing,
                or if `coords` is not 'centroids'.
        '''
        if K.backend() != 'tensorflow':
            raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))

        if normalize_coords and ((img_height is None) or (img_width is None)):
            raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width))

        if coords != 'centroids':
            raise ValueError("The DetectionOutput layer currently only supports the 'centroids' coordinate format.")

        # We need these members for the config.
        self.confidence_thresh = confidence_thresh
        self.iou_threshold = iou_threshold
        self.top_k = top_k
        self.normalize_coords = normalize_coords
        self.img_height = img_height
        self.img_width = img_width
        self.coords = coords
        self.nms_max_output_size = nms_max_output_size

        # We need these members for TensorFlow.
        self.tf_confidence_thresh = tf.constant(self.confidence_thresh, name='confidence_thresh')
        self.tf_iou_threshold = tf.constant(self.iou_threshold, name='iou_threshold')
        self.tf_top_k = tf.constant(self.top_k, name='top_k')
        self.tf_normalize_coords = tf.constant(self.normalize_coords, name='normalize_coords')
        # The image size is optional when `normalize_coords` is `False`. A constant cannot
        # be created from `None`, so only build these constants when a size was given.
        self.tf_img_height = None if self.img_height is None else tf.constant(self.img_height, dtype=tf.float32, name='img_height')
        self.tf_img_width = None if self.img_width is None else tf.constant(self.img_width, dtype=tf.float32, name='img_width')
        self.tf_nms_max_output_size = tf.constant(self.nms_max_output_size, name='nms_max_output_size')

        super(DecodeDetections, self).__init__(**kwargs)

    def build(self, input_shape):
        '''
        Pin the layer's input spec to the given input shape and mark the layer as built.
        '''
        self.input_spec = [InputSpec(shape=input_shape)]
        super(DecodeDetections, self).build(input_shape)

    def call(self, y_pred, mask=None):
        '''
        Decode the raw predictions and filter them by confidence thresholding,
        per-class non-maximum suppression, and top-k selection.

        Arguments:
            y_pred (tensor): 3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`. The last
                twelve entries of the last axis are read as the four predicted box offsets,
                the four anchor box coordinates `(cx, cy, w, h)`, and the four variances.
            mask: Unused.

        Returns:
            3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded
            to always yield `top_k` predictions per batch item. The last axis contains
            the coordinates for each predicted box in the format
            `[class_id, confidence, xmin, ymin, xmax, ymax]`.
        '''

        #####################################################################################
        # 1. Convert the box coordinates from predicted anchor box offsets to predicted
        #    absolute coordinates
        #####################################################################################

        # Convert anchor box offsets to image offsets.
        cx = y_pred[...,-12] * y_pred[...,-4] * y_pred[...,-6] + y_pred[...,-8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor
        cy = y_pred[...,-11] * y_pred[...,-3] * y_pred[...,-5] + y_pred[...,-7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor
        w = tf.exp(y_pred[...,-10] * y_pred[...,-2]) * y_pred[...,-6] # w = exp(w_pred * variance_w) * w_anchor
        h = tf.exp(y_pred[...,-9] * y_pred[...,-1]) * y_pred[...,-5] # h = exp(h_pred * variance_h) * h_anchor

        # Convert 'centroids' to 'corners'.
        xmin = cx - 0.5 * w
        ymin = cy - 0.5 * h
        xmax = cx + 0.5 * w
        ymax = cy + 0.5 * h

        # If the model predicts box coordinates relative to the image dimensions and they are supposed
        # to be converted back to absolute coordinates, do that. `normalize_coords` is a plain Python
        # value that is fixed at construction time, so an ordinary branch is sufficient here.
        if self.normalize_coords:
            xmin = xmin * self.tf_img_width
            ymin = ymin * self.tf_img_height
            xmax = xmax * self.tf_img_width
            ymax = ymax * self.tf_img_height

        # Append a trailing axis so that the coordinates can be concatenated along the last axis.
        xmin = tf.expand_dims(xmin, axis=-1)
        ymin = tf.expand_dims(ymin, axis=-1)
        xmax = tf.expand_dims(xmax, axis=-1)
        ymax = tf.expand_dims(ymax, axis=-1)

        # Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor.
        y_pred = tf.concat(values=[y_pred[...,:-12], xmin, ymin, xmax, ymax], axis=-1)

        #####################################################################################
        # 2. Perform confidence thresholding, per-class non-maximum suppression, and
        #    top-k filtering.
        #####################################################################################

        n_classes = y_pred.shape[2] - 4
        # Start at 1: class index 0 is skipped by the per-class filtering below.
        class_indices = tf.range(1, n_classes)

        # Create a function that filters the predictions for the given batch item. Specifically, it performs:
        # - confidence thresholding
        # - non-maximum suppression (NMS)
        # - top-k filtering
        def filter_predictions(batch_item):

            # Create a function that filters the predictions for one single class.
            def filter_single_class(index):

                # From a tensor of shape (n_boxes, n_classes + 4 coordinates) extract
                # a tensor of shape (n_boxes, 1 + 4 coordinates) that contains the
                # confidence values for just one class, determined by `index`.
                confidences = tf.expand_dims(batch_item[..., index], axis=-1)
                class_id = tf.fill(dims=tf.shape(confidences), value=tf.to_float(index))
                box_coordinates = batch_item[...,-4:]

                single_class = tf.concat([class_id, confidences, box_coordinates], axis=-1)

                # Apply confidence thresholding with respect to the class defined by `index`.
                threshold_met = single_class[:,1] > self.tf_confidence_thresh
                single_class = tf.boolean_mask(tensor=single_class,
                                               mask=threshold_met)

                # If any boxes made the threshold, perform NMS.
                def perform_nms():
                    scores = single_class[...,1]

                    # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`.
                    xmin = tf.expand_dims(single_class[...,-4], axis=-1)
                    ymin = tf.expand_dims(single_class[...,-3], axis=-1)
                    xmax = tf.expand_dims(single_class[...,-2], axis=-1)
                    ymax = tf.expand_dims(single_class[...,-1], axis=-1)
                    boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1)

                    maxima_indices = tf.image.non_max_suppression(boxes=boxes,
                                                                  scores=scores,
                                                                  max_output_size=self.tf_nms_max_output_size,
                                                                  iou_threshold=self.iou_threshold,
                                                                  name='non_maximum_suppresion')
                    maxima = tf.gather(params=single_class,
                                       indices=maxima_indices,
                                       axis=0)
                    return maxima

                def no_confident_predictions():
                    return tf.constant(value=0.0, shape=(1,6))

                single_class_nms = tf.cond(tf.equal(tf.size(single_class), 0), no_confident_predictions, perform_nms)

                # Make sure `single_class` is exactly `self.nms_max_output_size` elements long.
                padded_single_class = tf.pad(tensor=single_class_nms,
                                             paddings=[[0, self.tf_nms_max_output_size - tf.shape(single_class_nms)[0]], [0, 0]],
                                             mode='CONSTANT',
                                             constant_values=0.0)

                return padded_single_class

            # Iterate `filter_single_class()` over all class indices.
            filtered_single_classes = tf.map_fn(fn=filter_single_class,
                                                elems=class_indices,
                                                dtype=tf.float32,
                                                parallel_iterations=128,
                                                back_prop=False,
                                                swap_memory=False,
                                                infer_shape=True,
                                                name='loop_over_classes')

            # Concatenate the filtered results for all individual classes to one tensor.
            filtered_predictions = tf.reshape(tensor=filtered_single_classes, shape=(-1,6))

            # Perform top-k filtering for this batch item or pad it in case there are
            # fewer than `self.top_k` boxes left at this point. Either way, produce a
            # tensor of length `self.top_k`. By the time we return the final results tensor
            # for the whole batch, all batch items must have the same number of predicted
            # boxes so that the tensor dimensions are homogenous. If fewer than `self.top_k`
            # predictions are left after the filtering process above, we pad the missing
            # predictions with zeros as dummy entries.
            def top_k():
                return tf.gather(params=filtered_predictions,
                                 indices=tf.nn.top_k(filtered_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
                                 axis=0)

            def pad_and_top_k():
                padded_predictions = tf.pad(tensor=filtered_predictions,
                                            paddings=[[0, self.tf_top_k - tf.shape(filtered_predictions)[0]], [0, 0]],
                                            mode='CONSTANT',
                                            constant_values=0.0)
                return tf.gather(params=padded_predictions,
                                 indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
                                 axis=0)

            top_k_boxes = tf.cond(tf.greater_equal(tf.shape(filtered_predictions)[0], self.tf_top_k), top_k, pad_and_top_k)

            return top_k_boxes

        # Iterate `filter_predictions()` over all batch items.
        output_tensor = tf.map_fn(fn=filter_predictions,
                                  elems=y_pred,
                                  dtype=None,
                                  parallel_iterations=128,
                                  back_prop=False,
                                  swap_memory=False,
                                  infer_shape=True,
                                  name='loop_over_batch')

        return output_tensor

    def compute_output_shape(self, input_shape):
        '''
        Return the output shape `(batch_size, top_k, 6)` for the given 3D input shape.
        '''
        batch_size, _, _ = input_shape
        # Use the plain integer `self.top_k` here, not the tensor `self.tf_top_k`:
        # a static output shape must consist of Python integers (or `None`).
        return (batch_size, self.top_k, 6) # Last axis: (class_ID, confidence, 4 box coordinates)

    def get_config(self):
        '''
        Return the layer configuration, i.e. the base layer config plus every
        constructor argument of this layer, so that the layer can be re-created from it.
        '''
        config = {
            'confidence_thresh': self.confidence_thresh,
            'iou_threshold': self.iou_threshold,
            'top_k': self.top_k,
            'nms_max_output_size': self.nms_max_output_size,
            'coords': self.coords,
            'normalize_coords': self.normalize_coords,
            'img_height': self.img_height,
            'img_width': self.img_width,
        }
        base_config = super(DecodeDetections, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
BIN
ssd_keras-master/keras_layers/keras_layer_DecodeDetections.pyc
Normal file
BIN
ssd_keras-master/keras_layers/keras_layer_DecodeDetections.pyc
Normal file
Binary file not shown.
@@ -0,0 +1,266 @@
|
||||
'''
|
||||
A custom Keras layer to decode the raw SSD prediction output. This is a modified
|
||||
and more efficient version of the `DetectionOutput` layer type in the original Caffe
|
||||
implementation of SSD. For a faithful replication of the original layer, please
|
||||
refer to the `DecodeDetections` layer.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import keras.backend as K
|
||||
from keras.engine.topology import InputSpec
|
||||
from keras.engine.topology import Layer
|
||||
|
||||
class DecodeDetectionsFast(Layer):
    '''
    A Keras layer to decode the raw SSD prediction output.

    For every box only the class with the highest confidence is kept, background
    boxes and boxes below the confidence threshold are discarded, a single
    class-agnostic non-maximum suppression pass is run per batch item and the
    `top_k` highest scoring survivors are returned (zero-padded if necessary).

    Input shape:
        3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`.

    Output shape:
        3D tensor of shape `(batch_size, top_k, 6)`.
    '''

    def __init__(self,
                 confidence_thresh=0.01,
                 iou_threshold=0.45,
                 top_k=200,
                 nms_max_output_size=400,
                 coords='centroids',
                 normalize_coords=True,
                 img_height=None,
                 img_width=None,
                 **kwargs):
        '''
        All default argument values follow the Caffe implementation.

        Arguments:
            confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
                positive class in order to be considered for the non-maximum suppression stage for the respective class.
                A lower value will result in a larger part of the selection process being done by the non-maximum suppression
                stage, while a larger value will result in a larger part of the selection process happening in the confidence
                thresholding stage.
            iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
                with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
                to the box score.
            top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
                non-maximum suppression stage.
            nms_max_output_size (int, optional): The maximum number of predictions that will be left after performing non-maximum
                suppression.
            coords (str, optional): The box coordinate format that the model outputs. Must be 'centroids'
                i.e. the format `(cx, cy, w, h)` (box center coordinates, width, and height). Other coordinate formats are
                currently not supported.
            normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
                and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
                relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
                Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
                coordinates. Requires `img_height` and `img_width` if set to `True`.
            img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`.
            img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`.

        Raises:
            TypeError: If the Keras backend is not TensorFlow.
            ValueError: If `normalize_coords` is set but `img_height` or `img_width` is missing,
                or if `coords` is anything other than 'centroids'.
        '''
        if K.backend() != 'tensorflow':
            raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))

        if normalize_coords and ((img_height is None) or (img_width is None)):
            raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width))

        if coords != 'centroids':
            raise ValueError("The DetectionOutput layer currently only supports the 'centroids' coordinate format.")

        # We need these members for the config.
        self.confidence_thresh = confidence_thresh
        self.iou_threshold = iou_threshold
        self.top_k = top_k
        self.normalize_coords = normalize_coords
        self.img_height = img_height
        self.img_width = img_width
        self.coords = coords
        self.nms_max_output_size = nms_max_output_size

        # We need these members for TensorFlow.
        self.tf_confidence_thresh = tf.constant(self.confidence_thresh, name='confidence_thresh')
        self.tf_iou_threshold = tf.constant(self.iou_threshold, name='iou_threshold')
        self.tf_top_k = tf.constant(self.top_k, name='top_k')
        self.tf_normalize_coords = tf.constant(self.normalize_coords, name='normalize_coords')
        # `tf.constant()` rejects `None`, so the image size constants are only created when a
        # size was actually passed. The validation above guarantees that both exist whenever
        # `normalize_coords` is set, which is the only case in which `call()` reads them.
        self.tf_img_height = None
        if self.img_height is not None:
            self.tf_img_height = tf.constant(self.img_height, dtype=tf.float32, name='img_height')
        self.tf_img_width = None
        if self.img_width is not None:
            self.tf_img_width = tf.constant(self.img_width, dtype=tf.float32, name='img_width')
        self.tf_nms_max_output_size = tf.constant(self.nms_max_output_size, name='nms_max_output_size')

        super(DecodeDetectionsFast, self).__init__(**kwargs)

    def build(self, input_shape):
        '''
        Record the input shape as this layer's input spec. The layer has no weights.
        '''
        self.input_spec = [InputSpec(shape=input_shape)]
        super(DecodeDetectionsFast, self).build(input_shape)

    def call(self, y_pred, mask=None):
        '''
        Decode the raw predictions and filter them per batch item.

        Arguments:
            y_pred: The raw prediction tensor. The last axis is read as the class
                confidences followed by 12 values: the 4 predicted box offsets, the
                4 anchor box coordinates `(cx, cy, w, h)` and the 4 variances.
            mask: Unused.

        Returns:
            3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded
            to always yield `top_k` predictions per batch item. The last axis contains
            the coordinates for each predicted box in the format
            `[class_id, confidence, xmin, ymin, xmax, ymax]`.
        '''

        #####################################################################################
        # 1. Convert the box coordinates from predicted anchor box offsets to predicted
        #    absolute coordinates
        #####################################################################################

        # Extract the predicted class IDs as the indices of the highest confidence values.
        class_ids = tf.expand_dims(tf.to_float(tf.argmax(y_pred[...,:-12], axis=-1)), axis=-1)
        # Extract the confidences of the maximal classes.
        confidences = tf.reduce_max(y_pred[...,:-12], axis=-1, keep_dims=True)

        # Convert anchor box offsets to image offsets.
        cx = y_pred[...,-12] * y_pred[...,-4] * y_pred[...,-6] + y_pred[...,-8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor
        cy = y_pred[...,-11] * y_pred[...,-3] * y_pred[...,-5] + y_pred[...,-7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor
        w = tf.exp(y_pred[...,-10] * y_pred[...,-2]) * y_pred[...,-6] # w = exp(w_pred * variance_w) * w_anchor
        h = tf.exp(y_pred[...,-9] * y_pred[...,-1]) * y_pred[...,-5] # h = exp(h_pred * variance_h) * h_anchor

        # Convert 'centroids' to 'corners'.
        xmin = cx - 0.5 * w
        ymin = cy - 0.5 * h
        xmax = cx + 0.5 * w
        ymax = cy + 0.5 * h

        # If the model predicts box coordinates relative to the image dimensions and they are supposed
        # to be converted back to absolute coordinates, do that. `normalize_coords` is fixed at
        # construction time, so the decision is made in Python while the graph is built; this way the
        # image size constants are only touched when they are guaranteed to exist.
        if self.normalize_coords:
            xmin = xmin * self.tf_img_width
            ymin = ymin * self.tf_img_height
            xmax = xmax * self.tf_img_width
            ymax = ymax * self.tf_img_height

        xmin = tf.expand_dims(xmin, axis=-1)
        ymin = tf.expand_dims(ymin, axis=-1)
        xmax = tf.expand_dims(xmax, axis=-1)
        ymax = tf.expand_dims(ymax, axis=-1)

        # Concatenate the class IDs, the confidences of those classes and the converted box
        # coordinates to form the decoded predictions tensor.
        y_pred = tf.concat(values=[class_ids, confidences, xmin, ymin, xmax, ymax], axis=-1)

        #####################################################################################
        # 2. Perform confidence thresholding, non-maximum suppression, and top-k filtering.
        #####################################################################################

        # Create a function that filters the predictions for the given batch item. Specifically, it performs:
        # - confidence thresholding
        # - non-maximum suppression (NMS)
        # - top-k filtering
        def filter_predictions(batch_item):

            # Keep only the non-background boxes.
            positive_boxes = tf.not_equal(batch_item[...,0], 0.0)
            predictions = tf.boolean_mask(tensor=batch_item,
                                          mask=positive_boxes)

            def perform_confidence_thresholding():
                # Apply confidence thresholding.
                threshold_met = predictions[:,1] > self.tf_confidence_thresh
                return tf.boolean_mask(tensor=predictions,
                                       mask=threshold_met)

            def no_positive_boxes():
                # A single all-zero dummy box keeps the tensor rank intact.
                return tf.constant(value=0.0, shape=(1,6))

            # If there are any positive predictions, perform confidence thresholding.
            predictions_conf_thresh = tf.cond(tf.equal(tf.size(predictions), 0), no_positive_boxes, perform_confidence_thresholding)

            def perform_nms():
                scores = predictions_conf_thresh[...,1]

                # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`.
                xmin = tf.expand_dims(predictions_conf_thresh[...,-4], axis=-1)
                ymin = tf.expand_dims(predictions_conf_thresh[...,-3], axis=-1)
                xmax = tf.expand_dims(predictions_conf_thresh[...,-2], axis=-1)
                ymax = tf.expand_dims(predictions_conf_thresh[...,-1], axis=-1)
                boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1)

                maxima_indices = tf.image.non_max_suppression(boxes=boxes,
                                                              scores=scores,
                                                              max_output_size=self.tf_nms_max_output_size,
                                                              iou_threshold=self.iou_threshold,
                                                              name='non_maximum_suppresion')
                maxima = tf.gather(params=predictions_conf_thresh,
                                   indices=maxima_indices,
                                   axis=0)
                return maxima

            def no_confident_predictions():
                return tf.constant(value=0.0, shape=(1,6))

            # If any boxes made the threshold, perform NMS.
            predictions_nms = tf.cond(tf.equal(tf.size(predictions_conf_thresh), 0), no_confident_predictions, perform_nms)

            # Perform top-k filtering for this batch item or pad it in case there are
            # fewer than `self.top_k` boxes left at this point. Either way, produce a
            # tensor of length `self.top_k`. By the time we return the final results tensor
            # for the whole batch, all batch items must have the same number of predicted
            # boxes so that the tensor dimensions are homogenous. If fewer than `self.top_k`
            # predictions are left after the filtering process above, we pad the missing
            # predictions with zeros as dummy entries.
            def top_k():
                return tf.gather(params=predictions_nms,
                                 indices=tf.nn.top_k(predictions_nms[:, 1], k=self.tf_top_k, sorted=True).indices,
                                 axis=0)

            def pad_and_top_k():
                padded_predictions = tf.pad(tensor=predictions_nms,
                                            paddings=[[0, self.tf_top_k - tf.shape(predictions_nms)[0]], [0, 0]],
                                            mode='CONSTANT',
                                            constant_values=0.0)
                return tf.gather(params=padded_predictions,
                                 indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
                                 axis=0)

            top_k_boxes = tf.cond(tf.greater_equal(tf.shape(predictions_nms)[0], self.tf_top_k), top_k, pad_and_top_k)

            return top_k_boxes

        # Iterate `filter_predictions()` over all batch items.
        output_tensor = tf.map_fn(fn=filter_predictions,
                                  elems=y_pred,
                                  dtype=None,
                                  parallel_iterations=128,
                                  back_prop=False,
                                  swap_memory=False,
                                  infer_shape=True,
                                  name='loop_over_batch')

        return output_tensor

    def compute_output_shape(self, input_shape):
        '''
        Return the static output shape `(batch_size, top_k, 6)`.

        The shape tuple is built from the plain Python `top_k` value rather than from the
        `tf_top_k` tensor, so that it only contains the batch size entry of `input_shape`
        and integers.
        '''
        batch_size, _, _ = input_shape
        return (batch_size, self.top_k, 6) # Last axis: (class_ID, confidence, 4 box coordinates)

    def get_config(self):
        '''
        Return the parent configuration extended by the constructor arguments of this layer.
        '''
        config = {
            'confidence_thresh': self.confidence_thresh,
            'iou_threshold': self.iou_threshold,
            'top_k': self.top_k,
            'nms_max_output_size': self.nms_max_output_size,
            'coords': self.coords,
            'normalize_coords': self.normalize_coords,
            'img_height': self.img_height,
            'img_width': self.img_width,
        }
        base_config = super(DecodeDetectionsFast, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
Binary file not shown.
70
ssd_keras-master/keras_layers/keras_layer_L2Normalization.py
Normal file
70
ssd_keras-master/keras_layers/keras_layer_L2Normalization.py
Normal file
@@ -0,0 +1,70 @@
|
||||
'''
|
||||
A custom Keras layer to perform L2-normalization.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
import keras.backend as K
|
||||
from keras.engine.topology import InputSpec
|
||||
from keras.engine.topology import Layer
|
||||
|
||||
class L2Normalization(Layer):
    '''
    L2-normalizes the input tensor along its channel axis and multiplies the result
    by a learnable scaling parameter, as described in the paper
    "Parsenet: Looking Wider to See Better" (see references) and as used in the
    original SSD model.

    Arguments:
        gamma_init (int): The initial scaling parameter. Defaults to 20 following the
            SSD paper.

    Input shape:
        4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
        or `(batch, height, width, channels)` if `dim_ordering = 'tf'`.

    Returns:
        The scaled tensor. Same shape as the input tensor.

    References:
        http://cs.unc.edu/~wliu/papers/parsenet.pdf
    '''

    def __init__(self, gamma_init=20, **kwargs):
        self.gamma_init = gamma_init
        # The channel axis is the last one for the 'tf' ordering and the second one otherwise.
        self.axis = 3 if K.image_dim_ordering() == 'tf' else 1
        super(L2Normalization, self).__init__(**kwargs)

    def build(self, input_shape):
        '''
        Create the scaling variable, one entry per element of the normalized axis,
        and register it as the only trainable weight of the layer.
        '''
        self.input_spec = [InputSpec(shape=input_shape)]
        n_channels = input_shape[self.axis]
        initial_scale = self.gamma_init * np.ones((n_channels,))
        self.gamma = K.variable(initial_scale, name='{}_gamma'.format(self.name))
        self.trainable_weights = [self.gamma]
        super(L2Normalization, self).build(input_shape)

    def call(self, x, mask=None):
        '''
        Normalize `x` along `self.axis` and scale the result by `gamma`.
        '''
        return K.l2_normalize(x, self.axis) * self.gamma

    def get_config(self):
        '''
        Return the parent configuration extended by `gamma_init`.
        '''
        own_settings = {'gamma_init': self.gamma_init}
        merged = dict(super(L2Normalization, self).get_config())
        merged.update(own_settings)
        return merged
|
||||
BIN
ssd_keras-master/keras_layers/keras_layer_L2Normalization.pyc
Normal file
BIN
ssd_keras-master/keras_layers/keras_layer_L2Normalization.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user