Summary
This commit is contained in:
200
ssd_keras-master/eval_utils/coco_utils.py
Normal file
200
ssd_keras-master/eval_utils/coco_utils.py
Normal file
@@ -0,0 +1,200 @@
|
||||
'''
|
||||
A few utilities that are useful when working with the MS COCO datasets.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
import json
|
||||
from tqdm import trange
|
||||
from math import ceil
|
||||
import sys
|
||||
|
||||
from data_generator.object_detection_2d_geometric_ops import Resize
|
||||
from data_generator.object_detection_2d_patch_sampling_ops import RandomPadFixedAR
|
||||
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
|
||||
from ssd_encoder_decoder.ssd_output_decoder import decode_detections
|
||||
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
|
||||
|
||||
def get_coco_category_maps(annotations_file):
    '''
    Build the lookup tables that translate between MS COCO category IDs,
    consecutive ("transformed") class IDs, and category names.

    The original MS COCO category IDs are not consecutive: the 80 categories
    are spread over the integers 1 through 90 with gaps. Since neural networks
    usually use a one-hot class representation, the non-consecutive original
    IDs ('cats') must be mapped to consecutive IDs ('classes').

    Arguments:
        annotations_file (str): The filepath to any MS COCO annotations JSON file.

    Returns:
        1) cats_to_classes: A dictionary that maps between the original (keys) and the transformed category IDs (values).
        2) classes_to_cats: A dictionary that maps between the transformed (keys) and the original category IDs (values).
        3) cats_to_names: A dictionary that maps between original category IDs (keys) and the respective category names (values).
        4) classes_to_names: A list of the category names (values) with their indices representing the transformed IDs.
    '''
    with open(annotations_file, 'r') as f:
        categories = json.load(f)['categories']

    # Consecutive class IDs start at 1; 0 is reserved for the background class.
    cats_to_classes = {cat['id']: class_id
                       for class_id, cat in enumerate(categories, start=1)}
    classes_to_cats = {class_id: cat_id
                       for cat_id, class_id in cats_to_classes.items()}
    cats_to_names = {cat['id']: cat['name'] for cat in categories}
    # Index 0 is the background class so that list indices line up with class IDs.
    classes_to_names = ['background'] + [cat['name'] for cat in categories]

    return cats_to_classes, classes_to_cats, cats_to_names, classes_to_names
|
||||
|
||||
def predict_all_to_json(out_file,
                        model,
                        img_height,
                        img_width,
                        classes_to_cats,
                        data_generator,
                        batch_size,
                        data_generator_mode='resize',
                        model_mode='training',
                        confidence_thresh=0.01,
                        iou_threshold=0.45,
                        top_k=200,
                        pred_coords='centroids',
                        normalize_coords=True):
    '''
    Runs detection predictions over the whole dataset given a model and saves them in a JSON file
    in the MS COCO detection results format.

    Arguments:
        out_file (str): The file name (full path) under which to save the results JSON file.
        model (Keras model): A Keras SSD model object.
        img_height (int): The input image height for the model.
        img_width (int): The input image width for the model.
        classes_to_cats (dict): A dictionary that maps the consecutive class IDs predicted by the model
            to the non-consecutive original MS COCO category IDs.
        data_generator (DataGenerator): A `DataGenerator` object with the evaluation dataset.
        batch_size (int): The batch size for the evaluation.
        data_generator_mode (str, optional): Either of 'resize' or 'pad'. If 'resize', the input images will
            be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
            If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
            and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
        model_mode (str, optional): The mode in which the model was created, i.e. 'training', 'inference' or 'inference_fast'.
            This is needed in order to know whether the model output is already decoded or still needs to be decoded. Refer to
            the model documentation for the meaning of the individual modes.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage. Defaults to 200, following the paper.
        pred_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
            for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
            and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
            relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
            Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
            coordinates. Requires `img_height` and `img_width` if set to `True`.

    Returns:
        None.
    '''

    # Build the image preprocessing chain for the chosen generator mode.
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    if data_generator_mode == 'resize':
        transformations = [convert_to_3_channels,
                           resize]
    elif data_generator_mode == 'pad':
        # Pad first so the aspect ratio matches the model input, then resize.
        random_pad = RandomPadFixedAR(patch_aspect_ratio=img_width/img_height, clip_boxes=False)
        transformations = [convert_to_3_channels,
                           random_pad,
                           resize]
    else:
        raise ValueError("Unexpected argument value: `data_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(data_generator_mode))

    # Set the generator parameters. `shuffle=False` so that results line up
    # with the dataset's image IDs; no label encoder is needed for inference.
    generator = data_generator.generate(batch_size=batch_size,
                                        shuffle=False,
                                        transformations=transformations,
                                        label_encoder=None,
                                        returns={'processed_images',
                                                 'image_ids',
                                                 'inverse_transform'},
                                        keep_images_without_gt=True)

    # Put the results in this list.
    results = []
    # Compute the number of batches to iterate over the entire dataset.
    n_images = data_generator.get_dataset_size()
    print("Number of images in the evaluation dataset: {}".format(n_images))
    n_batches = int(ceil(n_images / batch_size))
    # Loop over all batches.
    tr = trange(n_batches, file=sys.stdout)
    tr.set_description('Producing results file')
    for batch_index in tr:
        # Generate batch.
        batch_X, batch_image_ids, batch_inverse_transforms = next(generator)
        # Predict.
        y_pred = model.predict(batch_X)
        # If the model was created in 'training' mode, the raw predictions need to
        # be decoded and filtered, otherwise that's already taken care of.
        if model_mode == 'training':
            # Decode.
            y_pred = decode_detections(y_pred,
                                       confidence_thresh=confidence_thresh,
                                       iou_threshold=iou_threshold,
                                       top_k=top_k,
                                       input_coords=pred_coords,
                                       normalize_coords=normalize_coords,
                                       img_height=img_height,
                                       img_width=img_width)
        else:
            # Filter out the all-zeros dummy elements of `y_pred`.
            # BUGFIX: the original loop reused `i` here, shadowing the outer
            # progress-bar loop variable; a comprehension avoids that.
            y_pred = [batch_item[batch_item[:, 0] != 0] for batch_item in y_pred]
        # Convert the predicted box coordinates for the original images.
        y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)

        # Convert each predicted box into the results format.
        for k, batch_item in enumerate(y_pred):
            for box in batch_item:
                # Coerce the (numpy float) class ID to `int` so the dict lookup
                # below uses the same key type as `classes_to_cats`.
                class_id = int(box[0])
                # Transform the consecutive class IDs back to the original COCO category IDs.
                cat_id = classes_to_cats[class_id]
                # Round the box coordinates to reduce the JSON file size.
                xmin = float(round(box[2], 1))
                ymin = float(round(box[3], 1))
                xmax = float(round(box[4], 1))
                ymax = float(round(box[5], 1))
                # COCO results use `(xmin, ymin, width, height)` boxes.
                width = xmax - xmin
                height = ymax - ymin
                bbox = [xmin, ymin, width, height]
                result = {}
                result['image_id'] = batch_image_ids[k]
                result['category_id'] = cat_id
                result['score'] = float(round(box[1], 3))
                result['bbox'] = bbox
                results.append(result)

    with open(out_file, 'w') as f:
        json.dump(results, f)

    print("Prediction results saved in '{}'".format(out_file))
|
||||
Reference in New Issue
Block a user