import os

import cv2
import numpy as np
import tensorflow as tf
from scipy.special import expit

from .bbox import BoundBox, bbox_iou


def _sigmoid(x):
    # scipy's expit is a numerically stable logistic sigmoid
    return expit(x)


def makedirs(path):
    # create the directory if it does not exist yet; an already existing
    # directory is fine, any other OSError is re-raised
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def evaluate(model,
             generator,
             iou_threshold=0.5,
             obj_thresh=0.5,
             nms_thresh=0.45,
             net_h=416,
             net_w=416,
             save_path=None):
    """ Evaluate a given dataset using a given model.

    Code originally from https://github.com/fizyr/keras-retinanet.

    # Arguments
        model         : The model to evaluate.
        generator     : The generator that represents the dataset to evaluate.
        iou_threshold : The IoU threshold above which a detection counts as a true positive.
        obj_thresh    : The threshold used to distinguish between object and non-object.
        nms_thresh    : The threshold used to determine whether two detections are duplicates.
        net_h         : The height of the input image to the model; a higher value gives better accuracy.
        net_w         : The width of the input image to the model.
        save_path     : The path to save images with visualized detections to.
    # Returns
        A dict mapping each class label to an (average precision, number of annotations) tuple.
    """
    # gather all detections and annotations
    all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
    all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]

    for i in range(generator.size()):
        raw_image = [generator.load_image(i)]

        # make the boxes and the labels
        pred_boxes = get_yolo_boxes(model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0]

        score = np.array([box.get_score() for box in pred_boxes])
        pred_labels = np.array([box.label for box in pred_boxes])

        if len(pred_boxes) > 0:
            pred_boxes = np.array([[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes])
        else:
            pred_boxes = np.array([[]])

        # sort the boxes and the labels according to scores
        score_sort = np.argsort(-score)
        pred_labels = pred_labels[score_sort]
        pred_boxes = pred_boxes[score_sort]

        # copy detections to all_detections
        for label in range(generator.num_classes()):
            all_detections[i][label] = pred_boxes[pred_labels == label, :]

        annotations = generator.load_annotation(i)

        # copy ground-truth annotations to all_annotations
        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

    # compute mAP by comparing all detections and all annotations
    average_precisions = {}

    for label in range(generator.num_classes()):
        false_positives = np.zeros((0,))
        true_positives = np.zeros((0,))
        scores = np.zeros((0,))
        num_annotations = 0.0

        for i in range(generator.size()):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            detected_annotations = []

            for d in detections:
                scores = np.append(scores, d[4])

                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)
                    continue

                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = np.argmax(overlaps, axis=1)
                max_overlap = overlaps[0, assigned_annotation]

                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives = np.append(true_positives, 0)

        # no annotations -> AP for this class is reported as 0
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue

        # sort by score
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute cumulative false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations

    return average_precisions
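
# A minimal usage sketch for evaluate(), assuming a trained YOLOv3 Keras model
# `infer_model` and a validation generator `valid_generator` from the
# surrounding training code (both names are assumptions, not defined here):
#
#     average_precisions = evaluate(infer_model, valid_generator)
#     for label, (ap, n) in average_precisions.items():
#         print('class {}: AP = {:.4f} over {} annotations'.format(label, ap, n))
#     print('mAP: {:.4f}'.format(np.mean([ap for ap, _ in average_precisions.values()])))

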
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    # recover the size the image had inside the letterboxed network input
    if (float(net_w)/image_w) < (float(net_h)/image_h):
        new_w = net_w
        new_h = (image_h*net_w)/image_w
    else:
        new_h = net_h
        new_w = (image_w*net_h)/image_h

    # the offset removes the letterbox padding, the scale maps the normalized
    # coordinates back onto the original image
    x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
    y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h

    for i in range(len(boxes)):
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)


def do_nms(boxes, nms_thresh):
    # per-class non-max suppression: a box suppressed for one class keeps
    # its scores for the other classes
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return

    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])

        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]

            if boxes[index_i].classes[c] == 0: continue

            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]

                # zero out the score of any lower-ranked box that overlaps too much
                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0


def decode_netout(netout_old, anchors, obj_thresh, net_h, net_w):
    grid_h, grid_w = netout_old.shape[:2]
    nb_box = 3
    netout_old = tf.reshape(netout_old, (grid_h, grid_w, nb_box, -1))
    nb_class = netout_old.shape[-1] - 5  # layout per box: x, y, w, h, objectness, classes

    boxes = []

    # TensorFlow 2 tensors do not support in-place item assignment, so instead
    # of mutating slices of netout the activations are computed separately and
    # re-assembled with tf.concat:
    #   x, y         -> sigmoid
    #   objectness   -> sigmoid
    #   class scores -> objectness * softmax, zeroed where <= obj_thresh
    aux_1 = _sigmoid(netout_old[..., :2])
    aux_2 = _sigmoid(netout_old[..., 4])
    aux_3 = aux_2[..., np.newaxis] * _softmax(netout_old[..., 5:])
    aux_4 = aux_3 * (aux_3 > obj_thresh)
    netout = tf.concat([aux_1, netout_old[..., 2:4], aux_2[..., np.newaxis], aux_4], 3)

    for i in range(grid_h*grid_w):
        row = i // grid_w
        col = i % grid_w

        for b in range(nb_box):
            # 4th element is the objectness score
            objectness = netout[row, col, b, 4]

            if objectness <= obj_thresh: continue

            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[row, col, b, :4]

            x = (col + x) / grid_w  # center position, unit: image width
            y = (row + y) / grid_h  # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

            # last elements are class probabilities
            classes = np.array(netout[row, col, b, 5:])

            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)

            boxes.append(box)

    return boxes
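
# The loop above implements the YOLOv3 box parameterization: for a grid cell
# (c_x, c_y) with anchor prior (p_w, p_h) and raw outputs (t_x, t_y, t_w, t_h),
#
#     b_x = (c_x + sigmoid(t_x)) / grid_w        b_w = p_w * exp(t_w) / net_w
#     b_y = (c_y + sigmoid(t_y)) / grid_h        b_h = p_h * exp(t_h) / net_h
#
# so every coordinate is normalized to the network input size before
# correct_yolo_boxes() maps it back onto the original image.

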
def preprocess_input(image, net_h, net_w):
    new_h, new_w, _ = image.shape

    # determine the new size of the image, preserving the aspect ratio
    if (float(net_w)/new_w) < (float(net_h)/new_h):
        new_h = (new_h * net_w)//new_w
        new_w = net_w
    else:
        new_w = (new_w * net_h)//new_h
        new_h = net_h

    # resize the image to the new size (BGR -> RGB, scaled to [0, 1])
    resized = cv2.resize(image[:,:,::-1]/255., (new_w, new_h))

    # embed the image into the standard letter box, padded with gray (0.5)
    new_image = np.ones((net_h, net_w, 3)) * 0.5
    new_image[(net_h-new_h)//2:(net_h+new_h)//2, (net_w-new_w)//2:(net_w+new_w)//2, :] = resized
    new_image = np.expand_dims(new_image, 0)

    return new_image
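
# A quick shape check for preprocess_input() (the 640x480 frame is a made-up
# example, not something this module provides):
#
#     dummy = np.zeros((480, 640, 3), dtype=np.uint8)   # H x W x BGR, as from cv2.imread
#     batch = preprocess_input(dummy, 416, 416)
#     assert batch.shape == (1, 416, 416, 3)            # letterboxed and batched

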
def normalize(image):
    return image/255.


def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh):
    # note: every image in the batch is assumed to have the shape of images[0]
    image_h, image_w, _ = images[0].shape
    nb_images = len(images)
    batch_input = np.zeros((nb_images, net_h, net_w, 3))

    # preprocess the input
    for i in range(nb_images):
        batch_input[i] = preprocess_input(images[i], net_h, net_w)

    # run the prediction
    batch_output = model.predict_on_batch(batch_input)
    batch_boxes = [None]*nb_images

    for i in range(nb_images):
        yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]]
        boxes = []

        # decode the output of the network scale by scale, each with its own anchors
        for j in range(len(yolos)):
            yolo_anchors = anchors[(2-j)*6:(3-j)*6]
            boxes += decode_netout(yolos[j], yolo_anchors, obj_thresh, net_h, net_w)

        # correct the sizes of the bounding boxes
        correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

        # suppress non-maximal boxes
        do_nms(boxes, nms_thresh)

        batch_boxes[i] = boxes

    return batch_boxes
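
# Sketch of single-image inference with get_yolo_boxes(), using the standard
# COCO YOLOv3 anchors (the anchor list, model and image path are illustrative
# assumptions, not defined in this module):
#
#     anchors = [10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326]
#     image = cv2.imread('example.jpg')
#     boxes = get_yolo_boxes(infer_model, [image], 416, 416, anchors, 0.5, 0.45)[0]
#     boxes = [b for b in boxes if b.get_score() > 0.5]

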
def compute_overlap(a, b):
    """
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float

    Returns
    -------
    overlaps: (N, K) ndarray of IoU overlap between the boxes in a and b
    """
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])

    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih

    ua = np.maximum(ua, np.finfo(float).eps)

    intersection = iw * ih

    return intersection / ua
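
# Toy check for compute_overlap() with made-up coordinates: two 10x10 boxes
# shifted by 5 along x share an intersection of 50 and a union of 150:
#
#     a = np.array([[0., 0., 10., 10.]])
#     b = np.array([[5., 0., 15., 10.]])
#     compute_overlap(a, b)   # -> array([[0.33333333]])

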
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.

    Code originally from https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall    : The recall curve (list).
        precision : The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
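
# Toy check for compute_ap() with a made-up curve: two detections, the
# higher-scored one a true positive, for a class with a single annotation:
#
#     compute_ap(np.array([1., 1.]), np.array([1., 0.5]))   # -> 1.0

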
def _softmax(x, axis=-1):
    # subtract the max for numerical stability before exponentiating
    x = x - np.amax(x, axis, keepdims=True)
    e_x = np.exp(x)

    return e_x / e_x.sum(axis, keepdims=True)
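
# _softmax() uses the numerically stable formulation (subtract the row max
# before exponentiating), e.g.:
#
#     _softmax(np.array([1., 2., 3.]))   # -> array([0.09003057, 0.24472847, 0.66524096])
#
# and the result always sums to 1 along the chosen axis.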