tensorflow2

This commit is contained in:
Daniel Saavedra
2020-03-25 18:23:00 -03:00
parent 7010af8a58
commit 7cf0c577a1
25 changed files with 1016 additions and 309 deletions

191
keras-yolo3-master/yolo.py Executable file → Normal file
View File

@@ -1,12 +1,12 @@
from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.engine.topology import Layer
from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda
from tensorflow.keras.layers import add, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
import tensorflow as tf
class YoloLayer(Layer):
def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh,
grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale,
def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh,
grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale,
**kwargs):
# make the model settings persistent
self.ignore_thresh = ignore_thresh
@@ -16,13 +16,13 @@ class YoloLayer(Layer):
self.obj_scale = obj_scale
self.noobj_scale = noobj_scale
self.xywh_scale = xywh_scale
self.class_scale = class_scale
self.class_scale = class_scale
# make a persistent mesh grid
max_grid_h, max_grid_w = max_grid
cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)))
cell_y = tf.transpose(cell_x, (0,2,1,3,4))
cell_x = tf.cast(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)), dtype=tf.float32)
cell_y = tf.transpose(a=cell_x, perm=(0,2,1,3,4))
self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])
super(YoloLayer, self).__init__(**kwargs)
@@ -34,30 +34,30 @@ class YoloLayer(Layer):
input_image, y_pred, y_true, true_boxes = x
# adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0))
y_pred = tf.reshape(y_pred, tf.concat([tf.shape(input=y_pred)[:3], tf.constant([3, -1])], axis=0))
# initialize the masks
object_mask = tf.expand_dims(y_true[..., 4], 4)
# the variable to keep track of number of batches processed
batch_seen = tf.Variable(0.)
batch_seen = tf.Variable(0.)
# compute grid factor and net factor
grid_h = tf.shape(y_true)[1]
grid_w = tf.shape(y_true)[2]
grid_h = tf.shape(input=y_true)[1]
grid_w = tf.shape(input=y_true)[2]
grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])
net_h = tf.shape(input_image)[1]
net_w = tf.shape(input_image)[2]
net_h = tf.shape(input=input_image)[1]
net_w = tf.shape(input=input_image)[2]
net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])
"""
Adjust prediction
"""
pred_box_xy = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy
pred_box_wh = y_pred[..., 2:4] # t_wh
pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence
pred_box_class = y_pred[..., 5:] # adjust class probabilities
pred_box_class = y_pred[..., 5:] # adjust class probabilities
"""
Adjust ground truth
@@ -65,47 +65,47 @@ class YoloLayer(Layer):
true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy)
true_box_wh = y_true[..., 2:4] # t_wh
true_box_conf = tf.expand_dims(y_true[..., 4], 4)
true_box_class = tf.argmax(y_true[..., 5:], -1)
true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1)
"""
Compare each predicted box to all true boxes
"""
"""
# initially, drag all objectness of all boxes to 0
conf_delta = pred_box_conf - 0
conf_delta = pred_box_conf - 0
# then, ignore the boxes which have good overlap with some true box
true_xy = true_boxes[..., 0:2] / grid_factor
true_wh = true_boxes[..., 2:4] / net_factor
true_wh_half = true_wh / 2.
true_mins = true_xy - true_wh_half
true_maxes = true_xy + true_wh_half
pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)
pred_wh_half = pred_wh / 2.
pred_mins = pred_xy - pred_wh_half
pred_maxes = pred_xy + pred_wh_half
pred_maxes = pred_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_wh[..., 0] * true_wh[..., 1]
pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas)
best_ious = tf.reduce_max(iou_scores, axis=4)
conf_delta *= tf.expand_dims(tf.to_float(best_ious < self.ignore_thresh), 4)
best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4)
conf_delta *= tf.expand_dims(tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)
"""
Compute some online statistics
"""
"""
true_xy = true_box_xy / grid_factor
true_wh = tf.exp(true_box_wh) * self.anchors / net_factor
@@ -114,51 +114,52 @@ class YoloLayer(Layer):
true_maxes = true_xy + true_wh_half
pred_xy = pred_box_xy / grid_factor
pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor
pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor
pred_wh_half = pred_wh / 2.
pred_mins = pred_xy - pred_wh_half
pred_maxes = pred_xy + pred_wh_half
pred_maxes = pred_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_wh[..., 0] * true_wh[..., 1]
pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas)
iou_scores = object_mask * tf.expand_dims(iou_scores, 4)
count = tf.reduce_sum(object_mask)
count_noobj = tf.reduce_sum(1 - object_mask)
detect_mask = tf.to_float((pred_box_conf*object_mask) >= 0.5)
class_mask = tf.expand_dims(tf.to_float(tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4)
recall50 = tf.reduce_sum(tf.to_float(iou_scores >= 0.5 ) * detect_mask * class_mask) / (count + 1e-3)
recall75 = tf.reduce_sum(tf.to_float(iou_scores >= 0.75) * detect_mask * class_mask) / (count + 1e-3)
avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3)
avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3)
avg_noobj = tf.reduce_sum(pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3)
avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3)
count = tf.reduce_sum(input_tensor=object_mask)
count_noobj = tf.reduce_sum(input_tensor=1 - object_mask)
detect_mask = tf.cast((pred_box_conf*object_mask) >= 0.5, dtype=tf.float32)
class_mask = tf.expand_dims(tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1), true_box_class), dtype=tf.float32), 4)
recall50 = tf.reduce_sum(input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3)
recall75 = tf.reduce_sum(input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3)
avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3)
avg_obj = tf.reduce_sum(input_tensor=pred_box_conf * object_mask) / (count + 1e-3)
avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3)
avg_cat = tf.reduce_sum(input_tensor=object_mask * class_mask) / (count + 1e-3)
"""
Warm-up training
"""
batch_seen = tf.assign_add(batch_seen, 1.)
true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1),
lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask),
true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask),
#batch_seen = tf.assign_add(batch_seen, 1.)
batch_seen.assign_add(1.)
true_box_xy, true_box_wh, xywh_mask = tf.cond(pred=tf.less(batch_seen, self.warmup_batches+1),
true_fn=lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask),
true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask),
tf.ones_like(object_mask)],
lambda: [true_box_xy,
false_fn=lambda: [true_box_xy,
true_box_wh,
object_mask])
"""
Compare each true box to all anchor boxes
"""
"""
wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale
@@ -169,10 +170,10 @@ class YoloLayer(Layer):
tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
self.class_scale
loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1,5)))
loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1,5)))
loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1,5)))
loss_class = tf.reduce_sum(class_delta, list(range(1,5)))
loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta), axis=list(range(1,5)))
loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta), axis=list(range(1,5)))
loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta), axis=list(range(1,5)))
loss_class = tf.reduce_sum(input_tensor=class_delta, axis=list(range(1,5)))
loss = loss_xy + loss_wh + loss_conf + loss_class
@@ -181,12 +182,12 @@ class YoloLayer(Layer):
#loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000)
#loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000)
#loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy),
# tf.reduce_sum(loss_wh),
# tf.reduce_sum(loss_conf),
# tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy),
# tf.reduce_sum(loss_wh),
# tf.reduce_sum(loss_conf),
# tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000)
return loss*self.grid_scale
@@ -197,30 +198,30 @@ class YoloLayer(Layer):
def _conv_block(inp, convs, do_skip=True):
x = inp
count = 0
for conv in convs:
if count == (len(convs) - 2) and do_skip:
skip_connection = x
count += 1
if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike tensorflow darknet prefer left and top paddings
x = Conv2D(conv['filter'],
conv['kernel'],
strides=conv['stride'],
x = Conv2D(conv['filter'],
conv['kernel'],
strides=conv['stride'],
padding='valid' if conv['stride'] > 1 else 'same', # unlike tensorflow darknet prefer left and top paddings
name='conv_' + str(conv['layer_idx']),
name='conv_' + str(conv['layer_idx']),
use_bias=False if conv['bnorm'] else True)(x)
if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
return add([skip_connection, x]) if do_skip else x
return add([skip_connection, x]) if do_skip else x
def create_yolov3_model(
nb_class,
anchors,
max_box_per_image,
max_grid,
batch_size,
nb_class,
anchors,
max_box_per_image,
max_grid,
batch_size,
warmup_batches,
ignore_thresh,
grid_scales,
@@ -259,9 +260,9 @@ def create_yolov3_model(
for i in range(7):
x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
skip_36 = x
# Layer 37 => 40
x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
@@ -271,9 +272,9 @@ def create_yolov3_model(
for i in range(7):
x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
skip_61 = x
# Layer 62 => 65
x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
@@ -283,7 +284,7 @@ def create_yolov3_model(
for i in range(3):
x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
# Layer 75 => 79
x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
@@ -294,11 +295,11 @@ def create_yolov3_model(
# Layer 80 => 82
pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80},
{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False)
loss_yolo_1 = YoloLayer(anchors[12:],
[1*num for num in max_grid],
batch_size,
warmup_batches,
ignore_thresh,
loss_yolo_1 = YoloLayer(anchors[12:],
[1*num for num in max_grid],
batch_size,
warmup_batches,
ignore_thresh,
grid_scales[0],
obj_scale,
noobj_scale,
@@ -320,11 +321,11 @@ def create_yolov3_model(
# Layer 92 => 94
pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92},
{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False)
loss_yolo_2 = YoloLayer(anchors[6:12],
[2*num for num in max_grid],
batch_size,
warmup_batches,
ignore_thresh,
loss_yolo_2 = YoloLayer(anchors[6:12],
[2*num for num in max_grid],
batch_size,
warmup_batches,
ignore_thresh,
grid_scales[1],
obj_scale,
noobj_scale,
@@ -344,16 +345,16 @@ def create_yolov3_model(
{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103},
{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104},
{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False)
loss_yolo_3 = YoloLayer(anchors[:6],
[4*num for num in max_grid],
batch_size,
warmup_batches,
ignore_thresh,
loss_yolo_3 = YoloLayer(anchors[:6],
[4*num for num in max_grid],
batch_size,
warmup_batches,
ignore_thresh,
grid_scales[2],
obj_scale,
noobj_scale,
xywh_scale,
class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes])
class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes])
train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3])
infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3])
@@ -361,4 +362,4 @@ def create_yolov3_model(
return [train_model, infer_model]
def dummy_loss(y_true, y_pred):
return tf.sqrt(tf.reduce_sum(y_pred))
return tf.sqrt(tf.reduce_sum(input_tensor=y_pred))