tensorflow2

2020-03-25 18:23:00 -03:00
parent 7010af8a58
commit 7cf0c577a1
25 changed files with 1016 additions and 309 deletions
--- a/keras-yolo3-master/yolo.py
+++ b/keras-yolo3-master/yolo.py
@@ -1,12 +1,12 @@
-from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda
-from keras.layers.merge import add, concatenate
-from keras.models import Model
-from keras.engine.topology import Layer
+from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda
+from tensorflow.keras.layers import add, concatenate
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Layer
 import tensorflow as tf

 class YoloLayer(Layer):
-    def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, 
-                    grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, 
+    def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh,
+                    grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale,
                    **kwargs):
        # make the model settings persistent
        self.ignore_thresh  = ignore_thresh
@@ -16,13 +16,13 @@ class YoloLayer(Layer):
        self.obj_scale      = obj_scale
        self.noobj_scale    = noobj_scale
        self.xywh_scale     = xywh_scale
-        self.class_scale    = class_scale        
+        self.class_scale    = class_scale

        # make a persistent mesh grid
        max_grid_h, max_grid_w = max_grid

-        cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)))
-        cell_y = tf.transpose(cell_x, (0,2,1,3,4))
+        cell_x = tf.cast(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)), dtype=tf.float32)
+        cell_y = tf.transpose(a=cell_x, perm=(0,2,1,3,4))
        self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])

        super(YoloLayer, self).__init__(**kwargs)
@@ -34,30 +34,30 @@ class YoloLayer(Layer):
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
-        y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0))
-        
+        y_pred = tf.reshape(y_pred, tf.concat([tf.shape(input=y_pred)[:3], tf.constant([3, -1])], axis=0))
+
        # initialize the masks
        object_mask     = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
-        batch_seen = tf.Variable(0.)        
+        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
-        grid_h      = tf.shape(y_true)[1]
-        grid_w      = tf.shape(y_true)[2]
+        grid_h      = tf.shape(input=y_true)[1]
+        grid_w      = tf.shape(input=y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])

-        net_h       = tf.shape(input_image)[1]
-        net_w       = tf.shape(input_image)[2]            
+        net_h       = tf.shape(input=input_image)[1]
+        net_w       = tf.shape(input=input_image)[2]
        net_factor  = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])
-        
+
        """
        Adjust prediction
        """
        pred_box_xy    = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh    = y_pred[..., 2:4]                                                       # t_wh
        pred_box_conf  = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4)                          # adjust confidence
-        pred_box_class = y_pred[..., 5:]                                                        # adjust class probabilities      
+        pred_box_class = y_pred[..., 5:]                                                        # adjust class probabilities

        """
        Adjust ground truth
@@ -65,47 +65,47 @@ class YoloLayer(Layer):
        true_box_xy    = y_true[..., 0:2] # (sigma(t_xy) + c_xy)
        true_box_wh    = y_true[..., 2:4] # t_wh
        true_box_conf  = tf.expand_dims(y_true[..., 4], 4)
-        true_box_class = tf.argmax(y_true[..., 5:], -1)         
+        true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1)

        """
        Compare each predicted box to all true boxes
-        """        
+        """
        # initially, drag all objectness of all boxes to 0
-        conf_delta  = pred_box_conf - 0 
+        conf_delta  = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor
-        
+
        true_wh_half = true_wh / 2.
        true_mins    = true_xy - true_wh_half
        true_maxes   = true_xy + true_wh_half
-        
+
        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)
-        
+
        pred_wh_half = pred_wh / 2.
        pred_mins    = pred_xy - pred_wh_half
-        pred_maxes   = pred_xy + pred_wh_half    
+        pred_maxes   = pred_xy + pred_wh_half

        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
-        
+
        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)

-        best_ious   = tf.reduce_max(iou_scores, axis=4)        
-        conf_delta *= tf.expand_dims(tf.to_float(best_ious < self.ignore_thresh), 4)
+        best_ious   = tf.reduce_max(input_tensor=iou_scores, axis=4)
+        conf_delta *= tf.expand_dims(tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)

        """
        Compute some online statistics
-        """            
+        """
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

@@ -114,51 +114,52 @@ class YoloLayer(Layer):
        true_maxes   = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
-        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor 
-        
+        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor
+
        pred_wh_half = pred_wh / 2.
        pred_mins    = pred_xy - pred_wh_half
-        pred_maxes   = pred_xy + pred_wh_half      
+        pred_maxes   = pred_xy + pred_wh_half

        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
-        
+
        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)
        iou_scores  = object_mask * tf.expand_dims(iou_scores, 4)
-        
-        count       = tf.reduce_sum(object_mask)
-        count_noobj = tf.reduce_sum(1 - object_mask)
-        detect_mask = tf.to_float((pred_box_conf*object_mask) >= 0.5)
-        class_mask  = tf.expand_dims(tf.to_float(tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4)
-        recall50    = tf.reduce_sum(tf.to_float(iou_scores >= 0.5 ) * detect_mask  * class_mask) / (count + 1e-3)
-        recall75    = tf.reduce_sum(tf.to_float(iou_scores >= 0.75) * detect_mask  * class_mask) / (count + 1e-3)    
-        avg_iou     = tf.reduce_sum(iou_scores) / (count + 1e-3)
-        avg_obj     = tf.reduce_sum(pred_box_conf  * object_mask)  / (count + 1e-3)
-        avg_noobj   = tf.reduce_sum(pred_box_conf  * (1-object_mask))  / (count_noobj + 1e-3)
-        avg_cat     = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3) 
+
+
+        count       = tf.reduce_sum(input_tensor=object_mask)
+        count_noobj = tf.reduce_sum(input_tensor=1 - object_mask)
+        detect_mask = tf.cast((pred_box_conf*object_mask) >= 0.5, dtype=tf.float32)
+        class_mask  = tf.expand_dims(tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1), true_box_class), dtype=tf.float32), 4)
+        recall50    = tf.reduce_sum(input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) * detect_mask  * class_mask) / (count + 1e-3)
+        recall75    = tf.reduce_sum(input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) * detect_mask  * class_mask) / (count + 1e-3)
+        avg_iou     = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3)
+        avg_obj     = tf.reduce_sum(input_tensor=pred_box_conf  * object_mask)  / (count + 1e-3)
+        avg_noobj   = tf.reduce_sum(input_tensor=pred_box_conf  * (1-object_mask))  / (count_noobj + 1e-3)
+        avg_cat     = tf.reduce_sum(input_tensor=object_mask * class_mask) / (count + 1e-3)

        """
        Warm-up training
        """
-        batch_seen = tf.assign_add(batch_seen, 1.)
-        
-        true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), 
-                              lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), 
-                                       true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), 
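+        # NOTE(review): TF1 code was `batch_seen = tf.assign_add(batch_seen, 1.)`; the call below discards the returned value -- confirm the increment still feeds the warm-up test in graph mode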
+        batch_seen.assign_add(1.)
+        true_box_xy, true_box_wh, xywh_mask = tf.cond(pred=tf.less(batch_seen, self.warmup_batches+1),
+                              true_fn=lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask),
+                                       true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask),
                                       tf.ones_like(object_mask)],
-                              lambda: [true_box_xy, 
+                              false_fn=lambda: [true_box_xy,
                                       true_box_wh,
                                       object_mask])

        """
        Compare each true box to all anchor boxes
-        """      
+        """
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale

@@ -169,10 +170,10 @@ class YoloLayer(Layer):
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

-        loss_xy    = tf.reduce_sum(tf.square(xy_delta),       list(range(1,5)))
-        loss_wh    = tf.reduce_sum(tf.square(wh_delta),       list(range(1,5)))
-        loss_conf  = tf.reduce_sum(tf.square(conf_delta),     list(range(1,5)))
-        loss_class = tf.reduce_sum(class_delta,               list(range(1,5)))
+        loss_xy    = tf.reduce_sum(input_tensor=tf.square(xy_delta),       axis=list(range(1,5)))
+        loss_wh    = tf.reduce_sum(input_tensor=tf.square(wh_delta),       axis=list(range(1,5)))
+        loss_conf  = tf.reduce_sum(input_tensor=tf.square(conf_delta),     axis=list(range(1,5)))
+        loss_class = tf.reduce_sum(input_tensor=class_delta,               axis=list(range(1,5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class

@@ -181,12 +182,12 @@ class YoloLayer(Layer):
        #loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000)
        #loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000)
        #loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000)
-        #loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)   
-        #loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)     
-        #loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), 
-        #                               tf.reduce_sum(loss_wh), 
-         #                              tf.reduce_sum(loss_conf), 
-          #                             tf.reduce_sum(loss_class)],  message='loss xy, wh, conf, class: \t',   summarize=1000)   
+        #loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)
+        #loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)
+        #loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy),
+        #                               tf.reduce_sum(loss_wh),
+         #                              tf.reduce_sum(loss_conf),
+          #                             tf.reduce_sum(loss_class)],  message='loss xy, wh, conf, class: \t',   summarize=1000)


        return loss*self.grid_scale
@@ -197,30 +198,30 @@ class YoloLayer(Layer):
 def _conv_block(inp, convs, do_skip=True):
    x = inp
    count = 0
-    
+    # x at the input of the second-to-last conv is kept as skip_connection and added to the block output when do_skip is set
    for conv in convs:
        if count == (len(convs) - 2) and do_skip:
            skip_connection = x
        count += 1
-        
+        # stride > 1: pad explicitly with ZeroPadding2D, then convolve with 'valid' padding; stride 1 uses 'same'
        if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike tensorflow darknet prefer left and top paddings
-        x = Conv2D(conv['filter'], 
-                   conv['kernel'], 
-                   strides=conv['stride'], 
+        x = Conv2D(conv['filter'],  # each conv dict is read via 'filter', 'kernel', 'stride', 'bnorm', 'leaky', 'layer_idx'
+                   conv['kernel'],
+                   strides=conv['stride'],
                   padding='valid' if conv['stride'] > 1 else 'same', # unlike tensorflow darknet prefer left and top paddings
-                   name='conv_' + str(conv['layer_idx']), 
+                   name='conv_' + str(conv['layer_idx']),  # conv/bnorm/leaky layers are all named after conv['layer_idx']
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)

-    return add([skip_connection, x]) if do_skip else x        
+    return add([skip_connection, x]) if do_skip else x  # NOTE(review): with do_skip=True and fewer than two convs, skip_connection is never assigned

 def create_yolov3_model(
-    nb_class, 
-    anchors, 
-    max_box_per_image, 
-    max_grid, 
-    batch_size, 
+    nb_class,
+    anchors,
+    max_box_per_image,
+    max_grid,
+    batch_size,
    warmup_batches,
    ignore_thresh,
    grid_scales,
@@ -259,9 +260,9 @@ def create_yolov3_model(
    for i in range(7):
        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
-        
+
    skip_36 = x
-        
+
    # Layer 37 => 40
    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
@@ -271,9 +272,9 @@ def create_yolov3_model(
    for i in range(7):
        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
-        
+
    skip_61 = x
-        
+
    # Layer 62 => 65
    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
@@ -283,7 +284,7 @@ def create_yolov3_model(
    for i in range(3):
        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
-        
+
    # Layer 75 => 79
    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
@@ -294,11 +295,11 @@ def create_yolov3_model(
    # Layer 80 => 82
    pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
                             {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False)
-    loss_yolo_1 = YoloLayer(anchors[12:], 
-                            [1*num for num in max_grid], 
-                            batch_size, 
-                            warmup_batches, 
-                            ignore_thresh, 
+    loss_yolo_1 = YoloLayer(anchors[12:],
+                            [1*num for num in max_grid],
+                            batch_size,
+                            warmup_batches,
+                            ignore_thresh,
                            grid_scales[0],
                            obj_scale,
                            noobj_scale,
@@ -320,11 +321,11 @@ def create_yolov3_model(
    # Layer 92 => 94
    pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
                             {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False)
-    loss_yolo_2 = YoloLayer(anchors[6:12], 
-                            [2*num for num in max_grid], 
-                            batch_size, 
-                            warmup_batches, 
-                            ignore_thresh, 
+    loss_yolo_2 = YoloLayer(anchors[6:12],
+                            [2*num for num in max_grid],
+                            batch_size,
+                            warmup_batches,
+                            ignore_thresh,
                            grid_scales[1],
                            obj_scale,
                            noobj_scale,
@@ -344,16 +345,16 @@ def create_yolov3_model(
                             {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
                             {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
                             {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False)
-    loss_yolo_3 = YoloLayer(anchors[:6], 
-                            [4*num for num in max_grid], 
-                            batch_size, 
-                            warmup_batches, 
-                            ignore_thresh, 
+    loss_yolo_3 = YoloLayer(anchors[:6],
+                            [4*num for num in max_grid],
+                            batch_size,
+                            warmup_batches,
+                            ignore_thresh,
                            grid_scales[2],
                            obj_scale,
                            noobj_scale,
                            xywh_scale,
-                            class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes]) 
+                            class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes])

    train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3])
    infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3])
@@ -361,4 +362,4 @@ def create_yolov3_model(
    return [train_model, infer_model]

 def dummy_loss(y_true, y_pred):
-    return tf.sqrt(tf.reduce_sum(y_pred))
+    return tf.sqrt(tf.reduce_sum(input_tensor=y_pred))  # y_true is unused; result is the sqrt of the sum over every element of y_pred