tensorflow2

This commit is contained in:
Daniel Saavedra
2020-03-25 18:23:00 -03:00
parent 7010af8a58
commit 7cf0c577a1
25 changed files with 1016 additions and 309 deletions

View File

@@ -4,13 +4,13 @@
"max_input_size": 400, "max_input_size": 400,
"anchors": [5,7, 10,14, 15, 15, 26,32, 45,119, 54,18, 94,59, 109,183, 200,21], "anchors": [5,7, 10,14, 15, 15, 26,32, 45,119, 54,18, 94,59, 109,183, 200,21],
"labels": ["4"], "labels": ["4"],
"backend": "full_yolo_backend.h5" "backend": "keras-yolo3-master/full_yolo_backend.h5"
}, },
"train": { "train": {
"train_image_folder": "../Train&Test_D/Train/images/", "train_image_folder": "Train&Test_D/Train/images/",
"train_annot_folder": "../Train&Test_D/Train/anns/", "train_annot_folder": "Train&Test_D/Train/anns/",
"cache_name": "../Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", "cache_name": "Result_yolo3_fault_4/Result_yolo3_fault_4.pkl",
"train_times": 1, "train_times": 1,
@@ -28,21 +28,21 @@
"class_scale": 1, "class_scale": 1,
"tensorboard_dir": "log_experimento_fault_gpu", "tensorboard_dir": "log_experimento_fault_gpu",
"saved_weights_name": "../Result_yolo3_fault_4/yolo3_full_fault_4.h5", "saved_weights_name": "Result_yolo3_fault_4/yolo3_full_fault_4.h5",
"debug": true "debug": true
}, },
"valid": { "valid": {
"valid_image_folder": "../Train&Test_D/Test/images/", "valid_image_folder": "Train&Test_D/Test/images/",
"valid_annot_folder": "../Train&Test_D/Test/anns/", "valid_annot_folder": "Train&Test_D/Test/anns/",
"cache_name": "../Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", "cache_name": "Result_yolo3_fault_4/Result_yolo3_fault_4.pkl",
"valid_times": 1 "valid_times": 1
}, },
"test": { "test": {
"test_image_folder": "../Train&Test_D/Test/images/", "test_image_folder": "Train&Test_D/Test/images/",
"test_annot_folder": "../Train&Test_D/Test/anns/", "test_annot_folder": "Train&Test_D/Test/anns/",
"cache_name": "../Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", "cache_name": "Result_yolo3_fault_4/Result_yolo3_fault_4.pkl",
"test_times": 1 "test_times": 1
} }

View File

@@ -1,15 +1,16 @@
from keras.callbacks import TensorBoard, ModelCheckpoint from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import warnings
class CustomTensorBoard(TensorBoard): class CustomTensorBoard(TensorBoard):
""" to log the loss after each batch """ to log the loss after each batch
""" """
def __init__(self, log_every=1, **kwargs): def __init__(self, log_every=1, **kwargs):
super(CustomTensorBoard, self).__init__(**kwargs) super(CustomTensorBoard, self).__init__(**kwargs)
self.log_every = log_every self.log_every = log_every
self.counter = 0 self.counter = 0
def on_batch_end(self, batch, logs=None): def on_batch_end(self, batch, logs=None):
self.counter+=1 self.counter+=1
if self.counter%self.log_every==0: if self.counter%self.log_every==0:
@@ -22,7 +23,7 @@ class CustomTensorBoard(TensorBoard):
summary_value.tag = name summary_value.tag = name
self.writer.add_summary(summary, self.counter) self.writer.add_summary(summary, self.counter)
self.writer.flush() self.writer.flush()
super(CustomTensorBoard, self).on_batch_end(batch, logs) super(CustomTensorBoard, self).on_batch_end(batch, logs)
class CustomModelCheckpoint(ModelCheckpoint): class CustomModelCheckpoint(ModelCheckpoint):
@@ -67,4 +68,4 @@ class CustomModelCheckpoint(ModelCheckpoint):
else: else:
self.model_to_save.save(filepath, overwrite=True) self.model_to_save.save(filepath, overwrite=True)
super(CustomModelCheckpoint, self).on_batch_end(epoch, logs) super(CustomModelCheckpoint, self).on_batch_end(epoch, logs)

View File

@@ -8,9 +8,9 @@ from voc import parse_voc_annotation
from yolo import create_yolov3_model from yolo import create_yolov3_model
from generator import BatchGenerator from generator import BatchGenerator
from utils.utils import normalize, evaluate from utils.utils import normalize, evaluate
from keras.callbacks import EarlyStopping, ModelCheckpoint from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam from tensorflow.keras.optimizers import Adam
from keras.models import load_model from tensorflow.keras.models import load_model
def _main_(args): def _main_(args):
config_path = args.conf config_path = args.conf

View File

@@ -1,22 +1,22 @@
import cv2 import cv2
import copy import copy
import numpy as np import numpy as np
from keras.utils import Sequence from tensorflow.keras.utils import Sequence
from utils.bbox import BoundBox, bbox_iou from utils.bbox import BoundBox, bbox_iou
from utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes from utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes
class BatchGenerator(Sequence): class BatchGenerator(Sequence):
def __init__(self, def __init__(self,
instances, instances,
anchors, anchors,
labels, labels,
downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3 downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3
max_box_per_image=30, max_box_per_image=30,
batch_size=1, batch_size=1,
min_net_size=320, min_net_size=320,
max_net_size=608, max_net_size=608,
shuffle=True, shuffle=True,
jitter=True, jitter=True,
norm=None norm=None
): ):
self.instances = instances self.instances = instances
@@ -30,13 +30,13 @@ class BatchGenerator(Sequence):
self.jitter = jitter self.jitter = jitter
self.norm = norm self.norm = norm
self.anchors = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) for i in range(len(anchors)//2)] self.anchors = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) for i in range(len(anchors)//2)]
self.net_h = 416 self.net_h = 416
self.net_w = 416 self.net_w = 416
if shuffle: np.random.shuffle(self.instances) if shuffle: np.random.shuffle(self.instances)
def __len__(self): def __len__(self):
return int(np.ceil(float(len(self.instances))/self.batch_size)) return int(np.ceil(float(len(self.instances))/self.batch_size))
def __getitem__(self, idx): def __getitem__(self, idx):
# get image input size, change every 10 batches # get image input size, change every 10 batches
@@ -63,7 +63,7 @@ class BatchGenerator(Sequence):
dummy_yolo_1 = np.zeros((r_bound - l_bound, 1)) dummy_yolo_1 = np.zeros((r_bound - l_bound, 1))
dummy_yolo_2 = np.zeros((r_bound - l_bound, 1)) dummy_yolo_2 = np.zeros((r_bound - l_bound, 1))
dummy_yolo_3 = np.zeros((r_bound - l_bound, 1)) dummy_yolo_3 = np.zeros((r_bound - l_bound, 1))
instance_count = 0 instance_count = 0
true_box_index = 0 true_box_index = 0
@@ -71,18 +71,18 @@ class BatchGenerator(Sequence):
for train_instance in self.instances[l_bound:r_bound]: for train_instance in self.instances[l_bound:r_bound]:
# augment input image and fix object's position and size # augment input image and fix object's position and size
img, all_objs = self._aug_image(train_instance, net_h, net_w) img, all_objs = self._aug_image(train_instance, net_h, net_w)
for obj in all_objs: for obj in all_objs:
# find the best anchor box for this object # find the best anchor box for this object
max_anchor = None max_anchor = None
max_index = -1 max_index = -1
max_iou = -1 max_iou = -1
shifted_box = BoundBox(0, shifted_box = BoundBox(0,
0, 0,
obj['xmax']-obj['xmin'], obj['xmax']-obj['xmin'],
obj['ymax']-obj['ymin']) obj['ymax']-obj['ymin'])
for i in range(len(self.anchors)): for i in range(len(self.anchors)):
anchor = self.anchors[i] anchor = self.anchors[i]
iou = bbox_iou(shifted_box, anchor) iou = bbox_iou(shifted_box, anchor)
@@ -90,18 +90,18 @@ class BatchGenerator(Sequence):
if max_iou < iou: if max_iou < iou:
max_anchor = anchor max_anchor = anchor
max_index = i max_index = i
max_iou = iou max_iou = iou
# determine the yolo to be responsible for this bounding box # determine the yolo to be responsible for this bounding box
yolo = yolos[max_index//3] yolo = yolos[max_index//3]
grid_h, grid_w = yolo.shape[1:3] grid_h, grid_w = yolo.shape[1:3]
# determine the position of the bounding box on the grid # determine the position of the bounding box on the grid
center_x = .5*(obj['xmin'] + obj['xmax']) center_x = .5*(obj['xmin'] + obj['xmax'])
center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x
center_y = .5*(obj['ymin'] + obj['ymax']) center_y = .5*(obj['ymin'] + obj['ymax'])
center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y
# determine the sizes of the bounding box # determine the sizes of the bounding box
w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w
h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h
@@ -109,7 +109,7 @@ class BatchGenerator(Sequence):
box = [center_x, center_y, w, h] box = [center_x, center_y, w, h]
# determine the index of the label # determine the index of the label
obj_indx = self.labels.index(obj['name']) obj_indx = self.labels.index(obj['name'])
# determine the location of the cell responsible for this object # determine the location of the cell responsible for this object
grid_x = int(np.floor(center_x)) grid_x = int(np.floor(center_x))
@@ -126,25 +126,25 @@ class BatchGenerator(Sequence):
t_batch[instance_count, 0, 0, 0, true_box_index] = true_box t_batch[instance_count, 0, 0, 0, true_box_index] = true_box
true_box_index += 1 true_box_index += 1
true_box_index = true_box_index % self.max_box_per_image true_box_index = true_box_index % self.max_box_per_image
# assign input image to x_batch # assign input image to x_batch
if self.norm != None: if self.norm != None:
x_batch[instance_count] = self.norm(img) x_batch[instance_count] = self.norm(img)
else: else:
# plot image and bounding boxes for sanity check # plot image and bounding boxes for sanity check
for obj in all_objs: for obj in all_objs:
cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3) cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)
cv2.putText(img, obj['name'], cv2.putText(img, obj['name'],
(obj['xmin']+2, obj['ymin']+12), (obj['xmin']+2, obj['ymin']+12),
0, 1.2e-3 * img.shape[0], 0, 1.2e-3 * img.shape[0],
(0,255,0), 2) (0,255,0), 2)
x_batch[instance_count] = img x_batch[instance_count] = img
# increase instance counter in the current batch # increase instance counter in the current batch
instance_count += 1 instance_count += 1
return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3] return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
def _get_net_size(self, idx): def _get_net_size(self, idx):
@@ -154,16 +154,16 @@ class BatchGenerator(Sequence):
#print("resizing: ", net_size, net_size) #print("resizing: ", net_size, net_size)
self.net_h, self.net_w = net_size, net_size self.net_h, self.net_w = net_size, net_size
return self.net_h, self.net_w return self.net_h, self.net_w
def _aug_image(self, instance, net_h, net_w): def _aug_image(self, instance, net_h, net_w):
image_name = instance['filename'] image_name = instance['filename']
image = cv2.imread(image_name) # RGB image image = cv2.imread(image_name) # RGB image
if image is None: print('Cannot find ', image_name) if image is None: print('Cannot find ', image_name)
image = image[:,:,::-1] # RGB image image = image[:,:,::-1] # RGB image
image_h, image_w, _ = image.shape image_h, image_w, _ = image.shape
# determine the amount of scaling and cropping # determine the amount of scaling and cropping
dw = self.jitter * image_w; dw = self.jitter * image_w;
dh = self.jitter * image_h; dh = self.jitter * image_h;
@@ -177,33 +177,33 @@ class BatchGenerator(Sequence):
else: else:
new_w = int(scale * net_w); new_w = int(scale * net_w);
new_h = int(net_w / new_ar); new_h = int(net_w / new_ar);
dx = int(np.random.uniform(0, net_w - new_w)); dx = int(np.random.uniform(0, net_w - new_w));
dy = int(np.random.uniform(0, net_h - new_h)); dy = int(np.random.uniform(0, net_h - new_h));
# apply scaling and cropping # apply scaling and cropping
im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy) im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy)
# randomly distort hsv space # randomly distort hsv space
im_sized = random_distort_image(im_sized) im_sized = random_distort_image(im_sized)
# randomly flip # randomly flip
flip = np.random.randint(2) flip = np.random.randint(2)
im_sized = random_flip(im_sized, flip) im_sized = random_flip(im_sized, flip)
# correct the size and pos of bounding boxes # correct the size and pos of bounding boxes
all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h) all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h)
return im_sized, all_objs return im_sized, all_objs
def on_epoch_end(self): def on_epoch_end(self):
if self.shuffle: np.random.shuffle(self.instances) if self.shuffle: np.random.shuffle(self.instances)
def num_classes(self): def num_classes(self):
return len(self.labels) return len(self.labels)
def size(self): def size(self):
return len(self.instances) return len(self.instances)
def get_anchors(self): def get_anchors(self):
anchors = [] anchors = []
@@ -225,4 +225,4 @@ class BatchGenerator(Sequence):
return np.array(annots) return np.array(annots)
def load_image(self, i): def load_image(self, i):
return cv2.imread(self.instances[i]['filename']) return cv2.imread(self.instances[i]['filename'])

44
keras-yolo3-master/train.py Executable file → Normal file
View File

@@ -8,13 +8,16 @@ from voc import parse_voc_annotation
from yolo import create_yolov3_model, dummy_loss from yolo import create_yolov3_model, dummy_loss
from generator import BatchGenerator from generator import BatchGenerator
from utils.utils import normalize, evaluate, makedirs from utils.utils import normalize, evaluate, makedirs
from keras.callbacks import EarlyStopping, ReduceLROnPlateau from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.optimizers import Adam from tensorflow.keras.optimizers import Adam
from callbacks import CustomModelCheckpoint, CustomTensorBoard from callbacks import CustomModelCheckpoint, CustomTensorBoard
from utils.multi_gpu_model import multi_gpu_model from utils.multi_gpu_model import multi_gpu_model
import tensorflow as tf import tensorflow as tf
import keras from tensorflow import keras
from keras.models import load_model from tensorflow.keras.models import load_model
tf.keras.backend.clear_session()
tf.config.experimental_run_functions_eagerly(True)
def create_training_instances( def create_training_instances(
train_annot_folder, train_annot_folder,
@@ -66,28 +69,34 @@ def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save):
makedirs(tensorboard_logs) makedirs(tensorboard_logs)
early_stop = EarlyStopping( early_stop = EarlyStopping(
monitor = 'loss', monitor = 'val_loss',
min_delta = 0.01, min_delta = 0.01,
patience = 25, patience = 25,
mode = 'min', mode = 'min',
verbose = 1 verbose = 1
) )
checkpoint = CustomModelCheckpoint( """checkpoint = CustomModelCheckpoint(
model_to_save = model_to_save, model_to_save = model_to_save,
filepath = saved_weights_name,# + '{epoch:02d}.h5', filepath = saved_weights_name,# + '{epoch:02d}.h5',
monitor = 'loss', monitor = 'loss',
verbose = 1, verbose = 1,
save_best_only = True, save_best_only = True,
mode = 'min', mode = 'min',
period = 1 save_freq = 1
) )"""
checkpoint = ModelCheckpoint(filepath=saved_weights_name,
monitor='val_loss',
save_best_only=True,
save_weights_only=True,
verbose=1)
reduce_on_plateau = ReduceLROnPlateau( reduce_on_plateau = ReduceLROnPlateau(
monitor = 'loss', monitor = 'val_loss',
factor = 0.5, factor = 0.5,
patience = 15, patience = 15,
verbose = 1, verbose = 1,
mode = 'min', mode = 'min',
epsilon = 0.01, min_delta = 0.01,
cooldown = 0, cooldown = 0,
min_lr = 0 min_lr = 0
) )
@@ -96,7 +105,7 @@ def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save):
write_graph = True, write_graph = True,
write_images = True, write_images = True,
) )
return [early_stop, checkpoint, reduce_on_plateau, tensorboard] return [early_stop, checkpoint, reduce_on_plateau]
def create_model( def create_model(
nb_class, nb_class,
@@ -245,21 +254,24 @@ def _main_(args):
backend = config['model']['backend'] backend = config['model']['backend']
) )
############################### ###############################
# Kick off the training # Kick off the training
############################### ###############################
callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model) callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model)
train_model.fit_generator( train_model.fit(
generator = train_generator, x = train_generator,
validation_data = valid_generator,
steps_per_epoch = len(train_generator) * config['train']['train_times'], steps_per_epoch = len(train_generator) * config['train']['train_times'],
epochs = config['train']['nb_epochs'] + config['train']['warmup_epochs'], epochs = config['train']['nb_epochs'] + config['train']['warmup_epochs'],
verbose = 2 if config['train']['debug'] else 1, verbose = 2 if config['train']['debug'] else 1,
callbacks = callbacks,
workers = 4, workers = 4,
max_queue_size = 8 max_queue_size = 8,
callbacks = callbacks
) )
# make a GPU version of infer_model for evaluation # make a GPU version of infer_model for evaluation
if multi_gpu > 1: if multi_gpu > 1:
infer_model = load_model(config['train']['saved_weights_name']) infer_model = load_model(config['train']['saved_weights_name'])
@@ -284,7 +296,7 @@ def _main_(args):
return return
print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances))) print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances))) print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))
if __name__ == '__main__': if __name__ == '__main__':
argparser = argparse.ArgumentParser(description='train and evaluate YOLO_v3 model on any dataset') argparser = argparse.ArgumentParser(description='train and evaluate YOLO_v3 model on any dataset')

294
keras-yolo3-master/train_old.py Executable file
View File

@@ -0,0 +1,294 @@
#! /usr/bin/env python
import argparse
import os
import numpy as np
import json
from voc import parse_voc_annotation
from yolo import create_yolov3_model, dummy_loss
from generator import BatchGenerator
from utils.utils import normalize, evaluate, makedirs
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from callbacks import CustomModelCheckpoint, CustomTensorBoard
from utils.multi_gpu_model import multi_gpu_model
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model
def create_training_instances(
    train_annot_folder,
    train_image_folder,
    train_cache,
    valid_annot_folder,
    valid_image_folder,
    valid_cache,
    labels,
):
    """Parse the VOC annotations and build the training/validation instance lists.

    Args:
        train_annot_folder: folder holding the training annotations.
        train_image_folder: folder holding the training images.
        train_cache: cache file handed to ``parse_voc_annotation`` for the training set.
        valid_annot_folder: folder holding the validation annotations. When this
            path does not exist, the training set is split 80/20 instead.
        valid_image_folder: folder holding the validation images.
        valid_cache: cache file handed to ``parse_voc_annotation`` for the validation set.
        labels: labels requested in the configuration; an empty list means
            "train on every label seen in the training annotations".

    Returns:
        A 4-tuple ``(train_ints, valid_ints, labels, max_box_per_image)`` where
        ``labels`` is sorted and ``max_box_per_image`` is the largest number of
        objects found in any single instance. When some requested label has no
        annotation, ``(None, None, None, None)`` is returned so that callers
        unpacking four values do not fail on the unpacking itself.
    """
    # parse annotations of the training set
    train_ints, train_labels = parse_voc_annotation(train_annot_folder, train_image_folder, train_cache, labels)

    # parse annotations of the validation set, if any, otherwise split the training set
    if os.path.exists(valid_annot_folder):
        valid_ints, _ = parse_voc_annotation(valid_annot_folder, valid_image_folder, valid_cache, labels)
    else:
        print("valid_annot_folder not exists. Spliting the trainining set.")

        train_valid_split = int(0.8*len(train_ints))
        # fixed seed so the split is reproducible, then re-seed so later
        # shuffling/augmentation is not tied to seed 0
        np.random.seed(0)
        np.random.shuffle(train_ints)
        np.random.seed()

        valid_ints = train_ints[train_valid_split:]
        train_ints = train_ints[:train_valid_split]

    # compare the seen labels with the given labels in config.json
    if len(labels) > 0:
        overlap_labels = set(labels).intersection(set(train_labels.keys()))

        print('Seen labels: \t' + str(train_labels) + '\n')
        print('Given labels: \t' + str(labels))

        # bail out if some given label is not in the dataset; the tuple has the
        # same arity as the normal return value
        if len(overlap_labels) < len(labels):
            print('Some labels have no annotations! Please revise the list of labels in the config.json.')
            return None, None, None, None
    else:
        print('No labels are provided. Train on all seen labels.')
        print(train_labels)
        labels = train_labels.keys()

    max_box_per_image = max([len(inst['object']) for inst in (train_ints + valid_ints)])

    return train_ints, valid_ints, sorted(labels), max_box_per_image
def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save):
    """Build the list of Keras callbacks used during training.

    Args:
        saved_weights_name: path the checkpoint callback writes to.
        tensorboard_logs: TensorBoard log directory; created if needed via ``makedirs``.
        model_to_save: model handed to ``CustomModelCheckpoint`` as ``model_to_save``.

    Returns:
        ``[early_stop, checkpoint, reduce_on_plateau, tensorboard]``.
    """
    makedirs(tensorboard_logs)

    early_stop = EarlyStopping(
        monitor     = 'loss',
        min_delta   = 0.01,
        patience    = 25,
        mode        = 'min',
        verbose     = 1
    )
    # The pre-TF2 code used ``period = 1`` (check once per epoch). In tf.keras an
    # integer ``save_freq`` is counted in batches/samples, so ``save_freq = 1``
    # would checkpoint on (nearly) every step; 'epoch' is the per-epoch setting.
    # NOTE(review): assumes CustomModelCheckpoint forwards its keyword arguments
    # to tf.keras ModelCheckpoint - confirm against callbacks.py.
    checkpoint = CustomModelCheckpoint(
        model_to_save   = model_to_save,
        filepath        = saved_weights_name,# + '{epoch:02d}.h5',
        monitor         = 'loss',
        verbose         = 1,
        save_best_only  = True,
        mode            = 'min',
        save_freq       = 'epoch'
    )
    reduce_on_plateau = ReduceLROnPlateau(
        monitor   = 'loss',
        factor    = 0.5,
        patience  = 15,
        verbose   = 1,
        mode      = 'min',
        min_delta = 0.01,
        cooldown  = 0,
        min_lr    = 0
    )
    tensorboard = CustomTensorBoard(
        log_dir      = tensorboard_logs,
        write_graph  = True,
        write_images = True,
    )
    return [early_stop, checkpoint, reduce_on_plateau, tensorboard]
def create_model(
    nb_class,
    anchors,
    max_box_per_image,
    max_grid, batch_size,
    warmup_batches,
    ignore_thresh,
    multi_gpu,
    saved_weights_name,
    lr,
    grid_scales,
    obj_scale,
    noobj_scale,
    xywh_scale,
    class_scale,
    backend
):
    """Create, initialise and compile the YOLOv3 training model.

    Args:
        nb_class, anchors, max_box_per_image, max_grid, warmup_batches,
        ignore_thresh, grid_scales, obj_scale, noobj_scale, xywh_scale,
        class_scale: forwarded unchanged to ``create_yolov3_model``.
        batch_size: total batch size; divided by ``multi_gpu`` when more than
            one GPU is used.
        multi_gpu: number of GPUs. With more than one, the template model is
            built under ``/cpu:0`` and wrapped with ``multi_gpu_model``.
        saved_weights_name: weights file loaded into the template model when it exists.
        lr: learning rate for the Adam optimizer.
        backend: weights file loaded ``by_name`` when ``saved_weights_name``
            does not exist.

    Returns:
        ``(train_model, infer_model)``; ``train_model`` is compiled with
        ``dummy_loss`` and Adam.
    """
    use_multi_gpu = multi_gpu > 1

    # single source of truth for the model arguments; only the per-replica
    # batch size depends on the GPU count
    yolo_kwargs = dict(
        nb_class            = nb_class,
        anchors             = anchors,
        max_box_per_image   = max_box_per_image,
        max_grid            = max_grid,
        batch_size          = batch_size//multi_gpu if use_multi_gpu else batch_size,
        warmup_batches      = warmup_batches,
        ignore_thresh       = ignore_thresh,
        grid_scales         = grid_scales,
        obj_scale           = obj_scale,
        noobj_scale         = noobj_scale,
        xywh_scale          = xywh_scale,
        class_scale         = class_scale
    )

    if use_multi_gpu:
        with tf.device('/cpu:0'):
            template_model, infer_model = create_yolov3_model(**yolo_kwargs)
    else:
        template_model, infer_model = create_yolov3_model(**yolo_kwargs)

    # load the pretrained weight if exists, otherwise load the backend weight only
    if os.path.exists(saved_weights_name):
        print("\nLoading pretrained weights.\n")
        template_model.load_weights(saved_weights_name)
    else:
        template_model.load_weights(backend, by_name=True)

    if use_multi_gpu:
        train_model = multi_gpu_model(template_model, gpus=multi_gpu)
    else:
        train_model = template_model

    # ``learning_rate`` is the tf.keras argument name; ``lr`` is only a
    # deprecated alias
    optimizer = Adam(learning_rate=lr, clipnorm=0.001)
    train_model.compile(loss=dummy_loss, optimizer=optimizer)

    return train_model, infer_model
def _main_(args):
    """Train a YOLOv3 model as described by a JSON config file, then print its mAP.

    Args:
        args: parsed command-line arguments; only ``args.conf`` (path to the
            JSON configuration file) is read.

    The function parses the annotations, builds the training and validation
    generators, creates the model, trains it and finally evaluates it on the
    validation generator. It returns ``None`` in every case; it returns early
    when the requested labels are not all present in the dataset or when the
    evaluation finds no instances.
    """
    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################
    instances = create_training_instances(
        config['train']['train_annot_folder'],
        config['train']['train_image_folder'],
        config['train']['cache_name'],
        config['valid']['valid_annot_folder'],
        config['valid']['valid_image_folder'],
        config['valid']['cache_name'],
        config['model']['labels']
    )
    # create_training_instances prints the reason and returns Nones when some
    # configured label has no annotation; stop here instead of failing later
    if instances[0] is None:
        return
    train_ints, valid_ints, labels, max_box_per_image = instances
    print('\nTraining on: \t' + str(labels) + '\n')

    ###############################
    #   Create the generators
    ###############################
    train_generator = BatchGenerator(
        instances           = train_ints,
        anchors             = config['model']['anchors'],
        labels              = labels,
        downsample          = 32, # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image   = max_box_per_image,
        batch_size          = config['train']['batch_size'],
        min_net_size        = config['model']['min_input_size'],
        max_net_size        = config['model']['max_input_size'],
        shuffle             = True,
        jitter              = 0.3,
        norm                = normalize
    )

    valid_generator = BatchGenerator(
        instances           = valid_ints,
        anchors             = config['model']['anchors'],
        labels              = labels,
        downsample          = 32, # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image   = max_box_per_image,
        batch_size          = config['train']['batch_size'],
        min_net_size        = config['model']['min_input_size'],
        max_net_size        = config['model']['max_input_size'],
        shuffle             = True,
        jitter              = 0.0,
        norm                = normalize
    )

    ###############################
    #   Create the model
    ###############################
    # resuming from existing weights: skip the warm-up phase
    if os.path.exists(config['train']['saved_weights_name']):
        config['train']['warmup_epochs'] = 0
    warmup_batches = config['train']['warmup_epochs'] * (config['train']['train_times']*len(train_generator))

    os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
    multi_gpu = len(config['train']['gpus'].split(','))
    print('multi_gpu:' + str(multi_gpu))

    train_model, infer_model = create_model(
        nb_class            = len(labels),
        anchors             = config['model']['anchors'],
        max_box_per_image   = max_box_per_image,
        max_grid            = [config['model']['max_input_size'], config['model']['max_input_size']],
        batch_size          = config['train']['batch_size'],
        warmup_batches      = warmup_batches,
        ignore_thresh       = config['train']['ignore_thresh'],
        multi_gpu           = multi_gpu,
        saved_weights_name  = config['train']['saved_weights_name'],
        lr                  = config['train']['learning_rate'],
        grid_scales         = config['train']['grid_scales'],
        obj_scale           = config['train']['obj_scale'],
        noobj_scale         = config['train']['noobj_scale'],
        xywh_scale          = config['train']['xywh_scale'],
        class_scale         = config['train']['class_scale'],
        backend             = config['model']['backend']
    )

    ###############################
    #   Kick off the training
    ###############################
    callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model)

    # tf.keras Model.fit takes the generator/Sequence as ``x``; ``generator``
    # was the keyword of the removed fit_generator API and is rejected by fit
    train_model.fit(
        x                = train_generator,
        steps_per_epoch  = len(train_generator) * config['train']['train_times'],
        epochs           = config['train']['nb_epochs'] + config['train']['warmup_epochs'],
        verbose          = 2 if config['train']['debug'] else 1,
        callbacks        = callbacks,
        workers          = 4,
        max_queue_size   = 8
    )

    # make a GPU version of infer_model for evaluation
    if multi_gpu > 1:
        infer_model = load_model(config['train']['saved_weights_name'])

    ###############################
    #   Run the evaluation
    ###############################
    # compute mAP for all the classes
    average_precisions = evaluate(infer_model, valid_generator)

    # print the score
    total_instances = []
    precisions = []
    for label, (average_precision, num_annotations) in average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations),
              labels[label], 'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    # nothing to average over; also avoids dividing by zero below
    if sum(total_instances) == 0:
        print('No test instances found.')
        return

    print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
    print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))
if __name__ == '__main__':
    # Script entry point: read the configuration path from the command line
    # and hand the parsed arguments to the training routine.
    cli = argparse.ArgumentParser(
        description='train and evaluate YOLO_v3 model on any dataset'
    )
    cli.add_argument(
        '-c',
        '--conf',
        help='path to configuration file',
    )
    _main_(cli.parse_args())

View File

@@ -9,7 +9,7 @@ class BoundBox:
self.ymin = ymin self.ymin = ymin
self.xmax = xmax self.xmax = xmax
self.ymax = ymax self.ymax = ymax
self.c = c self.c = c
self.classes = classes self.classes = classes
@@ -19,14 +19,14 @@ class BoundBox:
def get_label(self): def get_label(self):
if self.label == -1: if self.label == -1:
self.label = np.argmax(self.classes) self.label = np.argmax(self.classes)
return self.label return self.label
def get_score(self): def get_score(self):
if self.score == -1: if self.score == -1:
self.score = self.classes[self.get_label()] self.score = self.classes[self.get_label()]
return self.score return self.score
def _interval_overlap(interval_a, interval_b): def _interval_overlap(interval_a, interval_b):
x1, x2 = interval_a x1, x2 = interval_a
@@ -41,49 +41,51 @@ def _interval_overlap(interval_a, interval_b):
if x2 < x3: if x2 < x3:
return 0 return 0
else: else:
return min(x2,x4) - x3 return min(x2,x4) - x3
def bbox_iou(box1, box2): def bbox_iou(box1, box2):
intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
intersect = intersect_w * intersect_h intersect = intersect_w * intersect_h
w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
union = w1*h1 + w2*h2 - intersect union = w1*h1 + w2*h2 - intersect
if union == 0: return 0
return float(intersect) / union return float(intersect) / union
def draw_boxes(image, boxes, labels, obj_thresh, quiet=True): def draw_boxes(image, boxes, labels, obj_thresh, quiet=True):
for box in boxes: for box in boxes:
label_str = '' label_str = ''
label = -1 label = -1
for i in range(len(labels)): for i in range(len(labels)):
if box.classes[i] > obj_thresh: if box.classes[i] > obj_thresh:
if label_str != '': label_str += ', ' if label_str != '': label_str += ', '
label_str += (labels[i] + ' ' + str(round(box.get_score()*100,0)) + '%') label_str += (labels[i] + ' ' + str(round(box.get_score()*100,0)) + '%')
label = i label = i
if not quiet: print(label_str) if not quiet: print(label_str)
if label >= 0: if label >= 0:
text_size = cv2.getTextSize(label_str, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-4 * image.shape[0], 2) text_size = cv2.getTextSize(label_str, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-4 * image.shape[0], 2)
width, height = text_size[0][0], text_size[0][1] width, height = text_size[0][0], text_size[0][1]
region = np.array([[box.xmin-3, box.ymin], region = np.array([[box.xmin-3, box.ymin],
[box.xmin-3, box.ymin-height-16], [box.xmin-3, box.ymin-height-16],
[box.xmin+width+6, box.ymin-height-16], [box.xmin+width+6, box.ymin-height-16],
[box.xmin+width+6, box.ymin]], dtype='int32') [box.xmin+width+6, box.ymin]], dtype='int32')
cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=1) cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=1)
cv2.fillPoly(img=image, pts=[region], color=get_color(label)) cv2.fillPoly(img=image, pts=[region], color=get_color(label))
cv2.putText(img=image, cv2.putText(img=image,
text=label_str, text=label_str,
org=(box.xmin+6, box.ymin - 6), org=(box.xmin+6, box.ymin - 6),
fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.7e-3 * image.shape[0], fontScale=0.7e-3 * image.shape[0],
color=(0,0,0), color=(0,0,0),
thickness=2) thickness=2)
return image return image

View File

@@ -1,5 +1,5 @@
from keras.layers import Lambda, concatenate from tensorflow.keras.layers import Lambda, concatenate
from keras.models import Model from tensorflow.keras.models import Model
import tensorflow as tf import tensorflow as tf
def multi_gpu_model(model, gpus): def multi_gpu_model(model, gpus):
@@ -59,4 +59,4 @@ def multi_gpu_model(model, gpus):
for name, outputs in zip(model.output_names, all_outputs): for name, outputs in zip(model.output_names, all_outputs):
merged.append(concatenate(outputs, merged.append(concatenate(outputs,
axis=0, name=name)) axis=0, name=name))
return Model(model.inputs, merged) return Model(model.inputs, merged)

View File

@@ -3,6 +3,7 @@ import numpy as np
import os import os
from .bbox import BoundBox, bbox_iou from .bbox import BoundBox, bbox_iou
from scipy.special import expit from scipy.special import expit
import tensorflow as tf
def _sigmoid(x): def _sigmoid(x):
return expit(x) return expit(x)
@@ -166,18 +167,30 @@ def do_nms(boxes, nms_thresh):
if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
boxes[index_j].classes[c] = 0 boxes[index_j].classes[c] = 0
def decode_netout(netout, anchors, obj_thresh, net_h, net_w): def decode_netout(netout_old, anchors, obj_thresh, net_h, net_w):
grid_h, grid_w = netout.shape[:2] grid_h, grid_w = netout_old.shape[:2]
nb_box = 3 nb_box = 3
netout = netout.reshape((grid_h, grid_w, nb_box, -1)) #netout = netout.reshape((grid_h, grid_w, nb_box, -1))
nb_class = netout.shape[-1] - 5 netout_old = tf.reshape(netout_old, (grid_h, grid_w, nb_box, -1))
nb_class = netout_old.shape[-1] - 5
boxes = [] boxes = []
## Tensorflow v.2
#print(tf.shape(netout))
aux_1 = _sigmoid(netout_old[..., :2])
#print(tf.shape(aux_1))
aux_2 = _sigmoid(netout_old[..., 4])
#print(tf.shape(aux_2[..., np.newaxis]))
aux_3 = aux_2[..., np.newaxis] * _softmax(netout_old[..., 5:])
aux_4 = aux_3 * (aux_3 > obj_thresh)
#print(tf.shape(aux_4))
netout = tf.concat([aux_1,netout_old[..., 2:4] ,aux_2[..., np.newaxis], aux_4], 3)
#print(tf.shape(new_netout))
netout[..., :2] = _sigmoid(netout[..., :2]) #netout[..., :2] = _sigmoid(netout[..., :2])
netout[..., 4] = _sigmoid(netout[..., 4]) #netout[..., 4] = _sigmoid(netout[..., 4])
netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) #netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
netout[..., 5:] *= netout[..., 5:] > obj_thresh #netout[..., 5:] *= netout[..., 5:] > obj_thresh
for i in range(grid_h*grid_w): for i in range(grid_h*grid_w):
row = i // grid_w row = i // grid_w
@@ -198,7 +211,7 @@ def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
# last elements are class probabilities # last elements are class probabilities
classes = netout[row,col,b,5:] classes = np.array(netout[row,col,b,5:])
box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)

191
keras-yolo3-master/yolo.py Executable file → Normal file
View File

@@ -1,12 +1,12 @@
from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda
from keras.layers.merge import add, concatenate from tensorflow.keras.layers import add, concatenate
from keras.models import Model from tensorflow.keras.models import Model
from keras.engine.topology import Layer from tensorflow.keras.layers import Layer
import tensorflow as tf import tensorflow as tf
class YoloLayer(Layer): class YoloLayer(Layer):
def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh,
grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale,
**kwargs): **kwargs):
# make the model settings persistent # make the model settings persistent
self.ignore_thresh = ignore_thresh self.ignore_thresh = ignore_thresh
@@ -16,13 +16,13 @@ class YoloLayer(Layer):
self.obj_scale = obj_scale self.obj_scale = obj_scale
self.noobj_scale = noobj_scale self.noobj_scale = noobj_scale
self.xywh_scale = xywh_scale self.xywh_scale = xywh_scale
self.class_scale = class_scale self.class_scale = class_scale
# make a persistent mesh grid # make a persistent mesh grid
max_grid_h, max_grid_w = max_grid max_grid_h, max_grid_w = max_grid
cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1))) cell_x = tf.cast(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)), dtype=tf.float32)
cell_y = tf.transpose(cell_x, (0,2,1,3,4)) cell_y = tf.transpose(a=cell_x, perm=(0,2,1,3,4))
self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1]) self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])
super(YoloLayer, self).__init__(**kwargs) super(YoloLayer, self).__init__(**kwargs)
@@ -34,30 +34,30 @@ class YoloLayer(Layer):
input_image, y_pred, y_true, true_boxes = x input_image, y_pred, y_true, true_boxes = x
# adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class] # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0)) y_pred = tf.reshape(y_pred, tf.concat([tf.shape(input=y_pred)[:3], tf.constant([3, -1])], axis=0))
# initialize the masks # initialize the masks
object_mask = tf.expand_dims(y_true[..., 4], 4) object_mask = tf.expand_dims(y_true[..., 4], 4)
# the variable to keep track of number of batches processed # the variable to keep track of number of batches processed
batch_seen = tf.Variable(0.) batch_seen = tf.Variable(0.)
# compute grid factor and net factor # compute grid factor and net factor
grid_h = tf.shape(y_true)[1] grid_h = tf.shape(input=y_true)[1]
grid_w = tf.shape(y_true)[2] grid_w = tf.shape(input=y_true)[2]
grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2]) grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])
net_h = tf.shape(input_image)[1] net_h = tf.shape(input=input_image)[1]
net_w = tf.shape(input_image)[2] net_w = tf.shape(input=input_image)[2]
net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2]) net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])
""" """
Adjust prediction Adjust prediction
""" """
pred_box_xy = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy pred_box_xy = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy
pred_box_wh = y_pred[..., 2:4] # t_wh pred_box_wh = y_pred[..., 2:4] # t_wh
pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence
pred_box_class = y_pred[..., 5:] # adjust class probabilities pred_box_class = y_pred[..., 5:] # adjust class probabilities
""" """
Adjust ground truth Adjust ground truth
@@ -65,47 +65,47 @@ class YoloLayer(Layer):
true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy) true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy)
true_box_wh = y_true[..., 2:4] # t_wh true_box_wh = y_true[..., 2:4] # t_wh
true_box_conf = tf.expand_dims(y_true[..., 4], 4) true_box_conf = tf.expand_dims(y_true[..., 4], 4)
true_box_class = tf.argmax(y_true[..., 5:], -1) true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1)
""" """
Compare each predicted box to all true boxes Compare each predicted box to all true boxes
""" """
# initially, drag all objectness of all boxes to 0 # initially, drag all objectness of all boxes to 0
conf_delta = pred_box_conf - 0 conf_delta = pred_box_conf - 0
# then, ignore the boxes which have good overlap with some true box # then, ignore the boxes which have good overlap with some true box
true_xy = true_boxes[..., 0:2] / grid_factor true_xy = true_boxes[..., 0:2] / grid_factor
true_wh = true_boxes[..., 2:4] / net_factor true_wh = true_boxes[..., 2:4] / net_factor
true_wh_half = true_wh / 2. true_wh_half = true_wh / 2.
true_mins = true_xy - true_wh_half true_mins = true_xy - true_wh_half
true_maxes = true_xy + true_wh_half true_maxes = true_xy + true_wh_half
pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4) pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4) pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)
pred_wh_half = pred_wh / 2. pred_wh_half = pred_wh / 2.
pred_mins = pred_xy - pred_wh_half pred_mins = pred_xy - pred_wh_half
pred_maxes = pred_xy + pred_wh_half pred_maxes = pred_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins) intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_wh[..., 0] * true_wh[..., 1] true_areas = true_wh[..., 0] * true_wh[..., 1]
pred_areas = pred_wh[..., 0] * pred_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas) iou_scores = tf.truediv(intersect_areas, union_areas)
best_ious = tf.reduce_max(iou_scores, axis=4) best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4)
conf_delta *= tf.expand_dims(tf.to_float(best_ious < self.ignore_thresh), 4) conf_delta *= tf.expand_dims(tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)
""" """
Compute some online statistics Compute some online statistics
""" """
true_xy = true_box_xy / grid_factor true_xy = true_box_xy / grid_factor
true_wh = tf.exp(true_box_wh) * self.anchors / net_factor true_wh = tf.exp(true_box_wh) * self.anchors / net_factor
@@ -114,51 +114,52 @@ class YoloLayer(Layer):
true_maxes = true_xy + true_wh_half true_maxes = true_xy + true_wh_half
pred_xy = pred_box_xy / grid_factor pred_xy = pred_box_xy / grid_factor
pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor
pred_wh_half = pred_wh / 2. pred_wh_half = pred_wh / 2.
pred_mins = pred_xy - pred_wh_half pred_mins = pred_xy - pred_wh_half
pred_maxes = pred_xy + pred_wh_half pred_maxes = pred_xy + pred_wh_half
intersect_mins = tf.maximum(pred_mins, true_mins) intersect_mins = tf.maximum(pred_mins, true_mins)
intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_maxes = tf.minimum(pred_maxes, true_maxes)
intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
true_areas = true_wh[..., 0] * true_wh[..., 1] true_areas = true_wh[..., 0] * true_wh[..., 1]
pred_areas = pred_wh[..., 0] * pred_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
union_areas = pred_areas + true_areas - intersect_areas union_areas = pred_areas + true_areas - intersect_areas
iou_scores = tf.truediv(intersect_areas, union_areas) iou_scores = tf.truediv(intersect_areas, union_areas)
iou_scores = object_mask * tf.expand_dims(iou_scores, 4) iou_scores = object_mask * tf.expand_dims(iou_scores, 4)
count = tf.reduce_sum(object_mask)
count_noobj = tf.reduce_sum(1 - object_mask) count = tf.reduce_sum(input_tensor=object_mask)
detect_mask = tf.to_float((pred_box_conf*object_mask) >= 0.5) count_noobj = tf.reduce_sum(input_tensor=1 - object_mask)
class_mask = tf.expand_dims(tf.to_float(tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4) detect_mask = tf.cast((pred_box_conf*object_mask) >= 0.5, dtype=tf.float32)
recall50 = tf.reduce_sum(tf.to_float(iou_scores >= 0.5 ) * detect_mask * class_mask) / (count + 1e-3) class_mask = tf.expand_dims(tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1), true_box_class), dtype=tf.float32), 4)
recall75 = tf.reduce_sum(tf.to_float(iou_scores >= 0.75) * detect_mask * class_mask) / (count + 1e-3) recall50 = tf.reduce_sum(input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3)
avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3) recall75 = tf.reduce_sum(input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3)
avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3) avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3)
avg_noobj = tf.reduce_sum(pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3) avg_obj = tf.reduce_sum(input_tensor=pred_box_conf * object_mask) / (count + 1e-3)
avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3) avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3)
avg_cat = tf.reduce_sum(input_tensor=object_mask * class_mask) / (count + 1e-3)
""" """
Warm-up training Warm-up training
""" """
batch_seen = tf.assign_add(batch_seen, 1.) #batch_seen = tf.assign_add(batch_seen, 1.)
batch_seen.assign_add(1.)
true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), true_box_xy, true_box_wh, xywh_mask = tf.cond(pred=tf.less(batch_seen, self.warmup_batches+1),
lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), true_fn=lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask),
true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask),
tf.ones_like(object_mask)], tf.ones_like(object_mask)],
lambda: [true_box_xy, false_fn=lambda: [true_box_xy,
true_box_wh, true_box_wh,
object_mask]) object_mask])
""" """
Compare each true box to all anchor boxes Compare each true box to all anchor boxes
""" """
wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale
@@ -169,10 +170,10 @@ class YoloLayer(Layer):
tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \ tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
self.class_scale self.class_scale
loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1,5))) loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta), axis=list(range(1,5)))
loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1,5))) loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta), axis=list(range(1,5)))
loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1,5))) loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta), axis=list(range(1,5)))
loss_class = tf.reduce_sum(class_delta, list(range(1,5))) loss_class = tf.reduce_sum(input_tensor=class_delta, axis=list(range(1,5)))
loss = loss_xy + loss_wh + loss_conf + loss_class loss = loss_xy + loss_wh + loss_conf + loss_class
@@ -181,12 +182,12 @@ class YoloLayer(Layer):
#loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000) #loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000)
#loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000) #loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000)
#loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000) #loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000) #loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000) #loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)
#loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), #loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy),
# tf.reduce_sum(loss_wh), # tf.reduce_sum(loss_wh),
# tf.reduce_sum(loss_conf), # tf.reduce_sum(loss_conf),
# tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000) # tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000)
return loss*self.grid_scale return loss*self.grid_scale
@@ -197,30 +198,30 @@ class YoloLayer(Layer):
def _conv_block(inp, convs, do_skip=True): def _conv_block(inp, convs, do_skip=True):
x = inp x = inp
count = 0 count = 0
for conv in convs: for conv in convs:
if count == (len(convs) - 2) and do_skip: if count == (len(convs) - 2) and do_skip:
skip_connection = x skip_connection = x
count += 1 count += 1
if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike tensorflow darknet prefer left and top paddings if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike tensorflow darknet prefer left and top paddings
x = Conv2D(conv['filter'], x = Conv2D(conv['filter'],
conv['kernel'], conv['kernel'],
strides=conv['stride'], strides=conv['stride'],
padding='valid' if conv['stride'] > 1 else 'same', # unlike tensorflow darknet prefer left and top paddings padding='valid' if conv['stride'] > 1 else 'same', # unlike tensorflow darknet prefer left and top paddings
name='conv_' + str(conv['layer_idx']), name='conv_' + str(conv['layer_idx']),
use_bias=False if conv['bnorm'] else True)(x) use_bias=False if conv['bnorm'] else True)(x)
if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
return add([skip_connection, x]) if do_skip else x return add([skip_connection, x]) if do_skip else x
def create_yolov3_model( def create_yolov3_model(
nb_class, nb_class,
anchors, anchors,
max_box_per_image, max_box_per_image,
max_grid, max_grid,
batch_size, batch_size,
warmup_batches, warmup_batches,
ignore_thresh, ignore_thresh,
grid_scales, grid_scales,
@@ -259,9 +260,9 @@ def create_yolov3_model(
for i in range(7): for i in range(7):
x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
skip_36 = x skip_36 = x
# Layer 37 => 40 # Layer 37 => 40
x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38}, {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
@@ -271,9 +272,9 @@ def create_yolov3_model(
for i in range(7): for i in range(7):
x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
skip_61 = x skip_61 = x
# Layer 62 => 65 # Layer 62 => 65
x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63}, {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
@@ -283,7 +284,7 @@ def create_yolov3_model(
for i in range(3): for i in range(3):
x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
# Layer 75 => 79 # Layer 75 => 79
x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75}, x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76}, {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
@@ -294,11 +295,11 @@ def create_yolov3_model(
# Layer 80 => 82 # Layer 80 => 82
pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80}, pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80},
{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False) {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False)
loss_yolo_1 = YoloLayer(anchors[12:], loss_yolo_1 = YoloLayer(anchors[12:],
[1*num for num in max_grid], [1*num for num in max_grid],
batch_size, batch_size,
warmup_batches, warmup_batches,
ignore_thresh, ignore_thresh,
grid_scales[0], grid_scales[0],
obj_scale, obj_scale,
noobj_scale, noobj_scale,
@@ -320,11 +321,11 @@ def create_yolov3_model(
# Layer 92 => 94 # Layer 92 => 94
pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92}, pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92},
{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False) {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False)
loss_yolo_2 = YoloLayer(anchors[6:12], loss_yolo_2 = YoloLayer(anchors[6:12],
[2*num for num in max_grid], [2*num for num in max_grid],
batch_size, batch_size,
warmup_batches, warmup_batches,
ignore_thresh, ignore_thresh,
grid_scales[1], grid_scales[1],
obj_scale, obj_scale,
noobj_scale, noobj_scale,
@@ -344,16 +345,16 @@ def create_yolov3_model(
{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103}, {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103},
{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104}, {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104},
{'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False) {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False)
loss_yolo_3 = YoloLayer(anchors[:6], loss_yolo_3 = YoloLayer(anchors[:6],
[4*num for num in max_grid], [4*num for num in max_grid],
batch_size, batch_size,
warmup_batches, warmup_batches,
ignore_thresh, ignore_thresh,
grid_scales[2], grid_scales[2],
obj_scale, obj_scale,
noobj_scale, noobj_scale,
xywh_scale, xywh_scale,
class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes]) class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes])
train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3]) train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3])
infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3]) infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3])
@@ -361,4 +362,4 @@ def create_yolov3_model(
return [train_model, infer_model] return [train_model, infer_model]
def dummy_loss(y_true, y_pred): def dummy_loss(y_true, y_pred):
return tf.sqrt(tf.reduce_sum(y_pred)) return tf.sqrt(tf.reduce_sum(input_tensor=y_pred))

View File

@@ -1,9 +1,9 @@
import argparse import argparse
import os import os
import numpy as np import numpy as np
from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
from keras.layers.merge import add, concatenate from tensorflow.keras.layers.merge import add, concatenate
from keras.models import Model from tensorflow.keras.models import Model
import struct import struct
import cv2 import cv2
@@ -37,12 +37,12 @@ class WeightReader:
w_f.read(4) w_f.read(4)
transpose = (major > 1000) or (minor > 1000) transpose = (major > 1000) or (minor > 1000)
binary = w_f.read() binary = w_f.read()
self.offset = 0 self.offset = 0
self.all_weights = np.frombuffer(binary, dtype='float32') self.all_weights = np.frombuffer(binary, dtype='float32')
def read_bytes(self, size): def read_bytes(self, size):
self.offset = self.offset + size self.offset = self.offset + size
return self.all_weights[self.offset-size:self.offset] return self.all_weights[self.offset-size:self.offset]
@@ -61,14 +61,14 @@ class WeightReader:
beta = self.read_bytes(size) # bias beta = self.read_bytes(size) # bias
gamma = self.read_bytes(size) # scale gamma = self.read_bytes(size) # scale
mean = self.read_bytes(size) # mean mean = self.read_bytes(size) # mean
var = self.read_bytes(size) # variance var = self.read_bytes(size) # variance
weights = norm_layer.set_weights([gamma, beta, mean, var]) weights = norm_layer.set_weights([gamma, beta, mean, var])
if len(conv_layer.get_weights()) > 1: if len(conv_layer.get_weights()) > 1:
bias = self.read_bytes(np.prod(conv_layer.get_weights()[1].shape)) bias = self.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape)) kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape))) kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
kernel = kernel.transpose([2,3,1,0]) kernel = kernel.transpose([2,3,1,0])
conv_layer.set_weights([kernel, bias]) conv_layer.set_weights([kernel, bias])
@@ -78,8 +78,8 @@ class WeightReader:
kernel = kernel.transpose([2,3,1,0]) kernel = kernel.transpose([2,3,1,0])
conv_layer.set_weights([kernel]) conv_layer.set_weights([kernel])
except ValueError: except ValueError:
print("no convolution #" + str(i)) print("no convolution #" + str(i))
def reset(self): def reset(self):
self.offset = 0 self.offset = 0
@@ -89,7 +89,7 @@ class BoundBox:
self.ymin = ymin self.ymin = ymin
self.xmax = xmax self.xmax = xmax
self.ymax = ymax self.ymax = ymax
self.objness = objness self.objness = objness
self.classes = classes self.classes = classes
@@ -99,30 +99,30 @@ class BoundBox:
def get_label(self): def get_label(self):
if self.label == -1: if self.label == -1:
self.label = np.argmax(self.classes) self.label = np.argmax(self.classes)
return self.label return self.label
def get_score(self): def get_score(self):
if self.score == -1: if self.score == -1:
self.score = self.classes[self.get_label()] self.score = self.classes[self.get_label()]
return self.score return self.score
def _conv_block(inp, convs, skip=True): def _conv_block(inp, convs, skip=True):
x = inp x = inp
count = 0 count = 0
for conv in convs: for conv in convs:
if count == (len(convs) - 2) and skip: if count == (len(convs) - 2) and skip:
skip_connection = x skip_connection = x
count += 1 count += 1
if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top
x = Conv2D(conv['filter'], x = Conv2D(conv['filter'],
conv['kernel'], conv['kernel'],
strides=conv['stride'], strides=conv['stride'],
padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top
name='conv_' + str(conv['layer_idx']), name='conv_' + str(conv['layer_idx']),
use_bias=False if conv['bnorm'] else True)(x) use_bias=False if conv['bnorm'] else True)(x)
if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
@@ -142,7 +142,7 @@ def _interval_overlap(interval_a, interval_b):
if x2 < x3: if x2 < x3:
return 0 return 0
else: else:
return min(x2,x4) - x3 return min(x2,x4) - x3
def _sigmoid(x): def _sigmoid(x):
return 1. / (1. + np.exp(-x)) return 1. / (1. + np.exp(-x))
@@ -150,14 +150,14 @@ def _sigmoid(x):
def bbox_iou(box1, box2): def bbox_iou(box1, box2):
intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
intersect = intersect_w * intersect_h intersect = intersect_w * intersect_h
w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
union = w1*h1 + w2*h2 - intersect union = w1*h1 + w2*h2 - intersect
return float(intersect) / union return float(intersect) / union
def make_yolov3_model(): def make_yolov3_model():
@@ -187,9 +187,9 @@ def make_yolov3_model():
for i in range(7): for i in range(7):
x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
skip_36 = x skip_36 = x
# Layer 37 => 40 # Layer 37 => 40
x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38}, {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
@@ -199,9 +199,9 @@ def make_yolov3_model():
for i in range(7): for i in range(7):
x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
skip_61 = x skip_61 = x
# Layer 62 => 65 # Layer 62 => 65
x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63}, {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
@@ -211,7 +211,7 @@ def make_yolov3_model():
for i in range(3): for i in range(3):
x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
# Layer 75 => 79 # Layer 75 => 79
x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75}, x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76}, {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
@@ -253,7 +253,7 @@ def make_yolov3_model():
{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104}, {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104},
{'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False) {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)
model = Model(input_image, [yolo_82, yolo_94, yolo_106]) model = Model(input_image, [yolo_82, yolo_94, yolo_106])
return model return model
def preprocess_input(image, net_h, net_w): def preprocess_input(image, net_h, net_w):
@@ -293,25 +293,25 @@ def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w):
for i in range(grid_h*grid_w): for i in range(grid_h*grid_w):
row = i / grid_w row = i / grid_w
col = i % grid_w col = i % grid_w
for b in range(nb_box): for b in range(nb_box):
# 4th element is objectness score # 4th element is objectness score
objectness = netout[int(row)][int(col)][b][4] objectness = netout[int(row)][int(col)][b][4]
#objectness = netout[..., :4] #objectness = netout[..., :4]
if(objectness.all() <= obj_thresh): continue if(objectness.all() <= obj_thresh): continue
# first 4 elements are x, y, w, and h # first 4 elements are x, y, w, and h
x, y, w, h = netout[int(row)][int(col)][b][:4] x, y, w, h = netout[int(row)][int(col)][b][:4]
x = (col + x) / grid_w # center position, unit: image width x = (col + x) / grid_w # center position, unit: image width
y = (row + y) / grid_h # center position, unit: image height y = (row + y) / grid_h # center position, unit: image height
w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
# last elements are class probabilities # last elements are class probabilities
classes = netout[int(row)][col][b][5:] classes = netout[int(row)][col][b][5:]
box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
#box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, None, classes) #box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, None, classes)
@@ -326,22 +326,22 @@ def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
else: else:
new_h = net_w new_h = net_w
new_w = (image_w*net_h)/image_h new_w = (image_w*net_h)/image_h
for i in range(len(boxes)): for i in range(len(boxes)):
x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w) boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
def do_nms(boxes, nms_thresh): def do_nms(boxes, nms_thresh):
if len(boxes) > 0: if len(boxes) > 0:
nb_class = len(boxes[0].classes) nb_class = len(boxes[0].classes)
else: else:
return return
for c in range(nb_class): for c in range(nb_class):
sorted_indices = np.argsort([-box.classes[c] for box in boxes]) sorted_indices = np.argsort([-box.classes[c] for box in boxes])
@@ -355,28 +355,28 @@ def do_nms(boxes, nms_thresh):
if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
boxes[index_j].classes[c] = 0 boxes[index_j].classes[c] = 0
def draw_boxes(image, boxes, labels, obj_thresh):
    """Draw every box that has at least one class score above ``obj_thresh``.

    For each box, the names of all labels whose ``box.classes[i]`` exceeds
    ``obj_thresh`` are concatenated into the caption and printed with their
    percentage.  Boxes with no such label are left undrawn.

    Args:
        image: image array drawn on in place with OpenCV.
        boxes: iterable of boxes exposing ``classes``, ``xmin``, ``ymin``,
            ``xmax``, ``ymax`` and ``get_score()``.
        labels: class names, indexed like ``box.classes``.
        obj_thresh: strict lower bound a class score must exceed.

    Returns:
        The same ``image`` object.
    """
    green = (0,255,0)

    for box in boxes:
        caption = ''
        matched = False

        for idx, name in enumerate(labels):
            score = box.classes[idx]
            if not score > obj_thresh:
                continue
            caption += name
            matched = True
            print(name + ': ' + str(score*100) + '%')

        if not matched:
            continue

        top_left = (box.xmin, box.ymin)
        bottom_right = (box.xmax, box.ymax)
        cv2.rectangle(image, top_left, bottom_right, green, 3)

        # Caption sits 13 px above the box; font scale follows image height.
        text = caption + ' ' + str(box.get_score())
        text_origin = (box.xmin, box.ymin - 13)
        font_scale = 1e-3 * image.shape[0]
        cv2.putText(image,
                    text,
                    text_origin,
                    cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale,
                    green, 2)

    return image
def _main_(args): def _main_(args):
weights_path = args.weights weights_path = args.weights
@@ -412,7 +412,7 @@ def _main_(args):
# run the prediction # run the prediction
yolos = yolov3.predict(new_image) yolos = yolov3.predict(new_image)
boxes = [] boxes = []
for i in range(len(yolos)): for i in range(len(yolos)):
# decode the output of the network # decode the output of the network
boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh, nms_thresh, net_h, net_w) boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh, nms_thresh, net_h, net_w)
@@ -421,13 +421,13 @@ def _main_(args):
correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w) correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)
# suppress non-maximal boxes # suppress non-maximal boxes
do_nms(boxes, nms_thresh) do_nms(boxes, nms_thresh)
# draw bounding boxes on the image using labels # draw bounding boxes on the image using labels
draw_boxes(image, boxes, labels, obj_thresh) draw_boxes(image, boxes, labels, obj_thresh)
# write the image with bounding boxes to file # write the image with bounding boxes to file
cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], (image).astype('uint8')) cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], (image).astype('uint8'))
if __name__ == '__main__': if __name__ == '__main__':
args = argparser.parse_args() args = argparser.parse_args()

365
keras-yolo3-master/yolo_old.py Executable file
View File

@@ -0,0 +1,365 @@
from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda
from tensorflow.keras.layers import add, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
import tensorflow as tf
class YoloLayer(Layer):
    """Keras layer that computes the YOLOv3 training loss for one output scale.

    The layer is called on ``[input_image, y_pred, y_true, true_boxes]`` and
    returns one loss value per batch element (coordinate, size, objectness
    and class terms summed and multiplied by ``grid_scale``).

    Args:
        anchors: six numbers (three ``w, h`` pairs) for this scale; stored
            as a constant of shape ``[1, 1, 1, 3, 2]``.
        max_grid: ``(max_grid_h, max_grid_w)`` size of the precomputed cell
            index grid.
        batch_size: the cell grid is tiled to this batch size.
        warmup_batches: while fewer than ``warmup_batches + 1`` batches have
            been counted, the warm-up targets are used (see ``call``).
        ignore_thresh: predictions whose best IoU with any true box reaches
            this value are not penalised as "no object".
        grid_scale: multiplier applied to the final loss.
        obj_scale, noobj_scale, xywh_scale, class_scale: multipliers of the
            individual loss terms.
        **kwargs: forwarded to ``Layer.__init__``.
    """

    def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh,
                    grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale,
                    **kwargs):
        # Set up the Keras base class before assigning any attribute.
        super().__init__(**kwargs)

        # make the model settings persistent
        self.ignore_thresh  = ignore_thresh
        self.warmup_batches = warmup_batches
        self.anchors        = tf.constant(anchors, dtype='float', shape=[1,1,1,3,2])
        self.grid_scale     = grid_scale
        self.obj_scale      = obj_scale
        self.noobj_scale    = noobj_scale
        self.xywh_scale     = xywh_scale
        self.class_scale    = class_scale

        # make a persistent mesh grid: cell_x[0, r, c] == c, cell_y[0, r, c] == r
        max_grid_h, max_grid_w = max_grid
        grid_shape = (1, max_grid_h, max_grid_w, 1, 1)

        cell_x = tf.cast(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), grid_shape), dtype=tf.float32)
        # Built from the row index directly (not by transposing cell_x) so
        # that the concat below also works when max_grid_h != max_grid_w.
        # For a square grid the values are identical to the transpose.
        row_index = tf.tile(tf.reshape(tf.range(max_grid_h), (max_grid_h, 1)), [1, max_grid_w])
        cell_y = tf.cast(tf.reshape(row_index, grid_shape), dtype=tf.float32)
        self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1])

    def build(self, input_shape):
        super().build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        """Compute the loss for ``x = [input_image, y_pred, y_true, true_boxes]``.

        Returns a tensor with one loss value per batch element.
        """
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0))

        # initialize the masks
        object_mask     = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
        # NOTE(review): the counter is created inside call(), so it exists
        # per trace/invocation of this method - confirm this matches how the
        # training loop executes the layer.
        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
        grid_h      = tf.shape(y_true)[1]
        grid_w      = tf.shape(y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2])

        net_h       = tf.shape(input_image)[1]
        net_w       = tf.shape(input_image)[2]
        net_factor  = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2])

        #
        # Adjust prediction
        #
        pred_box_xy    = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh    = y_pred[..., 2:4]                                                       # t_wh
        pred_box_conf  = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4)                          # adjust confidence
        pred_box_class = y_pred[..., 5:]                                                        # adjust class probabilities

        #
        # Adjust ground truth
        #
        true_box_xy    = y_true[..., 0:2] # (sigma(t_xy) + c_xy)
        true_box_wh    = y_true[..., 2:4] # t_wh
        true_box_conf  = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = tf.argmax(y_true[..., 5:], -1)

        #
        # Compare each predicted box to all true boxes
        #
        # initially, drag all objectness of all boxes to 0
        conf_delta  = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor

        true_wh_half = true_wh / 2.
        true_mins    = true_xy - true_wh_half
        true_maxes   = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins    = pred_xy - pred_wh_half
        pred_maxes   = pred_xy + pred_wh_half

        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)

        best_ious   = tf.reduce_max(iou_scores, axis=4)
        conf_delta *= tf.expand_dims(tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)

        #
        # Compute some online statistics
        #
        # These values are diagnostics only: nothing below reads them (the
        # tf.Print statements that used to report them were disabled).
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins    = true_xy - true_wh_half
        true_maxes   = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

        pred_wh_half = pred_wh / 2.
        pred_mins    = pred_xy - pred_wh_half
        pred_maxes   = pred_xy + pred_wh_half

        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)
        iou_scores  = object_mask * tf.expand_dims(iou_scores, 4)

        count       = tf.reduce_sum(object_mask)
        count_noobj = tf.reduce_sum(1 - object_mask)
        detect_mask = tf.cast((pred_box_conf*object_mask) >= 0.5, dtype=tf.float32)
        class_mask  = tf.expand_dims(tf.cast(tf.equal(tf.argmax(pred_box_class, -1), true_box_class), dtype=tf.float32), 4)
        recall50    = tf.reduce_sum(tf.cast(iou_scores >= 0.5 , dtype=tf.float32) * detect_mask  * class_mask) / (count + 1e-3)
        recall75    = tf.reduce_sum(tf.cast(iou_scores >= 0.75, dtype=tf.float32) * detect_mask  * class_mask) / (count + 1e-3)
        avg_iou     = tf.reduce_sum(iou_scores) / (count + 1e-3)
        avg_obj     = tf.reduce_sum(pred_box_conf  * object_mask)  / (count + 1e-3)
        avg_noobj   = tf.reduce_sum(pred_box_conf  * (1-object_mask))  / (count_noobj + 1e-3)
        avg_cat     = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3)

        #
        # Warm-up training
        #
        # Use the value returned by assign_add so that the comparison below
        # depends on the increment having happened.
        batch_seen = batch_seen.assign_add(1.)

        # During warm-up, cells without an object get the cell centre as xy
        # target and every cell contributes to the xy/wh loss; afterwards
        # only cells containing an object do.
        true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1),
                              lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask),
                                       true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask),
                                       tf.ones_like(object_mask)],
                              lambda: [true_box_xy,
                                       true_box_wh,
                                       object_mask])

        #
        # Compare each true box to all anchor boxes
        #
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale

        xy_delta    = xywh_mask   * (pred_box_xy-true_box_xy) * wh_scale * self.xywh_scale
        wh_delta    = xywh_mask   * (pred_box_wh-true_box_wh) * wh_scale * self.xywh_scale
        conf_delta  = object_mask * (pred_box_conf-true_box_conf) * self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale
        class_delta = object_mask * \
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

        # Sum every term over all non-batch axes -> one value per sample.
        loss_xy    = tf.reduce_sum(tf.square(xy_delta),       list(range(1,5)))
        loss_wh    = tf.reduce_sum(tf.square(wh_delta),       list(range(1,5)))
        loss_conf  = tf.reduce_sum(tf.square(conf_delta),     list(range(1,5)))
        loss_class = tf.reduce_sum(class_delta,               list(range(1,5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class

        return loss*self.grid_scale

    def compute_output_shape(self, input_shape):
        return [(None, 1)]
def _conv_block(inp, convs, do_skip=True):
    """Apply a sequence of convolution stages and optionally add a skip link.

    Each entry of ``convs`` is a dict with the keys ``filter``, ``kernel``,
    ``stride``, ``bnorm``, ``leaky`` and ``layer_idx``.  A stage is a
    ``Conv2D`` named ``conv_<layer_idx>``, followed by batch normalisation
    (``bnorm_<layer_idx>``) and/or a leaky ReLU (``leaky_<layer_idx>``) when
    the corresponding flag is set.

    When ``do_skip`` is true, the tensor entering the second-to-last stage
    is added to the block output.
    """
    x = inp
    skip_index = len(convs) - 2

    for position, spec in enumerate(convs):
        if do_skip and position == skip_index:
            skip_connection = x

        layer_id = str(spec['layer_idx'])
        downsample = spec['stride'] > 1

        # unlike tensorflow darknet prefer left and top paddings
        if downsample:
            x = ZeroPadding2D(((1,0),(1,0)))(x)
        padding = 'valid' if downsample else 'same'

        x = Conv2D(spec['filter'],
                   spec['kernel'],
                   strides=spec['stride'],
                   padding=padding,
                   name='conv_' + layer_id,
                   use_bias=not spec['bnorm'])(x)  # bias only without batch norm

        if spec['bnorm']:
            x = BatchNormalization(epsilon=0.001, name='bnorm_' + layer_id)(x)
        if spec['leaky']:
            x = LeakyReLU(alpha=0.1, name='leaky_' + layer_id)(x)

    if not do_skip:
        return x
    return add([skip_connection, x])
def create_yolov3_model(
    nb_class,
    anchors,
    max_box_per_image,
    max_grid,
    batch_size,
    warmup_batches,
    ignore_thresh,
    grid_scales,
    obj_scale,
    noobj_scale,
    xywh_scale,
    class_scale
):
    """Build the YOLOv3 training and inference models.

    Both models share the same convolutional layers.  The training model
    additionally takes the ground-truth tensors and outputs the three
    ``YoloLayer`` losses; the inference model maps an image to the three raw
    prediction tensors.

    Returns:
        ``[train_model, infer_model]``.
    """
    def spec(filters, kernel, stride, idx, activated=True):
        # One conv stage description; batch norm and leaky ReLU are always
        # switched on or off together in this network.
        return {'filter': filters, 'kernel': kernel, 'stride': stride,
                'bnorm': activated, 'leaky': activated, 'layer_idx': idx}

    def attach_loss(anchor_subset, grid_multiplier, grid_scale, prediction, target):
        # Build the loss layer of one scale and apply it right away.
        loss_layer = YoloLayer(anchor_subset,
                               [grid_multiplier*num for num in max_grid],
                               batch_size,
                               warmup_batches,
                               ignore_thresh,
                               grid_scale,
                               obj_scale,
                               noobj_scale,
                               xywh_scale,
                               class_scale)
        return loss_layer([input_image, prediction, target, true_boxes])

    head_filters = (3*(5+nb_class))
    target_shape = (None, None, len(anchors)//6, 4+1+nb_class)  # grid_h, grid_w, nb_anchor, 5+nb_class

    input_image = Input(shape=(None, None, 3))  # net_h, net_w, 3
    true_boxes  = Input(shape=(1, 1, 1, max_box_per_image, 4))
    true_yolo_1 = Input(shape=target_shape)
    true_yolo_2 = Input(shape=target_shape)
    true_yolo_3 = Input(shape=target_shape)

    # Layer  0 => 4
    x = _conv_block(input_image, [spec(32, 3, 1, 0),
                                  spec(64, 3, 2, 1),
                                  spec(32, 1, 1, 2),
                                  spec(64, 3, 1, 3)])

    # Layer  5 => 8
    x = _conv_block(x, [spec(128, 3, 2, 5),
                        spec(64, 1, 1, 6),
                        spec(128, 3, 1, 7)])

    # Layer  9 => 11
    x = _conv_block(x, [spec(64, 1, 1, 9),
                        spec(128, 3, 1, 10)])

    # Layer 12 => 15
    x = _conv_block(x, [spec(256, 3, 2, 12),
                        spec(128, 1, 1, 13),
                        spec(256, 3, 1, 14)])

    # Layer 16 => 36
    for i in range(7):
        x = _conv_block(x, [spec(128, 1, 1, 16+i*3),
                            spec(256, 3, 1, 17+i*3)])

    skip_36 = x

    # Layer 37 => 40
    x = _conv_block(x, [spec(512, 3, 2, 37),
                        spec(256, 1, 1, 38),
                        spec(512, 3, 1, 39)])

    # Layer 41 => 61
    for i in range(7):
        x = _conv_block(x, [spec(256, 1, 1, 41+i*3),
                            spec(512, 3, 1, 42+i*3)])

    skip_61 = x

    # Layer 62 => 65
    x = _conv_block(x, [spec(1024, 3, 2, 62),
                        spec(512, 1, 1, 63),
                        spec(1024, 3, 1, 64)])

    # Layer 66 => 74
    for i in range(3):
        x = _conv_block(x, [spec(512, 1, 1, 66+i*3),
                            spec(1024, 3, 1, 67+i*3)])

    # Layer 75 => 79
    x = _conv_block(x, [spec(512, 1, 1, 75),
                        spec(1024, 3, 1, 76),
                        spec(512, 1, 1, 77),
                        spec(1024, 3, 1, 78),
                        spec(512, 1, 1, 79)], do_skip=False)

    # Layer 80 => 82
    pred_yolo_1 = _conv_block(x, [spec(1024, 3, 1, 80),
                                  spec(head_filters, 1, 1, 81, activated=False)], do_skip=False)
    loss_yolo_1 = attach_loss(anchors[12:], 1, grid_scales[0], pred_yolo_1, true_yolo_1)

    # Layer 83 => 86
    x = _conv_block(x, [spec(256, 1, 1, 84)], do_skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])

    # Layer 87 => 91
    x = _conv_block(x, [spec(256, 1, 1, 87),
                        spec(512, 3, 1, 88),
                        spec(256, 1, 1, 89),
                        spec(512, 3, 1, 90),
                        spec(256, 1, 1, 91)], do_skip=False)

    # Layer 92 => 94
    pred_yolo_2 = _conv_block(x, [spec(512, 3, 1, 92),
                                  spec(head_filters, 1, 1, 93, activated=False)], do_skip=False)
    loss_yolo_2 = attach_loss(anchors[6:12], 2, grid_scales[1], pred_yolo_2, true_yolo_2)

    # Layer 95 => 98
    x = _conv_block(x, [spec(128, 1, 1, 96)], do_skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])

    # Layer 99 => 106
    pred_yolo_3 = _conv_block(x, [spec(128, 1, 1, 99),
                                  spec(256, 3, 1, 100),
                                  spec(128, 1, 1, 101),
                                  spec(256, 3, 1, 102),
                                  spec(128, 1, 1, 103),
                                  spec(256, 3, 1, 104),
                                  spec(head_filters, 1, 1, 105, activated=False)], do_skip=False)
    loss_yolo_3 = attach_loss(anchors[:6], 4, grid_scales[2], pred_yolo_3, true_yolo_3)

    training_inputs = [input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3]
    training_outputs = [loss_yolo_1, loss_yolo_2, loss_yolo_3]
    train_model = Model(training_inputs, training_outputs)
    infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3])

    return [train_model, infer_model]
def dummy_loss(y_true, y_pred):
    """Return the square root of the sum of ``y_pred``; ``y_true`` is ignored."""
    total = tf.reduce_sum(y_pred)
    return tf.sqrt(total)

View File

@@ -8,9 +8,10 @@ import cv2
import sys import sys
sys.path += [os.path.abspath('keras-yolo3-master')] sys.path += [os.path.abspath('keras-yolo3-master')]
from utils.utils import get_yolo_boxes, makedirs from utils.utils import get_yolo_boxes, makedirs
from utils.bbox import draw_boxes from utils.bbox import draw_boxes
from keras.models import load_model from tensorflow.keras.models import load_model
from tqdm import tqdm from tqdm import tqdm
import numpy as np import numpy as np

View File

@@ -10,75 +10,75 @@ sys.path += [os.path.abspath('keras-yolo3-master')]
from utils.utils import get_yolo_boxes, makedirs from utils.utils import get_yolo_boxes, makedirs
from utils.bbox import draw_boxes from utils.bbox import draw_boxes
from keras.models import load_model from tensorflow.keras.models import load_model
from tqdm import tqdm from tqdm import tqdm
import numpy as np import numpy as np
def disconnect(image, boxes, obj_thresh = 0.5, area_min = 400, merge = 0, z_thresh = 1.8):
    """Keep the boxes whose mean pixel intensity is an outlier among all boxes.

    Every box is first shrunk by ``merge`` pixels on each side.  A box is a
    candidate when the shrunk box lies strictly inside the image, its score
    exceeds ``obj_thresh`` and its area exceeds ``area_min``.  The mean
    intensity of each candidate is standardised against all candidates, and
    only boxes whose standardised value exceeds ``z_thresh`` are returned.

    Side effects:
        ``box.z_score`` is set on every candidate (it holds the raw mean
        intensity, not the standardised value), and ``box.classes[0]`` of
        every returned box is overwritten with a score that is 0.5 at
        ``z_thresh``, reaches 1 at a standardised value of 3, and is capped
        at 1.

    Returns:
        The list of boxes that passed; ``[]`` when there are no candidates
        or when all candidates have the same intensity.
    """
    img_h, img_w = image.shape[0], image.shape[1]
    candidates = []

    for box in boxes:
        xmin = box.xmin + merge
        xmax = box.xmax - merge
        ymin = box.ymin + merge
        ymax = box.ymax - merge

        inside = xmin > 0 and ymin > 0 and xmax < img_w and ymax < img_h
        if not (inside and box.get_score() > obj_thresh):
            continue

        area = (ymax - ymin)*(xmax - xmin)
        # Divide only after the area check so that a degenerate box never
        # causes a division by zero.
        if area > area_min:
            # Builtin int replaces np.int, which newer NumPy no longer has.
            patch = image[int(ymin):int(ymax), int(xmin):int(xmax)]
            box.z_score = np.sum(patch) / area
            candidates.append(box)

    # Nothing to standardise: avoid np.mean/np.std on an empty list.
    if not candidates:
        return []

    intensities = np.array([box.z_score for box in candidates])
    mean_score = intensities.mean()
    sd_score = intensities.std()

    # Identical intensities -> no outliers; avoid dividing by zero.
    if sd_score == 0:
        return []

    new_boxes = [box for box in candidates if (box.z_score - mean_score)/sd_score > z_thresh]

    for box in new_boxes:
        z_score = (box.z_score - mean_score)/sd_score
        box.classes[0] = min((z_score-z_thresh)*0.5/(3-z_thresh)+ 0.5, 1)

    return new_boxes
def disconnect_plot(image, boxes, obj_thresh = 0.5, area_min = 400, merge = 0, z_thresh = 1.8): def disconnect_plot(image, boxes, obj_thresh = 0.5, area_min = 400, merge = 0, z_thresh = 1.8):
new_boxes = [] new_boxes = []
for num, box in enumerate(boxes): for num, box in enumerate(boxes):
xmin = box.xmin + merge xmin = box.xmin + merge
xmax = box.xmax - merge xmax = box.xmax - merge
ymin = box.ymin + merge ymin = box.ymin + merge
ymax = box.ymax - merge ymax = box.ymax - merge
if xmin > 0 and ymin > 0 and xmax < image.shape[1] and ymax < image.shape[0] and box.get_score() > obj_thresh: if xmin > 0 and ymin > 0 and xmax < image.shape[1] and ymax < image.shape[0] and box.get_score() > obj_thresh:
area = (ymax - ymin)*(xmax - xmin) area = (ymax - ymin)*(xmax - xmin)
z_score = np.sum(image[np.int(ymin):np.int(ymax), np.int(xmin):np.int(xmax)]) / area z_score = np.sum(image[np.int(ymin):np.int(ymax), np.int(xmin):np.int(xmax)]) / area
if area > area_min: if area > area_min:
box.z_score = z_score box.z_score = z_score
new_boxes.append(box) new_boxes.append(box)
#boxes_area_score[str(num)] = {'xmin': xmin, 'xmax': xmax, 'ymin': ymin, 'ymax': ymax, 'score' : score, 'area' : area} #boxes_area_score[str(num)] = {'xmin': xmin, 'xmax': xmax, 'ymin': ymin, 'ymax': ymax, 'score' : score, 'area' : area}
mean_score = np.mean([box.z_score for box in new_boxes]) mean_score = np.mean([box.z_score for box in new_boxes])
sd_score = np.std([box.z_score for box in new_boxes]) sd_score = np.std([box.z_score for box in new_boxes])
normal_score = ([box.z_score for box in new_boxes] - mean_score)/sd_score normal_score = ([box.z_score for box in new_boxes] - mean_score)/sd_score
# plt.figure() # plt.figure()
# _ = plt.hist(normal_score, bins='auto') # arguments are passed to np.histogram # _ = plt.hist(normal_score, bins='auto') # arguments are passed to np.histogram
# plt.title("Histogram with 'auto' bins") # plt.title("Histogram with 'auto' bins")
# plt.show() # plt.show()
# #
# plt.figure() # plt.figure()
# mean = np.mean([boxes_area_score[i]['area'] for i in boxes_area_score]) # mean = np.mean([boxes_area_score[i]['area'] for i in boxes_area_score])
# sd = np.std([boxes_area_score[i]['area'] for i in boxes_area_score]) # sd = np.std([boxes_area_score[i]['area'] for i in boxes_area_score])
@@ -86,37 +86,37 @@ def disconnect_plot(image, boxes, obj_thresh = 0.5, area_min = 400, merge = 0,
# _ = plt.hist(normal, bins='auto') # arguments are passed to np.histogram # _ = plt.hist(normal, bins='auto') # arguments are passed to np.histogram
# plt.title("Histogram with 'auto' bins") # plt.title("Histogram with 'auto' bins")
# plt.show() # plt.show()
new_boxes = [box for box in new_boxes if (box.z_score - mean_score)/sd_score > z_thresh] new_boxes = [box for box in new_boxes if (box.z_score - mean_score)/sd_score > z_thresh]
for box in new_boxes: for box in new_boxes:
z_score = (box.z_score - mean_score)/sd_score z_score = (box.z_score - mean_score)/sd_score
box.classes[0] = min((z_score-z_thresh)*0.5/(3-z_thresh)+ 0.5, 1) box.classes[0] = min((z_score-z_thresh)*0.5/(3-z_thresh)+ 0.5, 1)
colors = plt.cm.brg(np.linspace(0, 1, 21)).tolist() colors = plt.cm.brg(np.linspace(0, 1, 21)).tolist()
plt.figure(figsize=(10,6)) plt.figure(figsize=(10,6))
plt.imshow(I,cmap = 'gray') plt.imshow(I,cmap = 'gray')
current_axis = plt.gca() current_axis = plt.gca()
for box in new_boxes: for box in new_boxes:
color = colors[2] color = colors[2]
#boxes_area_score[key]['score_norm'] = (boxes_area_score[key]['score'] - mean) / sd #boxes_area_score[key]['score_norm'] = (boxes_area_score[key]['score'] - mean) / sd
#z_score = (box.score - mean_score) / sd_score #z_score = (box.score - mean_score) / sd_score
#z_score = (boxes_area_score[key]['area'] ) #z_score = (boxes_area_score[key]['area'] )
### Escribe el z-score ### Escribe el z-score
#if z_score > 1: #if z_score > 1:
current_axis.text((box.xmin + box.xmax)/2, current_axis.text((box.xmin + box.xmax)/2,
(box.ymin+ box.ymax)/2, (box.ymin+ box.ymax)/2,
'%.2f' % box.classes[0], size='x-large', '%.2f' % box.classes[0], size='x-large',
color='white', bbox={'facecolor':color, 'alpha':1.0}) color='white', bbox={'facecolor':color, 'alpha':1.0})
return new_boxes return new_boxes
def _main_(args): def _main_(args):
@@ -225,21 +225,21 @@ def _main_(args):
# the main loop # the main loop
times = [] times = []
images = [cv2.imread(image_path) for image_path in image_paths] images = [cv2.imread(image_path) for image_path in image_paths]
print(images) print(images)
start = time.time() start = time.time()
# predict the bounding boxes # predict the bounding boxes
boxes = get_yolo_boxes(infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh) boxes = get_yolo_boxes(infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh)
boxes = [[box for box in boxes_image if box.get_score() > obj_thresh] for boxes_image in boxes] boxes = [[box for box in boxes_image if box.get_score() > obj_thresh] for boxes_image in boxes]
print('Elapsed time = {}'.format(time.time() - start)) print('Elapsed time = {}'.format(time.time() - start))
times.append(time.time() - start) times.append(time.time() - start)
boxes_disc = [disconnect(image, boxes_image, z_thresh = 1.8) for image, boxes_image in zip(images, boxes)] boxes_disc = [disconnect(image, boxes_image, z_thresh = 1.8) for image, boxes_image in zip(images, boxes)]
for image, boxes_image in zip(images, boxes_disc): for image, boxes_image in zip(images, boxes_disc):
# draw bounding boxes on the image using labels # draw bounding boxes on the image using labels
I = image.copy() I = image.copy()
draw_boxes(I, boxes_image, config['model']['labels'], obj_thresh) draw_boxes(I, boxes_image, config['model']['labels'], obj_thresh)

18
requirements.txt Normal file
View File

@@ -0,0 +1,18 @@
absl-py
astor
gast==0.2.2
grpcio
h5py
Markdown
numpy
opencv-contrib-python
opt-einsum
protobuf
PyYAML
scipy
six
tensorflow==2.1
termcolor
tqdm
Werkzeug
wrapt