diff --git a/config_full_yolo_fault_4_train.json b/config_full_yolo_fault_4_train.json index 8acf479..f998e40 100755 --- a/config_full_yolo_fault_4_train.json +++ b/config_full_yolo_fault_4_train.json @@ -4,13 +4,13 @@ "max_input_size": 400, "anchors": [5,7, 10,14, 15, 15, 26,32, 45,119, 54,18, 94,59, 109,183, 200,21], "labels": ["4"], - "backend": "full_yolo_backend.h5" + "backend": "keras-yolo3-master/full_yolo_backend.h5" }, "train": { - "train_image_folder": "../Train&Test_D/Train/images/", - "train_annot_folder": "../Train&Test_D/Train/anns/", - "cache_name": "../Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", + "train_image_folder": "Train&Test_D/Train/images/", + "train_annot_folder": "Train&Test_D/Train/anns/", + "cache_name": "Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", "train_times": 1, @@ -28,21 +28,21 @@ "class_scale": 1, "tensorboard_dir": "log_experimento_fault_gpu", - "saved_weights_name": "../Result_yolo3_fault_4/yolo3_full_fault_4.h5", + "saved_weights_name": "Result_yolo3_fault_4/yolo3_full_fault_4.h5", "debug": true }, "valid": { - "valid_image_folder": "../Train&Test_D/Test/images/", - "valid_annot_folder": "../Train&Test_D/Test/anns/", - "cache_name": "../Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", + "valid_image_folder": "Train&Test_D/Test/images/", + "valid_annot_folder": "Train&Test_D/Test/anns/", + "cache_name": "Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", "valid_times": 1 }, "test": { - "test_image_folder": "../Train&Test_D/Test/images/", - "test_annot_folder": "../Train&Test_D/Test/anns/", - "cache_name": "../Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", + "test_image_folder": "Train&Test_D/Test/images/", + "test_annot_folder": "Train&Test_D/Test/anns/", + "cache_name": "Result_yolo3_fault_4/Result_yolo3_fault_4.pkl", "test_times": 1 } diff --git a/keras-yolo3-master/__pycache__/callbacks.cpython-37.pyc b/keras-yolo3-master/__pycache__/callbacks.cpython-37.pyc new file mode 100644 index 0000000..9b109fb Binary files /dev/null and b/keras-yolo3-master/__pycache__/callbacks.cpython-37.pyc differ diff --git a/keras-yolo3-master/__pycache__/generator.cpython-37.pyc b/keras-yolo3-master/__pycache__/generator.cpython-37.pyc new file mode 100644 index 0000000..7a9f954 Binary files /dev/null and b/keras-yolo3-master/__pycache__/generator.cpython-37.pyc differ diff --git a/keras-yolo3-master/__pycache__/voc.cpython-37.pyc b/keras-yolo3-master/__pycache__/voc.cpython-37.pyc index 3fc60f7..91455b1 100644 Binary files a/keras-yolo3-master/__pycache__/voc.cpython-37.pyc and b/keras-yolo3-master/__pycache__/voc.cpython-37.pyc differ diff --git a/keras-yolo3-master/__pycache__/yolo.cpython-37.pyc b/keras-yolo3-master/__pycache__/yolo.cpython-37.pyc index c00ab4b..da07877 100644 Binary files a/keras-yolo3-master/__pycache__/yolo.cpython-37.pyc and b/keras-yolo3-master/__pycache__/yolo.cpython-37.pyc differ diff --git a/keras-yolo3-master/callbacks.py b/keras-yolo3-master/callbacks.py index 008001b..fc6bb46 100755 --- a/keras-yolo3-master/callbacks.py +++ b/keras-yolo3-master/callbacks.py @@ -1,15 +1,16 @@ -from keras.callbacks import TensorBoard, ModelCheckpoint +from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint import tensorflow as tf import numpy as np +import warnings class CustomTensorBoard(TensorBoard): """ to log the loss after each batch - """ + """ def __init__(self, log_every=1, **kwargs): super(CustomTensorBoard, self).__init__(**kwargs) self.log_every = log_every self.counter = 0 - + def on_batch_end(self, batch, logs=None): self.counter+=1 if self.counter%self.log_every==0: @@ -22,7 +23,7 @@ class CustomTensorBoard(TensorBoard): summary_value.tag = name self.writer.add_summary(summary, self.counter) self.writer.flush() - + super(CustomTensorBoard, self).on_batch_end(batch, logs) class CustomModelCheckpoint(ModelCheckpoint): @@ -67,4 +68,4 @@ class CustomModelCheckpoint(ModelCheckpoint): else: self.model_to_save.save(filepath, overwrite=True) - super(CustomModelCheckpoint, self).on_batch_end(epoch, logs) \ No newline at end of file + super(CustomModelCheckpoint, self).on_batch_end(epoch, logs) diff --git a/keras-yolo3-master/evaluate.py b/keras-yolo3-master/evaluate.py index 08d08f4..f94a68a 100644 --- a/keras-yolo3-master/evaluate.py +++ b/keras-yolo3-master/evaluate.py @@ -8,9 +8,9 @@ from voc import parse_voc_annotation from yolo import create_yolov3_model from generator import BatchGenerator from utils.utils import normalize, evaluate -from keras.callbacks import EarlyStopping, ModelCheckpoint -from keras.optimizers import Adam -from keras.models import load_model +from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.models import load_model def _main_(args): config_path = args.conf diff --git a/keras-yolo3-master/generator.py b/keras-yolo3-master/generator.py index 2949e20..90f2b56 100755 --- a/keras-yolo3-master/generator.py +++ b/keras-yolo3-master/generator.py @@ -1,22 +1,22 @@ import cv2 import copy import numpy as np -from keras.utils import Sequence +from tensorflow.keras.utils import Sequence from utils.bbox import BoundBox, bbox_iou from utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes class BatchGenerator(Sequence): - def __init__(self, - instances, - anchors, - labels, + def __init__(self, + instances, + anchors, + labels, downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3 max_box_per_image=30, batch_size=1, min_net_size=320, - max_net_size=608, - shuffle=True, - jitter=True, + max_net_size=608, + shuffle=True, + jitter=True, norm=None ): self.instances = instances @@ -30,13 +30,13 @@ class BatchGenerator(Sequence): self.jitter = jitter self.norm = norm self.anchors = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) for i in range(len(anchors)//2)] - self.net_h = 416 + self.net_h = 416 self.net_w = 416 if shuffle: np.random.shuffle(self.instances) - + def __len__(self): - return int(np.ceil(float(len(self.instances))/self.batch_size)) + return int(np.ceil(float(len(self.instances))/self.batch_size)) def __getitem__(self, idx): # get image input size, change every 10 batches @@ -63,7 +63,7 @@ class BatchGenerator(Sequence): dummy_yolo_1 = np.zeros((r_bound - l_bound, 1)) dummy_yolo_2 = np.zeros((r_bound - l_bound, 1)) dummy_yolo_3 = np.zeros((r_bound - l_bound, 1)) - + instance_count = 0 true_box_index = 0 @@ -71,18 +71,18 @@ class BatchGenerator(Sequence): for train_instance in self.instances[l_bound:r_bound]: # augment input image and fix object's position and size img, all_objs = self._aug_image(train_instance, net_h, net_w) - + for obj in all_objs: # find the best anchor box for this object - max_anchor = None + max_anchor = None max_index = -1 max_iou = -1 - shifted_box = BoundBox(0, + shifted_box = BoundBox(0, 0, - obj['xmax']-obj['xmin'], - obj['ymax']-obj['ymin']) - + obj['xmax']-obj['xmin'], + obj['ymax']-obj['ymin']) + for i in range(len(self.anchors)): anchor = self.anchors[i] iou = bbox_iou(shifted_box, anchor) @@ -90,18 +90,18 @@ class BatchGenerator(Sequence): if max_iou < iou: max_anchor = anchor max_index = i - max_iou = iou - + max_iou = iou + # determine the yolo to be responsible for this bounding box yolo = yolos[max_index//3] grid_h, grid_w = yolo.shape[1:3] - + # determine the position of the bounding box on the grid center_x = .5*(obj['xmin'] + obj['xmax']) center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x center_y = .5*(obj['ymin'] + obj['ymax']) center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y - + # determine the sizes of the bounding box w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax)) # t_w h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax)) # t_h @@ -109,7 +109,7 @@ class BatchGenerator(Sequence): box = [center_x, center_y, w, h] # determine the index of the label - obj_indx = self.labels.index(obj['name']) + obj_indx = self.labels.index(obj['name']) # determine the location of the cell responsible for this object grid_x = int(np.floor(center_x)) @@ -126,25 +126,25 @@ class BatchGenerator(Sequence): t_batch[instance_count, 0, 0, 0, true_box_index] = true_box true_box_index += 1 - true_box_index = true_box_index % self.max_box_per_image + true_box_index = true_box_index % self.max_box_per_image # assign input image to x_batch - if self.norm != None: + if self.norm != None: x_batch[instance_count] = self.norm(img) else: # plot image and bounding boxes for sanity check for obj in all_objs: cv2.rectangle(img, (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3) - cv2.putText(img, obj['name'], - (obj['xmin']+2, obj['ymin']+12), - 0, 1.2e-3 * img.shape[0], + cv2.putText(img, obj['name'], + (obj['xmin']+2, obj['ymin']+12), + 0, 1.2e-3 * img.shape[0], (0,255,0), 2) - + x_batch[instance_count] = img # increase instance counter in the current batch - instance_count += 1 - + instance_count += 1 + return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3] def _get_net_size(self, idx): @@ -154,16 +154,16 @@ class BatchGenerator(Sequence): #print("resizing: ", net_size, net_size) self.net_h, self.net_w = net_size, net_size return self.net_h, self.net_w - + def _aug_image(self, instance, net_h, net_w): image_name = instance['filename'] image = cv2.imread(image_name) # RGB image if image is None: print('Cannot find ', image_name) image = image[:,:,::-1] # RGB image - + image_h, image_w, _ = image.shape - + # determine the amount of scaling and cropping dw = self.jitter * image_w; dh = self.jitter * image_h; @@ -177,33 +177,33 @@ class BatchGenerator(Sequence): else: new_w = int(scale * net_w); new_h = int(net_w / new_ar); - + dx = int(np.random.uniform(0, net_w - new_w)); dy = int(np.random.uniform(0, net_h - new_h)); - + # apply scaling and cropping im_sized = apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy) - + # randomly distort hsv space im_sized = random_distort_image(im_sized) - + # randomly flip flip = np.random.randint(2) im_sized = random_flip(im_sized, flip) - + # correct the size and pos of bounding boxes all_objs = correct_bounding_boxes(instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h) - - return im_sized, all_objs + + return im_sized, all_objs def on_epoch_end(self): if self.shuffle: np.random.shuffle(self.instances) - + def num_classes(self): return len(self.labels) def size(self): - return len(self.instances) + return len(self.instances) def get_anchors(self): anchors = [] @@ -225,4 +225,4 @@ class BatchGenerator(Sequence): return np.array(annots) def load_image(self, i): - return cv2.imread(self.instances[i]['filename']) + return cv2.imread(self.instances[i]['filename']) diff --git a/keras-yolo3-master/train.py b/keras-yolo3-master/train.py old mode 100755 new mode 100644 index dfe7bbf..c7be93f --- a/keras-yolo3-master/train.py +++ b/keras-yolo3-master/train.py @@ -8,13 +8,16 @@ from voc import parse_voc_annotation from yolo import create_yolov3_model, dummy_loss from generator import BatchGenerator from utils.utils import normalize, evaluate, makedirs -from keras.callbacks import EarlyStopping, ReduceLROnPlateau -from keras.optimizers import Adam +from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint +from tensorflow.keras.optimizers import Adam from callbacks import CustomModelCheckpoint, CustomTensorBoard from utils.multi_gpu_model import multi_gpu_model import tensorflow as tf -import keras -from keras.models import load_model +from tensorflow import keras +from tensorflow.keras.models import load_model + +tf.keras.backend.clear_session() +tf.config.experimental_run_functions_eagerly(True) def create_training_instances( train_annot_folder, @@ -66,28 +69,34 @@ def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save): makedirs(tensorboard_logs) early_stop = EarlyStopping( - monitor = 'loss', + monitor = 'val_loss', min_delta = 0.01, patience = 25, mode = 'min', verbose = 1 ) - checkpoint = CustomModelCheckpoint( + """checkpoint = CustomModelCheckpoint( model_to_save = model_to_save, filepath = saved_weights_name,# + '{epoch:02d}.h5', monitor = 'loss', verbose = 1, save_best_only = True, mode = 'min', - period = 1 - ) + save_freq = 1 + )""" + checkpoint = ModelCheckpoint(filepath=saved_weights_name, + monitor='val_loss', + save_best_only=True, + save_weights_only=True, + verbose=1) + reduce_on_plateau = ReduceLROnPlateau( - monitor = 'loss', + monitor = 'val_loss', factor = 0.5, patience = 15, verbose = 1, mode = 'min', - epsilon = 0.01, + min_delta = 0.01, cooldown = 0, min_lr = 0 ) @@ -96,7 +105,7 @@ def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save): write_graph = True, write_images = True, ) - return [early_stop, checkpoint, reduce_on_plateau, tensorboard] + return [early_stop, checkpoint, reduce_on_plateau] def create_model( nb_class, @@ -245,21 +254,24 @@ def _main_(args): backend = config['model']['backend'] ) + ############################### # Kick off the training ############################### callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model) - train_model.fit_generator( - generator = train_generator, + train_model.fit( + x = train_generator, + validation_data = valid_generator, steps_per_epoch = len(train_generator) * config['train']['train_times'], epochs = config['train']['nb_epochs'] + config['train']['warmup_epochs'], verbose = 2 if config['train']['debug'] else 1, - callbacks = callbacks, workers = 4, - max_queue_size = 8 + max_queue_size = 8, + callbacks = callbacks ) + # make a GPU version of infer_model for evaluation if multi_gpu > 1: infer_model = load_model(config['train']['saved_weights_name']) @@ -284,7 +296,7 @@ def _main_(args): return print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances))) - print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances))) + print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances))) if __name__ == '__main__': argparser = argparse.ArgumentParser(description='train and evaluate YOLO_v3 model on any dataset') diff --git a/keras-yolo3-master/train_old.py b/keras-yolo3-master/train_old.py new file mode 100755 index 0000000..8355aac --- /dev/null +++ b/keras-yolo3-master/train_old.py @@ -0,0 +1,294 @@ +#! /usr/bin/env python + +import argparse +import os +import numpy as np +import json +from voc import parse_voc_annotation +from yolo import create_yolov3_model, dummy_loss +from generator import BatchGenerator +from utils.utils import normalize, evaluate, makedirs +from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau +from tensorflow.keras.optimizers import Adam +from callbacks import CustomModelCheckpoint, CustomTensorBoard +from utils.multi_gpu_model import multi_gpu_model +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras.models import load_model + +def create_training_instances( + train_annot_folder, + train_image_folder, + train_cache, + valid_annot_folder, + valid_image_folder, + valid_cache, + labels, +): + # parse annotations of the training set + train_ints, train_labels = parse_voc_annotation(train_annot_folder, train_image_folder, train_cache, labels) + + # parse annotations of the validation set, if any, otherwise split the training set + if os.path.exists(valid_annot_folder): + valid_ints, valid_labels = parse_voc_annotation(valid_annot_folder, valid_image_folder, valid_cache, labels) + else: + print("valid_annot_folder not exists. Spliting the trainining set.") + + train_valid_split = int(0.8*len(train_ints)) + np.random.seed(0) + np.random.shuffle(train_ints) + np.random.seed() + + valid_ints = train_ints[train_valid_split:] + train_ints = train_ints[:train_valid_split] + + # compare the seen labels with the given labels in config.json + if len(labels) > 0: + overlap_labels = set(labels).intersection(set(train_labels.keys())) + + print('Seen labels: \t' + str(train_labels) + '\n') + print('Given labels: \t' + str(labels)) + + # return None, None, None if some given label is not in the dataset + if len(overlap_labels) < len(labels): + print('Some labels have no annotations! Please revise the list of labels in the config.json.') + return None, None, None + else: + print('No labels are provided. Train on all seen labels.') + print(train_labels) + labels = train_labels.keys() + + max_box_per_image = max([len(inst['object']) for inst in (train_ints + valid_ints)]) + + return train_ints, valid_ints, sorted(labels), max_box_per_image + +def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save): + makedirs(tensorboard_logs) + + early_stop = EarlyStopping( + monitor = 'loss', + min_delta = 0.01, + patience = 25, + mode = 'min', + verbose = 1 + ) + checkpoint = CustomModelCheckpoint( + model_to_save = model_to_save, + filepath = saved_weights_name,# + '{epoch:02d}.h5', + monitor = 'loss', + verbose = 1, + save_best_only = True, + mode = 'min', + save_freq = 1 + ) + reduce_on_plateau = ReduceLROnPlateau( + monitor = 'loss', + factor = 0.5, + patience = 15, + verbose = 1, + mode = 'min', + min_delta = 0.01, + cooldown = 0, + min_lr = 0 + ) + tensorboard = CustomTensorBoard( + log_dir = tensorboard_logs, + write_graph = True, + write_images = True, + ) + return [early_stop, checkpoint, reduce_on_plateau, tensorboard] + +def create_model( + nb_class, + anchors, + max_box_per_image, + max_grid, batch_size, + warmup_batches, + ignore_thresh, + multi_gpu, + saved_weights_name, + lr, + grid_scales, + obj_scale, + noobj_scale, + xywh_scale, + class_scale, + backend +): + if multi_gpu > 1: + with tf.device('/cpu:0'): + template_model, infer_model = create_yolov3_model( + nb_class = nb_class, + anchors = anchors, + max_box_per_image = max_box_per_image, + max_grid = max_grid, + batch_size = batch_size//multi_gpu, + warmup_batches = warmup_batches, + ignore_thresh = ignore_thresh, + grid_scales = grid_scales, + obj_scale = obj_scale, + noobj_scale = noobj_scale, + xywh_scale = xywh_scale, + class_scale = class_scale + ) + else: + template_model, infer_model = create_yolov3_model( + nb_class = nb_class, + anchors = anchors, + max_box_per_image = max_box_per_image, + max_grid = max_grid, + batch_size = batch_size, + warmup_batches = warmup_batches, + ignore_thresh = ignore_thresh, + grid_scales = grid_scales, + obj_scale = obj_scale, + noobj_scale = noobj_scale, + xywh_scale = xywh_scale, + class_scale = class_scale + ) + + # load the pretrained weight if exists, otherwise load the backend weight only + if os.path.exists(saved_weights_name): + print("\nLoading pretrained weights.\n") + template_model.load_weights(saved_weights_name) + else: + template_model.load_weights(backend, by_name=True) + + if multi_gpu > 1: + train_model = multi_gpu_model(template_model, gpus=multi_gpu) + else: + train_model = template_model + + optimizer = Adam(lr=lr, clipnorm=0.001) + train_model.compile(loss=dummy_loss, optimizer=optimizer) + + return train_model, infer_model + +def _main_(args): + config_path = args.conf + + with open(config_path) as config_buffer: + config = json.loads(config_buffer.read()) + + ############################### + # Parse the annotations + ############################### + train_ints, valid_ints, labels, max_box_per_image = create_training_instances( + config['train']['train_annot_folder'], + config['train']['train_image_folder'], + config['train']['cache_name'], + config['valid']['valid_annot_folder'], + config['valid']['valid_image_folder'], + config['valid']['cache_name'], + config['model']['labels'] + ) + print('\nTraining on: \t' + str(labels) + '\n') + + ############################### + # Create the generators + ############################### + train_generator = BatchGenerator( + instances = train_ints, + anchors = config['model']['anchors'], + labels = labels, + downsample = 32, # ratio between network input's size and network output's size, 32 for YOLOv3 + max_box_per_image = max_box_per_image, + batch_size = config['train']['batch_size'], + min_net_size = config['model']['min_input_size'], + max_net_size = config['model']['max_input_size'], + shuffle = True, + jitter = 0.3, + norm = normalize + ) + + valid_generator = BatchGenerator( + instances = valid_ints, + anchors = config['model']['anchors'], + labels = labels, + downsample = 32, # ratio between network input's size and network output's size, 32 for YOLOv3 + max_box_per_image = max_box_per_image, + batch_size = config['train']['batch_size'], + min_net_size = config['model']['min_input_size'], + max_net_size = config['model']['max_input_size'], + shuffle = True, + jitter = 0.0, + norm = normalize + ) + + ############################### + # Create the model + ############################### + if os.path.exists(config['train']['saved_weights_name']): + config['train']['warmup_epochs'] = 0 + warmup_batches = config['train']['warmup_epochs'] * (config['train']['train_times']*len(train_generator)) + + os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus'] + multi_gpu = len(config['train']['gpus'].split(',')) + print('multi_gpu:' + str(multi_gpu)) + + train_model, infer_model = create_model( + nb_class = len(labels), + anchors = config['model']['anchors'], + max_box_per_image = max_box_per_image, + max_grid = [config['model']['max_input_size'], config['model']['max_input_size']], + batch_size = config['train']['batch_size'], + warmup_batches = warmup_batches, + ignore_thresh = config['train']['ignore_thresh'], + multi_gpu = multi_gpu, + saved_weights_name = config['train']['saved_weights_name'], + lr = config['train']['learning_rate'], + grid_scales = config['train']['grid_scales'], + obj_scale = config['train']['obj_scale'], + noobj_scale = config['train']['noobj_scale'], + xywh_scale = config['train']['xywh_scale'], + class_scale = config['train']['class_scale'], + backend = config['model']['backend'] + ) + + ############################### + # Kick off the training + ############################### + callbacks = create_callbacks(config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model) + + train_model.fit( + generator = train_generator, + steps_per_epoch = len(train_generator) * config['train']['train_times'], + epochs = config['train']['nb_epochs'] + config['train']['warmup_epochs'], + verbose = 2 if config['train']['debug'] else 1, + callbacks = callbacks, + workers = 4, + max_queue_size = 8 + ) + + # make a GPU version of infer_model for evaluation + if multi_gpu > 1: + infer_model = load_model(config['train']['saved_weights_name']) + + ############################### + # Run the evaluation + ############################### + # compute mAP for all the classes + average_precisions = evaluate(infer_model, valid_generator) + + # print the score + total_instances = [] + precisions = [] + for label, (average_precision, num_annotations) in average_precisions.items(): + print('{:.0f} instances of class'.format(num_annotations), + labels[label], 'with average precision: {:.4f}'.format(average_precision)) + total_instances.append(num_annotations) + precisions.append(average_precision) + + if sum(total_instances) == 0: + print('No test instances found.') + return + + print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances))) + print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances))) + +if __name__ == '__main__': + argparser = argparse.ArgumentParser(description='train and evaluate YOLO_v3 model on any dataset') + argparser.add_argument('-c', '--conf', help='path to configuration file') + + args = argparser.parse_args() + _main_(args) diff --git a/keras-yolo3-master/utils/__pycache__/__init__.cpython-37.pyc b/keras-yolo3-master/utils/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..08961ed Binary files /dev/null and b/keras-yolo3-master/utils/__pycache__/__init__.cpython-37.pyc differ diff --git a/keras-yolo3-master/utils/__pycache__/bbox.cpython-37.pyc b/keras-yolo3-master/utils/__pycache__/bbox.cpython-37.pyc new file mode 100644 index 0000000..7c721c5 Binary files /dev/null and b/keras-yolo3-master/utils/__pycache__/bbox.cpython-37.pyc differ diff --git a/keras-yolo3-master/utils/__pycache__/colors.cpython-37.pyc b/keras-yolo3-master/utils/__pycache__/colors.cpython-37.pyc new file mode 100644 index 0000000..76363b9 Binary files /dev/null and b/keras-yolo3-master/utils/__pycache__/colors.cpython-37.pyc differ diff --git a/keras-yolo3-master/utils/__pycache__/image.cpython-37.pyc b/keras-yolo3-master/utils/__pycache__/image.cpython-37.pyc new file mode 100644 index 0000000..ae46ee7 Binary files /dev/null and b/keras-yolo3-master/utils/__pycache__/image.cpython-37.pyc differ diff --git a/keras-yolo3-master/utils/__pycache__/multi_gpu_model.cpython-37.pyc b/keras-yolo3-master/utils/__pycache__/multi_gpu_model.cpython-37.pyc new file mode 100644 index 0000000..eabe2eb Binary files /dev/null and b/keras-yolo3-master/utils/__pycache__/multi_gpu_model.cpython-37.pyc differ diff --git a/keras-yolo3-master/utils/__pycache__/utils.cpython-37.pyc b/keras-yolo3-master/utils/__pycache__/utils.cpython-37.pyc new file mode 100644 index 0000000..54d597e Binary files /dev/null and b/keras-yolo3-master/utils/__pycache__/utils.cpython-37.pyc differ diff --git a/keras-yolo3-master/utils/bbox.py b/keras-yolo3-master/utils/bbox.py index 47706e5..0bcabb2 100755 --- a/keras-yolo3-master/utils/bbox.py +++ b/keras-yolo3-master/utils/bbox.py @@ -9,7 +9,7 @@ class BoundBox: self.ymin = ymin self.xmax = xmax self.ymax = ymax - + self.c = c self.classes = classes @@ -19,14 +19,14 @@ class BoundBox: def get_label(self): if self.label == -1: self.label = np.argmax(self.classes) - + return self.label - + def get_score(self): if self.score == -1: self.score = self.classes[self.get_label()] - - return self.score + + return self.score def _interval_overlap(interval_a, interval_b): x1, x2 = interval_a @@ -41,49 +41,51 @@ def _interval_overlap(interval_a, interval_b): if x2 < x3: return 0 else: - return min(x2,x4) - x3 + return min(x2,x4) - x3 def bbox_iou(box1, box2): intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) - intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) - + intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) + intersect = intersect_w * intersect_h w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin - + union = w1*h1 + w2*h2 - intersect - + + if union == 0: return 0 + return float(intersect) / union def draw_boxes(image, boxes, labels, obj_thresh, quiet=True): for box in boxes: label_str = '' label = -1 - + for i in range(len(labels)): if box.classes[i] > obj_thresh: if label_str != '': label_str += ', ' label_str += (labels[i] + ' ' + str(round(box.get_score()*100,0)) + '%') label = i if not quiet: print(label_str) - + if label >= 0: text_size = cv2.getTextSize(label_str, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-4 * image.shape[0], 2) width, height = text_size[0][0], text_size[0][1] - region = np.array([[box.xmin-3, box.ymin], - [box.xmin-3, box.ymin-height-16], - [box.xmin+width+6, box.ymin-height-16], - [box.xmin+width+6, box.ymin]], dtype='int32') + region = np.array([[box.xmin-3, box.ymin], + [box.xmin-3, box.ymin-height-16], + [box.xmin+width+6, box.ymin-height-16], + [box.xmin+width+6, box.ymin]], dtype='int32') cv2.rectangle(img=image, pt1=(box.xmin,box.ymin), pt2=(box.xmax,box.ymax), color=get_color(label), thickness=1) cv2.fillPoly(img=image, pts=[region], color=get_color(label)) - cv2.putText(img=image, - text=label_str, - org=(box.xmin+6, box.ymin - 6), - fontFace=cv2.FONT_HERSHEY_SIMPLEX, - fontScale=0.7e-3 * image.shape[0], - color=(0,0,0), + cv2.putText(img=image, + text=label_str, + org=(box.xmin+6, box.ymin - 6), + fontFace=cv2.FONT_HERSHEY_SIMPLEX, + fontScale=0.7e-3 * image.shape[0], + color=(0,0,0), thickness=2) - - return image + + return image diff --git a/keras-yolo3-master/utils/multi_gpu_model.py b/keras-yolo3-master/utils/multi_gpu_model.py index 9064582..0ceb2fc 100755 --- a/keras-yolo3-master/utils/multi_gpu_model.py +++ b/keras-yolo3-master/utils/multi_gpu_model.py @@ -1,5 +1,5 @@ -from keras.layers import Lambda, concatenate -from keras.models import Model +from tensorflow.keras.layers import Lambda, concatenate +from tensorflow.keras.models import Model import tensorflow as tf def multi_gpu_model(model, gpus): @@ -59,4 +59,4 @@ def multi_gpu_model(model, gpus): for name, outputs in zip(model.output_names, all_outputs): merged.append(concatenate(outputs, axis=0, name=name)) - return Model(model.inputs, merged) \ No newline at end of file + return Model(model.inputs, merged) diff --git a/keras-yolo3-master/utils/utils.py b/keras-yolo3-master/utils/utils.py index aca993c..edd5157 100644 --- a/keras-yolo3-master/utils/utils.py +++ b/keras-yolo3-master/utils/utils.py @@ -3,6 +3,7 @@ import numpy as np import os from .bbox import BoundBox, bbox_iou from scipy.special import expit +import tensorflow as tf def _sigmoid(x): return expit(x) @@ -166,18 +167,30 @@ def do_nms(boxes, nms_thresh): if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: boxes[index_j].classes[c] = 0 -def decode_netout(netout, anchors, obj_thresh, net_h, net_w): - grid_h, grid_w = netout.shape[:2] +def decode_netout(netout_old, anchors, obj_thresh, net_h, net_w): + grid_h, grid_w = netout_old.shape[:2] nb_box = 3 - netout = netout.reshape((grid_h, grid_w, nb_box, -1)) - nb_class = netout.shape[-1] - 5 + #netout = netout.reshape((grid_h, grid_w, nb_box, -1)) + netout_old = tf.reshape(netout_old, (grid_h, grid_w, nb_box, -1)) + nb_class = netout_old.shape[-1] - 5 boxes = [] + ## Tensorflow v.2 + #print(tf.shape(netout)) + aux_1 = _sigmoid(netout_old[..., :2]) + #print(tf.shape(aux_1)) + aux_2 = _sigmoid(netout_old[..., 4]) + #print(tf.shape(aux_2[..., np.newaxis])) + aux_3 = aux_2[..., np.newaxis] * _softmax(netout_old[..., 5:]) + aux_4 = aux_3 * (aux_3 > obj_thresh) + #print(tf.shape(aux_4)) + netout = tf.concat([aux_1,netout_old[..., 2:4] ,aux_2[..., np.newaxis], aux_4], 3) + #print(tf.shape(new_netout)) - netout[..., :2] = _sigmoid(netout[..., :2]) - netout[..., 4] = _sigmoid(netout[..., 4]) - netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) - netout[..., 5:] *= netout[..., 5:] > obj_thresh + #netout[..., :2] = _sigmoid(netout[..., :2]) + #netout[..., 4] = _sigmoid(netout[..., 4]) + #netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:]) + #netout[..., 5:] *= netout[..., 5:] > obj_thresh for i in range(grid_h*grid_w): row = i // grid_w @@ -198,7 +211,7 @@ def decode_netout(netout, anchors, obj_thresh, net_h, net_w): h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height # last elements are class probabilities - classes = netout[row,col,b,5:] + classes = np.array(netout[row,col,b,5:]) box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) diff --git a/keras-yolo3-master/yolo.py b/keras-yolo3-master/yolo.py old mode 100755 new mode 100644 index 352083a..df09778 --- a/keras-yolo3-master/yolo.py +++ b/keras-yolo3-master/yolo.py @@ -1,12 +1,12 @@ -from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda -from keras.layers.merge import add, concatenate -from keras.models import Model -from keras.engine.topology import Layer +from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda +from tensorflow.keras.layers import add, concatenate +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Layer import tensorflow as tf class YoloLayer(Layer): - def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, - grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, + def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, + grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, **kwargs): # make the model settings persistent self.ignore_thresh = ignore_thresh @@ -16,13 +16,13 @@ class YoloLayer(Layer): self.obj_scale = obj_scale self.noobj_scale = noobj_scale self.xywh_scale = xywh_scale - self.class_scale = class_scale + self.class_scale = class_scale # make a persistent mesh grid max_grid_h, max_grid_w = max_grid - cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1))) - cell_y = tf.transpose(cell_x, (0,2,1,3,4)) + cell_x = tf.cast(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)), dtype=tf.float32) + cell_y = tf.transpose(a=cell_x, perm=(0,2,1,3,4)) self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1]) super(YoloLayer, self).__init__(**kwargs) @@ -34,30 +34,30 @@ class YoloLayer(Layer): input_image, y_pred, y_true, true_boxes = x # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class] - y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0)) - + y_pred = tf.reshape(y_pred, tf.concat([tf.shape(input=y_pred)[:3], tf.constant([3, -1])], axis=0)) + # initialize the masks object_mask = tf.expand_dims(y_true[..., 4], 4) # the variable to keep track of number of batches processed - batch_seen = tf.Variable(0.) + batch_seen = tf.Variable(0.) # compute grid factor and net factor - grid_h = tf.shape(y_true)[1] - grid_w = tf.shape(y_true)[2] + grid_h = tf.shape(input=y_true)[1] + grid_w = tf.shape(input=y_true)[2] grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2]) - net_h = tf.shape(input_image)[1] - net_w = tf.shape(input_image)[2] + net_h = tf.shape(input=input_image)[1] + net_w = tf.shape(input=input_image)[2] net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2]) - + """ Adjust prediction """ pred_box_xy = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy pred_box_wh = y_pred[..., 2:4] # t_wh pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence - pred_box_class = y_pred[..., 5:] # adjust class probabilities + pred_box_class = y_pred[..., 5:] # adjust class probabilities """ Adjust ground truth @@ -65,47 +65,47 @@ class YoloLayer(Layer): true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy) true_box_wh = y_true[..., 2:4] # t_wh true_box_conf = tf.expand_dims(y_true[..., 4], 4) - true_box_class = tf.argmax(y_true[..., 5:], -1) + true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1) """ Compare each predicted box to all true boxes - """ + """ # initially, drag all objectness of all boxes to 0 - conf_delta = pred_box_conf - 0 + conf_delta = pred_box_conf - 0 # then, ignore the boxes which have good overlap with some true box true_xy = true_boxes[..., 0:2] / grid_factor true_wh = true_boxes[..., 2:4] / net_factor - + true_wh_half = true_wh / 2. true_mins = true_xy - true_wh_half true_maxes = true_xy + true_wh_half - + pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4) pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4) - + pred_wh_half = pred_wh / 2. pred_mins = pred_xy - pred_wh_half - pred_maxes = pred_xy + pred_wh_half + pred_maxes = pred_xy + pred_wh_half intersect_mins = tf.maximum(pred_mins, true_mins) intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] - + true_areas = true_wh[..., 0] * true_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1] union_areas = pred_areas + true_areas - intersect_areas iou_scores = tf.truediv(intersect_areas, union_areas) - best_ious = tf.reduce_max(iou_scores, axis=4) - conf_delta *= tf.expand_dims(tf.to_float(best_ious < self.ignore_thresh), 4) + best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4) + conf_delta *= tf.expand_dims(tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4) """ Compute some online statistics - """ + """ true_xy = true_box_xy / grid_factor true_wh = tf.exp(true_box_wh) * self.anchors / net_factor @@ -114,51 +114,52 @@ class YoloLayer(Layer): true_maxes = true_xy + true_wh_half pred_xy = pred_box_xy / grid_factor - pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor - + pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor + pred_wh_half = pred_wh / 2. pred_mins = pred_xy - pred_wh_half - pred_maxes = pred_xy + pred_wh_half + pred_maxes = pred_xy + pred_wh_half intersect_mins = tf.maximum(pred_mins, true_mins) intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] - + true_areas = true_wh[..., 0] * true_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1] union_areas = pred_areas + true_areas - intersect_areas iou_scores = tf.truediv(intersect_areas, union_areas) iou_scores = object_mask * tf.expand_dims(iou_scores, 4) - - count = tf.reduce_sum(object_mask) - count_noobj = tf.reduce_sum(1 - object_mask) - detect_mask = tf.to_float((pred_box_conf*object_mask) >= 0.5) - class_mask = tf.expand_dims(tf.to_float(tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4) - recall50 = tf.reduce_sum(tf.to_float(iou_scores >= 0.5 ) * detect_mask * class_mask) / (count + 1e-3) - recall75 = tf.reduce_sum(tf.to_float(iou_scores >= 0.75) * detect_mask * class_mask) / (count + 1e-3) - avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3) - avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3) - avg_noobj = tf.reduce_sum(pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3) - avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3) + + + count = tf.reduce_sum(input_tensor=object_mask) + count_noobj = tf.reduce_sum(input_tensor=1 - object_mask) + detect_mask = tf.cast((pred_box_conf*object_mask) >= 0.5, dtype=tf.float32) + class_mask = tf.expand_dims(tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1), true_box_class), dtype=tf.float32), 4) + recall50 = tf.reduce_sum(input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3) + recall75 = tf.reduce_sum(input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3) + avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3) + avg_obj = tf.reduce_sum(input_tensor=pred_box_conf * object_mask) / (count + 1e-3) + avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3) + avg_cat = tf.reduce_sum(input_tensor=object_mask * class_mask) / (count + 1e-3) """ Warm-up training """ - batch_seen = tf.assign_add(batch_seen, 1.) - - true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), - lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), - true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), + #batch_seen = tf.assign_add(batch_seen, 1.) + batch_seen.assign_add(1.) + true_box_xy, true_box_wh, xywh_mask = tf.cond(pred=tf.less(batch_seen, self.warmup_batches+1), + true_fn=lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), + true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), tf.ones_like(object_mask)], - lambda: [true_box_xy, + false_fn=lambda: [true_box_xy, true_box_wh, object_mask]) """ Compare each true box to all anchor boxes - """ + """ wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale @@ -169,10 +170,10 @@ class YoloLayer(Layer): tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \ self.class_scale - loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1,5))) - loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1,5))) - loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1,5))) - loss_class = tf.reduce_sum(class_delta, list(range(1,5))) + loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta), axis=list(range(1,5))) + loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta), axis=list(range(1,5))) + loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta), axis=list(range(1,5))) + loss_class = tf.reduce_sum(input_tensor=class_delta, axis=list(range(1,5))) loss = loss_xy + loss_wh + loss_conf + loss_class @@ -181,12 +182,12 @@ class YoloLayer(Layer): #loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000) #loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000) #loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000) - #loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000) - #loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000) - #loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), - # tf.reduce_sum(loss_wh), - # tf.reduce_sum(loss_conf), - # tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000) + #loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000) + #loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000) + #loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), + # tf.reduce_sum(loss_wh), + # tf.reduce_sum(loss_conf), + # tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000) return loss*self.grid_scale @@ -197,30 +198,30 @@ class YoloLayer(Layer): def _conv_block(inp, convs, do_skip=True): x = inp count = 0 - + for conv in convs: if count == (len(convs) - 2) and do_skip: skip_connection = x count += 1 - + if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike tensorflow darknet prefer left and top paddings - x = Conv2D(conv['filter'], - conv['kernel'], - strides=conv['stride'], + x = Conv2D(conv['filter'], + conv['kernel'], + strides=conv['stride'], padding='valid' if conv['stride'] > 1 else 'same', # unlike tensorflow darknet prefer left and top paddings - name='conv_' + str(conv['layer_idx']), + name='conv_' + str(conv['layer_idx']), use_bias=False if conv['bnorm'] else True)(x) if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) - return add([skip_connection, x]) if do_skip else x + return add([skip_connection, x]) if do_skip else x def create_yolov3_model( - nb_class, - anchors, - max_box_per_image, - max_grid, - batch_size, + nb_class, + anchors, + max_box_per_image, + max_grid, + batch_size, warmup_batches, ignore_thresh, grid_scales, @@ -259,9 +260,9 @@ def create_yolov3_model( for i in range(7): x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) - + skip_36 = x - + # Layer 37 => 40 x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38}, @@ -271,9 +272,9 @@ def create_yolov3_model( for i in range(7): x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) - + skip_61 = x - + # Layer 62 => 65 x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63}, @@ -283,7 +284,7 @@ def create_yolov3_model( for i in range(3): x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) - + # Layer 75 => 79 x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75}, {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76}, @@ -294,11 +295,11 @@ def create_yolov3_model( # Layer 80 => 82 pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80}, {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False) - loss_yolo_1 = YoloLayer(anchors[12:], - [1*num for num in max_grid], - batch_size, - warmup_batches, - ignore_thresh, + loss_yolo_1 = YoloLayer(anchors[12:], + [1*num for num in max_grid], + batch_size, + warmup_batches, + ignore_thresh, grid_scales[0], obj_scale, noobj_scale, @@ -320,11 +321,11 @@ def create_yolov3_model( # Layer 92 => 94 pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92}, {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False) - loss_yolo_2 = YoloLayer(anchors[6:12], - [2*num for num in max_grid], - batch_size, - warmup_batches, - ignore_thresh, + loss_yolo_2 = YoloLayer(anchors[6:12], + [2*num for num in max_grid], + batch_size, + warmup_batches, + ignore_thresh, grid_scales[1], obj_scale, noobj_scale, @@ -344,16 +345,16 @@ def create_yolov3_model( {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103}, {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104}, {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False) - loss_yolo_3 = YoloLayer(anchors[:6], - [4*num for num in max_grid], - batch_size, - warmup_batches, - ignore_thresh, + loss_yolo_3 = YoloLayer(anchors[:6], + [4*num for num in max_grid], + batch_size, + warmup_batches, + ignore_thresh, grid_scales[2], obj_scale, noobj_scale, xywh_scale, - class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes]) + class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes]) train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3]) infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3]) @@ -361,4 +362,4 @@ def create_yolov3_model( return [train_model, infer_model] def dummy_loss(y_true, y_pred): - return tf.sqrt(tf.reduce_sum(y_pred)) + return tf.sqrt(tf.reduce_sum(input_tensor=y_pred)) diff --git a/keras-yolo3-master/yolo3_one_file_to_detect_them_all.py b/keras-yolo3-master/yolo3_one_file_to_detect_them_all.py index 231e2e2..83e31c0 100755 --- a/keras-yolo3-master/yolo3_one_file_to_detect_them_all.py +++ b/keras-yolo3-master/yolo3_one_file_to_detect_them_all.py @@ -1,9 +1,9 @@ import argparse import os import numpy as np -from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D -from keras.layers.merge import add, concatenate -from keras.models import Model +from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D +from tensorflow.keras.layers.merge import add, concatenate +from tensorflow.keras.models import Model import struct import cv2 @@ -37,12 +37,12 @@ class WeightReader: w_f.read(4) transpose = (major > 1000) or (minor > 1000) - + binary = w_f.read() self.offset = 0 self.all_weights = np.frombuffer(binary, dtype='float32') - + def read_bytes(self, size): self.offset = self.offset + size return self.all_weights[self.offset-size:self.offset] @@ -61,14 +61,14 @@ class WeightReader: beta = self.read_bytes(size) # bias gamma = self.read_bytes(size) # scale mean = self.read_bytes(size) # mean - var = self.read_bytes(size) # variance + var = self.read_bytes(size) # variance - weights = norm_layer.set_weights([gamma, beta, mean, var]) + weights = norm_layer.set_weights([gamma, beta, mean, var]) if len(conv_layer.get_weights()) > 1: bias = self.read_bytes(np.prod(conv_layer.get_weights()[1].shape)) kernel = self.read_bytes(np.prod(conv_layer.get_weights()[0].shape)) - + kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape))) kernel = kernel.transpose([2,3,1,0]) conv_layer.set_weights([kernel, bias]) @@ -78,8 +78,8 @@ class WeightReader: kernel = kernel.transpose([2,3,1,0]) conv_layer.set_weights([kernel]) except ValueError: - print("no convolution #" + str(i)) - + print("no convolution #" + str(i)) + def reset(self): self.offset = 0 @@ -89,7 +89,7 @@ class BoundBox: self.ymin = ymin self.xmax = xmax self.ymax = ymax - + self.objness = objness self.classes = classes @@ -99,30 +99,30 @@ class BoundBox: def get_label(self): if self.label == -1: self.label = np.argmax(self.classes) - + return self.label - + def get_score(self): if self.score == -1: self.score = self.classes[self.get_label()] - + return self.score def _conv_block(inp, convs, skip=True): x = inp count = 0 - + for conv in convs: if count == (len(convs) - 2) and skip: skip_connection = x count += 1 - + if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top - x = Conv2D(conv['filter'], - conv['kernel'], - strides=conv['stride'], + x = Conv2D(conv['filter'], + conv['kernel'], + strides=conv['stride'], padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top - name='conv_' + str(conv['layer_idx']), + name='conv_' + str(conv['layer_idx']), use_bias=False if conv['bnorm'] else True)(x) if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) @@ -142,7 +142,7 @@ def _interval_overlap(interval_a, interval_b): if x2 < x3: return 0 else: - return min(x2,x4) - x3 + return min(x2,x4) - x3 def _sigmoid(x): return 1. / (1. + np.exp(-x)) @@ -150,14 +150,14 @@ def _sigmoid(x): def bbox_iou(box1, box2): intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) - + intersect = intersect_w * intersect_h w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin - + union = w1*h1 + w2*h2 - intersect - + return float(intersect) / union def make_yolov3_model(): @@ -187,9 +187,9 @@ def make_yolov3_model(): for i in range(7): x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) - + skip_36 = x - + # Layer 37 => 40 x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38}, @@ -199,9 +199,9 @@ def make_yolov3_model(): for i in range(7): x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) - + skip_61 = x - + # Layer 62 => 65 x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63}, @@ -211,7 +211,7 @@ def make_yolov3_model(): for i in range(3): x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) - + # Layer 75 => 79 x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75}, {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76}, @@ -253,7 +253,7 @@ def make_yolov3_model(): {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104}, {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False) - model = Model(input_image, [yolo_82, yolo_94, yolo_106]) + model = Model(input_image, [yolo_82, yolo_94, yolo_106]) return model def preprocess_input(image, net_h, net_w): @@ -293,25 +293,25 @@ def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w): for i in range(grid_h*grid_w): row = i / grid_w col = i % grid_w - + for b in range(nb_box): # 4th element is objectness score objectness = netout[int(row)][int(col)][b][4] #objectness = netout[..., :4] - + if(objectness.all() <= obj_thresh): continue - + # first 4 elements are x, y, w, and h x, y, w, h = netout[int(row)][int(col)][b][:4] x = (col + x) / grid_w # center position, unit: image width y = (row + y) / grid_h # center position, unit: image height w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width - h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height - + h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height + # last elements are class probabilities classes = netout[int(row)][col][b][5:] - + box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) #box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, None, classes) @@ -326,22 +326,22 @@ def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w): else: new_h = net_w new_w = (image_w*net_h)/image_h - + for i in range(len(boxes)): x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h - + boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w) boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) - + def do_nms(boxes, nms_thresh): if len(boxes) > 0: nb_class = len(boxes[0].classes) else: return - + for c in range(nb_class): sorted_indices = np.argsort([-box.classes[c] for box in boxes]) @@ -355,28 +355,28 @@ def do_nms(boxes, nms_thresh): if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: boxes[index_j].classes[c] = 0 - + def draw_boxes(image, boxes, labels, obj_thresh): for box in boxes: label_str = '' label = -1 - + for i in range(len(labels)): if box.classes[i] > obj_thresh: label_str += labels[i] label = i print(labels[i] + ': ' + str(box.classes[i]*100) + '%') - + if label >= 0: cv2.rectangle(image, (box.xmin,box.ymin), (box.xmax,box.ymax), (0,255,0), 3) - cv2.putText(image, - label_str + ' ' + str(box.get_score()), - (box.xmin, box.ymin - 13), - cv2.FONT_HERSHEY_SIMPLEX, - 1e-3 * image.shape[0], + cv2.putText(image, + label_str + ' ' + str(box.get_score()), + (box.xmin, box.ymin - 13), + cv2.FONT_HERSHEY_SIMPLEX, + 1e-3 * image.shape[0], (0,255,0), 2) - - return image + + return image def _main_(args): weights_path = args.weights @@ -412,7 +412,7 @@ def _main_(args): # run the prediction yolos = yolov3.predict(new_image) boxes = [] - + for i in range(len(yolos)): # decode the output of the network boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh, nms_thresh, net_h, net_w) @@ -421,13 +421,13 @@ def _main_(args): correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w) # suppress non-maximal boxes - do_nms(boxes, nms_thresh) + do_nms(boxes, nms_thresh) # draw bounding boxes on the image using labels - draw_boxes(image, boxes, labels, obj_thresh) - + draw_boxes(image, boxes, labels, obj_thresh) + # write the image with bounding boxes to file - cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], (image).astype('uint8')) + cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], (image).astype('uint8')) if __name__ == '__main__': args = argparser.parse_args() diff --git a/keras-yolo3-master/yolo_old.py b/keras-yolo3-master/yolo_old.py new file mode 100755 index 0000000..1fadf56 --- /dev/null +++ b/keras-yolo3-master/yolo_old.py @@ -0,0 +1,365 @@ +from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda +from tensorflow.keras.layers import add, concatenate +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Layer +import tensorflow as tf + +class YoloLayer(Layer): + def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, + grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, + **kwargs): + # make the model settings persistent + self.ignore_thresh = ignore_thresh + self.warmup_batches = warmup_batches + self.anchors = tf.constant(anchors, dtype='float', shape=[1,1,1,3,2]) + self.grid_scale = grid_scale + self.obj_scale = obj_scale + self.noobj_scale = noobj_scale + self.xywh_scale = xywh_scale + self.class_scale = class_scale + + # make a persistent mesh grid + max_grid_h, max_grid_w = max_grid + + cell_x = tf.cast(tf.reshape(tf.tile(tf.range(max_grid_w), [max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1)), dtype=tf.float32) + cell_y = tf.transpose(cell_x, (0,2,1,3,4)) + self.cell_grid = tf.tile(tf.concat([cell_x,cell_y],-1), [batch_size, 1, 1, 3, 1]) + + super(YoloLayer, self).__init__(**kwargs) + + def build(self, input_shape): + super(YoloLayer, self).build(input_shape) # Be sure to call this somewhere! + + def call(self, x): + input_image, y_pred, y_true, true_boxes = x + + # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class] + y_pred = tf.reshape(y_pred, tf.concat([tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0)) + + # initialize the masks + object_mask = tf.expand_dims(y_true[..., 4], 4) + + # the variable to keep track of number of batches processed + batch_seen = tf.Variable(0.) + + # compute grid factor and net factor + grid_h = tf.shape(y_true)[1] + grid_w = tf.shape(y_true)[2] + grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1,1,1,1,2]) + + net_h = tf.shape(input_image)[1] + net_w = tf.shape(input_image)[2] + net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1,1,1,1,2]) + + """ + Adjust prediction + """ + pred_box_xy = (self.cell_grid[:,:grid_h,:grid_w,:,:] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy + pred_box_wh = y_pred[..., 2:4] # t_wh + pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence + pred_box_class = y_pred[..., 5:] # adjust class probabilities + + """ + Adjust ground truth + """ + true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy) + true_box_wh = y_true[..., 2:4] # t_wh + true_box_conf = tf.expand_dims(y_true[..., 4], 4) + true_box_class = tf.argmax(y_true[..., 5:], -1) + + """ + Compare each predicted box to all true boxes + """ + # initially, drag all objectness of all boxes to 0 + conf_delta = pred_box_conf - 0 + + # then, ignore the boxes which have good overlap with some true box + true_xy = true_boxes[..., 0:2] / grid_factor + true_wh = true_boxes[..., 2:4] / net_factor + + true_wh_half = true_wh / 2. + true_mins = true_xy - true_wh_half + true_maxes = true_xy + true_wh_half + + pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4) + pred_wh = tf.expand_dims(tf.exp(pred_box_wh) * self.anchors / net_factor, 4) + + pred_wh_half = pred_wh / 2. + pred_mins = pred_xy - pred_wh_half + pred_maxes = pred_xy + pred_wh_half + + intersect_mins = tf.maximum(pred_mins, true_mins) + intersect_maxes = tf.minimum(pred_maxes, true_maxes) + + intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) + intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] + + true_areas = true_wh[..., 0] * true_wh[..., 1] + pred_areas = pred_wh[..., 0] * pred_wh[..., 1] + + union_areas = pred_areas + true_areas - intersect_areas + iou_scores = tf.truediv(intersect_areas, union_areas) + + best_ious = tf.reduce_max(iou_scores, axis=4) + conf_delta *= tf.expand_dims(tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4) + + """ + Compute some online statistics + """ + true_xy = true_box_xy / grid_factor + true_wh = tf.exp(true_box_wh) * self.anchors / net_factor + + true_wh_half = true_wh / 2. + true_mins = true_xy - true_wh_half + true_maxes = true_xy + true_wh_half + + pred_xy = pred_box_xy / grid_factor + pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor + + pred_wh_half = pred_wh / 2. + pred_mins = pred_xy - pred_wh_half + pred_maxes = pred_xy + pred_wh_half + + intersect_mins = tf.maximum(pred_mins, true_mins) + intersect_maxes = tf.minimum(pred_maxes, true_maxes) + intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) + intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] + + true_areas = true_wh[..., 0] * true_wh[..., 1] + pred_areas = pred_wh[..., 0] * pred_wh[..., 1] + + union_areas = pred_areas + true_areas - intersect_areas + iou_scores = tf.truediv(intersect_areas, union_areas) + iou_scores = object_mask * tf.expand_dims(iou_scores, 4) + + + count = tf.reduce_sum(object_mask) + count_noobj = tf.reduce_sum(1 - object_mask) + detect_mask = tf.cast((pred_box_conf*object_mask) >= 0.5, dtype=tf.float32) + class_mask = tf.expand_dims(tf.cast(tf.equal(tf.argmax(pred_box_class, -1), true_box_class), dtype=tf.float32), 4) + recall50 = tf.reduce_sum(tf.cast(iou_scores >= 0.5, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3) + recall75 = tf.reduce_sum(tf.cast(iou_scores >= 0.75, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3) + avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3) + avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3) + avg_noobj = tf.reduce_sum(pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3) + avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3) + + """ + Warm-up training + """ + #batch_seen = tf.assign_add(batch_seen, 1.) + batch_seen.assign_add(1.) + true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), + lambda: [true_box_xy + (0.5 + self.cell_grid[:,:grid_h,:grid_w,:,:]) * (1-object_mask), + true_box_wh + tf.zeros_like(true_box_wh) * (1-object_mask), + tf.ones_like(object_mask)], + lambda: [true_box_xy, + true_box_wh, + object_mask]) + + """ + Compare each true box to all anchor boxes + """ + wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor + wh_scale = tf.expand_dims(2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale + + xy_delta = xywh_mask * (pred_box_xy-true_box_xy) * wh_scale * self.xywh_scale + wh_delta = xywh_mask * (pred_box_wh-true_box_wh) * wh_scale * self.xywh_scale + conf_delta = object_mask * (pred_box_conf-true_box_conf) * self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale + class_delta = object_mask * \ + tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \ + self.class_scale + + loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1,5))) + loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1,5))) + loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1,5))) + loss_class = tf.reduce_sum(class_delta, list(range(1,5))) + + loss = loss_xy + loss_wh + loss_conf + loss_class + + #loss = tf.Print(loss, [grid_h, avg_obj], message='avg_obj \t\t', summarize=1000) + #loss = tf.Print(loss, [grid_h, avg_noobj], message='avg_noobj \t\t', summarize=1000) + #loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000) + #loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000) + #loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000) + #loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000) + #loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000) + #loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), + # tf.reduce_sum(loss_wh), + # tf.reduce_sum(loss_conf), + # tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000) + + + return loss*self.grid_scale + + def compute_output_shape(self, input_shape): + return [(None, 1)] + +def _conv_block(inp, convs, do_skip=True): + x = inp + count = 0 + + for conv in convs: + if count == (len(convs) - 2) and do_skip: + skip_connection = x + count += 1 + + if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # unlike tensorflow darknet prefer left and top paddings + x = Conv2D(conv['filter'], + conv['kernel'], + strides=conv['stride'], + padding='valid' if conv['stride'] > 1 else 'same', # unlike tensorflow darknet prefer left and top paddings + name='conv_' + str(conv['layer_idx']), + use_bias=False if conv['bnorm'] else True)(x) + if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) + if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) + + return add([skip_connection, x]) if do_skip else x + +def create_yolov3_model( + nb_class, + anchors, + max_box_per_image, + max_grid, + batch_size, + warmup_batches, + ignore_thresh, + grid_scales, + obj_scale, + noobj_scale, + xywh_scale, + class_scale +): + input_image = Input(shape=(None, None, 3)) # net_h, net_w, 3 + true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4)) + true_yolo_1 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class + true_yolo_2 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class + true_yolo_3 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) # grid_h, grid_w, nb_anchor, 5+nb_class + + # Layer 0 => 4 + x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0}, + {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1}, + {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2}, + {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}]) + + # Layer 5 => 8 + x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5}, + {'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6}, + {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}]) + + # Layer 9 => 11 + x = _conv_block(x, [{'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9}, + {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}]) + + # Layer 12 => 15 + x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12}, + {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13}, + {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}]) + + # Layer 16 => 36 + for i in range(7): + x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, + {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) + + skip_36 = x + + # Layer 37 => 40 + x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, + {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38}, + {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}]) + + # Layer 41 => 61 + for i in range(7): + x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, + {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) + + skip_61 = x + + # Layer 62 => 65 + x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, + {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63}, + {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}]) + + # Layer 66 => 74 + for i in range(3): + x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, + {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) + + # Layer 75 => 79 + x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75}, + {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76}, + {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77}, + {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78}, + {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], do_skip=False) + + # Layer 80 => 82 + pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80}, + {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False) + loss_yolo_1 = YoloLayer(anchors[12:], + [1*num for num in max_grid], + batch_size, + warmup_batches, + ignore_thresh, + grid_scales[0], + obj_scale, + noobj_scale, + xywh_scale, + class_scale)([input_image, pred_yolo_1, true_yolo_1, true_boxes]) + + # Layer 83 => 86 + x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], do_skip=False) + x = UpSampling2D(2)(x) + x = concatenate([x, skip_61]) + + # Layer 87 => 91 + x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87}, + {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88}, + {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89}, + {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90}, + {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], do_skip=False) + + # Layer 92 => 94 + pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92}, + {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False) + loss_yolo_2 = YoloLayer(anchors[6:12], + [2*num for num in max_grid], + batch_size, + warmup_batches, + ignore_thresh, + grid_scales[1], + obj_scale, + noobj_scale, + xywh_scale, + class_scale)([input_image, pred_yolo_2, true_yolo_2, true_boxes]) + + # Layer 95 => 98 + x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 96}], do_skip=False) + x = UpSampling2D(2)(x) + x = concatenate([x, skip_36]) + + # Layer 99 => 106 + pred_yolo_3 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 99}, + {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 100}, + {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 101}, + {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 102}, + {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103}, + {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104}, + {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False) + loss_yolo_3 = YoloLayer(anchors[:6], + [4*num for num in max_grid], + batch_size, + warmup_batches, + ignore_thresh, + grid_scales[2], + obj_scale, + noobj_scale, + xywh_scale, + class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes]) + + train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [loss_yolo_1, loss_yolo_2, loss_yolo_3]) + infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3]) + + return [train_model, infer_model] + +def dummy_loss(y_true, y_pred): + return tf.sqrt(tf.reduce_sum(y_pred)) diff --git a/predict_yolo3.py b/predict_yolo3.py index c0e9de2..1c02af2 100755 --- a/predict_yolo3.py +++ b/predict_yolo3.py @@ -8,9 +8,10 @@ import cv2 import sys sys.path += [os.path.abspath('keras-yolo3-master')] + from utils.utils import get_yolo_boxes, makedirs from utils.bbox import draw_boxes -from keras.models import load_model +from tensorflow.keras.models import load_model from tqdm import tqdm import numpy as np diff --git a/predict_yolo3_disconnect.py b/predict_yolo3_disconnect.py index 1460b10..f7d3d1c 100755 --- a/predict_yolo3_disconnect.py +++ b/predict_yolo3_disconnect.py @@ -10,75 +10,75 @@ sys.path += [os.path.abspath('keras-yolo3-master')] from utils.utils import get_yolo_boxes, makedirs from utils.bbox import draw_boxes -from keras.models import load_model +from tensorflow.keras.models import load_model from tqdm import tqdm import numpy as np def disconnect(image, boxes, obj_thresh = 0.5, area_min = 400, merge = 0, z_thresh = 1.8): - + new_boxes = [] for num, box in enumerate(boxes): - + xmin = box.xmin + merge xmax = box.xmax - merge ymin = box.ymin + merge ymax = box.ymax - merge - + if xmin > 0 and ymin > 0 and xmax < image.shape[1] and ymax < image.shape[0] and box.get_score() > obj_thresh: - + area = (ymax - ymin)*(xmax - xmin) z_score = np.sum(image[np.int(ymin):np.int(ymax), np.int(xmin):np.int(xmax)]) / area - + if area > area_min: - + box.z_score = z_score new_boxes.append(box) #boxes_area_score[str(num)] = {'xmin': xmin, 'xmax': xmax, 'ymin': ymin, 'ymax': ymax, 'score' : score, 'area' : area} - + mean_score = np.mean([box.z_score for box in new_boxes]) sd_score = np.std([box.z_score for box in new_boxes]) - + new_boxes = [box for box in new_boxes if (box.z_score - mean_score)/sd_score > z_thresh] - + for box in new_boxes: - + z_score = (box.z_score - mean_score)/sd_score box.classes[0] = min((z_score-z_thresh)*0.5/(3-z_thresh)+ 0.5, 1) - + return new_boxes - + def disconnect_plot(image, boxes, obj_thresh = 0.5, area_min = 400, merge = 0, z_thresh = 1.8): - + new_boxes = [] for num, box in enumerate(boxes): - + xmin = box.xmin + merge xmax = box.xmax - merge ymin = box.ymin + merge ymax = box.ymax - merge - + if xmin > 0 and ymin > 0 and xmax < image.shape[1] and ymax < image.shape[0] and box.get_score() > obj_thresh: - + area = (ymax - ymin)*(xmax - xmin) z_score = np.sum(image[np.int(ymin):np.int(ymax), np.int(xmin):np.int(xmax)]) / area - + if area > area_min: - + box.z_score = z_score new_boxes.append(box) #boxes_area_score[str(num)] = {'xmin': xmin, 'xmax': xmax, 'ymin': ymin, 'ymax': ymax, 'score' : score, 'area' : area} - + mean_score = np.mean([box.z_score for box in new_boxes]) sd_score = np.std([box.z_score for box in new_boxes]) - + normal_score = ([box.z_score for box in new_boxes] - mean_score)/sd_score -# plt.figure() +# plt.figure() # _ = plt.hist(normal_score, bins='auto') # arguments are passed to np.histogram # plt.title("Histogram with 'auto' bins") # plt.show() -# +# # plt.figure() # mean = np.mean([boxes_area_score[i]['area'] for i in boxes_area_score]) # sd = np.std([boxes_area_score[i]['area'] for i in boxes_area_score]) @@ -86,37 +86,37 @@ def disconnect_plot(image, boxes, obj_thresh = 0.5, area_min = 400, merge = 0, # _ = plt.hist(normal, bins='auto') # arguments are passed to np.histogram # plt.title("Histogram with 'auto' bins") # plt.show() - + new_boxes = [box for box in new_boxes if (box.z_score - mean_score)/sd_score > z_thresh] - + for box in new_boxes: - + z_score = (box.z_score - mean_score)/sd_score box.classes[0] = min((z_score-z_thresh)*0.5/(3-z_thresh)+ 0.5, 1) - - - - + + + + colors = plt.cm.brg(np.linspace(0, 1, 21)).tolist() plt.figure(figsize=(10,6)) plt.imshow(I,cmap = 'gray') current_axis = plt.gca() - + for box in new_boxes: - + color = colors[2] - + #boxes_area_score[key]['score_norm'] = (boxes_area_score[key]['score'] - mean) / sd - #z_score = (box.score - mean_score) / sd_score - #z_score = (boxes_area_score[key]['area'] ) - + #z_score = (box.score - mean_score) / sd_score + #z_score = (boxes_area_score[key]['area'] ) + ### Escribe el z-score #if z_score > 1: current_axis.text((box.xmin + box.xmax)/2, (box.ymin+ box.ymax)/2, '%.2f' % box.classes[0], size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0}) - + return new_boxes def _main_(args): @@ -225,21 +225,21 @@ def _main_(args): # the main loop times = [] images = [cv2.imread(image_path) for image_path in image_paths] - + print(images) start = time.time() # predict the bounding boxes boxes = get_yolo_boxes(infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh) boxes = [[box for box in boxes_image if box.get_score() > obj_thresh] for boxes_image in boxes] - + print('Elapsed time = {}'.format(time.time() - start)) times.append(time.time() - start) - + boxes_disc = [disconnect(image, boxes_image, z_thresh = 1.8) for image, boxes_image in zip(images, boxes)] - + for image, boxes_image in zip(images, boxes_disc): - - + + # draw bounding boxes on the image using labels I = image.copy() draw_boxes(I, boxes_image, config['model']['labels'], obj_thresh) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0826547 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,18 @@ +absl-py +astor +gast==0.2.2 +grpcio +h5py +Markdown +numpy +opencv-contrib-python +opt-einsum +protobuf +PyYAML +scipy +six +tensorflow==2.1 +termcolor +tqdm +Werkzeug +wrapt