Summary
This commit is contained in:
0
ssd_keras-master/data_generator/__init__.py
Normal file
0
ssd_keras-master/data_generator/__init__.py
Normal file
BIN
ssd_keras-master/data_generator/__init__.pyc
Normal file
BIN
ssd_keras-master/data_generator/__init__.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,183 @@
|
||||
'''
|
||||
The data augmentation operations of the original SSD implementation.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
|
||||
from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation
|
||||
from data_generator.object_detection_2d_geometric_ops import RandomFlip, RandomTranslate, RandomScale
|
||||
from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator
|
||||
|
||||
class DataAugmentationConstantInputSize:
|
||||
'''
|
||||
Applies a chain of photometric and geometric image transformations. For documentation, please refer
|
||||
to the documentation of the individual transformations involved.
|
||||
|
||||
Important: This augmentation chain is suitable for constant-size images only.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
random_brightness=(-48, 48, 0.5),
|
||||
random_contrast=(0.5, 1.8, 0.5),
|
||||
random_saturation=(0.5, 1.8, 0.5),
|
||||
random_hue=(18, 0.5),
|
||||
random_flip=0.5,
|
||||
random_translate=((0.03,0.5), (0.03,0.5), 0.5),
|
||||
random_scale=(0.5, 2.0, 0.5),
|
||||
n_trials_max=3,
|
||||
clip_boxes=True,
|
||||
overlap_criterion='area',
|
||||
bounds_box_filter=(0.3, 1.0),
|
||||
bounds_validator=(0.5, 1.0),
|
||||
n_boxes_min=1,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
|
||||
if (random_scale[0] >= 1) or (random_scale[1] <= 1):
|
||||
raise ValueError("This sequence of transformations only makes sense if the minimum scaling factor is <1 and the maximum scaling factor is >1.")
|
||||
|
||||
self.n_trials_max = n_trials_max
|
||||
self.clip_boxes = clip_boxes
|
||||
self.overlap_criterion = overlap_criterion
|
||||
self.bounds_box_filter = bounds_box_filter
|
||||
self.bounds_validator = bounds_validator
|
||||
self.n_boxes_min = n_boxes_min
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
|
||||
# Determines which boxes are kept in an image after the transformations have been applied.
|
||||
self.box_filter = BoxFilter(check_overlap=True,
|
||||
check_min_area=True,
|
||||
check_degenerate=True,
|
||||
overlap_criterion=self.overlap_criterion,
|
||||
overlap_bounds=self.bounds_box_filter,
|
||||
min_area=16,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Determines whether the result of the transformations is a valid training image.
|
||||
self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
|
||||
bounds=self.bounds_validator,
|
||||
n_boxes_min=self.n_boxes_min,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Utility distortions
|
||||
self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
|
||||
self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
|
||||
self.convert_to_float32 = ConvertDataType(to='float32')
|
||||
self.convert_to_uint8 = ConvertDataType(to='uint8')
|
||||
self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels.
|
||||
|
||||
# Photometric transformations
|
||||
self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2])
|
||||
self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2])
|
||||
self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2])
|
||||
self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1])
|
||||
|
||||
# Geometric transformations
|
||||
self.random_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format)
|
||||
self.random_translate = RandomTranslate(dy_minmax=random_translate[0],
|
||||
dx_minmax=random_translate[1],
|
||||
prob=random_translate[2],
|
||||
clip_boxes=self.clip_boxes,
|
||||
box_filter=self.box_filter,
|
||||
image_validator=self.image_validator,
|
||||
n_trials_max=self.n_trials_max,
|
||||
background=self.background,
|
||||
labels_format=self.labels_format)
|
||||
self.random_zoom_in = RandomScale(min_factor=1.0,
|
||||
max_factor=random_scale[1],
|
||||
prob=random_scale[2],
|
||||
clip_boxes=self.clip_boxes,
|
||||
box_filter=self.box_filter,
|
||||
image_validator=self.image_validator,
|
||||
n_trials_max=self.n_trials_max,
|
||||
background=self.background,
|
||||
labels_format=self.labels_format)
|
||||
self.random_zoom_out = RandomScale(min_factor=random_scale[0],
|
||||
max_factor=1.0,
|
||||
prob=random_scale[2],
|
||||
clip_boxes=self.clip_boxes,
|
||||
box_filter=self.box_filter,
|
||||
image_validator=self.image_validator,
|
||||
n_trials_max=self.n_trials_max,
|
||||
background=self.background,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# If we zoom in, do translation before scaling.
|
||||
self.sequence1 = [self.convert_to_3_channels,
|
||||
self.convert_to_float32,
|
||||
self.random_brightness,
|
||||
self.random_contrast,
|
||||
self.convert_to_uint8,
|
||||
self.convert_RGB_to_HSV,
|
||||
self.convert_to_float32,
|
||||
self.random_saturation,
|
||||
self.random_hue,
|
||||
self.convert_to_uint8,
|
||||
self.convert_HSV_to_RGB,
|
||||
self.random_translate,
|
||||
self.random_zoom_in,
|
||||
self.random_flip]
|
||||
|
||||
# If we zoom out, do scaling before translation.
|
||||
self.sequence2 = [self.convert_to_3_channels,
|
||||
self.convert_to_float32,
|
||||
self.random_brightness,
|
||||
self.convert_to_uint8,
|
||||
self.convert_RGB_to_HSV,
|
||||
self.convert_to_float32,
|
||||
self.random_saturation,
|
||||
self.random_hue,
|
||||
self.convert_to_uint8,
|
||||
self.convert_HSV_to_RGB,
|
||||
self.convert_to_float32,
|
||||
self.random_contrast,
|
||||
self.convert_to_uint8,
|
||||
self.random_zoom_out,
|
||||
self.random_translate,
|
||||
self.random_flip]
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
self.random_translate.labels_format = self.labels_format
|
||||
self.random_zoom_in.labels_format = self.labels_format
|
||||
self.random_zoom_out.labels_format = self.labels_format
|
||||
self.random_flip.labels_format = self.labels_format
|
||||
|
||||
# Choose sequence 1 with probability 0.5.
|
||||
if np.random.choice(2):
|
||||
|
||||
if not (labels is None):
|
||||
for transform in self.sequence1:
|
||||
image, labels = transform(image, labels)
|
||||
return image, labels
|
||||
else:
|
||||
for transform in self.sequence1:
|
||||
image = transform(image)
|
||||
return image
|
||||
# Choose sequence 2 with probability 0.5.
|
||||
else:
|
||||
|
||||
if not (labels is None):
|
||||
for transform in self.sequence2:
|
||||
image, labels = transform(image, labels)
|
||||
return image, labels
|
||||
else:
|
||||
for transform in self.sequence2:
|
||||
image = transform(image)
|
||||
return image
|
||||
@@ -0,0 +1,280 @@
|
||||
'''
|
||||
The data augmentation operations of the original SSD implementation.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
import cv2
|
||||
import inspect
|
||||
|
||||
from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation, RandomChannelSwap
|
||||
from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch, RandomPatchInf
|
||||
from data_generator.object_detection_2d_geometric_ops import ResizeRandomInterp, RandomFlip
|
||||
from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator
|
||||
|
||||
class SSDRandomCrop:
|
||||
'''
|
||||
Performs the same random crops as defined by the `batch_sampler` instructions
|
||||
of the original Caffe implementation of SSD. A description of this random cropping
|
||||
strategy can also be found in the data augmentation section of the paper:
|
||||
https://arxiv.org/abs/1512.02325
|
||||
'''
|
||||
|
||||
def __init__(self, labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
self.labels_format = labels_format
|
||||
|
||||
# This randomly samples one of the lower IoU bounds defined
|
||||
# by the `sample_space` every time it is called.
|
||||
self.bound_generator = BoundGenerator(sample_space=((None, None),
|
||||
(0.1, None),
|
||||
(0.3, None),
|
||||
(0.5, None),
|
||||
(0.7, None),
|
||||
(0.9, None)),
|
||||
weights=None)
|
||||
|
||||
# Produces coordinates for candidate patches such that the height
|
||||
# and width of the patches are between 0.3 and 1.0 of the height
|
||||
# and width of the respective image and the aspect ratio of the
|
||||
# patches is between 0.5 and 2.0.
|
||||
self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w',
|
||||
min_scale=0.3,
|
||||
max_scale=1.0,
|
||||
scale_uniformly=False,
|
||||
min_aspect_ratio = 0.5,
|
||||
max_aspect_ratio = 2.0)
|
||||
|
||||
# Filters out boxes whose center point does not lie within the
|
||||
# chosen patches.
|
||||
self.box_filter = BoxFilter(check_overlap=True,
|
||||
check_min_area=False,
|
||||
check_degenerate=False,
|
||||
overlap_criterion='center_point',
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Determines whether a given patch is considered a valid patch.
|
||||
# Defines a patch to be valid if at least one ground truth bounding box
|
||||
# (n_boxes_min == 1) has an IoU overlap with the patch that
|
||||
# meets the requirements defined by `bound_generator`.
|
||||
self.image_validator = ImageValidator(overlap_criterion='iou',
|
||||
n_boxes_min=1,
|
||||
labels_format=self.labels_format,
|
||||
border_pixels='half')
|
||||
|
||||
# Performs crops according to the parameters set in the objects above.
|
||||
# Runs until either a valid patch is found or the original input image
|
||||
# is returned unaltered. Runs a maximum of 50 trials to find a valid
|
||||
# patch for each new sampled IoU threshold. Every 50 trials, the original
|
||||
# image is returned as is with probability (1 - prob) = 0.143.
|
||||
self.random_crop = RandomPatchInf(patch_coord_generator=self.patch_coord_generator,
|
||||
box_filter=self.box_filter,
|
||||
image_validator=self.image_validator,
|
||||
bound_generator=self.bound_generator,
|
||||
n_trials_max=50,
|
||||
clip_boxes=True,
|
||||
prob=0.857,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
self.random_crop.labels_format = self.labels_format
|
||||
return self.random_crop(image, labels, return_inverter)
|
||||
|
||||
class SSDExpand:
|
||||
'''
|
||||
Performs the random image expansion as defined by the `train_transform_param` instructions
|
||||
of the original Caffe implementation of SSD. A description of this expansion strategy
|
||||
can also be found in section 3.6 ("Data Augmentation for Small Object Accuracy") of the paper:
|
||||
https://arxiv.org/abs/1512.02325
|
||||
'''
|
||||
|
||||
def __init__(self, background=(123, 117, 104), labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
|
||||
background pixels of the translated images.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
self.labels_format = labels_format
|
||||
|
||||
# Generate coordinates for patches that are between 1.0 and 4.0 times
|
||||
# the size of the input image in both spatial dimensions.
|
||||
self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w',
|
||||
min_scale=1.0,
|
||||
max_scale=4.0,
|
||||
scale_uniformly=True)
|
||||
|
||||
# With probability 0.5, place the input image randomly on a canvas filled with
|
||||
# mean color values according to the parameters set above. With probability 0.5,
|
||||
# return the input image unaltered.
|
||||
self.expand = RandomPatch(patch_coord_generator=self.patch_coord_generator,
|
||||
box_filter=None,
|
||||
image_validator=None,
|
||||
n_trials_max=1,
|
||||
clip_boxes=False,
|
||||
prob=0.5,
|
||||
background=background,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
self.expand.labels_format = self.labels_format
|
||||
return self.expand(image, labels, return_inverter)
|
||||
|
||||
class SSDPhotometricDistortions:
|
||||
'''
|
||||
Performs the photometric distortions defined by the `train_transform_param` instructions
|
||||
of the original Caffe implementation of SSD.
|
||||
'''
|
||||
|
||||
def __init__(self):
|
||||
|
||||
self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
|
||||
self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
|
||||
self.convert_to_float32 = ConvertDataType(to='float32')
|
||||
self.convert_to_uint8 = ConvertDataType(to='uint8')
|
||||
self.convert_to_3_channels = ConvertTo3Channels()
|
||||
self.random_brightness = RandomBrightness(lower=-32, upper=32, prob=0.5)
|
||||
self.random_contrast = RandomContrast(lower=0.5, upper=1.5, prob=0.5)
|
||||
self.random_saturation = RandomSaturation(lower=0.5, upper=1.5, prob=0.5)
|
||||
self.random_hue = RandomHue(max_delta=18, prob=0.5)
|
||||
self.random_channel_swap = RandomChannelSwap(prob=0.0)
|
||||
|
||||
self.sequence1 = [self.convert_to_3_channels,
|
||||
self.convert_to_float32,
|
||||
self.random_brightness,
|
||||
self.random_contrast,
|
||||
self.convert_to_uint8,
|
||||
self.convert_RGB_to_HSV,
|
||||
self.convert_to_float32,
|
||||
self.random_saturation,
|
||||
self.random_hue,
|
||||
self.convert_to_uint8,
|
||||
self.convert_HSV_to_RGB,
|
||||
self.random_channel_swap]
|
||||
|
||||
self.sequence2 = [self.convert_to_3_channels,
|
||||
self.convert_to_float32,
|
||||
self.random_brightness,
|
||||
self.convert_to_uint8,
|
||||
self.convert_RGB_to_HSV,
|
||||
self.convert_to_float32,
|
||||
self.random_saturation,
|
||||
self.random_hue,
|
||||
self.convert_to_uint8,
|
||||
self.convert_HSV_to_RGB,
|
||||
self.convert_to_float32,
|
||||
self.random_contrast,
|
||||
self.convert_to_uint8,
|
||||
self.random_channel_swap]
|
||||
|
||||
def __call__(self, image, labels):
|
||||
|
||||
# Choose sequence 1 with probability 0.5.
|
||||
if np.random.choice(2):
|
||||
|
||||
for transform in self.sequence1:
|
||||
image, labels = transform(image, labels)
|
||||
return image, labels
|
||||
# Choose sequence 2 with probability 0.5.
|
||||
else:
|
||||
|
||||
for transform in self.sequence2:
|
||||
image, labels = transform(image, labels)
|
||||
return image, labels
|
||||
|
||||
class SSDDataAugmentation:
|
||||
'''
|
||||
Reproduces the data augmentation pipeline used in the training of the original
|
||||
Caffe implementation of SSD.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
img_height=300,
|
||||
img_width=300,
|
||||
background=(123, 117, 104),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
height (int): The desired height of the output images in pixels.
|
||||
width (int): The desired width of the output images in pixels.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
|
||||
background pixels of the translated images.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
self.labels_format = labels_format
|
||||
|
||||
self.photometric_distortions = SSDPhotometricDistortions()
|
||||
self.expand = SSDExpand(background=background, labels_format=self.labels_format)
|
||||
self.random_crop = SSDRandomCrop(labels_format=self.labels_format)
|
||||
self.random_flip = RandomFlip(dim='horizontal', prob=0.5, labels_format=self.labels_format)
|
||||
|
||||
# This box filter makes sure that the resized images don't contain any degenerate boxes.
|
||||
# Resizing the images could lead the boxes to becomes smaller. For boxes that are already
|
||||
# pretty small, that might result in boxes with height and/or width zero, which we obviously
|
||||
# cannot allow.
|
||||
self.box_filter = BoxFilter(check_overlap=False,
|
||||
check_min_area=False,
|
||||
check_degenerate=True,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
self.resize = ResizeRandomInterp(height=img_height,
|
||||
width=img_width,
|
||||
interpolation_modes=[cv2.INTER_NEAREST,
|
||||
cv2.INTER_LINEAR,
|
||||
cv2.INTER_CUBIC,
|
||||
cv2.INTER_AREA,
|
||||
cv2.INTER_LANCZOS4],
|
||||
box_filter=self.box_filter,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
self.sequence = [self.photometric_distortions,
|
||||
self.expand,
|
||||
self.random_crop,
|
||||
self.random_flip,
|
||||
self.resize]
|
||||
|
||||
def __call__(self, image, labels, return_inverter=False):
|
||||
self.expand.labels_format = self.labels_format
|
||||
self.random_crop.labels_format = self.labels_format
|
||||
self.random_flip.labels_format = self.labels_format
|
||||
self.resize.labels_format = self.labels_format
|
||||
|
||||
inverters = []
|
||||
|
||||
for transform in self.sequence:
|
||||
if return_inverter and ('return_inverter' in inspect.signature(transform).parameters):
|
||||
image, labels, inverter = transform(image, labels, return_inverter=True)
|
||||
inverters.append(inverter)
|
||||
else:
|
||||
image, labels = transform(image, labels)
|
||||
|
||||
if return_inverter:
|
||||
return image, labels, inverters[::-1]
|
||||
else:
|
||||
return image, labels
|
||||
@@ -0,0 +1,157 @@
|
||||
'''
|
||||
A data augmentation pipeline for datasets in bird's eye view, i.e. where there is
|
||||
no "up" or "down" in the images.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
|
||||
from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation
|
||||
from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip, RandomRotate
|
||||
from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch
|
||||
from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator
|
||||
|
||||
class DataAugmentationSatellite:
|
||||
'''
|
||||
A data augmentation pipeline for datasets in bird's eye view, i.e. where there is
|
||||
no "up" or "down" in the images.
|
||||
|
||||
Applies a chain of photometric and geometric image transformations. For documentation, please refer
|
||||
to the documentation of the individual transformations involved.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
resize_height,
|
||||
resize_width,
|
||||
random_brightness=(-48, 48, 0.5),
|
||||
random_contrast=(0.5, 1.8, 0.5),
|
||||
random_saturation=(0.5, 1.8, 0.5),
|
||||
random_hue=(18, 0.5),
|
||||
random_flip=0.5,
|
||||
random_rotate=([90, 180, 270], 0.5),
|
||||
min_scale=0.3,
|
||||
max_scale=2.0,
|
||||
min_aspect_ratio = 0.8,
|
||||
max_aspect_ratio = 1.25,
|
||||
n_trials_max=3,
|
||||
clip_boxes=True,
|
||||
overlap_criterion='area',
|
||||
bounds_box_filter=(0.3, 1.0),
|
||||
bounds_validator=(0.5, 1.0),
|
||||
n_boxes_min=1,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
|
||||
self.n_trials_max = n_trials_max
|
||||
self.clip_boxes = clip_boxes
|
||||
self.overlap_criterion = overlap_criterion
|
||||
self.bounds_box_filter = bounds_box_filter
|
||||
self.bounds_validator = bounds_validator
|
||||
self.n_boxes_min = n_boxes_min
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
|
||||
# Determines which boxes are kept in an image after the transformations have been applied.
|
||||
self.box_filter_patch = BoxFilter(check_overlap=True,
|
||||
check_min_area=False,
|
||||
check_degenerate=False,
|
||||
overlap_criterion=self.overlap_criterion,
|
||||
overlap_bounds=self.bounds_box_filter,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
self.box_filter_resize = BoxFilter(check_overlap=False,
|
||||
check_min_area=True,
|
||||
check_degenerate=True,
|
||||
min_area=16,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Determines whether the result of the transformations is a valid training image.
|
||||
self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
|
||||
bounds=self.bounds_validator,
|
||||
n_boxes_min=self.n_boxes_min,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Utility transformations
|
||||
self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels.
|
||||
self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
|
||||
self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
|
||||
self.convert_to_float32 = ConvertDataType(to='float32')
|
||||
self.convert_to_uint8 = ConvertDataType(to='uint8')
|
||||
self.resize = Resize(height=resize_height,
|
||||
width=resize_width,
|
||||
box_filter=self.box_filter_resize,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Photometric transformations
|
||||
self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2])
|
||||
self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2])
|
||||
self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2])
|
||||
self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1])
|
||||
|
||||
# Geometric transformations
|
||||
self.random_horizontal_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format)
|
||||
self.random_vertical_flip = RandomFlip(dim='vertical', prob=random_flip, labels_format=self.labels_format)
|
||||
self.random_rotate = RandomRotate(angles=random_rotate[0], prob=random_rotate[1], labels_format=self.labels_format)
|
||||
self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar',
|
||||
min_scale=min_scale,
|
||||
max_scale=max_scale,
|
||||
scale_uniformly=False,
|
||||
min_aspect_ratio = min_aspect_ratio,
|
||||
max_aspect_ratio = max_aspect_ratio)
|
||||
self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator,
|
||||
box_filter=self.box_filter_patch,
|
||||
image_validator=self.image_validator,
|
||||
n_trials_max=self.n_trials_max,
|
||||
clip_boxes=self.clip_boxes,
|
||||
prob=1.0,
|
||||
can_fail=False,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Define the processing chain.
|
||||
self.transformations = [self.convert_to_3_channels,
|
||||
self.convert_to_float32,
|
||||
self.random_brightness,
|
||||
self.random_contrast,
|
||||
self.convert_to_uint8,
|
||||
self.convert_RGB_to_HSV,
|
||||
self.convert_to_float32,
|
||||
self.random_saturation,
|
||||
self.random_hue,
|
||||
self.convert_to_uint8,
|
||||
self.convert_HSV_to_RGB,
|
||||
self.random_horizontal_flip,
|
||||
self.random_vertical_flip,
|
||||
self.random_rotate,
|
||||
self.random_patch,
|
||||
self.resize]
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
self.random_patch.labels_format = self.labels_format
|
||||
self.random_horizontal_flip.labels_format = self.labels_format
|
||||
self.random_vertical_flip.labels_format = self.labels_format
|
||||
self.random_rotate.labels_format = self.labels_format
|
||||
self.resize.labels_format = self.labels_format
|
||||
|
||||
if not (labels is None):
|
||||
for transform in self.transformations:
|
||||
image, labels = transform(image, labels)
|
||||
return image, labels
|
||||
else:
|
||||
for transform in self.sequence1:
|
||||
image = transform(image)
|
||||
return image
|
||||
@@ -0,0 +1,152 @@
|
||||
'''
|
||||
A data augmentation pipeline suitable for variable-size images that produces effects
|
||||
that are similar (but not identical) to those of the original SSD data augmentation
|
||||
pipeline while being faster.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
|
||||
from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation
|
||||
from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip
|
||||
from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch
|
||||
from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator
|
||||
|
||||
class DataAugmentationVariableInputSize:
|
||||
'''
|
||||
A data augmentation pipeline suitable for variable-size images that produces effects
|
||||
that are similar (but not identical!) to those of the original SSD data augmentation
|
||||
pipeline while being faster.
|
||||
|
||||
Applies a chain of photometric and geometric image transformations. For documentation, please refer
|
||||
to the documentation of the individual transformations involved.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
resize_height,
|
||||
resize_width,
|
||||
random_brightness=(-48, 48, 0.5),
|
||||
random_contrast=(0.5, 1.8, 0.5),
|
||||
random_saturation=(0.5, 1.8, 0.5),
|
||||
random_hue=(18, 0.5),
|
||||
random_flip=0.5,
|
||||
min_scale=0.3,
|
||||
max_scale=2.0,
|
||||
min_aspect_ratio = 0.5,
|
||||
max_aspect_ratio = 2.0,
|
||||
n_trials_max=3,
|
||||
clip_boxes=True,
|
||||
overlap_criterion='area',
|
||||
bounds_box_filter=(0.3, 1.0),
|
||||
bounds_validator=(0.5, 1.0),
|
||||
n_boxes_min=1,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
|
||||
self.n_trials_max = n_trials_max
|
||||
self.clip_boxes = clip_boxes
|
||||
self.overlap_criterion = overlap_criterion
|
||||
self.bounds_box_filter = bounds_box_filter
|
||||
self.bounds_validator = bounds_validator
|
||||
self.n_boxes_min = n_boxes_min
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
|
||||
# Determines which boxes are kept in an image after the transformations have been applied.
|
||||
self.box_filter_patch = BoxFilter(check_overlap=True,
|
||||
check_min_area=False,
|
||||
check_degenerate=False,
|
||||
overlap_criterion=self.overlap_criterion,
|
||||
overlap_bounds=self.bounds_box_filter,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
self.box_filter_resize = BoxFilter(check_overlap=False,
|
||||
check_min_area=True,
|
||||
check_degenerate=True,
|
||||
min_area=16,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Determines whether the result of the transformations is a valid training image.
|
||||
self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
|
||||
bounds=self.bounds_validator,
|
||||
n_boxes_min=self.n_boxes_min,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Utility transformations
|
||||
self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels.
|
||||
self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
|
||||
self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
|
||||
self.convert_to_float32 = ConvertDataType(to='float32')
|
||||
self.convert_to_uint8 = ConvertDataType(to='uint8')
|
||||
self.resize = Resize(height=resize_height,
|
||||
width=resize_width,
|
||||
box_filter=self.box_filter_resize,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Photometric transformations
|
||||
self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2])
|
||||
self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2])
|
||||
self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2])
|
||||
self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1])
|
||||
|
||||
# Geometric transformations
|
||||
self.random_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format)
|
||||
self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar',
|
||||
min_scale=min_scale,
|
||||
max_scale=max_scale,
|
||||
scale_uniformly=False,
|
||||
min_aspect_ratio = min_aspect_ratio,
|
||||
max_aspect_ratio = max_aspect_ratio)
|
||||
self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator,
|
||||
box_filter=self.box_filter_patch,
|
||||
image_validator=self.image_validator,
|
||||
n_trials_max=self.n_trials_max,
|
||||
clip_boxes=self.clip_boxes,
|
||||
prob=1.0,
|
||||
can_fail=False,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
# Define the processing chain
|
||||
self.transformations = [self.convert_to_3_channels,
|
||||
self.convert_to_float32,
|
||||
self.random_brightness,
|
||||
self.random_contrast,
|
||||
self.convert_to_uint8,
|
||||
self.convert_RGB_to_HSV,
|
||||
self.convert_to_float32,
|
||||
self.random_saturation,
|
||||
self.random_hue,
|
||||
self.convert_to_uint8,
|
||||
self.convert_HSV_to_RGB,
|
||||
self.random_patch,
|
||||
self.random_flip,
|
||||
self.resize]
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
self.random_patch.labels_format = self.labels_format
|
||||
self.random_flip.labels_format = self.labels_format
|
||||
self.resize.labels_format = self.labels_format
|
||||
|
||||
if not (labels is None):
|
||||
for transform in self.transformations:
|
||||
image, labels = transform(image, labels)
|
||||
return image, labels
|
||||
else:
|
||||
for transform in self.sequence1:
|
||||
image = transform(image)
|
||||
return image
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -0,0 +1,779 @@
|
||||
'''
|
||||
Various geometric image transformations for 2D object detection, both deterministic
|
||||
and probabilistic.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
import cv2
|
||||
import random
|
||||
|
||||
from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator
|
||||
|
||||
class Resize:
|
||||
'''
|
||||
Resizes images to a specified height and width in pixels.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
height,
|
||||
width,
|
||||
interpolation_mode=cv2.INTER_LINEAR,
|
||||
box_filter=None,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
height (int): The desired height of the output images in pixels.
|
||||
width (int): The desired width of the output images in pixels.
|
||||
interpolation_mode (int, optional): An integer that denotes a valid
|
||||
OpenCV interpolation mode. For example, integers 0 through 5 are
|
||||
valid interpolation modes.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
if not (isinstance(box_filter, BoxFilter) or box_filter is None):
|
||||
raise ValueError("`box_filter` must be either `None` or a `BoxFilter` object.")
|
||||
self.out_height = height
|
||||
self.out_width = width
|
||||
self.interpolation_mode = interpolation_mode
|
||||
self.box_filter = box_filter
|
||||
self.labels_format = labels_format
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
image = cv2.resize(image,
|
||||
dsize=(self.out_width, self.out_height),
|
||||
interpolation=self.interpolation_mode)
|
||||
|
||||
if return_inverter:
|
||||
def inverter(labels):
|
||||
labels = np.copy(labels)
|
||||
labels[:, [ymin+1, ymax+1]] = np.round(labels[:, [ymin+1, ymax+1]] * (img_height / self.out_height), decimals=0)
|
||||
labels[:, [xmin+1, xmax+1]] = np.round(labels[:, [xmin+1, xmax+1]] * (img_width / self.out_width), decimals=0)
|
||||
return labels
|
||||
|
||||
if labels is None:
|
||||
if return_inverter:
|
||||
return image, inverter
|
||||
else:
|
||||
return image
|
||||
else:
|
||||
labels = np.copy(labels)
|
||||
labels[:, [ymin, ymax]] = np.round(labels[:, [ymin, ymax]] * (self.out_height / img_height), decimals=0)
|
||||
labels[:, [xmin, xmax]] = np.round(labels[:, [xmin, xmax]] * (self.out_width / img_width), decimals=0)
|
||||
|
||||
if not (self.box_filter is None):
|
||||
self.box_filter.labels_format = self.labels_format
|
||||
labels = self.box_filter(labels=labels,
|
||||
image_height=self.out_height,
|
||||
image_width=self.out_width)
|
||||
|
||||
if return_inverter:
|
||||
return image, labels, inverter
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class ResizeRandomInterp:
|
||||
'''
|
||||
Resizes images to a specified height and width in pixels using a radnomly
|
||||
selected interpolation mode.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
height,
|
||||
width,
|
||||
interpolation_modes=[cv2.INTER_NEAREST,
|
||||
cv2.INTER_LINEAR,
|
||||
cv2.INTER_CUBIC,
|
||||
cv2.INTER_AREA,
|
||||
cv2.INTER_LANCZOS4],
|
||||
box_filter=None,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
height (int): The desired height of the output image in pixels.
|
||||
width (int): The desired width of the output image in pixels.
|
||||
interpolation_modes (list/tuple, optional): A list/tuple of integers
|
||||
that represent valid OpenCV interpolation modes. For example,
|
||||
integers 0 through 5 are valid interpolation modes.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
if not (isinstance(interpolation_modes, (list, tuple))):
|
||||
raise ValueError("`interpolation_mode` must be a list or tuple.")
|
||||
self.height = height
|
||||
self.width = width
|
||||
self.interpolation_modes = interpolation_modes
|
||||
self.box_filter = box_filter
|
||||
self.labels_format = labels_format
|
||||
self.resize = Resize(height=self.height,
|
||||
width=self.width,
|
||||
box_filter=self.box_filter,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
self.resize.interpolation_mode = np.random.choice(self.interpolation_modes)
|
||||
self.resize.labels_format = self.labels_format
|
||||
return self.resize(image, labels, return_inverter)
|
||||
|
||||
class Flip:
|
||||
'''
|
||||
Flips images horizontally or vertically.
|
||||
'''
|
||||
def __init__(self,
|
||||
dim='horizontal',
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
dim (str, optional): Can be either of 'horizontal' and 'vertical'.
|
||||
If 'horizontal', images will be flipped horizontally, i.e. along
|
||||
the vertical axis. If 'horizontal', images will be flipped vertically,
|
||||
i.e. along the horizontal axis.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
if not (dim in {'horizontal', 'vertical'}): raise ValueError("`dim` can be one of 'horizontal' and 'vertical'.")
|
||||
self.dim = dim
|
||||
self.labels_format = labels_format
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
if self.dim == 'horizontal':
|
||||
image = image[:,::-1]
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
labels = np.copy(labels)
|
||||
labels[:, [xmin, xmax]] = img_width - labels[:, [xmax, xmin]]
|
||||
return image, labels
|
||||
else:
|
||||
image = image[::-1]
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
labels = np.copy(labels)
|
||||
labels[:, [ymin, ymax]] = img_height - labels[:, [ymax, ymin]]
|
||||
return image, labels
|
||||
|
||||
class RandomFlip:
|
||||
'''
|
||||
Randomly flips images horizontally or vertically. The randomness only refers
|
||||
to whether or not the image will be flipped.
|
||||
'''
|
||||
def __init__(self,
|
||||
dim='horizontal',
|
||||
prob=0.5,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
dim (str, optional): Can be either of 'horizontal' and 'vertical'.
|
||||
If 'horizontal', images will be flipped horizontally, i.e. along
|
||||
the vertical axis. If 'horizontal', images will be flipped vertically,
|
||||
i.e. along the horizontal axis.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
self.dim = dim
|
||||
self.prob = prob
|
||||
self.labels_format = labels_format
|
||||
self.flip = Flip(dim=self.dim, labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
self.flip.labels_format = self.labels_format
|
||||
return self.flip(image, labels)
|
||||
elif labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class Translate:
|
||||
'''
|
||||
Translates images horizontally and/or vertically.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
dy,
|
||||
dx,
|
||||
clip_boxes=True,
|
||||
box_filter=None,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
dy (float): The fraction of the image height by which to translate images along the
|
||||
vertical axis. Positive values translate images downwards, negative values
|
||||
translate images upwards.
|
||||
dx (float): The fraction of the image width by which to translate images along the
|
||||
horizontal axis. Positive values translate images to the right, negative values
|
||||
translate images to the left.
|
||||
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
|
||||
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
|
||||
image after the translation.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
|
||||
background pixels of the translated images.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
if not (isinstance(box_filter, BoxFilter) or box_filter is None):
|
||||
raise ValueError("`box_filter` must be either `None` or a `BoxFilter` object.")
|
||||
self.dy_rel = dy
|
||||
self.dx_rel = dx
|
||||
self.clip_boxes = clip_boxes
|
||||
self.box_filter = box_filter
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
# Compute the translation matrix.
|
||||
dy_abs = int(round(img_height * self.dy_rel))
|
||||
dx_abs = int(round(img_width * self.dx_rel))
|
||||
M = np.float32([[1, 0, dx_abs],
|
||||
[0, 1, dy_abs]])
|
||||
|
||||
# Translate the image.
|
||||
image = cv2.warpAffine(image,
|
||||
M=M,
|
||||
dsize=(img_width, img_height),
|
||||
borderMode=cv2.BORDER_CONSTANT,
|
||||
borderValue=self.background)
|
||||
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
labels = np.copy(labels)
|
||||
# Translate the box coordinates to the translated image's coordinate system.
|
||||
labels[:,[xmin,xmax]] += dx_abs
|
||||
labels[:,[ymin,ymax]] += dy_abs
|
||||
|
||||
# Compute all valid boxes for this patch.
|
||||
if not (self.box_filter is None):
|
||||
self.box_filter.labels_format = self.labels_format
|
||||
labels = self.box_filter(labels=labels,
|
||||
image_height=img_height,
|
||||
image_width=img_width)
|
||||
|
||||
if self.clip_boxes:
|
||||
labels[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=img_height-1)
|
||||
labels[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=img_width-1)
|
||||
|
||||
return image, labels
|
||||
|
||||
class RandomTranslate:
|
||||
'''
|
||||
Randomly translates images horizontally and/or vertically.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
dy_minmax=(0.03,0.3),
|
||||
dx_minmax=(0.03,0.3),
|
||||
prob=0.5,
|
||||
clip_boxes=True,
|
||||
box_filter=None,
|
||||
image_validator=None,
|
||||
n_trials_max=3,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
dy_minmax (list/tuple, optional): A 2-tuple `(min, max)` of non-negative floats that
|
||||
determines the minimum and maximum relative translation of images along the vertical
|
||||
axis both upward and downward. That is, images will be randomly translated by at least
|
||||
`min` and at most `max` either upward or downward. For example, if `dy_minmax == (0.05,0.3)`,
|
||||
an image of size `(100,100)` will be translated by at least 5 and at most 30 pixels
|
||||
either upward or downward. The translation direction is chosen randomly.
|
||||
dx_minmax (list/tuple, optional): A 2-tuple `(min, max)` of non-negative floats that
|
||||
determines the minimum and maximum relative translation of images along the horizontal
|
||||
axis both to the left and right. That is, images will be randomly translated by at least
|
||||
`min` and at most `max` either left or right. For example, if `dx_minmax == (0.05,0.3)`,
|
||||
an image of size `(100,100)` will be translated by at least 5 and at most 30 pixels
|
||||
either left or right. The translation direction is chosen randomly.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
|
||||
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
|
||||
image after the translation.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
|
||||
An `ImageValidator` object to determine whether a translated image is valid. If `None`,
|
||||
any outcome is valid.
|
||||
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
|
||||
Determines the maxmial number of trials to produce a valid image. If no valid image could
|
||||
be produced in `n_trials_max` trials, returns the unaltered input image.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
|
||||
background pixels of the translated images.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
if dy_minmax[0] > dy_minmax[1]:
|
||||
raise ValueError("It must be `dy_minmax[0] <= dy_minmax[1]`.")
|
||||
if dx_minmax[0] > dx_minmax[1]:
|
||||
raise ValueError("It must be `dx_minmax[0] <= dx_minmax[1]`.")
|
||||
if dy_minmax[0] < 0 or dx_minmax[0] < 0:
|
||||
raise ValueError("It must be `dy_minmax[0] >= 0` and `dx_minmax[0] >= 0`.")
|
||||
if not (isinstance(image_validator, ImageValidator) or image_validator is None):
|
||||
raise ValueError("`image_validator` must be either `None` or an `ImageValidator` object.")
|
||||
self.dy_minmax = dy_minmax
|
||||
self.dx_minmax = dx_minmax
|
||||
self.prob = prob
|
||||
self.clip_boxes = clip_boxes
|
||||
self.box_filter = box_filter
|
||||
self.image_validator = image_validator
|
||||
self.n_trials_max = n_trials_max
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
self.translate = Translate(dy=0,
|
||||
dx=0,
|
||||
clip_boxes=self.clip_boxes,
|
||||
box_filter=self.box_filter,
|
||||
background=self.background,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
# Override the preset labels format.
|
||||
if not self.image_validator is None:
|
||||
self.image_validator.labels_format = self.labels_format
|
||||
self.translate.labels_format = self.labels_format
|
||||
|
||||
for _ in range(max(1, self.n_trials_max)):
|
||||
|
||||
# Pick the relative amount by which to translate.
|
||||
dy_abs = np.random.uniform(self.dy_minmax[0], self.dy_minmax[1])
|
||||
dx_abs = np.random.uniform(self.dx_minmax[0], self.dx_minmax[1])
|
||||
# Pick the direction in which to translate.
|
||||
dy = np.random.choice([-dy_abs, dy_abs])
|
||||
dx = np.random.choice([-dx_abs, dx_abs])
|
||||
self.translate.dy_rel = dy
|
||||
self.translate.dx_rel = dx
|
||||
|
||||
if (labels is None) or (self.image_validator is None):
|
||||
# We either don't have any boxes or if we do, we will accept any outcome as valid.
|
||||
return self.translate(image, labels)
|
||||
else:
|
||||
# Translate the box coordinates to the translated image's coordinate system.
|
||||
new_labels = np.copy(labels)
|
||||
new_labels[:, [ymin, ymax]] += int(round(img_height * dy))
|
||||
new_labels[:, [xmin, xmax]] += int(round(img_width * dx))
|
||||
|
||||
# Check if the patch is valid.
|
||||
if self.image_validator(labels=new_labels,
|
||||
image_height=img_height,
|
||||
image_width=img_width):
|
||||
return self.translate(image, labels)
|
||||
|
||||
# If all attempts failed, return the unaltered input image.
|
||||
if labels is None:
|
||||
return image
|
||||
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
elif labels is None:
|
||||
return image
|
||||
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class Scale:
|
||||
'''
|
||||
Scales images, i.e. zooms in or out.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
factor,
|
||||
clip_boxes=True,
|
||||
box_filter=None,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
factor (float): The fraction of the image size by which to scale images. Must be positive.
|
||||
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
|
||||
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
|
||||
image after the translation.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
|
||||
background pixels of the scaled images.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
if factor <= 0:
|
||||
raise ValueError("It must be `factor > 0`.")
|
||||
if not (isinstance(box_filter, BoxFilter) or box_filter is None):
|
||||
raise ValueError("`box_filter` must be either `None` or a `BoxFilter` object.")
|
||||
self.factor = factor
|
||||
self.clip_boxes = clip_boxes
|
||||
self.box_filter = box_filter
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
# Compute the rotation matrix.
|
||||
M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2),
|
||||
angle=0,
|
||||
scale=self.factor)
|
||||
|
||||
# Scale the image.
|
||||
image = cv2.warpAffine(image,
|
||||
M=M,
|
||||
dsize=(img_width, img_height),
|
||||
borderMode=cv2.BORDER_CONSTANT,
|
||||
borderValue=self.background)
|
||||
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
labels = np.copy(labels)
|
||||
# Scale the bounding boxes accordingly.
|
||||
# Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`.
|
||||
toplefts = np.array([labels[:,xmin], labels[:,ymin], np.ones(labels.shape[0])])
|
||||
bottomrights = np.array([labels[:,xmax], labels[:,ymax], np.ones(labels.shape[0])])
|
||||
new_toplefts = (np.dot(M, toplefts)).T
|
||||
new_bottomrights = (np.dot(M, bottomrights)).T
|
||||
labels[:,[xmin,ymin]] = np.round(new_toplefts, decimals=0).astype(np.int)
|
||||
labels[:,[xmax,ymax]] = np.round(new_bottomrights, decimals=0).astype(np.int)
|
||||
|
||||
# Compute all valid boxes for this patch.
|
||||
if not (self.box_filter is None):
|
||||
self.box_filter.labels_format = self.labels_format
|
||||
labels = self.box_filter(labels=labels,
|
||||
image_height=img_height,
|
||||
image_width=img_width)
|
||||
|
||||
if self.clip_boxes:
|
||||
labels[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=img_height-1)
|
||||
labels[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=img_width-1)
|
||||
|
||||
return image, labels
|
||||
|
||||
class RandomScale:
|
||||
'''
|
||||
Randomly scales images.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
min_factor=0.5,
|
||||
max_factor=1.5,
|
||||
prob=0.5,
|
||||
clip_boxes=True,
|
||||
box_filter=None,
|
||||
image_validator=None,
|
||||
n_trials_max=3,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
min_factor (float, optional): The minimum fraction of the image size by which to scale images.
|
||||
Must be positive.
|
||||
max_factor (float, optional): The maximum fraction of the image size by which to scale images.
|
||||
Must be positive.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
|
||||
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
|
||||
image after the translation.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
|
||||
An `ImageValidator` object to determine whether a scaled image is valid. If `None`,
|
||||
any outcome is valid.
|
||||
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
|
||||
Determines the maxmial number of trials to produce a valid image. If no valid image could
|
||||
be produced in `n_trials_max` trials, returns the unaltered input image.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
|
||||
background pixels of the scaled images.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
if not (0 < min_factor <= max_factor):
|
||||
raise ValueError("It must be `0 < min_factor <= max_factor`.")
|
||||
if not (isinstance(image_validator, ImageValidator) or image_validator is None):
|
||||
raise ValueError("`image_validator` must be either `None` or an `ImageValidator` object.")
|
||||
self.min_factor = min_factor
|
||||
self.max_factor = max_factor
|
||||
self.prob = prob
|
||||
self.clip_boxes = clip_boxes
|
||||
self.box_filter = box_filter
|
||||
self.image_validator = image_validator
|
||||
self.n_trials_max = n_trials_max
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
self.scale = Scale(factor=1.0,
|
||||
clip_boxes=self.clip_boxes,
|
||||
box_filter=self.box_filter,
|
||||
background=self.background,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
# Override the preset labels format.
|
||||
if not self.image_validator is None:
|
||||
self.image_validator.labels_format = self.labels_format
|
||||
self.scale.labels_format = self.labels_format
|
||||
|
||||
for _ in range(max(1, self.n_trials_max)):
|
||||
|
||||
# Pick a scaling factor.
|
||||
factor = np.random.uniform(self.min_factor, self.max_factor)
|
||||
self.scale.factor = factor
|
||||
|
||||
if (labels is None) or (self.image_validator is None):
|
||||
# We either don't have any boxes or if we do, we will accept any outcome as valid.
|
||||
return self.scale(image, labels)
|
||||
else:
|
||||
# Scale the bounding boxes accordingly.
|
||||
# Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`.
|
||||
toplefts = np.array([labels[:,xmin], labels[:,ymin], np.ones(labels.shape[0])])
|
||||
bottomrights = np.array([labels[:,xmax], labels[:,ymax], np.ones(labels.shape[0])])
|
||||
|
||||
# Compute the rotation matrix.
|
||||
M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2),
|
||||
angle=0,
|
||||
scale=factor)
|
||||
|
||||
new_toplefts = (np.dot(M, toplefts)).T
|
||||
new_bottomrights = (np.dot(M, bottomrights)).T
|
||||
|
||||
new_labels = np.copy(labels)
|
||||
new_labels[:,[xmin,ymin]] = np.around(new_toplefts, decimals=0).astype(np.int)
|
||||
new_labels[:,[xmax,ymax]] = np.around(new_bottomrights, decimals=0).astype(np.int)
|
||||
|
||||
# Check if the patch is valid.
|
||||
if self.image_validator(labels=new_labels,
|
||||
image_height=img_height,
|
||||
image_width=img_width):
|
||||
return self.scale(image, labels)
|
||||
|
||||
# If all attempts failed, return the unaltered input image.
|
||||
if labels is None:
|
||||
return image
|
||||
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
elif labels is None:
|
||||
return image
|
||||
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class Rotate:
|
||||
'''
|
||||
Rotates images counter-clockwise by 90, 180, or 270 degrees.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
angle,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
angle (int): The angle in degrees by which to rotate the images counter-clockwise.
|
||||
Only 90, 180, and 270 are valid values.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
if not angle in {90, 180, 270}:
|
||||
raise ValueError("`angle` must be in the set {90, 180, 270}.")
|
||||
self.angle = angle
|
||||
self.labels_format = labels_format
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
# Compute the rotation matrix.
|
||||
M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2),
|
||||
angle=self.angle,
|
||||
scale=1)
|
||||
|
||||
# Get the sine and cosine from the rotation matrix.
|
||||
cos_angle = np.abs(M[0, 0])
|
||||
sin_angle = np.abs(M[0, 1])
|
||||
|
||||
# Compute the new bounding dimensions of the image.
|
||||
img_width_new = int(img_height * sin_angle + img_width * cos_angle)
|
||||
img_height_new = int(img_height * cos_angle + img_width * sin_angle)
|
||||
|
||||
# Adjust the rotation matrix to take into account the translation.
|
||||
M[1, 2] += (img_height_new - img_height) / 2
|
||||
M[0, 2] += (img_width_new - img_width) / 2
|
||||
|
||||
# Rotate the image.
|
||||
image = cv2.warpAffine(image,
|
||||
M=M,
|
||||
dsize=(img_width_new, img_height_new))
|
||||
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
labels = np.copy(labels)
|
||||
# Rotate the bounding boxes accordingly.
|
||||
# Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`.
|
||||
toplefts = np.array([labels[:,xmin], labels[:,ymin], np.ones(labels.shape[0])])
|
||||
bottomrights = np.array([labels[:,xmax], labels[:,ymax], np.ones(labels.shape[0])])
|
||||
new_toplefts = (np.dot(M, toplefts)).T
|
||||
new_bottomrights = (np.dot(M, bottomrights)).T
|
||||
labels[:,[xmin,ymin]] = np.round(new_toplefts, decimals=0).astype(np.int)
|
||||
labels[:,[xmax,ymax]] = np.round(new_bottomrights, decimals=0).astype(np.int)
|
||||
|
||||
if self.angle == 90:
|
||||
# ymin and ymax were switched by the rotation.
|
||||
labels[:,[ymax,ymin]] = labels[:,[ymin,ymax]]
|
||||
elif self.angle == 180:
|
||||
# ymin and ymax were switched by the rotation,
|
||||
# and also xmin and xmax were switched.
|
||||
labels[:,[ymax,ymin]] = labels[:,[ymin,ymax]]
|
||||
labels[:,[xmax,xmin]] = labels[:,[xmin,xmax]]
|
||||
elif self.angle == 270:
|
||||
# xmin and xmax were switched by the rotation.
|
||||
labels[:,[xmax,xmin]] = labels[:,[xmin,xmax]]
|
||||
|
||||
return image, labels
|
||||
|
||||
class RandomRotate:
|
||||
'''
|
||||
Randomly rotates images counter-clockwise.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
angles=[90, 180, 270],
|
||||
prob=0.5,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
angle (list): The list of angles in degrees from which one is randomly selected to rotate
|
||||
the images counter-clockwise. Only 90, 180, and 270 are valid values.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
for angle in angles:
|
||||
if not angle in {90, 180, 270}:
|
||||
raise ValueError("`angles` can only contain the values 90, 180, and 270.")
|
||||
self.angles = angles
|
||||
self.prob = prob
|
||||
self.labels_format = labels_format
|
||||
self.rotate = Rotate(angle=90, labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
# Pick a rotation angle.
|
||||
self.rotate.angle = random.choice(self.angles)
|
||||
self.rotate.labels_format = self.labels_format
|
||||
return self.rotate(image, labels)
|
||||
|
||||
elif labels is None:
|
||||
return image
|
||||
|
||||
else:
|
||||
return image, labels
|
||||
@@ -0,0 +1,322 @@
|
||||
'''
|
||||
Utilities for 2D object detection related to answering the following questions:
|
||||
1. Given an image size and bounding boxes, which bounding boxes meet certain
|
||||
requirements with respect to the image size?
|
||||
2. Given an image size and bounding boxes, is an image of that size valid with
|
||||
respect to the bounding boxes according to certain requirements?
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
|
||||
from bounding_box_utils.bounding_box_utils import iou
|
||||
|
||||
class BoundGenerator:
|
||||
'''
|
||||
Generates pairs of floating point values that represent lower and upper bounds
|
||||
from a given sample space.
|
||||
'''
|
||||
def __init__(self,
|
||||
sample_space=((0.1, None),
|
||||
(0.3, None),
|
||||
(0.5, None),
|
||||
(0.7, None),
|
||||
(0.9, None),
|
||||
(None, None)),
|
||||
weights=None):
|
||||
'''
|
||||
Arguments:
|
||||
sample_space (list or tuple): A list, tuple, or array-like object of shape
|
||||
`(n, 2)` that contains `n` samples to choose from, where each sample
|
||||
is a 2-tuple of scalars and/or `None` values.
|
||||
weights (list or tuple, optional): A list or tuple representing the distribution
|
||||
over the sample space. If `None`, a uniform distribution will be assumed.
|
||||
'''
|
||||
|
||||
if (not (weights is None)) and len(weights) != len(sample_space):
|
||||
raise ValueError("`weights` must either be `None` for uniform distribution or have the same length as `sample_space`.")
|
||||
|
||||
self.sample_space = []
|
||||
for bound_pair in sample_space:
|
||||
if len(bound_pair) != 2:
|
||||
raise ValueError("All elements of the sample space must be 2-tuples.")
|
||||
bound_pair = list(bound_pair)
|
||||
if bound_pair[0] is None: bound_pair[0] = 0.0
|
||||
if bound_pair[1] is None: bound_pair[1] = 1.0
|
||||
if bound_pair[0] > bound_pair[1]:
|
||||
raise ValueError("For all sample space elements, the lower bound cannot be greater than the upper bound.")
|
||||
self.sample_space.append(bound_pair)
|
||||
|
||||
self.sample_space_size = len(self.sample_space)
|
||||
|
||||
if weights is None:
|
||||
self.weights = [1.0/self.sample_space_size] * self.sample_space_size
|
||||
else:
|
||||
self.weights = weights
|
||||
|
||||
def __call__(self):
|
||||
'''
|
||||
Returns:
|
||||
An item of the sample space, i.e. a 2-tuple of scalars.
|
||||
'''
|
||||
i = np.random.choice(self.sample_space_size, p=self.weights)
|
||||
return self.sample_space[i]
|
||||
|
||||
class BoxFilter:
|
||||
'''
|
||||
Returns all bounding boxes that are valid with respect to a the defined criteria.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
check_overlap=True,
|
||||
check_min_area=True,
|
||||
check_degenerate=True,
|
||||
overlap_criterion='center_point',
|
||||
overlap_bounds=(0.3, 1.0),
|
||||
min_area=16,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4},
|
||||
border_pixels='half'):
|
||||
'''
|
||||
Arguments:
|
||||
check_overlap (bool, optional): Whether or not to enforce the overlap requirements defined by
|
||||
`overlap_criterion` and `overlap_bounds`. Sometimes you might want to use the box filter only
|
||||
to enforce a certain minimum area for all boxes (see next argument), in such cases you can
|
||||
turn the overlap requirements off.
|
||||
check_min_area (bool, optional): Whether or not to enforce the minimum area requirement defined
|
||||
by `min_area`. If `True`, any boxes that have an area (in pixels) that is smaller than `min_area`
|
||||
will be removed from the labels of an image. Bounding boxes below a certain area aren't useful
|
||||
training examples. An object that takes up only, say, 5 pixels in an image is probably not
|
||||
recognizable anymore, neither for a human, nor for an object detection model. It makes sense
|
||||
to remove such boxes.
|
||||
check_degenerate (bool, optional): Whether or not to check for and remove degenerate bounding boxes.
|
||||
Degenerate bounding boxes are boxes that have `xmax <= xmin` and/or `ymax <= ymin`. In particular,
|
||||
boxes with a width and/or height of zero are degenerate. It is obviously important to filter out
|
||||
such boxes, so you should only set this option to `False` if you are certain that degenerate
|
||||
boxes are not possible in your data and processing chain.
|
||||
overlap_criterion (str, optional): Can be either of 'center_point', 'iou', or 'area'. Determines
|
||||
which boxes are considered valid with respect to a given image. If set to 'center_point',
|
||||
a given bounding box is considered valid if its center point lies within the image.
|
||||
If set to 'area', a given bounding box is considered valid if the quotient of its intersection
|
||||
area with the image and its own area is within the given `overlap_bounds`. If set to 'iou', a given
|
||||
bounding box is considered valid if its IoU with the image is within the given `overlap_bounds`.
|
||||
overlap_bounds (list or BoundGenerator, optional): Only relevant if `overlap_criterion` is 'area' or 'iou'.
|
||||
Determines the lower and upper bounds for `overlap_criterion`. Can be either a 2-tuple of scalars
|
||||
representing a lower bound and an upper bound, or a `BoundGenerator` object, which provides
|
||||
the possibility to generate bounds randomly.
|
||||
min_area (int, optional): Only relevant if `check_min_area` is `True`. Defines the minimum area in
|
||||
pixels that a bounding box must have in order to be valid. Boxes with an area smaller than this
|
||||
will be removed.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
|
||||
Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
|
||||
to the boxes. If 'exclude', the border pixels do not belong to the boxes.
|
||||
If 'half', then one of each of the two horizontal and vertical borders belong
|
||||
to the boxex, but not the other.
|
||||
'''
|
||||
if not isinstance(overlap_bounds, (list, tuple, BoundGenerator)):
|
||||
raise ValueError("`overlap_bounds` must be either a 2-tuple of scalars or a `BoundGenerator` object.")
|
||||
if isinstance(overlap_bounds, (list, tuple)) and (overlap_bounds[0] > overlap_bounds[1]):
|
||||
raise ValueError("The lower bound must not be greater than the upper bound.")
|
||||
if not (overlap_criterion in {'iou', 'area', 'center_point'}):
|
||||
raise ValueError("`overlap_criterion` must be one of 'iou', 'area', or 'center_point'.")
|
||||
self.overlap_criterion = overlap_criterion
|
||||
self.overlap_bounds = overlap_bounds
|
||||
self.min_area = min_area
|
||||
self.check_overlap = check_overlap
|
||||
self.check_min_area = check_min_area
|
||||
self.check_degenerate = check_degenerate
|
||||
self.labels_format = labels_format
|
||||
self.border_pixels = border_pixels
|
||||
|
||||
def __call__(self,
|
||||
labels,
|
||||
image_height=None,
|
||||
image_width=None):
|
||||
'''
|
||||
Arguments:
|
||||
labels (array): The labels to be filtered. This is an array with shape `(m,n)`, where
|
||||
`m` is the number of bounding boxes and `n` is the number of elements that defines
|
||||
each bounding box (box coordinates, class ID, etc.). The box coordinates are expected
|
||||
to be in the image's coordinate system.
|
||||
image_height (int): Only relevant if `check_overlap == True`. The height of the image
|
||||
(in pixels) to compare the box coordinates to.
|
||||
image_width (int): `check_overlap == True`. The width of the image (in pixels) to compare
|
||||
the box coordinates to.
|
||||
|
||||
Returns:
|
||||
An array containing the labels of all boxes that are valid.
|
||||
'''
|
||||
|
||||
labels = np.copy(labels)
|
||||
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
# Record the boxes that pass all checks here.
|
||||
requirements_met = np.ones(shape=labels.shape[0], dtype=np.bool)
|
||||
|
||||
if self.check_degenerate:
|
||||
|
||||
non_degenerate = (labels[:,xmax] > labels[:,xmin]) * (labels[:,ymax] > labels[:,ymin])
|
||||
requirements_met *= non_degenerate
|
||||
|
||||
if self.check_min_area:
|
||||
|
||||
min_area_met = (labels[:,xmax] - labels[:,xmin]) * (labels[:,ymax] - labels[:,ymin]) >= self.min_area
|
||||
requirements_met *= min_area_met
|
||||
|
||||
if self.check_overlap:
|
||||
|
||||
# Get the lower and upper bounds.
|
||||
if isinstance(self.overlap_bounds, BoundGenerator):
|
||||
lower, upper = self.overlap_bounds()
|
||||
else:
|
||||
lower, upper = self.overlap_bounds
|
||||
|
||||
# Compute which boxes are valid.
|
||||
|
||||
if self.overlap_criterion == 'iou':
|
||||
# Compute the patch coordinates.
|
||||
image_coords = np.array([0, 0, image_width, image_height])
|
||||
# Compute the IoU between the patch and all of the ground truth boxes.
|
||||
image_boxes_iou = iou(image_coords, labels[:, [xmin, ymin, xmax, ymax]], coords='corners', mode='element-wise', border_pixels=self.border_pixels)
|
||||
requirements_met *= (image_boxes_iou > lower) * (image_boxes_iou <= upper)
|
||||
|
||||
elif self.overlap_criterion == 'area':
|
||||
if self.border_pixels == 'half':
|
||||
d = 0
|
||||
elif self.border_pixels == 'include':
|
||||
d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
|
||||
elif self.border_pixels == 'exclude':
|
||||
d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
|
||||
# Compute the areas of the boxes.
|
||||
box_areas = (labels[:,xmax] - labels[:,xmin] + d) * (labels[:,ymax] - labels[:,ymin] + d)
|
||||
# Compute the intersection area between the patch and all of the ground truth boxes.
|
||||
clipped_boxes = np.copy(labels)
|
||||
clipped_boxes[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=image_height-1)
|
||||
clipped_boxes[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=image_width-1)
|
||||
intersection_areas = (clipped_boxes[:,xmax] - clipped_boxes[:,xmin] + d) * (clipped_boxes[:,ymax] - clipped_boxes[:,ymin] + d) # +1 because the border pixels belong to the box areas.
|
||||
# Check which boxes meet the overlap requirements.
|
||||
if lower == 0.0:
|
||||
mask_lower = intersection_areas > lower * box_areas # If `self.lower == 0`, we want to make sure that boxes with area 0 don't count, hence the ">" sign instead of the ">=" sign.
|
||||
else:
|
||||
mask_lower = intersection_areas >= lower * box_areas # Especially for the case `self.lower == 1` we want the ">=" sign, otherwise no boxes would count at all.
|
||||
mask_upper = intersection_areas <= upper * box_areas
|
||||
requirements_met *= mask_lower * mask_upper
|
||||
|
||||
elif self.overlap_criterion == 'center_point':
|
||||
# Compute the center points of the boxes.
|
||||
cy = (labels[:,ymin] + labels[:,ymax]) / 2
|
||||
cx = (labels[:,xmin] + labels[:,xmax]) / 2
|
||||
# Check which of the boxes have center points within the cropped patch remove those that don't.
|
||||
requirements_met *= (cy >= 0.0) * (cy <= image_height-1) * (cx >= 0.0) * (cx <= image_width-1)
|
||||
|
||||
return labels[requirements_met]
|
||||
|
||||
class ImageValidator:
|
||||
'''
|
||||
Returns `True` if a given minimum number of bounding boxes meets given overlap
|
||||
requirements with an image of a given height and width.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
overlap_criterion='center_point',
|
||||
bounds=(0.3, 1.0),
|
||||
n_boxes_min=1,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4},
|
||||
border_pixels='half'):
|
||||
'''
|
||||
Arguments:
|
||||
overlap_criterion (str, optional): Can be either of 'center_point', 'iou', or 'area'. Determines
|
||||
which boxes are considered valid with respect to a given image. If set to 'center_point',
|
||||
a given bounding box is considered valid if its center point lies within the image.
|
||||
If set to 'area', a given bounding box is considered valid if the quotient of its intersection
|
||||
area with the image and its own area is within `lower` and `upper`. If set to 'iou', a given
|
||||
bounding box is considered valid if its IoU with the image is within `lower` and `upper`.
|
||||
bounds (list or BoundGenerator, optional): Only relevant if `overlap_criterion` is 'area' or 'iou'.
|
||||
Determines the lower and upper bounds for `overlap_criterion`. Can be either a 2-tuple of scalars
|
||||
representing a lower bound and an upper bound, or a `BoundGenerator` object, which provides
|
||||
the possibility to generate bounds randomly.
|
||||
n_boxes_min (int or str, optional): Either a non-negative integer or the string 'all'.
|
||||
Determines the minimum number of boxes that must meet the `overlap_criterion` with respect to
|
||||
an image of the given height and width in order for the image to be a valid image.
|
||||
If set to 'all', an image is considered valid if all given boxes meet the `overlap_criterion`.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
|
||||
Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
|
||||
to the boxes. If 'exclude', the border pixels do not belong to the boxes.
|
||||
If 'half', then one of each of the two horizontal and vertical borders belong
|
||||
to the boxex, but not the other.
|
||||
'''
|
||||
if not ((isinstance(n_boxes_min, int) and n_boxes_min > 0) or n_boxes_min == 'all'):
|
||||
raise ValueError("`n_boxes_min` must be a positive integer or 'all'.")
|
||||
self.overlap_criterion = overlap_criterion
|
||||
self.bounds = bounds
|
||||
self.n_boxes_min = n_boxes_min
|
||||
self.labels_format = labels_format
|
||||
self.border_pixels = border_pixels
|
||||
self.box_filter = BoxFilter(check_overlap=True,
|
||||
check_min_area=False,
|
||||
check_degenerate=False,
|
||||
overlap_criterion=self.overlap_criterion,
|
||||
overlap_bounds=self.bounds,
|
||||
labels_format=self.labels_format,
|
||||
border_pixels=self.border_pixels)
|
||||
|
||||
def __call__(self,
|
||||
labels,
|
||||
image_height,
|
||||
image_width):
|
||||
'''
|
||||
Arguments:
|
||||
labels (array): The labels to be tested. The box coordinates are expected
|
||||
to be in the image's coordinate system.
|
||||
image_height (int): The height of the image to compare the box coordinates to.
|
||||
image_width (int): The width of the image to compare the box coordinates to.
|
||||
|
||||
Returns:
|
||||
A boolean indicating whether an imgae of the given height and width is
|
||||
valid with respect to the given bounding boxes.
|
||||
'''
|
||||
|
||||
self.box_filter.overlap_bounds = self.bounds
|
||||
self.box_filter.labels_format = self.labels_format
|
||||
|
||||
# Get all boxes that meet the overlap requirements.
|
||||
valid_labels = self.box_filter(labels=labels,
|
||||
image_height=image_height,
|
||||
image_width=image_width)
|
||||
|
||||
# Check whether enough boxes meet the requirements.
|
||||
if isinstance(self.n_boxes_min, int):
|
||||
# The image is valid if at least `self.n_boxes_min` ground truth boxes meet the requirements.
|
||||
if len(valid_labels) >= self.n_boxes_min:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
elif self.n_boxes_min == 'all':
|
||||
# The image is valid if all ground truth boxes meet the requirements.
|
||||
if len(valid_labels) == len(labels):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
@@ -0,0 +1,73 @@
|
||||
'''
|
||||
Miscellaneous data generator utilities.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
|
||||
def apply_inverse_transforms(y_pred_decoded, inverse_transforms):
|
||||
'''
|
||||
Takes a list or Numpy array of decoded predictions and applies a given list of
|
||||
transforms to them. The list of inverse transforms would usually contain the
|
||||
inverter functions that some of the image transformations that come with this
|
||||
data generator return. This function would normally be used to transform predictions
|
||||
that were made on a transformed image back to the original image.
|
||||
|
||||
Arguments:
|
||||
y_pred_decoded (list or array): Either a list of length `batch_size` that
|
||||
contains Numpy arrays that contain the predictions for each batch item
|
||||
or a Numpy array. If this is a list of Numpy arrays, the arrays would
|
||||
usually have the shape `(num_predictions, 6)`, where `num_predictions`
|
||||
is different for each batch item. If this is a Numpy array, it would
|
||||
usually have the shape `(batch_size, num_predictions, 6)`. The last axis
|
||||
would usually contain the class ID, confidence score, and four bounding
|
||||
box coordinates for each prediction.
|
||||
inverse_predictions (list): A nested list of length `batch_size` that contains
|
||||
for each batch item a list of functions that take one argument (one element
|
||||
of `y_pred_decoded` if it is a list or one slice along the first axis of
|
||||
`y_pred_decoded` if it is an array) and return an output of the same shape
|
||||
and data type.
|
||||
|
||||
Returns:
|
||||
The transformed predictions, which have the same structure as `y_pred_decoded`.
|
||||
'''
|
||||
|
||||
if isinstance(y_pred_decoded, list):
|
||||
|
||||
y_pred_decoded_inv = []
|
||||
|
||||
for i in range(len(y_pred_decoded)):
|
||||
y_pred_decoded_inv.append(np.copy(y_pred_decoded[i]))
|
||||
if y_pred_decoded_inv[i].size > 0: # If there are any predictions for this batch item.
|
||||
for inverter in inverse_transforms[i]:
|
||||
if not (inverter is None):
|
||||
y_pred_decoded_inv[i] = inverter(y_pred_decoded_inv[i])
|
||||
|
||||
elif isinstance(y_pred_decoded, np.ndarray):
|
||||
|
||||
y_pred_decoded_inv = np.copy(y_pred_decoded)
|
||||
|
||||
for i in range(len(y_pred_decoded)):
|
||||
if y_pred_decoded_inv[i].size > 0: # If there are any predictions for this batch item.
|
||||
for inverter in inverse_transforms[i]:
|
||||
if not (inverter is None):
|
||||
y_pred_decoded_inv[i] = inverter(y_pred_decoded_inv[i])
|
||||
|
||||
else:
|
||||
raise ValueError("`y_pred_decoded` must be either a list or a Numpy array.")
|
||||
|
||||
return y_pred_decoded_inv
|
||||
@@ -0,0 +1,881 @@
|
||||
'''
|
||||
Various patch sampling operations for data augmentation in 2D object detection.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
|
||||
from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator
|
||||
|
||||
class PatchCoordinateGenerator:
|
||||
'''
|
||||
Generates random patch coordinates that meet specified requirements.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
img_height=None,
|
||||
img_width=None,
|
||||
must_match='h_w',
|
||||
min_scale=0.3,
|
||||
max_scale=1.0,
|
||||
scale_uniformly=False,
|
||||
min_aspect_ratio = 0.5,
|
||||
max_aspect_ratio = 2.0,
|
||||
patch_ymin=None,
|
||||
patch_xmin=None,
|
||||
patch_height=None,
|
||||
patch_width=None,
|
||||
patch_aspect_ratio=None):
|
||||
'''
|
||||
Arguments:
|
||||
img_height (int): The height of the image for which the patch coordinates
|
||||
shall be generated. Doesn't have to be known upon construction.
|
||||
img_width (int): The width of the image for which the patch coordinates
|
||||
shall be generated. Doesn't have to be known upon construction.
|
||||
must_match (str, optional): Can be either of 'h_w', 'h_ar', and 'w_ar'.
|
||||
Specifies which two of the three quantities height, width, and aspect
|
||||
ratio determine the shape of the generated patch. The respective third
|
||||
quantity will be computed from the other two. For example,
|
||||
if `must_match == 'h_w'`, then the patch's height and width will be
|
||||
set to lie within [min_scale, max_scale] of the image size or to
|
||||
`patch_height` and/or `patch_width`, if given. The patch's aspect ratio
|
||||
is the dependent variable in this case, it will be computed from the
|
||||
height and width. Any given values for `patch_aspect_ratio`,
|
||||
`min_aspect_ratio`, or `max_aspect_ratio` will be ignored.
|
||||
min_scale (float, optional): The minimum size of a dimension of the patch
|
||||
as a fraction of the respective dimension of the image. Can be greater
|
||||
than 1. For example, if the image width is 200 and `min_scale == 0.5`,
|
||||
then the width of the generated patch will be at least 100. If `min_scale == 1.5`,
|
||||
the width of the generated patch will be at least 300.
|
||||
max_scale (float, optional): The maximum size of a dimension of the patch
|
||||
as a fraction of the respective dimension of the image. Can be greater
|
||||
than 1. For example, if the image width is 200 and `max_scale == 1.0`,
|
||||
then the width of the generated patch will be at most 200. If `max_scale == 1.5`,
|
||||
the width of the generated patch will be at most 300. Must be greater than
|
||||
`min_scale`.
|
||||
scale_uniformly (bool, optional): If `True` and if `must_match == 'h_w'`,
|
||||
the patch height and width will be scaled uniformly, otherwise they will
|
||||
be scaled independently.
|
||||
min_aspect_ratio (float, optional): Determines the minimum aspect ratio
|
||||
for the generated patches.
|
||||
max_aspect_ratio (float, optional): Determines the maximum aspect ratio
|
||||
for the generated patches.
|
||||
patch_ymin (int, optional): `None` or the vertical coordinate of the top left
|
||||
corner of the generated patches. If this is not `None`, the position of the
|
||||
patches along the vertical axis is fixed. If this is `None`, then the
|
||||
vertical position of generated patches will be chosen randomly such that
|
||||
the overlap of a patch and the image along the vertical dimension is
|
||||
always maximal.
|
||||
patch_xmin (int, optional): `None` or the horizontal coordinate of the top left
|
||||
corner of the generated patches. If this is not `None`, the position of the
|
||||
patches along the horizontal axis is fixed. If this is `None`, then the
|
||||
horizontal position of generated patches will be chosen randomly such that
|
||||
the overlap of a patch and the image along the horizontal dimension is
|
||||
always maximal.
|
||||
patch_height (int, optional): `None` or the fixed height of the generated patches.
|
||||
patch_width (int, optional): `None` or the fixed width of the generated patches.
|
||||
patch_aspect_ratio (float, optional): `None` or the fixed aspect ratio of the
|
||||
generated patches.
|
||||
'''
|
||||
|
||||
if not (must_match in {'h_w', 'h_ar', 'w_ar'}):
|
||||
raise ValueError("`must_match` must be either of 'h_w', 'h_ar' and 'w_ar'.")
|
||||
if min_scale >= max_scale:
|
||||
raise ValueError("It must be `min_scale < max_scale`.")
|
||||
if min_aspect_ratio >= max_aspect_ratio:
|
||||
raise ValueError("It must be `min_aspect_ratio < max_aspect_ratio`.")
|
||||
if scale_uniformly and not ((patch_height is None) and (patch_width is None)):
|
||||
raise ValueError("If `scale_uniformly == True`, `patch_height` and `patch_width` must both be `None`.")
|
||||
self.img_height = img_height
|
||||
self.img_width = img_width
|
||||
self.must_match = must_match
|
||||
self.min_scale = min_scale
|
||||
self.max_scale = max_scale
|
||||
self.scale_uniformly = scale_uniformly
|
||||
self.min_aspect_ratio = min_aspect_ratio
|
||||
self.max_aspect_ratio = max_aspect_ratio
|
||||
self.patch_ymin = patch_ymin
|
||||
self.patch_xmin = patch_xmin
|
||||
self.patch_height = patch_height
|
||||
self.patch_width = patch_width
|
||||
self.patch_aspect_ratio = patch_aspect_ratio
|
||||
|
||||
def __call__(self):
|
||||
'''
|
||||
Returns:
|
||||
A 4-tuple `(ymin, xmin, height, width)` that represents the coordinates
|
||||
of the generated patch.
|
||||
'''
|
||||
|
||||
# Get the patch height and width.
|
||||
|
||||
if self.must_match == 'h_w': # Aspect is the dependent variable.
|
||||
if not self.scale_uniformly:
|
||||
# Get the height.
|
||||
if self.patch_height is None:
|
||||
patch_height = int(np.random.uniform(self.min_scale, self.max_scale) * self.img_height)
|
||||
else:
|
||||
patch_height = self.patch_height
|
||||
# Get the width.
|
||||
if self.patch_width is None:
|
||||
patch_width = int(np.random.uniform(self.min_scale, self.max_scale) * self.img_width)
|
||||
else:
|
||||
patch_width = self.patch_width
|
||||
else:
|
||||
scaling_factor = np.random.uniform(self.min_scale, self.max_scale)
|
||||
patch_height = int(scaling_factor * self.img_height)
|
||||
patch_width = int(scaling_factor * self.img_width)
|
||||
|
||||
elif self.must_match == 'h_ar': # Width is the dependent variable.
|
||||
# Get the height.
|
||||
if self.patch_height is None:
|
||||
patch_height = int(np.random.uniform(self.min_scale, self.max_scale) * self.img_height)
|
||||
else:
|
||||
patch_height = self.patch_height
|
||||
# Get the aspect ratio.
|
||||
if self.patch_aspect_ratio is None:
|
||||
patch_aspect_ratio = np.random.uniform(self.min_aspect_ratio, self.max_aspect_ratio)
|
||||
else:
|
||||
patch_aspect_ratio = self.patch_aspect_ratio
|
||||
# Get the width.
|
||||
patch_width = int(patch_height * patch_aspect_ratio)
|
||||
|
||||
elif self.must_match == 'w_ar': # Height is the dependent variable.
|
||||
# Get the width.
|
||||
if self.patch_width is None:
|
||||
patch_width = int(np.random.uniform(self.min_scale, self.max_scale) * self.img_width)
|
||||
else:
|
||||
patch_width = self.patch_width
|
||||
# Get the aspect ratio.
|
||||
if self.patch_aspect_ratio is None:
|
||||
patch_aspect_ratio = np.random.uniform(self.min_aspect_ratio, self.max_aspect_ratio)
|
||||
else:
|
||||
patch_aspect_ratio = self.patch_aspect_ratio
|
||||
# Get the height.
|
||||
patch_height = int(patch_width / patch_aspect_ratio)
|
||||
|
||||
# Get the top left corner coordinates of the patch.
|
||||
|
||||
if self.patch_ymin is None:
|
||||
# Compute how much room we have along the vertical axis to place the patch.
|
||||
# A negative number here means that we want to sample a patch that is larger than the original image
|
||||
# in the vertical dimension, in which case the patch will be placed such that it fully contains the
|
||||
# image in the vertical dimension.
|
||||
y_range = self.img_height - patch_height
|
||||
# Select a random top left corner for the sample position from the possible positions.
|
||||
if y_range >= 0: patch_ymin = np.random.randint(0, y_range + 1) # There are y_range + 1 possible positions for the crop in the vertical dimension.
|
||||
else: patch_ymin = np.random.randint(y_range, 1) # The possible positions for the image on the background canvas in the vertical dimension.
|
||||
else:
|
||||
patch_ymin = self.patch_ymin
|
||||
|
||||
if self.patch_xmin is None:
|
||||
# Compute how much room we have along the horizontal axis to place the patch.
|
||||
# A negative number here means that we want to sample a patch that is larger than the original image
|
||||
# in the horizontal dimension, in which case the patch will be placed such that it fully contains the
|
||||
# image in the horizontal dimension.
|
||||
x_range = self.img_width - patch_width
|
||||
# Select a random top left corner for the sample position from the possible positions.
|
||||
if x_range >= 0: patch_xmin = np.random.randint(0, x_range + 1) # There are x_range + 1 possible positions for the crop in the horizontal dimension.
|
||||
else: patch_xmin = np.random.randint(x_range, 1) # The possible positions for the image on the background canvas in the horizontal dimension.
|
||||
else:
|
||||
patch_xmin = self.patch_xmin
|
||||
|
||||
return (patch_ymin, patch_xmin, patch_height, patch_width)
|
||||
|
||||
class CropPad:
|
||||
'''
|
||||
Crops and/or pads an image deterministically.
|
||||
|
||||
Depending on the given output patch size and the position (top left corner) relative
|
||||
to the input image, the image will be cropped and/or padded along one or both spatial
|
||||
dimensions.
|
||||
|
||||
For example, if the output patch lies entirely within the input image, this will result
|
||||
in a regular crop. If the input image lies entirely within the output patch, this will
|
||||
result in the image being padded in every direction. All other cases are mixed cases
|
||||
where the image might be cropped in some directions and padded in others.
|
||||
|
||||
The output patch can be arbitrary in both size and position as long as it overlaps
|
||||
with the input image.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
patch_ymin,
|
||||
patch_xmin,
|
||||
patch_height,
|
||||
patch_width,
|
||||
clip_boxes=True,
|
||||
box_filter=None,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
patch_ymin (int, optional): The vertical coordinate of the top left corner of the output
|
||||
patch relative to the image coordinate system. Can be negative (i.e. lie outside the image)
|
||||
as long as the resulting patch still overlaps with the image.
|
||||
patch_ymin (int, optional): The horizontal coordinate of the top left corner of the output
|
||||
patch relative to the image coordinate system. Can be negative (i.e. lie outside the image)
|
||||
as long as the resulting patch still overlaps with the image.
|
||||
patch_height (int): The height of the patch to be sampled from the image. Can be greater
|
||||
than the height of the input image.
|
||||
patch_width (int): The width of the patch to be sampled from the image. Can be greater
|
||||
than the width of the input image.
|
||||
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
|
||||
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
|
||||
sampled patch.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
|
||||
background pixels of the scaled images. In the case of single-channel images,
|
||||
the first element of `background` will be used as the background pixel value.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
#if (patch_height <= 0) or (patch_width <= 0):
|
||||
# raise ValueError("Patch height and width must both be positive.")
|
||||
#if (patch_ymin + patch_height < 0) or (patch_xmin + patch_width < 0):
|
||||
# raise ValueError("A patch with the given coordinates cannot overlap with an input image.")
|
||||
if not (isinstance(box_filter, BoxFilter) or box_filter is None):
|
||||
raise ValueError("`box_filter` must be either `None` or a `BoxFilter` object.")
|
||||
self.patch_height = patch_height
|
||||
self.patch_width = patch_width
|
||||
self.patch_ymin = patch_ymin
|
||||
self.patch_xmin = patch_xmin
|
||||
self.clip_boxes = clip_boxes
|
||||
self.box_filter = box_filter
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
if (self.patch_ymin > img_height) or (self.patch_xmin > img_width):
|
||||
raise ValueError("The given patch doesn't overlap with the input image.")
|
||||
|
||||
labels = np.copy(labels)
|
||||
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
# Top left corner of the patch relative to the image coordinate system:
|
||||
patch_ymin = self.patch_ymin
|
||||
patch_xmin = self.patch_xmin
|
||||
|
||||
# Create a canvas of the size of the patch we want to end up with.
|
||||
if image.ndim == 3:
|
||||
canvas = np.zeros(shape=(self.patch_height, self.patch_width, 3), dtype=np.uint8)
|
||||
canvas[:, :] = self.background
|
||||
elif image.ndim == 2:
|
||||
canvas = np.zeros(shape=(self.patch_height, self.patch_width), dtype=np.uint8)
|
||||
canvas[:, :] = self.background[0]
|
||||
|
||||
# Perform the crop.
|
||||
if patch_ymin < 0 and patch_xmin < 0: # Pad the image at the top and on the left.
|
||||
image_crop_height = min(img_height, self.patch_height + patch_ymin) # The number of pixels of the image that will end up on the canvas in the vertical direction.
|
||||
image_crop_width = min(img_width, self.patch_width + patch_xmin) # The number of pixels of the image that will end up on the canvas in the horizontal direction.
|
||||
canvas[-patch_ymin:-patch_ymin + image_crop_height, -patch_xmin:-patch_xmin + image_crop_width] = image[:image_crop_height, :image_crop_width]
|
||||
|
||||
elif patch_ymin < 0 and patch_xmin >= 0: # Pad the image at the top and crop it on the left.
|
||||
image_crop_height = min(img_height, self.patch_height + patch_ymin) # The number of pixels of the image that will end up on the canvas in the vertical direction.
|
||||
image_crop_width = min(self.patch_width, img_width - patch_xmin) # The number of pixels of the image that will end up on the canvas in the horizontal direction.
|
||||
canvas[-patch_ymin:-patch_ymin + image_crop_height, :image_crop_width] = image[:image_crop_height, patch_xmin:patch_xmin + image_crop_width]
|
||||
|
||||
elif patch_ymin >= 0 and patch_xmin < 0: # Crop the image at the top and pad it on the left.
|
||||
image_crop_height = min(self.patch_height, img_height - patch_ymin) # The number of pixels of the image that will end up on the canvas in the vertical direction.
|
||||
image_crop_width = min(img_width, self.patch_width + patch_xmin) # The number of pixels of the image that will end up on the canvas in the horizontal direction.
|
||||
canvas[:image_crop_height, -patch_xmin:-patch_xmin + image_crop_width] = image[patch_ymin:patch_ymin + image_crop_height, :image_crop_width]
|
||||
|
||||
elif patch_ymin >= 0 and patch_xmin >= 0: # Crop the image at the top and on the left.
|
||||
image_crop_height = min(self.patch_height, img_height - patch_ymin) # The number of pixels of the image that will end up on the canvas in the vertical direction.
|
||||
image_crop_width = min(self.patch_width, img_width - patch_xmin) # The number of pixels of the image that will end up on the canvas in the horizontal direction.
|
||||
canvas[:image_crop_height, :image_crop_width] = image[patch_ymin:patch_ymin + image_crop_height, patch_xmin:patch_xmin + image_crop_width]
|
||||
|
||||
image = canvas
|
||||
|
||||
if return_inverter:
|
||||
def inverter(labels):
|
||||
labels = np.copy(labels)
|
||||
labels[:, [ymin+1, ymax+1]] += patch_ymin
|
||||
labels[:, [xmin+1, xmax+1]] += patch_xmin
|
||||
return labels
|
||||
|
||||
if not (labels is None):
|
||||
|
||||
# Translate the box coordinates to the patch's coordinate system.
|
||||
labels[:, [ymin, ymax]] -= patch_ymin
|
||||
labels[:, [xmin, xmax]] -= patch_xmin
|
||||
|
||||
# Compute all valid boxes for this patch.
|
||||
if not (self.box_filter is None):
|
||||
self.box_filter.labels_format = self.labels_format
|
||||
labels = self.box_filter(labels=labels,
|
||||
image_height=self.patch_height,
|
||||
image_width=self.patch_width)
|
||||
|
||||
if self.clip_boxes:
|
||||
labels[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=self.patch_height-1)
|
||||
labels[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=self.patch_width-1)
|
||||
|
||||
if return_inverter:
|
||||
return image, labels, inverter
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
else:
|
||||
if return_inverter:
|
||||
return image, inverter
|
||||
else:
|
||||
return image
|
||||
|
||||
class Crop:
|
||||
'''
|
||||
Crops off the specified numbers of pixels from the borders of images.
|
||||
|
||||
This is just a convenience interface for `CropPad`.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
crop_top,
|
||||
crop_bottom,
|
||||
crop_left,
|
||||
crop_right,
|
||||
clip_boxes=True,
|
||||
box_filter=None,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
self.crop_top = crop_top
|
||||
self.crop_bottom = crop_bottom
|
||||
self.crop_left = crop_left
|
||||
self.crop_right = crop_right
|
||||
self.clip_boxes = clip_boxes
|
||||
self.box_filter = box_filter
|
||||
self.labels_format = labels_format
|
||||
self.crop = CropPad(patch_ymin=self.crop_top,
|
||||
patch_xmin=self.crop_left,
|
||||
patch_height=None,
|
||||
patch_width=None,
|
||||
clip_boxes=self.clip_boxes,
|
||||
box_filter=self.box_filter,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
self.crop.patch_height = img_height - self.crop_top - self.crop_bottom
|
||||
self.crop.patch_width = img_width - self.crop_left - self.crop_right
|
||||
self.crop.labels_format = self.labels_format
|
||||
|
||||
return self.crop(image, labels, return_inverter)
|
||||
|
||||
class Pad:
|
||||
'''
|
||||
Pads images by the specified numbers of pixels on each side.
|
||||
|
||||
This is just a convenience interface for `CropPad`.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
pad_top,
|
||||
pad_bottom,
|
||||
pad_left,
|
||||
pad_right,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
self.pad_top = pad_top
|
||||
self.pad_bottom = pad_bottom
|
||||
self.pad_left = pad_left
|
||||
self.pad_right = pad_right
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
self.pad = CropPad(patch_ymin=-self.pad_top,
|
||||
patch_xmin=-self.pad_left,
|
||||
patch_height=None,
|
||||
patch_width=None,
|
||||
clip_boxes=False,
|
||||
box_filter=None,
|
||||
background=self.background,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
self.pad.patch_height = img_height + self.pad_top + self.pad_bottom
|
||||
self.pad.patch_width = img_width + self.pad_left + self.pad_right
|
||||
self.pad.labels_format = self.labels_format
|
||||
|
||||
return self.pad(image, labels, return_inverter)
|
||||
|
||||
class RandomPatch:
|
||||
'''
|
||||
Randomly samples a patch from an image. The randomness refers to whatever
|
||||
randomness may be introduced by the patch coordinate generator, the box filter,
|
||||
and the patch validator.
|
||||
|
||||
Input images may be cropped and/or padded along either or both of the two
|
||||
spatial dimensions as necessary in order to obtain the required patch.
|
||||
|
||||
As opposed to `RandomPatchInf`, it is possible for this transform to fail to produce
|
||||
an output image at all, in which case it will return `None`. This is useful, because
|
||||
if this transform is used to generate patches of a fixed size or aspect ratio, then
|
||||
the caller needs to be able to rely on the output image satisfying the set size or
|
||||
aspect ratio. It might therefore not be an option to return the unaltered input image
|
||||
as other random transforms do when they fail to produce a valid transformed image.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
patch_coord_generator,
|
||||
box_filter=None,
|
||||
image_validator=None,
|
||||
n_trials_max=3,
|
||||
clip_boxes=True,
|
||||
prob=1.0,
|
||||
background=(0,0,0),
|
||||
can_fail=False,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
patch_coord_generator (PatchCoordinateGenerator): A `PatchCoordinateGenerator` object
|
||||
to generate the positions and sizes of the patches to be sampled from the input images.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
|
||||
An `ImageValidator` object to determine whether a sampled patch is valid. If `None`,
|
||||
any outcome is valid.
|
||||
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
|
||||
Determines the maxmial number of trials to sample a valid patch. If no valid patch could
|
||||
be sampled in `n_trials_max` trials, returns one `None` in place of each regular output.
|
||||
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
|
||||
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
|
||||
sampled patch.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
|
||||
background pixels of the scaled images. In the case of single-channel images,
|
||||
the first element of `background` will be used as the background pixel value.
|
||||
can_fail (bool, optional): If `True`, will return `None` if no valid patch could be found after
|
||||
`n_trials_max` trials. If `False`, will return the unaltered input image in such a case.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
if not isinstance(patch_coord_generator, PatchCoordinateGenerator):
|
||||
raise ValueError("`patch_coord_generator` must be an instance of `PatchCoordinateGenerator`.")
|
||||
if not (isinstance(image_validator, ImageValidator) or image_validator is None):
|
||||
raise ValueError("`image_validator` must be either `None` or an `ImageValidator` object.")
|
||||
self.patch_coord_generator = patch_coord_generator
|
||||
self.box_filter = box_filter
|
||||
self.image_validator = image_validator
|
||||
self.n_trials_max = n_trials_max
|
||||
self.clip_boxes = clip_boxes
|
||||
self.prob = prob
|
||||
self.background = background
|
||||
self.can_fail = can_fail
|
||||
self.labels_format = labels_format
|
||||
self.sample_patch = CropPad(patch_ymin=None,
|
||||
patch_xmin=None,
|
||||
patch_height=None,
|
||||
patch_width=None,
|
||||
clip_boxes=self.clip_boxes,
|
||||
box_filter=self.box_filter,
|
||||
background=self.background,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
self.patch_coord_generator.img_height = img_height
|
||||
self.patch_coord_generator.img_width = img_width
|
||||
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
# Override the preset labels format.
|
||||
if not self.image_validator is None:
|
||||
self.image_validator.labels_format = self.labels_format
|
||||
self.sample_patch.labels_format = self.labels_format
|
||||
|
||||
for _ in range(max(1, self.n_trials_max)):
|
||||
|
||||
# Generate patch coordinates.
|
||||
patch_ymin, patch_xmin, patch_height, patch_width = self.patch_coord_generator()
|
||||
|
||||
self.sample_patch.patch_ymin = patch_ymin
|
||||
self.sample_patch.patch_xmin = patch_xmin
|
||||
self.sample_patch.patch_height = patch_height
|
||||
self.sample_patch.patch_width = patch_width
|
||||
|
||||
if (labels is None) or (self.image_validator is None):
|
||||
# We either don't have any boxes or if we do, we will accept any outcome as valid.
|
||||
return self.sample_patch(image, labels, return_inverter)
|
||||
else:
|
||||
# Translate the box coordinates to the patch's coordinate system.
|
||||
new_labels = np.copy(labels)
|
||||
new_labels[:, [ymin, ymax]] -= patch_ymin
|
||||
new_labels[:, [xmin, xmax]] -= patch_xmin
|
||||
# Check if the patch is valid.
|
||||
if self.image_validator(labels=new_labels,
|
||||
image_height=patch_height,
|
||||
image_width=patch_width):
|
||||
return self.sample_patch(image, labels, return_inverter)
|
||||
|
||||
# If we weren't able to sample a valid patch...
|
||||
if self.can_fail:
|
||||
# ...return `None`.
|
||||
if labels is None:
|
||||
if return_inverter:
|
||||
return None, None
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
if return_inverter:
|
||||
return None, None, None
|
||||
else:
|
||||
return None, None
|
||||
else:
|
||||
# ...return the unaltered input image.
|
||||
if labels is None:
|
||||
if return_inverter:
|
||||
return image, None
|
||||
else:
|
||||
return image
|
||||
else:
|
||||
if return_inverter:
|
||||
return image, labels, None
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
else:
|
||||
if return_inverter:
|
||||
def inverter(labels):
|
||||
return labels
|
||||
|
||||
if labels is None:
|
||||
if return_inverter:
|
||||
return image, inverter
|
||||
else:
|
||||
return image
|
||||
else:
|
||||
if return_inverter:
|
||||
return image, labels, inverter
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomPatchInf:
|
||||
'''
|
||||
Randomly samples a patch from an image. The randomness refers to whatever
|
||||
randomness may be introduced by the patch coordinate generator, the box filter,
|
||||
and the patch validator.
|
||||
|
||||
Input images may be cropped and/or padded along either or both of the two
|
||||
spatial dimensions as necessary in order to obtain the required patch.
|
||||
|
||||
This operation is very similar to `RandomPatch`, except that:
|
||||
1. This operation runs indefinitely until either a valid patch is found or
|
||||
the input image is returned unaltered, i.e. it cannot fail.
|
||||
2. If a bound generator is given, a new pair of bounds will be generated
|
||||
every `n_trials_max` iterations.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
patch_coord_generator,
|
||||
box_filter=None,
|
||||
image_validator=None,
|
||||
bound_generator=None,
|
||||
n_trials_max=50,
|
||||
clip_boxes=True,
|
||||
prob=0.857,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
patch_coord_generator (PatchCoordinateGenerator): A `PatchCoordinateGenerator` object
|
||||
to generate the positions and sizes of the patches to be sampled from the input images.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
|
||||
An `ImageValidator` object to determine whether a sampled patch is valid. If `None`,
|
||||
any outcome is valid.
|
||||
bound_generator (BoundGenerator, optional): A `BoundGenerator` object to generate upper and
|
||||
lower bound values for the patch validator. Every `n_trials_max` trials, a new pair of
|
||||
upper and lower bounds will be generated until a valid patch is found or the original image
|
||||
is returned. This bound generator overrides the bound generator of the patch validator.
|
||||
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
|
||||
The sampler will run indefinitely until either a valid patch is found or the original image
|
||||
is returned, but this determines the maxmial number of trials to sample a valid patch for each
|
||||
selected pair of lower and upper bounds before a new pair is picked.
|
||||
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
|
||||
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
|
||||
sampled patch.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
|
||||
background pixels of the scaled images. In the case of single-channel images,
|
||||
the first element of `background` will be used as the background pixel value.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
if not isinstance(patch_coord_generator, PatchCoordinateGenerator):
|
||||
raise ValueError("`patch_coord_generator` must be an instance of `PatchCoordinateGenerator`.")
|
||||
if not (isinstance(image_validator, ImageValidator) or image_validator is None):
|
||||
raise ValueError("`image_validator` must be either `None` or an `ImageValidator` object.")
|
||||
if not (isinstance(bound_generator, BoundGenerator) or bound_generator is None):
|
||||
raise ValueError("`bound_generator` must be either `None` or a `BoundGenerator` object.")
|
||||
self.patch_coord_generator = patch_coord_generator
|
||||
self.box_filter = box_filter
|
||||
self.image_validator = image_validator
|
||||
self.bound_generator = bound_generator
|
||||
self.n_trials_max = n_trials_max
|
||||
self.clip_boxes = clip_boxes
|
||||
self.prob = prob
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
self.sample_patch = CropPad(patch_ymin=None,
|
||||
patch_xmin=None,
|
||||
patch_height=None,
|
||||
patch_width=None,
|
||||
clip_boxes=self.clip_boxes,
|
||||
box_filter=self.box_filter,
|
||||
background=self.background,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
self.patch_coord_generator.img_height = img_height
|
||||
self.patch_coord_generator.img_width = img_width
|
||||
|
||||
xmin = self.labels_format['xmin']
|
||||
ymin = self.labels_format['ymin']
|
||||
xmax = self.labels_format['xmax']
|
||||
ymax = self.labels_format['ymax']
|
||||
|
||||
# Override the preset labels format.
|
||||
if not self.image_validator is None:
|
||||
self.image_validator.labels_format = self.labels_format
|
||||
self.sample_patch.labels_format = self.labels_format
|
||||
|
||||
while True: # Keep going until we either find a valid patch or return the original image.
|
||||
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
|
||||
# In case we have a bound generator, pick a lower and upper bound for the patch validator.
|
||||
if not ((self.image_validator is None) or (self.bound_generator is None)):
|
||||
self.image_validator.bounds = self.bound_generator()
|
||||
|
||||
# Use at most `self.n_trials_max` attempts to find a crop
|
||||
# that meets our requirements.
|
||||
for _ in range(max(1, self.n_trials_max)):
|
||||
|
||||
# Generate patch coordinates.
|
||||
patch_ymin, patch_xmin, patch_height, patch_width = self.patch_coord_generator()
|
||||
|
||||
self.sample_patch.patch_ymin = patch_ymin
|
||||
self.sample_patch.patch_xmin = patch_xmin
|
||||
self.sample_patch.patch_height = patch_height
|
||||
self.sample_patch.patch_width = patch_width
|
||||
|
||||
# Check if the resulting patch meets the aspect ratio requirements.
|
||||
aspect_ratio = patch_width / patch_height
|
||||
if not (self.patch_coord_generator.min_aspect_ratio <= aspect_ratio <= self.patch_coord_generator.max_aspect_ratio):
|
||||
continue
|
||||
|
||||
if (labels is None) or (self.image_validator is None):
|
||||
# We either don't have any boxes or if we do, we will accept any outcome as valid.
|
||||
return self.sample_patch(image, labels, return_inverter)
|
||||
else:
|
||||
# Translate the box coordinates to the patch's coordinate system.
|
||||
new_labels = np.copy(labels)
|
||||
new_labels[:, [ymin, ymax]] -= patch_ymin
|
||||
new_labels[:, [xmin, xmax]] -= patch_xmin
|
||||
# Check if the patch contains the minimum number of boxes we require.
|
||||
if self.image_validator(labels=new_labels,
|
||||
image_height=patch_height,
|
||||
image_width=patch_width):
|
||||
return self.sample_patch(image, labels, return_inverter)
|
||||
else:
|
||||
if return_inverter:
|
||||
def inverter(labels):
|
||||
return labels
|
||||
|
||||
if labels is None:
|
||||
if return_inverter:
|
||||
return image, inverter
|
||||
else:
|
||||
return image
|
||||
else:
|
||||
if return_inverter:
|
||||
return image, labels, inverter
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomMaxCropFixedAR:
|
||||
'''
|
||||
Crops the largest possible patch of a given fixed aspect ratio
|
||||
from an image.
|
||||
|
||||
Since the aspect ratio of the sampled patches is constant, they
|
||||
can subsequently be resized to the same size without distortion.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
patch_aspect_ratio,
|
||||
box_filter=None,
|
||||
image_validator=None,
|
||||
n_trials_max=3,
|
||||
clip_boxes=True,
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
patch_aspect_ratio (float): The fixed aspect ratio that all sampled patches will have.
|
||||
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
|
||||
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
|
||||
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
|
||||
the validity of the bounding boxes is not checked.
|
||||
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
|
||||
An `ImageValidator` object to determine whether a sampled patch is valid. If `None`,
|
||||
any outcome is valid.
|
||||
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
|
||||
Determines the maxmial number of trials to sample a valid patch. If no valid patch could
|
||||
be sampled in `n_trials_max` trials, returns `None`.
|
||||
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
|
||||
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
|
||||
sampled patch.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
self.patch_aspect_ratio = patch_aspect_ratio
|
||||
self.box_filter = box_filter
|
||||
self.image_validator = image_validator
|
||||
self.n_trials_max = n_trials_max
|
||||
self.clip_boxes = clip_boxes
|
||||
self.labels_format = labels_format
|
||||
self.random_patch = RandomPatch(patch_coord_generator=PatchCoordinateGenerator(), # Just a dummy object
|
||||
box_filter=self.box_filter,
|
||||
image_validator=self.image_validator,
|
||||
n_trials_max=self.n_trials_max,
|
||||
clip_boxes=self.clip_boxes,
|
||||
prob=1.0,
|
||||
can_fail=False,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
# The ratio of the input image aspect ratio and patch aspect ratio determines the maximal possible crop.
|
||||
image_aspect_ratio = img_width / img_height
|
||||
|
||||
if image_aspect_ratio < self.patch_aspect_ratio:
|
||||
patch_width = img_width
|
||||
patch_height = int(round(patch_width / self.patch_aspect_ratio))
|
||||
else:
|
||||
patch_height = img_height
|
||||
patch_width = int(round(patch_height * self.patch_aspect_ratio))
|
||||
|
||||
# Now that we know the desired height and width for the patch,
|
||||
# instantiate an appropriate patch coordinate generator.
|
||||
patch_coord_generator = PatchCoordinateGenerator(img_height=img_height,
|
||||
img_width=img_width,
|
||||
must_match='h_w',
|
||||
patch_height=patch_height,
|
||||
patch_width=patch_width)
|
||||
|
||||
# The rest of the work is done by `RandomPatch`.
|
||||
self.random_patch.patch_coord_generator = patch_coord_generator
|
||||
self.random_patch.labels_format = self.labels_format
|
||||
return self.random_patch(image, labels, return_inverter)
|
||||
|
||||
class RandomPadFixedAR:
|
||||
'''
|
||||
Adds the minimal possible padding to an image that results in a patch
|
||||
of the given fixed aspect ratio that contains the entire image.
|
||||
|
||||
Since the aspect ratio of the resulting images is constant, they
|
||||
can subsequently be resized to the same size without distortion.
|
||||
'''
|
||||
|
||||
def __init__(self,
|
||||
patch_aspect_ratio,
|
||||
background=(0,0,0),
|
||||
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
|
||||
'''
|
||||
Arguments:
|
||||
patch_aspect_ratio (float): The fixed aspect ratio that all sampled patches will have.
|
||||
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
|
||||
background pixels of the scaled images. In the case of single-channel images,
|
||||
the first element of `background` will be used as the background pixel value.
|
||||
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
|
||||
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
|
||||
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
|
||||
'''
|
||||
|
||||
self.patch_aspect_ratio = patch_aspect_ratio
|
||||
self.background = background
|
||||
self.labels_format = labels_format
|
||||
self.random_patch = RandomPatch(patch_coord_generator=PatchCoordinateGenerator(), # Just a dummy object
|
||||
box_filter=None,
|
||||
image_validator=None,
|
||||
n_trials_max=1,
|
||||
clip_boxes=False,
|
||||
background=self.background,
|
||||
prob=1.0,
|
||||
labels_format=self.labels_format)
|
||||
|
||||
def __call__(self, image, labels=None, return_inverter=False):
|
||||
|
||||
img_height, img_width = image.shape[:2]
|
||||
|
||||
if img_width < img_height:
|
||||
patch_height = img_height
|
||||
patch_width = int(round(patch_height * self.patch_aspect_ratio))
|
||||
else:
|
||||
patch_width = img_width
|
||||
patch_height = int(round(patch_width / self.patch_aspect_ratio))
|
||||
|
||||
# Now that we know the desired height and width for the patch,
|
||||
# instantiate an appropriate patch coordinate generator.
|
||||
patch_coord_generator = PatchCoordinateGenerator(img_height=img_height,
|
||||
img_width=img_width,
|
||||
must_match='h_w',
|
||||
patch_height=patch_height,
|
||||
patch_width=patch_width)
|
||||
|
||||
# The rest of the work is done by `RandomPatch`.
|
||||
self.random_patch.patch_coord_generator = patch_coord_generator
|
||||
self.random_patch.labels_format = self.labels_format
|
||||
return self.random_patch(image, labels, return_inverter)
|
||||
@@ -0,0 +1,485 @@
|
||||
'''
|
||||
Various photometric image transformations, both deterministic and probabilistic.
|
||||
|
||||
Copyright (C) 2018 Pierluigi Ferrari
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
'''
|
||||
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
class ConvertColor:
|
||||
'''
|
||||
Converts images between RGB, HSV and grayscale color spaces. This is just a wrapper
|
||||
around `cv2.cvtColor()`.
|
||||
'''
|
||||
def __init__(self, current='RGB', to='HSV', keep_3ch=True):
|
||||
'''
|
||||
Arguments:
|
||||
current (str, optional): The current color space of the images. Can be
|
||||
one of 'RGB' and 'HSV'.
|
||||
to (str, optional): The target color space of the images. Can be one of
|
||||
'RGB', 'HSV', and 'GRAY'.
|
||||
keep_3ch (bool, optional): Only relevant if `to == GRAY`.
|
||||
If `True`, the resulting grayscale images will have three channels.
|
||||
'''
|
||||
if not ((current in {'RGB', 'HSV'}) and (to in {'RGB', 'HSV', 'GRAY'})):
|
||||
raise NotImplementedError
|
||||
self.current = current
|
||||
self.to = to
|
||||
self.keep_3ch = keep_3ch
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
if self.current == 'RGB' and self.to == 'HSV':
|
||||
image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
|
||||
elif self.current == 'RGB' and self.to == 'GRAY':
|
||||
image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
|
||||
if self.keep_3ch:
|
||||
image = np.stack([image] * 3, axis=-1)
|
||||
elif self.current == 'HSV' and self.to == 'RGB':
|
||||
image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
|
||||
elif self.current == 'HSV' and self.to == 'GRAY':
|
||||
image = cv2.cvtColor(image, cv2.COLOR_HSV2GRAY)
|
||||
if self.keep_3ch:
|
||||
image = np.stack([image] * 3, axis=-1)
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class ConvertDataType:
|
||||
'''
|
||||
Converts images represented as Numpy arrays between `uint8` and `float32`.
|
||||
Serves as a helper for certain photometric distortions. This is just a wrapper
|
||||
around `np.ndarray.astype()`.
|
||||
'''
|
||||
def __init__(self, to='uint8'):
|
||||
'''
|
||||
Arguments:
|
||||
to (string, optional): To which datatype to convert the input images.
|
||||
Can be either of 'uint8' and 'float32'.
|
||||
'''
|
||||
if not (to == 'uint8' or to == 'float32'):
|
||||
raise ValueError("`to` can be either of 'uint8' or 'float32'.")
|
||||
self.to = to
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
if self.to == 'uint8':
|
||||
image = np.round(image, decimals=0).astype(np.uint8)
|
||||
else:
|
||||
image = image.astype(np.float32)
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class ConvertTo3Channels:
|
||||
'''
|
||||
Converts 1-channel and 4-channel images to 3-channel images. Does nothing to images that
|
||||
already have 3 channels. In the case of 4-channel images, the fourth channel will be
|
||||
discarded.
|
||||
'''
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
if image.ndim == 2:
|
||||
image = np.stack([image] * 3, axis=-1)
|
||||
elif image.ndim == 3:
|
||||
if image.shape[2] == 1:
|
||||
image = np.concatenate([image] * 3, axis=-1)
|
||||
elif image.shape[2] == 4:
|
||||
image = image[:,:,:3]
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class Hue:
|
||||
'''
|
||||
Changes the hue of HSV images.
|
||||
|
||||
Important:
|
||||
- Expects HSV input.
|
||||
- Expects input array to be of `dtype` `float`.
|
||||
'''
|
||||
def __init__(self, delta):
|
||||
'''
|
||||
Arguments:
|
||||
delta (int): An integer in the closed interval `[-180, 180]` that determines the hue change, where
|
||||
a change by integer `delta` means a change by `2 * delta` degrees. Read up on the HSV color format
|
||||
if you need more information.
|
||||
'''
|
||||
if not (-180 <= delta <= 180): raise ValueError("`delta` must be in the closed interval `[-180, 180]`.")
|
||||
self.delta = delta
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
image[:, :, 0] = (image[:, :, 0] + self.delta) % 180.0
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomHue:
|
||||
'''
|
||||
Randomly changes the hue of HSV images.
|
||||
|
||||
Important:
|
||||
- Expects HSV input.
|
||||
- Expects input array to be of `dtype` `float`.
|
||||
'''
|
||||
def __init__(self, max_delta=18, prob=0.5):
|
||||
'''
|
||||
Arguments:
|
||||
max_delta (int): An integer in the closed interval `[0, 180]` that determines the maximal absolute
|
||||
hue change.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
'''
|
||||
if not (0 <= max_delta <= 180): raise ValueError("`max_delta` must be in the closed interval `[0, 180]`.")
|
||||
self.max_delta = max_delta
|
||||
self.prob = prob
|
||||
self.change_hue = Hue(delta=0)
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
self.change_hue.delta = np.random.uniform(-self.max_delta, self.max_delta)
|
||||
return self.change_hue(image, labels)
|
||||
elif labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class Saturation:
|
||||
'''
|
||||
Changes the saturation of HSV images.
|
||||
|
||||
Important:
|
||||
- Expects HSV input.
|
||||
- Expects input array to be of `dtype` `float`.
|
||||
'''
|
||||
def __init__(self, factor):
|
||||
'''
|
||||
Arguments:
|
||||
factor (float): A float greater than zero that determines saturation change, where
|
||||
values less than one result in less saturation and values greater than one result
|
||||
in more saturation.
|
||||
'''
|
||||
if factor <= 0.0: raise ValueError("It must be `factor > 0`.")
|
||||
self.factor = factor
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
image[:,:,1] = np.clip(image[:,:,1] * self.factor, 0, 255)
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomSaturation:
|
||||
'''
|
||||
Randomly changes the saturation of HSV images.
|
||||
|
||||
Important:
|
||||
- Expects HSV input.
|
||||
- Expects input array to be of `dtype` `float`.
|
||||
'''
|
||||
def __init__(self, lower=0.3, upper=2.0, prob=0.5):
|
||||
'''
|
||||
Arguments:
|
||||
lower (float, optional): A float greater than zero, the lower bound for the random
|
||||
saturation change.
|
||||
upper (float, optional): A float greater than zero, the upper bound for the random
|
||||
saturation change. Must be greater than `lower`.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
'''
|
||||
if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
|
||||
self.lower = lower
|
||||
self.upper = upper
|
||||
self.prob = prob
|
||||
self.change_saturation = Saturation(factor=1.0)
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
self.change_saturation.factor = np.random.uniform(self.lower, self.upper)
|
||||
return self.change_saturation(image, labels)
|
||||
elif labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class Brightness:
|
||||
'''
|
||||
Changes the brightness of RGB images.
|
||||
|
||||
Important:
|
||||
- Expects RGB input.
|
||||
- Expects input array to be of `dtype` `float`.
|
||||
'''
|
||||
def __init__(self, delta):
|
||||
'''
|
||||
Arguments:
|
||||
delta (int): An integer, the amount to add to or subtract from the intensity
|
||||
of every pixel.
|
||||
'''
|
||||
self.delta = delta
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
image = np.clip(image + self.delta, 0, 255)
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomBrightness:
|
||||
'''
|
||||
Randomly changes the brightness of RGB images.
|
||||
|
||||
Important:
|
||||
- Expects RGB input.
|
||||
- Expects input array to be of `dtype` `float`.
|
||||
'''
|
||||
def __init__(self, lower=-84, upper=84, prob=0.5):
|
||||
'''
|
||||
Arguments:
|
||||
lower (int, optional): An integer, the lower bound for the random brightness change.
|
||||
upper (int, optional): An integer, the upper bound for the random brightness change.
|
||||
Must be greater than `lower`.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
'''
|
||||
if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
|
||||
self.lower = float(lower)
|
||||
self.upper = float(upper)
|
||||
self.prob = prob
|
||||
self.change_brightness = Brightness(delta=0)
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
self.change_brightness.delta = np.random.uniform(self.lower, self.upper)
|
||||
return self.change_brightness(image, labels)
|
||||
elif labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class Contrast:
|
||||
'''
|
||||
Changes the contrast of RGB images.
|
||||
|
||||
Important:
|
||||
- Expects RGB input.
|
||||
- Expects input array to be of `dtype` `float`.
|
||||
'''
|
||||
def __init__(self, factor):
|
||||
'''
|
||||
Arguments:
|
||||
factor (float): A float greater than zero that determines contrast change, where
|
||||
values less than one result in less contrast and values greater than one result
|
||||
in more contrast.
|
||||
'''
|
||||
if factor <= 0.0: raise ValueError("It must be `factor > 0`.")
|
||||
self.factor = factor
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
image = np.clip(127.5 + self.factor * (image - 127.5), 0, 255)
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomContrast:
|
||||
'''
|
||||
Randomly changes the contrast of RGB images.
|
||||
|
||||
Important:
|
||||
- Expects RGB input.
|
||||
- Expects input array to be of `dtype` `float`.
|
||||
'''
|
||||
def __init__(self, lower=0.5, upper=1.5, prob=0.5):
|
||||
'''
|
||||
Arguments:
|
||||
lower (float, optional): A float greater than zero, the lower bound for the random
|
||||
contrast change.
|
||||
upper (float, optional): A float greater than zero, the upper bound for the random
|
||||
contrast change. Must be greater than `lower`.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
'''
|
||||
if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
|
||||
self.lower = lower
|
||||
self.upper = upper
|
||||
self.prob = prob
|
||||
self.change_contrast = Contrast(factor=1.0)
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
self.change_contrast.factor = np.random.uniform(self.lower, self.upper)
|
||||
return self.change_contrast(image, labels)
|
||||
elif labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class Gamma:
|
||||
'''
|
||||
Changes the gamma value of RGB images.
|
||||
|
||||
Important: Expects RGB input.
|
||||
'''
|
||||
def __init__(self, gamma):
|
||||
'''
|
||||
Arguments:
|
||||
gamma (float): A float greater than zero that determines gamma change.
|
||||
'''
|
||||
if gamma <= 0.0: raise ValueError("It must be `gamma > 0`.")
|
||||
self.gamma = gamma
|
||||
self.gamma_inv = 1.0 / gamma
|
||||
# Build a lookup table mapping the pixel values [0, 255] to
|
||||
# their adjusted gamma values.
|
||||
self.table = np.array([((i / 255.0) ** self.gamma_inv) * 255 for i in np.arange(0, 256)]).astype("uint8")
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
image = cv2.LUT(image, table)
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomGamma:
|
||||
'''
|
||||
Randomly changes the gamma value of RGB images.
|
||||
|
||||
Important: Expects RGB input.
|
||||
'''
|
||||
def __init__(self, lower=0.25, upper=2.0, prob=0.5):
|
||||
'''
|
||||
Arguments:
|
||||
lower (float, optional): A float greater than zero, the lower bound for the random
|
||||
gamma change.
|
||||
upper (float, optional): A float greater than zero, the upper bound for the random
|
||||
gamma change. Must be greater than `lower`.
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
'''
|
||||
if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
|
||||
self.lower = lower
|
||||
self.upper = upper
|
||||
self.prob = prob
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
gamma = np.random.uniform(self.lower, self.upper)
|
||||
change_gamma = Gamma(gamma=gamma)
|
||||
return change_gamma(image, labels)
|
||||
elif labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class HistogramEqualization:
|
||||
'''
|
||||
Performs histogram equalization on HSV images.
|
||||
|
||||
Importat: Expects HSV input.
|
||||
'''
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
image[:,:,2] = cv2.equalizeHist(image[:,:,2])
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomHistogramEqualization:
|
||||
'''
|
||||
Randomly performs histogram equalization on HSV images. The randomness only refers
|
||||
to whether or not the equalization is performed.
|
||||
|
||||
Importat: Expects HSV input.
|
||||
'''
|
||||
def __init__(self, prob=0.5):
|
||||
'''
|
||||
Arguments:
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
'''
|
||||
self.prob = prob
|
||||
self.equalize = HistogramEqualization()
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
return self.equalize(image, labels)
|
||||
elif labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class ChannelSwap:
|
||||
'''
|
||||
Swaps the channels of images.
|
||||
'''
|
||||
def __init__(self, order):
|
||||
'''
|
||||
Arguments:
|
||||
order (tuple): A tuple of integers that defines the desired channel order
|
||||
of the input images after the channel swap.
|
||||
'''
|
||||
self.order = order
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
image = image[:,:,self.order]
|
||||
if labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
|
||||
class RandomChannelSwap:
|
||||
'''
|
||||
Randomly swaps the channels of RGB images.
|
||||
|
||||
Important: Expects RGB input.
|
||||
'''
|
||||
def __init__(self, prob=0.5):
|
||||
'''
|
||||
Arguments:
|
||||
prob (float, optional): `(1 - prob)` determines the probability with which the original,
|
||||
unaltered image is returned.
|
||||
'''
|
||||
self.prob = prob
|
||||
# All possible permutations of the three image channels except the original order.
|
||||
self.permutations = ((0, 2, 1),
|
||||
(1, 0, 2), (1, 2, 0),
|
||||
(2, 0, 1), (2, 1, 0))
|
||||
self.swap_channels = ChannelSwap(order=(0, 1, 2))
|
||||
|
||||
def __call__(self, image, labels=None):
|
||||
p = np.random.uniform(0,1)
|
||||
if p >= (1.0-self.prob):
|
||||
i = np.random.randint(5) # There are 6 possible permutations.
|
||||
self.swap_channels.order = self.permutations[i]
|
||||
return self.swap_channels(image, labels)
|
||||
elif labels is None:
|
||||
return image
|
||||
else:
|
||||
return image, labels
|
||||
Reference in New Issue
Block a user