This commit is contained in:
dl-desktop
2020-02-06 16:47:03 -03:00
parent 6328265287
commit b586f22bf0
318 changed files with 25111 additions and 664 deletions

Binary file not shown.

View File

@@ -0,0 +1,183 @@
'''
The data augmentation operations of the original SSD implementation.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation
from data_generator.object_detection_2d_geometric_ops import RandomFlip, RandomTranslate, RandomScale
from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator
class DataAugmentationConstantInputSize:
'''
Applies a chain of photometric and geometric image transformations. For documentation, please refer
to the documentation of the individual transformations involved.
Important: This augmentation chain is suitable for constant-size images only.
'''
def __init__(self,
random_brightness=(-48, 48, 0.5),
random_contrast=(0.5, 1.8, 0.5),
random_saturation=(0.5, 1.8, 0.5),
random_hue=(18, 0.5),
random_flip=0.5,
random_translate=((0.03,0.5), (0.03,0.5), 0.5),
random_scale=(0.5, 2.0, 0.5),
n_trials_max=3,
clip_boxes=True,
overlap_criterion='area',
bounds_box_filter=(0.3, 1.0),
bounds_validator=(0.5, 1.0),
n_boxes_min=1,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
if (random_scale[0] >= 1) or (random_scale[1] <= 1):
raise ValueError("This sequence of transformations only makes sense if the minimum scaling factor is <1 and the maximum scaling factor is >1.")
self.n_trials_max = n_trials_max
self.clip_boxes = clip_boxes
self.overlap_criterion = overlap_criterion
self.bounds_box_filter = bounds_box_filter
self.bounds_validator = bounds_validator
self.n_boxes_min = n_boxes_min
self.background = background
self.labels_format = labels_format
# Determines which boxes are kept in an image after the transformations have been applied.
self.box_filter = BoxFilter(check_overlap=True,
check_min_area=True,
check_degenerate=True,
overlap_criterion=self.overlap_criterion,
overlap_bounds=self.bounds_box_filter,
min_area=16,
labels_format=self.labels_format)
# Determines whether the result of the transformations is a valid training image.
self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
bounds=self.bounds_validator,
n_boxes_min=self.n_boxes_min,
labels_format=self.labels_format)
# Utility distortions
self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
self.convert_to_float32 = ConvertDataType(to='float32')
self.convert_to_uint8 = ConvertDataType(to='uint8')
self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels.
# Photometric transformations
self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2])
self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2])
self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2])
self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1])
# Geometric transformations
self.random_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format)
self.random_translate = RandomTranslate(dy_minmax=random_translate[0],
dx_minmax=random_translate[1],
prob=random_translate[2],
clip_boxes=self.clip_boxes,
box_filter=self.box_filter,
image_validator=self.image_validator,
n_trials_max=self.n_trials_max,
background=self.background,
labels_format=self.labels_format)
self.random_zoom_in = RandomScale(min_factor=1.0,
max_factor=random_scale[1],
prob=random_scale[2],
clip_boxes=self.clip_boxes,
box_filter=self.box_filter,
image_validator=self.image_validator,
n_trials_max=self.n_trials_max,
background=self.background,
labels_format=self.labels_format)
self.random_zoom_out = RandomScale(min_factor=random_scale[0],
max_factor=1.0,
prob=random_scale[2],
clip_boxes=self.clip_boxes,
box_filter=self.box_filter,
image_validator=self.image_validator,
n_trials_max=self.n_trials_max,
background=self.background,
labels_format=self.labels_format)
# If we zoom in, do translation before scaling.
self.sequence1 = [self.convert_to_3_channels,
self.convert_to_float32,
self.random_brightness,
self.random_contrast,
self.convert_to_uint8,
self.convert_RGB_to_HSV,
self.convert_to_float32,
self.random_saturation,
self.random_hue,
self.convert_to_uint8,
self.convert_HSV_to_RGB,
self.random_translate,
self.random_zoom_in,
self.random_flip]
# If we zoom out, do scaling before translation.
self.sequence2 = [self.convert_to_3_channels,
self.convert_to_float32,
self.random_brightness,
self.convert_to_uint8,
self.convert_RGB_to_HSV,
self.convert_to_float32,
self.random_saturation,
self.random_hue,
self.convert_to_uint8,
self.convert_HSV_to_RGB,
self.convert_to_float32,
self.random_contrast,
self.convert_to_uint8,
self.random_zoom_out,
self.random_translate,
self.random_flip]
def __call__(self, image, labels=None):
self.random_translate.labels_format = self.labels_format
self.random_zoom_in.labels_format = self.labels_format
self.random_zoom_out.labels_format = self.labels_format
self.random_flip.labels_format = self.labels_format
# Choose sequence 1 with probability 0.5.
if np.random.choice(2):
if not (labels is None):
for transform in self.sequence1:
image, labels = transform(image, labels)
return image, labels
else:
for transform in self.sequence1:
image = transform(image)
return image
# Choose sequence 2 with probability 0.5.
else:
if not (labels is None):
for transform in self.sequence2:
image, labels = transform(image, labels)
return image, labels
else:
for transform in self.sequence2:
image = transform(image)
return image

View File

@@ -0,0 +1,280 @@
'''
The data augmentation operations of the original SSD implementation.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
import cv2
import inspect
from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation, RandomChannelSwap
from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch, RandomPatchInf
from data_generator.object_detection_2d_geometric_ops import ResizeRandomInterp, RandomFlip
from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator
class SSDRandomCrop:
'''
Performs the same random crops as defined by the `batch_sampler` instructions
of the original Caffe implementation of SSD. A description of this random cropping
strategy can also be found in the data augmentation section of the paper:
https://arxiv.org/abs/1512.02325
'''
def __init__(self, labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
self.labels_format = labels_format
# This randomly samples one of the lower IoU bounds defined
# by the `sample_space` every time it is called.
self.bound_generator = BoundGenerator(sample_space=((None, None),
(0.1, None),
(0.3, None),
(0.5, None),
(0.7, None),
(0.9, None)),
weights=None)
# Produces coordinates for candidate patches such that the height
# and width of the patches are between 0.3 and 1.0 of the height
# and width of the respective image and the aspect ratio of the
# patches is between 0.5 and 2.0.
self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w',
min_scale=0.3,
max_scale=1.0,
scale_uniformly=False,
min_aspect_ratio = 0.5,
max_aspect_ratio = 2.0)
# Filters out boxes whose center point does not lie within the
# chosen patches.
self.box_filter = BoxFilter(check_overlap=True,
check_min_area=False,
check_degenerate=False,
overlap_criterion='center_point',
labels_format=self.labels_format)
# Determines whether a given patch is considered a valid patch.
# Defines a patch to be valid if at least one ground truth bounding box
# (n_boxes_min == 1) has an IoU overlap with the patch that
# meets the requirements defined by `bound_generator`.
self.image_validator = ImageValidator(overlap_criterion='iou',
n_boxes_min=1,
labels_format=self.labels_format,
border_pixels='half')
# Performs crops according to the parameters set in the objects above.
# Runs until either a valid patch is found or the original input image
# is returned unaltered. Runs a maximum of 50 trials to find a valid
# patch for each new sampled IoU threshold. Every 50 trials, the original
# image is returned as is with probability (1 - prob) = 0.143.
self.random_crop = RandomPatchInf(patch_coord_generator=self.patch_coord_generator,
box_filter=self.box_filter,
image_validator=self.image_validator,
bound_generator=self.bound_generator,
n_trials_max=50,
clip_boxes=True,
prob=0.857,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
self.random_crop.labels_format = self.labels_format
return self.random_crop(image, labels, return_inverter)
class SSDExpand:
'''
Performs the random image expansion as defined by the `train_transform_param` instructions
of the original Caffe implementation of SSD. A description of this expansion strategy
can also be found in section 3.6 ("Data Augmentation for Small Object Accuracy") of the paper:
https://arxiv.org/abs/1512.02325
'''
def __init__(self, background=(123, 117, 104), labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
background pixels of the translated images.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
self.labels_format = labels_format
# Generate coordinates for patches that are between 1.0 and 4.0 times
# the size of the input image in both spatial dimensions.
self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w',
min_scale=1.0,
max_scale=4.0,
scale_uniformly=True)
# With probability 0.5, place the input image randomly on a canvas filled with
# mean color values according to the parameters set above. With probability 0.5,
# return the input image unaltered.
self.expand = RandomPatch(patch_coord_generator=self.patch_coord_generator,
box_filter=None,
image_validator=None,
n_trials_max=1,
clip_boxes=False,
prob=0.5,
background=background,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
self.expand.labels_format = self.labels_format
return self.expand(image, labels, return_inverter)
class SSDPhotometricDistortions:
'''
Performs the photometric distortions defined by the `train_transform_param` instructions
of the original Caffe implementation of SSD.
'''
def __init__(self):
self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
self.convert_to_float32 = ConvertDataType(to='float32')
self.convert_to_uint8 = ConvertDataType(to='uint8')
self.convert_to_3_channels = ConvertTo3Channels()
self.random_brightness = RandomBrightness(lower=-32, upper=32, prob=0.5)
self.random_contrast = RandomContrast(lower=0.5, upper=1.5, prob=0.5)
self.random_saturation = RandomSaturation(lower=0.5, upper=1.5, prob=0.5)
self.random_hue = RandomHue(max_delta=18, prob=0.5)
self.random_channel_swap = RandomChannelSwap(prob=0.0)
self.sequence1 = [self.convert_to_3_channels,
self.convert_to_float32,
self.random_brightness,
self.random_contrast,
self.convert_to_uint8,
self.convert_RGB_to_HSV,
self.convert_to_float32,
self.random_saturation,
self.random_hue,
self.convert_to_uint8,
self.convert_HSV_to_RGB,
self.random_channel_swap]
self.sequence2 = [self.convert_to_3_channels,
self.convert_to_float32,
self.random_brightness,
self.convert_to_uint8,
self.convert_RGB_to_HSV,
self.convert_to_float32,
self.random_saturation,
self.random_hue,
self.convert_to_uint8,
self.convert_HSV_to_RGB,
self.convert_to_float32,
self.random_contrast,
self.convert_to_uint8,
self.random_channel_swap]
def __call__(self, image, labels):
# Choose sequence 1 with probability 0.5.
if np.random.choice(2):
for transform in self.sequence1:
image, labels = transform(image, labels)
return image, labels
# Choose sequence 2 with probability 0.5.
else:
for transform in self.sequence2:
image, labels = transform(image, labels)
return image, labels
class SSDDataAugmentation:
'''
Reproduces the data augmentation pipeline used in the training of the original
Caffe implementation of SSD.
'''
def __init__(self,
img_height=300,
img_width=300,
background=(123, 117, 104),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
height (int): The desired height of the output images in pixels.
width (int): The desired width of the output images in pixels.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
background pixels of the translated images.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
self.labels_format = labels_format
self.photometric_distortions = SSDPhotometricDistortions()
self.expand = SSDExpand(background=background, labels_format=self.labels_format)
self.random_crop = SSDRandomCrop(labels_format=self.labels_format)
self.random_flip = RandomFlip(dim='horizontal', prob=0.5, labels_format=self.labels_format)
# This box filter makes sure that the resized images don't contain any degenerate boxes.
# Resizing the images could lead the boxes to becomes smaller. For boxes that are already
# pretty small, that might result in boxes with height and/or width zero, which we obviously
# cannot allow.
self.box_filter = BoxFilter(check_overlap=False,
check_min_area=False,
check_degenerate=True,
labels_format=self.labels_format)
self.resize = ResizeRandomInterp(height=img_height,
width=img_width,
interpolation_modes=[cv2.INTER_NEAREST,
cv2.INTER_LINEAR,
cv2.INTER_CUBIC,
cv2.INTER_AREA,
cv2.INTER_LANCZOS4],
box_filter=self.box_filter,
labels_format=self.labels_format)
self.sequence = [self.photometric_distortions,
self.expand,
self.random_crop,
self.random_flip,
self.resize]
def __call__(self, image, labels, return_inverter=False):
self.expand.labels_format = self.labels_format
self.random_crop.labels_format = self.labels_format
self.random_flip.labels_format = self.labels_format
self.resize.labels_format = self.labels_format
inverters = []
for transform in self.sequence:
if return_inverter and ('return_inverter' in inspect.signature(transform).parameters):
image, labels, inverter = transform(image, labels, return_inverter=True)
inverters.append(inverter)
else:
image, labels = transform(image, labels)
if return_inverter:
return image, labels, inverters[::-1]
else:
return image, labels

View File

@@ -0,0 +1,157 @@
'''
A data augmentation pipeline for datasets in bird's eye view, i.e. where there is
no "up" or "down" in the images.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation
from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip, RandomRotate
from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch
from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator
class DataAugmentationSatellite:
'''
A data augmentation pipeline for datasets in bird's eye view, i.e. where there is
no "up" or "down" in the images.
Applies a chain of photometric and geometric image transformations. For documentation, please refer
to the documentation of the individual transformations involved.
'''
def __init__(self,
resize_height,
resize_width,
random_brightness=(-48, 48, 0.5),
random_contrast=(0.5, 1.8, 0.5),
random_saturation=(0.5, 1.8, 0.5),
random_hue=(18, 0.5),
random_flip=0.5,
random_rotate=([90, 180, 270], 0.5),
min_scale=0.3,
max_scale=2.0,
min_aspect_ratio = 0.8,
max_aspect_ratio = 1.25,
n_trials_max=3,
clip_boxes=True,
overlap_criterion='area',
bounds_box_filter=(0.3, 1.0),
bounds_validator=(0.5, 1.0),
n_boxes_min=1,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
self.n_trials_max = n_trials_max
self.clip_boxes = clip_boxes
self.overlap_criterion = overlap_criterion
self.bounds_box_filter = bounds_box_filter
self.bounds_validator = bounds_validator
self.n_boxes_min = n_boxes_min
self.background = background
self.labels_format = labels_format
# Determines which boxes are kept in an image after the transformations have been applied.
self.box_filter_patch = BoxFilter(check_overlap=True,
check_min_area=False,
check_degenerate=False,
overlap_criterion=self.overlap_criterion,
overlap_bounds=self.bounds_box_filter,
labels_format=self.labels_format)
self.box_filter_resize = BoxFilter(check_overlap=False,
check_min_area=True,
check_degenerate=True,
min_area=16,
labels_format=self.labels_format)
# Determines whether the result of the transformations is a valid training image.
self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
bounds=self.bounds_validator,
n_boxes_min=self.n_boxes_min,
labels_format=self.labels_format)
# Utility transformations
self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels.
self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
self.convert_to_float32 = ConvertDataType(to='float32')
self.convert_to_uint8 = ConvertDataType(to='uint8')
self.resize = Resize(height=resize_height,
width=resize_width,
box_filter=self.box_filter_resize,
labels_format=self.labels_format)
# Photometric transformations
self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2])
self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2])
self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2])
self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1])
# Geometric transformations
self.random_horizontal_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format)
self.random_vertical_flip = RandomFlip(dim='vertical', prob=random_flip, labels_format=self.labels_format)
self.random_rotate = RandomRotate(angles=random_rotate[0], prob=random_rotate[1], labels_format=self.labels_format)
self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar',
min_scale=min_scale,
max_scale=max_scale,
scale_uniformly=False,
min_aspect_ratio = min_aspect_ratio,
max_aspect_ratio = max_aspect_ratio)
self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator,
box_filter=self.box_filter_patch,
image_validator=self.image_validator,
n_trials_max=self.n_trials_max,
clip_boxes=self.clip_boxes,
prob=1.0,
can_fail=False,
labels_format=self.labels_format)
# Define the processing chain.
self.transformations = [self.convert_to_3_channels,
self.convert_to_float32,
self.random_brightness,
self.random_contrast,
self.convert_to_uint8,
self.convert_RGB_to_HSV,
self.convert_to_float32,
self.random_saturation,
self.random_hue,
self.convert_to_uint8,
self.convert_HSV_to_RGB,
self.random_horizontal_flip,
self.random_vertical_flip,
self.random_rotate,
self.random_patch,
self.resize]
def __call__(self, image, labels=None):
self.random_patch.labels_format = self.labels_format
self.random_horizontal_flip.labels_format = self.labels_format
self.random_vertical_flip.labels_format = self.labels_format
self.random_rotate.labels_format = self.labels_format
self.resize.labels_format = self.labels_format
if not (labels is None):
for transform in self.transformations:
image, labels = transform(image, labels)
return image, labels
else:
for transform in self.sequence1:
image = transform(image)
return image

View File

@@ -0,0 +1,152 @@
'''
A data augmentation pipeline suitable for variable-size images that produces effects
that are similar (but not identical) to those of the original SSD data augmentation
pipeline while being faster.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation
from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip
from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch
from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator
class DataAugmentationVariableInputSize:
'''
A data augmentation pipeline suitable for variable-size images that produces effects
that are similar (but not identical!) to those of the original SSD data augmentation
pipeline while being faster.
Applies a chain of photometric and geometric image transformations. For documentation, please refer
to the documentation of the individual transformations involved.
'''
def __init__(self,
resize_height,
resize_width,
random_brightness=(-48, 48, 0.5),
random_contrast=(0.5, 1.8, 0.5),
random_saturation=(0.5, 1.8, 0.5),
random_hue=(18, 0.5),
random_flip=0.5,
min_scale=0.3,
max_scale=2.0,
min_aspect_ratio = 0.5,
max_aspect_ratio = 2.0,
n_trials_max=3,
clip_boxes=True,
overlap_criterion='area',
bounds_box_filter=(0.3, 1.0),
bounds_validator=(0.5, 1.0),
n_boxes_min=1,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
self.n_trials_max = n_trials_max
self.clip_boxes = clip_boxes
self.overlap_criterion = overlap_criterion
self.bounds_box_filter = bounds_box_filter
self.bounds_validator = bounds_validator
self.n_boxes_min = n_boxes_min
self.background = background
self.labels_format = labels_format
# Determines which boxes are kept in an image after the transformations have been applied.
self.box_filter_patch = BoxFilter(check_overlap=True,
check_min_area=False,
check_degenerate=False,
overlap_criterion=self.overlap_criterion,
overlap_bounds=self.bounds_box_filter,
labels_format=self.labels_format)
self.box_filter_resize = BoxFilter(check_overlap=False,
check_min_area=True,
check_degenerate=True,
min_area=16,
labels_format=self.labels_format)
# Determines whether the result of the transformations is a valid training image.
self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
bounds=self.bounds_validator,
n_boxes_min=self.n_boxes_min,
labels_format=self.labels_format)
# Utility transformations
self.convert_to_3_channels = ConvertTo3Channels() # Make sure all images end up having 3 channels.
self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
self.convert_to_float32 = ConvertDataType(to='float32')
self.convert_to_uint8 = ConvertDataType(to='uint8')
self.resize = Resize(height=resize_height,
width=resize_width,
box_filter=self.box_filter_resize,
labels_format=self.labels_format)
# Photometric transformations
self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2])
self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2])
self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2])
self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1])
# Geometric transformations
self.random_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format)
self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar',
min_scale=min_scale,
max_scale=max_scale,
scale_uniformly=False,
min_aspect_ratio = min_aspect_ratio,
max_aspect_ratio = max_aspect_ratio)
self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator,
box_filter=self.box_filter_patch,
image_validator=self.image_validator,
n_trials_max=self.n_trials_max,
clip_boxes=self.clip_boxes,
prob=1.0,
can_fail=False,
labels_format=self.labels_format)
# Define the processing chain
self.transformations = [self.convert_to_3_channels,
self.convert_to_float32,
self.random_brightness,
self.random_contrast,
self.convert_to_uint8,
self.convert_RGB_to_HSV,
self.convert_to_float32,
self.random_saturation,
self.random_hue,
self.convert_to_uint8,
self.convert_HSV_to_RGB,
self.random_patch,
self.random_flip,
self.resize]
def __call__(self, image, labels=None):
self.random_patch.labels_format = self.labels_format
self.random_flip.labels_format = self.labels_format
self.resize.labels_format = self.labels_format
if not (labels is None):
for transform in self.transformations:
image, labels = transform(image, labels)
return image, labels
else:
for transform in self.sequence1:
image = transform(image)
return image

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,779 @@
'''
Various geometric image transformations for 2D object detection, both deterministic
and probabilistic.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
import cv2
import random
from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator
class Resize:
'''
Resizes images to a specified height and width in pixels.
'''
def __init__(self,
height,
width,
interpolation_mode=cv2.INTER_LINEAR,
box_filter=None,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
height (int): The desired height of the output images in pixels.
width (int): The desired width of the output images in pixels.
interpolation_mode (int, optional): An integer that denotes a valid
OpenCV interpolation mode. For example, integers 0 through 5 are
valid interpolation modes.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if not (isinstance(box_filter, BoxFilter) or box_filter is None):
raise ValueError("`box_filter` must be either `None` or a `BoxFilter` object.")
self.out_height = height
self.out_width = width
self.interpolation_mode = interpolation_mode
self.box_filter = box_filter
self.labels_format = labels_format
def __call__(self, image, labels=None, return_inverter=False):
img_height, img_width = image.shape[:2]
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
image = cv2.resize(image,
dsize=(self.out_width, self.out_height),
interpolation=self.interpolation_mode)
if return_inverter:
def inverter(labels):
labels = np.copy(labels)
labels[:, [ymin+1, ymax+1]] = np.round(labels[:, [ymin+1, ymax+1]] * (img_height / self.out_height), decimals=0)
labels[:, [xmin+1, xmax+1]] = np.round(labels[:, [xmin+1, xmax+1]] * (img_width / self.out_width), decimals=0)
return labels
if labels is None:
if return_inverter:
return image, inverter
else:
return image
else:
labels = np.copy(labels)
labels[:, [ymin, ymax]] = np.round(labels[:, [ymin, ymax]] * (self.out_height / img_height), decimals=0)
labels[:, [xmin, xmax]] = np.round(labels[:, [xmin, xmax]] * (self.out_width / img_width), decimals=0)
if not (self.box_filter is None):
self.box_filter.labels_format = self.labels_format
labels = self.box_filter(labels=labels,
image_height=self.out_height,
image_width=self.out_width)
if return_inverter:
return image, labels, inverter
else:
return image, labels
class ResizeRandomInterp:
'''
Resizes images to a specified height and width in pixels using a radnomly
selected interpolation mode.
'''
def __init__(self,
height,
width,
interpolation_modes=[cv2.INTER_NEAREST,
cv2.INTER_LINEAR,
cv2.INTER_CUBIC,
cv2.INTER_AREA,
cv2.INTER_LANCZOS4],
box_filter=None,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
height (int): The desired height of the output image in pixels.
width (int): The desired width of the output image in pixels.
interpolation_modes (list/tuple, optional): A list/tuple of integers
that represent valid OpenCV interpolation modes. For example,
integers 0 through 5 are valid interpolation modes.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if not (isinstance(interpolation_modes, (list, tuple))):
raise ValueError("`interpolation_mode` must be a list or tuple.")
self.height = height
self.width = width
self.interpolation_modes = interpolation_modes
self.box_filter = box_filter
self.labels_format = labels_format
self.resize = Resize(height=self.height,
width=self.width,
box_filter=self.box_filter,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
self.resize.interpolation_mode = np.random.choice(self.interpolation_modes)
self.resize.labels_format = self.labels_format
return self.resize(image, labels, return_inverter)
class Flip:
'''
Flips images horizontally or vertically.
'''
def __init__(self,
dim='horizontal',
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
dim (str, optional): Can be either of 'horizontal' and 'vertical'.
If 'horizontal', images will be flipped horizontally, i.e. along
the vertical axis. If 'horizontal', images will be flipped vertically,
i.e. along the horizontal axis.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if not (dim in {'horizontal', 'vertical'}): raise ValueError("`dim` can be one of 'horizontal' and 'vertical'.")
self.dim = dim
self.labels_format = labels_format
def __call__(self, image, labels=None, return_inverter=False):
img_height, img_width = image.shape[:2]
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
if self.dim == 'horizontal':
image = image[:,::-1]
if labels is None:
return image
else:
labels = np.copy(labels)
labels[:, [xmin, xmax]] = img_width - labels[:, [xmax, xmin]]
return image, labels
else:
image = image[::-1]
if labels is None:
return image
else:
labels = np.copy(labels)
labels[:, [ymin, ymax]] = img_height - labels[:, [ymax, ymin]]
return image, labels
class RandomFlip:
'''
Randomly flips images horizontally or vertically. The randomness only refers
to whether or not the image will be flipped.
'''
def __init__(self,
dim='horizontal',
prob=0.5,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
dim (str, optional): Can be either of 'horizontal' and 'vertical'.
If 'horizontal', images will be flipped horizontally, i.e. along
the vertical axis. If 'horizontal', images will be flipped vertically,
i.e. along the horizontal axis.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
self.dim = dim
self.prob = prob
self.labels_format = labels_format
self.flip = Flip(dim=self.dim, labels_format=self.labels_format)
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
self.flip.labels_format = self.labels_format
return self.flip(image, labels)
elif labels is None:
return image
else:
return image, labels
class Translate:
'''
Translates images horizontally and/or vertically.
'''
def __init__(self,
dy,
dx,
clip_boxes=True,
box_filter=None,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
dy (float): The fraction of the image height by which to translate images along the
vertical axis. Positive values translate images downwards, negative values
translate images upwards.
dx (float): The fraction of the image width by which to translate images along the
horizontal axis. Positive values translate images to the right, negative values
translate images to the left.
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
image after the translation.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
background pixels of the translated images.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if not (isinstance(box_filter, BoxFilter) or box_filter is None):
raise ValueError("`box_filter` must be either `None` or a `BoxFilter` object.")
self.dy_rel = dy
self.dx_rel = dx
self.clip_boxes = clip_boxes
self.box_filter = box_filter
self.background = background
self.labels_format = labels_format
def __call__(self, image, labels=None):
img_height, img_width = image.shape[:2]
# Compute the translation matrix.
dy_abs = int(round(img_height * self.dy_rel))
dx_abs = int(round(img_width * self.dx_rel))
M = np.float32([[1, 0, dx_abs],
[0, 1, dy_abs]])
# Translate the image.
image = cv2.warpAffine(image,
M=M,
dsize=(img_width, img_height),
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.background)
if labels is None:
return image
else:
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
labels = np.copy(labels)
# Translate the box coordinates to the translated image's coordinate system.
labels[:,[xmin,xmax]] += dx_abs
labels[:,[ymin,ymax]] += dy_abs
# Compute all valid boxes for this patch.
if not (self.box_filter is None):
self.box_filter.labels_format = self.labels_format
labels = self.box_filter(labels=labels,
image_height=img_height,
image_width=img_width)
if self.clip_boxes:
labels[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=img_height-1)
labels[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=img_width-1)
return image, labels
class RandomTranslate:
'''
Randomly translates images horizontally and/or vertically.
'''
def __init__(self,
dy_minmax=(0.03,0.3),
dx_minmax=(0.03,0.3),
prob=0.5,
clip_boxes=True,
box_filter=None,
image_validator=None,
n_trials_max=3,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
dy_minmax (list/tuple, optional): A 2-tuple `(min, max)` of non-negative floats that
determines the minimum and maximum relative translation of images along the vertical
axis both upward and downward. That is, images will be randomly translated by at least
`min` and at most `max` either upward or downward. For example, if `dy_minmax == (0.05,0.3)`,
an image of size `(100,100)` will be translated by at least 5 and at most 30 pixels
either upward or downward. The translation direction is chosen randomly.
dx_minmax (list/tuple, optional): A 2-tuple `(min, max)` of non-negative floats that
determines the minimum and maximum relative translation of images along the horizontal
axis both to the left and right. That is, images will be randomly translated by at least
`min` and at most `max` either left or right. For example, if `dx_minmax == (0.05,0.3)`,
an image of size `(100,100)` will be translated by at least 5 and at most 30 pixels
either left or right. The translation direction is chosen randomly.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
image after the translation.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
An `ImageValidator` object to determine whether a translated image is valid. If `None`,
any outcome is valid.
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
Determines the maxmial number of trials to produce a valid image. If no valid image could
be produced in `n_trials_max` trials, returns the unaltered input image.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
background pixels of the translated images.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if dy_minmax[0] > dy_minmax[1]:
raise ValueError("It must be `dy_minmax[0] <= dy_minmax[1]`.")
if dx_minmax[0] > dx_minmax[1]:
raise ValueError("It must be `dx_minmax[0] <= dx_minmax[1]`.")
if dy_minmax[0] < 0 or dx_minmax[0] < 0:
raise ValueError("It must be `dy_minmax[0] >= 0` and `dx_minmax[0] >= 0`.")
if not (isinstance(image_validator, ImageValidator) or image_validator is None):
raise ValueError("`image_validator` must be either `None` or an `ImageValidator` object.")
self.dy_minmax = dy_minmax
self.dx_minmax = dx_minmax
self.prob = prob
self.clip_boxes = clip_boxes
self.box_filter = box_filter
self.image_validator = image_validator
self.n_trials_max = n_trials_max
self.background = background
self.labels_format = labels_format
self.translate = Translate(dy=0,
dx=0,
clip_boxes=self.clip_boxes,
box_filter=self.box_filter,
background=self.background,
labels_format=self.labels_format)
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
img_height, img_width = image.shape[:2]
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
# Override the preset labels format.
if not self.image_validator is None:
self.image_validator.labels_format = self.labels_format
self.translate.labels_format = self.labels_format
for _ in range(max(1, self.n_trials_max)):
# Pick the relative amount by which to translate.
dy_abs = np.random.uniform(self.dy_minmax[0], self.dy_minmax[1])
dx_abs = np.random.uniform(self.dx_minmax[0], self.dx_minmax[1])
# Pick the direction in which to translate.
dy = np.random.choice([-dy_abs, dy_abs])
dx = np.random.choice([-dx_abs, dx_abs])
self.translate.dy_rel = dy
self.translate.dx_rel = dx
if (labels is None) or (self.image_validator is None):
# We either don't have any boxes or if we do, we will accept any outcome as valid.
return self.translate(image, labels)
else:
# Translate the box coordinates to the translated image's coordinate system.
new_labels = np.copy(labels)
new_labels[:, [ymin, ymax]] += int(round(img_height * dy))
new_labels[:, [xmin, xmax]] += int(round(img_width * dx))
# Check if the patch is valid.
if self.image_validator(labels=new_labels,
image_height=img_height,
image_width=img_width):
return self.translate(image, labels)
# If all attempts failed, return the unaltered input image.
if labels is None:
return image
else:
return image, labels
elif labels is None:
return image
else:
return image, labels
class Scale:
'''
Scales images, i.e. zooms in or out.
'''
def __init__(self,
factor,
clip_boxes=True,
box_filter=None,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
factor (float): The fraction of the image size by which to scale images. Must be positive.
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
image after the translation.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
background pixels of the scaled images.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if factor <= 0:
raise ValueError("It must be `factor > 0`.")
if not (isinstance(box_filter, BoxFilter) or box_filter is None):
raise ValueError("`box_filter` must be either `None` or a `BoxFilter` object.")
self.factor = factor
self.clip_boxes = clip_boxes
self.box_filter = box_filter
self.background = background
self.labels_format = labels_format
def __call__(self, image, labels=None):
img_height, img_width = image.shape[:2]
# Compute the rotation matrix.
M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2),
angle=0,
scale=self.factor)
# Scale the image.
image = cv2.warpAffine(image,
M=M,
dsize=(img_width, img_height),
borderMode=cv2.BORDER_CONSTANT,
borderValue=self.background)
if labels is None:
return image
else:
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
labels = np.copy(labels)
# Scale the bounding boxes accordingly.
# Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`.
toplefts = np.array([labels[:,xmin], labels[:,ymin], np.ones(labels.shape[0])])
bottomrights = np.array([labels[:,xmax], labels[:,ymax], np.ones(labels.shape[0])])
new_toplefts = (np.dot(M, toplefts)).T
new_bottomrights = (np.dot(M, bottomrights)).T
labels[:,[xmin,ymin]] = np.round(new_toplefts, decimals=0).astype(np.int)
labels[:,[xmax,ymax]] = np.round(new_bottomrights, decimals=0).astype(np.int)
# Compute all valid boxes for this patch.
if not (self.box_filter is None):
self.box_filter.labels_format = self.labels_format
labels = self.box_filter(labels=labels,
image_height=img_height,
image_width=img_width)
if self.clip_boxes:
labels[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=img_height-1)
labels[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=img_width-1)
return image, labels
class RandomScale:
'''
Randomly scales images.
'''
def __init__(self,
min_factor=0.5,
max_factor=1.5,
prob=0.5,
clip_boxes=True,
box_filter=None,
image_validator=None,
n_trials_max=3,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
min_factor (float, optional): The minimum fraction of the image size by which to scale images.
Must be positive.
max_factor (float, optional): The maximum fraction of the image size by which to scale images.
Must be positive.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
image after the translation.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
An `ImageValidator` object to determine whether a scaled image is valid. If `None`,
any outcome is valid.
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
Determines the maxmial number of trials to produce a valid image. If no valid image could
be produced in `n_trials_max` trials, returns the unaltered input image.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
background pixels of the scaled images.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if not (0 < min_factor <= max_factor):
raise ValueError("It must be `0 < min_factor <= max_factor`.")
if not (isinstance(image_validator, ImageValidator) or image_validator is None):
raise ValueError("`image_validator` must be either `None` or an `ImageValidator` object.")
self.min_factor = min_factor
self.max_factor = max_factor
self.prob = prob
self.clip_boxes = clip_boxes
self.box_filter = box_filter
self.image_validator = image_validator
self.n_trials_max = n_trials_max
self.background = background
self.labels_format = labels_format
self.scale = Scale(factor=1.0,
clip_boxes=self.clip_boxes,
box_filter=self.box_filter,
background=self.background,
labels_format=self.labels_format)
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
img_height, img_width = image.shape[:2]
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
# Override the preset labels format.
if not self.image_validator is None:
self.image_validator.labels_format = self.labels_format
self.scale.labels_format = self.labels_format
for _ in range(max(1, self.n_trials_max)):
# Pick a scaling factor.
factor = np.random.uniform(self.min_factor, self.max_factor)
self.scale.factor = factor
if (labels is None) or (self.image_validator is None):
# We either don't have any boxes or if we do, we will accept any outcome as valid.
return self.scale(image, labels)
else:
# Scale the bounding boxes accordingly.
# Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`.
toplefts = np.array([labels[:,xmin], labels[:,ymin], np.ones(labels.shape[0])])
bottomrights = np.array([labels[:,xmax], labels[:,ymax], np.ones(labels.shape[0])])
# Compute the rotation matrix.
M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2),
angle=0,
scale=factor)
new_toplefts = (np.dot(M, toplefts)).T
new_bottomrights = (np.dot(M, bottomrights)).T
new_labels = np.copy(labels)
new_labels[:,[xmin,ymin]] = np.around(new_toplefts, decimals=0).astype(np.int)
new_labels[:,[xmax,ymax]] = np.around(new_bottomrights, decimals=0).astype(np.int)
# Check if the patch is valid.
if self.image_validator(labels=new_labels,
image_height=img_height,
image_width=img_width):
return self.scale(image, labels)
# If all attempts failed, return the unaltered input image.
if labels is None:
return image
else:
return image, labels
elif labels is None:
return image
else:
return image, labels
class Rotate:
'''
Rotates images counter-clockwise by 90, 180, or 270 degrees.
'''
def __init__(self,
angle,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
angle (int): The angle in degrees by which to rotate the images counter-clockwise.
Only 90, 180, and 270 are valid values.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if not angle in {90, 180, 270}:
raise ValueError("`angle` must be in the set {90, 180, 270}.")
self.angle = angle
self.labels_format = labels_format
def __call__(self, image, labels=None):
img_height, img_width = image.shape[:2]
# Compute the rotation matrix.
M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2),
angle=self.angle,
scale=1)
# Get the sine and cosine from the rotation matrix.
cos_angle = np.abs(M[0, 0])
sin_angle = np.abs(M[0, 1])
# Compute the new bounding dimensions of the image.
img_width_new = int(img_height * sin_angle + img_width * cos_angle)
img_height_new = int(img_height * cos_angle + img_width * sin_angle)
# Adjust the rotation matrix to take into account the translation.
M[1, 2] += (img_height_new - img_height) / 2
M[0, 2] += (img_width_new - img_width) / 2
# Rotate the image.
image = cv2.warpAffine(image,
M=M,
dsize=(img_width_new, img_height_new))
if labels is None:
return image
else:
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
labels = np.copy(labels)
# Rotate the bounding boxes accordingly.
# Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`.
toplefts = np.array([labels[:,xmin], labels[:,ymin], np.ones(labels.shape[0])])
bottomrights = np.array([labels[:,xmax], labels[:,ymax], np.ones(labels.shape[0])])
new_toplefts = (np.dot(M, toplefts)).T
new_bottomrights = (np.dot(M, bottomrights)).T
labels[:,[xmin,ymin]] = np.round(new_toplefts, decimals=0).astype(np.int)
labels[:,[xmax,ymax]] = np.round(new_bottomrights, decimals=0).astype(np.int)
if self.angle == 90:
# ymin and ymax were switched by the rotation.
labels[:,[ymax,ymin]] = labels[:,[ymin,ymax]]
elif self.angle == 180:
# ymin and ymax were switched by the rotation,
# and also xmin and xmax were switched.
labels[:,[ymax,ymin]] = labels[:,[ymin,ymax]]
labels[:,[xmax,xmin]] = labels[:,[xmin,xmax]]
elif self.angle == 270:
# xmin and xmax were switched by the rotation.
labels[:,[xmax,xmin]] = labels[:,[xmin,xmax]]
return image, labels
class RandomRotate:
'''
Randomly rotates images counter-clockwise.
'''
def __init__(self,
angles=[90, 180, 270],
prob=0.5,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
angle (list): The list of angles in degrees from which one is randomly selected to rotate
the images counter-clockwise. Only 90, 180, and 270 are valid values.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
for angle in angles:
if not angle in {90, 180, 270}:
raise ValueError("`angles` can only contain the values 90, 180, and 270.")
self.angles = angles
self.prob = prob
self.labels_format = labels_format
self.rotate = Rotate(angle=90, labels_format=self.labels_format)
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
# Pick a rotation angle.
self.rotate.angle = random.choice(self.angles)
self.rotate.labels_format = self.labels_format
return self.rotate(image, labels)
elif labels is None:
return image
else:
return image, labels

View File

@@ -0,0 +1,322 @@
'''
Utilities for 2D object detection related to answering the following questions:
1. Given an image size and bounding boxes, which bounding boxes meet certain
requirements with respect to the image size?
2. Given an image size and bounding boxes, is an image of that size valid with
respect to the bounding boxes according to certain requirements?
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
from bounding_box_utils.bounding_box_utils import iou
class BoundGenerator:
'''
Generates pairs of floating point values that represent lower and upper bounds
from a given sample space.
'''
def __init__(self,
sample_space=((0.1, None),
(0.3, None),
(0.5, None),
(0.7, None),
(0.9, None),
(None, None)),
weights=None):
'''
Arguments:
sample_space (list or tuple): A list, tuple, or array-like object of shape
`(n, 2)` that contains `n` samples to choose from, where each sample
is a 2-tuple of scalars and/or `None` values.
weights (list or tuple, optional): A list or tuple representing the distribution
over the sample space. If `None`, a uniform distribution will be assumed.
'''
if (not (weights is None)) and len(weights) != len(sample_space):
raise ValueError("`weights` must either be `None` for uniform distribution or have the same length as `sample_space`.")
self.sample_space = []
for bound_pair in sample_space:
if len(bound_pair) != 2:
raise ValueError("All elements of the sample space must be 2-tuples.")
bound_pair = list(bound_pair)
if bound_pair[0] is None: bound_pair[0] = 0.0
if bound_pair[1] is None: bound_pair[1] = 1.0
if bound_pair[0] > bound_pair[1]:
raise ValueError("For all sample space elements, the lower bound cannot be greater than the upper bound.")
self.sample_space.append(bound_pair)
self.sample_space_size = len(self.sample_space)
if weights is None:
self.weights = [1.0/self.sample_space_size] * self.sample_space_size
else:
self.weights = weights
def __call__(self):
'''
Returns:
An item of the sample space, i.e. a 2-tuple of scalars.
'''
i = np.random.choice(self.sample_space_size, p=self.weights)
return self.sample_space[i]
class BoxFilter:
'''
Returns all bounding boxes that are valid with respect to a the defined criteria.
'''
def __init__(self,
check_overlap=True,
check_min_area=True,
check_degenerate=True,
overlap_criterion='center_point',
overlap_bounds=(0.3, 1.0),
min_area=16,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4},
border_pixels='half'):
'''
Arguments:
check_overlap (bool, optional): Whether or not to enforce the overlap requirements defined by
`overlap_criterion` and `overlap_bounds`. Sometimes you might want to use the box filter only
to enforce a certain minimum area for all boxes (see next argument), in such cases you can
turn the overlap requirements off.
check_min_area (bool, optional): Whether or not to enforce the minimum area requirement defined
by `min_area`. If `True`, any boxes that have an area (in pixels) that is smaller than `min_area`
will be removed from the labels of an image. Bounding boxes below a certain area aren't useful
training examples. An object that takes up only, say, 5 pixels in an image is probably not
recognizable anymore, neither for a human, nor for an object detection model. It makes sense
to remove such boxes.
check_degenerate (bool, optional): Whether or not to check for and remove degenerate bounding boxes.
Degenerate bounding boxes are boxes that have `xmax <= xmin` and/or `ymax <= ymin`. In particular,
boxes with a width and/or height of zero are degenerate. It is obviously important to filter out
such boxes, so you should only set this option to `False` if you are certain that degenerate
boxes are not possible in your data and processing chain.
overlap_criterion (str, optional): Can be either of 'center_point', 'iou', or 'area'. Determines
which boxes are considered valid with respect to a given image. If set to 'center_point',
a given bounding box is considered valid if its center point lies within the image.
If set to 'area', a given bounding box is considered valid if the quotient of its intersection
area with the image and its own area is within the given `overlap_bounds`. If set to 'iou', a given
bounding box is considered valid if its IoU with the image is within the given `overlap_bounds`.
overlap_bounds (list or BoundGenerator, optional): Only relevant if `overlap_criterion` is 'area' or 'iou'.
Determines the lower and upper bounds for `overlap_criterion`. Can be either a 2-tuple of scalars
representing a lower bound and an upper bound, or a `BoundGenerator` object, which provides
the possibility to generate bounds randomly.
min_area (int, optional): Only relevant if `check_min_area` is `True`. Defines the minimum area in
pixels that a bounding box must have in order to be valid. Boxes with an area smaller than this
will be removed.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
to the boxes. If 'exclude', the border pixels do not belong to the boxes.
If 'half', then one of each of the two horizontal and vertical borders belong
to the boxex, but not the other.
'''
if not isinstance(overlap_bounds, (list, tuple, BoundGenerator)):
raise ValueError("`overlap_bounds` must be either a 2-tuple of scalars or a `BoundGenerator` object.")
if isinstance(overlap_bounds, (list, tuple)) and (overlap_bounds[0] > overlap_bounds[1]):
raise ValueError("The lower bound must not be greater than the upper bound.")
if not (overlap_criterion in {'iou', 'area', 'center_point'}):
raise ValueError("`overlap_criterion` must be one of 'iou', 'area', or 'center_point'.")
self.overlap_criterion = overlap_criterion
self.overlap_bounds = overlap_bounds
self.min_area = min_area
self.check_overlap = check_overlap
self.check_min_area = check_min_area
self.check_degenerate = check_degenerate
self.labels_format = labels_format
self.border_pixels = border_pixels
def __call__(self,
labels,
image_height=None,
image_width=None):
'''
Arguments:
labels (array): The labels to be filtered. This is an array with shape `(m,n)`, where
`m` is the number of bounding boxes and `n` is the number of elements that defines
each bounding box (box coordinates, class ID, etc.). The box coordinates are expected
to be in the image's coordinate system.
image_height (int): Only relevant if `check_overlap == True`. The height of the image
(in pixels) to compare the box coordinates to.
image_width (int): `check_overlap == True`. The width of the image (in pixels) to compare
the box coordinates to.
Returns:
An array containing the labels of all boxes that are valid.
'''
labels = np.copy(labels)
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
# Record the boxes that pass all checks here.
requirements_met = np.ones(shape=labels.shape[0], dtype=np.bool)
if self.check_degenerate:
non_degenerate = (labels[:,xmax] > labels[:,xmin]) * (labels[:,ymax] > labels[:,ymin])
requirements_met *= non_degenerate
if self.check_min_area:
min_area_met = (labels[:,xmax] - labels[:,xmin]) * (labels[:,ymax] - labels[:,ymin]) >= self.min_area
requirements_met *= min_area_met
if self.check_overlap:
# Get the lower and upper bounds.
if isinstance(self.overlap_bounds, BoundGenerator):
lower, upper = self.overlap_bounds()
else:
lower, upper = self.overlap_bounds
# Compute which boxes are valid.
if self.overlap_criterion == 'iou':
# Compute the patch coordinates.
image_coords = np.array([0, 0, image_width, image_height])
# Compute the IoU between the patch and all of the ground truth boxes.
image_boxes_iou = iou(image_coords, labels[:, [xmin, ymin, xmax, ymax]], coords='corners', mode='element-wise', border_pixels=self.border_pixels)
requirements_met *= (image_boxes_iou > lower) * (image_boxes_iou <= upper)
elif self.overlap_criterion == 'area':
if self.border_pixels == 'half':
d = 0
elif self.border_pixels == 'include':
d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
elif self.border_pixels == 'exclude':
d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
# Compute the areas of the boxes.
box_areas = (labels[:,xmax] - labels[:,xmin] + d) * (labels[:,ymax] - labels[:,ymin] + d)
# Compute the intersection area between the patch and all of the ground truth boxes.
clipped_boxes = np.copy(labels)
clipped_boxes[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=image_height-1)
clipped_boxes[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=image_width-1)
intersection_areas = (clipped_boxes[:,xmax] - clipped_boxes[:,xmin] + d) * (clipped_boxes[:,ymax] - clipped_boxes[:,ymin] + d) # +1 because the border pixels belong to the box areas.
# Check which boxes meet the overlap requirements.
if lower == 0.0:
mask_lower = intersection_areas > lower * box_areas # If `self.lower == 0`, we want to make sure that boxes with area 0 don't count, hence the ">" sign instead of the ">=" sign.
else:
mask_lower = intersection_areas >= lower * box_areas # Especially for the case `self.lower == 1` we want the ">=" sign, otherwise no boxes would count at all.
mask_upper = intersection_areas <= upper * box_areas
requirements_met *= mask_lower * mask_upper
elif self.overlap_criterion == 'center_point':
# Compute the center points of the boxes.
cy = (labels[:,ymin] + labels[:,ymax]) / 2
cx = (labels[:,xmin] + labels[:,xmax]) / 2
# Check which of the boxes have center points within the cropped patch remove those that don't.
requirements_met *= (cy >= 0.0) * (cy <= image_height-1) * (cx >= 0.0) * (cx <= image_width-1)
return labels[requirements_met]
class ImageValidator:
'''
Returns `True` if a given minimum number of bounding boxes meets given overlap
requirements with an image of a given height and width.
'''
def __init__(self,
overlap_criterion='center_point',
bounds=(0.3, 1.0),
n_boxes_min=1,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4},
border_pixels='half'):
'''
Arguments:
overlap_criterion (str, optional): Can be either of 'center_point', 'iou', or 'area'. Determines
which boxes are considered valid with respect to a given image. If set to 'center_point',
a given bounding box is considered valid if its center point lies within the image.
If set to 'area', a given bounding box is considered valid if the quotient of its intersection
area with the image and its own area is within `lower` and `upper`. If set to 'iou', a given
bounding box is considered valid if its IoU with the image is within `lower` and `upper`.
bounds (list or BoundGenerator, optional): Only relevant if `overlap_criterion` is 'area' or 'iou'.
Determines the lower and upper bounds for `overlap_criterion`. Can be either a 2-tuple of scalars
representing a lower bound and an upper bound, or a `BoundGenerator` object, which provides
the possibility to generate bounds randomly.
n_boxes_min (int or str, optional): Either a non-negative integer or the string 'all'.
Determines the minimum number of boxes that must meet the `overlap_criterion` with respect to
an image of the given height and width in order for the image to be a valid image.
If set to 'all', an image is considered valid if all given boxes meet the `overlap_criterion`.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
to the boxes. If 'exclude', the border pixels do not belong to the boxes.
If 'half', then one of each of the two horizontal and vertical borders belong
to the boxex, but not the other.
'''
if not ((isinstance(n_boxes_min, int) and n_boxes_min > 0) or n_boxes_min == 'all'):
raise ValueError("`n_boxes_min` must be a positive integer or 'all'.")
self.overlap_criterion = overlap_criterion
self.bounds = bounds
self.n_boxes_min = n_boxes_min
self.labels_format = labels_format
self.border_pixels = border_pixels
self.box_filter = BoxFilter(check_overlap=True,
check_min_area=False,
check_degenerate=False,
overlap_criterion=self.overlap_criterion,
overlap_bounds=self.bounds,
labels_format=self.labels_format,
border_pixels=self.border_pixels)
def __call__(self,
labels,
image_height,
image_width):
'''
Arguments:
labels (array): The labels to be tested. The box coordinates are expected
to be in the image's coordinate system.
image_height (int): The height of the image to compare the box coordinates to.
image_width (int): The width of the image to compare the box coordinates to.
Returns:
A boolean indicating whether an imgae of the given height and width is
valid with respect to the given bounding boxes.
'''
self.box_filter.overlap_bounds = self.bounds
self.box_filter.labels_format = self.labels_format
# Get all boxes that meet the overlap requirements.
valid_labels = self.box_filter(labels=labels,
image_height=image_height,
image_width=image_width)
# Check whether enough boxes meet the requirements.
if isinstance(self.n_boxes_min, int):
# The image is valid if at least `self.n_boxes_min` ground truth boxes meet the requirements.
if len(valid_labels) >= self.n_boxes_min:
return True
else:
return False
elif self.n_boxes_min == 'all':
# The image is valid if all ground truth boxes meet the requirements.
if len(valid_labels) == len(labels):
return True
else:
return False

View File

@@ -0,0 +1,73 @@
'''
Miscellaneous data generator utilities.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
def apply_inverse_transforms(y_pred_decoded, inverse_transforms):
'''
Takes a list or Numpy array of decoded predictions and applies a given list of
transforms to them. The list of inverse transforms would usually contain the
inverter functions that some of the image transformations that come with this
data generator return. This function would normally be used to transform predictions
that were made on a transformed image back to the original image.
Arguments:
y_pred_decoded (list or array): Either a list of length `batch_size` that
contains Numpy arrays that contain the predictions for each batch item
or a Numpy array. If this is a list of Numpy arrays, the arrays would
usually have the shape `(num_predictions, 6)`, where `num_predictions`
is different for each batch item. If this is a Numpy array, it would
usually have the shape `(batch_size, num_predictions, 6)`. The last axis
would usually contain the class ID, confidence score, and four bounding
box coordinates for each prediction.
inverse_predictions (list): A nested list of length `batch_size` that contains
for each batch item a list of functions that take one argument (one element
of `y_pred_decoded` if it is a list or one slice along the first axis of
`y_pred_decoded` if it is an array) and return an output of the same shape
and data type.
Returns:
The transformed predictions, which have the same structure as `y_pred_decoded`.
'''
if isinstance(y_pred_decoded, list):
y_pred_decoded_inv = []
for i in range(len(y_pred_decoded)):
y_pred_decoded_inv.append(np.copy(y_pred_decoded[i]))
if y_pred_decoded_inv[i].size > 0: # If there are any predictions for this batch item.
for inverter in inverse_transforms[i]:
if not (inverter is None):
y_pred_decoded_inv[i] = inverter(y_pred_decoded_inv[i])
elif isinstance(y_pred_decoded, np.ndarray):
y_pred_decoded_inv = np.copy(y_pred_decoded)
for i in range(len(y_pred_decoded)):
if y_pred_decoded_inv[i].size > 0: # If there are any predictions for this batch item.
for inverter in inverse_transforms[i]:
if not (inverter is None):
y_pred_decoded_inv[i] = inverter(y_pred_decoded_inv[i])
else:
raise ValueError("`y_pred_decoded` must be either a list or a Numpy array.")
return y_pred_decoded_inv

View File

@@ -0,0 +1,881 @@
'''
Various patch sampling operations for data augmentation in 2D object detection.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator
class PatchCoordinateGenerator:
'''
Generates random patch coordinates that meet specified requirements.
'''
def __init__(self,
img_height=None,
img_width=None,
must_match='h_w',
min_scale=0.3,
max_scale=1.0,
scale_uniformly=False,
min_aspect_ratio = 0.5,
max_aspect_ratio = 2.0,
patch_ymin=None,
patch_xmin=None,
patch_height=None,
patch_width=None,
patch_aspect_ratio=None):
'''
Arguments:
img_height (int): The height of the image for which the patch coordinates
shall be generated. Doesn't have to be known upon construction.
img_width (int): The width of the image for which the patch coordinates
shall be generated. Doesn't have to be known upon construction.
must_match (str, optional): Can be either of 'h_w', 'h_ar', and 'w_ar'.
Specifies which two of the three quantities height, width, and aspect
ratio determine the shape of the generated patch. The respective third
quantity will be computed from the other two. For example,
if `must_match == 'h_w'`, then the patch's height and width will be
set to lie within [min_scale, max_scale] of the image size or to
`patch_height` and/or `patch_width`, if given. The patch's aspect ratio
is the dependent variable in this case, it will be computed from the
height and width. Any given values for `patch_aspect_ratio`,
`min_aspect_ratio`, or `max_aspect_ratio` will be ignored.
min_scale (float, optional): The minimum size of a dimension of the patch
as a fraction of the respective dimension of the image. Can be greater
than 1. For example, if the image width is 200 and `min_scale == 0.5`,
then the width of the generated patch will be at least 100. If `min_scale == 1.5`,
the width of the generated patch will be at least 300.
max_scale (float, optional): The maximum size of a dimension of the patch
as a fraction of the respective dimension of the image. Can be greater
than 1. For example, if the image width is 200 and `max_scale == 1.0`,
then the width of the generated patch will be at most 200. If `max_scale == 1.5`,
the width of the generated patch will be at most 300. Must be greater than
`min_scale`.
scale_uniformly (bool, optional): If `True` and if `must_match == 'h_w'`,
the patch height and width will be scaled uniformly, otherwise they will
be scaled independently.
min_aspect_ratio (float, optional): Determines the minimum aspect ratio
for the generated patches.
max_aspect_ratio (float, optional): Determines the maximum aspect ratio
for the generated patches.
patch_ymin (int, optional): `None` or the vertical coordinate of the top left
corner of the generated patches. If this is not `None`, the position of the
patches along the vertical axis is fixed. If this is `None`, then the
vertical position of generated patches will be chosen randomly such that
the overlap of a patch and the image along the vertical dimension is
always maximal.
patch_xmin (int, optional): `None` or the horizontal coordinate of the top left
corner of the generated patches. If this is not `None`, the position of the
patches along the horizontal axis is fixed. If this is `None`, then the
horizontal position of generated patches will be chosen randomly such that
the overlap of a patch and the image along the horizontal dimension is
always maximal.
patch_height (int, optional): `None` or the fixed height of the generated patches.
patch_width (int, optional): `None` or the fixed width of the generated patches.
patch_aspect_ratio (float, optional): `None` or the fixed aspect ratio of the
generated patches.
'''
if not (must_match in {'h_w', 'h_ar', 'w_ar'}):
raise ValueError("`must_match` must be either of 'h_w', 'h_ar' and 'w_ar'.")
if min_scale >= max_scale:
raise ValueError("It must be `min_scale < max_scale`.")
if min_aspect_ratio >= max_aspect_ratio:
raise ValueError("It must be `min_aspect_ratio < max_aspect_ratio`.")
if scale_uniformly and not ((patch_height is None) and (patch_width is None)):
raise ValueError("If `scale_uniformly == True`, `patch_height` and `patch_width` must both be `None`.")
self.img_height = img_height
self.img_width = img_width
self.must_match = must_match
self.min_scale = min_scale
self.max_scale = max_scale
self.scale_uniformly = scale_uniformly
self.min_aspect_ratio = min_aspect_ratio
self.max_aspect_ratio = max_aspect_ratio
self.patch_ymin = patch_ymin
self.patch_xmin = patch_xmin
self.patch_height = patch_height
self.patch_width = patch_width
self.patch_aspect_ratio = patch_aspect_ratio
def __call__(self):
'''
Returns:
A 4-tuple `(ymin, xmin, height, width)` that represents the coordinates
of the generated patch.
'''
# Get the patch height and width.
if self.must_match == 'h_w': # Aspect is the dependent variable.
if not self.scale_uniformly:
# Get the height.
if self.patch_height is None:
patch_height = int(np.random.uniform(self.min_scale, self.max_scale) * self.img_height)
else:
patch_height = self.patch_height
# Get the width.
if self.patch_width is None:
patch_width = int(np.random.uniform(self.min_scale, self.max_scale) * self.img_width)
else:
patch_width = self.patch_width
else:
scaling_factor = np.random.uniform(self.min_scale, self.max_scale)
patch_height = int(scaling_factor * self.img_height)
patch_width = int(scaling_factor * self.img_width)
elif self.must_match == 'h_ar': # Width is the dependent variable.
# Get the height.
if self.patch_height is None:
patch_height = int(np.random.uniform(self.min_scale, self.max_scale) * self.img_height)
else:
patch_height = self.patch_height
# Get the aspect ratio.
if self.patch_aspect_ratio is None:
patch_aspect_ratio = np.random.uniform(self.min_aspect_ratio, self.max_aspect_ratio)
else:
patch_aspect_ratio = self.patch_aspect_ratio
# Get the width.
patch_width = int(patch_height * patch_aspect_ratio)
elif self.must_match == 'w_ar': # Height is the dependent variable.
# Get the width.
if self.patch_width is None:
patch_width = int(np.random.uniform(self.min_scale, self.max_scale) * self.img_width)
else:
patch_width = self.patch_width
# Get the aspect ratio.
if self.patch_aspect_ratio is None:
patch_aspect_ratio = np.random.uniform(self.min_aspect_ratio, self.max_aspect_ratio)
else:
patch_aspect_ratio = self.patch_aspect_ratio
# Get the height.
patch_height = int(patch_width / patch_aspect_ratio)
# Get the top left corner coordinates of the patch.
if self.patch_ymin is None:
# Compute how much room we have along the vertical axis to place the patch.
# A negative number here means that we want to sample a patch that is larger than the original image
# in the vertical dimension, in which case the patch will be placed such that it fully contains the
# image in the vertical dimension.
y_range = self.img_height - patch_height
# Select a random top left corner for the sample position from the possible positions.
if y_range >= 0: patch_ymin = np.random.randint(0, y_range + 1) # There are y_range + 1 possible positions for the crop in the vertical dimension.
else: patch_ymin = np.random.randint(y_range, 1) # The possible positions for the image on the background canvas in the vertical dimension.
else:
patch_ymin = self.patch_ymin
if self.patch_xmin is None:
# Compute how much room we have along the horizontal axis to place the patch.
# A negative number here means that we want to sample a patch that is larger than the original image
# in the horizontal dimension, in which case the patch will be placed such that it fully contains the
# image in the horizontal dimension.
x_range = self.img_width - patch_width
# Select a random top left corner for the sample position from the possible positions.
if x_range >= 0: patch_xmin = np.random.randint(0, x_range + 1) # There are x_range + 1 possible positions for the crop in the horizontal dimension.
else: patch_xmin = np.random.randint(x_range, 1) # The possible positions for the image on the background canvas in the horizontal dimension.
else:
patch_xmin = self.patch_xmin
return (patch_ymin, patch_xmin, patch_height, patch_width)
class CropPad:
'''
Crops and/or pads an image deterministically.
Depending on the given output patch size and the position (top left corner) relative
to the input image, the image will be cropped and/or padded along one or both spatial
dimensions.
For example, if the output patch lies entirely within the input image, this will result
in a regular crop. If the input image lies entirely within the output patch, this will
result in the image being padded in every direction. All other cases are mixed cases
where the image might be cropped in some directions and padded in others.
The output patch can be arbitrary in both size and position as long as it overlaps
with the input image.
'''
def __init__(self,
patch_ymin,
patch_xmin,
patch_height,
patch_width,
clip_boxes=True,
box_filter=None,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
patch_ymin (int, optional): The vertical coordinate of the top left corner of the output
patch relative to the image coordinate system. Can be negative (i.e. lie outside the image)
as long as the resulting patch still overlaps with the image.
patch_ymin (int, optional): The horizontal coordinate of the top left corner of the output
patch relative to the image coordinate system. Can be negative (i.e. lie outside the image)
as long as the resulting patch still overlaps with the image.
patch_height (int): The height of the patch to be sampled from the image. Can be greater
than the height of the input image.
patch_width (int): The width of the patch to be sampled from the image. Can be greater
than the width of the input image.
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
sampled patch.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
background pixels of the scaled images. In the case of single-channel images,
the first element of `background` will be used as the background pixel value.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
#if (patch_height <= 0) or (patch_width <= 0):
# raise ValueError("Patch height and width must both be positive.")
#if (patch_ymin + patch_height < 0) or (patch_xmin + patch_width < 0):
# raise ValueError("A patch with the given coordinates cannot overlap with an input image.")
if not (isinstance(box_filter, BoxFilter) or box_filter is None):
raise ValueError("`box_filter` must be either `None` or a `BoxFilter` object.")
self.patch_height = patch_height
self.patch_width = patch_width
self.patch_ymin = patch_ymin
self.patch_xmin = patch_xmin
self.clip_boxes = clip_boxes
self.box_filter = box_filter
self.background = background
self.labels_format = labels_format
def __call__(self, image, labels=None, return_inverter=False):
img_height, img_width = image.shape[:2]
if (self.patch_ymin > img_height) or (self.patch_xmin > img_width):
raise ValueError("The given patch doesn't overlap with the input image.")
labels = np.copy(labels)
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
# Top left corner of the patch relative to the image coordinate system:
patch_ymin = self.patch_ymin
patch_xmin = self.patch_xmin
# Create a canvas of the size of the patch we want to end up with.
if image.ndim == 3:
canvas = np.zeros(shape=(self.patch_height, self.patch_width, 3), dtype=np.uint8)
canvas[:, :] = self.background
elif image.ndim == 2:
canvas = np.zeros(shape=(self.patch_height, self.patch_width), dtype=np.uint8)
canvas[:, :] = self.background[0]
# Perform the crop.
if patch_ymin < 0 and patch_xmin < 0: # Pad the image at the top and on the left.
image_crop_height = min(img_height, self.patch_height + patch_ymin) # The number of pixels of the image that will end up on the canvas in the vertical direction.
image_crop_width = min(img_width, self.patch_width + patch_xmin) # The number of pixels of the image that will end up on the canvas in the horizontal direction.
canvas[-patch_ymin:-patch_ymin + image_crop_height, -patch_xmin:-patch_xmin + image_crop_width] = image[:image_crop_height, :image_crop_width]
elif patch_ymin < 0 and patch_xmin >= 0: # Pad the image at the top and crop it on the left.
image_crop_height = min(img_height, self.patch_height + patch_ymin) # The number of pixels of the image that will end up on the canvas in the vertical direction.
image_crop_width = min(self.patch_width, img_width - patch_xmin) # The number of pixels of the image that will end up on the canvas in the horizontal direction.
canvas[-patch_ymin:-patch_ymin + image_crop_height, :image_crop_width] = image[:image_crop_height, patch_xmin:patch_xmin + image_crop_width]
elif patch_ymin >= 0 and patch_xmin < 0: # Crop the image at the top and pad it on the left.
image_crop_height = min(self.patch_height, img_height - patch_ymin) # The number of pixels of the image that will end up on the canvas in the vertical direction.
image_crop_width = min(img_width, self.patch_width + patch_xmin) # The number of pixels of the image that will end up on the canvas in the horizontal direction.
canvas[:image_crop_height, -patch_xmin:-patch_xmin + image_crop_width] = image[patch_ymin:patch_ymin + image_crop_height, :image_crop_width]
elif patch_ymin >= 0 and patch_xmin >= 0: # Crop the image at the top and on the left.
image_crop_height = min(self.patch_height, img_height - patch_ymin) # The number of pixels of the image that will end up on the canvas in the vertical direction.
image_crop_width = min(self.patch_width, img_width - patch_xmin) # The number of pixels of the image that will end up on the canvas in the horizontal direction.
canvas[:image_crop_height, :image_crop_width] = image[patch_ymin:patch_ymin + image_crop_height, patch_xmin:patch_xmin + image_crop_width]
image = canvas
if return_inverter:
def inverter(labels):
labels = np.copy(labels)
labels[:, [ymin+1, ymax+1]] += patch_ymin
labels[:, [xmin+1, xmax+1]] += patch_xmin
return labels
if not (labels is None):
# Translate the box coordinates to the patch's coordinate system.
labels[:, [ymin, ymax]] -= patch_ymin
labels[:, [xmin, xmax]] -= patch_xmin
# Compute all valid boxes for this patch.
if not (self.box_filter is None):
self.box_filter.labels_format = self.labels_format
labels = self.box_filter(labels=labels,
image_height=self.patch_height,
image_width=self.patch_width)
if self.clip_boxes:
labels[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=self.patch_height-1)
labels[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=self.patch_width-1)
if return_inverter:
return image, labels, inverter
else:
return image, labels
else:
if return_inverter:
return image, inverter
else:
return image
class Crop:
'''
Crops off the specified numbers of pixels from the borders of images.
This is just a convenience interface for `CropPad`.
'''
def __init__(self,
crop_top,
crop_bottom,
crop_left,
crop_right,
clip_boxes=True,
box_filter=None,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
self.crop_top = crop_top
self.crop_bottom = crop_bottom
self.crop_left = crop_left
self.crop_right = crop_right
self.clip_boxes = clip_boxes
self.box_filter = box_filter
self.labels_format = labels_format
self.crop = CropPad(patch_ymin=self.crop_top,
patch_xmin=self.crop_left,
patch_height=None,
patch_width=None,
clip_boxes=self.clip_boxes,
box_filter=self.box_filter,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
img_height, img_width = image.shape[:2]
self.crop.patch_height = img_height - self.crop_top - self.crop_bottom
self.crop.patch_width = img_width - self.crop_left - self.crop_right
self.crop.labels_format = self.labels_format
return self.crop(image, labels, return_inverter)
class Pad:
'''
Pads images by the specified numbers of pixels on each side.
This is just a convenience interface for `CropPad`.
'''
def __init__(self,
pad_top,
pad_bottom,
pad_left,
pad_right,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
self.pad_top = pad_top
self.pad_bottom = pad_bottom
self.pad_left = pad_left
self.pad_right = pad_right
self.background = background
self.labels_format = labels_format
self.pad = CropPad(patch_ymin=-self.pad_top,
patch_xmin=-self.pad_left,
patch_height=None,
patch_width=None,
clip_boxes=False,
box_filter=None,
background=self.background,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
img_height, img_width = image.shape[:2]
self.pad.patch_height = img_height + self.pad_top + self.pad_bottom
self.pad.patch_width = img_width + self.pad_left + self.pad_right
self.pad.labels_format = self.labels_format
return self.pad(image, labels, return_inverter)
class RandomPatch:
'''
Randomly samples a patch from an image. The randomness refers to whatever
randomness may be introduced by the patch coordinate generator, the box filter,
and the patch validator.
Input images may be cropped and/or padded along either or both of the two
spatial dimensions as necessary in order to obtain the required patch.
As opposed to `RandomPatchInf`, it is possible for this transform to fail to produce
an output image at all, in which case it will return `None`. This is useful, because
if this transform is used to generate patches of a fixed size or aspect ratio, then
the caller needs to be able to rely on the output image satisfying the set size or
aspect ratio. It might therefore not be an option to return the unaltered input image
as other random transforms do when they fail to produce a valid transformed image.
'''
def __init__(self,
patch_coord_generator,
box_filter=None,
image_validator=None,
n_trials_max=3,
clip_boxes=True,
prob=1.0,
background=(0,0,0),
can_fail=False,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
patch_coord_generator (PatchCoordinateGenerator): A `PatchCoordinateGenerator` object
to generate the positions and sizes of the patches to be sampled from the input images.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
An `ImageValidator` object to determine whether a sampled patch is valid. If `None`,
any outcome is valid.
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
Determines the maxmial number of trials to sample a valid patch. If no valid patch could
be sampled in `n_trials_max` trials, returns one `None` in place of each regular output.
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
sampled patch.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
background pixels of the scaled images. In the case of single-channel images,
the first element of `background` will be used as the background pixel value.
can_fail (bool, optional): If `True`, will return `None` if no valid patch could be found after
`n_trials_max` trials. If `False`, will return the unaltered input image in such a case.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if not isinstance(patch_coord_generator, PatchCoordinateGenerator):
raise ValueError("`patch_coord_generator` must be an instance of `PatchCoordinateGenerator`.")
if not (isinstance(image_validator, ImageValidator) or image_validator is None):
raise ValueError("`image_validator` must be either `None` or an `ImageValidator` object.")
self.patch_coord_generator = patch_coord_generator
self.box_filter = box_filter
self.image_validator = image_validator
self.n_trials_max = n_trials_max
self.clip_boxes = clip_boxes
self.prob = prob
self.background = background
self.can_fail = can_fail
self.labels_format = labels_format
self.sample_patch = CropPad(patch_ymin=None,
patch_xmin=None,
patch_height=None,
patch_width=None,
clip_boxes=self.clip_boxes,
box_filter=self.box_filter,
background=self.background,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
img_height, img_width = image.shape[:2]
self.patch_coord_generator.img_height = img_height
self.patch_coord_generator.img_width = img_width
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
# Override the preset labels format.
if not self.image_validator is None:
self.image_validator.labels_format = self.labels_format
self.sample_patch.labels_format = self.labels_format
for _ in range(max(1, self.n_trials_max)):
# Generate patch coordinates.
patch_ymin, patch_xmin, patch_height, patch_width = self.patch_coord_generator()
self.sample_patch.patch_ymin = patch_ymin
self.sample_patch.patch_xmin = patch_xmin
self.sample_patch.patch_height = patch_height
self.sample_patch.patch_width = patch_width
if (labels is None) or (self.image_validator is None):
# We either don't have any boxes or if we do, we will accept any outcome as valid.
return self.sample_patch(image, labels, return_inverter)
else:
# Translate the box coordinates to the patch's coordinate system.
new_labels = np.copy(labels)
new_labels[:, [ymin, ymax]] -= patch_ymin
new_labels[:, [xmin, xmax]] -= patch_xmin
# Check if the patch is valid.
if self.image_validator(labels=new_labels,
image_height=patch_height,
image_width=patch_width):
return self.sample_patch(image, labels, return_inverter)
# If we weren't able to sample a valid patch...
if self.can_fail:
# ...return `None`.
if labels is None:
if return_inverter:
return None, None
else:
return None
else:
if return_inverter:
return None, None, None
else:
return None, None
else:
# ...return the unaltered input image.
if labels is None:
if return_inverter:
return image, None
else:
return image
else:
if return_inverter:
return image, labels, None
else:
return image, labels
else:
if return_inverter:
def inverter(labels):
return labels
if labels is None:
if return_inverter:
return image, inverter
else:
return image
else:
if return_inverter:
return image, labels, inverter
else:
return image, labels
class RandomPatchInf:
'''
Randomly samples a patch from an image. The randomness refers to whatever
randomness may be introduced by the patch coordinate generator, the box filter,
and the patch validator.
Input images may be cropped and/or padded along either or both of the two
spatial dimensions as necessary in order to obtain the required patch.
This operation is very similar to `RandomPatch`, except that:
1. This operation runs indefinitely until either a valid patch is found or
the input image is returned unaltered, i.e. it cannot fail.
2. If a bound generator is given, a new pair of bounds will be generated
every `n_trials_max` iterations.
'''
def __init__(self,
patch_coord_generator,
box_filter=None,
image_validator=None,
bound_generator=None,
n_trials_max=50,
clip_boxes=True,
prob=0.857,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
patch_coord_generator (PatchCoordinateGenerator): A `PatchCoordinateGenerator` object
to generate the positions and sizes of the patches to be sampled from the input images.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
An `ImageValidator` object to determine whether a sampled patch is valid. If `None`,
any outcome is valid.
bound_generator (BoundGenerator, optional): A `BoundGenerator` object to generate upper and
lower bound values for the patch validator. Every `n_trials_max` trials, a new pair of
upper and lower bounds will be generated until a valid patch is found or the original image
is returned. This bound generator overrides the bound generator of the patch validator.
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
The sampler will run indefinitely until either a valid patch is found or the original image
is returned, but this determines the maxmial number of trials to sample a valid patch for each
selected pair of lower and upper bounds before a new pair is picked.
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
sampled patch.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
background pixels of the scaled images. In the case of single-channel images,
the first element of `background` will be used as the background pixel value.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
if not isinstance(patch_coord_generator, PatchCoordinateGenerator):
raise ValueError("`patch_coord_generator` must be an instance of `PatchCoordinateGenerator`.")
if not (isinstance(image_validator, ImageValidator) or image_validator is None):
raise ValueError("`image_validator` must be either `None` or an `ImageValidator` object.")
if not (isinstance(bound_generator, BoundGenerator) or bound_generator is None):
raise ValueError("`bound_generator` must be either `None` or a `BoundGenerator` object.")
self.patch_coord_generator = patch_coord_generator
self.box_filter = box_filter
self.image_validator = image_validator
self.bound_generator = bound_generator
self.n_trials_max = n_trials_max
self.clip_boxes = clip_boxes
self.prob = prob
self.background = background
self.labels_format = labels_format
self.sample_patch = CropPad(patch_ymin=None,
patch_xmin=None,
patch_height=None,
patch_width=None,
clip_boxes=self.clip_boxes,
box_filter=self.box_filter,
background=self.background,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
img_height, img_width = image.shape[:2]
self.patch_coord_generator.img_height = img_height
self.patch_coord_generator.img_width = img_width
xmin = self.labels_format['xmin']
ymin = self.labels_format['ymin']
xmax = self.labels_format['xmax']
ymax = self.labels_format['ymax']
# Override the preset labels format.
if not self.image_validator is None:
self.image_validator.labels_format = self.labels_format
self.sample_patch.labels_format = self.labels_format
while True: # Keep going until we either find a valid patch or return the original image.
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
# In case we have a bound generator, pick a lower and upper bound for the patch validator.
if not ((self.image_validator is None) or (self.bound_generator is None)):
self.image_validator.bounds = self.bound_generator()
# Use at most `self.n_trials_max` attempts to find a crop
# that meets our requirements.
for _ in range(max(1, self.n_trials_max)):
# Generate patch coordinates.
patch_ymin, patch_xmin, patch_height, patch_width = self.patch_coord_generator()
self.sample_patch.patch_ymin = patch_ymin
self.sample_patch.patch_xmin = patch_xmin
self.sample_patch.patch_height = patch_height
self.sample_patch.patch_width = patch_width
# Check if the resulting patch meets the aspect ratio requirements.
aspect_ratio = patch_width / patch_height
if not (self.patch_coord_generator.min_aspect_ratio <= aspect_ratio <= self.patch_coord_generator.max_aspect_ratio):
continue
if (labels is None) or (self.image_validator is None):
# We either don't have any boxes or if we do, we will accept any outcome as valid.
return self.sample_patch(image, labels, return_inverter)
else:
# Translate the box coordinates to the patch's coordinate system.
new_labels = np.copy(labels)
new_labels[:, [ymin, ymax]] -= patch_ymin
new_labels[:, [xmin, xmax]] -= patch_xmin
# Check if the patch contains the minimum number of boxes we require.
if self.image_validator(labels=new_labels,
image_height=patch_height,
image_width=patch_width):
return self.sample_patch(image, labels, return_inverter)
else:
if return_inverter:
def inverter(labels):
return labels
if labels is None:
if return_inverter:
return image, inverter
else:
return image
else:
if return_inverter:
return image, labels, inverter
else:
return image, labels
class RandomMaxCropFixedAR:
'''
Crops the largest possible patch of a given fixed aspect ratio
from an image.
Since the aspect ratio of the sampled patches is constant, they
can subsequently be resized to the same size without distortion.
'''
def __init__(self,
patch_aspect_ratio,
box_filter=None,
image_validator=None,
n_trials_max=3,
clip_boxes=True,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
patch_aspect_ratio (float): The fixed aspect ratio that all sampled patches will have.
box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
the validity of the bounding boxes is not checked.
image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
An `ImageValidator` object to determine whether a sampled patch is valid. If `None`,
any outcome is valid.
n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
Determines the maxmial number of trials to sample a valid patch. If no valid patch could
be sampled in `n_trials_max` trials, returns `None`.
clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
sampled patch.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
self.patch_aspect_ratio = patch_aspect_ratio
self.box_filter = box_filter
self.image_validator = image_validator
self.n_trials_max = n_trials_max
self.clip_boxes = clip_boxes
self.labels_format = labels_format
self.random_patch = RandomPatch(patch_coord_generator=PatchCoordinateGenerator(), # Just a dummy object
box_filter=self.box_filter,
image_validator=self.image_validator,
n_trials_max=self.n_trials_max,
clip_boxes=self.clip_boxes,
prob=1.0,
can_fail=False,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
img_height, img_width = image.shape[:2]
# The ratio of the input image aspect ratio and patch aspect ratio determines the maximal possible crop.
image_aspect_ratio = img_width / img_height
if image_aspect_ratio < self.patch_aspect_ratio:
patch_width = img_width
patch_height = int(round(patch_width / self.patch_aspect_ratio))
else:
patch_height = img_height
patch_width = int(round(patch_height * self.patch_aspect_ratio))
# Now that we know the desired height and width for the patch,
# instantiate an appropriate patch coordinate generator.
patch_coord_generator = PatchCoordinateGenerator(img_height=img_height,
img_width=img_width,
must_match='h_w',
patch_height=patch_height,
patch_width=patch_width)
# The rest of the work is done by `RandomPatch`.
self.random_patch.patch_coord_generator = patch_coord_generator
self.random_patch.labels_format = self.labels_format
return self.random_patch(image, labels, return_inverter)
class RandomPadFixedAR:
'''
Adds the minimal possible padding to an image that results in a patch
of the given fixed aspect ratio that contains the entire image.
Since the aspect ratio of the resulting images is constant, they
can subsequently be resized to the same size without distortion.
'''
def __init__(self,
patch_aspect_ratio,
background=(0,0,0),
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
'''
Arguments:
patch_aspect_ratio (float): The fixed aspect ratio that all sampled patches will have.
background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
background pixels of the scaled images. In the case of single-channel images,
the first element of `background` will be used as the background pixel value.
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
'''
self.patch_aspect_ratio = patch_aspect_ratio
self.background = background
self.labels_format = labels_format
self.random_patch = RandomPatch(patch_coord_generator=PatchCoordinateGenerator(), # Just a dummy object
box_filter=None,
image_validator=None,
n_trials_max=1,
clip_boxes=False,
background=self.background,
prob=1.0,
labels_format=self.labels_format)
def __call__(self, image, labels=None, return_inverter=False):
img_height, img_width = image.shape[:2]
if img_width < img_height:
patch_height = img_height
patch_width = int(round(patch_height * self.patch_aspect_ratio))
else:
patch_width = img_width
patch_height = int(round(patch_width / self.patch_aspect_ratio))
# Now that we know the desired height and width for the patch,
# instantiate an appropriate patch coordinate generator.
patch_coord_generator = PatchCoordinateGenerator(img_height=img_height,
img_width=img_width,
must_match='h_w',
patch_height=patch_height,
patch_width=patch_width)
# The rest of the work is done by `RandomPatch`.
self.random_patch.patch_coord_generator = patch_coord_generator
self.random_patch.labels_format = self.labels_format
return self.random_patch(image, labels, return_inverter)

View File

@@ -0,0 +1,485 @@
'''
Various photometric image transformations, both deterministic and probabilistic.
Copyright (C) 2018 Pierluigi Ferrari
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import division
import numpy as np
import cv2
class ConvertColor:
'''
Converts images between RGB, HSV and grayscale color spaces. This is just a wrapper
around `cv2.cvtColor()`.
'''
def __init__(self, current='RGB', to='HSV', keep_3ch=True):
'''
Arguments:
current (str, optional): The current color space of the images. Can be
one of 'RGB' and 'HSV'.
to (str, optional): The target color space of the images. Can be one of
'RGB', 'HSV', and 'GRAY'.
keep_3ch (bool, optional): Only relevant if `to == GRAY`.
If `True`, the resulting grayscale images will have three channels.
'''
if not ((current in {'RGB', 'HSV'}) and (to in {'RGB', 'HSV', 'GRAY'})):
raise NotImplementedError
self.current = current
self.to = to
self.keep_3ch = keep_3ch
def __call__(self, image, labels=None):
if self.current == 'RGB' and self.to == 'HSV':
image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
elif self.current == 'RGB' and self.to == 'GRAY':
image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
if self.keep_3ch:
image = np.stack([image] * 3, axis=-1)
elif self.current == 'HSV' and self.to == 'RGB':
image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
elif self.current == 'HSV' and self.to == 'GRAY':
image = cv2.cvtColor(image, cv2.COLOR_HSV2GRAY)
if self.keep_3ch:
image = np.stack([image] * 3, axis=-1)
if labels is None:
return image
else:
return image, labels
class ConvertDataType:
'''
Converts images represented as Numpy arrays between `uint8` and `float32`.
Serves as a helper for certain photometric distortions. This is just a wrapper
around `np.ndarray.astype()`.
'''
def __init__(self, to='uint8'):
'''
Arguments:
to (string, optional): To which datatype to convert the input images.
Can be either of 'uint8' and 'float32'.
'''
if not (to == 'uint8' or to == 'float32'):
raise ValueError("`to` can be either of 'uint8' or 'float32'.")
self.to = to
def __call__(self, image, labels=None):
if self.to == 'uint8':
image = np.round(image, decimals=0).astype(np.uint8)
else:
image = image.astype(np.float32)
if labels is None:
return image
else:
return image, labels
class ConvertTo3Channels:
'''
Converts 1-channel and 4-channel images to 3-channel images. Does nothing to images that
already have 3 channels. In the case of 4-channel images, the fourth channel will be
discarded.
'''
def __init__(self):
pass
def __call__(self, image, labels=None):
if image.ndim == 2:
image = np.stack([image] * 3, axis=-1)
elif image.ndim == 3:
if image.shape[2] == 1:
image = np.concatenate([image] * 3, axis=-1)
elif image.shape[2] == 4:
image = image[:,:,:3]
if labels is None:
return image
else:
return image, labels
class Hue:
'''
Changes the hue of HSV images.
Important:
- Expects HSV input.
- Expects input array to be of `dtype` `float`.
'''
def __init__(self, delta):
'''
Arguments:
delta (int): An integer in the closed interval `[-180, 180]` that determines the hue change, where
a change by integer `delta` means a change by `2 * delta` degrees. Read up on the HSV color format
if you need more information.
'''
if not (-180 <= delta <= 180): raise ValueError("`delta` must be in the closed interval `[-180, 180]`.")
self.delta = delta
def __call__(self, image, labels=None):
image[:, :, 0] = (image[:, :, 0] + self.delta) % 180.0
if labels is None:
return image
else:
return image, labels
class RandomHue:
'''
Randomly changes the hue of HSV images.
Important:
- Expects HSV input.
- Expects input array to be of `dtype` `float`.
'''
def __init__(self, max_delta=18, prob=0.5):
'''
Arguments:
max_delta (int): An integer in the closed interval `[0, 180]` that determines the maximal absolute
hue change.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
'''
if not (0 <= max_delta <= 180): raise ValueError("`max_delta` must be in the closed interval `[0, 180]`.")
self.max_delta = max_delta
self.prob = prob
self.change_hue = Hue(delta=0)
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
self.change_hue.delta = np.random.uniform(-self.max_delta, self.max_delta)
return self.change_hue(image, labels)
elif labels is None:
return image
else:
return image, labels
class Saturation:
'''
Changes the saturation of HSV images.
Important:
- Expects HSV input.
- Expects input array to be of `dtype` `float`.
'''
def __init__(self, factor):
'''
Arguments:
factor (float): A float greater than zero that determines saturation change, where
values less than one result in less saturation and values greater than one result
in more saturation.
'''
if factor <= 0.0: raise ValueError("It must be `factor > 0`.")
self.factor = factor
def __call__(self, image, labels=None):
image[:,:,1] = np.clip(image[:,:,1] * self.factor, 0, 255)
if labels is None:
return image
else:
return image, labels
class RandomSaturation:
'''
Randomly changes the saturation of HSV images.
Important:
- Expects HSV input.
- Expects input array to be of `dtype` `float`.
'''
def __init__(self, lower=0.3, upper=2.0, prob=0.5):
'''
Arguments:
lower (float, optional): A float greater than zero, the lower bound for the random
saturation change.
upper (float, optional): A float greater than zero, the upper bound for the random
saturation change. Must be greater than `lower`.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
'''
if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
self.lower = lower
self.upper = upper
self.prob = prob
self.change_saturation = Saturation(factor=1.0)
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
self.change_saturation.factor = np.random.uniform(self.lower, self.upper)
return self.change_saturation(image, labels)
elif labels is None:
return image
else:
return image, labels
class Brightness:
'''
Changes the brightness of RGB images.
Important:
- Expects RGB input.
- Expects input array to be of `dtype` `float`.
'''
def __init__(self, delta):
'''
Arguments:
delta (int): An integer, the amount to add to or subtract from the intensity
of every pixel.
'''
self.delta = delta
def __call__(self, image, labels=None):
image = np.clip(image + self.delta, 0, 255)
if labels is None:
return image
else:
return image, labels
class RandomBrightness:
'''
Randomly changes the brightness of RGB images.
Important:
- Expects RGB input.
- Expects input array to be of `dtype` `float`.
'''
def __init__(self, lower=-84, upper=84, prob=0.5):
'''
Arguments:
lower (int, optional): An integer, the lower bound for the random brightness change.
upper (int, optional): An integer, the upper bound for the random brightness change.
Must be greater than `lower`.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
'''
if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
self.lower = float(lower)
self.upper = float(upper)
self.prob = prob
self.change_brightness = Brightness(delta=0)
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
self.change_brightness.delta = np.random.uniform(self.lower, self.upper)
return self.change_brightness(image, labels)
elif labels is None:
return image
else:
return image, labels
class Contrast:
'''
Changes the contrast of RGB images.
Important:
- Expects RGB input.
- Expects input array to be of `dtype` `float`.
'''
def __init__(self, factor):
'''
Arguments:
factor (float): A float greater than zero that determines contrast change, where
values less than one result in less contrast and values greater than one result
in more contrast.
'''
if factor <= 0.0: raise ValueError("It must be `factor > 0`.")
self.factor = factor
def __call__(self, image, labels=None):
image = np.clip(127.5 + self.factor * (image - 127.5), 0, 255)
if labels is None:
return image
else:
return image, labels
class RandomContrast:
'''
Randomly changes the contrast of RGB images.
Important:
- Expects RGB input.
- Expects input array to be of `dtype` `float`.
'''
def __init__(self, lower=0.5, upper=1.5, prob=0.5):
'''
Arguments:
lower (float, optional): A float greater than zero, the lower bound for the random
contrast change.
upper (float, optional): A float greater than zero, the upper bound for the random
contrast change. Must be greater than `lower`.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
'''
if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
self.lower = lower
self.upper = upper
self.prob = prob
self.change_contrast = Contrast(factor=1.0)
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
self.change_contrast.factor = np.random.uniform(self.lower, self.upper)
return self.change_contrast(image, labels)
elif labels is None:
return image
else:
return image, labels
class Gamma:
'''
Changes the gamma value of RGB images.
Important: Expects RGB input.
'''
def __init__(self, gamma):
'''
Arguments:
gamma (float): A float greater than zero that determines gamma change.
'''
if gamma <= 0.0: raise ValueError("It must be `gamma > 0`.")
self.gamma = gamma
self.gamma_inv = 1.0 / gamma
# Build a lookup table mapping the pixel values [0, 255] to
# their adjusted gamma values.
self.table = np.array([((i / 255.0) ** self.gamma_inv) * 255 for i in np.arange(0, 256)]).astype("uint8")
def __call__(self, image, labels=None):
image = cv2.LUT(image, table)
if labels is None:
return image
else:
return image, labels
class RandomGamma:
'''
Randomly changes the gamma value of RGB images.
Important: Expects RGB input.
'''
def __init__(self, lower=0.25, upper=2.0, prob=0.5):
'''
Arguments:
lower (float, optional): A float greater than zero, the lower bound for the random
gamma change.
upper (float, optional): A float greater than zero, the upper bound for the random
gamma change. Must be greater than `lower`.
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
'''
if lower >= upper: raise ValueError("`upper` must be greater than `lower`.")
self.lower = lower
self.upper = upper
self.prob = prob
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
gamma = np.random.uniform(self.lower, self.upper)
change_gamma = Gamma(gamma=gamma)
return change_gamma(image, labels)
elif labels is None:
return image
else:
return image, labels
class HistogramEqualization:
'''
Performs histogram equalization on HSV images.
Importat: Expects HSV input.
'''
def __init__(self):
pass
def __call__(self, image, labels=None):
image[:,:,2] = cv2.equalizeHist(image[:,:,2])
if labels is None:
return image
else:
return image, labels
class RandomHistogramEqualization:
'''
Randomly performs histogram equalization on HSV images. The randomness only refers
to whether or not the equalization is performed.
Importat: Expects HSV input.
'''
def __init__(self, prob=0.5):
'''
Arguments:
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
'''
self.prob = prob
self.equalize = HistogramEqualization()
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
return self.equalize(image, labels)
elif labels is None:
return image
else:
return image, labels
class ChannelSwap:
'''
Swaps the channels of images.
'''
def __init__(self, order):
'''
Arguments:
order (tuple): A tuple of integers that defines the desired channel order
of the input images after the channel swap.
'''
self.order = order
def __call__(self, image, labels=None):
image = image[:,:,self.order]
if labels is None:
return image
else:
return image, labels
class RandomChannelSwap:
'''
Randomly swaps the channels of RGB images.
Important: Expects RGB input.
'''
def __init__(self, prob=0.5):
'''
Arguments:
prob (float, optional): `(1 - prob)` determines the probability with which the original,
unaltered image is returned.
'''
self.prob = prob
# All possible permutations of the three image channels except the original order.
self.permutations = ((0, 2, 1),
(1, 0, 2), (1, 2, 0),
(2, 0, 1), (2, 1, 0))
self.swap_channels = ChannelSwap(order=(0, 1, 2))
def __call__(self, image, labels=None):
p = np.random.uniform(0,1)
if p >= (1.0-self.prob):
i = np.random.randint(5) # There are 6 possible permutations.
self.swap_channels.order = self.permutations[i]
return self.swap_channels(image, labels)
elif labels is None:
return image
else:
return image, labels