176 lines
5.2 KiB
Python
176 lines
5.2 KiB
Python
import random
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
from torchvision.transforms.functional import resized_crop, crop
|
|
from torchvision.transforms import RandomResizedCrop, RandomCrop
|
|
from torchvision.transforms import InterpolationMode
|
|
|
|
|
|
def rescale(img, bboxes, target_size=2240):
|
|
h, w = img.shape[0:2]
|
|
scale = target_size / max(h, w)
|
|
img = cv2.resize(img, dsize=None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
|
|
bboxes = bboxes * scale
|
|
return img, bboxes
|
|
|
|
|
|
def random_resize_crop_synth(augment_targets, size):
|
|
image, region_score, affinity_score, confidence_mask = augment_targets
|
|
|
|
image = Image.fromarray(image)
|
|
region_score = Image.fromarray(region_score)
|
|
affinity_score = Image.fromarray(affinity_score)
|
|
confidence_mask = Image.fromarray(confidence_mask)
|
|
|
|
short_side = min(image.size)
|
|
i, j, h, w = RandomCrop.get_params(image, output_size=(short_side, short_side))
|
|
|
|
image = resized_crop(
|
|
image, i, j, h, w, size=(size, size), interpolation=InterpolationMode.BICUBIC
|
|
)
|
|
region_score = resized_crop(
|
|
region_score, i, j, h, w, (size, size), interpolation=InterpolationMode.BICUBIC
|
|
)
|
|
affinity_score = resized_crop(
|
|
affinity_score,
|
|
i,
|
|
j,
|
|
h,
|
|
w,
|
|
(size, size),
|
|
interpolation=InterpolationMode.BICUBIC,
|
|
)
|
|
confidence_mask = resized_crop(
|
|
confidence_mask,
|
|
i,
|
|
j,
|
|
h,
|
|
w,
|
|
(size, size),
|
|
interpolation=InterpolationMode.NEAREST,
|
|
)
|
|
|
|
image = np.array(image)
|
|
region_score = np.array(region_score)
|
|
affinity_score = np.array(affinity_score)
|
|
confidence_mask = np.array(confidence_mask)
|
|
augment_targets = [image, region_score, affinity_score, confidence_mask]
|
|
|
|
return augment_targets
|
|
|
|
|
|
def random_resize_crop(
|
|
augment_targets, scale, ratio, size, threshold, pre_crop_area=None
|
|
):
|
|
image, region_score, affinity_score, confidence_mask = augment_targets
|
|
|
|
image = Image.fromarray(image)
|
|
region_score = Image.fromarray(region_score)
|
|
affinity_score = Image.fromarray(affinity_score)
|
|
confidence_mask = Image.fromarray(confidence_mask)
|
|
|
|
if pre_crop_area != None:
|
|
i, j, h, w = pre_crop_area
|
|
|
|
else:
|
|
if random.random() < threshold:
|
|
i, j, h, w = RandomResizedCrop.get_params(image, scale=scale, ratio=ratio)
|
|
else:
|
|
i, j, h, w = RandomResizedCrop.get_params(
|
|
image, scale=(1.0, 1.0), ratio=(1.0, 1.0)
|
|
)
|
|
|
|
image = resized_crop(
|
|
image, i, j, h, w, size=(size, size), interpolation=InterpolationMode.BICUBIC
|
|
)
|
|
region_score = resized_crop(
|
|
region_score, i, j, h, w, (size, size), interpolation=InterpolationMode.BICUBIC
|
|
)
|
|
affinity_score = resized_crop(
|
|
affinity_score,
|
|
i,
|
|
j,
|
|
h,
|
|
w,
|
|
(size, size),
|
|
interpolation=InterpolationMode.BICUBIC,
|
|
)
|
|
confidence_mask = resized_crop(
|
|
confidence_mask,
|
|
i,
|
|
j,
|
|
h,
|
|
w,
|
|
(size, size),
|
|
interpolation=InterpolationMode.NEAREST,
|
|
)
|
|
|
|
image = np.array(image)
|
|
region_score = np.array(region_score)
|
|
affinity_score = np.array(affinity_score)
|
|
confidence_mask = np.array(confidence_mask)
|
|
augment_targets = [image, region_score, affinity_score, confidence_mask]
|
|
|
|
return augment_targets
|
|
|
|
|
|
def random_crop(augment_targets, size):
|
|
image, region_score, affinity_score, confidence_mask = augment_targets
|
|
|
|
image = Image.fromarray(image)
|
|
region_score = Image.fromarray(region_score)
|
|
affinity_score = Image.fromarray(affinity_score)
|
|
confidence_mask = Image.fromarray(confidence_mask)
|
|
|
|
i, j, h, w = RandomCrop.get_params(image, output_size=(size, size))
|
|
|
|
image = crop(image, i, j, h, w)
|
|
region_score = crop(region_score, i, j, h, w)
|
|
affinity_score = crop(affinity_score, i, j, h, w)
|
|
confidence_mask = crop(confidence_mask, i, j, h, w)
|
|
|
|
image = np.array(image)
|
|
region_score = np.array(region_score)
|
|
affinity_score = np.array(affinity_score)
|
|
confidence_mask = np.array(confidence_mask)
|
|
augment_targets = [image, region_score, affinity_score, confidence_mask]
|
|
|
|
return augment_targets
|
|
|
|
|
|
def random_horizontal_flip(imgs):
|
|
if random.random() < 0.5:
|
|
for i in range(len(imgs)):
|
|
imgs[i] = np.flip(imgs[i], axis=1).copy()
|
|
return imgs
|
|
|
|
|
|
def random_scale(images, word_level_char_bbox, scale_range):
|
|
scale = random.sample(scale_range, 1)[0]
|
|
|
|
for i in range(len(images)):
|
|
images[i] = cv2.resize(images[i], dsize=None, fx=scale, fy=scale)
|
|
|
|
for i in range(len(word_level_char_bbox)):
|
|
word_level_char_bbox[i] *= scale
|
|
|
|
return images
|
|
|
|
|
|
def random_rotate(images, max_angle):
|
|
angle = random.random() * 2 * max_angle - max_angle
|
|
for i in range(len(images)):
|
|
img = images[i]
|
|
w, h = img.shape[:2]
|
|
rotation_matrix = cv2.getRotationMatrix2D((h / 2, w / 2), angle, 1)
|
|
if i == len(images) - 1:
|
|
img_rotation = cv2.warpAffine(
|
|
img, M=rotation_matrix, dsize=(h, w), flags=cv2.INTER_NEAREST
|
|
)
|
|
else:
|
|
img_rotation = cv2.warpAffine(img, rotation_matrix, (h, w))
|
|
images[i] = img_rotation
|
|
return images
|