Files
easyocr/trainer/craft/data/imgaug.py
2025-07-10 19:42:57 +08:00

176 lines
5.2 KiB
Python

import random
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms.functional import resized_crop, crop
from torchvision.transforms import RandomResizedCrop, RandomCrop
from torchvision.transforms import InterpolationMode
def rescale(img, bboxes, target_size=2240):
    """Resize an image so its longer side is ``target_size`` and scale boxes to match.

    Args:
        img: Image array. Only the first two entries of ``img.shape`` are
            read (presumably height and width -- confirm against the caller).
        bboxes: Box coordinates. Must support multiplication by a float,
            e.g. a NumPy array.
        target_size: Length the longer of the two leading image dimensions
            is scaled to.

    Returns:
        A tuple ``(img, bboxes)`` holding the resized image and the boxes
        multiplied by the same factor.
    """
    height, width = img.shape[:2]
    # A single factor is applied to both axes, so the aspect ratio is kept.
    factor = target_size / max(height, width)
    resized = cv2.resize(
        img, dsize=None, fx=factor, fy=factor, interpolation=cv2.INTER_CUBIC
    )
    return resized, bboxes * factor
def random_resize_crop_synth(augment_targets, size):
    """Cut one random square crop out of every target and resize it.

    The crop side equals the shorter side of the image, and the same crop
    window is applied to all four targets so they stay aligned.

    Args:
        augment_targets: Sequence of exactly four arrays, in the order
            ``image, region_score, affinity_score, confidence_mask``.
        size: Side length of the square output.

    Returns:
        A list ``[image, region_score, affinity_score, confidence_mask]`` of
        NumPy arrays resized to ``size`` x ``size``.
    """
    image, region_score, affinity_score, confidence_mask = augment_targets
    pil_targets = [
        Image.fromarray(target)
        for target in (image, region_score, affinity_score, confidence_mask)
    ]

    # The crop window is sampled once, from the image, and reused below.
    side = min(pil_targets[0].size)
    top, left, crop_h, crop_w = RandomCrop.get_params(
        pil_targets[0], output_size=(side, side)
    )

    # The confidence mask is the only target resampled with NEAREST
    # (presumably so mask values are not blended -- confirm intent).
    modes = (
        InterpolationMode.BICUBIC,
        InterpolationMode.BICUBIC,
        InterpolationMode.BICUBIC,
        InterpolationMode.NEAREST,
    )
    return [
        np.array(
            resized_crop(
                target, top, left, crop_h, crop_w, (size, size), interpolation=mode
            )
        )
        for target, mode in zip(pil_targets, modes)
    ]
def random_resize_crop(
    augment_targets, scale, ratio, size, threshold, pre_crop_area=None
):
    """Apply one shared resized crop to the image and its score maps.

    The crop window is chosen in one of three ways:

    * ``pre_crop_area`` is given: it is used as-is and no random numbers
      are drawn.
    * otherwise, with probability ``threshold``: the window is sampled by
      ``RandomResizedCrop.get_params`` using ``scale`` and ``ratio``.
    * otherwise: ``RandomResizedCrop.get_params`` is asked for a full-area
      crop with a 1:1 aspect ratio.

    Args:
        augment_targets: Sequence of exactly four arrays, in the order
            ``image, region_score, affinity_score, confidence_mask``.
        scale: ``scale`` range forwarded to ``RandomResizedCrop.get_params``.
        ratio: ``ratio`` range forwarded to ``RandomResizedCrop.get_params``.
        size: Side length of the square output.
        threshold: Probability of using the ``scale``/``ratio`` sampling
            branch instead of the full-area 1:1 request.
        pre_crop_area: Optional ``(i, j, h, w)`` crop window (any 4-item
            iterable, including a NumPy array). ``None`` means sample one.

    Returns:
        A list ``[image, region_score, affinity_score, confidence_mask]`` of
        NumPy arrays resized to ``size`` x ``size``.
    """
    image, region_score, affinity_score, confidence_mask = augment_targets
    pil_targets = [
        Image.fromarray(target)
        for target in (image, region_score, affinity_score, confidence_mask)
    ]

    # Identity check, not ``!= None``: the equality form is evaluated
    # elementwise for NumPy arrays and makes the ``if`` raise ValueError.
    if pre_crop_area is not None:
        top, left, crop_h, crop_w = pre_crop_area
    elif random.random() < threshold:
        top, left, crop_h, crop_w = RandomResizedCrop.get_params(
            pil_targets[0], scale=scale, ratio=ratio
        )
    else:
        top, left, crop_h, crop_w = RandomResizedCrop.get_params(
            pil_targets[0], scale=(1.0, 1.0), ratio=(1.0, 1.0)
        )

    # The confidence mask is the only target resampled with NEAREST
    # (presumably so mask values are not blended -- confirm intent).
    modes = (
        InterpolationMode.BICUBIC,
        InterpolationMode.BICUBIC,
        InterpolationMode.BICUBIC,
        InterpolationMode.NEAREST,
    )
    return [
        np.array(
            resized_crop(
                target, top, left, crop_h, crop_w, (size, size), interpolation=mode
            )
        )
        for target, mode in zip(pil_targets, modes)
    ]
def random_crop(augment_targets, size):
    """Cut the same random ``size`` x ``size`` window out of every target.

    Args:
        augment_targets: Sequence of exactly four arrays, in the order
            ``image, region_score, affinity_score, confidence_mask``.
        size: Side length of the square crop window.

    Returns:
        A list ``[image, region_score, affinity_score, confidence_mask]`` of
        NumPy arrays, each cropped with the same window.
    """
    image, region_score, affinity_score, confidence_mask = augment_targets
    pil_targets = [
        Image.fromarray(target)
        for target in (image, region_score, affinity_score, confidence_mask)
    ]

    # The window is sampled once, from the image, so all targets stay aligned.
    top, left, crop_h, crop_w = RandomCrop.get_params(
        pil_targets[0], output_size=(size, size)
    )
    return [
        np.array(crop(target, top, left, crop_h, crop_w)) for target in pil_targets
    ]
def random_horizontal_flip(imgs):
    """Flip every array in ``imgs`` along axis 1 with probability 0.5.

    One random draw decides for the whole list, so either all entries are
    flipped or none are.

    Args:
        imgs: Mutable sequence of arrays. It is modified in place.

    Returns:
        The same ``imgs`` object. When flipped, each entry is replaced by a
        fresh copy of the flipped array.
    """
    if random.random() >= 0.5:
        return imgs
    for idx, img in enumerate(imgs):
        # ``.copy()`` turns the flipped view into an independent array.
        imgs[idx] = np.flip(img, axis=1).copy()
    return imgs
def random_scale(images, word_level_char_bbox, scale_range):
    """Resize all images by one randomly chosen factor and scale the boxes.

    Args:
        images: Mutable sequence of image arrays. Each entry is replaced in
            place by its resized version.
        word_level_char_bbox: Mutable sequence of box containers. Each entry
            is multiplied in place by the chosen factor.
        scale_range: Sequence of candidate factors; one is drawn with
            ``random.sample``.

    Returns:
        The same ``images`` object. The scaled boxes are not returned; they
        are only visible through the mutated ``word_level_char_bbox``.
    """
    factor = random.sample(scale_range, 1)[0]
    for idx, img in enumerate(images):
        images[idx] = cv2.resize(img, dsize=None, fx=factor, fy=factor)
    for idx in range(len(word_level_char_bbox)):
        word_level_char_bbox[idx] *= factor
    return images
def random_rotate(images, max_angle):
    """Rotate every image about its centre by one shared random angle.

    The angle is drawn from ``[-max_angle, max_angle)`` and passed to
    ``cv2.getRotationMatrix2D``. Each output keeps its input's size.

    Args:
        images: Mutable sequence of image arrays. Each entry is replaced in
            place by its rotated version.
        max_angle: Largest rotation magnitude, in the unit expected by
            ``cv2.getRotationMatrix2D``.

    Returns:
        The same ``images`` object.
    """
    angle = random.random() * 2 * max_angle - max_angle
    last_idx = len(images) - 1
    for idx, img in enumerate(images):
        rows, cols = img.shape[:2]
        # OpenCV takes the centre as (x, y) and dsize as (width, height).
        matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
        if idx == last_idx:
            # Only the last entry is warped with nearest-neighbour sampling
            # (presumably it is the confidence mask -- confirm with callers).
            rotated = cv2.warpAffine(
                img, M=matrix, dsize=(cols, rows), flags=cv2.INTER_NEAREST
            )
        else:
            rotated = cv2.warpAffine(img, matrix, (cols, rows))
        images[idx] = rotated
    return images