testing dataset
This commit is contained in:
345
trainer/craft/utils/craft_utils.py
Normal file
345
trainer/craft/utils/craft_utils.py
Normal file
@@ -0,0 +1,345 @@
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import torch
|
||||
import cv2
|
||||
import math
|
||||
import numpy as np
|
||||
from data import imgproc
|
||||
|
||||
""" auxilary functions """
|
||||
# unwarp coordinates
|
||||
|
||||
|
||||
|
||||
|
||||
def warpCoord(Minv, pt):
    """Map a 2-D point through the 3x3 homogeneous transform ``Minv``.

    The point is lifted to ``(x, y, 1)``, multiplied by ``Minv`` and then
    divided by the third component of the product.

    :param Minv: 3x3 matrix (anything ``np.matmul`` accepts).
    :param pt: indexable with at least two entries ``(x, y)``.
    :return: NumPy array ``[x', y']``.
    """
    homogeneous = np.matmul(Minv, (pt[0], pt[1], 1))
    scale = homogeneous[2]
    return np.array([homogeneous[0] / scale, homogeneous[1] / scale])
|
||||
""" end of auxilary functions """
|
||||
|
||||
def test():
    """Print ``pass``; a trivial smoke check with no return value."""
    print("pass")
|
||||
|
||||
|
||||
def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
    """Extract one quadrilateral per connected text component of the score maps.

    Both maps are binarised, their union is labelled with 4-connectivity and
    every surviving component is turned into a 4-point box.

    :param textmap: 2-D region (character) score map.
    :param linkmap: 2-D affinity (link) score map, same shape as ``textmap``.
    :param text_threshold: a component is kept only if its maximum region
        score reaches this value.
    :param link_threshold: binarisation threshold applied to ``linkmap``.
    :param low_text: binarisation threshold applied to ``textmap``.
    :return: ``(det, labels, mapper)`` where ``det`` is a list of ``(4, 2)``
        float arrays, ``labels`` is the connected-component label image and
        ``mapper[i]`` is the label id that produced ``det[i]``.
    """
    # work on copies so the caller's score maps are never touched
    linkmap = linkmap.copy()
    textmap = textmap.copy()
    img_h, img_w = textmap.shape

    # binarise both maps (threshold type 0 is cv2.THRESH_BINARY) and label their union
    _, text_score = cv2.threshold(textmap, low_text, 1, 0)
    _, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)
    text_score_comb = np.clip(text_score + link_score, 0, 1)
    nLabels, labels, stats, _ = cv2.connectedComponentsWithStats(
        text_score_comb.astype(np.uint8), connectivity=4
    )

    # pixels that are link but not text; identical for every component, so
    # compute the mask once instead of once per label
    link_only = np.logical_and(link_score == 1, text_score == 0)

    det = []
    mapper = []
    for k in range(1, nLabels):  # label 0 is the background
        # size filtering; convert to a Python int so the products below cannot
        # overflow the int32 values stored in ``stats``
        size = int(stats[k, cv2.CC_STAT_AREA])
        if size < 10:
            continue

        # thresholding on the strongest region score inside the component
        component = labels == k
        if np.max(textmap[component]) < text_threshold:
            continue

        # make segmentation map
        segmap = np.zeros(textmap.shape, dtype=np.uint8)
        segmap[component] = 255
        segmap[link_only] = 0  # remove link area

        x, y = int(stats[k, cv2.CC_STAT_LEFT]), int(stats[k, cv2.CC_STAT_TOP])
        w, h = int(stats[k, cv2.CC_STAT_WIDTH]), int(stats[k, cv2.CC_STAT_HEIGHT])
        # dilation radius derived from the component's area and bounding box
        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)

        # dilation window, clamped to the image
        sx = max(x - niter, 0)
        sy = max(y - niter, 0)
        ex = min(x + w + niter + 1, img_w)
        ey = min(y + h + niter + 1, img_h)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel, iterations=1)

        # make box: np.where yields (row, col); the roll turns that into (x, y)
        np_contours = (
            np.roll(np.array(np.where(segmap != 0)), 1, axis=0).transpose().reshape(-1, 2)
        )
        if np_contours.shape[0] == 0:
            # nothing left after removing the link area (possible when
            # text_threshold <= low_text); minAreaRect would fail on no points
            continue
        rectangle = cv2.minAreaRect(np_contours)
        box = cv2.boxPoints(rectangle)

        # align diamond-shape: a near-square rotated rectangle is replaced by
        # the axis-aligned bounding box of the pixels
        w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
        box_ratio = max(w, h) / (min(w, h) + 1e-5)
        if abs(1 - box_ratio) <= 0.1:
            l, r = np_contours[:, 0].min(), np_contours[:, 0].max()
            t, b = np_contours[:, 1].min(), np_contours[:, 1].max()
            box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)

        # rotate the corner list so the corner with the smallest x + y comes first
        startidx = box.sum(axis=1).argmin()
        box = np.roll(box, 4 - startidx, 0)

        det.append(box)
        mapper.append(k)

    return det, labels, mapper
|
||||
|
||||
def getPoly_core(boxes, labels, mapper, linkmap):
    """Refine each detected box into a polygon that follows the text shape.

    Each box is rectified with a perspective warp, the label mask of its
    component is scanned column by column, ``num_cp`` pivot points are chosen
    along the text and expanded into top/bottom points, and two end caps are
    searched for. The result is warped back into image coordinates.

    :param boxes: list of ``(4, 2)`` float boxes, as produced by
        ``getDetBoxes_core``.
    :param labels: connected-component label image.
    :param mapper: ``mapper[k]`` is the label id belonging to ``boxes[k]``.
    :param linkmap: accepted for interface compatibility; not used here.
    :return: list with one entry per box: ``None`` when no polygon could be
        built, otherwise an array of ``2 * num_cp + 4`` points (14 x 2).
    """
    # configs
    num_cp = 5
    max_len_ratio = 0.7
    expand_ratio = 1.45
    max_r = 2.0
    step_r = 0.2

    polys = []
    for k, box in enumerate(boxes):
        # size filter for small instance
        w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
        if w < 30 or h < 30:
            polys.append(None)
            continue

        # warp image
        tar = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
        M = cv2.getPerspectiveTransform(box, tar)
        word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
        try:
            Minv = np.linalg.inv(M)
        except np.linalg.LinAlgError:
            # singular transform: no way to map the polygon back to the image
            polys.append(None)
            continue

        # binarization for selected label
        cur_label = mapper[k]
        word_label[word_label != cur_label] = 0
        word_label[word_label > 0] = 1

        """ Polygon generation """
        # find top/bottom contours: (column, first row, last row) per column
        cp = []
        max_len = -1
        for i in range(w):
            region = np.where(word_label[:, i] != 0)[0]
            if len(region) < 2:
                continue
            cp.append((i, region[0], region[-1]))
            length = region[-1] - region[0] + 1
            if length > max_len:
                max_len = length

        # pass if max_len is similar to h
        if h * max_len_ratio < max_len:
            polys.append(None)
            continue

        # get pivot points with fixed length
        tot_seg = num_cp * 2 + 1
        seg_w = w / tot_seg     # segment width
        pp = [None] * num_cp    # init pivot points
        # the inner [0, 0] list is shared, but entries are only ever replaced
        # by new lists below, never mutated in place
        cp_section = [[0, 0]] * tot_seg
        seg_height = [0] * num_cp
        seg_num = 0
        num_sec = 0
        prev_h = -1
        for i in range(0, len(cp)):
            (x, sy, ey) = cp[i]
            if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
                # average previous segment
                if num_sec == 0:
                    break
                cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
                num_sec = 0

                # reset variables
                seg_num += 1
                prev_h = -1

            # accumulate center points
            cy = (sy + ey) * 0.5
            cur_h = ey - sy + 1
            cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
            num_sec += 1

            if seg_num % 2 == 0:
                continue  # No polygon area

            # within an odd segment keep the tallest column as the pivot
            if prev_h < cur_h:
                pp[int((seg_num - 1) / 2)] = (x, cy)
                seg_height[int((seg_num - 1) / 2)] = cur_h
                prev_h = cur_h

        # processing last segment
        if num_sec != 0:
            cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]

        # pass if num of pivots is not sufficient or segment width is smaller than character height
        if None in pp or seg_w < np.max(seg_height) * 0.25:
            polys.append(None)
            continue

        # calc median maximum of pivot points
        half_char_h = np.median(seg_height) * expand_ratio / 2

        # calc gradient and apply to make horizontal pivots
        new_pp = []
        for i, (x, cy) in enumerate(pp):
            dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
            dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
            if dx == 0:     # gradient is zero
                new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
                continue
            rad = - math.atan2(dy, dx)
            c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
            new_pp.append([x - s, cy - c, x + s, cy + c])

        # get edge points to cover character heatmaps
        isSppFound, isEppFound = False, False
        grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
        grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
        for r in np.arange(0.5, max_r, step_r):
            dx = 2 * half_char_h * r
            if not isSppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_s * dx
                p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                # accept the first cap line that no longer touches the mask,
                # or the last radius that will be tried
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    spp = p
                    isSppFound = True
            if not isEppFound:
                line_img = np.zeros(word_label.shape, dtype=np.uint8)
                dy = grad_e * dx
                p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
                    epp = p
                    isEppFound = True
            if isSppFound and isEppFound:
                break

        # pass if boundary of polygon is not found
        if not (isSppFound and isEppFound):
            polys.append(None)
            continue

        # make final polygon: start cap, top points left to right, end cap,
        # bottom points right to left, back to the start cap
        poly = []
        poly.append(warpCoord(Minv, (spp[0], spp[1])))
        for p in new_pp:
            poly.append(warpCoord(Minv, (p[0], p[1])))
        poly.append(warpCoord(Minv, (epp[0], epp[1])))
        poly.append(warpCoord(Minv, (epp[2], epp[3])))
        for p in reversed(new_pp):
            poly.append(warpCoord(Minv, (p[2], p[3])))
        poly.append(warpCoord(Minv, (spp[2], spp[3])))

        # add to final result
        polys.append(np.array(poly))

    return polys
|
||||
|
||||
def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
    """Detect text boxes and, optionally, refined polygons.

    :param textmap: region score map passed to ``getDetBoxes_core``.
    :param linkmap: affinity score map passed to ``getDetBoxes_core``.
    :param text_threshold: see ``getDetBoxes_core``.
    :param link_threshold: see ``getDetBoxes_core``.
    :param low_text: see ``getDetBoxes_core``.
    :param poly: when true, polygons are computed with ``getPoly_core``;
        otherwise every polygon slot is ``None``.
    :return: ``(boxes, polys)`` with ``len(polys) == len(boxes)``.
    """
    boxes, labels, mapper = getDetBoxes_core(
        textmap, linkmap, text_threshold, link_threshold, low_text
    )

    if not poly:
        return boxes, [None] * len(boxes)

    return boxes, getPoly_core(boxes, labels, mapper, linkmap)
|
||||
|
||||
def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net = 2):
    """Scale box/polygon coordinates by ``ratio * ratio_net`` per axis.

    :param polys: sequence whose entries are ``None`` or point arrays whose
        last axis is ``(x, y)``.
    :param ratio_w: horizontal scale factor.
    :param ratio_h: vertical scale factor.
    :param ratio_net: extra factor applied to both axes (default 2).
    :return: the input unchanged when it is empty. Otherwise a NumPy array:
        a regular numeric array when every entry has the same shape, or a
        1-D object array (keeping ``None`` entries) when entries are missing
        or differ in shape.
    """
    if len(polys) > 0:
        scale = (ratio_w * ratio_net, ratio_h * ratio_net)
        shapes = {None if p is None else np.shape(p) for p in polys}
        if None not in shapes and len(shapes) == 1:
            # homogeneous input: a regular stacked array, scaled in place
            polys = np.array(polys)
            for k in range(len(polys)):
                polys[k] *= scale
        else:
            # ragged or partially missing input: np.array() on such a list
            # raises on recent NumPy, so build the object array explicitly
            scaled = np.empty(len(polys), dtype=object)
            for k, p in enumerate(polys):
                scaled[k] = None if p is None else np.multiply(p, scale)
            polys = scaled
    return polys
|
||||
|
||||
def save_outputs(image, region_scores, affinity_scores, text_threshold, link_threshold,
                 low_text, outoput_path, confidence_mask = None):
    """Save image, region_scores, and affinity_scores in a single image.

    region_scores and affinity_scores must be CPU numpy arrays. You can
    convert GPU tensors to CPU numpy arrays like this::

        array = tensor.cpu().data.numpy()

    When saving outputs of the network during training, make sure you convert
    ALL tensors (image, region_score, affinity_score) to numpy arrays first.

    Detected boxes are drawn directly onto ``image`` (it is modified in place).

    :param image: numpy array with one more dimension than ``region_scores``.
    :param region_scores: 2D numpy array with each element between 0~1.
    :param affinity_scores: same shape as region_scores.
    :param text_threshold: 0 ~ 1. Closer to 0, characters with lower confidence
        will also be considered a word and be boxed.
    :param link_threshold: 0 ~ 1. Closer to 0, links with lower confidence will
        also be considered a word and be boxed.
    :param low_text: 0 ~ 1. Closer to 0, boxes will be more loosely drawn.
    :param outoput_path: file path handed to ``cv2.imwrite``.
    :param confidence_mask: optional map rendered below the score heatmaps.
    :return: the composed image that was written to ``outoput_path``.
    """

    assert region_scores.shape == affinity_scores.shape
    assert len(image.shape) - 1 == len(region_scores.shape)

    boxes, polys = getDetBoxes(region_scores, affinity_scores, text_threshold, link_threshold,
                               low_text, False)
    # box coordinates are doubled before drawing on ``image``
    # NOTE(review): presumably the score maps are half the image resolution - confirm
    boxes = np.array(boxes, np.int32) * 2
    if len(boxes) > 0:
        # np.clip returns a new array unless ``out`` is given; the original
        # discarded the result, so nothing was ever clipped
        np.clip(boxes[:, :, 0], 0, image.shape[1] - 1, out=boxes[:, :, 0])
        np.clip(boxes[:, :, 1], 0, image.shape[0] - 1, out=boxes[:, :, 1])
        for box in boxes:
            cv2.polylines(image, [np.reshape(box, (-1, 1, 2))], True, (0, 0, 255))

    target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores)
    target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores)

    if confidence_mask is not None:
        confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)
        # top row: region | affinity, bottom row: blank | confidence mask
        gt_scores = np.hstack([target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color])
        confidence_mask_gray = np.hstack([np.zeros_like(confidence_mask_gray), confidence_mask_gray])
        output = np.concatenate([gt_scores, confidence_mask_gray], axis=0)
        output = np.hstack([image, output])

    else:
        # region heatmap stacked above the affinity heatmap, next to the image
        gt_scores = np.concatenate([target_gaussian_heatmap_color, target_gaussian_affinity_heatmap_color], axis=0)
        output = np.hstack([image, gt_scores])

    cv2.imwrite(outoput_path, output)
    return output
|
||||
|
||||
|
||||
def save_outputs_from_tensors(images, region_scores, affinity_scores, text_threshold, link_threshold,
                              low_text, output_dir, image_names, confidence_mask = None):

    """Take images, region_scores, and affinity_scores as tensors (can be GPU)
    and save one visualisation per batch element via ``save_outputs``.

    :param images: 4D tensor or numpy array; the first dimension is the batch.
        No axis permutation is applied here.
    :param region_scores: 3D tensor with values between 0 ~ 1
    :param affinity_scores: 3D tensor with values between 0 ~ 1
    :param text_threshold: forwarded to ``save_outputs``.
    :param link_threshold: forwarded to ``save_outputs``.
    :param low_text: forwarded to ``save_outputs``.
    :param output_dir: directory to save the output images. Will be joined with base names of image_names
    :param image_names: names of each image. Doesn't have to be the base name (image file names)
    :param confidence_mask: forwarded unchanged to every ``save_outputs`` call.
    :return: list with the composed output image of every batch element.
    """
    if isinstance(images, torch.Tensor):
        # np.array() cannot convert CUDA or grad-tracking tensors; detach and
        # move to the CPU first
        images = images.detach().cpu().numpy()

    region_scores = region_scores.detach().cpu().numpy()
    affinity_scores = affinity_scores.detach().cpu().numpy()

    batch_size = images.shape[0]
    assert batch_size == region_scores.shape[0] and batch_size == affinity_scores.shape[0] and batch_size == len(image_names), \
        "The first dimension (i.e. batch size) of images, region scores, and affinity scores must be equal"

    output_images = []

    for i in range(batch_size):
        image = images[i]
        region_score = region_scores[i]
        affinity_score = affinity_scores[i]

        image_name = os.path.basename(image_names[i])
        outoput_path = os.path.join(output_dir, image_name)

        # NOTE(review): the same confidence_mask is used for every batch
        # element - confirm that a per-sample mask is not expected here
        output_image = save_outputs(image, region_score, affinity_score, text_threshold, link_threshold,
                                    low_text, outoput_path, confidence_mask=confidence_mask)

        output_images.append(output_image)

    return output_images
|
||||
361
trainer/craft/utils/inference_boxes.py
Normal file
361
trainer/craft/utils/inference_boxes.py
Normal file
@@ -0,0 +1,361 @@
|
||||
import os
|
||||
import re
|
||||
import itertools
|
||||
|
||||
import cv2
|
||||
import time
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
|
||||
from utils.craft_utils import getDetBoxes, adjustResultCoordinates
|
||||
from data import imgproc
|
||||
from data.dataset import SynthTextDataSet
|
||||
import math
|
||||
import xml.etree.ElementTree as elemTree
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------------------------------------------#
|
||||
def rotatePoint(xc, yc, xp, yp, theta):
    """Rotate point ``(xp, yp)`` about centre ``(xc, yc)`` by ``theta`` radians.

    With ``dx = xp - xc`` and ``dy = yp - yc`` the rotated offset is
    ``(cos*dx + sin*dy, -sin*dx + cos*dy)``.

    :return: ``(x, y)`` tuple of ints; ``int()`` truncates toward zero.
    """
    dx, dy = xp - xc, yp - yc
    cos_t, sin_t = math.cos(theta), math.sin(theta)

    rotated_x = cos_t * dx + sin_t * dy
    rotated_y = -sin_t * dx + cos_t * dy
    return int(xc + rotated_x), int(yc + rotated_y)
|
||||
|
||||
def addRotatedShape(cx, cy, w, h, angle):
    """Return the four corners of a ``w`` x ``h`` box centred at ``(cx, cy)``.

    Each corner of the axis-aligned box is passed through ``rotatePoint``
    with ``-angle``. Corners are listed starting from ``(cx - w/2, cy - h/2)``,
    then ``(+, -)``, ``(+, +)``, ``(-, +)``.

    :return: list of four ``[x, y]`` integer pairs.
    """
    half_w = w / 2
    half_h = h / 2
    corners = (
        (cx - half_w, cy - half_h),
        (cx + half_w, cy - half_h),
        (cx + half_w, cy + half_h),
        (cx - half_w, cy + half_h),
    )
    return [list(rotatePoint(cx, cy, px, py, -angle)) for px, py in corners]
|
||||
|
||||
def xml_parsing(xml):
    """Parse an annotation XML into a list of box dictionaries.

    Every ``<object>`` contributes one entry per ``<robndbox>`` (rotated box,
    converted with ``addRotatedShape``) followed by one entry per ``<bndbox>``
    (axis-aligned box). Each entry gets its own coordinates; previously all
    boxes of one object shared a single dict and ended up with the last
    box's points.

    :param xml: path or file object accepted by ``ElementTree.parse``.
    :return: list of dicts with keys ``"points"`` (``(4, 2)`` NumPy array),
        ``"text"`` and ``"ignore"``. Objects named ``"dnc"`` are reported with
        text ``"###"`` and ``ignore=True``.
    """
    tree = elemTree.parse(xml)

    bounds = []
    for element in tree.iter(tag="object"):
        name = element.find("name").text

        corner_sets = []
        for box_coord in element.iter(tag="robndbox"):
            cx = float(box_coord.find("cx").text)
            cy = float(box_coord.find("cy").text)
            w = float(box_coord.find("w").text)
            h = float(box_coord.find("h").text)
            angle = float(box_coord.find("angle").text)
            corner_sets.append(addRotatedShape(cx, cy, w, h, angle))

        for box_coord in element.iter(tag="bndbox"):
            xmin = int(box_coord.find("xmin").text)
            ymin = int(box_coord.find("ymin").text)
            xmax = int(box_coord.find("xmax").text)
            ymax = int(box_coord.find("ymax").text)
            corner_sets.append([[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]])

        ignore = name == "dnc"
        text = "###" if ignore else name
        for corners in corner_sets:
            bounds.append({"points": np.array(corners), "text": text, "ignore": ignore})

    return bounds
|
||||
|
||||
#-------------------------------------------------------------------------------------------------------------------#
|
||||
|
||||
def load_prescription_gt(dataFolder):
    """Collect image paths and parsed XML annotations below ``dataFolder``.

    The tree is walked recursively. Files whose name contains ``.jpg`` are
    images and files whose name contains ``.xml`` are annotations. Both lists
    are sorted and paired positionally; each pair must share the same path
    stem (checked with ``assert``).

    :return: ``(annotations, image_paths)`` where ``annotations[i]`` is the
        ``xml_parsing`` result for the i-th pair and ``image_paths`` is the
        sorted list of all image paths found.
    """
    image_files = []
    label_files = []
    for root, _directories, names in os.walk(dataFolder):
        for name in names:
            full_path = os.path.join(root, name)
            if '.jpg' in name:
                image_files.append(full_path)
            if '.xml' in name:
                label_files.append(full_path)

    image_files = sorted(image_files)

    parsed_labels = []
    for image_file, label_file in zip(image_files, sorted(label_files)):
        # check file: image and annotation must belong together
        assert image_file.split(".jpg")[0] == label_file.split(".xml")[0]
        parsed_labels.append(xml_parsing(label_file))

    return parsed_labels, image_files
|
||||
|
||||
|
||||
# NOTE
|
||||
def load_prescription_cleval_gt(dataFolder):
    """Collect image paths and ``*_cl.txt`` box annotations below ``dataFolder``.

    The tree is walked recursively. Files whose name contains ``.jpg`` are
    images and files whose name contains ``_cl.txt`` are annotations. Both
    lists are sorted and paired positionally; the image stem must equal the
    annotation path with ``_label_cl.txt`` removed (checked with ``assert``).

    Each non-empty annotation line must start with eight comma-separated
    integers (the box coordinates).

    :return: ``(annotations, image_paths)``. ``annotations[i]`` is a list of
        dicts whose ``"points"`` is a flat array of 8 ints; ``"text"`` and
        ``"ignore"`` are left as ``None``. ``image_paths`` is the sorted list
        of all image paths found.
    """
    total_img_path = []
    total_gt_path = []
    for (root, directories, files) in os.walk(dataFolder):
        for file in files:
            if '.jpg' in file:
                total_img_path.append(os.path.join(root, file))
            if '_cl.txt' in file:
                total_gt_path.append(os.path.join(root, file))

    total_img_path = sorted(total_img_path)

    total_imgs_parsing_bboxes = []
    for img_path, gt_path in zip(total_img_path, sorted(total_gt_path)):
        # check file
        assert img_path.split(".jpg")[0] == gt_path.split('_label_cl.txt')[0]

        # the context manager closes the handle; the original leaked it
        with open(gt_path, encoding="utf-8") as gt_file:
            lines = gt_file.readlines()

        word_bboxes = []
        for line in lines:
            # the encode/decode round trip drops a leading UTF-8 BOM
            stripped = line.strip().encode("utf-8").decode("utf-8-sig")
            if not stripped:
                continue  # blank line (e.g. trailing newline): nothing to parse
            box_info = stripped.split(",")

            box_info_dict = {"points": None, "text": None, "ignore": None}
            box_points = [int(box_info[i]) for i in range(8)]
            box_info_dict["points"] = np.array(box_points)

            word_bboxes.append(box_info_dict)
        total_imgs_parsing_bboxes.append(word_bboxes)

    return total_imgs_parsing_bboxes, total_img_path
|
||||
|
||||
|
||||
def load_synthtext_gt(data_folder):
    """Load word-level ground truth for the first 100 SynthText samples.

    :param data_folder: used both as ``data_dir`` and ``saved_gt_dir`` of
        ``SynthTextDataSet`` and as the root for the image paths.
    :return: ``(total_imgs_bboxes, total_img_path)``; ``total_imgs_bboxes[i]``
        is a list of dicts with ``"points"``, ``"text"`` and ``"ignore"``
        (always ``False``) for image ``total_img_path[i]``.
    :raises ValueError: when the number of words differs from the number of
        word boxes for an image.
    """
    synth_dataset = SynthTextDataSet(
        output_size=768, data_dir=data_folder, saved_gt_dir=data_folder, logging=False
    )
    img_names, img_bbox, img_words = synth_dataset.load_data(bbox="word")

    total_img_path = []
    total_imgs_bboxes = []
    # only the first 100 samples are loaded
    for index in range(len(img_bbox[:100])):
        img_path = os.path.join(data_folder, img_names[index][0])
        total_img_path.append(img_path)
        try:
            wordbox = img_bbox[index].transpose((2, 1, 0))
        except ValueError:
            # the 3-axis transpose does not fit (e.g. a single box stored
            # without the box axis): add that axis, then reorder
            wordbox = np.expand_dims(img_bbox[index], axis=0)
            wordbox = wordbox.transpose((0, 2, 1))

        # split each text entry on spaces/newlines into individual words
        words = [re.split(" \n|\n |\n| ", t.strip()) for t in img_words[index]]
        words = list(itertools.chain(*words))
        words = [t for t in words if len(t) > 0]

        if len(words) != len(wordbox):
            # was an ipdb.set_trace() breakpoint, which stalls or fails in
            # non-interactive runs; report the inconsistency instead
            raise ValueError(
                f"{img_path}: {len(words)} words but {len(wordbox)} word boxes"
            )

        single_img_bboxes = []
        for j in range(len(words)):
            box_info_dict = {"points": None, "text": None, "ignore": None}
            box_info_dict["points"] = wordbox[j]
            box_info_dict["text"] = words[j]
            box_info_dict["ignore"] = False
            single_img_bboxes.append(box_info_dict)

        total_imgs_bboxes.append(single_img_bboxes)

    return total_imgs_bboxes, total_img_path
|
||||
|
||||
|
||||
def load_icdar2015_gt(dataFolder, isTraing=False):
    """Load ICDAR 2015 ground-truth boxes and the matching image paths.

    Annotation files are read from the ``ch4_*_localization_transcription_gt``
    folder. Every non-empty line holds eight comma-separated integers (four
    ``x, y`` corners) followed by the transcription, which may itself contain
    commas. The image path is the annotation file name with ``.txt`` replaced
    by ``.jpg`` and ``gt_`` removed, inside the ``ch4_*_images`` folder.

    Images are not opened here; only their paths are returned.

    :param dataFolder: dataset root containing both folders.
    :param isTraing: use the training folders instead of the test folders.
    :return: ``(total_imgs_bboxes, total_img_path)``; each box dict has
        ``"points"`` (``(4, 2)`` int32 array), ``"text"`` and ``"ignore"``
        (true when the text is ``"###"``). Order follows ``os.listdir``.
    """
    if isTraing:
        img_folderName = "ch4_training_images"
        gt_folderName = "ch4_training_localization_transcription_gt"
    else:
        img_folderName = "ch4_test_images"
        gt_folderName = "ch4_test_localization_transcription_gt"

    gt_dir = os.path.join(dataFolder, gt_folderName)
    total_imgs_bboxes = []
    total_img_path = []
    for gt_name in os.listdir(gt_dir):
        gt_path = os.path.join(gt_dir, gt_name)
        # rewrite only the file name, so a dataFolder that happens to contain
        # "gt_" or ".txt" cannot corrupt the image path
        img_name = gt_name.replace(".txt", ".jpg").replace("gt_", "")
        img_path = os.path.join(dataFolder, img_folderName, img_name)

        with open(gt_path, encoding="utf-8") as gt_file:
            lines = gt_file.readlines()

        single_img_bboxes = []
        for line in lines:
            # the encode/decode round trip drops a leading UTF-8 BOM
            stripped = line.strip().encode("utf-8").decode("utf-8-sig")
            if not stripped:
                continue  # skip blank lines
            box_info = stripped.split(",")

            box_points = [int(box_info[j]) for j in range(8)]
            word = ",".join(box_info[8:])
            box_points = np.array(box_points, np.int32).reshape(4, 2)

            box_info_dict = {"points": None, "text": None, "ignore": None}
            box_info_dict["points"] = box_points
            box_info_dict["text"] = word
            box_info_dict["ignore"] = word == "###"

            single_img_bboxes.append(box_info_dict)
        total_imgs_bboxes.append(single_img_bboxes)
        total_img_path.append(img_path)
    return total_imgs_bboxes, total_img_path
|
||||
|
||||
|
||||
def load_icdar2013_gt(dataFolder, isTraing=False):
    """Load ICDAR 2013 ground-truth boxes and the matching image paths.

    Every non-empty annotation line holds four comma-separated integers
    ``x1, y1, x2, y2`` followed by the transcription (kept verbatim, commas
    included). The rectangle is expanded into four corners. The image path is
    the annotation file name with ``.txt`` replaced by ``.jpg`` and ``gt_``
    removed, inside the image folder.

    Images are not opened here; only their paths are returned.

    :param dataFolder: dataset root containing the image and GT folders.
    :param isTraing: currently has no effect - the test folders are used
        either way, as in the original code.
        NOTE(review): confirm whether training folders should be selected.
    :return: ``(total_imgs_bboxes, total_img_path)``; each box dict has
        ``"points"`` (list of four ``[x, y]`` pairs), ``"text"`` and
        ``"ignore"`` (true when the text is ``"###"``). Order follows
        ``os.listdir``.
    """
    # choose test dataset (both values of isTraing map to the same folders)
    img_folderName = "Challenge2_Test_Task12_Images"
    gt_folderName = "Challenge2_Test_Task1_GT"

    gt_dir = os.path.join(dataFolder, gt_folderName)

    total_imgs_bboxes = []
    total_img_path = []
    for gt_name in os.listdir(gt_dir):
        gt_path = os.path.join(gt_dir, gt_name)
        # rewrite only the file name, so a dataFolder that happens to contain
        # "gt_" or ".txt" cannot corrupt the image path
        img_name = gt_name.replace(".txt", ".jpg").replace("gt_", "")
        img_path = os.path.join(dataFolder, img_folderName, img_name)

        with open(gt_path, encoding="utf-8") as gt_file:
            lines = gt_file.readlines()

        single_img_bboxes = []
        for line in lines:
            # the encode/decode round trip drops a leading UTF-8 BOM
            stripped = line.strip().encode("utf-8").decode("utf-8-sig")
            if not stripped:
                continue  # skip blank lines
            box_info = stripped.split(",")

            box = [int(box_info[j]) for j in range(4)]
            word = ",".join(box_info[4:])
            box = [
                [box[0], box[1]],
                [box[2], box[1]],
                [box[2], box[3]],
                [box[0], box[3]],
            ]

            box_info_dict = {"points": None, "text": None, "ignore": None}
            box_info_dict["points"] = box
            box_info_dict["text"] = word
            box_info_dict["ignore"] = word == "###"

            single_img_bboxes.append(box_info_dict)

        total_imgs_bboxes.append(single_img_bboxes)
        total_img_path.append(img_path)

    return total_imgs_bboxes, total_img_path
|
||||
|
||||
|
||||
def test_net(
    net,
    image,
    text_threshold,
    link_threshold,
    low_text,
    cuda,
    poly,
    canvas_size=1280,
    mag_ratio=1.5,
):
    """Run the detector on one image and return boxes, polygons and heatmaps.

    :param net: callable returning ``(y, feature)``; ``y[0, :, :, 0]`` is used
        as the region score and ``y[0, :, :, 1]`` as the affinity score.
    :param image: input image handed to ``imgproc.resize_aspect_ratio``.
    :param text_threshold: forwarded to ``getDetBoxes``.
    :param link_threshold: forwarded to ``getDetBoxes``.
    :param low_text: forwarded to ``getDetBoxes``.
    :param cuda: move the input tensor to the GPU when true.
    :param poly: request polygons from ``getDetBoxes``.
    :param canvas_size: forwarded to ``imgproc.resize_aspect_ratio``.
    :param mag_ratio: forwarded to ``imgproc.resize_aspect_ratio``.
    :return: ``(boxes, polys, render_img)``; missing polygons are replaced by
        their box and ``render_img`` is ``[region heatmap, affinity heatmap]``.
    """
    # resize
    resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio
    )
    inverse_ratio = 1 / target_ratio
    ratio_h = inverse_ratio
    ratio_w = inverse_ratio

    # preprocessing: [h, w, c] -> [c, h, w] -> [b, c, h, w]
    normalized = imgproc.normalizeMeanVariance(resized)
    batch = torch.from_numpy(normalized).permute(2, 0, 1)
    batch = Variable(batch.unsqueeze(0))
    if cuda:
        batch = batch.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(batch)

    # make score and link map, cropped to the heatmap size reported by the resize
    heat_h, heat_w = size_heatmap[0], size_heatmap[1]
    score_text = y[0, :, :, 0].cpu().data.numpy().astype(np.float32)
    score_link = y[0, :, :, 1].cpu().data.numpy().astype(np.float32)
    score_text = score_text[:heat_h, :heat_w]
    score_link = score_link[:heat_h, :heat_w]

    # post-processing
    boxes, polys = getDetBoxes(
        score_text, score_link, text_threshold, link_threshold, low_text, poly
    )

    # coordinate adjustment
    boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
    for idx in range(len(polys)):
        if polys[idx] is None:
            polys[idx] = boxes[idx]

    # render results (optional)
    score_text = score_text.copy()
    render_img = [
        imgproc.cvt2HeatmapImg(score_text),
        imgproc.cvt2HeatmapImg(score_link),
    ]

    return boxes, polys, render_img
|
||||
142
trainer/craft/utils/util.py
Normal file
142
trainer/craft/utils/util.py
Normal file
@@ -0,0 +1,142 @@
|
||||
from collections import OrderedDict
|
||||
import os
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from data import imgproc
|
||||
from utils import craft_utils
|
||||
|
||||
|
||||
def copyStateDict(state_dict):
    """Return a copy of ``state_dict`` with a leading ``module`` component removed.

    If the first key starts with ``"module"``, the first dot-separated
    component is dropped from every key; otherwise keys are copied unchanged.
    An empty ``state_dict`` yields an empty ``OrderedDict`` (the original
    raised ``IndexError``).

    :param state_dict: mapping of parameter names to values.
    :return: new ``OrderedDict`` with the same values in the same order.
    """
    first_key = next(iter(state_dict.keys()), None)
    # the decision is made once, from the first key, and applied to all keys
    start_idx = 1 if first_key is not None and first_key.startswith("module") else 0

    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = ".".join(k.split(".")[start_idx:])
        new_state_dict[name] = v
    return new_state_dict
|
||||
|
||||
|
||||
def saveInput(
    imagename, vis_dir, image, region_scores, affinity_scores, confidence_mask
):
    """Write a visualisation of one training input to ``vis_dir``.

    The saved image is, left to right: the image with detected boxes, the
    region heatmap overlay, the affinity heatmap overlay and the confidence
    mask. It is written to ``<vis_dir>/<imagename>_input.jpg``.

    :param imagename: a string, or a sequence whose first element is a path;
        in that case the base name without its last four characters is used.
    :param vis_dir: output directory (created when missing).
    :param image: RGB image; a uint8 BGR copy is used for drawing.
    :param region_scores: region score map.
    :param affinity_scores: affinity score map.
    :param confidence_mask: confidence map rendered as a heatmap.
    """
    image = np.uint8(image.copy())
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    boxes, polys = craft_utils.getDetBoxes(
        region_scores, affinity_scores, 0.85, 0.2, 0.5, False
    )

    # double the box coordinates when the image is at least twice as tall as
    # the score map
    if image.shape[0] / region_scores.shape[0] >= 2:
        boxes = np.array(boxes, np.int32) * 2
    else:
        boxes = np.array(boxes, np.int32)

    if len(boxes) > 0:
        # np.clip returns a new array unless ``out`` is given; the original
        # discarded the result, so nothing was ever clipped
        np.clip(boxes[:, :, 0], 0, image.shape[1] - 1, out=boxes[:, :, 0])
        np.clip(boxes[:, :, 1], 0, image.shape[0] - 1, out=boxes[:, :, 1])
        for box in boxes:
            cv2.polylines(image, [np.reshape(box, (-1, 1, 2))], True, (0, 0, 255))
    target_gaussian_heatmap_color = imgproc.cvt2HeatmapImg(region_scores)
    target_gaussian_affinity_heatmap_color = imgproc.cvt2HeatmapImg(affinity_scores)
    confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)

    # overlay: bring every map to the image size, then blend with the image
    height, width, channel = image.shape
    overlay_region = cv2.resize(target_gaussian_heatmap_color, (width, height))
    overlay_aff = cv2.resize(target_gaussian_affinity_heatmap_color, (width, height))
    confidence_mask_gray = cv2.resize(
        confidence_mask_gray, (width, height), interpolation=cv2.INTER_NEAREST
    )
    overlay_region = cv2.addWeighted(image, 0.4, overlay_region, 0.6, 5)
    overlay_aff = cv2.addWeighted(image, 0.4, overlay_aff, 0.7, 6)

    gt_scores = np.concatenate([overlay_region, overlay_aff], axis=1)

    output = np.concatenate([gt_scores, confidence_mask_gray], axis=1)

    output = np.hstack([image, output])

    # synthtext: the name arrives as a sequence of paths rather than a string
    if type(imagename) is not str:
        imagename = imagename[0].split("/")[-1][:-4]

    outpath = vis_dir + f"/{imagename}_input.jpg"
    # exist_ok already covers an existing directory; no separate check needed
    os.makedirs(os.path.dirname(outpath), exist_ok=True)
    cv2.imwrite(outpath, output)
|
||||
# print(f'Logging train input into {outpath}')
|
||||
|
||||
|
||||
def saveImage(
    imagename,
    vis_dir,
    image,
    bboxes,
    affi_bboxes,
    region_scores,
    affinity_scores,
    confidence_mask,
):
    """Write a visualisation of one sample with its boxes to ``vis_dir``.

    The saved image is, left to right: the image with character boxes (red
    in BGR) and affinity boxes (blue in BGR), the region heatmap overlay, the
    affinity heatmap overlay and the confidence mask heatmap. It is written
    to ``<vis_dir>/<imagename>.jpg``.

    :param imagename: a string, or a sequence whose first element is a path;
        in that case the base name without its last four characters is used.
    :param vis_dir: output directory (created when missing).
    :param image: RGB image.
    :param bboxes: sequence of box groups; each group is converted to int32
        and every box in it is drawn.
    :param affi_bboxes: sequence of arrays, one affinity box each.
    :param region_scores: region score map.
    :param affinity_scores: affinity score map.
    :param confidence_mask: confidence map rendered as a heatmap.
    """
    canvas = cv2.cvtColor(np.uint8(image.copy()), cv2.COLOR_RGB2BGR)

    if len(bboxes) > 0:
        for group in bboxes:
            group_int = np.int32(group)
            for row in range(group_int.shape[0]):
                cv2.polylines(
                    canvas,
                    [np.reshape(group_int[row], (-1, 1, 2))],
                    True,
                    (0, 0, 255),
                )

        for affinity_box in affi_bboxes:
            cv2.polylines(
                canvas,
                [np.reshape(affinity_box.astype(np.int32), (-1, 1, 2))],
                True,
                (255, 0, 0),
            )

    region_heatmap = imgproc.cvt2HeatmapImg(region_scores)
    affinity_heatmap = imgproc.cvt2HeatmapImg(affinity_scores)
    confidence_mask_gray = imgproc.cvt2HeatmapImg(confidence_mask)

    # overlay: resize the heatmaps to the image size and blend with the
    # original (unconverted) image
    height, width, channel = image.shape
    target_size = (width, height)
    overlay_region = cv2.addWeighted(
        image.copy(), 0.4, cv2.resize(region_heatmap, target_size), 0.6, 5
    )
    overlay_aff = cv2.addWeighted(
        image.copy(), 0.4, cv2.resize(affinity_heatmap, target_size), 0.6, 5
    )

    heat_map = np.concatenate([overlay_region, overlay_aff], axis=1)

    # synthtext: the name arrives as a sequence of paths rather than a string
    if type(imagename) is not str:
        imagename = imagename[0].split("/")[-1][:-4]

    output = np.concatenate([canvas, heat_map, confidence_mask_gray], axis=1)
    outpath = vis_dir + f"/{imagename}.jpg"
    target_dir = os.path.dirname(outpath)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)

    cv2.imwrite(outpath, output)
|
||||
# print(f'Logging original image into {outpath}')
|
||||
|
||||
|
||||
def save_parser(args):
    """Append all options in ``args`` to ``<results_dir>/opt.txt`` and print them.

    :param args: namespace-like object; ``vars(args)`` supplies the options
        and ``args.results_dir`` the target directory.
    """
    with open(f"{args.results_dir}/opt.txt", "a", encoding="utf-8") as opt_file:
        option_lines = "".join(
            f"{str(key)}: {str(value)}\n" for key, value in vars(args).items()
        )
        opt_log = (
            "------------ Options -------------\n"
            + option_lines
            + "---------------------------------------\n"
        )
        print(opt_log)
        opt_file.write(opt_log)
|
||||
Reference in New Issue
Block a user