Files
easyocr/trainer/craft/utils/inference_boxes.py
2025-07-10 19:42:57 +08:00

362 lines
12 KiB
Python

import os
import re
import itertools
import cv2
import time
import numpy as np
import torch
from torch.autograd import Variable
from utils.craft_utils import getDetBoxes, adjustResultCoordinates
from data import imgproc
from data.dataset import SynthTextDataSet
import math
import xml.etree.ElementTree as elemTree
#-------------------------------------------------------------------------------------------------------------------#
def rotatePoint(xc, yc, xp, yp, theta):
    """Rotate point (xp, yp) around center (xc, yc) by theta radians.

    The result is truncated to ints (Python `int()` truncates toward zero).
    Returns the rotated point as an (x, y) tuple.
    """
    dx = xp - xc
    dy = yp - yc
    c = math.cos(theta)
    s = math.sin(theta)
    # Standard 2-D rotation of the offset vector about the center.
    rx = c * dx + s * dy
    ry = -s * dx + c * dy
    return int(xc + rx), int(yc + ry)
def addRotatedShape(cx, cy, w, h, angle):
    """Return the 4 corners of a w-by-h box centered at (cx, cy), rotated
    by -angle via rotatePoint.

    Corner order is top-left, top-right, bottom-right, bottom-left, each as
    an [x, y] list of ints.
    """
    half_w = w / 2
    half_h = h / 2
    unrotated = [
        (cx - half_w, cy - half_h),
        (cx + half_w, cy - half_h),
        (cx + half_w, cy + half_h),
        (cx - half_w, cy + half_h),
    ]
    return [list(rotatePoint(cx, cy, px, py, -angle)) for px, py in unrotated]
def xml_parsing(xml):
    """Parse a VOC-style XML label file into evaluation box dicts.

    Each appended entry is {"points": np.ndarray, "text": str, "ignore": bool}.
    Objects named "dnc" (do-not-care) get text "###" and ignore=True.
    Rotated boxes (<robndbox>) are converted to 4 corner points via
    addRotatedShape; axis-aligned boxes (<bndbox>) are expanded to 4 corners.
    """
    tree = elemTree.parse(xml)
    annotations = []
    for element in tree.iter(tag="object"):
        # NOTE: one dict per <object>, appended once per box tag found
        # (same aliasing behavior as the original implementation).
        annotation = {"name": element.find("name").text}
        for rbox in element.iter(tag="robndbox"):
            annotation["box_coodi"] = addRotatedShape(
                float(rbox.find("cx").text),
                float(rbox.find("cy").text),
                float(rbox.find("w").text),
                float(rbox.find("h").text),
                float(rbox.find("angle").text),
            )
            annotations.append(annotation)
        for box in element.iter(tag="bndbox"):
            xmin = int(box.find("xmin").text)
            ymin = int(box.find("ymin").text)
            xmax = int(box.find("xmax").text)
            ymax = int(box.find("ymax").text)
            annotation["box_coodi"] = [
                [xmin, ymin],
                [xmax, ymin],
                [xmax, ymax],
                [xmin, ymax],
            ]
            annotations.append(annotation)
    bounds = []
    for ann in annotations:
        is_dnc = ann["name"] == "dnc"
        bounds.append(
            {
                "points": np.array(ann["box_coodi"]),
                "text": "###" if is_dnc else ann["name"],
                "ignore": is_dnc,
            }
        )
    return bounds
#-------------------------------------------------------------------------------------------------------------------#
def load_prescription_gt(dataFolder):
    """Collect parsed XML ground-truth boxes and image paths under a folder.

    Walks dataFolder recursively, pairs each *.jpg with the *.xml that shares
    its stem (pairing relies on sorting both lists), and parses every XML with
    xml_parsing().

    Args:
        dataFolder: root directory to walk.

    Returns:
        (per-image lists of box dicts, sorted list of image paths).

    Raises:
        AssertionError: if a sorted jpg/xml pair does not share a stem.
    """
    total_img_path = []
    total_imgs_bboxes = []
    for root, _, files in os.walk(dataFolder):
        for file in files:
            # endswith() instead of substring test: '.jpg' in file also
            # matched names like "x.jpg.bak" and broke the pairing below.
            if file.endswith(".jpg"):
                total_img_path.append(os.path.join(root, file))
            if file.endswith(".xml"):
                total_imgs_bboxes.append(os.path.join(root, file))
    total_imgs_parsing_bboxes = []
    for img_path, bbox in zip(sorted(total_img_path), sorted(total_imgs_bboxes)):
        # Sorting lines up same-stem pairs; verify before parsing.
        assert img_path.split(".jpg")[0] == bbox.split(".xml")[0]
        total_imgs_parsing_bboxes.append(xml_parsing(bbox))
    return total_imgs_parsing_bboxes, sorted(total_img_path)
# NOTE
def load_prescription_cleval_gt(dataFolder):
    """Collect CLEval-format ground truth (8 comma-separated coords/line).

    Walks dataFolder, pairs each *.jpg with its "*_label_cl.txt" file, and
    reads one flat 8-int point array per line. "text" and "ignore" are left
    as None (CLEval evaluation does not need them here).

    Args:
        dataFolder: root directory to walk.

    Returns:
        (per-image lists of box dicts, sorted list of image paths).

    Raises:
        AssertionError: if a sorted jpg/gt pair does not share a stem.
    """
    total_img_path = []
    total_gt_path = []
    for root, _, files in os.walk(dataFolder):
        for file in files:
            # endswith() avoids false matches on names merely containing
            # the pattern (the old substring checks did match those).
            if file.endswith(".jpg"):
                total_img_path.append(os.path.join(root, file))
            if file.endswith("_cl.txt"):
                total_gt_path.append(os.path.join(root, file))
    total_imgs_parsing_bboxes = []
    for img_path, gt_path in zip(sorted(total_img_path), sorted(total_gt_path)):
        assert img_path.split(".jpg")[0] == gt_path.split("_label_cl.txt")[0]
        # Use a context manager: the old open(...).readlines() leaked the
        # file handle until garbage collection.
        with open(gt_path, encoding="utf-8") as f:
            lines = f.readlines()
        word_bboxes = []
        for line in lines:
            # Strip a possible UTF-8 BOM before splitting the CSV fields.
            box_info = line.strip().encode("utf-8").decode("utf-8-sig").split(",")
            box_points = [int(box_info[i]) for i in range(8)]
            word_bboxes.append(
                {"points": np.array(box_points), "text": None, "ignore": None}
            )
        total_imgs_parsing_bboxes.append(word_bboxes)
    return total_imgs_parsing_bboxes, sorted(total_img_path)
def load_synthtext_gt(data_folder):
    """Load word-level ground truth for the first 100 SynthText images.

    Args:
        data_folder: SynthText root; also passed as saved_gt_dir.

    Returns:
        (per-image lists of {"points", "text", "ignore"} dicts, image paths).

    Raises:
        ValueError: if the number of split words and word boxes disagree
            for an image (was an interactive ipdb breakpoint before).
    """
    synth_dataset = SynthTextDataSet(
        output_size=768, data_dir=data_folder, saved_gt_dir=data_folder, logging=False
    )
    img_names, img_bbox, img_words = synth_dataset.load_data(bbox="word")
    total_img_path = []
    total_imgs_bboxes = []
    for index in range(len(img_bbox[:100])):
        img_path = os.path.join(data_folder, img_names[index][0])
        total_img_path.append(img_path)
        try:
            # Multi-word image: (2, 4, n) -> (n, 4, 2), one 4-point box per word.
            wordbox = img_bbox[index].transpose((2, 1, 0))
        except ValueError:
            # Single-word image is stored 2-D; add the word axis first.
            # (Narrowed from a bare except: transpose raises ValueError here.)
            wordbox = np.expand_dims(img_bbox[index], axis=0)
            wordbox = wordbox.transpose((0, 2, 1))
        # SynthText transcriptions pack several words per string; split on
        # newlines/spaces and drop empties.
        words = [re.split(" \n|\n |\n| ", t.strip()) for t in img_words[index]]
        words = list(itertools.chain(*words))
        words = [t for t in words if len(t) > 0]
        if len(words) != len(wordbox):
            # Fail loudly instead of dropping into a debugger (old code
            # called ipdb.set_trace(), which hangs non-interactive runs).
            raise ValueError(
                "word/box count mismatch for %s: %d words vs %d boxes"
                % (img_path, len(words), len(wordbox))
            )
        single_img_bboxes = []
        for j in range(len(words)):
            single_img_bboxes.append(
                {"points": wordbox[j], "text": words[j], "ignore": False}
            )
        total_imgs_bboxes.append(single_img_bboxes)
    return total_imgs_bboxes, total_img_path
def load_icdar2015_gt(dataFolder, isTraing=False):
    """Load ICDAR2015 ground-truth boxes and transcriptions.

    GT files are "gt_<name>.txt" with lines "x1,y1,...,x4,y4,transcription";
    a transcription of "###" marks a do-not-care region.

    Args:
        dataFolder: dataset root containing the image and GT folders.
        isTraing: pick the training split instead of the test split.

    Returns:
        (per-image lists of {"points", "text", "ignore"} dicts, image paths).
    """
    if isTraing:
        img_folderName = "ch4_training_images"
        gt_folderName = "ch4_training_localization_transcription_gt"
    else:
        img_folderName = "ch4_test_images"
        gt_folderName = "ch4_test_localization_transcription_gt"
    gt_folder_path = os.listdir(os.path.join(dataFolder, gt_folderName))
    total_imgs_bboxes = []
    total_img_path = []
    for gt_path in gt_folder_path:
        gt_path = os.path.join(os.path.join(dataFolder, gt_folderName), gt_path)
        # Derive the image path from the GT path ("gt_X.txt" -> "X.jpg").
        img_path = (
            gt_path.replace(gt_folderName, img_folderName)
            .replace(".txt", ".jpg")
            .replace("gt_", "")
        )
        image = cv2.imread(img_path)
        # Context manager: the old open(...).readlines() leaked the handle.
        with open(gt_path, encoding="utf-8") as f:
            lines = f.readlines()
        single_img_bboxes = []
        for line in lines:
            box_info_dict = {"points": None, "text": None, "ignore": None}
            # Strip a possible UTF-8 BOM before splitting CSV fields.
            box_info = line.strip().encode("utf-8").decode("utf-8-sig").split(",")
            box_points = [int(box_info[j]) for j in range(8)]
            # The transcription may itself contain commas; rejoin the rest.
            word = ",".join(box_info[8:])
            box_points = np.array(box_points, np.int32).reshape(4, 2)
            # np.int was removed in NumPy 1.24 (AttributeError at runtime);
            # np.int32 is already the array's dtype and what polylines needs.
            cv2.polylines(
                image, [box_points.astype(np.int32)], True, (0, 0, 255), 1
            )
            box_info_dict["points"] = box_points
            box_info_dict["text"] = word
            box_info_dict["ignore"] = word == "###"
            single_img_bboxes.append(box_info_dict)
        total_imgs_bboxes.append(single_img_bboxes)
        total_img_path.append(img_path)
    return total_imgs_bboxes, total_img_path
def load_icdar2013_gt(dataFolder, isTraing=False):
    """Load ICDAR2013 ground-truth boxes and transcriptions.

    GT lines are "xmin,ymin,xmax,ymax,transcription"; the axis-aligned box
    is expanded to 4 corner points. "###" marks a do-not-care region.

    NOTE(review): both branches below point at the *test* split; if a
    training split exists its folder names should go in the isTraing branch
    — confirm against the dataset layout before relying on isTraing=True.

    Args:
        dataFolder: dataset root containing the image and GT folders.
        isTraing: currently has no effect (see note above).

    Returns:
        (per-image lists of {"points", "text", "ignore"} dicts, image paths).
    """
    if isTraing:
        img_folderName = "Challenge2_Test_Task12_Images"
        gt_folderName = "Challenge2_Test_Task1_GT"
    else:
        img_folderName = "Challenge2_Test_Task12_Images"
        gt_folderName = "Challenge2_Test_Task1_GT"
    gt_folder_path = os.listdir(os.path.join(dataFolder, gt_folderName))
    total_imgs_bboxes = []
    total_img_path = []
    for gt_path in gt_folder_path:
        gt_path = os.path.join(os.path.join(dataFolder, gt_folderName), gt_path)
        # Derive the image path from the GT path ("gt_X.txt" -> "X.jpg").
        img_path = (
            gt_path.replace(gt_folderName, img_folderName)
            .replace(".txt", ".jpg")
            .replace("gt_", "")
        )
        image = cv2.imread(img_path)
        # Context manager: the old open(...).readlines() leaked the handle.
        with open(gt_path, encoding="utf-8") as f:
            lines = f.readlines()
        single_img_bboxes = []
        for line in lines:
            box_info_dict = {"points": None, "text": None, "ignore": None}
            # Strip a possible UTF-8 BOM before splitting CSV fields.
            box_info = line.strip().encode("utf-8").decode("utf-8-sig").split(",")
            box = [int(box_info[j]) for j in range(4)]
            # The transcription may itself contain commas; rejoin the rest.
            word = ",".join(box_info[4:])
            # Expand (xmin, ymin, xmax, ymax) to TL, TR, BR, BL corners.
            box = [
                [box[0], box[1]],
                [box[2], box[1]],
                [box[2], box[3]],
                [box[0], box[3]],
            ]
            box_info_dict["points"] = box
            box_info_dict["text"] = word
            box_info_dict["ignore"] = word == "###"
            single_img_bboxes.append(box_info_dict)
        total_imgs_bboxes.append(single_img_bboxes)
        total_img_path.append(img_path)
    return total_imgs_bboxes, total_img_path
def test_net(
    net,
    image,
    text_threshold,
    link_threshold,
    low_text,
    cuda,
    poly,
    canvas_size=1280,
    mag_ratio=1.5,
):
    """Run one CRAFT forward pass and post-process into detection boxes.

    Args:
        net: CRAFT model; called as y, feature = net(x) with x of shape
            (1, 3, H, W).
        image: input image array (H, W, C) as expected by imgproc helpers.
        text_threshold, link_threshold, low_text: getDetBoxes thresholds.
        cuda: move the input tensor to GPU before the forward pass.
        poly: request polygon output from getDetBoxes.
        canvas_size: resize target for the longer image side.
        mag_ratio: magnification applied during resizing.

    Returns:
        (boxes, polys, [text heatmap image, link heatmap image]), with
        coordinates mapped back to the original image scale.
    """
    # Resize so the longer side fits canvas_size (optionally magnified).
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio
    )
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocess: mean/variance normalize, then [h, w, c] -> [1, c, h, w].
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)
    # torch.autograd.Variable has been a deprecated no-op since PyTorch 0.4;
    # a plain tensor under no_grad() behaves identically.
    x = x.unsqueeze(0)
    if cuda:
        x = x.cuda()

    # Forward pass without building the autograd graph.
    with torch.no_grad():
        y, feature = net(x)

    # Region ("text") and affinity ("link") score maps from channel 0/1.
    score_text = y[0, :, :, 0].cpu().data.numpy().astype(np.float32)
    score_link = y[0, :, :, 1].cpu().data.numpy().astype(np.float32)
    # Crop away padding introduced by resize_aspect_ratio.
    score_text = score_text[: size_heatmap[0], : size_heatmap[1]]
    score_link = score_link[: size_heatmap[0], : size_heatmap[1]]

    # Post-processing: threshold score maps into boxes / polygons.
    boxes, polys = getDetBoxes(
        score_text, score_link, text_threshold, link_threshold, low_text, poly
    )

    # Map detections back to original-image coordinates; fall back to the
    # rectangular box when no polygon was produced.
    boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    # Heat-map renderings for visualization/debugging.
    render_score_text = imgproc.cvt2HeatmapImg(score_text.copy())
    render_score_link = imgproc.cvt2HeatmapImg(score_link)
    render_img = [render_score_text, render_score_link]

    return boxes, polys, render_img