4digit_training done
This commit is contained in:
@@ -121,6 +121,9 @@ def hierarchical_dataset(root, opt, select_data='/'):
|
||||
print(dataset_log)
|
||||
dataset_log += '\n'
|
||||
for dirpath, dirnames, filenames in os.walk(root+'/'):
|
||||
+ print(f"dirpath : {dirpath}")
|
||||
+ print(f"dirnames : {dirnames}")
|
||||
|
||||
if not dirnames:
|
||||
select_flag = False
|
||||
for selected_d in select_data:
|
||||
@@ -146,7 +149,7 @@ class OCRDataset(Dataset):
|
||||
self.root = root
|
||||
self.opt = opt
|
||||
print(root)
|
||||
- self.df = pd.read_csv(os.path.join(root,'labels.csv'), sep='^([^,]+),', engine='python', usecols=['filename', 'words'], keep_default_na=False)
|
||||
+ self.df = pd.read_csv(os.path.join(root,'labels.csv'), sep='^([^,]+),',dtype={'words': str}, engine='python', usecols=['filename', 'words'], keep_default_na=False)
|
||||
self.nSamples = len(self.df)
|
||||
|
||||
if self.opt.data_filtering_off:
|
||||
@@ -159,7 +162,7 @@ class OCRDataset(Dataset):
|
||||
if len(label) > self.opt.batch_max_length:
|
||||
continue
|
||||
except:
|
||||
- print(label)
|
||||
+ print(f"type of label {type(label)} \n {label}")
|
||||
out_of_char = f'[^{self.opt.character}]'
|
||||
if re.search(out_of_char, label.lower()):
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user