4digit_training done
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
number: '0123456789'
|
||||
experiment_name: '4digit'
|
||||
symbol: ""
|
||||
lang_char: ''
|
||||
symbol: None
|
||||
lang_char: None
|
||||
train_data: 'all_data'
|
||||
valid_data: 'all_data/4digit_valid'
|
||||
manualSeed: 1111
|
||||
@@ -9,8 +9,7 @@ workers: 6
|
||||
batch_size: 32 #32
|
||||
num_iter: 3000
|
||||
valInterval: 5
|
||||
# saved_model: '' #'saved_models/en_filtered/iter_300000.pth'
|
||||
svaed_model: 'saved_models/4digit/iter_3000.pth'
|
||||
saved_model: '' #'saved_models/en_filtered/iter_300000.pth'
|
||||
FT: False
|
||||
optim: False # default is Adadelta
|
||||
lr: 1.
|
||||
|
||||
@@ -31,7 +31,7 @@ PAD: True
|
||||
contrast_adjust: 0.0
|
||||
data_filtering_off: False
|
||||
# Model Architecture
|
||||
Transformation: 'None'
|
||||
Transformation: 'TPS'
|
||||
FeatureExtraction: 'ResNet'
|
||||
SequenceModeling: 'BiLSTM'
|
||||
Prediction: 'CTC'
|
||||
|
||||
@@ -121,6 +121,9 @@ def hierarchical_dataset(root, opt, select_data='/'):
|
||||
print(dataset_log)
|
||||
dataset_log += '\n'
|
||||
for dirpath, dirnames, filenames in os.walk(root+'/'):
|
||||
print(f"dirpath : {dirpath}")
|
||||
print(f"dirnames : {dirnames}")
|
||||
|
||||
if not dirnames:
|
||||
select_flag = False
|
||||
for selected_d in select_data:
|
||||
@@ -146,7 +149,7 @@ class OCRDataset(Dataset):
|
||||
self.root = root
|
||||
self.opt = opt
|
||||
print(root)
|
||||
self.df = pd.read_csv(os.path.join(root,'labels.csv'), sep='^([^,]+),', engine='python', usecols=['filename', 'words'], keep_default_na=False)
|
||||
self.df = pd.read_csv(os.path.join(root,'labels.csv'), sep='^([^,]+),',dtype={'words': str}, engine='python', usecols=['filename', 'words'], keep_default_na=False)
|
||||
self.nSamples = len(self.df)
|
||||
|
||||
if self.opt.data_filtering_off:
|
||||
@@ -159,7 +162,7 @@ class OCRDataset(Dataset):
|
||||
if len(label) > self.opt.batch_max_length:
|
||||
continue
|
||||
except:
|
||||
print(label)
|
||||
print(f"type of label {type(label)} \n {label}")
|
||||
out_of_char = f'[^{self.opt.character}]'
|
||||
if re.search(out_of_char, label.lower()):
|
||||
continue
|
||||
|
||||
BIN
trainer/saved_models/4digit/best_accuracy.pth
Normal file
BIN
trainer/saved_models/4digit/best_accuracy.pth
Normal file
Binary file not shown.
BIN
trainer/saved_models/4digit/best_norm_ED.pth
Normal file
BIN
trainer/saved_models/4digit/best_norm_ED.pth
Normal file
Binary file not shown.
130
trainer/saved_models/4digit/log_dataset.txt
Normal file
130
trainer/saved_models/4digit/log_dataset.txt
Normal file
@@ -0,0 +1,130 @@
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: 4digit_train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of 4digit_train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of 4digit_train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data/4digit_valid dataset: /
|
||||
sub-directory: /. num samples: 200
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: 4digit_train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of 4digit_train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of 4digit_train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data/4digit_valid dataset: /
|
||||
sub-directory: /. num samples: 200
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: 4digit_train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of 4digit_train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of 4digit_train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data/4digit_valid dataset: /
|
||||
sub-directory: /. num samples: 200
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: 4digit_train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of 4digit_train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of 4digit_train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data/4digit_valid dataset: /
|
||||
sub-directory: /. num samples: 200
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: 4digit_train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of 4digit_train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of 4digit_train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data/4digit_valid dataset: /
|
||||
sub-directory: /. num samples: 200
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: 4digit_train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of 4digit_train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of 4digit_train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data/4digit_valid dataset: /
|
||||
sub-directory: /. num samples: 200
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: 4digit_train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of 4digit_train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of 4digit_train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data/4digit_valid dataset: /
|
||||
sub-directory: /. num samples: 200
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['4digit_train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: 4digit_train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of 4digit_train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of 4digit_train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data/4digit_valid dataset: /
|
||||
sub-directory: /. num samples: 200
|
||||
--------------------------------------------------------------------------------
|
||||
5400
trainer/saved_models/4digit/log_train.txt
Normal file
5400
trainer/saved_models/4digit/log_train.txt
Normal file
File diff suppressed because it is too large
Load Diff
92
trainer/saved_models/4digit/opt.txt
Normal file
92
trainer/saved_models/4digit/opt.txt
Normal file
@@ -0,0 +1,92 @@
|
||||
------------ Options -------------
|
||||
number: 0123456789
|
||||
experiment_name: 4digit
|
||||
symbol:
|
||||
lang_char:
|
||||
train_data: all_data
|
||||
valid_data: all_data/4digit_valid
|
||||
manualSeed: 1111
|
||||
workers: 6
|
||||
batch_size: 32
|
||||
num_iter: 3000
|
||||
valInterval: 5
|
||||
saved_model:
|
||||
FT: False
|
||||
optim: False
|
||||
lr: 1.0
|
||||
beta1: 0.9
|
||||
rho: 0.95
|
||||
eps: 1e-08
|
||||
grad_clip: 5
|
||||
select_data: ['4digit_train']
|
||||
batch_ratio: ['1']
|
||||
total_data_usage_ratio: 1.0
|
||||
batch_max_length: 34
|
||||
imgH: 32
|
||||
imgW: 128
|
||||
rgb: True
|
||||
contrast_adjust: 0.0
|
||||
sensitive: True
|
||||
PAD: True
|
||||
data_filtering_off: False
|
||||
Transformation: TPS
|
||||
FeatureExtraction: ResNet
|
||||
SequenceModeling: BiLSTM
|
||||
Prediction: CTC
|
||||
num_fiducial: 20
|
||||
input_channel: 3
|
||||
output_channel: 256
|
||||
hidden_size: 256
|
||||
decode: greedy
|
||||
new_prediction: False
|
||||
freeze_FeatureFxtraction: False
|
||||
freeze_SequenceModeling: False
|
||||
character: 0123456789
|
||||
num_class: 11
|
||||
---------------------------------------
|
||||
------------ Options -------------
|
||||
number: 0123456789
|
||||
experiment_name: 4digit
|
||||
symbol: None
|
||||
lang_char: None
|
||||
train_data: all_data
|
||||
valid_data: all_data/4digit_valid
|
||||
manualSeed: 1111
|
||||
workers: 6
|
||||
batch_size: 32
|
||||
num_iter: 3000
|
||||
valInterval: 5
|
||||
saved_model:
|
||||
FT: False
|
||||
optim: False
|
||||
lr: 1.0
|
||||
beta1: 0.9
|
||||
rho: 0.95
|
||||
eps: 1e-08
|
||||
grad_clip: 5
|
||||
select_data: ['4digit_train']
|
||||
batch_ratio: ['1']
|
||||
total_data_usage_ratio: 1.0
|
||||
batch_max_length: 34
|
||||
imgH: 32
|
||||
imgW: 128
|
||||
rgb: True
|
||||
contrast_adjust: 0.0
|
||||
sensitive: True
|
||||
PAD: True
|
||||
data_filtering_off: False
|
||||
Transformation: TPS
|
||||
FeatureExtraction: ResNet
|
||||
SequenceModeling: BiLSTM
|
||||
Prediction: CTC
|
||||
num_fiducial: 20
|
||||
input_channel: 3
|
||||
output_channel: 256
|
||||
hidden_size: 256
|
||||
decode: greedy
|
||||
new_prediction: False
|
||||
freeze_FeatureFxtraction: False
|
||||
freeze_SequenceModeling: False
|
||||
character: 0123456789
|
||||
num_class: 11
|
||||
---------------------------------------
|
||||
@@ -28,3 +28,133 @@ Total_batch_size: 32 = 32
|
||||
dataset_root: all_data/valid dataset: /
|
||||
sub-directory: /. num samples: 194
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: ['train']
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: ['train']
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: ['train']
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: ['train']
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data
|
||||
opt.select_data: ['train']
|
||||
opt.batch_ratio: ['1']
|
||||
--------------------------------------------------------------------------------
|
||||
dataset_root: all_data dataset: train
|
||||
sub-directory: /4digit_train num samples: 1000
|
||||
num total samples of train: 1000 x 1.0 (total_data_usage_ratio) = 1000
|
||||
num samples of train per batch: 32 x 1.0 (batch_ratio) = 32
|
||||
--------------------------------------------------------------------------------
|
||||
Total_batch_size: 32 = 32
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
@@ -44,6 +44,7 @@ def train(opt, show_number = 2, amp=False):
|
||||
|
||||
log = open(f'./saved_models/{opt.experiment_name}/log_dataset.txt', 'a', encoding="utf8")
|
||||
AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD, contrast_adjust=opt.contrast_adjust)
|
||||
print(f"opt.valid_data : {opt.valid_data}")
|
||||
valid_dataset, valid_dataset_log = hierarchical_dataset(root=opt.valid_data, opt=opt)
|
||||
|
||||
valid_loader = torch.utils.data.DataLoader(
|
||||
@@ -68,7 +69,7 @@ def train(opt, show_number = 2, amp=False):
|
||||
model = Model(opt)
|
||||
print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
|
||||
opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
|
||||
opt.SequenceModeling, opt.Prediction, opt.saved_model)
|
||||
opt.SequenceModeling, opt.Prediction)
|
||||
|
||||
if opt.saved_model != '':
|
||||
pretrained_dict = torch.load(opt.saved_model)
|
||||
|
||||
@@ -10,11 +10,14 @@ def get_config(file_path):
|
||||
with open(file_path, 'r', encoding="utf8") as stream:
|
||||
opt = yaml.safe_load(stream)
|
||||
opt = AttrDict(opt)
|
||||
if opt.lang_char == 'None':
|
||||
|
||||
if opt.lang_char == 'None' and opt.symbol=='None':
|
||||
opt.character = opt.number
|
||||
elif opt.lang_char == 'None':
|
||||
characters = ''
|
||||
for data in opt['select_data'].split('-'):
|
||||
csv_path = os.path.join(opt['train_data'], data, 'labels.csv')
|
||||
df = pd.read_csv(csv_path, sep='^([^,]+),', engine='python', usecols=['filename', 'words'], keep_default_na=False)
|
||||
df = pd.read_csv(csv_path, sep='^([^,]+),', engine='python',dtype={'words': str}, usecols=['filename', 'words'], keep_default_na=False)
|
||||
all_char = ''.join(df['words'])
|
||||
characters += ''.join(set(all_char))
|
||||
characters = sorted(set(characters))
|
||||
@@ -26,6 +29,6 @@ def get_config(file_path):
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Load configuration
|
||||
# opt = get_config("config_files/4digit_config.yaml")
|
||||
opt = get_config("config_files/en_filtered_config.yaml")
|
||||
# opt = get_config("config_files/en_filtered_config.yaml")
|
||||
opt = get_config("config_files/4digit_config.yaml")
|
||||
train(opt, amp=False)
|
||||
Reference in New Issue
Block a user