Commit 46826d9

aarora8 authored and danpovey committed
[egs] Add scripts for yomdle korean (#2942)
1 parent b984543 commit 46826d9

32 files changed (+2105, -5 lines)
File renamed without changes.
File renamed without changes.

egs/yomdle_korean/README.txt

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
This directory contains example scripts for OCR on the Yomdle and Slam datasets.
Training is done on the Yomdle dataset and testing is done on Slam.
LM rescoring is also done with extra corpus data obtained from various sources.

egs/yomdle_korean/v1/cmd.sh

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
export cmd="queue.pl"
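
As a concrete illustration of the comment above (this snippet is not part of the commit; the values are assumptions), a local-machine variant of cmd.sh and a minimal conf/queue.conf override might look like:

# Local-machine variant: run.pl executes jobs directly on this host
# instead of submitting them to a queue; run heavy stages one at a time.
export cmd="run.pl"

# Hypothetical conf/queue.conf for a GridEngine cluster whose memory
# option differs from the default (compare 'default_config' in utils/queue.pl):
# command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
# option mem=* -l mem_free=$0,ram_free=$0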

egs/yomdle_korean/v1/image

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
../../cifar/v1/image/
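
(This one-line file is a symlink: the recipe reuses the shared image-processing utilities from egs/cifar/v1/image/ rather than duplicating them.)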

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
#!/bin/bash
# Copyright 2018 Hossein Hadian
#           2018 Ashish Arora

# Apache 2.0
# This script performs data augmentation.

nj=4
cmd=run.pl
feat_dim=40
vertical_shift=0
echo "$0 $@"

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh || exit 1;

srcdir=$1
outdir=$2
datadir=$3

mkdir -p $datadir/augmentations
echo "copying $srcdir to $datadir/augmentations/aug1, allowed length, creating feats.scp"

for set in aug1; do
  image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \
    $srcdir $datadir/augmentations/$set
  cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt
  local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \
    --vertical-shift $vertical_shift \
    --fliplr false --augment 'random_scale' $datadir/augmentations/$set
done

echo "combining original data and data from different augmentations"
utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/aug1
cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt
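
A hypothetical invocation of the augmentation script above (the local/augment_data.sh path and directory names are assumptions, not from the commit); the three positional arguments are the source data dir, the combined output dir, and a working dir for the augmented copies:

# Illustrative usage; script path and directory names are assumed.
local/augment_data.sh --nj 30 --cmd "$cmd" --feat-dim 40 \
  data/train data/train_aug data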

egs/yomdle_korean/v1/local/chain/compare_wer.sh

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
#!/bin/bash

# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}

# Copyright 2017 Chun Chieh Chang
#           2017 Ashish Arora

if [ $# == 0 ]; then
  echo "Usage: $0: <dir1> [<dir2> ... ]"
  echo "e.g.: $0 exp/chain/cnn{1a,1b}"
  exit 1
fi

echo "# $0 $*"
used_epochs=false

echo -n "# System               "
for x in $*; do printf "% 10s" " $(basename $x)"; done
echo

echo -n "# WER                  "
for x in $*; do
  wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}')
  printf "% 10s" $wer
done
echo

echo -n "# WER (rescored)       "
for x in $*; do
  wer=$(cat $x/decode_test_rescored/scoring_kaldi/best_wer | awk '{print $2}')
  printf "% 10s" $wer
done
echo

echo -n "# CER                  "
for x in $*; do
  cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}')
  printf "% 10s" $cer
done
echo

echo -n "# CER (rescored)       "
for x in $*; do
  cer=$(cat $x/decode_test_rescored/scoring_kaldi/best_cer | awk '{print $2}')
  printf "% 10s" $cer
done
echo

if $used_epochs; then
  exit 0;  # the diagnostics aren't comparable between regular and discriminatively trained systems.
fi

echo -n "# Final train prob     "
for x in $*; do
  prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" $prob
done
echo

echo -n "# Final valid prob     "
for x in $*; do
  prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
  printf "% 10s" $prob
done
echo
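
A typical invocation, taken from the script's own usage line; it prints one column per experiment dir, reading each dir's decode_test*/scoring_kaldi/best_wer and best_cer files and the final train/valid probs from the training logs:

local/chain/compare_wer.sh exp/chain/cnn{1a,1b}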

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
tuning/run_cnn_e2eali_1b.sh
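
(This one-line file appears to be a symlink pointing the recipe's default cnn-e2eali run script at tuning/run_cnn_e2eali_1b.sh, following the usual Kaldi tuning/ convention.)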

Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
#!/bin/bash

# Copyright 2017 Hossein Hadian

# This script does end2end chain training (i.e. from scratch).
# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/
# System                    e2e_cnn_1a
#                    score_basic  score_normalized
# WER                   13.64         10.6
# WER (rescored)        13.13         10.2
# CER                    2.99          3.0
# CER (rescored)         2.88          2.9
# Final train prob     0.0113
# Final valid prob     0.0152
# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a
# exp/chain/e2e_cnn_1a: num-iters=48 nj=5..8 num-params=3.0M dim=40->352 combine=0.047->0.047 (over 2) logprob:train/valid[31,47,final]=(0.002,0.008,0.011/0.008,0.013,0.015)

set -e
# configs for 'chain'
stage=0
nj=30
train_stage=-10
get_egs_stage=-10
affix=1a

# training options
tdnn_dim=450
minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4
cmvn_opts="--norm-means=false --norm-vars=false"
train_set=train
lang_decode=data/lang
decode_e2e=true
# End configuration section.
echo "$0 $@"  # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

lang=data/lang_e2e
treedir=exp/chain/e2e_monotree  # it's actually just a trivial tree (no tree building)
dir=exp/chain/e2e_cnn_${affix}

if [ $stage -le 0 ]; then
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file. [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  rm -rf $lang
  cp -r data/lang $lang
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi

if [ $stage -le 1 ]; then
  steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \
    --shared-phones true \
    --type mono \
    data/$train_set $lang $treedir
  $cmd $treedir/log/make_phone_lm.log \
    cat data/$train_set/text \| \
    steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \
    utils/sym2int.pl -f 2- data/lang/phones.txt \| \
    chain-est-phone-lm --num-extra-lm-states=500 \
    ark:- $treedir/phone_lm.fst
fi

if [ $stage -le 2 ]; then
  echo "$0: creating neural net configs using the xconfig parser";
  num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')
  cnn_opts="l2-regularize=0.075"
  tdnn_opts="l2-regularize=0.075"
  output_opts="l2-regularize=0.1"
  common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36"
  common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70"
  common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70"

  mkdir -p $dir/configs
  cat <<EOF > $dir/configs/network.xconfig
  input dim=40 name=input
  conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1
  conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2
  conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2
  conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2
  conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2
  conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3
  conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3
  relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
  relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
  relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
  ## adding the layers for chain branch
  relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts
  output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts
EOF

  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
fi

if [ $stage -le 3 ]; then
  steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \
    --cmd "$cmd" \
    --feat.cmvn-opts "$cmvn_opts" \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.apply-deriv-weights true \
    --egs.stage $get_egs_stage \
    --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \
    --chain.frame-subsampling-factor 4 \
    --chain.alignment-subsampling-factor 4 \
    --trainer.add-option="--optimization.memory-compression-level=2" \
    --trainer.num-chunk-per-minibatch $minibatch_size \
    --trainer.frames-per-iter 1500000 \
    --trainer.num-epochs 3 \
    --trainer.optimization.momentum 0 \
    --trainer.optimization.num-jobs-initial 5 \
    --trainer.optimization.num-jobs-final 8 \
    --trainer.optimization.initial-effective-lrate 0.001 \
    --trainer.optimization.final-effective-lrate 0.0001 \
    --trainer.optimization.shrink-value 1.0 \
    --trainer.max-param-change 2.0 \
    --cleanup.remove-egs true \
    --feat-dir data/${train_set} \
    --tree-dir $treedir \
    --dir $dir || exit 1;
fi
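
A hypothetical end-to-end run of the script above (the local/chain/run_e2e_cnn.sh path and flag values are assumptions; the defaults already match the results quoted in the script's header):

# Illustrative usage from egs/yomdle_korean/v1/ after data preparation.
# Stage 0 builds the one-state-per-phone topo in data/lang_e2e, stage 1
# runs flat-start preparation and estimates the phone LM, stage 2
# generates the CNN/TDNN configs, and stage 3 trains with train_e2e.py.
local/chain/run_e2e_cnn.sh --stage 0 --nj 30 --affix 1a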
