#!/bin/bash

# Copyright 2017  Hossein Hadian

# This script does end-to-end chain training (i.e. from scratch).
# local/chain/compare_wer.sh exp/chain/e2e_cnn_1a/
# System                  e2e_cnn_1a
#                    score_basic  score_normalized
# WER                    13.64        10.6
# WER (rescored)         13.13        10.2
# CER                     2.99         3.0
# CER (rescored)          2.88         2.9
# Final train prob      0.0113
# Final valid prob      0.0152
# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a
# exp/chain/e2e_cnn_1a: num-iters=48 nj=5..8 num-params=3.0M dim=40->352 combine=0.047->0.047 (over 2) logprob:train/valid[31,47,final]=(0.002,0.008,0.011/0.008,0.013,0.015)

set -e
# configs for 'chain'
stage=0
nj=30
train_stage=-10
get_egs_stage=-10
affix=1a

# training options
tdnn_dim=450
minibatch_size=150=64,32/300=32,16/600=16,8/1200=8,4
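# Note: minibatch_size is a chunk-length-dependent rule of the form
# "max-chunk-length=sizes/...": egs with chunks of up to 150 frames are grouped
# into minibatches of 64 (32 for the remainder), chunks of up to 300 frames
# into minibatches of 32 (16 for the remainder), and so on, so longer chunks
# get smaller minibatches and GPU memory use stays roughly constant.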
cmvn_opts="--norm-means=false --norm-vars=false"
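# With both options false, no mean or variance normalization is applied; the
# raw 40-dim features go into the network as-is (the batchnorm layers inside
# the network handle normalization instead).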
train_set=train
lang_decode=data/lang
decode_e2e=true
# End configuration section.
echo "$0 $@"  # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh
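
# Any of the config variables above can be overridden from the command line,
# e.g. (illustrative invocation; the script path depends on where this recipe lives):
#   local/chain/run_e2e_cnn.sh --stage 2 --affix 1b --nj 20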

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

lang=data/lang_e2e
treedir=exp/chain/e2e_monotree  # it's actually just a trivial tree (no tree building)
dir=exp/chain/e2e_cnn_${affix}

if [ $stage -le 0 ]; then
  # Create a version of the lang/ directory that has one state per phone in
  # the topo file. (Note: it really has two states; the first occurs exactly
  # once, and the second has zero or more repeats.)
  rm -rf $lang
  cp -r data/lang $lang
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi

if [ $stage -le 1 ]; then
  steps/nnet3/chain/e2e/prepare_e2e.sh --nj $nj --cmd "$cmd" \
    --shared-phones true \
    --type mono \
    data/$train_set $lang $treedir
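
  # Estimate a phone-level language model from the training transcripts; the
  # resulting phone_lm.fst is what the denominator graph for the LF-MMI
  # ('chain') objective is built from. Roughly, --num-extra-lm-states controls
  # how many states beyond the bigram baseline are kept, i.e. the size and
  # detail of the phone LM.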
  $cmd $treedir/log/make_phone_lm.log \
    cat data/$train_set/text \| \
    steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \
    utils/sym2int.pl -f 2- data/lang/phones.txt \| \
    chain-est-phone-lm --num-extra-lm-states=500 \
      ark:- $treedir/phone_lm.fst
fi

if [ $stage -le 2 ]; then
  echo "$0: creating neural net configs using the xconfig parser";
  num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')
  cnn_opts="l2-regularize=0.075"
  tdnn_opts="l2-regularize=0.075"
  output_opts="l2-regularize=0.1"
  common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36"
  common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70"
  common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70"
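
  # The common* strings share the conv-layer options: height-offsets give the
  # filter extent along the frequency axis (a kernel height of 5, 5 and 3,
  # respectively), and num-filters-out the number of output channels. Roughly,
  # leaving required-time-offsets empty relaxes which input frames must be
  # present, which helps near utterance edges.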

  mkdir -p $dir/configs
  cat <<EOF > $dir/configs/network.xconfig
  input dim=40 name=input
  conv-relu-batchnorm-layer name=cnn1 height-in=40 height-out=40 time-offsets=-3,-2,-1,0,1,2,3 $common1
  conv-relu-batchnorm-layer name=cnn2 height-in=40 height-out=20 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2
  conv-relu-batchnorm-layer name=cnn3 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2
  conv-relu-batchnorm-layer name=cnn4 height-in=20 height-out=20 time-offsets=-4,-2,0,2,4 $common2
  conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2
  conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-4,0,4 $common3
  conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-4,0,4 $common3
  relu-batchnorm-layer name=tdnn1 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
  relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
  relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
  ## adding the layers for chain branch
  relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts
  output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts
EOF
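
  # The two height-subsample-out=2 layers reduce the frequency axis 40 -> 20 -> 10;
  # the output layer's dimension is the number of pdfs in the trivial tree, and
  # include-log-softmax=false because the chain (LF-MMI) objective is computed
  # on the raw network outputs.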

  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
fi

if [ $stage -le 3 ]; then
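  # Flat-start LF-MMI training: no prior alignments or HMM-GMM system are
  # needed. frame-subsampling-factor 4 means the network output (and the
  # supervision) is at 1/4 of the input frame rate, and the number of parallel
  # GPU jobs ramps from 5 to 8 over the course of training.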
  steps/nnet3/chain/e2e/train_e2e.py --stage $train_stage \
    --cmd "$cmd" \
    --feat.cmvn-opts "$cmvn_opts" \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.apply-deriv-weights true \
    --egs.stage $get_egs_stage \
    --egs.opts "--num_egs_diagnostic 100 --num_utts_subset 400" \
    --chain.frame-subsampling-factor 4 \
    --chain.alignment-subsampling-factor 4 \
    --trainer.add-option="--optimization.memory-compression-level=2" \
    --trainer.num-chunk-per-minibatch $minibatch_size \
    --trainer.frames-per-iter 1500000 \
    --trainer.num-epochs 3 \
    --trainer.optimization.momentum 0 \
    --trainer.optimization.num-jobs-initial 5 \
    --trainer.optimization.num-jobs-final 8 \
    --trainer.optimization.initial-effective-lrate 0.001 \
    --trainer.optimization.final-effective-lrate 0.0001 \
    --trainer.optimization.shrink-value 1.0 \
    --trainer.max-param-change 2.0 \
    --cleanup.remove-egs true \
    --feat-dir data/${train_set} \
    --tree-dir $treedir \
    --dir $dir || exit 1;
fi