Skip to content

Commit 19acf9b

Browse files
committed
modification from the review, small fix
1 parent dd0a307 commit 19acf9b

File tree

5 files changed

+143
-13
lines changed

5 files changed

+143
-13
lines changed

egs/cifar/v1/image/copy_data_dir.sh

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#!/bin/bash
2+
3+
# Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4+
# Apache 2.0
5+
6+
# This script operates on a directory, such as in data/train/,
7+
# that contains some subset of the following files:
8+
# feats.scp
9+
# images.scp
10+
# vad.scp
11+
# spk2utt
12+
# utt2spk
13+
# text
14+
#
15+
# It copies to another directory, possibly adding a specified prefix or a suffix
16+
# to the utterance and/or speaker names. Note: the recording-ids stay the same.
17+
#
18+
19+
20+
# begin configuration section
21+
spk_prefix=
22+
utt_prefix=
23+
spk_suffix=
24+
utt_suffix=
25+
validate_opts= # should rarely be needed.
26+
# end configuration section
27+
28+
. utils/parse_options.sh
29+
30+
if [ $# != 2 ]; then
31+
echo "Usage: "
32+
echo " $0 [options] <srcdir> <destdir>"
33+
echo "e.g.:"
34+
echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
35+
echo "Options"
36+
echo " --spk-prefix=<prefix> # Prefix for speaker ids, default empty"
37+
echo " --utt-prefix=<prefix> # Prefix for utterance ids, default empty"
38+
echo " --spk-suffix=<suffix> # Suffix for speaker ids, default empty"
39+
echo " --utt-suffix=<suffix> # Suffix for utterance ids, default empty"
40+
exit 1;
41+
fi
42+
43+
44+
export LC_ALL=C
45+
46+
srcdir=$1
47+
destdir=$2
48+
49+
if [ ! -f $srcdir/utt2spk ]; then
50+
echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
51+
exit 1;
52+
fi
53+
54+
if [ "$destdir" == "$srcdir" ]; then
55+
echo "$0: this script requires <srcdir> and <destdir> to be different."
56+
exit 1
57+
fi
58+
59+
set -e;
60+
61+
mkdir -p $destdir
62+
63+
cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
64+
cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map
65+
66+
if [ ! -f $srcdir/utt2uniq ]; then
67+
if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
68+
cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
69+
fi
70+
else
71+
cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
72+
fi
73+
74+
cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
75+
utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk
76+
77+
utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt
78+
79+
if [ -f $srcdir/feats.scp ]; then
80+
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
81+
fi
82+
83+
if [ -f $srcdir/vad.scp ]; then
84+
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp
85+
fi
86+
87+
if [ -f $srcdir/images.scp ]; then
88+
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/images.scp >$destdir/images.scp
89+
fi
90+
91+
if [ -f $srcdir/reco2file_and_channel ]; then
92+
cp $srcdir/reco2file_and_channel $destdir/
93+
fi
94+
95+
if [ -f $srcdir/text ]; then
96+
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
97+
fi
98+
if [ -f $srcdir/utt2dur ]; then
99+
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur
100+
fi
101+
if [ -f $srcdir/reco2dur ]; then
102+
if [ -f $srcdir/segments ]; then
103+
cp $srcdir/reco2dur $destdir/reco2dur
104+
else
105+
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur
106+
fi
107+
fi
108+
if [ -f $srcdir/spk2gender ]; then
109+
utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
110+
fi
111+
if [ -f $srcdir/cmvn.scp ]; then
112+
utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp
113+
fi
114+
for f in stm glm ctm; do
115+
if [ -f $srcdir/$f ]; then
116+
cp $srcdir/$f $destdir
117+
fi
118+
done
119+
120+
rm $destdir/spk_map $destdir/utt_map
121+
122+
echo "$0: copied data from $srcdir to $destdir"
123+
124+
for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text images.scp reco2file_and_channel stm glm ctm; do
125+
if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then
126+
echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to"
127+
echo " ... $destdir/.backup/$f"
128+
mkdir -p $destdir/.backup
129+
mv $destdir/$f $destdir/.backup/
130+
fi
131+
done
132+
133+
134+
[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats"
135+
[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text"
136+
137+
utils/validate_data_dir.sh $validate_opts $destdir

egs/wsj/s5/utils/copy_data_dir.sh

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,10 @@ fi
8686

8787
if [ -f $srcdir/segments ]; then
8888
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
89-
if [ -f $srcdir/wav.scp ]; then
90-
cp $srcdir/wav.scp $destdir
91-
elif [ -f $srcdir/images.scp ]; then
92-
cp $srcdir/images.scp $destdir
93-
fi
89+
cp $srcdir/wav.scp $destdir
9490
else # no segments->wav indexed by utt.
9591
if [ -f $srcdir/wav.scp ]; then
9692
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp
97-
elif [ -f $srcdir/images.scp ]; then
98-
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/images.scp >$destdir/images.scp
9993
fi
10094
fi
10195

@@ -132,7 +126,7 @@ rm $destdir/spk_map $destdir/utt_map
132126

133127
echo "$0: copied data from $srcdir to $destdir"
134128

135-
for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp images.scp reco2file_and_channel stm glm ctm; do
129+
for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel stm glm ctm; do
136130
if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then
137131
echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to"
138132
echo " ... $destdir/.backup/$f"

egs/wsj/s5/utils/lang/bpe/prepend_words.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
#!/usr/bin/env python3
2-
# -*- coding: utf-8 -*-
32

43
# This script prepends '|' to every word in the transcript to mark
54
# the beginning of the words for finding the initial-space of every word
65
# after decoding.
76

87
import sys, io
98

10-
infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
11-
output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
9+
infile = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1')
10+
output = io.TextIOWrapper(sys.stdout.buffer, encoding='latin-1')
1211
for line in infile:
1312
output.write(' '.join([ "|"+word for word in line.split()]) + '\n')
1413

egs/yomdle_tamil/v1/local/augment_data.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ mkdir -p $datadir/augmentations
2222
echo "copying $srcdir to $datadir/augmentations/aug1, allowed length, creating feats.scp"
2323

2424
for set in aug1; do
25-
utils/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \
25+
image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \
2626
$srcdir $datadir/augmentations/$set
2727
cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt
2828
local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \

egs/yomdle_tamil/v1/run_end2end.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ if [ $stage -le 8 ]; then
156156
steps/nnet3/align.sh --nj $nj --cmd "$cmd" \
157157
--use-gpu false \
158158
--scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0' \
159-
data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train
159+
data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train
160160
fi
161161

162162
if [ $stage -le 9 ]; then

0 commit comments

Comments
 (0)