|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +# Copyright 2013 Johns Hopkins University (author: Daniel Povey) |
| 4 | +# Apache 2.0 |
| 5 | + |
| 6 | +# This script operates on a directory, such as in data/train/, |
| 7 | +# that contains some subset of the following files: |
| 8 | +# feats.scp |
| 9 | +# images.scp |
| 10 | +# vad.scp |
| 11 | +# spk2utt |
| 12 | +# utt2spk |
| 13 | +# text |
| 14 | +# |
| 15 | +# It copies to another directory, possibly adding a specified prefix or a suffix |
| 16 | +# to the utterance and/or speaker names. Note, the recording-ids stay the same. |
| 17 | +# |
| 18 | + |
| 19 | + |
| 20 | +# begin configuration section |
| 21 | +spk_prefix= |
| 22 | +utt_prefix= |
| 23 | +spk_suffix= |
| 24 | +utt_suffix= |
| 25 | +validate_opts= # should rarely be needed. |
| 26 | +# end configuration section |
| 27 | + |
| 28 | +. utils/parse_options.sh |
| 29 | + |
| 30 | +if [ $# != 2 ]; then |
| 31 | + echo "Usage: " |
| 32 | + echo " $0 [options] <srcdir> <destdir>" |
| 33 | + echo "e.g.:" |
| 34 | + echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" |
| 35 | + echo "Options" |
| 36 | + echo " --spk-prefix=<prefix> # Prefix for speaker ids, default empty" |
| 37 | + echo " --utt-prefix=<prefix> # Prefix for utterance ids, default empty" |
| 38 | + echo " --spk-suffix=<suffix> # Suffix for speaker ids, default empty" |
| 39 | + echo " --utt-suffix=<suffix> # Suffix for utterance ids, default empty" |
| 40 | + exit 1; |
| 41 | +fi |
| 42 | + |
| 43 | + |
| 44 | +export LC_ALL=C |
| 45 | + |
| 46 | +srcdir=$1 |
| 47 | +destdir=$2 |
| 48 | + |
| 49 | +if [ ! -f $srcdir/utt2spk ]; then |
| 50 | + echo "copy_data_dir.sh: no such file $srcdir/utt2spk" |
| 51 | + exit 1; |
| 52 | +fi |
| 53 | + |
| 54 | +if [ "$destdir" == "$srcdir" ]; then |
| 55 | + echo "$0: this script requires <srcdir> and <destdir> to be different." |
| 56 | + exit 1 |
| 57 | +fi |
| 58 | + |
| 59 | +set -e; |
| 60 | + |
| 61 | +mkdir -p $destdir |
| 62 | + |
| 63 | +cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map |
| 64 | +cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map |
| 65 | + |
| 66 | +if [ ! -f $srcdir/utt2uniq ]; then |
| 67 | + if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then |
| 68 | + cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq |
| 69 | + fi |
| 70 | +else |
| 71 | + cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq |
| 72 | +fi |
| 73 | + |
| 74 | +cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ |
| 75 | + utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk |
| 76 | + |
| 77 | +utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt |
| 78 | + |
| 79 | +if [ -f $srcdir/feats.scp ]; then |
| 80 | + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp |
| 81 | +fi |
| 82 | + |
| 83 | +if [ -f $srcdir/vad.scp ]; then |
| 84 | + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/vad.scp >$destdir/vad.scp |
| 85 | +fi |
| 86 | + |
| 87 | +if [ -f $srcdir/images.scp ]; then |
| 88 | + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/images.scp >$destdir/images.scp |
| 89 | +fi |
| 90 | + |
| 91 | +if [ -f $srcdir/reco2file_and_channel ]; then |
| 92 | + cp $srcdir/reco2file_and_channel $destdir/ |
| 93 | +fi |
| 94 | + |
| 95 | +if [ -f $srcdir/text ]; then |
| 96 | + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text |
| 97 | +fi |
| 98 | +if [ -f $srcdir/utt2dur ]; then |
| 99 | + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur |
| 100 | +fi |
| 101 | +if [ -f $srcdir/reco2dur ]; then |
| 102 | + if [ -f $srcdir/segments ]; then |
| 103 | + cp $srcdir/reco2dur $destdir/reco2dur |
| 104 | + else |
| 105 | + utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/reco2dur >$destdir/reco2dur |
| 106 | + fi |
| 107 | +fi |
| 108 | +if [ -f $srcdir/spk2gender ]; then |
| 109 | + utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender |
| 110 | +fi |
| 111 | +if [ -f $srcdir/cmvn.scp ]; then |
| 112 | + utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp |
| 113 | +fi |
| 114 | +for f in stm glm ctm; do |
| 115 | + if [ -f $srcdir/$f ]; then |
| 116 | + cp $srcdir/$f $destdir |
| 117 | + fi |
| 118 | +done |
| 119 | + |
| 120 | +rm $destdir/spk_map $destdir/utt_map |
| 121 | + |
| 122 | +echo "$0: copied data from $srcdir to $destdir" |
| 123 | + |
| 124 | +for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text images.scp reco2file_and_channel stm glm ctm; do |
| 125 | + if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then |
| 126 | + echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to" |
| 127 | + echo " ... $destdir/.backup/$f" |
| 128 | + mkdir -p $destdir/.backup |
| 129 | + mv $destdir/$f $destdir/.backup/ |
| 130 | + fi |
| 131 | +done |
| 132 | + |
| 133 | + |
| 134 | +[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" |
| 135 | +[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" |
| 136 | + |
| 137 | +utils/validate_data_dir.sh $validate_opts $destdir |
0 commit comments