Skip to content

Commit 5c7fdec

Browse files
committed
add kaldi's equivalent add-deltas to PyTorch.
1 parent ffa861c commit 5c7fdec

File tree

2 files changed

+173
-0
lines changed

2 files changed

+173
-0
lines changed

egs/aishell/s10b/ctc/transform.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright 2020 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang)
4+
# Apache 2.0
5+
6+
import torch
7+
import torch.nn.functional as F
8+
9+
10+
def compute_delta_feat(x, weight):
    '''Filter every feature dimension along time with its own kernel.

    The sequence is extended at both ends by repeating its first and
    last frame, so the output has as many frames as the input.

    Args:
      x: input feat of shape [batch_size, seq_len, feat_dim]

      weight: coefficients for computing delta features;
              it has a shape of [feat_dim, 1, kernel_size],
              where kernel_size has to be odd.

    Returns:
      a tensor of shape [batch_size, seq_len, feat_dim]
    '''
    assert x.ndim == 3
    assert weight.ndim == 3

    num_channels, channels_per_group, kernel_size = weight.shape
    assert num_channels == x.size(2)
    assert channels_per_group == 1
    assert kernel_size % 2 == 1

    half_window = kernel_size // 2

    # conv1d wants [batch_size, feat_dim, seq_len], so move time last
    frames_last = x.permute(0, 2, 1)

    # (half_window, half_window) == (left, right) on the time axis;
    # 'replicate' repeats the edge frames
    padded = F.pad(frames_last, (half_window, half_window), mode='replicate')

    # groups == number of channels turns this into a depthwise
    # convolution: channel i is filtered only by weight[i]
    filtered = F.conv1d(padded, weight, groups=num_channels)

    # back to [batch_size, seq_len, feat_dim]
    return filtered.permute(0, 2, 1)
53+
54+
55+
class AddDeltasTransform:
    '''
    This class implements `add-deltas` in kaldi with
    order == 2 and window == 2.

    It generates the identical output as kaldi's `add-deltas` with default
    parameters given the same input.

    The same instance can be applied to inputs with different feature
    dimensions, devices and floating point dtypes; the convolution
    weights are rebuilt whenever the cached ones do not fit the input.
    '''

    def __init__(self):
        # yapf: disable
        self.first_order_coef = torch.tensor([-0.2, -0.1, 0, 0.1, 0.2])
        self.second_order_coef = torch.tensor([0.04, 0.04, 0.01, -0.04, -0.1, -0.04, 0.01, 0.04, 0.04])
        # yapf: enable

        # TODO(fangjun): change the coefficients to the following as suggested by Dan
        # [-1, 0, 1]
        # [1, 0, -2, 0, 1]

    @staticmethod
    def _as_depthwise_weight(coef, x):
        '''Return `coef` as a [feat_dim, 1, kernel_size] weight on x's device.

        Args:
          coef: either the 1-D kernel or a weight that was expanded
                by a previous call.
          x: the input the weight is going to be applied to; only its
             feature dimension and device are used.

        Raises:
          ValueError: if x has no feature dimensions.
        '''
        feat_dim = x.size(2)
        if feat_dim < 1:
            raise ValueError('x must have at least one feature dimension')

        if coef.ndim != 3 or coef.size(0) != feat_dim:
            # Every row of an expanded weight holds the same kernel, so
            # the first row recovers the 1-D coefficients.
            kernel = coef.reshape(-1, coef.size(-1))[:1]
            coef = kernel.reshape(1, 1, -1).repeat(feat_dim, 1, 1)

        return coef.to(x.device)

    def __call__(self, x):
        '''
        Args:
          x: a tensor of shape [batch_size, seq_len, feat_dim]

        Returns:
          a tensor of shape [batch_size, seq_len, feat_dim * 3]
        '''
        # Cache the expanded weights so that repeated calls with the same
        # feature dimension and device do not rebuild or copy them.
        self.first_order_coef = self._as_depthwise_weight(
            self.first_order_coef, x)
        self.second_order_coef = self._as_depthwise_weight(
            self.second_order_coef, x)

        first_weight = self.first_order_coef
        second_weight = self.second_order_coef
        if x.is_floating_point():
            # conv1d needs input and weight to share a dtype. Only the
            # per-call copy is cast, so the cached coefficients never
            # lose precision.
            first_weight = first_weight.to(x.dtype)
            second_weight = second_weight.to(x.dtype)

        first_order = compute_delta_feat(x, first_weight)
        second_order = compute_delta_feat(x, second_weight)

        y = torch.cat([x, first_order, second_order], dim=2)

        return y
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright 2020 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang)
4+
# Apache 2.0
5+
6+
import os
7+
import shutil
8+
import tempfile
9+
import unittest
10+
11+
import numpy as np
12+
13+
import torch
14+
import torch.nn.functional as F
15+
16+
import kaldi
17+
18+
from transform import AddDeltasTransform
19+
20+
21+
class TransformTest(unittest.TestCase):
    '''Compares AddDeltasTransform with the output of kaldi's `add-deltas`.'''

    def test_add_deltas_transform(self):
        '''The transform has to reproduce
        `add-deltas --delta-order=2 --delta-window=2` on a small matrix.
        '''
        # Imported here because only this test launches an external binary.
        import subprocess

        x = torch.tensor([
            [1, 3],
            [5, 10],
            [0, 1],
            [10, 20],
            [3, 1],
            [3, 2],
            [5, 1],
            [10, -2],
        ]).float()

        x = x.unsqueeze(0)

        transform = AddDeltasTransform()
        y = transform(x).squeeze(0)

        # now use kaldi's add-deltas to compute the ground truth
        #
        # The context manager removes the scratch directory even when the
        # comparison or one of the kaldi calls below fails.
        with tempfile.TemporaryDirectory() as d:
            wspecifier = 'ark:{}/feats.ark'.format(d)
            delta_feats_specifier = 'ark:{dir}/delta.ark'.format(dir=d)

            writer = kaldi.MatrixWriter(wspecifier)
            try:
                writer.Write('utt1', x.squeeze(0).numpy())
            finally:
                writer.Close()

            # An argument list avoids the shell, and check=True reports a
            # failing binary here instead of as a confusing read error
            # further down.
            subprocess.run([
                'add-deltas',
                '--print-args=false',
                '--delta-order=2',
                '--delta-window=2',
                wspecifier,
                delta_feats_specifier,
            ], check=True)

            reader = kaldi.RandomAccessMatrixReader(delta_feats_specifier)
            try:
                expected = reader['utt1']

                np.testing.assert_array_almost_equal(y.numpy(),
                                                     expected.numpy())
            finally:
                # close the archive before its directory is deleted
                reader.Close()
68+
69+
70+
if __name__ == '__main__':
    # Executed as a script: discover and run the test cases of this module.
    unittest.main()

0 commit comments

Comments
 (0)