Skip to content

Commit 150b497

Browse files
committed
change the implementation of add-deltas to be a subclass of nn.Module
1 parent 5c7fdec commit 150b497

File tree

2 files changed

+75
-32
lines changed

2 files changed

+75
-32
lines changed

egs/aishell/s10b/ctc/transform.py

Lines changed: 56 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,22 @@
44
# Apache 2.0
55

66
import torch
7+
import torch.nn as nn
78
import torch.nn.functional as F
89

910

10-
def compute_delta_feat(x, weight):
11+
def compute_delta_feat(x, weight, enable_padding):
1112
'''
1213
Args:
1314
x: input feat of shape [batch_size, seq_len, feat_dim]
1415
1516
weight: coefficients for computing delta features;
16-
it has a shape of [feat_dim, 1, kernel_size].
17+
it has shape [feat_dim, 1, kernel_size].
18+
19+
enable_padding: True to replicate-pad the seq_len axis by kernel_size // 2 frames on each side before the convolution.
1720
1821
Returns:
19-
a tensor fo shape [batch_size, seq_len, feat_dim]
22+
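a tensor of shape [batch_size, seq_len, feat_dim] if enable_padding is True (with an odd kernel_size); otherwise seq_len is reduced by kernel_size - 1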
2023
'''
2124

2225
assert x.ndim == 3
@@ -27,51 +30,61 @@ def compute_delta_feat(x, weight):
2730

2831
feat_dim = x.size(2)
2932

30-
pad_size = weight.size(2) // 2
33+
if enable_padding:
34+
pad_size = weight.size(2) // 2
3135

32-
# F.pad requires a 4-D tensor in our case
33-
x = x.unsqueeze(0)
36+
# F.pad requires a 4-D tensor in our case
37+
x = x.unsqueeze(0)
3438

35-
# (0, 0, pad_size, pad_size) == (left, right, top, bottom)
36-
padded_x = F.pad(x, (0, 0, pad_size, pad_size), mode='replicate')
39+
# (0, 0, pad_size, pad_size) == (left, right, top, bottom)
40+
x = F.pad(x, (0, 0, pad_size, pad_size), mode='replicate')
3741

38-
# after padding, we have to convert it back to 3-D
39-
# since conv1d requires 3-D input
40-
padded_x = padded_x.squeeze(0)
42+
# after padding, we have to convert it back to 3-D
43+
# since conv1d requires 3-D input
44+
x = x.squeeze(0)
4145

4246
# conv1d requires a shape of [batch_size, feat_dim, seq_len]
43-
padded_x = padded_x.permute(0, 2, 1)
47+
x = x.permute(0, 2, 1)
4448

4549
# NOTE(fangjun): we perform a depthwise convolution here by
4650
# setting groups == number of channels
47-
y = F.conv1d(input=padded_x, weight=weight, groups=feat_dim)
51+
y = F.conv1d(input=x, weight=weight, groups=feat_dim)
4852

49-
# now convert y back to be of shape [batch_size, seq_len, feat_dim]
53+
# now convert y back to shape [batch_size, seq_len, feat_dim]
5054
y = y.permute(0, 2, 1)
5155

5256
return y
5357

5458

55-
class AddDeltasTransform:
59+
class AddDeltasTransform(nn.Module):
5660
'''
5761
This class implements `add-deltas` in kaldi with
5862
order == 2 and window == 2.
5963
60-
It generates the identical output as kaldi's `add-deltas` with default
61-
parameters given the same input.
64+
It can generate output identical to kaldi's `add-deltas` when given kaldi's coefficients and enable_padding=True.
65+
66+
See transform_test.py
6267
'''
6368

64-
def __init__(self):
65-
# yapf: disable
66-
self.first_order_coef = torch.tensor([-0.2, -0.1, 0, 0.1, 0.2])
67-
self.second_order_coef = torch.tensor([0.04, 0.04, 0.01, -0.04, -0.1, -0.04, 0.01, 0.04, 0.04])
68-
# yapf: enable
69+
def __init__(self,
70+
first_order_coef=[-1, 0, 1],
71+
second_order_coef=[1, 0, -2, 0, 1],
72+
enable_padding=False):
73+
'''
74+
Note that this class has no trainable `nn.Parameters`.
75+
76+
Args:
77+
first_order_coef: coefficients used to compute the first order delta feature
78+
79+
second_order_coef: coefficients used to compute the second order delta feature
80+
'''
81+
super().__init__()
6982

70-
# TODO(fangjun): change the coefficients to the following as suggested by Dan
71-
# [-1, 0, 1]
72-
# [1, 0, -2, 0, 1]
83+
self.first_order_coef = torch.tensor(first_order_coef)
84+
self.second_order_coef = torch.tensor(second_order_coef)
85+
self.enable_padding = enable_padding
7386

74-
def __call__(self, x):
87+
def forward(self, x):
7588
'''
7689
Args:
7790
x: a tensor of shape [batch_size, seq_len, feat_dim]
@@ -94,9 +107,22 @@ def __call__(self, x):
94107
self.first_order_coef = self.first_order_coef.to(device)
95108
self.second_order_coef = self.second_order_coef.to(device)
96109

97-
first_order = compute_delta_feat(x, self.first_order_coef)
98-
second_order = compute_delta_feat(x, self.second_order_coef)
99-
100-
y = torch.cat([x, first_order, second_order], dim=2)
110+
first_order = compute_delta_feat(x, self.first_order_coef,
111+
self.enable_padding)
112+
second_order = compute_delta_feat(x, self.second_order_coef,
113+
self.enable_padding)
114+
115+
if self.enable_padding:
116+
y = torch.cat([x, first_order, second_order], dim=2)
117+
else:
118+
zeroth = (x.size(1) - second_order.size(1)) // 2
119+
first = (first_order.size(1) - second_order.size(1)) // 2
120+
121+
y = torch.cat([
122+
x[:, zeroth:-zeroth, :],
123+
first_order[:, first:-first, :],
124+
second_order,
125+
],
126+
dim=2)
101127

102128
return y

egs/aishell/s10b/ctc/transform_test.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,18 @@ def test_add_deltas_transform(self):
3030
[3, 2],
3131
[5, 1],
3232
[10, -2],
33+
[10, 20],
34+
[100, 200],
3335
]).float()
3436

3537
x = x.unsqueeze(0)
3638

37-
transform = AddDeltasTransform()
39+
transform = AddDeltasTransform(
40+
first_order_coef=[-0.2, -0.1, 0, 0.1, 0.2],
41+
second_order_coef=[
42+
0.04, 0.04, 0.01, -0.04, -0.1, -0.04, 0.01, 0.04, 0.04
43+
],
44+
enable_padding=True)
3845
y = transform(x)
3946

4047
# now use kaldi's add-deltas to compute the ground truth
@@ -60,7 +67,17 @@ def test_add_deltas_transform(self):
6067

6168
y = y.squeeze(0)
6269

63-
np.testing.assert_array_almost_equal(y.numpy(), expected.numpy())
70+
np.testing.assert_array_almost_equal(y.numpy(),
71+
expected.numpy(),
72+
decimal=5)
73+
74+
# now for padding == False
75+
transform.enable_padding = False
76+
y = transform(x).squeeze(0)
77+
78+
np.testing.assert_array_almost_equal(y.numpy(),
79+
expected.numpy()[4:-4, :],
80+
decimal=5)
6481

6582
reader.Close()
6683

0 commit comments

Comments
 (0)