@@ -119,26 +119,26 @@ void OnlineNaturalGradient::InitDefault(int32 D) {
   t_ = 0;
 }

-void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &R0) {
-  int32 D = R0.NumCols();
+void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &X0) {
+  int32 D = X0.NumCols();
   // for locking reasons it's better to use a different object.
   OnlineNaturalGradient this_copy(*this);
   this_copy.InitDefault(D);
   this_copy.t_ = 1;  // Prevent recursion to Init() again.

-  CuMatrix<BaseFloat> R0_copy(R0.NumRows(), R0.NumCols(), kUndefined);
+  CuMatrix<BaseFloat> X0_copy(X0.NumRows(), X0.NumCols(), kUndefined);
   // 'num_iters' is number of iterations with the same data from a pseudorandom
   // start.  this is a faster way of starting than doing eigenvalue
   // decomposition.
   //
   // Note: we only do three iterations of initialization if we have enough data
   // that it's reasonably possible to estimate the subspace of dimension
   // this_copy.rank_.  If we don't have more than that many rows in our initial
-  // minibatch R0, we just do one iteration... this gives us almost exactly
-  // (barring small effects due to epsilon_ > 0) the row subspace of R0 after
+  // minibatch X0, we just do one iteration... this gives us almost exactly
+  // (barring small effects due to epsilon_ > 0) the row subspace of X0 after
   // one iteration anyway.
   int32 num_init_iters;
-  if (R0.NumRows() <= this_copy.rank_)
+  if (X0.NumRows() <= this_copy.rank_)
     num_init_iters = 1;
   else
     num_init_iters = 3;
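
Why repeated passes over the same minibatch work as a warm start (an
interpretation of the comment above, not text from this commit): each call to
PreconditionDirections refines the low-rank factor roughly like one step of
subspace (orthogonal) iteration on the scatter matrix of the data,

    S = X_0^T X_0,    W <- orth(W S),

which converges toward the top-rank_ eigenvector subspace of S. Hence three
iterations are used when X_0 has more than rank_ rows, while a single pass
already spans X_0's full row space otherwise.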
@@ -147,8 +147,8 @@ void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &R0) {
   // initialize.
   for (int32 i = 0; i < num_init_iters; i++) {
     BaseFloat scale;
-    R0_copy.CopyFromMat(R0);
-    this_copy.PreconditionDirections(&R0_copy, &scale);
+    X0_copy.CopyFromMat(X0);
+    this_copy.PreconditionDirections(&X0_copy, &scale);
   }
   rank_ = this_copy.rank_;
   W_t_.Swap(&this_copy.W_t_);
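
For context, a minimal caller sketch (hypothetical code, not part of this
commit; only Init() and the two-argument PreconditionDirections() appear in
this diff, and SetRank() is assumed to exist on the class):

    OnlineNaturalGradient ngrad;
    ngrad.SetRank(40);                    // rank R of the low-rank factor
    CuMatrix<BaseFloat> X(512, 1000);     // one row per gradient direction
    X.SetRandn();                         // stand-in for a real minibatch
    BaseFloat scale;
    ngrad.PreconditionDirections(&X, &scale);  // first call warm-starts via Init(X)
    // The caller is expected to multiply the preconditioned update by 'scale'
    // so the overall magnitude of the step is preserved.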
@@ -197,7 +197,7 @@ void OnlineNaturalGradient::PreconditionDirections(
   t_ += 1;
 }

-void OnlineNaturalGradient::ReorthogonalizeXt1(
+void OnlineNaturalGradient::ReorthogonalizeRt1(
     const VectorBase<BaseFloat> &d_t1,
     BaseFloat rho_t1,
     CuMatrixBase<BaseFloat> *W_t1,
@@ -214,7 +214,7 @@ void OnlineNaturalGradient::ReorthogonalizeXt1(
   ComputeEt(d_t1, beta_t1, &e_t1, &sqrt_e_t1, &inv_sqrt_e_t1);

   temp_O->SymAddMat2(1.0, *W_t1, kNoTrans, 0.0);
-  // O_t = E_t^{-0.5} W_t W_t^T E_t^{-0.5}
+  // O_{t+1} = E_{t+1}^{-0.5} W_{t+1} W_{t+1}^T E_{t+1}^{-0.5}
   Matrix<BaseFloat> O_mat(*temp_O);
   SpMatrix<BaseFloat> O(O_mat, kTakeLower);
   for (int32 i = 0; i < R; i++) {
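
The invariant this routine restores (a sketch assembled from the surrounding
definitions, not text from this commit): the rows of W_{t+1} are scaled so
that W_{t+1} W_{t+1}^T = E_{t+1}, i.e. the normalized Gram matrix

    O_{t+1} = E_{t+1}^{-1/2} W_{t+1} W_{t+1}^T E_{t+1}^{-1/2}

should equal the identity. When roundoff makes it drift, a Cholesky-based
correction restores it: writing O_{t+1} = C C^T and setting

    W_{t+1} <- E_{t+1}^{1/2} C^{-1} E_{t+1}^{-1/2} W_{t+1}

gives a new Gram matrix C^{-1} O_{t+1} C^{-T} = I.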
@@ -439,7 +439,7 @@ void OnlineNaturalGradient::PreconditionDirectionsInternal(
     if (self_debug_) {
       KALDI_WARN << "Reorthogonalizing.";
     }
-    ReorthogonalizeXt1(d_t1,
+    ReorthogonalizeRt1(d_t1,
                        rho_t1,
                        &W_t1,
                        &J_t,
@@ -510,7 +510,7 @@ void OnlineNaturalGradient::ComputeWt1(int32 N,
   // B_t = J_t + (1-\eta)/(\eta/N) (D_t + \rho_t I) W_t
   J_t->AddDiagVecMat(1.0, w_t_coeff_gpu, W_t, kNoTrans, 1.0);

-  // A_t = (\eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5} B_t
+  // A_t = (\eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5}
   Matrix<BaseFloat> A_t(U_t, kTrans);
   for (int32 i = 0; i < R; i++) {
     BaseFloat i_factor = (eta / N) * sqrt_e_t1(i) * inv_sqrt_c_t(i);
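
Read elementwise (a worked expansion of the corrected comment; the matching
per-column factor is assumed to follow in the loop body, beyond this hunk):

    (A_t)_{ij} = (\eta/N) e_{t+1,i}^{1/2} c_{t,i}^{-1/2} (U_t^T)_{ij} e_{t,j}^{-1/2}

so i_factor carries everything that depends only on the row index i, and a
factor e_{t,j}^{-1/2} per column j completes the product. Dropping the
trailing B_t from the comment is consistent with B_t being applied afterwards,
with W_{t+1} = A_t B_t.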