Skip to content

Commit 8cafd32

Browse files
authored
[src] Cosmetic changes to natural-gradient code (#3108)
1 parent 633e61c commit 8cafd32

File tree

2 files changed

+16
-16
lines changed

2 files changed

+16
-16
lines changed

src/nnet3/natural-gradient-online.cc

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -119,26 +119,26 @@ void OnlineNaturalGradient::InitDefault(int32 D) {
119119
t_ = 0;
120120
}
121121

122-
void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &R0) {
123-
int32 D = R0.NumCols();
122+
void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &X0) {
123+
int32 D = X0.NumCols();
124124
// for locking reasons it's better to use a different object.
125125
OnlineNaturalGradient this_copy(*this);
126126
this_copy.InitDefault(D);
127127
this_copy.t_ = 1; // Prevent recursion to Init() again.
128128

129-
CuMatrix<BaseFloat> R0_copy(R0.NumRows(), R0.NumCols(), kUndefined);
129+
CuMatrix<BaseFloat> X0_copy(X0.NumRows(), X0.NumCols(), kUndefined);
130130
// 'num_iters' is number of iterations with the same data from a pseudorandom
131131
// start. this is a faster way of starting than doing eigenvalue
132132
// decomposition.
133133
//
134134
// Note: we only do three iterations of initialization if we have enough data
135135
// that it's reasonably possible to estimate the subspace of dimension
136136
// this_copy.rank_. If we don't have more than that many rows in our initial
137-
// minibatch R0, we just do one iteration... this gives us almost exactly
138-
// (barring small effects due to epsilon_ > 0) the row subspace of R0 after
137+
// minibatch X0, we just do one iteration... this gives us almost exactly
138+
// (barring small effects due to epsilon_ > 0) the row subspace of X0 after
139139
// one iteration anyway.
140140
int32 num_init_iters;
141-
if (R0.NumRows() <= this_copy.rank_)
141+
if (X0.NumRows() <= this_copy.rank_)
142142
num_init_iters = 1;
143143
else
144144
num_init_iters = 3;
@@ -147,8 +147,8 @@ void OnlineNaturalGradient::Init(const CuMatrixBase<BaseFloat> &R0) {
147147
// initialize.
148148
for (int32 i = 0; i < num_init_iters; i++) {
149149
BaseFloat scale;
150-
R0_copy.CopyFromMat(R0);
151-
this_copy.PreconditionDirections(&R0_copy, &scale);
150+
X0_copy.CopyFromMat(X0);
151+
this_copy.PreconditionDirections(&X0_copy, &scale);
152152
}
153153
rank_ = this_copy.rank_;
154154
W_t_.Swap(&this_copy.W_t_);
@@ -197,7 +197,7 @@ void OnlineNaturalGradient::PreconditionDirections(
197197
t_ += 1;
198198
}
199199

200-
void OnlineNaturalGradient::ReorthogonalizeXt1(
200+
void OnlineNaturalGradient::ReorthogonalizeRt1(
201201
const VectorBase<BaseFloat> &d_t1,
202202
BaseFloat rho_t1,
203203
CuMatrixBase<BaseFloat> *W_t1,
@@ -214,7 +214,7 @@ void OnlineNaturalGradient::ReorthogonalizeXt1(
214214
ComputeEt(d_t1, beta_t1, &e_t1, &sqrt_e_t1, &inv_sqrt_e_t1);
215215

216216
temp_O->SymAddMat2(1.0, *W_t1, kNoTrans, 0.0);
217-
// O_t = E_t^{-0.5} W_t W_t^T E_t^{-0.5}
217+
// O_{t+1} = E_{t+1}^{-0.5} W_{t+1} W_{t+1}^T E_{t+1}^{-0.5}
218218
Matrix<BaseFloat> O_mat(*temp_O);
219219
SpMatrix<BaseFloat> O(O_mat, kTakeLower);
220220
for (int32 i = 0; i < R; i++) {
@@ -439,7 +439,7 @@ void OnlineNaturalGradient::PreconditionDirectionsInternal(
439439
if (self_debug_) {
440440
KALDI_WARN << "Reorthogonalizing.";
441441
}
442-
ReorthogonalizeXt1(d_t1,
442+
ReorthogonalizeRt1(d_t1,
443443
rho_t1,
444444
&W_t1,
445445
&J_t,
@@ -510,7 +510,7 @@ void OnlineNaturalGradient::ComputeWt1(int32 N,
510510
// B_t = J_t + (1-\eta)/(\eta/N) (D_t + \rho_t I) W_t
511511
J_t->AddDiagVecMat(1.0, w_t_coeff_gpu, W_t, kNoTrans, 1.0);
512512

513-
// A_t = (\eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5} B_t
513+
// A_t = (\eta/N) E_{t+1}^{0.5} C_t^{-0.5} U_t^T E_t^{-0.5}
514514
Matrix<BaseFloat> A_t(U_t, kTrans);
515515
for (int32 i = 0; i < R; i++) {
516516
BaseFloat i_factor = (eta / N) * sqrt_e_t1(i) * inv_sqrt_c_t(i);

src/nnet3/natural-gradient-online.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -375,8 +375,8 @@ namespace nnet3 {
375375
* Initialization *
376376
377377
Now, a note on what we do on time t = 0, i.e. for the first minibatch. We
378-
initialize X_0 to the top R eigenvectors of 1/N X_0 X_0^T, where N is the
379-
minibatch size (num-rows of R0). If L is the corresponding RxR diagonal
378+
initialize R_0 to the top R eigenvectors of 1/N X_0 X_0^T, where N is the
379+
minibatch size (num-rows of X0). If L is the corresponding RxR diagonal
380380
matrix of eigenvalues, then we will set D_0 = L - \rho_0 I. We set \rho_0
381381
to ensure that
382382
tr(F_0) = 1/N tr(X_0 X_0^T),
@@ -457,7 +457,7 @@ class OnlineNaturalGradient {
457457
not.
458458
459459
*/
460-
void PreconditionDirections(CuMatrixBase<BaseFloat> *R,
460+
void PreconditionDirections(CuMatrixBase<BaseFloat> *X,
461461
BaseFloat *scale);
462462

463463

@@ -515,7 +515,7 @@ class OnlineNaturalGradient {
515515
// This function is called if C_t has high condition number; it makes sure
516516
// that R_{t+1} is orthogonal. See the section in the extended comment above
517517
// on "keeping R_t orthogonal".
518-
void ReorthogonalizeXt1(const VectorBase<BaseFloat> &d_t1,
518+
void ReorthogonalizeRt1(const VectorBase<BaseFloat> &d_t1,
519519
BaseFloat rho_t1,
520520
CuMatrixBase<BaseFloat> *W_t1,
521521
CuMatrixBase<BaseFloat> *temp_W,

0 commit comments

Comments (0)