Skip to content

Commit f8a3b5c

Browse files
committed
Fixes OnlineFeInput, which was broken by commit cc2469e.
We reverted only the parts of that commit that affect OnlineFeInput. Because ExtractWaveformRemainder was removed in commit 1180e46, we recovered that code, adapted it, and now use it directly in OnlineFeInput<E>::Compute. We also construct a fake FrameExtractionOptions (with a sample rate of 1000, so that milliseconds and samples coincide) so that NumFrames() still works as expected. Note that the snip_edges=false case is also supported: the OnlineFeInput constructor now takes an additional optional parameter for it.
1 parent fb514dc commit f8a3b5c

File tree

1 file changed

+33
-5
lines changed

1 file changed

+33
-5
lines changed

src/online/online-feat-input.h

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#include "online-audio-source.h"
3333
#include "feat/feature-functions.h"
34+
#include "feat/feature-window.h"
3435

3536
namespace kaldi {
3637

@@ -275,7 +276,8 @@ class OnlineFeInput : public OnlineFeatInputItf {
275276
// "frame_size" - frame extraction window size in audio samples
276277
// "frame_shift" - feature frame width in audio samples
277278
OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
278-
const int32 frame_size, const int32 frame_shift);
279+
const int32 frame_size, const int32 frame_shift,
280+
const bool snip_edges = true);
279281

280282
virtual int32 Dim() const { return extractor_->Dim(); }
281283

@@ -287,15 +289,26 @@ class OnlineFeInput : public OnlineFeatInputItf {
287289
const int32 frame_size_;
288290
const int32 frame_shift_;
289291
Vector<BaseFloat> wave_; // the samples to be passed for extraction
292+
Vector<BaseFloat> wave_remainder_; // the samples remaining from the previous
293+
// feature batch
294+
FrameExtractionOptions frame_opts_;
290295

291296
KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineFeInput);
292297
};
293298

294299
template<class E>
295300
OnlineFeInput<E>::OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
296-
int32 frame_size, int32 frame_shift)
301+
int32 frame_size, int32 frame_shift,
302+
bool snip_edges)
297303
: source_(au_src), extractor_(fe),
298-
frame_size_(frame_size), frame_shift_(frame_shift) {}
304+
frame_size_(frame_size), frame_shift_(frame_shift) {
305+
// we need a FrameExtractionOptions to call NumFrames()
306+
// 1000 is just a fake sample rate which equates ms and samples
307+
frame_opts_.samp_freq = 1000;
308+
frame_opts_.frame_shift_ms = frame_shift;
309+
frame_opts_.frame_length_ms = frame_size;
310+
frame_opts_.snip_edges = snip_edges;
311+
}
299312

300313
template<class E> bool
301314
OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
@@ -311,11 +324,26 @@ OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
311324

312325
bool ans = source_->Read(&read_samples);
313326

327+
Vector<BaseFloat> all_samples(wave_remainder_.Dim() + read_samples.Dim());
328+
all_samples.Range(0, wave_remainder_.Dim()).CopyFromVec(wave_remainder_);
329+
all_samples.Range(wave_remainder_.Dim(), read_samples.Dim()).
330+
CopyFromVec(read_samples);
331+
314332
// Extract the features
315-
if (read_samples.Dim() >= frame_size_) {
316-
extractor_->Compute(read_samples, 1.0, output);
333+
if (all_samples.Dim() >= frame_size_) {
334+
// extract waveform remainder before calling Compute()
335+
int32 num_frames = NumFrames(all_samples.Dim(), frame_opts_);
336+
// offset is the amount at the start that has been extracted.
337+
int32 offset = num_frames * frame_shift_;
338+
int32 remaining_len = all_samples.Dim() - offset;
339+
wave_remainder_.Resize(remaining_len);
340+
KALDI_ASSERT(remaining_len >= 0);
341+
if (remaining_len > 0)
342+
wave_remainder_.CopyFromVec(SubVector<BaseFloat>(all_samples, offset, remaining_len));
343+
extractor_->Compute(all_samples, 1.0, output);
317344
} else {
318345
output->Resize(0, 0);
346+
wave_remainder_ = all_samples;
319347
}
320348

321349
return ans;

0 commit comments

Comments
 (0)