31
31
32
32
#include " online-audio-source.h"
33
33
#include " feat/feature-functions.h"
34
+ #include " feat/feature-window.h"
34
35
35
36
namespace kaldi {
36
37
@@ -275,7 +276,8 @@ class OnlineFeInput : public OnlineFeatInputItf {
275
276
// "frame_size" - frame extraction window size in audio samples
276
277
// "frame_shift" - shift between successive frames, in audio samples
277
278
OnlineFeInput (OnlineAudioSourceItf *au_src, E *fe,
278
- const int32 frame_size, const int32 frame_shift);
279
+ const int32 frame_size, const int32 frame_shift,
280
+ const bool snip_edges = true );
279
281
280
282
virtual int32 Dim () const { return extractor_->Dim (); }
281
283
@@ -287,15 +289,26 @@ class OnlineFeInput : public OnlineFeatInputItf {
287
289
const int32 frame_size_;
288
290
const int32 frame_shift_;
289
291
Vector<BaseFloat> wave_; // the samples to be passed for extraction
292
+ Vector<BaseFloat> wave_remainder_; // the samples remaining from the previous
293
+ // feature batch
294
+ FrameExtractionOptions frame_opts_;
290
295
291
296
KALDI_DISALLOW_COPY_AND_ASSIGN (OnlineFeInput);
292
297
};
293
298
294
299
// Constructor for OnlineFeInput<E>.
//
// Stores the audio source, the feature extractor, the frame size and the
// frame shift in the corresponding members, then fills in frame_opts_.
//
//   au_src      - audio source, stored in source_
//   fe          - feature extractor, stored in extractor_
//   frame_size  - frame extraction window size in audio samples
//   frame_shift - value stored in frame_shift_ and copied into
//                 frame_opts_.frame_shift_ms
//   snip_edges  - copied verbatim into frame_opts_.snip_edges; its effect is
//                 determined by whatever consumes frame_opts_
//                 (NOTE(review): presumably NumFrames() -- confirm)
template <class E >
295
300
OnlineFeInput<E>::OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
296
- int32 frame_size, int32 frame_shift)
301
+ int32 frame_size, int32 frame_shift,
302
+ bool snip_edges)
297
303
: source_(au_src), extractor_(fe),
298
- frame_size_(frame_size), frame_shift_(frame_shift) {}
304
+ frame_size_(frame_size), frame_shift_(frame_shift) {
305
+ // frame_opts_ is only needed so that NumFrames() can be called
306
+ // 1000 is a dummy sample rate: it makes 1 ms equal 1 sample, so the *_ms fields below hold sample counts
307
+ frame_opts_.samp_freq = 1000 ;
308
+ frame_opts_.frame_shift_ms = frame_shift;
309
+ frame_opts_.frame_length_ms = frame_size;
310
+ frame_opts_.snip_edges = snip_edges;
311
+ }
299
312
300
313
template <class E > bool
301
314
OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
@@ -311,11 +324,26 @@ OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
311
324
312
325
bool ans = source_->Read (&read_samples);
313
326
327
+ Vector<BaseFloat> all_samples (wave_remainder_.Dim () + read_samples.Dim ());
328
+ all_samples.Range (0 , wave_remainder_.Dim ()).CopyFromVec (wave_remainder_);
329
+ all_samples.Range (wave_remainder_.Dim (), read_samples.Dim ()).
330
+ CopyFromVec (read_samples);
331
+
314
332
// Extract the features
315
- if (read_samples.Dim () >= frame_size_) {
316
- extractor_->Compute (read_samples, 1.0 , output);
333
+ if (all_samples.Dim () >= frame_size_) {
334
+ // extract waveform remainder before calling Compute()
335
+ int32 num_frames = NumFrames (all_samples.Dim (), frame_opts_);
336
+ // offset is the amount at the start that has been extracted.
337
+ int32 offset = num_frames * frame_shift_;
338
+ int32 remaining_len = all_samples.Dim () - offset;
339
+ wave_remainder_.Resize (remaining_len);
340
+ KALDI_ASSERT (remaining_len >= 0 );
341
+ if (remaining_len > 0 )
342
+ wave_remainder_.CopyFromVec (SubVector<BaseFloat>(all_samples, offset, remaining_len));
343
+ extractor_->Compute (all_samples, 1.0 , output);
317
344
} else {
318
345
output->Resize (0 , 0 );
346
+ wave_remainder_ = all_samples;
319
347
}
320
348
321
349
return ans;
0 commit comments