@@ -206,16 +206,15 @@ def __init__(self, bundle: torchaudio.pipelines.RNNTBundle, beam_width: int = 10
206
206
self .beam_width = beam_width
207
207
208
208
self .state = None
209
- self .hypothesis = None
209
+ self .hypotheses = None
210
210
211
211
def infer (self , segment : torch .Tensor ) -> str :
212
212
"""Perform streaming inference"""
213
213
features , length = self .feature_extractor (segment )
214
- hypos , self .state = self .decoder .infer (
215
- features , length , self .beam_width , state = self .state , hypothesis = self .hypothesis
214
+ self . hypotheses , self .state = self .decoder .infer (
215
+ features , length , self .beam_width , state = self .state , hypothesis = self .hypotheses
216
216
)
217
- self .hypothesis = hypos [0 ]
218
- transcript = self .token_processor (self .hypothesis [0 ], lstrip = False )
217
+ transcript = self .token_processor (self .hypotheses [0 ][0 ], lstrip = False )
219
218
return transcript
220
219
221
220
@@ -291,7 +290,7 @@ def infer():
291
290
chunk = q .get ()
292
291
segment = cacher (chunk [:, 0 ])
293
292
transcript = pipeline .infer (segment )
294
- print (transcript , end = "" , flush = True )
293
+ print (transcript , end = "\r " , flush = True )
295
294
296
295
import torch .multiprocessing as mp
297
296
0 commit comments