Skip to content

Commit 93536ca

Browse files
committed
Fix AudioEffector for mulaw
When encoding audio with mulaw, the resulting data does not have a header, and the StreamReader defaults to 16k Hz, which can stretch/shrink the resulting waveform.
1 parent c6624fa commit 93536ca

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

test/torchaudio_unittest/io/effector_test.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,24 @@ def test_null(self):
3030
("ogg", "flac"), # flac only supports s16 and s32
3131
("ogg", "opus"), # opus only supports 48k Hz
3232
("ogg", "vorbis"), # vorbis only supports stereo
33+
# ("ogg", "vorbis", 44100),
34+
# this fails with a small discrepancy; 441024 vs 441000
35+
# TODO: investigate
3336
("wav", None),
3437
("wav", "pcm_u8"),
3538
("mp3", None),
39+
("mulaw", None, 44100), # mulaw is encoded without header
3640
]
3741
)
38-
def test_formats(self, format, encoder):
42+
def test_formats(self, format, encoder, sample_rate=8000):
3943
"""Formats (some with restrictions) just work without an issue in effector"""
40-
sample_rate = 8000
4144

4245
effector = AudioEffector(format=format, encoder=encoder)
4346
original = get_sinusoid(n_channels=3, sample_rate=sample_rate, channels_first=False)
4447

4548
output = effector.apply(original, sample_rate)
49+
print(original.shape)
50+
print(output.shape)
4651

4752
# On 4.1 OPUS produces 8020 samples (extra 20)
4853
# this has been fixed on 4.2+

torchaudio/io/_effector.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,10 @@ def _get_reader(self, waveform, sample_rate, frames_per_chunk=None):
267267
muxer = self.format
268268
encoder = self.encoder
269269
option = {}
270+
# Some formats are headerless, so we need to provide this information explicitly.
271+
if self.format == "mulaw":
272+
option = {"sample_rate": f"{sample_rate}", "channels": f"{num_channels}"}
273+
270274
else: # PCM
271275
muxer = _get_muxer(waveform.dtype)
272276
encoder = None

0 commit comments

Comments
 (0)