This commit is contained in:
2026-04-10 15:06:59 +02:00
parent 3031b7153b
commit e5a4711004
7806 changed files with 1918528 additions and 335 deletions

View File

@@ -0,0 +1,131 @@
# This file is part of audioread.
# Copyright 2013, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Multi-library, cross-platform audio decoding."""
from . import ffdec
from .exceptions import DecodeError, NoBackendError
from .base import AudioFile # noqa
def _gst_available():
"""Determine whether Gstreamer and the Python GObject bindings are
installed.
"""
try:
import gi
except ImportError:
return False
try:
gi.require_version('Gst', '1.0')
except (ValueError, AttributeError):
return False
try:
from gi.repository import Gst # noqa
except ImportError:
return False
return True
def _ca_available():
"""Determines whether CoreAudio is available (i.e., we're running on
Mac OS X).
"""
import ctypes.util
lib = ctypes.util.find_library('AudioToolbox')
return lib is not None
def _mad_available():
"""Determines whether the pymad bindings are available."""
try:
import mad # noqa
except ImportError:
return False
else:
return True
# Cache of the backend classes found by `available_backends`. The list
# object itself is kept and refilled in place.
BACKENDS = []


def available_backends(flush_cache=False):
    """Return the list of backend classes usable on this system.

    The result is cached after the first call. Pass
    ``flush_cache=True`` to discard the cached list and probe for
    backends again.
    """
    if flush_cache or not BACKENDS:
        # Standard-library WAV and AIFF readers are always present.
        from . import rawread
        found = [rawread.RawAudioFile]

        # Core Audio.
        if _ca_available():
            from . import macca
            found.append(macca.ExtAudioFile)

        # GStreamer.
        if _gst_available():
            from . import gstdec
            found.append(gstdec.GstAudioFile)

        # MAD.
        if _mad_available():
            from . import maddec
            found.append(maddec.MadAudioFile)

        # FFmpeg.
        if ffdec.available():
            found.append(ffdec.FFmpegAudioFile)

        # Refill the cache in place so existing references stay valid.
        BACKENDS[:] = found

    return BACKENDS
def audio_open(path, backends=None):
    """Open the audio file at `path` with the first backend that works.

    `backends` may be a list of audio file classes to try, in order.
    When it is omitted, every backend reported by `available_backends`
    is tried. Callers opening many files can call `available_backends`
    once and pass its result here to avoid repeating the availability
    checks.

    Raises NoBackendError when every backend fails with a DecodeError.
    """
    candidates = available_backends() if backends is None else backends

    for backend_cls in candidates:
        try:
            return backend_cls(path)
        except DecodeError:
            # This backend cannot decode the file; try the next one.
            continue

    # All backends failed!
    raise NoBackendError()

View File

@@ -0,0 +1,18 @@
# This file is part of audioread.
# Copyright 2021, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
class AudioFile:
    """Common base class shared by every audio file type in this package."""

View File

@@ -0,0 +1,25 @@
# This file is part of audioread.
# Copyright 2013, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
class DecodeError(Exception):
    """Root of the exception hierarchy for decoding errors raised by
    this package.
    """
class NoBackendError(DecodeError):
    """No backend could decode the file: either none is available, or
    every available backend failed on it.
    """

View File

@@ -0,0 +1,320 @@
# This file is part of audioread.
# Copyright 2014, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Read audio data using the ffmpeg command line tool via its standard
output.
"""
import queue
import re
import subprocess
import sys
import threading
import time
from io import DEFAULT_BUFFER_SIZE
from .exceptions import DecodeError
from .base import AudioFile
# Executable names that are tried, in order, when launching the decoder.
COMMANDS = ('ffmpeg', 'avconv')

# Value passed as `creationflags` to `subprocess.Popen`. On Windows,
# 0x08000000 is the value of `subprocess.CREATE_NO_WINDOW`; no flags are
# used on other platforms.
PROC_FLAGS = 0x08000000 if sys.platform == "win32" else 0
class FFmpegError(DecodeError):
    """Base class for the errors raised by the FFmpeg backend."""


class CommunicationError(FFmpegError):
    """The output produced by FFmpeg could not be parsed."""


class UnsupportedError(FFmpegError):
    """FFmpeg was unable to decode the file."""


class NotInstalledError(FFmpegError):
    """The ffmpeg binary could not be found."""


class ReadTimeoutError(FFmpegError):
    """Timed out while reading from the ffmpeg command-line tool."""
class QueueReaderThread(threading.Thread):
    """Daemon thread that drains a filehandle block by block.

    Every block read (including the final empty one that marks EOF) is
    put on `self.queue`, unless `discard` is set, in which case the data
    is thrown away and `self.queue` is None.
    """

    def __init__(self, fh, blocksize=1024, discard=False):
        super().__init__()
        self.daemon = True
        self.fh = fh
        self.blocksize = blocksize
        self.discard = discard
        self.queue = queue.Queue() if not discard else None

    def run(self):
        """Read until the filehandle returns an empty block."""
        while True:
            chunk = self.fh.read(self.blocksize)
            if not self.discard:
                self.queue.put(chunk)
            if not chunk:
                # Stream closed (EOF).
                return
def popen_multiple(commands, command_args, *args, **kwargs):
    """Like `subprocess.Popen`, but can try multiple commands in case
    some are not available.

    `commands` is an iterable of command names (any iterable, including
    a generator) and `command_args` is a sequence holding the rest of
    the arguments that, when appended to the command name, make up the
    full first argument to `subprocess.Popen`. The other positional and
    keyword arguments are passed through.

    Returns the `Popen` object for the first command that could be
    started. Raises the `OSError` from the last command tried when none
    could be started, and an `OSError` when `commands` is empty.
    """
    last_error = None
    for command in commands:
        cmd = [command] + list(command_args)
        try:
            return subprocess.Popen(cmd, *args, **kwargs)
        except OSError as exc:
            # Remember the failure and move on to the next candidate.
            last_error = exc

    if last_error is None:
        # Nothing was tried at all; report it the same way as a missing
        # executable so callers' `except OSError` handling applies.
        raise OSError('no commands to try')
    raise last_error
def available():
    """Report whether the FFmpeg backend can be used on this system.

    Runs the first launchable command from `COMMANDS` with ``-version``
    and returns True when it exits with status 0. Returns False when no
    command could be started.
    """
    try:
        probe = popen_multiple(
            COMMANDS,
            ['-version'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            creationflags=PROC_FLAGS,
        )
    except OSError:
        return False

    # Drain both pipes and wait for the process to finish.
    probe.communicate()
    return probe.returncode == 0
# For Windows error switch management, we need a lock to keep the mode
# adjustment atomic. FFmpegAudioFile.__init__ holds this lock from the
# moment it changes the error mode with SetErrorMode until it has
# restored the previous mode.
windows_error_mode_lock = threading.Lock()
class FFmpegAudioFile(AudioFile):
    """An audio file decoded by the ffmpeg command-line utility.

    Constructing an instance launches the first available command from
    `COMMANDS` so that it writes raw ``s16le`` PCM data to its standard
    output. After construction, the attributes `samplerate`, `channels`
    and `duration` (in seconds) hold the values parsed from the tool's
    stderr output.

    Iterate over the object (or call `read_data`) to obtain the decoded
    blocks, and call `close` -- or use the object as a context manager --
    when finished.
    """

    def __init__(self, filename, block_size=DEFAULT_BUFFER_SIZE):
        """Start decoding `filename`.

        `block_size` is the number of bytes requested per read from the
        subprocess's standard output.

        Raises NotInstalledError when no command could be started,
        OSError when the tool reports a missing file, UnsupportedError
        when it reports invalid data, and CommunicationError when its
        output ends before stream information was found.
        """
        # On Windows, we need to disable the subprocess's crash dialog
        # in case it dies. Passing SEM_NOGPFAULTERRORBOX to SetErrorMode
        # disables this behavior.
        windows = sys.platform.startswith("win")
        if windows:
            windows_error_mode_lock.acquire()
            SEM_NOGPFAULTERRORBOX = 0x0002
            import ctypes
            # We call SetErrorMode in two steps to avoid overriding
            # existing error mode.
            previous_error_mode = \
                ctypes.windll.kernel32.SetErrorMode(SEM_NOGPFAULTERRORBOX)
            ctypes.windll.kernel32.SetErrorMode(
                previous_error_mode | SEM_NOGPFAULTERRORBOX
            )

        try:
            self.proc = popen_multiple(
                COMMANDS,
                ['-i', filename, '-f', 's16le', '-'],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.DEVNULL,
                creationflags=PROC_FLAGS,
            )
        except OSError:
            raise NotInstalledError()
        finally:
            # Reset previous error mode on Windows. (We can change this
            # back now because the flag was inherited by the subprocess;
            # we don't need to keep it set in the parent process.)
            if windows:
                try:
                    import ctypes
                    ctypes.windll.kernel32.SetErrorMode(previous_error_mode)
                finally:
                    windows_error_mode_lock.release()

        # Start another thread to consume the standard output of the
        # process, which contains raw audio data.
        self.stdout_reader = QueueReaderThread(self.proc.stdout, block_size)
        self.stdout_reader.start()

        # Read relevant information from stderr. If that fails, the
        # constructor raises and the caller never receives an object to
        # close, so release the subprocess and reader thread here rather
        # than leaving them until garbage collection.
        try:
            self._get_info()
        except BaseException:
            self.close()
            raise

        # Start a separate thread to read the rest of the data from
        # stderr. This (a) avoids filling up the OS buffer and (b)
        # collects the error output for diagnosis.
        self.stderr_reader = QueueReaderThread(self.proc.stderr)
        self.stderr_reader.start()

    def read_data(self, timeout=10.0):
        """Read blocks of raw PCM data from the file.

        This is a generator yielding the blocks produced by the stdout
        reader thread until the empty end-of-file block arrives.
        `timeout` is the number of seconds to wait for each block.

        Raises ReadTimeoutError, carrying the stderr output collected so
        far, when no block arrives in time.
        """
        # Read from stdout in a separate thread and consume data from
        # the queue.
        start_time = time.time()
        while True:
            # Wait for data to be available or a timeout.
            try:
                data = self.stdout_reader.queue.get(timeout=timeout)
            except queue.Empty:
                # Queue read timed out.
                end_time = time.time()
                if end_time - start_time >= timeout:
                    # Nothing interesting has happened for a while --
                    # FFmpeg is probably hanging.
                    raise ReadTimeoutError('ffmpeg output: {}'.format(
                        b''.join(self.stderr_reader.queue.queue)
                    ))
                # Keep waiting.
                start_time = end_time
                continue

            if not data:
                # End of file.
                break
            yield data

    def _get_info(self):
        """Reads the tool's output from its stderr stream, extracts the
        relevant information, and parses it.

        Lines are consumed until one containing ``audio:`` is seen; that
        line, together with any earlier ``duration:`` line, is handed to
        `_parse_info`.
        """
        out_parts = []
        while True:
            line = self.proc.stderr.readline()
            if not line:
                # EOF and data not found.
                raise CommunicationError("stream info not found")

            # In Python 3, result of reading from stderr is bytes.
            if isinstance(line, bytes):
                line = line.decode('utf8', 'ignore')

            line = line.strip().lower()

            if 'no such file' in line:
                raise OSError('file not found')
            elif 'invalid data found' in line:
                raise UnsupportedError()
            elif 'duration:' in line:
                out_parts.append(line)
            elif 'audio:' in line:
                out_parts.append(line)
                self._parse_info(''.join(out_parts))
                break

    def _parse_info(self, s):
        """Given relevant data from the ffmpeg output, set audio
        parameter fields on this object.

        `s` is the lower-cased text assembled by `_get_info`. Sets
        `samplerate`, `channels` and `duration`; each is 0 when the
        corresponding pattern is not present in `s`.
        """
        # Sample rate.
        match = re.search(r'(\d+) hz', s)
        if match:
            self.samplerate = int(match.group(1))
        else:
            self.samplerate = 0

        # Channel count.
        match = re.search(r'hz, ([^,]+),', s)
        if match:
            mode = match.group(1)
            if mode == 'stereo':
                self.channels = 2
            else:
                # Layouts such as "5.1" count as 5 + 1 channels. Summing
                # the matched groups (instead of splitting the matched
                # text on ".") avoids a ValueError on a trailing dot.
                cmatch = re.match(r'(\d+)\.?(\d)?', mode)
                if cmatch:
                    self.channels = sum(
                        int(part) for part in cmatch.groups() if part
                    )
                else:
                    self.channels = 1
        else:
            self.channels = 0

        # Duration, as hours:minutes:seconds.tenths. The dot is escaped
        # so that only a literal decimal point is accepted.
        match = re.search(
            r'duration: (\d+):(\d+):(\d+)\.(\d)', s
        )
        if match:
            durparts = list(map(int, match.groups()))
            duration = (
                durparts[0] * 60 * 60 +
                durparts[1] * 60 +
                durparts[2] +
                float(durparts[3]) / 10
            )
            self.duration = duration
        else:
            # No duration found.
            self.duration = 0

    def close(self):
        """Close the ffmpeg process used to perform the decoding.

        Does nothing when the process was never started, so it can be
        called on a partially constructed object.
        """
        if hasattr(self, 'proc'):
            # First check the process's execution status before attempting to
            # kill it. This fixes an issue on Windows Subsystem for Linux where
            # ffmpeg closes normally on its own, but never updates
            # `returncode`.
            self.proc.poll()

            # Kill the process if it is still running.
            if self.proc.returncode is None:
                self.proc.kill()
                self.proc.wait()

            # Wait for the stream-reading threads to exit. (They need to
            # stop reading before we can close the streams.)
            if hasattr(self, 'stderr_reader'):
                self.stderr_reader.join()
            if hasattr(self, 'stdout_reader'):
                self.stdout_reader.join()

            # Close the stdout and stderr streams that were opened by Popen,
            # which should occur regardless of if the process terminated
            # cleanly.
            self.proc.stdout.close()
            self.proc.stderr.close()

    def __del__(self):
        self.close()

    # Iteration.
    def __iter__(self):
        return self.read_data()

    # Context manager.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

View File

@@ -0,0 +1,429 @@
# This file is part of audioread.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Use Gstreamer to decode audio files.
To read an audio file, pass it to the constructor for GstAudioFile()
and then iterate over the contents:
>>> f = GstAudioFile('something.mp3')
>>> try:
>>> for block in f:
>>> ...
>>> finally:
>>> f.close()
Note that there are a few complications caused by Gstreamer's
asynchronous architecture. This module spawns its own Gobject main-
loop thread; I'm not sure how that will interact with other main
loops if your program has them. Also, in order to stop the thread
and terminate your program normally, you need to call the close()
method on every GstAudioFile you create. Conveniently, the file can be
used as a context manager to make this simpler:
>>> with GstAudioFile('something.mp3') as f:
>>> for block in f:
>>> ...
Iterating a GstAudioFile yields byte strings containing short integer PCM
data. You can also read the sample rate and channel count from the
file:
>>> with GstAudioFile('something.mp3') as f:
>>>     print(f.samplerate)
>>>     print(f.channels)
>>>     print(f.duration)
"""
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst
import sys
import threading
import os
import queue
from urllib.parse import quote
from .exceptions import DecodeError
from .base import AudioFile
# Capacity of the queue.Queue that each GstAudioFile uses to hand
# decoded blocks to the consumer.
QUEUE_SIZE = 10
# Value given to the appsink's "max-buffers" property.
BUFFER_SIZE = 10
# Marker placed on a GstAudioFile's queue when the end-of-stream message
# arrives; __next__ turns it into StopIteration.
SENTINEL = '__GSTDEC_SENTINEL__'
# Exceptions.
class GStreamerError(DecodeError):
    """Base class for the errors raised by the GStreamer backend."""


class UnknownTypeError(GStreamerError):
    """Gstreamer cannot decode the given file type.

    The caps string describing the stream is kept in `streaminfo`.
    """

    def __init__(self, streaminfo):
        message = "can't decode stream: " + streaminfo
        super().__init__(message)
        self.streaminfo = streaminfo


class FileReadError(GStreamerError):
    """The file cannot be read at all."""


class NoStreamError(GStreamerError):
    """The file was read successfully, but it contains no audio
    streams.
    """

    def __init__(self):
        super().__init__('no audio streams found')


class MetadataMissingError(GStreamerError):
    """GStreamer did not report stream metadata (duration, channels,
    or sample rate).
    """


class IncompleteGStreamerError(GStreamerError):
    """Necessary components of GStreamer (namely, the principal plugin
    packages) are missing.
    """

    def __init__(self):
        super().__init__('missing GStreamer base plugins')
# Managing the Gobject main loop thread. One loop thread is shared by
# every GstAudioFile; the lock guards its lazy creation.
_shared_loop_thread = None
_loop_thread_lock = threading.RLock()

Gst.init(None)


def get_loop_thread():
    """Return the shared main-loop thread, starting it on first use."""
    global _shared_loop_thread
    with _loop_thread_lock:
        if _shared_loop_thread:
            return _shared_loop_thread

        # No thread yet: record it first, then start it.
        loop_thread = MainLoopThread()
        _shared_loop_thread = loop_thread
        loop_thread.start()
        return loop_thread
class MainLoopThread(threading.Thread):
    """Daemon thread whose only job is to run a Gobject main loop."""

    def __init__(self):
        super().__init__()
        self.daemon = True
        self.loop = GLib.MainLoop.new(None, False)

    def run(self):
        """Run the main loop; returns only when the loop itself does."""
        self.loop.run()
# The decoder.
class GstAudioFile(AudioFile):
    """Reads raw audio data from any audio file that Gstreamer
    knows how to decode.

    >>> with GstAudioFile('something.mp3') as f:
    >>>     print(f.samplerate)
    >>>     print(f.channels)
    >>>     print(f.duration)
    >>>     for block in f:
    >>>         do_something(block)

    Iterating the object yields blocks of 16-bit PCM data. Three
    pieces of stream information are also available: samplerate (in Hz),
    number of channels, and duration (in seconds).

    It's very important that the client call close() when it's done
    with the object. Otherwise, the program is likely to hang on exit.
    Alternatively, of course, one can just use the file as a context
    manager, as shown above.
    """

    def __init__(self, path):
        """Build the decoding pipeline for `path` and block until the
        stream is ready or an error has been reported.

        Raises IncompleteGStreamerError when a required element cannot
        be created, and re-raises whatever exception a callback stored
        in `read_exc` before the stream became ready.
        """
        # `running` gates the data callback and close(); `finished`
        # makes the bus message callback ignore late messages.
        self.running = False
        self.finished = False

        # Set up the Gstreamer pipeline.
        self.pipeline = Gst.Pipeline()

        self.dec = Gst.ElementFactory.make("uridecodebin", None)
        self.conv = Gst.ElementFactory.make("audioconvert", None)
        self.sink = Gst.ElementFactory.make("appsink", None)

        if self.dec is None or self.conv is None or self.sink is None:
            # uridecodebin, audioconvert, or appsink is missing. We need
            # gst-plugins-base.
            raise IncompleteGStreamerError()

        # Register for bus signals.
        bus = self.pipeline.get_bus()
        bus.add_signal_watch()
        bus.connect("message::eos", self._message)
        bus.connect("message::error", self._message)

        # Configure the input.
        uri = 'file://' + quote(os.path.abspath(path))
        self.dec.set_property("uri", uri)
        # The callback to connect the input.
        self.dec.connect("pad-added", self._pad_added)
        self.dec.connect("no-more-pads", self._no_more_pads)
        # And a callback if decoding fails.
        self.dec.connect("unknown-type", self._unkown_type)

        # Configure the output.
        # We want short integer data.
        self.sink.set_property(
            'caps',
            Gst.Caps.from_string('audio/x-raw, format=(string)S16LE'),
        )
        # TODO set endianness?
        # Set up the characteristics of the output. We don't want to
        # drop any data (nothing is real-time here); we should bound
        # the memory usage of the internal queue; and, most
        # importantly, setting "sync" to False disables the default
        # behavior in which you consume buffers in real time. This way,
        # we get data as soon as it's decoded.
        self.sink.set_property('drop', False)
        self.sink.set_property('max-buffers', BUFFER_SIZE)
        self.sink.set_property('sync', False)
        # The callback to receive decoded data.
        self.sink.set_property('emit-signals', True)
        self.sink.connect("new-sample", self._new_sample)

        # We'll need to know when the stream becomes ready and we get
        # its attributes. This semaphore will become available when the
        # caps are received. That way, when __init__() returns, the file
        # (and its attributes) will be ready for reading.
        self.ready_sem = threading.Semaphore(0)
        self.caps_handler = self.sink.get_static_pad("sink").connect(
            "notify::caps", self._notify_caps
        )

        # Link up everything but the decoder (which must be linked only
        # when it becomes ready).
        self.pipeline.add(self.dec)
        self.pipeline.add(self.conv)
        self.pipeline.add(self.sink)

        self.conv.link(self.sink)

        # Set up the queue for data and run the main thread.
        self.queue = queue.Queue(QUEUE_SIZE)
        self.thread = get_loop_thread()

        # This will get filled with an exception if opening fails.
        self.read_exc = None

        # Return as soon as the stream is ready!
        self.running = True
        self.got_caps = False
        self.pipeline.set_state(Gst.State.PLAYING)
        # Wait here until one of the callbacks releases the semaphore.
        self.ready_sem.acquire()
        if self.read_exc:
            # An error occurred before the stream became ready.
            self.close(True)
            raise self.read_exc

    # Gstreamer callbacks.

    def _notify_caps(self, pad, args):
        """The callback for the sinkpad's "notify::caps" signal.

        Records `channels`, `samplerate` and `duration`, then releases
        the semaphore the constructor is waiting on.
        """
        # The sink has started to receive data, so the stream is ready.
        # This also is our opportunity to read information about the
        # stream.
        self.got_caps = True
        info = pad.get_current_caps().get_structure(0)

        # Stream attributes.
        self.channels = info.get_int('channels')[1]
        self.samplerate = info.get_int('rate')[1]

        # Query duration.
        success, length = pad.get_peer().query_duration(Gst.Format.TIME)
        if success:
            # The queried length is divided by 1e9 to obtain seconds.
            self.duration = length / 1000000000
        else:
            self.read_exc = MetadataMissingError('duration not available')

        # Allow constructor to complete.
        self.ready_sem.release()

    # Class-level default; set to True on the instance once a decoded
    # audio pad has been linked in _pad_added.
    _got_a_pad = False

    def _pad_added(self, element, pad):
        """The callback for GstElement's "pad-added" signal.

        Links the first raw-audio pad to the converter; other pads, and
        audio pads arriving after the converter is linked, are ignored.
        """
        # Decoded data is ready. Connect up the decoder, finally.
        name = pad.query_caps(None).to_string()
        if name.startswith('audio/x-raw'):
            nextpad = self.conv.get_static_pad('sink')
            if not nextpad.is_linked():
                self._got_a_pad = True
                pad.link(nextpad)

    def _no_more_pads(self, element):
        """The callback for GstElement's "no-more-pads" signal.
        """
        # Sent when the pads are done adding (i.e., there are no more
        # streams in the file). If we haven't gotten at least one
        # decodable stream, raise an exception.
        if not self._got_a_pad:
            self.read_exc = NoStreamError()
            self.ready_sem.release()  # No effect if we've already started.

    def _new_sample(self, sink):
        """The callback for appsink's "new-sample" signal.

        Copies the sample's bytes onto the output queue while the file
        is running. Always returns Gst.FlowReturn.OK.
        """
        if self.running:
            # New data is available from the pipeline! Dump it into our
            # queue (or possibly block if we're full).
            buf = sink.emit('pull-sample').get_buffer()

            # We can't use Gst.Buffer.extract() to read the data as it crashes
            # when called through PyGObject. We also can't use
            # Gst.Buffer.extract_dup() because we have no way in Python to free
            # the memory that it returns. Instead we get access to the actual
            # data via Gst.Memory.map().
            mem = buf.get_all_memory()
            success, info = mem.map(Gst.MapFlags.READ)
            if success:
                if isinstance(info.data, memoryview):
                    # We need to copy the data as the memoryview is released
                    # when we call mem.unmap()
                    data = bytes(info.data)
                else:
                    # GStreamer Python bindings <= 1.16 return a copy of the
                    # data as bytes()
                    data = info.data
                mem.unmap(info)
                self.queue.put(data)
            else:
                raise GStreamerError("Unable to map buffer memory while reading the file.")
        return Gst.FlowReturn.OK

    def _unkown_type(self, uridecodebin, decodebin, caps):
        """The callback for decodebin's "unknown-type" signal.
        """
        # This is called *before* the stream becomes ready when the
        # file can't be read.
        streaminfo = caps.to_string()
        if not streaminfo.startswith('audio/'):
            # Ignore non-audio (e.g., video) decode errors.
            return
        self.read_exc = UnknownTypeError(streaminfo)
        self.ready_sem.release()

    def _message(self, bus, message):
        """The callback for GstBus's "message" signal (for two kinds of
        messages).

        End-of-stream puts SENTINEL on the queue; an error message is
        translated into an exception stored in `read_exc`. Messages are
        ignored once the file has been closed.
        """
        if not self.finished:
            if message.type == Gst.MessageType.EOS:
                # The file is done. Tell the consumer thread.
                self.queue.put(SENTINEL)
                if not self.got_caps:
                    # If the stream ends before _notify_caps was called, this
                    # is an invalid file.
                    self.read_exc = NoStreamError()
                    self.ready_sem.release()

            elif message.type == Gst.MessageType.ERROR:
                gerror, debug = message.parse_error()
                # NOTE(review): assumes `debug` is a string here -- confirm
                # parse_error() cannot return None for it.
                if 'not-linked' in debug:
                    self.read_exc = NoStreamError()
                elif 'No such file' in debug:
                    self.read_exc = IOError('resource not found')
                else:
                    self.read_exc = FileReadError(debug)
                self.ready_sem.release()

    # Iteration.

    def __next__(self):
        """Return the next decoded block, blocking until one is queued."""
        # Wait for data from the Gstreamer callbacks.
        val = self.queue.get()
        if val == SENTINEL:
            # End of stream.
            raise StopIteration
        return val

    def __iter__(self):
        return self

    # Cleanup.
    def close(self, force=False):
        """Close the file and clean up associated resources.

        Calling `close()` a second time has no effect. Pass
        ``force=True`` to clean up even when the file is not marked as
        running.
        """
        if self.running or force:
            self.running = False
            self.finished = True

            # Unregister for signals, which we registered for above with
            # `add_signal_watch`. (Without this, GStreamer leaks file
            # descriptors.)
            self.pipeline.get_bus().remove_signal_watch()

            # Stop reading the file.
            self.dec.set_property("uri", None)
            # Block spurious signals.
            self.sink.get_static_pad("sink").disconnect(self.caps_handler)

            # Make space in the output queue to let the decoder thread
            # finish. (Otherwise, the thread blocks on its enqueue and
            # the interpreter hangs.)
            try:
                self.queue.get_nowait()
            except queue.Empty:
                pass

            # Halt the pipeline (closing file).
            self.pipeline.set_state(Gst.State.NULL)

    def __del__(self):
        self.close()

    # Context manager.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False
# Smoke test: print the stream information of every file named on the
# command line, followed by the length and first byte value of each
# decoded block.
if __name__ == '__main__':
    for path in sys.argv[1:]:
        path = os.path.abspath(os.path.expanduser(path))
        with GstAudioFile(path) as f:
            print(f.channels)
            print(f.samplerate)
            print(f.duration)
            for s in f:
                # Blocks are bytes objects, so indexing already yields
                # an int; wrapping it in ord() raises TypeError.
                print(len(s), s[0])

View File

@@ -0,0 +1,348 @@
# This file is part of audioread.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Read audio files using CoreAudio on Mac OS X."""
import copy
import ctypes
import ctypes.util
import os
import sys
from .exceptions import DecodeError
from .base import AudioFile
# CoreFoundation and CoreAudio libraries along with their function
# prototypes.
def _load_framework(name):
return ctypes.cdll.LoadLibrary(ctypes.util.find_library(name))
# Library handles used by everything below in this module.
_coreaudio = _load_framework('AudioToolbox')
_corefoundation = _load_framework('CoreFoundation')

# The assignments below declare, for each foreign function used in this
# module, the ctypes types of its return value (`restype`) and of its
# arguments (`argtypes`).

# Convert CFStrings to C strings.
_corefoundation.CFStringGetCStringPtr.restype = ctypes.c_char_p
_corefoundation.CFStringGetCStringPtr.argtypes = [ctypes.c_void_p,
                                                  ctypes.c_int]

# Free memory.
_corefoundation.CFRelease.argtypes = [ctypes.c_void_p]

# Create a file:// URL.
_corefoundation.CFURLCreateFromFileSystemRepresentation.restype = \
    ctypes.c_void_p
_corefoundation.CFURLCreateFromFileSystemRepresentation.argtypes = \
    [ctypes.c_int, ctypes.c_char_p, ctypes.c_int, ctypes.c_bool]

# Get a string representation of a URL.
_corefoundation.CFURLGetString.restype = ctypes.c_void_p
_corefoundation.CFURLGetString.argtypes = [ctypes.c_void_p]

# Open an audio file for reading.
_coreaudio.ExtAudioFileOpenURL.restype = ctypes.c_int
_coreaudio.ExtAudioFileOpenURL.argtypes = [ctypes.c_void_p, ctypes.c_void_p]

# Set audio file property.
_coreaudio.ExtAudioFileSetProperty.restype = ctypes.c_int
_coreaudio.ExtAudioFileSetProperty.argtypes = \
    [ctypes.c_void_p, ctypes.c_uint, ctypes.c_uint, ctypes.c_void_p]

# Get audio file property.
_coreaudio.ExtAudioFileGetProperty.restype = ctypes.c_int
_coreaudio.ExtAudioFileGetProperty.argtypes = \
    [ctypes.c_void_p, ctypes.c_uint, ctypes.c_void_p, ctypes.c_void_p]

# Read from an audio file.
_coreaudio.ExtAudioFileRead.restype = ctypes.c_int
_coreaudio.ExtAudioFileRead.argtypes = \
    [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p]

# Close/free an audio file.
_coreaudio.ExtAudioFileDispose.restype = ctypes.c_int
_coreaudio.ExtAudioFileDispose.argtypes = [ctypes.c_void_p]
# Constants used in CoreAudio.
def multi_char_literal(chars):
    """Emulate a C multi-character integer literal.

    Given a string such as "abc", return the value of the C
    single-quoted literal 'abc': the character codes packed eight bits
    apart, with the first character in the most significant position.
    """
    value = 0
    for ch in chars:
        # Make room for the next character, then merge its code in.
        value = (value << 8) | ord(ch)
    return value
# Property identifiers passed to ExtAudioFileGetProperty and
# ExtAudioFileSetProperty.
PROP_FILE_DATA_FORMAT = multi_char_literal('ffmt')
PROP_CLIENT_DATA_FORMAT = multi_char_literal('cfmt')
PROP_LENGTH = multi_char_literal('#frm')

# Format identifier stored in AudioStreamBasicDescription.mFormatID for
# the client format.
AUDIO_ID_PCM = multi_char_literal('lpcm')

# Bit flags combined into AudioStreamBasicDescription.mFormatFlags.
PCM_IS_FLOAT = 1 << 0
PCM_IS_BIG_ENDIAN = 1 << 1
PCM_IS_SIGNED_INT = 1 << 2
PCM_IS_PACKED = 1 << 3

# Error codes given special treatment by MacError and check().
ERROR_TYPE = multi_char_literal('typ?')
ERROR_FORMAT = multi_char_literal('fmt?')
ERROR_NOT_FOUND = -43
# Check for errors in functions that return error codes.
class MacError(DecodeError):
    """A decoding error built from a CoreAudio error code.

    The two recognized codes get a descriptive message; any other code
    is reported by number.
    """

    def __init__(self, code):
        known_messages = {
            ERROR_TYPE: 'unsupported audio type',
            ERROR_FORMAT: 'unsupported format',
        }
        message = known_messages.get(code)
        if message is None:
            message = 'error %i' % code
        super().__init__(message)
def check(err):
    """Raise an exception for a nonzero error code.

    A zero code means success and returns normally. The "not found"
    code raises OSError; every other nonzero code raises MacError.
    """
    if err == 0:
        return
    if err == ERROR_NOT_FOUND:
        raise OSError('file not found')
    raise MacError(err)
# CoreFoundation objects.
class CFObject:
    """Owns a CoreFoundation object reference and releases it with
    CFRelease when the Python object is destroyed.
    """

    def __init__(self, obj):
        """Wrap the reference `obj`.

        Raises ValueError when `obj` is a null reference. A foreign
        function whose restype is `c_void_p` returns None (not 0) for a
        NULL pointer, so both spellings are rejected.
        """
        if not obj:
            raise ValueError('object is zero')
        self._obj = obj

    def __del__(self):
        # `_obj` is missing when __init__ raised; there is nothing to
        # release in that case.
        obj = getattr(self, '_obj', None)
        if obj and _corefoundation:
            _corefoundation.CFRelease(obj)
class CFURL(CFObject):
    """A CoreFoundation URL created from a filesystem path."""

    def __init__(self, filename):
        """Create the URL for `filename` (str or bytes).

        The path is expanded and made absolute; a str path is encoded
        with the filesystem encoding before being handed to
        CoreFoundation.
        """
        filename = os.path.abspath(os.path.expanduser(filename))
        if not isinstance(filename, bytes):
            filename = filename.encode(sys.getfilesystemencoding())
        url = _corefoundation.CFURLCreateFromFileSystemRepresentation(
            0, filename, len(filename), False
        )
        super().__init__(url)

    def __str__(self):
        """Return the URL's string form as text.

        Falls back to the default object representation when
        CoreFoundation does not provide a C string pointer.
        """
        cfstr = _corefoundation.CFURLGetString(self._obj)
        out = _corefoundation.CFStringGetCStringPtr(cfstr, 0)
        # Resulting CFString does not need to be released according to docs.
        if out is None:
            # A NULL pointer comes back as None; __str__ must still
            # return a str.
            return super().__str__()
        if isinstance(out, bytes):
            # With a c_char_p restype the result is bytes. Encoding 0,
            # requested above, is kCFStringEncodingMacRoman.
            return out.decode('mac_roman', 'replace')
        return out
# Structs used in CoreAudio.
class AudioStreamBasicDescription(ctypes.Structure):
    """ctypes layout of the stream format description that is read with
    PROP_FILE_DATA_FORMAT and written with PROP_CLIENT_DATA_FORMAT.
    """
    # Field order and types define the memory layout handed to the
    # library; do not reorder.
    _fields_ = [
        ("mSampleRate", ctypes.c_double),
        ("mFormatID", ctypes.c_uint),
        ("mFormatFlags", ctypes.c_uint),
        ("mBytesPerPacket", ctypes.c_uint),
        ("mFramesPerPacket", ctypes.c_uint),
        ("mBytesPerFrame", ctypes.c_uint),
        ("mChannelsPerFrame", ctypes.c_uint),
        ("mBitsPerChannel", ctypes.c_uint),
        ("mReserved", ctypes.c_uint),
    ]


class AudioBuffer(ctypes.Structure):
    """ctypes layout of one buffer: channel count, size in bytes, and a
    pointer to the data.
    """
    _fields_ = [
        ("mNumberChannels", ctypes.c_uint),
        ("mDataByteSize", ctypes.c_uint),
        ("mData", ctypes.c_void_p),
    ]


class AudioBufferList(ctypes.Structure):
    """ctypes layout of a buffer list passed to ExtAudioFileRead; this
    declaration holds exactly one AudioBuffer.
    """
    _fields_ = [
        ("mNumberBuffers", ctypes.c_uint),
        ("mBuffers", AudioBuffer * 1),
    ]
# Main functionality.
class ExtAudioFile(AudioFile):
    """A CoreAudio "extended audio file". Reads information and raw PCM
    audio data from any file that CoreAudio knows how to decode.

    >>> with ExtAudioFile('something.m4a') as f:
    >>>     print(f.samplerate)
    >>>     print(f.channels)
    >>>     print(f.duration)
    >>>     for block in f:
    >>>         do_something(block)
    """

    def __init__(self, filename):
        """Open `filename` and configure 16-bit PCM output.

        Raises OSError when the library reports the file as not found
        and MacError for any other nonzero error code.
        """
        # Mark the object closed before anything can fail, so that
        # __del__ -> close() finds the attribute and has nothing to
        # dispose if the URL cannot be created or the file cannot be
        # opened.
        self.closed = True
        url = CFURL(filename)
        self._obj = self._open_url(url)
        del url

        self.closed = False
        self._file_fmt = None
        self._client_fmt = None

        self.setup()

    @classmethod
    def _open_url(cls, url):
        """Given a CFURL Python object, return an opened ExtAudioFileRef.
        """
        file_obj = ctypes.c_void_p()
        check(_coreaudio.ExtAudioFileOpenURL(
            url._obj, ctypes.byref(file_obj)
        ))
        return file_obj

    def set_client_format(self, desc):
        """Set the client format description. This describes the
        encoding of the data that the program will read from this
        object.

        `desc` is an AudioStreamBasicDescription whose format ID must
        be AUDIO_ID_PCM.
        """
        assert desc.mFormatID == AUDIO_ID_PCM
        check(_coreaudio.ExtAudioFileSetProperty(
            self._obj, PROP_CLIENT_DATA_FORMAT, ctypes.sizeof(desc),
            ctypes.byref(desc)
        ))
        self._client_fmt = desc

    def get_file_format(self):
        """Get the file format description. This describes the type of
        data stored on disk.

        The description is fetched once and cached on the object.
        """
        # Have cached file format?
        if self._file_fmt is not None:
            return self._file_fmt

        # Make the call to retrieve it.
        desc = AudioStreamBasicDescription()
        size = ctypes.c_int(ctypes.sizeof(desc))
        check(_coreaudio.ExtAudioFileGetProperty(
            self._obj, PROP_FILE_DATA_FORMAT, ctypes.byref(size),
            ctypes.byref(desc)
        ))

        # Cache result.
        self._file_fmt = desc
        return desc

    @property
    def channels(self):
        """The number of channels in the audio source."""
        return int(self.get_file_format().mChannelsPerFrame)

    @property
    def samplerate(self):
        """Gets the sample rate of the audio."""
        return int(self.get_file_format().mSampleRate)

    @property
    def duration(self):
        """Gets the length of the file in seconds (a float)."""
        return float(self.nframes) / self.samplerate

    @property
    def nframes(self):
        """Gets the number of frames in the source file."""
        length = ctypes.c_long()
        size = ctypes.c_int(ctypes.sizeof(length))
        check(_coreaudio.ExtAudioFileGetProperty(
            self._obj, PROP_LENGTH, ctypes.byref(size), ctypes.byref(length)
        ))
        return length.value

    def setup(self, bitdepth=16):
        """Set the client format parameters, specifying the desired PCM
        audio data format to be read from the file. Must be called
        before reading from the file.

        The client format is a copy of the file format converted to
        packed signed-integer PCM with `bitdepth` bits per channel.
        """
        fmt = self.get_file_format()
        newfmt = copy.copy(fmt)

        newfmt.mFormatID = AUDIO_ID_PCM
        newfmt.mFormatFlags = \
            PCM_IS_SIGNED_INT | PCM_IS_PACKED
        newfmt.mBitsPerChannel = bitdepth
        newfmt.mBytesPerPacket = \
            (fmt.mChannelsPerFrame * newfmt.mBitsPerChannel // 8)
        # One frame per packet, so a frame is as large as a packet.
        newfmt.mFramesPerPacket = 1
        newfmt.mBytesPerFrame = newfmt.mBytesPerPacket
        self.set_client_format(newfmt)

    def read_data(self, blocksize=4096):
        """Generates byte strings reflecting the audio data in the file.

        `blocksize` is the size in bytes of the buffer used for each
        read. Iteration stops when a read returns no data.
        """
        frames = ctypes.c_uint(blocksize // self._client_fmt.mBytesPerFrame)
        buf = ctypes.create_string_buffer(blocksize)

        # A single-buffer list pointing at `buf`.
        buflist = AudioBufferList()
        buflist.mNumberBuffers = 1
        buflist.mBuffers[0].mNumberChannels = \
            self._client_fmt.mChannelsPerFrame
        buflist.mBuffers[0].mDataByteSize = blocksize
        buflist.mBuffers[0].mData = ctypes.cast(buf, ctypes.c_void_p)

        while True:
            check(_coreaudio.ExtAudioFileRead(
                self._obj, ctypes.byref(frames), ctypes.byref(buflist)
            ))

            assert buflist.mNumberBuffers == 1
            size = buflist.mBuffers[0].mDataByteSize
            if not size:
                break

            # Copy the bytes out of the shared buffer before the next
            # read reuses it.
            data = ctypes.cast(buflist.mBuffers[0].mData,
                               ctypes.POINTER(ctypes.c_char))
            blob = data[:size]
            yield blob

    def close(self):
        """Close the audio file and free associated memory."""
        if not self.closed:
            check(_coreaudio.ExtAudioFileDispose(self._obj))
            self.closed = True

    def __del__(self):
        if _coreaudio:
            self.close()

    # Context manager methods.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

    # Iteration.
    def __iter__(self):
        return self.read_data()

View File

@@ -0,0 +1,86 @@
# This file is part of audioread.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Decode MPEG audio files with MAD (via pymad)."""
import mad
from . import DecodeError
from .base import AudioFile
class UnsupportedError(DecodeError):
    """Raised when MAD is unable to read the given file."""
class MadAudioFile(AudioFile):
    """MPEG audio file decoder using the MAD library."""
    def __init__(self, filename):
        """Open `filename` for decoding.

        Raises UnsupportedError when MAD reports no total time for the
        file, which indicates a failed open.
        """
        self.fp = open(filename, 'rb')
        try:
            self.mf = mad.MadFile(self.fp)
        except BaseException:
            # Do not leave the handle open if the decoder cannot even be
            # constructed.
            self.fp.close()
            raise
        if not self.mf.total_time():  # Indicates a failed open.
            self.fp.close()
            raise UnsupportedError()

    def close(self):
        """Close the underlying file and drop the decoder object.

        Safe to call more than once and on a partially built instance.
        """
        if hasattr(self, 'fp'):
            self.fp.close()
        if hasattr(self, 'mf'):
            del self.mf

    def read_blocks(self, block_size=4096):
        """Generates buffers containing PCM data for the audio file.
        """
        while True:
            out = self.mf.read(block_size)
            if not out:
                break
            yield bytes(out)

    @property
    def samplerate(self):
        """Sample rate in Hz."""
        return self.mf.samplerate()

    @property
    def duration(self):
        """Length of the audio in seconds (a float)."""
        # total_time() is divided by 1000 to obtain seconds.
        return float(self.mf.total_time()) / 1000

    @property
    def channels(self):
        """The number of channels."""
        if self.mf.mode() == mad.MODE_SINGLE_CHANNEL:
            return 1
        # Dual channel, joint stereo, stereo, and any other mode are all
        # reported as two channels.
        return 2

    def __del__(self):
        self.close()

    # Iteration.
    def __iter__(self):
        return self.read_blocks()

    # Context manager.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

View File

@@ -0,0 +1,149 @@
# This file is part of audioread.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""Uses standard-library modules to read AIFF, AIFF-C, and WAV files."""
import aifc
import audioop
import struct
import sunau
import wave
from .exceptions import DecodeError
from .base import AudioFile
# Width, in bytes, of the samples this module produces: every block that is
# read is converted to two-byte (16-bit) samples before being yielded.
TARGET_WIDTH = 2
# Sample widths, in bytes, that can be decoded; a file with any other width
# is rejected with BitWidthError. Python 3.4 added support for 24-bit
# (3-byte) samples.
SUPPORTED_WIDTHS = (1, 2, 3, 4)
class UnsupportedError(DecodeError):
    """Raised when a file is not in AIFF, WAV, or Au format."""
class BitWidthError(DecodeError):
    """Raised when a file's sample width is not one that can be decoded."""
def byteswap(s):
    """Swap the endianness of the bytestring `s`, which must be an array
    of shorts (16-bit signed integers).

    Returns a new ``bytes`` object of the same length in which the two
    bytes of every sample have been exchanged. Raises ``ValueError`` if
    `s` does not contain a whole number of 16-bit samples.
    """
    if len(s) % 2 != 0:
        raise ValueError('byteswap requires an even number of bytes')

    # Exchange the even- and odd-indexed bytes with two strided copies
    # rather than packing and unpacking every sample individually.
    swapped = bytearray(len(s))
    swapped[0::2] = s[1::2]
    swapped[1::2] = s[0::2]
    return bytes(swapped)
class RawAudioFile(AudioFile):
    """An AIFF, WAV, or Au file that can be read by the Python standard
    library modules ``wave``, ``aifc``, and ``sunau``.
    """
    def __init__(self, filename):
        """Open `filename` with the first standard-library reader that
        accepts it.

        Raises UnsupportedError if none of the readers can open the file
        and BitWidthError if the file's sample width is not supported.
        """
        self._fh = open(filename, 'rb')

        # Each entry: (open function, the error it raises for a file it
        # does not recognize, whether its samples need byte-swapping).
        readers = (
            (aifc.open, aifc.Error, True),
            (wave.open, wave.Error, False),
            (sunau.open, sunau.Error, True),
        )

        try:
            for opener, error, needs_byteswap in readers:
                try:
                    self._file = opener(self._fh)
                except error:
                    # Return to the beginning of the file to try the
                    # next reader.
                    self._fh.seek(0)
                    continue
                self._needs_byteswap = needs_byteswap
                self._check()
                return
        except BaseException:
            # A reader may fail with something other than its own error
            # type (e.g. EOFError on a truncated file); do not leak the
            # file handle in that case. Closing twice is harmless.
            self._fh.close()
            raise

        # None of the three libraries could open the file.
        self._fh.close()
        raise UnsupportedError()

    def _check(self):
        """Check that the files' parameters allow us to decode it and
        raise an error otherwise.
        """
        if self._file.getsampwidth() not in SUPPORTED_WIDTHS:
            self.close()
            raise BitWidthError()

    def close(self):
        """Close the underlying file."""
        self._file.close()
        self._fh.close()

    @property
    def channels(self):
        """Number of audio channels."""
        return self._file.getnchannels()

    @property
    def samplerate(self):
        """Sample rate in Hz."""
        return self._file.getframerate()

    @property
    def duration(self):
        """Length of the audio in seconds (a float)."""
        return float(self._file.getnframes()) / self.samplerate

    def read_data(self, block_samples=1024):
        """Generates blocks of PCM data found in the file.

        Up to `block_samples` frames are read at a time and converted to
        TARGET_WIDTH-byte samples before being yielded.
        """
        old_width = self._file.getsampwidth()

        while True:
            data = self._file.readframes(block_samples)
            if not data:
                break

            # Make sure we have the desired bitdepth and endianness.
            # NOTE(review): 8-bit WAV samples are conventionally unsigned
            # while lin2lin treats its input as signed -- confirm whether
            # 8-bit WAV input needs a bias adjustment here.
            data = audioop.lin2lin(data, old_width, TARGET_WIDTH)
            if self._needs_byteswap and self._file.getcomptype() != 'sowt':
                # Big-endian data. Swap endianness.
                data = byteswap(data)
            yield data

    # Context manager.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

    # Iteration.
    def __iter__(self):
        return self.read_data()