Videre
This commit is contained in:
@@ -0,0 +1,215 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Core IO and DSP
|
||||
===============
|
||||
|
||||
Audio loading
|
||||
-------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
load
|
||||
stream
|
||||
to_mono
|
||||
resample
|
||||
get_duration
|
||||
get_samplerate
|
||||
|
||||
|
||||
Time-domain processing
|
||||
----------------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
autocorrelate
|
||||
lpc
|
||||
zero_crossings
|
||||
mu_compress
|
||||
mu_expand
|
||||
|
||||
|
||||
Signal generation
|
||||
-----------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
clicks
|
||||
tone
|
||||
chirp
|
||||
|
||||
|
||||
Spectral representations
|
||||
------------------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
stft
|
||||
istft
|
||||
reassigned_spectrogram
|
||||
|
||||
cqt
|
||||
icqt
|
||||
hybrid_cqt
|
||||
pseudo_cqt
|
||||
|
||||
vqt
|
||||
|
||||
iirt
|
||||
|
||||
fmt
|
||||
|
||||
magphase
|
||||
|
||||
|
||||
Phase recovery
|
||||
--------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
griffinlim
|
||||
griffinlim_cqt
|
||||
|
||||
|
||||
Harmonics
|
||||
---------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
interp_harmonics
|
||||
salience
|
||||
f0_harmonics
|
||||
|
||||
phase_vocoder
|
||||
|
||||
|
||||
Magnitude scaling
|
||||
-----------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
amplitude_to_db
|
||||
db_to_amplitude
|
||||
power_to_db
|
||||
db_to_power
|
||||
|
||||
perceptual_weighting
|
||||
frequency_weighting
|
||||
multi_frequency_weighting
|
||||
A_weighting
|
||||
B_weighting
|
||||
C_weighting
|
||||
D_weighting
|
||||
|
||||
pcen
|
||||
|
||||
|
||||
Time unit conversion
|
||||
--------------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
frames_to_samples
|
||||
frames_to_time
|
||||
samples_to_frames
|
||||
samples_to_time
|
||||
time_to_frames
|
||||
time_to_samples
|
||||
|
||||
blocks_to_frames
|
||||
blocks_to_samples
|
||||
blocks_to_time
|
||||
|
||||
|
||||
Frequency unit conversion
|
||||
-------------------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
hz_to_note
|
||||
hz_to_midi
|
||||
hz_to_svara_h
|
||||
hz_to_svara_c
|
||||
hz_to_fjs
|
||||
midi_to_hz
|
||||
midi_to_note
|
||||
midi_to_svara_h
|
||||
midi_to_svara_c
|
||||
note_to_hz
|
||||
note_to_midi
|
||||
note_to_svara_h
|
||||
note_to_svara_c
|
||||
|
||||
hz_to_mel
|
||||
hz_to_octs
|
||||
mel_to_hz
|
||||
octs_to_hz
|
||||
|
||||
A4_to_tuning
|
||||
tuning_to_A4
|
||||
|
||||
|
||||
Music notation
|
||||
--------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
key_to_notes
|
||||
key_to_degrees
|
||||
|
||||
mela_to_svara
|
||||
mela_to_degrees
|
||||
|
||||
thaat_to_degrees
|
||||
|
||||
list_mela
|
||||
list_thaat
|
||||
|
||||
fifths_to_note
|
||||
interval_to_fjs
|
||||
interval_frequencies
|
||||
pythagorean_intervals
|
||||
plimit_intervals
|
||||
|
||||
|
||||
Frequency range generation
|
||||
--------------------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
fft_frequencies
|
||||
cqt_frequencies
|
||||
mel_frequencies
|
||||
tempo_frequencies
|
||||
fourier_tempo_frequencies
|
||||
|
||||
|
||||
Pitch and tuning
|
||||
----------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
pyin
|
||||
yin
|
||||
|
||||
estimate_tuning
|
||||
pitch_tuning
|
||||
piptrack
|
||||
|
||||
|
||||
Miscellaneous
|
||||
-------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
samples_like
|
||||
times_like
|
||||
|
||||
get_fftlib
|
||||
set_fftlib
|
||||
"""
|
||||
|
||||
import lazy_loader as lazy

from .version import version as __version__

# Lazily attach all submodules and public symbols (PEP 562 module
# ``__getattr__``): nothing is imported until first attribute access,
# and the public API is declared in the adjacent ``.pyi`` stub file
# rather than imported eagerly here.
__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
||||
@@ -0,0 +1,123 @@
|
||||
from . import core
|
||||
from . import beat
|
||||
from . import decompose
|
||||
from . import display
|
||||
from . import effects
|
||||
from . import feature
|
||||
from . import filters
|
||||
from . import onset
|
||||
from . import segment
|
||||
from . import sequence
|
||||
from . import util
|
||||
|
||||
from ._cache import cache as cache
|
||||
|
||||
from .util.exceptions import (
|
||||
LibrosaError as LibrosaError,
|
||||
ParameterError as ParameterError,
|
||||
)
|
||||
from .util.files import example as example, ex as ex
|
||||
from .util.files import cite as cite
|
||||
|
||||
from .version import show_versions as show_versions
|
||||
|
||||
from .core import (
|
||||
frames_to_samples as frames_to_samples,
|
||||
frames_to_time as frames_to_time,
|
||||
samples_to_frames as samples_to_frames,
|
||||
samples_to_time as samples_to_time,
|
||||
time_to_samples as time_to_samples,
|
||||
time_to_frames as time_to_frames,
|
||||
blocks_to_samples as blocks_to_samples,
|
||||
blocks_to_frames as blocks_to_frames,
|
||||
blocks_to_time as blocks_to_time,
|
||||
note_to_hz as note_to_hz,
|
||||
note_to_midi as note_to_midi,
|
||||
midi_to_hz as midi_to_hz,
|
||||
midi_to_note as midi_to_note,
|
||||
hz_to_note as hz_to_note,
|
||||
hz_to_midi as hz_to_midi,
|
||||
hz_to_mel as hz_to_mel,
|
||||
hz_to_octs as hz_to_octs,
|
||||
hz_to_fjs as hz_to_fjs,
|
||||
mel_to_hz as mel_to_hz,
|
||||
octs_to_hz as octs_to_hz,
|
||||
A4_to_tuning as A4_to_tuning,
|
||||
tuning_to_A4 as tuning_to_A4,
|
||||
fft_frequencies as fft_frequencies,
|
||||
cqt_frequencies as cqt_frequencies,
|
||||
mel_frequencies as mel_frequencies,
|
||||
tempo_frequencies as tempo_frequencies,
|
||||
fourier_tempo_frequencies as fourier_tempo_frequencies,
|
||||
A_weighting as A_weighting,
|
||||
B_weighting as B_weighting,
|
||||
C_weighting as C_weighting,
|
||||
D_weighting as D_weighting,
|
||||
Z_weighting as Z_weighting,
|
||||
frequency_weighting as frequency_weighting,
|
||||
multi_frequency_weighting as multi_frequency_weighting,
|
||||
samples_like as samples_like,
|
||||
times_like as times_like,
|
||||
midi_to_svara_h as midi_to_svara_h,
|
||||
midi_to_svara_c as midi_to_svara_c,
|
||||
note_to_svara_h as note_to_svara_h,
|
||||
note_to_svara_c as note_to_svara_c,
|
||||
hz_to_svara_h as hz_to_svara_h,
|
||||
hz_to_svara_c as hz_to_svara_c,
|
||||
load as load,
|
||||
stream as stream,
|
||||
to_mono as to_mono,
|
||||
resample as resample,
|
||||
get_duration as get_duration,
|
||||
get_samplerate as get_samplerate,
|
||||
autocorrelate as autocorrelate,
|
||||
lpc as lpc,
|
||||
zero_crossings as zero_crossings,
|
||||
clicks as clicks,
|
||||
tone as tone,
|
||||
chirp as chirp,
|
||||
mu_compress as mu_compress,
|
||||
mu_expand as mu_expand,
|
||||
stft as stft,
|
||||
istft as istft,
|
||||
magphase as magphase,
|
||||
iirt as iirt,
|
||||
reassigned_spectrogram as reassigned_spectrogram,
|
||||
phase_vocoder as phase_vocoder,
|
||||
perceptual_weighting as perceptual_weighting,
|
||||
power_to_db as power_to_db,
|
||||
db_to_power as db_to_power,
|
||||
amplitude_to_db as amplitude_to_db,
|
||||
db_to_amplitude as db_to_amplitude,
|
||||
fmt as fmt,
|
||||
pcen as pcen,
|
||||
griffinlim as griffinlim,
|
||||
estimate_tuning as estimate_tuning,
|
||||
pitch_tuning as pitch_tuning,
|
||||
piptrack as piptrack,
|
||||
yin as yin,
|
||||
pyin as pyin,
|
||||
cqt as cqt,
|
||||
hybrid_cqt as hybrid_cqt,
|
||||
pseudo_cqt as pseudo_cqt,
|
||||
icqt as icqt,
|
||||
griffinlim_cqt as griffinlim_cqt,
|
||||
vqt as vqt,
|
||||
salience as salience,
|
||||
interp_harmonics as interp_harmonics,
|
||||
f0_harmonics as f0_harmonics,
|
||||
get_fftlib as get_fftlib,
|
||||
set_fftlib as set_fftlib,
|
||||
key_to_degrees as key_to_degrees,
|
||||
key_to_notes as key_to_notes,
|
||||
mela_to_degrees as mela_to_degrees,
|
||||
mela_to_svara as mela_to_svara,
|
||||
thaat_to_degrees as thaat_to_degrees,
|
||||
list_mela as list_mela,
|
||||
list_thaat as list_thaat,
|
||||
fifths_to_note as fifths_to_note,
|
||||
interval_to_fjs as interval_to_fjs,
|
||||
interval_frequencies as interval_frequencies,
|
||||
pythagorean_intervals as pythagorean_intervals,
|
||||
plimit_intervals as plimit_intervals,
|
||||
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Function caching"""
|
||||
|
||||
import os
|
||||
from typing import Any, Callable, TypeVar
|
||||
from joblib import Memory
|
||||
from decorator import FunctionMaker
|
||||
|
||||
|
||||
def _decorator_apply(dec, func):
    """Decorate ``func`` with ``dec`` while preserving its exact signature.

    Unlike ``functools.wraps``, ``decorator.FunctionMaker`` rebuilds the
    wrapper so it carries the same call signature as ``func``.  The
    ``__wrapped__`` attribute is set so that introspection tools can
    still reach the undecorated function.
    """
    decorated = dec(func)
    return FunctionMaker.create(
        func,
        "return decfunc(%(shortsignature)s)",
        {"decfunc": decorated},
        __wrapped__=func,
    )
|
||||
|
||||
|
||||
_F = TypeVar("_F", bound=Callable[..., Any])
|
||||
|
||||
|
||||
class CacheManager(object):
    """Wrap ``joblib.Memory`` with a ``__call__`` attribute so that the
    manager itself can be used as a caching decorator factory.

    A numeric caching *level* acts as a filter: each decorated function
    requests a level, and is only cached when the manager's configured
    level is at least that high.  This gives users a single knob for
    trading storage usage against speed.
    """

    def __init__(self, *args: Any, **kwargs: Any):
        # Pop our extension parameter before forwarding the rest to joblib
        threshold = kwargs.pop("level", 10)

        # Backing joblib memory store; all remaining arguments are
        # passed through to joblib.Memory unchanged.
        self.memory: Memory = Memory(*args, **kwargs)
        # Caching threshold: smaller numbers mean less caching
        self.level: int = threshold

    def __call__(self, level: int) -> Callable[[_F], _F]:
        """
        Cache with an explicitly defined level.

        Example usage:

        @cache(level=2)
        def semi_important_function(some_arguments):
            ...
        """

        def decorate(function):
            """Add an input/output cache to the specified function."""
            # Skip caching entirely when no storage location is
            # configured, or when the requested level exceeds ours.
            if self.memory.location is None or self.level < level:
                return function
            return _decorator_apply(self.memory.cache, function)

        return decorate

    def clear(self, *args: Any, **kwargs: Any) -> None:
        """Clear the cache"""
        self.memory.clear(*args, **kwargs)

    def eval(self, *args: Any, **kwargs: Any) -> Any:
        """Evaluate a function"""
        return self.memory.eval(*args, **kwargs)

    def format(self, *args: Any, **kwargs: Any) -> Any:
        """Return the formatted representation of an object"""
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args: Any, **kwargs: Any) -> None:
        """Reduce the size of the cache"""
        self.memory.reduce_size(*args, **kwargs)  # pragma: no cover

    def warn(self, *args: Any, **kwargs: Any) -> None:
        """Raise a warning"""
        self.memory.warn(*args, **kwargs)  # pragma: no cover
|
||||
|
||||
|
||||
# Instantiate the cache from the environment.
# NOTE: environment variables are strings, so LIBROSA_CACHE_COMPRESS is
# truthy for any non-empty value; the numeric settings are converted
# explicitly with int().  When LIBROSA_CACHE_DIR is unset, the location
# is None and caching is disabled (see CacheManager.__call__).
cache: CacheManager = CacheManager(
    os.environ.get("LIBROSA_CACHE_DIR", None),
    mmap_mode=os.environ.get("LIBROSA_CACHE_MMAP", None),
    compress=os.environ.get("LIBROSA_CACHE_COMPRESS", False),
    verbose=int(os.environ.get("LIBROSA_CACHE_VERBOSE", 0)),
    level=int(os.environ.get("LIBROSA_CACHE_LEVEL", 10)),
)
|
||||
@@ -0,0 +1,84 @@
|
||||
from __future__ import annotations

from typing import Callable, Generator, List, TypeVar, Union, Tuple, Any, Sequence
from typing_extensions import Literal, Never
import numpy as np
from numpy.typing import ArrayLike

# Window specification: a window name, a (name, *params) tuple, a single
# shape parameter, a callable mapping a length to a window array, or a
# pre-computed array of window values.
_WindowSpec = Union[str, Tuple[Any, ...], float, Callable[[int], np.ndarray], ArrayLike]

_T = TypeVar("_T")

_IterableLike = Union[List[_T], Tuple[_T, ...], Generator[_T, None, None]]
_SequenceLike = Union[Sequence[_T], np.ndarray]
_ScalarOrSequence = Union[_T, _SequenceLike[_T]]

# The following definitions are copied from numpy/_typing/_scalars.py
# (We don't import them directly from numpy because they're an implementation detail.)
###
### START COPIED CODE
###
_CharLike_co = Union[str, bytes]

# The 6 `<X>Like_co` type-aliases below represent all scalars that can be
# coerced into `<X>` (with the casting rule `same_kind`)
_BoolLike_co = Union[bool, np.bool_]
_UIntLike_co = Union[_BoolLike_co, "np.unsignedinteger[Any]"]
_IntLike_co = Union[_BoolLike_co, int, "np.integer[Any]"]
_FloatLike_co = Union[_IntLike_co, float, "np.floating[Any]"]
_ComplexLike_co = Union[_FloatLike_co, complex, "np.complexfloating[Any, Any]"]
_TD64Like_co = Union[_IntLike_co, np.timedelta64]

_NumberLike_co = Union[int, float, complex, "np.number[Any]", np.bool_]
_ScalarLike_co = Union[
    int,
    float,
    complex,
    str,
    bytes,
    np.generic,
]
# `_VoidLike_co` is technically not a scalar, but it's close enough
_VoidLike_co = Union[Tuple[Any, ...], np.void]

# Padding modes in general (mirrors the modes accepted by numpy.pad)
_ModeKind = Literal[
    "constant",
    "edge",
    "linear_ramp",
    "maximum",
    "mean",
    "median",
    "minimum",
    "reflect",
    "symmetric",
    "wrap",
    "empty",
]
###
### END COPIED CODE
###

# Padding modes for head/tail padding
# These rule out padding modes that depend on the entire array
_STFTPad = Literal[
    "constant",
    "edge",
    "linear_ramp",
    "reflect",
    "symmetric",
    "empty",
]

_PadMode = Union[_ModeKind, Callable[..., Any]]

_PadModeSTFT = Union[_STFTPad, Callable[..., Any]]
|
||||
|
||||
|
||||
def _ensure_not_reachable(__arg: Never):
    """Statically assert that a code path can never execute.

    Analogous to ``typing_extensions.assert_never``, but deliberately a
    no-op at runtime: callers are expected to raise their own, more
    user-friendly exception immediately afterwards.
    """
    pass
|
||||
699
linedance-app/venv/lib/python3.12/site-packages/librosa/beat.py
Normal file
699
linedance-app/venv/lib/python3.12/site-packages/librosa/beat.py
Normal file
@@ -0,0 +1,699 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Beat and tempo
|
||||
==============
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
beat_track
|
||||
plp
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import scipy
|
||||
import scipy.stats
|
||||
import numba
|
||||
|
||||
from . import core
|
||||
from . import onset
|
||||
from . import util
|
||||
from .feature import fourier_tempogram
|
||||
from .feature import tempo as _tempo
|
||||
from .util.exceptions import ParameterError
|
||||
from .util.decorators import moved
|
||||
from typing import Optional, Tuple, Union
|
||||
from ._typing import _FloatLike_co
|
||||
|
||||
__all__ = ["beat_track", "tempo", "plp"]


# Deprecated alias: ``librosa.beat.tempo`` moved to ``librosa.feature.tempo``
# in version 0.10.0 and is scheduled for removal in 1.0.  ``moved`` wraps the
# new implementation so that calls through the old name still work (with a
# deprecation notice).
tempo = moved(moved_from="librosa.beat.tempo", version="0.10.0", version_removed="1.0")(
    _tempo
)
|
||||
|
||||
|
||||
def beat_track(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    start_bpm: float = 120.0,
    tightness: float = 100,
    trim: bool = True,
    bpm: Optional[Union[_FloatLike_co, np.ndarray]] = None,
    prior: Optional[scipy.stats.rv_continuous] = None,
    units: str = "frames",
    sparse: bool = True
) -> Tuple[Union[_FloatLike_co, np.ndarray], np.ndarray]:
    r"""Dynamic programming beat tracker.

    Beats are detected in three stages, following the method of [#]_:

    1. Measure onset strength
    2. Estimate tempo from onset correlation
    3. Pick peaks in onset strength approximately consistent with estimated
       tempo

    .. [#] Ellis, Daniel PW. "Beat tracking by dynamic programming."
       Journal of New Music Research 36.1 (2007): 51-60.
       http://labrosa.ee.columbia.edu/projects/beattrack/

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        audio time series
    sr : number > 0 [scalar]
        sampling rate of ``y``
    onset_envelope : np.ndarray [shape=(..., m)] or None
        (optional) pre-computed onset strength envelope.
    hop_length : int > 0 [scalar]
        number of audio samples between successive ``onset_envelope`` values
    start_bpm : float > 0 [scalar]
        initial guess for the tempo estimator (in beats per minute)
    tightness : float [scalar]
        tightness of beat distribution around tempo
    trim : bool [scalar]
        trim leading/trailing beats with weak onsets
    bpm : float [scalar] or np.ndarray [shape=(...)]
        (optional) If provided, use ``bpm`` as the tempo instead of
        estimating it from ``onsets``.

        If multichannel, tempo estimates can be provided for all channels.

        Tempo estimates may also be time-varying, in which case the shape
        of ``bpm`` should match that of ``onset_envelope``, i.e.,
        one estimate provided for each frame.
    prior : scipy.stats.rv_continuous [optional]
        An optional prior distribution over tempo.
        If provided, ``start_bpm`` will be ignored.
    units : {'frames', 'samples', 'time'}
        The units to encode detected beat events in.
        By default, 'frames' are used.
    sparse : bool
        If ``True`` (default), detections are returned as an array of frames,
        samples, or time indices (as specified by ``units=``).

        If ``False``, detections are encoded as a dense boolean array where
        ``beats[..., n]`` is true if there's a beat at frame index ``n``.

        .. note:: multi-channel input is only supported when ``sparse=False``.

    Returns
    -------
    tempo : float [scalar, non-negative] or np.ndarray
        estimated global tempo (in beats per minute)

        If multi-channel and ``bpm`` is not provided, a separate
        tempo will be returned for each channel.

        .. note::
            By default, the tempo is returned as an ndarray even for mono input.
            In this case, the array will have a single element and be
            one-dimensional.  This is to ensure consistent return types for
            multi-channel input.
    beats : np.ndarray
        estimated beat event locations.

        If ``sparse=True`` (default), beat locations are given in the specified
        units (default is frame indices).

        If ``sparse=False`` (required for multichannel input), beat events are
        indicated by a boolean for each frame.

        .. note::
            If no onset strength could be detected, beat_tracker estimates
            0 BPM and returns an empty list.

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided,
        or if ``units`` is not one of 'frames', 'samples', or 'time'

    See Also
    --------
    librosa.onset.onset_strength

    Examples
    --------
    Track beats using time series input

    >>> y, sr = librosa.load(librosa.ex('choice'), duration=10)
    >>> tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    >>> tempo
    135.99917763157896
    >>> beats
    array([  3,  21,  40,  59,  78,  96, 116, 135, 154, 173, 192, 211,
           230, 249, 268, 287, 306, 325, 344, 363])

    Or convert the beat frames to timestamps

    >>> librosa.frames_to_time(beats, sr=sr)
    array([0.07 , 0.488, 0.929, 1.37 , 1.811, 2.229, 2.694, 3.135,
           3.576, 4.017, 4.458, 4.899, 5.341, 5.782, 6.223, 6.664,
           7.105, 7.546, 7.988, 8.429])

    Beats may also be computed from a pre-computed onset envelope
    (``onset_envelope=onset_env``), or returned as a dense boolean array
    (``sparse=False``), which is required for multi-channel input.
    """
    # First, get the frame->beat strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")

        onset_envelope = onset.onset_strength(
            y=y, sr=sr, hop_length=hop_length, aggregate=np.median
        )

    # Sparse (index-based) output cannot represent per-channel detections
    if sparse and onset_envelope.ndim != 1:
        raise ParameterError(f"sparse=True (default) does not support "
                             f"{onset_envelope.ndim}-dimensional inputs. "
                             f"Either set sparse=False or convert the signal to mono.")

    # Do we have any onsets to grab?
    if not onset_envelope.any():
        # Degenerate case: a silent envelope yields 0 BPM and no beats,
        # in the shape appropriate for the requested output mode.
        if sparse:
            return (0.0, np.array([], dtype=int))
        else:
            return (np.zeros(shape=onset_envelope.shape[:-1], dtype=float),
                    np.zeros_like(onset_envelope, dtype=bool))

    # Estimate BPM if one was not provided
    if bpm is None:
        bpm = _tempo(
            onset_envelope=onset_envelope,
            sr=sr,
            hop_length=hop_length,
            start_bpm=start_bpm,
            prior=prior,
        )

    # Ensure that tempo is in a shape that is compatible with vectorization:
    # broadcast the (possibly scalar) bpm against the onset envelope's
    # leading (channel) dimensions.
    _bpm = np.atleast_1d(bpm)
    bpm_expanded = util.expand_to(_bpm,
                                  ndim=onset_envelope.ndim,
                                  axes=range(_bpm.ndim))

    # Then, run the tracker: beats is a boolean array per frame
    beats = __beat_tracker(onset_envelope, bpm_expanded, float(sr) / hop_length, tightness, trim)

    # Convert the dense boolean array to frame indices for sparse output
    if sparse:
        beats = np.flatnonzero(beats)

    # Convert frame indices to the requested unit.
    # Note: the originally provided bpm (not the broadcast copy) is returned.
    if units == "frames":
        pass
    elif units == "samples":
        return (bpm, core.frames_to_samples(beats, hop_length=hop_length))
    elif units == "time":
        return (bpm, core.frames_to_time(beats, hop_length=hop_length, sr=sr))
    else:
        raise ParameterError(f"Invalid unit type: {units}")
    return (bpm, beats)
|
||||
|
||||
|
||||
def plp(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    win_length: int = 384,
    tempo_min: Optional[float] = 30,
    tempo_max: Optional[float] = 300,
    prior: Optional[scipy.stats.rv_continuous] = None,
) -> np.ndarray:
    """Predominant local pulse (PLP) estimation. [#]_

    The PLP method analyzes the onset strength envelope in the frequency domain
    to find a locally stable tempo for each frame. These local periodicities
    are used to synthesize local half-waves, which are combined such that peaks
    coincide with rhythmically salient frames (e.g. onset events on a musical
    time grid).  The local maxima of the pulse curve can be taken as estimated
    beat positions.

    This method may be preferred over the dynamic programming method of
    `beat_track` when the tempo is expected to vary significantly over time.
    Additionally, since `plp` does not require the entire signal to make
    predictions, it may be preferable when beat-tracking long recordings in
    a streaming setting.

    .. [#] Grosche, P., & Muller, M. (2011).
        "Extracting predominant local pulse information from music recordings."
        IEEE Transactions on Audio, Speech, and Language Processing,
        19(6), 1688-1701.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        audio time series. Multi-channel is supported.
    sr : number > 0 [scalar]
        sampling rate of ``y``
    onset_envelope : np.ndarray [shape=(..., n)] or None
        (optional) pre-computed onset strength envelope
    hop_length : int > 0 [scalar]
        number of audio samples between successive ``onset_envelope`` values
    win_length : int > 0 [scalar]
        number of frames to use for tempogram analysis.
        By default, 384 frames (at ``sr=22050`` and ``hop_length=512``)
        corresponds to about 8.9 seconds.
    tempo_min, tempo_max : numbers > 0 [scalar], optional
        Minimum and maximum permissible tempo values. ``tempo_max`` must be
        at least ``tempo_min``.

        Set either (or both) to `None` to disable this constraint.
    prior : scipy.stats.rv_continuous [optional]
        A prior distribution over tempo (in beats per minute).
        By default, a uniform prior over ``[tempo_min, tempo_max]`` is used.

    Returns
    -------
    pulse : np.ndarray, shape=[(..., n)]
        The estimated pulse curve. Maxima correspond to rhythmically salient
        points of time.

        If input is multi-channel, one pulse curve per channel is computed.

    Raises
    ------
    ParameterError
        if both ``tempo_min`` and ``tempo_max`` are provided and
        ``tempo_max <= tempo_min``.

    See Also
    --------
    beat_track
    librosa.onset.onset_strength
    librosa.feature.fourier_tempogram

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('brahms'))
    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    >>> pulse = librosa.beat.plp(onset_envelope=onset_env, sr=sr)

    PLP local maxima can be used as estimates of beat positions:

    >>> beats_plp = np.flatnonzero(librosa.util.localmax(pulse))

    An alternate tempo prior (e.g. log-normal) may be supplied via
    ``prior=scipy.stats.lognorm(loc=np.log(120), scale=120, s=1)``.
    """
    # Step 1: get the onset envelope
    if onset_envelope is None:
        onset_envelope = onset.onset_strength(
            y=y, sr=sr, hop_length=hop_length, aggregate=np.median
        )

    # Validate the tempo range (only when both bounds are given)
    if tempo_min is not None and tempo_max is not None and tempo_max <= tempo_min:
        raise ParameterError(
            f"tempo_max={tempo_max} must be larger than tempo_min={tempo_min}"
        )

    # Step 2: get the fourier tempogram
    ftgram = fourier_tempogram(
        onset_envelope=onset_envelope,
        sr=sr,
        hop_length=hop_length,
        win_length=win_length,
    )

    # Step 3: pin to the feasible tempo range by zeroing out tempogram
    # bins that fall outside [tempo_min, tempo_max]
    tempo_frequencies = core.fourier_tempo_frequencies(
        sr=sr, hop_length=hop_length, win_length=win_length
    )

    if tempo_min is not None:
        ftgram[..., tempo_frequencies < tempo_min, :] = 0
    if tempo_max is not None:
        ftgram[..., tempo_frequencies > tempo_max, :] = 0

    # reshape lengths to match dimension properly
    tempo_frequencies = util.expand_to(tempo_frequencies, ndim=ftgram.ndim, axes=-2)

    # Step 4: discard everything below the per-frame peak magnitude
    # (log1p compression stabilizes the comparison; the prior, if any,
    # reweights tempi before peak-picking)
    ftmag = np.log1p(1e6 * np.abs(ftgram))
    if prior is not None:
        ftmag += prior.logpdf(tempo_frequencies)

    peak_values = ftmag.max(axis=-2, keepdims=True)
    ftgram[ftmag < peak_values] = 0

    # Normalize to keep only phase information
    # (util.tiny guards against division by zero)
    ftgram /= util.tiny(ftgram) ** 0.5 + np.abs(ftgram.max(axis=-2, keepdims=True))

    # Step 5: invert the Fourier tempogram to get the pulse
    pulse = core.istft(
        ftgram, hop_length=1, n_fft=win_length, length=onset_envelope.shape[-1]
    )

    # Step 6: retain only the positive part of the pulse cycle
    # (clips in place via the `out` argument)
    pulse = np.clip(pulse, 0, None, pulse)

    # Return the normalized pulse
    return util.normalize(pulse, axis=-1)
|
||||
|
||||
|
||||
def __beat_tracker(
    onset_envelope: np.ndarray, bpm: np.ndarray, frame_rate: float, tightness: float, trim: bool
) -> np.ndarray:
    """Tracks beats in an onset strength envelope.

    Parameters
    ----------
    onset_envelope : np.ndarray [shape=(..., n,)]
        onset strength envelope
    bpm : float [scalar] or np.ndarray [shape=(...)]
        tempo estimate
    frame_rate : float [scalar]
        frame rate of the spectrogram (sr / hop_length, frames per second)
    tightness : float [scalar, positive]
        how closely do we adhere to bpm?
    trim : bool [scalar]
        trim leading/trailing beats with weak onsets?

    Returns
    -------
    beats : np.ndarray [shape=(n,)]
        frame numbers of beat events

    Raises
    ------
    ParameterError
        if ``bpm`` is non-positive, ``tightness`` is non-positive, or the
        ``bpm`` shape is incompatible with the onset envelope
    """
    # The docstring (and callers) permit a scalar bpm, but the shape check
    # below requires an array; promote scalars to 1-d arrays first.
    bpm = np.atleast_1d(bpm)

    if np.any(bpm <= 0):
        raise ParameterError(f"bpm={bpm} must be strictly positive")

    if tightness <= 0:
        raise ParameterError("tightness must be strictly positive")

    # TODO: this might be better accomplished with a np.broadcast_shapes check
    if bpm.shape[-1] not in (1, onset_envelope.shape[-1]):
        raise ParameterError(f"Invalid bpm shape={bpm.shape} does not match onset envelope shape={onset_envelope.shape}")

    # convert bpm to frames per beat (rounded)
    # [frames / sec] * [60 sec / min] / [beat / min] = [frames / beat]
    frames_per_beat = np.round(frame_rate * 60.0 / bpm)

    # localscore is a smoothed version of AGC'd onset envelope
    localscore = __beat_local_score(__normalize_onsets(onset_envelope), frames_per_beat)

    # run the DP
    backlink, cumscore = __beat_track_dp(localscore, frames_per_beat, tightness)

    # Reconstruct the beat path from backlinks
    tail = __last_beat(cumscore)
    beats = np.zeros_like(onset_envelope, dtype=bool)
    __dp_backtrack(backlink, tail, beats)

    # Discard spurious trailing beats
    beats: np.ndarray = __trim_beats(localscore, beats, trim)

    return beats
|
||||
|
||||
|
||||
# -- Helper functions for beat tracking
|
||||
def __normalize_onsets(onsets):
    """Scale an onset envelope to unit sample standard deviation.

    A tiny additive constant in the denominator guards against division
    by zero when the envelope is constant.
    """
    deviation = onsets.std(ddof=1, axis=-1, keepdims=True)
    deviation = deviation + util.tiny(onsets)
    return onsets / deviation
|
||||
|
||||
|
||||
@numba.guvectorize(
    [
        "void(float32[:], float32[:], float32[:])",
        "void(float64[:], float64[:], float64[:])",
    ],
    "(t),(n)->(t)",
    nopython=True, cache=False)
def __beat_local_score(onset_envelope, frames_per_beat, localscore):
    """Smooth the onset envelope with a (possibly time-varying) Gaussian window.

    This function essentially implements a same-mode convolution,
    but also allows for a time-varying convolution-like filter to support
    dynamic tempo.

    Parameters (gufunc core dims)
    -----------------------------
    onset_envelope : (t,) input onset strength
    frames_per_beat : (n,) frames per beat; n == 1 (static tempo) or n == t
    localscore : (t,) output smoothed score
    """
    N = len(onset_envelope)

    if len(frames_per_beat) == 1:
        # Static tempo mode
        # NOTE: when we can bump the minimum numba to 0.58, we can eliminate this branch and just use
        # np.convolve(..., mode='same') directly
        window = np.exp(-0.5 * (np.arange(-frames_per_beat[0], frames_per_beat[0] + 1) * 32.0 / frames_per_beat[0]) ** 2)
        K = len(window)
        # This is a vanilla same-mode convolution
        for i in range(len(onset_envelope)):
            localscore[i] = 0.
            # we need i + K // 2 - k < N ==> k > i + K // 2 - N
            # and i + K // 2 - k >= 0 ==> k <= i + K // 2
            # The exclusive upper bound is therefore i + K // 2 + 1;
            # stopping at i + K // 2 would drop the k == i + K // 2 term
            # (i.e., the contribution of onset_envelope[0]).
            for k in range(max(0, i + K // 2 - N + 1), min(i + K // 2 + 1, K)):
                localscore[i] += window[k] * onset_envelope[i + K // 2 - k]

    elif len(frames_per_beat) == len(onset_envelope):
        # Time-varying tempo estimates
        # This isn't exactly a convolution anymore, since the filter is time-varying, but it's pretty close
        for i in range(len(onset_envelope)):
            window = np.exp(-0.5 * (np.arange(-frames_per_beat[i], frames_per_beat[i] + 1) * 32.0 / frames_per_beat[i]) ** 2)
            K = 2 * int(frames_per_beat[i]) + 1

            localscore[i] = 0.
            # Same bounds as the static case; the exclusive end must be
            # i + K // 2 + 1 to include the k == i + K // 2 term.
            for k in range(max(0, i + K // 2 - N + 1), min(i + K // 2 + 1, K)):
                localscore[i] += window[k] * onset_envelope[i + K // 2 - k]
|
||||
|
||||
|
||||
|
||||
@numba.guvectorize(
    [
        "void(float32[:], float32[:], float32, int32[:], float32[:])",
        "void(float64[:], float64[:], float32, int32[:], float64[:])",
    ],
    "(t),(n),()->(t),(t)",
    nopython=True, cache=True)
def __beat_track_dp(localscore, frames_per_beat, tightness, backlink, cumscore):
    """Core dynamic program for beat tracking

    Parameters (gufunc core dims)
    -----------------------------
    localscore : (t,) smoothed onset strength
    frames_per_beat : (n,) rounded frames-per-beat; n == 1 (static tempo) or n == t
    tightness : () penalty weight for deviating from the target tempo
    backlink : (t,) output predecessor index for each frame (-1 = no predecessor)
    cumscore : (t,) output cumulative beat score
    """
    # Threshold for the first beat to exceed
    score_thresh = 0.01 * localscore.max()

    # Are we on the first beat?
    first_beat = True
    backlink[0] = -1
    cumscore[0] = localscore[0]

    # If tv == 0, then tv * i will always be 0, so we only ever use frames_per_beat[0]
    # If tv == 1, then tv * i = i, so we use the time-varying FPB
    tv = int(len(frames_per_beat) > 1)

    for i, score_i in enumerate(localscore):
        best_score = - np.inf
        beat_location = -1
        # Search over all possible predecessors to find the best preceding beat
        # NOTE: to provide time-varying tempo estimates, we replace
        # frames_per_beat[0] by frames_per_beat[i] in this loop body.
        # NOTE(review): these range bounds are floats (np.round returns
        # float); this relies on numba's range accepting float arguments
        # in nopython mode -- confirm against the supported numba versions.
        for loc in range(i - np.round(frames_per_beat[tv * i] / 2), i - 2 * frames_per_beat[tv * i] - 1, - 1):
            # Once we're searching past the start, break out
            if loc < 0:
                break
            # Transition cost: log-squared deviation of the candidate
            # inter-beat interval from the target frames-per-beat
            score = cumscore[loc] - tightness * (np.log(i - loc) - np.log(frames_per_beat[tv * i]))**2
            if score > best_score:
                best_score = score
                beat_location = loc

        # Add the local score
        if beat_location >= 0:
            cumscore[i] = score_i + best_score
        else:
            # No back-link found, so just use the current score
            cumscore[i] = score_i

        # Special case the first onset. Stop if the localscore is small
        if first_beat and score_i < score_thresh:
            backlink[i] = -1
        else:
            backlink[i] = beat_location
            first_beat = False
|
||||
|
||||
|
||||
@numba.guvectorize(
    [
        "void(float32[:], bool_[:], bool_, bool_[:])",
        "void(float64[:], bool_[:], bool_, bool_[:])"
    ],
    "(t),(t),()->(t)",
    nopython=True, cache=True
)
def __trim_beats(localscore, beats, trim, beats_trimmed):
    """Remove spurious leading and trailing beats from the detection array

    Parameters (gufunc core dims)
    -----------------------------
    localscore : (t,) smoothed onset strength
    beats : (t,) boolean beat indicator array
    trim : () if True, threshold at half the RMS of the smoothed beat
        envelope; if False, only suppress beats where the score is zero
    beats_trimmed : (t,) output indicator with weak edge beats removed
    """
    # Populate the trimmed beats array with the existing values
    beats_trimmed[:] = beats

    # Compute the threshold: 1/2 RMS of the smoothed beat envelope
    w = np.hanning(5)
    # Slicing here to implement same-mode convolution in older numba where
    # mode='same' is not yet supported
    # NOTE(review): the slice end uses len(localscore) rather than the number
    # of detected beats; verify this matches the intended same-mode length.
    smooth_boe = np.convolve(localscore[beats], w)[len(w)//2:len(localscore)+len(w)//2]

    # This logic is to preserve old behavior and always discard beats detected with oenv==0
    if trim:
        threshold = 0.5 * ((smooth_boe**2).mean()**0.5)
    else:
        threshold = 0.0

    # Suppress bad beats at the start.
    # The explicit bounds check prevents indexing past the end of the
    # array when every score falls at or below the threshold.
    n = 0
    while n < len(localscore) and localscore[n] <= threshold:
        beats_trimmed[n] = False
        n += 1

    # ... and at the end, with the symmetric bounds guard.
    n = len(localscore) - 1
    while n >= 0 and localscore[n] <= threshold:
        beats_trimmed[n] = False
        n -= 1
|
||||
|
||||
|
||||
def __last_beat(cumscore):
    """Identify the position of the last detected beat.

    The detection threshold is half the median of the scores at local
    maxima; the vectorized selector then scans backward for the last
    unmasked score at or above it.
    """
    # Mask out everything that is NOT a local maximum of the score.
    # (Masked-array semantics hide True entries, hence the negation.)
    not_peak = ~util.localmax(cumscore, axis=-1)

    # Median of the peak scores, supporting multidimensional inputs
    peak_scores = np.ma.masked_array(data=cumscore, mask=not_peak)  # type: ignore
    thresholds = np.ma.getdata(np.ma.median(peak_scores, axis=-1)) * 0.5

    # Locate the last beat position for each leading index
    tail = np.empty(shape=cumscore.shape[:-1], dtype=int)
    __last_beat_selector(cumscore, not_peak, thresholds, tail)
    return tail
|
||||
|
||||
|
||||
@numba.guvectorize(
    [
        "void(float32[:], bool_[:], float32, int64[:])",
        "void(float64[:], bool_[:], float64, int64[:])",
    ],
    "(t),(t),()->()",
    nopython=True, cache=True
)
def __last_beat_selector(cumscore, mask, threshold, out):
    """Vectorized helper to identify the last valid beat position:

        cumscore[n] >= threshold and not mask[n]
    """
    last = len(cumscore) - 1

    # Default to the final frame if no position qualifies
    out[0] = last
    for n in range(last, -1, -1):
        if cumscore[n] >= threshold and not mask[n]:
            out[0] = n
            break
|
||||
|
||||
|
||||
@numba.guvectorize(
    [
        "void(int32[:], int32, bool_[:])",
        "void(int64[:], int64, bool_[:])"
    ],
    "(t),()->(t)",
    nopython=True, cache=True
)
def __dp_backtrack(backlinks, tail, beats):
    """Populate the beat indicator array from a sequence of backlinks

    Parameters (gufunc core dims)
    -----------------------------
    backlinks : (t,) predecessor index for each frame; -1 terminates the chain
    tail : () index of the final beat, where backtracking starts
    beats : (t,) output indicator (True at beat frames)

    NOTE: only the visited positions are written; callers are expected
    to pass in a pre-initialized (all-False) output array.
    """
    # Walk the backlink chain from the tail until the -1 sentinel
    n = tail
    while n >= 0:
        beats[n] = True
        n = backlinks[n]
|
||||
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Core IO and DSP functions"""

import lazy_loader as lazy

# Defer all submodule imports until first attribute access.
# The public API is declared in the type stub (.pyi) next to this module;
# attach_stub wires __getattr__/__dir__/__all__ to match it.
__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
|
||||
@@ -0,0 +1,124 @@
|
||||
from .convert import (
|
||||
frames_to_samples as frames_to_samples,
|
||||
frames_to_time as frames_to_time,
|
||||
samples_to_frames as samples_to_frames,
|
||||
samples_to_time as samples_to_time,
|
||||
time_to_samples as time_to_samples,
|
||||
time_to_frames as time_to_frames,
|
||||
blocks_to_samples as blocks_to_samples,
|
||||
blocks_to_frames as blocks_to_frames,
|
||||
blocks_to_time as blocks_to_time,
|
||||
note_to_hz as note_to_hz,
|
||||
note_to_midi as note_to_midi,
|
||||
midi_to_hz as midi_to_hz,
|
||||
midi_to_note as midi_to_note,
|
||||
hz_to_note as hz_to_note,
|
||||
hz_to_midi as hz_to_midi,
|
||||
hz_to_mel as hz_to_mel,
|
||||
hz_to_octs as hz_to_octs,
|
||||
hz_to_fjs as hz_to_fjs,
|
||||
mel_to_hz as mel_to_hz,
|
||||
octs_to_hz as octs_to_hz,
|
||||
A4_to_tuning as A4_to_tuning,
|
||||
tuning_to_A4 as tuning_to_A4,
|
||||
fft_frequencies as fft_frequencies,
|
||||
cqt_frequencies as cqt_frequencies,
|
||||
mel_frequencies as mel_frequencies,
|
||||
tempo_frequencies as tempo_frequencies,
|
||||
fourier_tempo_frequencies as fourier_tempo_frequencies,
|
||||
A_weighting as A_weighting,
|
||||
B_weighting as B_weighting,
|
||||
C_weighting as C_weighting,
|
||||
D_weighting as D_weighting,
|
||||
Z_weighting as Z_weighting,
|
||||
frequency_weighting as frequency_weighting,
|
||||
multi_frequency_weighting as multi_frequency_weighting,
|
||||
samples_like as samples_like,
|
||||
times_like as times_like,
|
||||
midi_to_svara_h as midi_to_svara_h,
|
||||
midi_to_svara_c as midi_to_svara_c,
|
||||
note_to_svara_h as note_to_svara_h,
|
||||
note_to_svara_c as note_to_svara_c,
|
||||
hz_to_svara_h as hz_to_svara_h,
|
||||
hz_to_svara_c as hz_to_svara_c,
|
||||
)
|
||||
|
||||
from .audio import (
|
||||
load as load,
|
||||
stream as stream,
|
||||
to_mono as to_mono,
|
||||
resample as resample,
|
||||
get_duration as get_duration,
|
||||
get_samplerate as get_samplerate,
|
||||
autocorrelate as autocorrelate,
|
||||
lpc as lpc,
|
||||
zero_crossings as zero_crossings,
|
||||
clicks as clicks,
|
||||
tone as tone,
|
||||
chirp as chirp,
|
||||
mu_compress as mu_compress,
|
||||
mu_expand as mu_expand,
|
||||
)
|
||||
|
||||
from .spectrum import (
|
||||
stft as stft,
|
||||
istft as istft,
|
||||
magphase as magphase,
|
||||
iirt as iirt,
|
||||
reassigned_spectrogram as reassigned_spectrogram,
|
||||
phase_vocoder as phase_vocoder,
|
||||
perceptual_weighting as perceptual_weighting,
|
||||
power_to_db as power_to_db,
|
||||
db_to_power as db_to_power,
|
||||
amplitude_to_db as amplitude_to_db,
|
||||
db_to_amplitude as db_to_amplitude,
|
||||
fmt as fmt,
|
||||
pcen as pcen,
|
||||
griffinlim as griffinlim,
|
||||
)
|
||||
|
||||
from .pitch import (
|
||||
estimate_tuning as estimate_tuning,
|
||||
pitch_tuning as pitch_tuning,
|
||||
piptrack as piptrack,
|
||||
yin as yin,
|
||||
pyin as pyin,
|
||||
)
|
||||
|
||||
from .constantq import (
|
||||
cqt as cqt,
|
||||
hybrid_cqt as hybrid_cqt,
|
||||
pseudo_cqt as pseudo_cqt,
|
||||
icqt as icqt,
|
||||
griffinlim_cqt as griffinlim_cqt,
|
||||
vqt as vqt,
|
||||
)
|
||||
|
||||
from .harmonic import (
|
||||
salience as salience,
|
||||
interp_harmonics as interp_harmonics,
|
||||
f0_harmonics as f0_harmonics,
|
||||
)
|
||||
|
||||
from .fft import (
|
||||
get_fftlib as get_fftlib,
|
||||
set_fftlib as set_fftlib,
|
||||
)
|
||||
|
||||
from .notation import (
|
||||
key_to_degrees as key_to_degrees,
|
||||
key_to_notes as key_to_notes,
|
||||
mela_to_degrees as mela_to_degrees,
|
||||
mela_to_svara as mela_to_svara,
|
||||
thaat_to_degrees as thaat_to_degrees,
|
||||
list_mela as list_mela,
|
||||
list_thaat as list_thaat,
|
||||
fifths_to_note as fifths_to_note,
|
||||
interval_to_fjs as interval_to_fjs,
|
||||
)
|
||||
|
||||
from .intervals import (
|
||||
interval_frequencies as interval_frequencies,
|
||||
pythagorean_intervals as pythagorean_intervals,
|
||||
plimit_intervals as plimit_intervals,
|
||||
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Fast Fourier Transform (FFT) library container"""
|
||||
import scipy.fft
|
||||
|
||||
from types import ModuleType
|
||||
from typing import Optional
|
||||
from ..util.decorators import deprecated
|
||||
|
||||
|
||||
__all__ = ["get_fftlib", "set_fftlib"]
|
||||
|
||||
# Object to hold FFT interfaces
|
||||
__FFTLIB: Optional[ModuleType] = scipy.fft
|
||||
|
||||
|
||||
@deprecated(version="0.11.0", version_removed="1.0")
def set_fftlib(lib: Optional[ModuleType] = None) -> None:
    """Set the FFT library used by librosa.

    .. warning:: This functionality is deprecated in librosa 0.11 and will be
        removed in 1.0.  To achieve the same effect, use either the
        `scipy.fft.set_backend` context manager or
        `scipy.fft.set_global_backend` function.

    Parameters
    ----------
    lib : None or module
        Must implement an interface compatible with `scipy.fft`.
        If ``None``, reverts to `scipy.fft`.

    Examples
    --------
    Use `pyfftw`:

    >>> import pyfftw
    >>> librosa.set_fftlib(pyfftw.interfaces.numpy_fft)

    Reset to default `scipy` implementation

    >>> librosa.set_fftlib()
    """
    global __FFTLIB
    # Passing None restores the scipy default backend
    __FFTLIB = scipy.fft if lib is None else lib
|
||||
|
||||
|
||||
def get_fftlib() -> ModuleType:
    """Get the FFT library currently used by librosa

    Returns
    -------
    fft : module
        The FFT library currently used by librosa.
        Must API-compatible with `numpy.fft`.
    """
    # The module initializes __FFTLIB to scipy.fft and set_fftlib never
    # stores None, so this branch should be unreachable.
    if __FFTLIB is None:
        assert False  # pragma: no cover
    return __FFTLIB
|
||||
@@ -0,0 +1,450 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Harmonic calculations for frequency representations"""
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import scipy.interpolate
|
||||
import scipy.signal
|
||||
from ..util.exceptions import ParameterError
|
||||
from ..util import is_unique
|
||||
from numpy.typing import ArrayLike
|
||||
from typing import Callable, Optional, Sequence
|
||||
|
||||
__all__ = ["salience", "interp_harmonics", "f0_harmonics"]
|
||||
|
||||
|
||||
def salience(
    S: np.ndarray,
    *,
    freqs: np.ndarray,
    harmonics: Sequence[float],
    weights: Optional[ArrayLike] = None,
    aggregate: Optional[Callable] = None,
    filter_peaks: bool = True,
    fill_value: float = np.nan,
    kind: str = "linear",
    axis: int = -2,
) -> np.ndarray:
    """Harmonic salience function.

    Aggregates the energy at the chosen harmonics of each frequency,
    optionally restricted to frequencies of peak magnitude.

    Parameters
    ----------
    S : np.ndarray [shape=(..., d, n)]
        input time-frequency magnitude representation (e.g., STFT or CQT
        magnitudes).  Must be real-valued and non-negative.
    freqs : np.ndarray, shape=(S.shape[axis]) or shape=S.shape
        The frequency values corresponding to S's elements along the
        chosen axis.  Frequencies can also be time-varying, e.g. as
        computed by `reassigned_spectrogram`, in which case the shape
        should match ``S``.
    harmonics : list-like, non-negative
        Harmonics to include in salience computation.  The first harmonic
        (1) corresponds to ``S`` itself.  Values less than one (e.g., 1/2)
        correspond to sub-harmonics.
    weights : list-like
        The weight to apply to each harmonic in the summation
        (default: uniform weights).  Must be the same length as
        ``harmonics``.
    aggregate : function
        aggregation function (default: `np.average`).  If
        ``aggregate=np.average``, a weighted average is computed
        per-harmonic according to ``weights``; all other aggregation
        functions treat harmonics equally.
    filter_peaks : bool
        If true (default), returns harmonic summation only on frequencies
        of peak magnitude; otherwise over the full spectrum.
    fill_value : float
        The value to fill non-peaks in the output representation
        (default: `np.nan`).  Only used if ``filter_peaks == True``.
    kind : str
        Interpolation type for harmonic estimation.
        See `scipy.interpolate.interp1d`.
    axis : int
        The axis along which to compute harmonics

    Returns
    -------
    S_sal : np.ndarray
        Same shape as ``S``; the overall harmonic energy at each frequency.

    See Also
    --------
    interp_harmonics

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('trumpet'), duration=3)
    >>> S = np.abs(librosa.stft(y))
    >>> freqs = librosa.fft_frequencies(sr=sr)
    >>> S_sal = librosa.salience(S, freqs=freqs, harmonics=[1, 2, 3, 4],
    ...                          weights=[1.0, 0.5, 0.33, 0.25], fill_value=0)
    >>> print(S_sal.shape)
    (1025, 115)
    """
    agg = np.average if aggregate is None else aggregate

    if weights is None:
        harmonic_weights = np.ones((len(harmonics),))
    else:
        harmonic_weights = np.array(weights, dtype=float)

    # Resample S onto each harmonic of the frequency axis
    S_harm = interp_harmonics(S, freqs=freqs, harmonics=harmonics, kind=kind, axis=axis)

    # np.average is the only aggregator that accepts per-harmonic weights
    S_sal: np.ndarray
    if agg is np.average:
        S_sal = agg(S_harm, axis=axis - 1, weights=harmonic_weights)
    else:
        S_sal = agg(S_harm, axis=axis - 1)

    if filter_peaks:
        # Retain salience only at local maxima of S along the frequency axis;
        # all other positions receive fill_value
        peak_idx = scipy.signal.argrelmax(S, axis=axis)
        S_out = np.empty(S.shape)
        S_out.fill(fill_value)
        S_out[peak_idx] = S_sal[peak_idx]
        S_sal = S_out

    return S_sal
|
||||
|
||||
|
||||
def interp_harmonics(
    x: np.ndarray,
    *,
    freqs: np.ndarray,
    harmonics: ArrayLike,
    kind: str = "linear",
    fill_value: float = 0,
    axis: int = -2,
) -> np.ndarray:
    """Compute the energy at harmonics of time-frequency representation.

    Given a frequency-based energy representation such as a spectrogram
    or tempogram, this function computes the energy at the chosen harmonics
    of the frequency axis.  (See examples below.)
    The resulting harmonic array can then be used as input to a salience
    computation.

    Parameters
    ----------
    x : np.ndarray
        The input energy
    freqs : np.ndarray, shape=(x.shape[axis]) or shape=x.shape
        The frequency values corresponding to x's elements along the
        chosen axis.
        Frequencies can also be time-varying, e.g. as computed by
        `reassigned_spectrogram`, in which case the shape should
        match ``x``.
    harmonics : list-like, non-negative
        Harmonics to compute as ``harmonics[i] * freqs``.
        The first harmonic (1) corresponds to ``freqs``.
        Values less than one (e.g., 1/2) correspond to sub-harmonics.
    kind : str
        Interpolation type.  See `scipy.interpolate.interp1d`.
    fill_value : float
        The value to fill when extrapolating beyond the observed
        frequency range.
    axis : int
        The axis along which to compute harmonics

    Returns
    -------
    x_harm : np.ndarray
        ``x_harm[i]`` will have the same shape as ``x``, and measure
        the energy at the ``harmonics[i]`` harmonic of each frequency.
        A new dimension indexing harmonics will be inserted immediately
        before ``axis``.

    Raises
    ------
    ParameterError
        If ``freqs`` is neither a 1-d array matching ``x.shape[axis]``
        nor an array of the same shape as ``x``.

    See Also
    --------
    scipy.interpolate.interp1d

    Examples
    --------
    Estimate the harmonics of a time-averaged tempogram

    >>> y, sr = librosa.load(librosa.ex('sweetwaltz'))
    >>> # Compute the time-varying tempogram and average over time
    >>> tempi = np.mean(librosa.feature.tempogram(y=y, sr=sr), axis=1)
    >>> # We'll measure the first five harmonics
    >>> harmonics = [1, 2, 3, 4, 5]
    >>> f_tempo = librosa.tempo_frequencies(len(tempi), sr=sr)
    >>> # Build the harmonic tensor; we only have one axis here (tempo)
    >>> t_harmonics = librosa.interp_harmonics(tempi, freqs=f_tempo, harmonics=harmonics, axis=0)
    >>> print(t_harmonics.shape)
    (5, 384)

    We can also compute frequency harmonics for spectrograms.
    To calculate sub-harmonic energy, use values < 1.

    >>> y, sr = librosa.load(librosa.ex('trumpet'), duration=3)
    >>> harmonics = [1./3, 1./2, 1, 2, 3, 4]
    >>> S = np.abs(librosa.stft(y))
    >>> fft_freqs = librosa.fft_frequencies(sr=sr)
    >>> S_harm = librosa.interp_harmonics(S, freqs=fft_freqs, harmonics=harmonics, axis=0)
    >>> print(S_harm.shape)
    (6, 1025, 646)
    """
    if freqs.ndim == 1 and len(freqs) == x.shape[axis]:
        # Build the 1-D interpolator.
        # All frames have a common domain, so we only need one interpolator here.

        # First, verify that the input frequencies are unique;
        # duplicate sample points make the interpolant ill-defined
        if not is_unique(freqs, axis=0):
            warnings.warn(
                "Frequencies are not unique. This may produce incorrect harmonic interpolations.",
                stacklevel=2,
            )

        f_interp = scipy.interpolate.interp1d(
            freqs,
            x,
            axis=axis,
            bounds_error=False,
            copy=False,
            kind=kind,
            fill_value=fill_value,
        )

        # Set the interpolation points: one row of target frequencies per harmonic
        f_out = np.multiply.outer(harmonics, freqs)

        # Interpolate; suppress type checks
        return f_interp(f_out)  # type: ignore

    elif freqs.shape == x.shape:
        if not np.all(is_unique(freqs, axis=axis)):
            warnings.warn(
                "Frequencies are not unique. This may produce incorrect harmonic interpolations.",
                stacklevel=2,
            )

        # If we have time-varying frequencies, then it must match exactly the shape of the input

        # We'll define a frame-wise interpolator helper function that we will vectorize over
        # the entire input array
        def _f_interp(_a, _b):
            # _a: frequency grid for one frame; _b: values on that grid
            interp = scipy.interpolate.interp1d(
                _a, _b, bounds_error=False, copy=False, kind=kind, fill_value=fill_value
            )

            return interp(np.multiply.outer(_a, harmonics))

        # Signature is expanding frequency into a new dimension
        xfunc = np.vectorize(_f_interp, signature="(f),(f)->(f,h)")

        # Rotate the vectorizing axis to the tail so that we get parallelism over frames
        # Afterward, we're swapping (-1, axis-1) instead of (-1,axis)
        # because a new dimension has been inserted
        return (  # type: ignore
            xfunc(freqs.swapaxes(axis, -1), x.swapaxes(axis, -1))
            .swapaxes(
                # Return the original target axis to its place
                -2,
                axis,
            )
            .swapaxes(
                # Put the new harmonic axis directly in front of the target axis
                -1,
                axis - 1,
            )
        )
    else:
        raise ParameterError(
            f"freqs.shape={freqs.shape} is incompatible with input shape={x.shape}"
        )
|
||||
|
||||
|
||||
def f0_harmonics(
    x: np.ndarray,
    *,
    f0: np.ndarray,
    freqs: np.ndarray,
    harmonics: ArrayLike,
    kind: str = "linear",
    fill_value: float = 0,
    axis: int = -2,
) -> np.ndarray:
    """Compute the energy at selected harmonics of a time-varying
    fundamental frequency.

    This function can be used to reduce a `frequency * time` representation
    to a `harmonic * time` representation, effectively normalizing out for
    the fundamental frequency.  The result can be used as a representation
    of timbre when f0 corresponds to pitch, or as a representation of
    rhythm when f0 corresponds to tempo.

    This function differs from `interp_harmonics`, which computes the
    harmonics of *all* frequencies.

    Parameters
    ----------
    x : np.ndarray [shape=(..., frequencies, n)]
        The input array (e.g., STFT magnitudes)
    f0 : np.ndarray [shape=(..., n)]
        The fundamental frequency (f0) of each frame in the input.
        Shape should match ``x.shape[-1]``
    freqs : np.ndarray, shape=(x.shape[axis]) or shape=x.shape
        The frequency values corresponding to x's elements along the
        chosen axis.
        Frequencies can also be time-varying, e.g. as computed by
        `reassigned_spectrogram`, in which case the shape should
        match ``x``.
    harmonics : list-like, non-negative
        Harmonics to compute as ``harmonics[i] * f0``.
        Values less than one (e.g., 1/2) correspond to sub-harmonics.
    kind : str
        Interpolation type.  See `scipy.interpolate.interp1d`.
    fill_value : float
        The value to fill when extrapolating beyond the observed
        frequency range.
    axis : int
        The axis corresponding to frequency in ``x``

    Returns
    -------
    f0_harm : np.ndarray [shape=(..., len(harmonics), n)]
        Interpolated energy at each specified harmonic of the fundamental
        frequency for each time step.

    Raises
    ------
    ParameterError
        If ``freqs`` is neither a 1-d array matching ``x.shape[axis]``
        nor an array of the same shape as ``x``.

    See Also
    --------
    interp_harmonics
    librosa.feature.tempogram_ratio

    Examples
    --------
    This example estimates the fundamental (f0), and then extracts the first
    12 harmonics

    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> f0, voicing, voicing_p = librosa.pyin(y=y, sr=sr, fmin=200, fmax=700)
    >>> S = np.abs(librosa.stft(y))
    >>> freqs = librosa.fft_frequencies(sr=sr)
    >>> harmonics = np.arange(1, 13)
    >>> f0_harm = librosa.f0_harmonics(S, freqs=freqs, f0=f0, harmonics=harmonics)
    """
    result: np.ndarray
    if freqs.ndim == 1 and len(freqs) == x.shape[axis]:
        # Duplicate sample points make the interpolant ill-defined
        if not is_unique(freqs, axis=0):
            warnings.warn(
                "Frequencies are not unique. This may produce incorrect harmonic interpolations.",
                stacklevel=2,
            )

        # We have a fixed frequency grid.
        # Restrict interpolation to finite frequencies (e.g., drop NaN bins)
        idx = np.isfinite(freqs)

        def _f_interps(data, f):
            # data: values on the shared grid; f: target harmonic frequencies
            interp = scipy.interpolate.interp1d(
                freqs[idx],
                data[idx],
                axis=0,
                bounds_error=False,
                copy=False,
                assume_sorted=False,
                kind=kind,
                fill_value=fill_value,
            )
            return interp(f)

        # Vectorize over frames: each frame maps its spectrum (f,) onto
        # its own harmonic targets (h,)
        xfunc = np.vectorize(_f_interps, signature="(f),(h)->(h)")
        result = xfunc(x.swapaxes(axis, -1), np.multiply.outer(f0, harmonics)).swapaxes(
            axis, -1
        )

    elif freqs.shape == x.shape:
        if not np.all(is_unique(freqs, axis=axis)):
            warnings.warn(
                "Frequencies are not unique. This may produce incorrect harmonic interpolations.",
                stacklevel=2,
            )

        # We have a dynamic frequency grid, not so bad
        def _f_interpd(data, frequencies, f):
            # Per-frame finite-frequency mask, since the grid varies by frame
            idx = np.isfinite(frequencies)
            interp = scipy.interpolate.interp1d(
                frequencies[idx],
                data[idx],
                axis=0,
                bounds_error=False,
                copy=False,
                assume_sorted=False,
                kind=kind,
                fill_value=fill_value,
            )
            return interp(f)

        xfunc = np.vectorize(_f_interpd, signature="(f),(f),(h)->(h)")
        result = xfunc(
            x.swapaxes(axis, -1),
            freqs.swapaxes(axis, -1),
            np.multiply.outer(f0, harmonics),
        ).swapaxes(axis, -1)

    else:
        raise ParameterError(
            f"freqs.shape={freqs.shape} is incompatible with input shape={x.shape}"
        )

    # NaNs can arise from non-finite f0 values (unvoiced frames);
    # replace them with the caller's fill value
    return np.nan_to_num(result, copy=False, nan=fill_value)
|
||||
Binary file not shown.
@@ -0,0 +1,510 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
"""Functions for interval construction"""
|
||||
|
||||
from typing import Collection, Dict, List, Union, overload, Iterable
|
||||
from typing_extensions import Literal
|
||||
import msgpack
|
||||
import numpy as np
|
||||
from numpy.typing import ArrayLike
|
||||
from .._cache import cache
|
||||
from .._typing import _FloatLike_co
|
||||
from ..util.files import _resource_file
|
||||
|
||||
|
||||
# Load the precomputed interval table that ships with the package.
# NOTE(review): presumably maps harmonic-distance values (floats) to interval
# data — verify against the generator script for intervals.msgpack.
with _resource_file("librosa.core", "intervals.msgpack") as imsgpack:
    with imsgpack.open("rb") as _fdesc:
        # We use floats for dictionary keys, so strict mapping is disabled
        INTERVALS = msgpack.load(_fdesc, strict_map_key=False)
|
||||
|
||||
|
||||
@cache(level=10)
def interval_frequencies(
    n_bins: int,
    *,
    fmin: _FloatLike_co,
    intervals: Union[str, Collection[float]],
    bins_per_octave: int = 12,
    tuning: float = 0.0,
    sort: bool = True
) -> np.ndarray:
    """Construct a set of frequencies from an interval set

    Parameters
    ----------
    n_bins : int
        The number of frequencies to generate

    fmin : float > 0
        The minimum frequency

    intervals : str or array of floats in [1, 2)
        If `str`, must be one of the following:
        - `'equal'` - equal temperament
        - `'pythagorean'` - Pythagorean intervals
        - `'ji3'` - 3-limit just intonation
        - `'ji5'` - 5-limit just intonation
        - `'ji7'` - 7-limit just intonation

        Otherwise, an array of intervals in the range [1, 2) can be provided.

    bins_per_octave : int > 0
        If `intervals` is a string specification, how many bins to
        generate per octave.
        If `intervals` is an array, then this parameter is ignored.

    tuning : float
        Deviation from A440 tuning in fractional bins.
        This is only used when `intervals == 'equal'`

    sort : bool
        Sort the intervals in ascending order.

    Returns
    -------
    frequencies : array of float
        The frequencies

    Raises
    ------
    ValueError
        If `intervals` is a string that does not match one of the
        supported specifications listed above.

    Examples
    --------
    Generate two octaves of Pythagorean intervals starting at 55Hz

    >>> librosa.interval_frequencies(24, fmin=55, intervals="pythagorean", bins_per_octave=12)
    array([ 55.   ,  58.733,  61.875,  66.075,  69.609,  74.334,  78.311,
            82.5  ,  88.099,  92.812,  99.112, 104.414, 110.   , 117.466,
           123.75 , 132.149, 139.219, 148.668, 156.621, 165.   , 176.199,
           185.625, 198.224, 208.828])

    Generate two octaves of 5-limit intervals starting at 55Hz

    >>> librosa.interval_frequencies(24, fmin=55, intervals="ji5", bins_per_octave=12)
    array([ 55.   ,  58.667,  61.875,  66.   ,  68.75 ,  73.333,  77.344,
            82.5  ,  88.   ,  91.667,  99.   , 103.125, 110.   , 117.333,
           123.75 , 132.   , 137.5  , 146.667, 154.687, 165.   , 176.   ,
           183.333, 198.   , 206.25 ])

    Generate three octaves using only three intervals

    >>> intervals = [1, 4/3, 3/2]
    >>> librosa.interval_frequencies(9, fmin=55, intervals=intervals)
    array([ 55.   ,  73.333,  82.5  , 110.   , 146.667, 165.   , 220.   ,
           293.333, 330.   ])
    """
    if isinstance(intervals, str):
        if intervals == "equal":
            # Equal temperament: bins_per_octave equally spaced
            # log-frequencies, shifted by the tuning deviation.
            ratios = 2.0 ** (
                (tuning + np.arange(0, bins_per_octave, dtype=float)) / bins_per_octave
            )
        elif intervals == "pythagorean":
            ratios = pythagorean_intervals(bins_per_octave=bins_per_octave, sort=sort)
        elif intervals == "ji3":
            ratios = plimit_intervals(
                primes=[3], bins_per_octave=bins_per_octave, sort=sort
            )
        elif intervals == "ji5":
            ratios = plimit_intervals(
                primes=[3, 5], bins_per_octave=bins_per_octave, sort=sort
            )
        elif intervals == "ji7":
            ratios = plimit_intervals(
                primes=[3, 5, 7], bins_per_octave=bins_per_octave, sort=sort
            )
        else:
            # Previously an unknown string fell through to np.array(...),
            # producing an opaque TypeError from len() on a 0-d array.
            raise ValueError(
                f"Unknown interval specification: {intervals!r}. "
                "Must be one of 'equal', 'pythagorean', 'ji3', 'ji5', 'ji7', "
                "or an array of interval ratios."
            )
    else:
        # Caller supplied an explicit array of within-octave ratios;
        # its length defines the effective bins-per-octave.
        ratios = np.array(intervals)
        bins_per_octave = len(ratios)

    # We have one octave of ratios, tile it up to however many we need
    # and trim back to the right number of bins
    n_octaves = np.ceil(n_bins / bins_per_octave)
    all_ratios = np.multiply.outer(2.0 ** np.arange(n_octaves), ratios).flatten()[
        :n_bins
    ]

    if sort:
        all_ratios = np.sort(all_ratios)

    # Scale the ratio grid by the base frequency
    return all_ratios * fmin
|
||||
|
||||
|
||||
@overload
def pythagorean_intervals(
    *,
    bins_per_octave: int = ...,
    sort: bool = ...,
    return_factors: Literal[False] = ...
) -> np.ndarray:
    ...


@overload
def pythagorean_intervals(
    *, bins_per_octave: int = ..., sort: bool = ..., return_factors: Literal[True]
) -> List[Dict[int, int]]:
    ...


@overload
def pythagorean_intervals(
    *, bins_per_octave: int = ..., sort: bool = ..., return_factors: bool = ...
) -> Union[np.ndarray, List[Dict[int, int]]]:
    ...


@cache(level=10)
def pythagorean_intervals(
    *, bins_per_octave: int = 12, sort: bool = True, return_factors: bool = False
) -> Union[np.ndarray, List[Dict[int, int]]]:
    """Pythagorean intervals

    Intervals are constructed by stacking ratios of 3/2 (i.e.,
    just perfect fifths) and folding down to a single octave::

        1, 3/2, 9/8, 27/16, 81/64, ...

    Note that this differs from 3-limit just intonation intervals
    in that Pythagorean intervals only use positive powers of 3
    (ascending fifths) while 3-limit intervals use both positive
    and negative powers (descending fifths).

    Parameters
    ----------
    bins_per_octave : int
        The number of intervals to generate
    sort : bool
        If `True` then intervals are returned in ascending order.
        If `False`, then intervals are returned in circle-of-fifths order.
    return_factors : bool
        If `True` then return a list of dictionaries encoding the prime factorization
        of each interval as `{2: p2, 3: p3}` (meaning `3**p3 * 2**p2`).
        If `False` (default), return intervals as an array of floating point numbers.

    Returns
    -------
    intervals : np.ndarray or list of dictionaries
        The constructed interval set. All intervals are mapped
        to the range [1, 2).

    See Also
    --------
    plimit_intervals

    Examples
    --------
    Generate the first 12 intervals

    >>> librosa.pythagorean_intervals(bins_per_octave=12)
    array([1.      , 1.067871, 1.125   , 1.201355, 1.265625, 1.351524,
           1.423828, 1.5     , 1.601807, 1.6875  , 1.802032, 1.898437])
    >>> # Compare to the 12-tone equal temperament intervals:
    >>> 2**(np.arange(12)/12)
    array([1.      , 1.059463, 1.122462, 1.189207, 1.259921, 1.33484 ,
           1.414214, 1.498307, 1.587401, 1.681793, 1.781797, 1.887749])

    Or the first 7, in circle-of-fifths order

    >>> librosa.pythagorean_intervals(bins_per_octave=7, sort=False)
    array([1.      , 1.5     , 1.125   , 1.6875  , 1.265625, 1.898437,
           1.423828])

    Generate the first 7, in circle-of-fifths order and factored form

    >>> librosa.pythagorean_intervals(bins_per_octave=7, sort=False, return_factors=True)
    [
        {2: 0, 3: 0},
        {2: -1, 3: 1},
        {2: -3, 3: 2},
        {2: -4, 3: 3},
        {2: -6, 3: 4},
        {2: -7, 3: 5},
        {2: -9, 3: 6}
    ]
    """
    # Generate all powers of 3 in log space
    pow3 = np.arange(bins_per_octave)

    # Using modf here to quickly get the fractional part of the log,
    # accounting for whatever power of 2 is necessary to get 3**k
    # within the octave.
    log_ratios: np.ndarray
    pow2: np.ndarray
    log_ratios, pow2 = np.modf(pow3 * np.log2(3))

    # If the fractional part is negative, add
    # one more power of two to get it into the range [0, 1).
    # Since x = frac + int, shifting frac up by 1 requires shifting
    # the integer part DOWN by 1 (this matches plimit_intervals;
    # the branch is unreachable here because pow3 >= 0, but the
    # previous `+= 1` had the wrong sign).
    too_small = log_ratios < 0
    log_ratios[too_small] += 1
    pow2[too_small] -= 1

    # Convert powers of 2 to integer
    pow2 = pow2.astype(int)

    idx: Iterable[int]

    if sort:
        # Order the intervals
        idx = np.argsort(log_ratios)
        log_ratios = log_ratios[idx]
    else:
        # If not sorting, we'll take powers in order
        idx = range(bins_per_octave)

    if return_factors:
        # Each interval is 3**pow3 / 2**pow2, i.e. {2: -pow2, 3: pow3}
        return list({2: -pow2[i], 3: pow3[i]} for i in idx)

    return np.power(2, log_ratios)
|
||||
|
||||
|
||||
def __harmonic_distance(logs, a, b):
    """Compute the harmonic distance between ratios a and b.

    Harmonic distance is defined as `log2(a * b) - 2*log2(gcd(a, b))` [#]_.

    Here we are expressing a and b as prime factorization exponents,
    and the prime basis are provided in their log2 form.

    .. [#] Tenney, James.
        "On ‘Crystal Growth’ in harmonic space (1993–1998)."
        Contemporary Music Review 27.1 (2008): 47-56.
    """
    exp_a = np.asarray(a)
    exp_b = np.asarray(b)

    # Split each exponent vector into numerator (positive powers)
    # and denominator (negative powers, stored as positive values)
    num_a, num_b = np.maximum(exp_a, 0), np.maximum(exp_b, 0)
    den_a, den_b = num_a - exp_a, num_b - exp_b

    # For rationals: gcd(a, b) = gcd(a_num, b_num) / lcm(a_den, b_den),
    # which in exponent form is minimum(numerators) - maximum(denominators)
    gcd_exp = np.minimum(num_a, num_b) - np.maximum(den_a, den_b)

    # log2(a*b / gcd(a,b)**2) = log(a) + log(b) - 2 * log(gcd(a,b))
    distance = logs.dot(exp_a + exp_b - 2 * gcd_exp)

    # Rounding to 6 decimals to avoid floating point weirdness
    return np.around(distance, 6)
|
||||
|
||||
|
||||
def _crystal_tie_break(a, b, logs):
    """Given two tuples of prime powers, break ties.

    Candidate ``a`` wins if its total absolute log-magnitude under the
    prime log basis is strictly smaller than that of ``b``.
    """
    weight_a = logs.dot(np.abs(a))
    weight_b = logs.dot(np.abs(b))
    return weight_a < weight_b
|
||||
|
||||
|
||||
@overload
def plimit_intervals(
    *,
    primes: ArrayLike,
    bins_per_octave: int = ...,
    sort: bool = ...,
    return_factors: Literal[False] = ...
) -> np.ndarray:
    ...


@overload
def plimit_intervals(
    *,
    primes: ArrayLike,
    bins_per_octave: int = ...,
    sort: bool = ...,
    return_factors: Literal[True]
) -> List[Dict[int, int]]:
    ...


@overload
def plimit_intervals(
    *,
    primes: ArrayLike,
    bins_per_octave: int = ...,
    sort: bool = ...,
    return_factors: bool = ...
) -> Union[np.ndarray, List[Dict[int, int]]]:
    ...


@cache(level=10)
def plimit_intervals(
    *,
    primes: ArrayLike,
    bins_per_octave: int = 12,
    sort: bool = True,
    return_factors: bool = False
) -> Union[np.ndarray, List[Dict[int, int]]]:
    """Construct p-limit intervals for a given set of prime factors.

    This function is based on the "harmonic crystal growth" algorithm
    of [#1]_ [#2]_.

    .. [#1] Tenney, James.
        "On ‘Crystal Growth’ in harmonic space (1993–1998)."
        Contemporary Music Review 27.1 (2008): 47-56.

    .. [#2] Sabat, Marc, and James Tenney.
        "Three crystal growth algorithms in 23-limit constrained harmonic space."
        Contemporary Music Review 27, no. 1 (2008): 57-78.

    Parameters
    ----------
    primes : array of odd primes
        Which prime factors are to be used
    bins_per_octave : int
        The number of intervals to construct
    sort : bool
        If `True` then intervals are returned in ascending order.
        If `False`, then intervals are returned in crystal growth order.
    return_factors : bool
        If `True` then return a list of dictionaries encoding the prime factorization
        of each interval as `{2: p2, 3: p3, ...}` (meaning `3**p3 * 2**p2`).
        If `False` (default), return intervals as an array of floating point numbers.

    Returns
    -------
    intervals : np.ndarray or list of dictionaries
        The constructed interval set. All intervals are mapped
        to the range [1, 2).

    See Also
    --------
    pythagorean_intervals

    Examples
    --------
    Compare 3-limit tuning to Pythagorean tuning and 12-TET

    >>> librosa.plimit_intervals(primes=[3], bins_per_octave=12)
    array([1.        , 1.05349794, 1.125     , 1.18518519, 1.265625  ,
           1.33333333, 1.40466392, 1.5       , 1.58024691, 1.6875    ,
           1.77777778, 1.8984375 ])
    >>> # Pythagorean intervals:
    >>> librosa.pythagorean_intervals(bins_per_octave=12)
    array([1.        , 1.06787109, 1.125     , 1.20135498, 1.265625  ,
           1.35152435, 1.42382812, 1.5       , 1.60180664, 1.6875    ,
           1.80203247, 1.8984375 ])
    >>> # 12-TET intervals:
    >>> 2**(np.arange(12)/12)
    array([1.        , 1.05946309, 1.12246205, 1.18920712, 1.25992105,
           1.33483985, 1.41421356, 1.49830708, 1.58740105, 1.68179283,
           1.78179744, 1.88774863])

    Create a 7-bin, 5-limit interval set

    >>> librosa.plimit_intervals(primes=[3, 5], bins_per_octave=7)
    array([1.        , 1.125     , 1.25      , 1.33333333, 1.5       ,
           1.66666667, 1.875     ])

    The same example, but now in factored form

    >>> librosa.plimit_intervals(primes=[3, 5], bins_per_octave=7,
    ...                          return_factors=True)
    [
        {},
        {2: -3, 3: 2},
        {2: -2, 5: 1},
        {2: 2, 3: -1},
        {2: -1, 3: 1},
        {3: -1, 5: 1},
        {2: -3, 3: 1, 5: 1}
    ]
    """
    primes = np.atleast_1d(primes)
    # log2 of each prime: the basis for all harmonic-distance computations
    logs = np.log2(primes, dtype=np.float64)

    # The seed set are primes and their reciprocals
    # These are the values that we can use to expand our
    # interval set. These are expressed in terms of the
    # prime factorization exponents
    seeds = []
    for i in range(len(primes)):
        # Add the prime
        seed = [0] * len(primes)
        seed[i] = 1
        seeds.append(tuple(seed))
        # Add the inverse
        seed[i] = -1
        seeds.append(tuple(seed))

    # The frontier is the set of candidate intervals for inclusion
    frontier = seeds.copy()

    # The distances table will let us keep track of the harmonic
    # distances between all selected intervals
    # (memoized symmetrically, since HD(a, b) == HD(b, a))
    distances = dict()

    # Initialize the interval set with the root (1)
    intervals = list()
    root = tuple([0] * len(primes))
    intervals.append(root)

    # Greedy crystal growth: repeatedly absorb the frontier point that
    # minimizes total harmonic distance to the already-selected set.
    while len(intervals) < bins_per_octave:
        # Find the element on the frontier that minimizes the total
        # harmonic distance to the existing set
        score = np.inf
        best_f = 0
        for f, point in enumerate(frontier):
            # Compute harmonic distance (HD) to each selected interval
            HD = 0.0

            for s in intervals:
                if (s, point) not in distances:
                    distances[s, point] = __harmonic_distance(logs, point, s)
                    distances[point, s] = distances[s, point]

                HD += distances[s, point]

            # On (approximate) ties, prefer the candidate with smaller
            # total absolute log-magnitude (see _crystal_tie_break)
            if HD < score or (
                np.isclose(HD, score)
                and _crystal_tie_break(point, frontier[best_f], logs)
            ):
                score = HD
                best_f = f

        new_point = frontier.pop(best_f)
        intervals.append(new_point)

        # Expand the frontier: each seed step away from the new point
        # becomes a candidate, unless already selected or queued
        for _ in seeds:
            new_seed = tuple(np.array(new_point) + np.array(_))
            if new_seed not in intervals and new_seed not in frontier:
                frontier.append(new_seed)

    # Exponent matrix: one row of prime powers per selected interval
    pows = np.array(list(intervals), dtype=float)

    # Fold each interval into [1, 2): the fractional part of the log is the
    # within-octave ratio, the integer part is the implied power of 2
    log_ratios: np.ndarray
    pow2: np.ndarray
    log_ratios, pow2 = np.modf(pows.dot(logs))

    # If the fractional part is negative, add
    # one more power of two to get it into the range [0, 1).
    too_small = log_ratios < 0
    log_ratios[too_small] += 1
    pow2[too_small] -= 1

    # Convert powers of 2 to integer
    pow2 = pow2.astype(int)

    idx: Iterable[int]
    if sort:
        # Order the intervals
        idx = np.argsort(log_ratios)
        log_ratios = log_ratios[idx]
    else:
        # If not sorting, we'll take powers in order
        idx = range(bins_per_octave)

    if return_factors:
        # Collect the factorized intervals into a list
        # Each interval is prod(p**pows) / 2**pow2, so the
        # exponent of 2 in the factorization is -pow2
        factors = []
        for i in idx:
            v = dict()
            if pow2[i] != 0:
                v[2] = -pow2[i]

            v.update({p: int(power) for p, power in zip(primes, pows[i]) if power != 0})

            factors.append(v)
        return factors

    # Otherwise, just return intervals as floats
    return np.power(2, log_ratios)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,597 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Spectrogram decomposition
|
||||
=========================
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
decompose
|
||||
hpss
|
||||
nn_filter
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
import scipy.sparse
|
||||
from scipy.ndimage import median_filter
|
||||
|
||||
import sklearn.decomposition
|
||||
|
||||
from . import core
|
||||
from ._cache import cache
|
||||
from . import segment
|
||||
from . import util
|
||||
from .util.exceptions import ParameterError
|
||||
from typing import Any, Callable, List, Optional, Tuple, Union
|
||||
from ._typing import _IntLike_co, _FloatLike_co
|
||||
|
||||
__all__ = ["decompose", "hpss", "nn_filter"]
|
||||
|
||||
|
||||
def decompose(
    S: np.ndarray,
    *,
    n_components: Optional[int] = None,
    transformer: Optional[object] = None,
    sort: bool = False,
    fit: bool = True,
    **kwargs: Any,
) -> Tuple[np.ndarray, np.ndarray]:
    """Decompose a feature matrix.

    Given a spectrogram ``S``, produce a decomposition into ``components``
    and ``activations`` such that ``S ~= components.dot(activations)``.

    By default, this is done with non-negative matrix factorization (NMF),
    but any `sklearn.decomposition`-type object will work.

    Parameters
    ----------
    S : np.ndarray [shape=(..., n_features, n_samples), dtype=float]
        The input feature matrix (e.g., magnitude spectrogram)

        If the input is multi-channel, channels and features are automatically flattened into
        a single axis before the decomposition.
        For example, a stereo input `S` with shape `(2, n_features, n_samples)` is
        automatically reshaped to `(2 * n_features, n_samples)`.

    n_components : int > 0 [scalar] or None
        number of desired components

        if None, then ``n_features`` components are used

    transformer : None or object
        If None, use `sklearn.decomposition.NMF`

        Otherwise, any object with a similar interface to NMF should work.
        ``transformer`` must follow the scikit-learn convention, where
        input data is ``(n_samples, n_features)``.

        `transformer.fit_transform()` will be run on ``S.T`` (not ``S``),
        the return value of which is stored (transposed) as ``activations``

        The components will be retrieved as ``transformer.components_.T``::

            S ~= np.dot(activations, transformer.components_).T

        or equivalently::

            S ~= np.dot(transformer.components_.T, activations.T)

    sort : bool
        If ``True``, components are sorted by ascending peak frequency.

        .. note:: If used with ``transformer``, sorting is applied to copies
            of the decomposition parameters, and not to ``transformer``
            internal parameters.

        .. warning:: If the input array has more than two dimensions
            (e.g., if it's a multi-channel spectrogram), then axis sorting
            is not supported and a `ParameterError` exception is raised.

    fit : bool
        If `True`, components are estimated from the input ``S``.

        If `False`, components are assumed to be pre-computed and stored
        in ``transformer``, and are not changed.

    **kwargs : Additional keyword arguments to the default transformer
        `sklearn.decomposition.NMF`

    Returns
    -------
    components: np.ndarray [shape=(..., n_features, n_components)]
        matrix of components (basis elements).
    activations: np.ndarray [shape=(n_components, n_samples)]
        transformed matrix/activation matrix

    Raises
    ------
    ParameterError
        if ``fit`` is False and no ``transformer`` object is provided.

        if the input array is multi-channel and ``sort=True`` is specified.

    See Also
    --------
    sklearn.decomposition : SciKit-Learn matrix decomposition modules

    Examples
    --------
    Decompose a magnitude spectrogram into 16 components with NMF

    >>> y, sr = librosa.load(librosa.ex('pistachio'), duration=5)
    >>> S = np.abs(librosa.stft(y))
    >>> comps, acts = librosa.decompose.decompose(S, n_components=16)

    Sort components by ascending peak frequency

    >>> comps, acts = librosa.decompose.decompose(S, n_components=16,
    ...                                           sort=True)

    Or with sparse dictionary learning

    >>> import sklearn.decomposition
    >>> T = sklearn.decomposition.MiniBatchDictionaryLearning(n_components=16)
    >>> scomps, sacts = librosa.decompose.decompose(S, transformer=T, sort=True)

    >>> import matplotlib.pyplot as plt
    >>> layout = [list(".AAAA"), list("BCCCC"), list(".DDDD")]
    >>> fig, ax = plt.subplot_mosaic(layout, constrained_layout=True)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
    ...                          y_axis='log', x_axis='time', ax=ax['A'])
    >>> ax['A'].set(title='Input spectrogram')
    >>> ax['A'].label_outer()
    >>> librosa.display.specshow(librosa.amplitude_to_db(comps,
    ...                                                  ref=np.max),
    ...                          y_axis='log', ax=ax['B'])
    >>> ax['B'].set(title='Components')
    >>> ax['B'].label_outer()
    >>> ax['B'].sharey(ax['A'])
    >>> librosa.display.specshow(acts, x_axis='time', ax=ax['C'], cmap='gray_r')
    >>> ax['C'].set(ylabel='Components', title='Activations')
    >>> ax['C'].sharex(ax['A'])
    >>> ax['C'].label_outer()
    >>> S_approx = comps.dot(acts)
    >>> img = librosa.display.specshow(librosa.amplitude_to_db(S_approx,
    ...                                                        ref=np.max),
    ...                                y_axis='log', x_axis='time', ax=ax['D'])
    >>> ax['D'].set(title='Reconstructed spectrogram')
    >>> ax['D'].sharex(ax['A'])
    >>> ax['D'].sharey(ax['A'])
    >>> ax['D'].label_outer()
    >>> fig.colorbar(img, ax=list(ax.values()), format="%+2.f dB")
    """
    # Do a swapaxes and unroll
    orig_shape = list(S.shape)

    if S.ndim > 2 and sort:
        raise ParameterError(
            "Parameter sort=True is unsupported for input with more than two dimensions"
        )

    # Transpose S and unroll feature dimensions
    # Use order='F' here to preserve the temporal ordering
    S = S.T.reshape((S.shape[-1], -1), order="F")

    if n_components is None:
        # Default to one component per (flattened) feature
        n_components = S.shape[-1]

    if transformer is None:
        if fit is False:
            raise ParameterError("fit must be True if transformer is None")

        transformer = sklearn.decomposition.NMF(n_components=n_components, **kwargs)

    # Suppressing type errors here because we don't want to overly restrict
    # the transformer object type
    activations: np.ndarray
    if fit:
        activations = transformer.fit_transform(S).T  # type: ignore
    else:
        activations = transformer.transform(S).T  # type: ignore

    components: np.ndarray = transformer.components_  # type: ignore
    # Restore the original leading (channel/feature) dimensions
    component_shape = orig_shape[:-1] + [-1]
    # use order='F' here to preserve component ordering
    components = components.reshape(component_shape[::-1], order="F").T

    if sort:
        components, idx = util.axis_sort(components, index=True)
        activations = activations[idx]

    return components, activations
|
||||
|
||||
|
||||
@cache(level=30)
def hpss(
    S: np.ndarray,
    *,
    kernel_size: Union[
        _IntLike_co, Tuple[_IntLike_co, _IntLike_co], List[_IntLike_co]
    ] = 31,
    power: float = 2.0,
    mask: bool = False,
    margin: Union[
        _FloatLike_co, Tuple[_FloatLike_co, _FloatLike_co], List[_FloatLike_co]
    ] = 1.0,
) -> Tuple[np.ndarray, np.ndarray]:
    """Median-filtering harmonic percussive source separation (HPSS).

    If ``margin = 1.0``, decomposes an input spectrogram ``S = H + P``
    where ``H`` contains the harmonic components,
    and ``P`` contains the percussive components.

    If ``margin > 1.0``, decomposes an input spectrogram ``S = H + P + R``
    where ``R`` contains residual components not included in ``H`` or ``P``.

    This implementation is based upon the algorithm described by [#]_ and [#]_.

    .. [#] Fitzgerald, Derry.
        "Harmonic/percussive separation using median filtering."
        13th International Conference on Digital Audio Effects (DAFX10),
        Graz, Austria, 2010.

    .. [#] Driedger, Müller, Disch.
        "Extending harmonic-percussive separation of audio."
        15th International Society for Music Information Retrieval Conference (ISMIR 2014),
        Taipei, Taiwan, 2014.

    Parameters
    ----------
    S : np.ndarray [shape=(..., d, n)]
        input spectrogram. May be real (magnitude) or complex.
        Multi-channel is supported.

    kernel_size : int or tuple (kernel_harmonic, kernel_percussive)
        kernel size(s) for the median filters.

        - If scalar, the same size is used for both harmonic and percussive.
        - If tuple, the first value specifies the width of the
          harmonic filter, and the second value specifies the width
          of the percussive filter.

    power : float > 0 [scalar]
        Exponent for the Wiener filter when constructing soft mask matrices.

    mask : bool
        Return the masking matrices instead of components.

        Masking matrices contain non-negative real values that
        can be used to measure the assignment of energy from ``S``
        into harmonic or percussive components.

        Components can be recovered by multiplying ``S * mask_H``
        or ``S * mask_P``.

    margin : float or tuple (margin_harmonic, margin_percussive)
        margin size(s) for the masks (as described by Driedger, Müller,
        and Disch in the second reference above)

        - If scalar, the same size is used for both harmonic and percussive.
        - If tuple, the first value specifies the margin of the
          harmonic mask, and the second value specifies the margin
          of the percussive mask.

    Returns
    -------
    harmonic : np.ndarray [shape=(..., d, n)]
        harmonic component (or mask)
    percussive : np.ndarray [shape=(..., d, n)]
        percussive component (or mask)

    See Also
    --------
    librosa.util.softmask

    Notes
    -----
    This function caches at level 30.

    Examples
    --------
    Separate into harmonic and percussive

    >>> y, sr = librosa.load(librosa.ex('choice'), duration=5)
    >>> D = librosa.stft(y)
    >>> H, P = librosa.decompose.hpss(D)

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=3, sharex=True, sharey=True)
    >>> img = librosa.display.specshow(librosa.amplitude_to_db(np.abs(D),
    ...                                                        ref=np.max),
    ...                                y_axis='log', x_axis='time', ax=ax[0])
    >>> ax[0].set(title='Full power spectrogram')
    >>> ax[0].label_outer()
    >>> librosa.display.specshow(librosa.amplitude_to_db(np.abs(H),
    ...                                                  ref=np.max(np.abs(D))),
    ...                          y_axis='log', x_axis='time', ax=ax[1])
    >>> ax[1].set(title='Harmonic power spectrogram')
    >>> ax[1].label_outer()
    >>> librosa.display.specshow(librosa.amplitude_to_db(np.abs(P),
    ...                                                  ref=np.max(np.abs(D))),
    ...                          y_axis='log', x_axis='time', ax=ax[2])
    >>> ax[2].set(title='Percussive power spectrogram')
    >>> fig.colorbar(img, ax=ax, format='%+2.0f dB')

    Or with a narrower horizontal filter

    >>> H, P = librosa.decompose.hpss(D, kernel_size=(13, 31))

    Just get harmonic/percussive masks, not the spectra

    >>> mask_H, mask_P = librosa.decompose.hpss(D, mask=True)
    >>> mask_H
    array([[1.853e-03, 1.701e-04, ..., 9.922e-01, 1.000e+00],
           [2.316e-03, 2.127e-04, ..., 9.989e-01, 1.000e+00],
           ...,
           [8.195e-05, 6.939e-05, ..., 3.105e-04, 4.231e-04],
           [3.159e-05, 4.156e-05, ..., 6.216e-04, 6.188e-04]],
          dtype=float32)
    >>> mask_P
    array([[9.981e-01, 9.998e-01, ..., 7.759e-03, 3.201e-05],
           [9.977e-01, 9.998e-01, ..., 1.122e-03, 4.451e-06],
           ...,
           [9.999e-01, 9.999e-01, ..., 9.997e-01, 9.996e-01],
           [1.000e+00, 1.000e+00, ..., 9.994e-01, 9.994e-01]],
          dtype=float32)

    Separate into harmonic/percussive/residual components by using a margin > 1.0

    >>> H, P = librosa.decompose.hpss(D, margin=3.0)
    >>> R = D - (H+P)
    >>> y_harm = librosa.istft(H)
    >>> y_perc = librosa.istft(P)
    >>> y_resi = librosa.istft(R)

    Get a more isolated percussive component by widening its margin

    >>> H, P = librosa.decompose.hpss(D, margin=(1.0,5.0))
    """
    phase: Union[float, np.ndarray]

    # Separate magnitude and phase; masks are computed on magnitudes only
    if np.iscomplexobj(S):
        S, phase = core.magphase(S)
    else:
        phase = 1

    if isinstance(kernel_size, tuple) or isinstance(kernel_size, list):
        win_harm = kernel_size[0]
        win_perc = kernel_size[1]
    else:
        win_harm = kernel_size
        win_perc = kernel_size

    if isinstance(margin, tuple) or isinstance(margin, list):
        margin_harm = margin[0]
        margin_perc = margin[1]
    else:
        margin_harm = margin
        margin_perc = margin

    # margin minimum is 1.0
    if margin_harm < 1 or margin_perc < 1:
        raise ParameterError(
            "Margins must be >= 1.0. " "A typical range is between 1 and 10."
        )

    # shape for kernels
    # harmonic: median filter along time (last axis);
    # percussive: median filter along frequency (second-to-last axis)
    harm_shape: List[_IntLike_co] = [1] * S.ndim
    harm_shape[-1] = win_harm

    perc_shape: List[_IntLike_co] = [1] * S.ndim
    perc_shape[-2] = win_perc

    # Compute median filters. Pre-allocation here preserves memory layout.
    harm = np.empty_like(S)
    harm[:] = median_filter(S, size=harm_shape, mode="reflect")

    perc = np.empty_like(S)
    perc[:] = median_filter(S, size=perc_shape, mode="reflect")

    # With margin == 1, ties at zero are split evenly between the masks
    split_zeros = margin_harm == 1 and margin_perc == 1

    mask_harm = util.softmask(
        harm, perc * margin_harm, power=power, split_zeros=split_zeros
    )

    mask_perc = util.softmask(
        perc, harm * margin_perc, power=power, split_zeros=split_zeros
    )

    if mask:
        return mask_harm, mask_perc

    # Re-apply the original phase to the masked magnitudes
    return ((S * mask_harm) * phase, (S * mask_perc) * phase)
|
||||
|
||||
|
||||
@cache(level=30)
def nn_filter(
    S: np.ndarray,
    *,
    rec: Optional[Union[scipy.sparse.spmatrix, np.ndarray]] = None,
    aggregate: Optional[Callable] = None,
    axis: int = -1,
    **kwargs: Any,
) -> np.ndarray:
    """Filter by nearest-neighbor aggregation.

    Each data point (e.g., a spectrogram column) is replaced by aggregating
    its nearest neighbors in feature space.  This can be useful for
    de-noising a spectrogram or feature matrix.

    The non-local means method [#]_ is recovered by providing a weighted
    recurrence matrix as input with ``aggregate=np.average``; setting
    ``aggregate=np.median`` instead produces sparse de-noising as in
    REPET-SIM [#]_.

    .. [#] Buades, A., Coll, B., & Morel, J. M.
        (2005, June). A non-local algorithm for image denoising.
        In Computer Vision and Pattern Recognition, 2005.
        CVPR 2005. IEEE Computer Society Conference on (Vol. 2, pp. 60-65). IEEE.

    .. [#] Rafii, Z., & Pardo, B.
        (2012, October). "Music/Voice Separation Using the Similarity Matrix."
        International Society for Music Information Retrieval Conference, 2012.

    Parameters
    ----------
    S : np.ndarray
        The input data (spectrogram) to filter. Multi-channel is supported.
    rec : (optional) scipy.sparse.spmatrix or np.ndarray
        Optionally, a pre-computed nearest-neighbor matrix
        as provided by `librosa.segment.recurrence_matrix`
    aggregate : function
        aggregation function (default: `np.mean`)

        If ``aggregate=np.average``, a weighted average is computed
        according to the (per-row) weights in ``rec``.  All other
        aggregation functions treat neighbors equally.
    axis : int
        The axis along which to filter (by default, columns)
    **kwargs
        Additional keyword arguments provided to
        `librosa.segment.recurrence_matrix` if ``rec`` is not provided

    Returns
    -------
    S_filtered : np.ndarray
        The filtered data, with shape equivalent to the input ``S``.

    Raises
    ------
    ParameterError
        if ``rec`` is provided and its shape is incompatible with ``S``.

    See Also
    --------
    decompose
    hpss
    librosa.segment.recurrence_matrix

    Notes
    -----
    This function caches at level 30.
    """
    agg_fn = np.mean if aggregate is None else aggregate

    rec_sparse: scipy.sparse.spmatrix
    if rec is None:
        # No recurrence matrix given: build a sparse one on the fly.
        rec_kwargs = dict(kwargs, sparse=True)
        rec_sparse = segment.recurrence_matrix(S, axis=axis, **rec_kwargs)
    elif scipy.sparse.issparse(rec):
        rec_sparse = rec
    else:
        rec_sparse = scipy.sparse.csc_matrix(rec)

    # The recurrence matrix must be square and match the filtering axis.
    if rec_sparse.shape[0] != S.shape[axis] or rec_sparse.shape[0] != rec_sparse.shape[1]:
        raise ParameterError(
            "Invalid self-similarity matrix shape "
            f"rec.shape={rec_sparse.shape} for S.shape={S.shape}"
        )

    # Move the filtering axis to the front, aggregate, then move it back.
    filtered = __nn_filter_helper(
        rec_sparse.data,
        rec_sparse.indices,
        rec_sparse.indptr,
        S.swapaxes(0, axis),
        agg_fn,
    )
    return filtered.swapaxes(0, axis)
|
||||
|
||||
|
||||
def __nn_filter_helper(
    R_data, R_indices, R_ptr, S: np.ndarray, aggregate: Callable
) -> np.ndarray:
    """Nearest-neighbor filter helper function.

    This is an internal function, not for use outside of the decompose module.

    It applies the nearest-neighbor filter to ``S``, assuming that the first
    axis indexes observations.

    Parameters
    ----------
    R_data, R_indices, R_ptr : np.ndarrays
        The ``data``, ``indices``, and ``indptr`` of a scipy.sparse matrix
    S : np.ndarray
        The observation data to filter
    aggregate : callable
        The aggregation operator

    Returns
    -------
    S_out : np.ndarray like S
        The filtered data array
    """
    filtered = np.empty_like(S)

    # np.average is the only aggregator that consumes recurrence weights.
    use_weights = aggregate is np.average

    for row, (start, stop) in enumerate(zip(R_ptr[:-1], R_ptr[1:])):
        # Column indices of the non-zeros in this row of the recurrence matrix
        neighbor_idx = R_indices[start:stop]

        if len(neighbor_idx) == 0:
            # No neighbors: the observation passes through unchanged.
            filtered[row] = S[row]
            continue

        neighbors = np.take(S, neighbor_idx, axis=0)

        if use_weights:
            filtered[row] = aggregate(
                neighbors, axis=0, weights=R_data[start:stop]
            )
        else:
            filtered[row] = aggregate(neighbors, axis=0)

    return filtered
|
||||
2096
linedance-app/venv/lib/python3.12/site-packages/librosa/display.py
Normal file
2096
linedance-app/venv/lib/python3.12/site-packages/librosa/display.py
Normal file
File diff suppressed because it is too large
Load Diff
1002
linedance-app/venv/lib/python3.12/site-packages/librosa/effects.py
Normal file
1002
linedance-app/venv/lib/python3.12/site-packages/librosa/effects.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,63 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Feature extraction
|
||||
==================
|
||||
|
||||
Spectral features
|
||||
-----------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
chroma_stft
|
||||
chroma_cqt
|
||||
chroma_cens
|
||||
chroma_vqt
|
||||
melspectrogram
|
||||
mfcc
|
||||
rms
|
||||
spectral_centroid
|
||||
spectral_bandwidth
|
||||
spectral_contrast
|
||||
spectral_flatness
|
||||
spectral_rolloff
|
||||
poly_features
|
||||
tonnetz
|
||||
zero_crossing_rate
|
||||
|
||||
Rhythm features
|
||||
---------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
tempo
|
||||
tempogram
|
||||
fourier_tempogram
|
||||
tempogram_ratio
|
||||
|
||||
Feature manipulation
|
||||
--------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
delta
|
||||
stack_memory
|
||||
|
||||
|
||||
Feature inversion
|
||||
-----------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated
|
||||
|
||||
inverse.mel_to_stft
|
||||
inverse.mel_to_audio
|
||||
inverse.mfcc_to_mel
|
||||
inverse.mfcc_to_audio
|
||||
"""
|
||||
|
||||
import lazy_loader as lazy
|
||||
|
||||
__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
|
||||
@@ -0,0 +1,31 @@
|
||||
from .utils import (
|
||||
delta as delta,
|
||||
stack_memory as stack_memory,
|
||||
)
|
||||
from .spectral import (
|
||||
spectral_centroid as spectral_centroid,
|
||||
spectral_bandwidth as spectral_bandwidth,
|
||||
spectral_contrast as spectral_contrast,
|
||||
spectral_rolloff as spectral_rolloff,
|
||||
spectral_flatness as spectral_flatness,
|
||||
poly_features as poly_features,
|
||||
rms as rms,
|
||||
zero_crossing_rate as zero_crossing_rate,
|
||||
chroma_stft as chroma_stft,
|
||||
chroma_cqt as chroma_cqt,
|
||||
chroma_cens as chroma_cens,
|
||||
chroma_vqt as chroma_vqt,
|
||||
melspectrogram as melspectrogram,
|
||||
mfcc as mfcc,
|
||||
tonnetz as tonnetz,
|
||||
)
|
||||
from .rhythm import (
|
||||
tempogram as tempogram,
|
||||
fourier_tempogram as fourier_tempogram,
|
||||
tempo as tempo,
|
||||
tempogram_ratio as tempogram_ratio,
|
||||
)
|
||||
|
||||
from . import (
|
||||
inverse as inverse,
|
||||
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,373 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Feature inversion"""
|
||||
|
||||
import warnings
|
||||
import numpy as np
|
||||
|
||||
from ..core.fft import get_fftlib
|
||||
from ..util.exceptions import ParameterError
|
||||
from ..core.spectrum import griffinlim
|
||||
from ..core.spectrum import db_to_power
|
||||
from ..util.utils import tiny
|
||||
from .. import filters
|
||||
from ..util import nnls, expand_to
|
||||
from numpy.typing import DTypeLike
|
||||
from typing import Any, Optional
|
||||
from .._typing import _WindowSpec, _PadModeSTFT
|
||||
|
||||
__all__ = ["mel_to_stft", "mel_to_audio", "mfcc_to_mel", "mfcc_to_audio"]
|
||||
|
||||
|
||||
def mel_to_stft(
    M: np.ndarray,
    *,
    sr: float = 22050,
    n_fft: int = 2048,
    power: float = 2.0,
    **kwargs: Any,
) -> np.ndarray:
    """Approximate STFT magnitude from a Mel power spectrogram.

    Parameters
    ----------
    M : np.ndarray [shape=(..., n_mels, n), non-negative]
        The spectrogram as produced by `feature.melspectrogram`
    sr : number > 0 [scalar]
        sampling rate of the underlying signal
    n_fft : int > 0 [scalar]
        number of FFT components in the resulting STFT
    power : float > 0 [scalar]
        Exponent for the magnitude melspectrogram
    **kwargs : additional keyword arguments for Mel filter bank parameters
        ``fmin``, ``fmax``, ``htk``, ``norm``, and ``dtype``;
        see `librosa.filters.mel` for a full description.

    Returns
    -------
    S : np.ndarray [shape=(..., n_fft, t), non-negative]
        An approximate linear magnitude spectrogram

    See Also
    --------
    librosa.feature.melspectrogram
    librosa.stft
    librosa.filters.mel
    librosa.util.nnls
    """
    # Build a Mel basis matching the input's band count and dtype.
    n_mels = M.shape[-2]
    basis = filters.mel(
        sr=sr, n_fft=n_fft, n_mels=n_mels, dtype=M.dtype, **kwargs
    )

    # Invert the Mel projection by non-negative least squares, then undo
    # the power exponent.  The exponentiation is done in-place to avoid
    # an extra allocation.
    S = nnls(basis, M)
    np.power(S, 1.0 / power, out=S)
    return S
|
||||
|
||||
|
||||
def mel_to_audio(
    M: np.ndarray,
    *,
    sr: float = 22050,
    n_fft: int = 2048,
    hop_length: Optional[int] = None,
    win_length: Optional[int] = None,
    window: _WindowSpec = "hann",
    center: bool = True,
    pad_mode: _PadModeSTFT = "constant",
    power: float = 2.0,
    n_iter: int = 32,
    length: Optional[int] = None,
    dtype: DTypeLike = np.float32,
    **kwargs: Any,
) -> np.ndarray:
    """Invert a mel power spectrogram to audio using Griffin-Lim.

    This is primarily a convenience wrapper for:

    >>> S = librosa.feature.inverse.mel_to_stft(M)
    >>> y = librosa.griffinlim(S)

    Parameters
    ----------
    M : np.ndarray [shape=(..., n_mels, n), non-negative]
        The spectrogram as produced by `feature.melspectrogram`
    sr : number > 0 [scalar]
        sampling rate of the underlying signal
    n_fft : int > 0 [scalar]
        number of FFT components in the resulting STFT
    hop_length : None or int > 0
        The hop length of the STFT. If not provided, it will default to ``n_fft // 4``
    win_length : None or int > 0
        The window length of the STFT. By default, it will equal ``n_fft``
    window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
        A window specification as supported by `stft` or `istft`
    center : boolean
        If `True`, the STFT is assumed to use centered frames.
        If `False`, the STFT is assumed to use left-aligned frames.
    pad_mode : string
        If ``center=True``, the padding mode to use at the edges of the signal.
        By default, STFT uses zero padding.
    power : float > 0 [scalar]
        Exponent for the magnitude melspectrogram
    n_iter : int > 0
        The number of iterations for Griffin-Lim
    length : None or int > 0
        If provided, the output ``y`` is zero-padded or clipped to exactly
        ``length`` samples.
    dtype : np.dtype
        Real numeric type for the time-domain signal. Default is 32-bit float.
    **kwargs : additional keyword arguments for Mel filter bank parameters
        ``fmin``, ``fmax``, ``htk``, and ``norm``; see `librosa.filters.mel`
        for a full description.

    Returns
    -------
    y : np.ndarray [shape(..., n,)]
        time-domain signal reconstructed from ``M``

    See Also
    --------
    librosa.griffinlim
    librosa.feature.melspectrogram
    librosa.filters.mel
    librosa.feature.inverse.mel_to_stft
    """
    # Recover an approximate linear spectrogram, then estimate the phase
    # iteratively with Griffin-Lim.
    linear_spec = mel_to_stft(M, sr=sr, n_fft=n_fft, power=power, **kwargs)

    return griffinlim(
        linear_spec,
        n_iter=n_iter,
        hop_length=hop_length,
        win_length=win_length,
        n_fft=n_fft,
        window=window,
        center=center,
        dtype=dtype,
        length=length,
        pad_mode=pad_mode,
    )
|
||||
|
||||
|
||||
def mfcc_to_mel(
    mfcc: np.ndarray,
    *,
    n_mels: int = 128,
    dct_type: int = 2,
    norm: Optional[str] = "ortho",
    ref: float = 1.0,
    lifter: float = 0,
) -> np.ndarray:
    """Invert Mel-frequency cepstral coefficients to approximate a Mel power
    spectrogram.

    This inversion proceeds in two steps:

    1. The inverse DCT is applied to the MFCCs
    2. `librosa.db_to_power` is applied to map the dB-scaled result to a power spectrogram

    Parameters
    ----------
    mfcc : np.ndarray [shape=(..., n_mfcc, n)]
        The Mel-frequency cepstral coefficients
    n_mels : int > 0
        The number of Mel frequencies
    dct_type : {1, 2, 3}
        Discrete cosine transform (DCT) type
        By default, DCT type-2 is used.
    norm : None or 'ortho'
        If ``dct_type`` is `2 or 3`, setting ``norm='ortho'`` uses an
        orthonormal DCT basis.
        Normalization is not supported for `dct_type=1`.
    ref : float
        Reference power for (inverse) decibel calculation
    lifter : number >= 0
        If ``lifter>0``, apply inverse liftering (inverse cepstral filtering)::

            M[n, :] <- M[n, :] / (1 + sin(pi * (n + 1) / lifter) * lifter / 2)

    Returns
    -------
    M : np.ndarray [shape=(..., n_mels, n)]
        An approximate Mel power spectrum recovered from ``mfcc``

    Warns
    -----
    UserWarning
        due to critical values in lifter array that invokes underflow.

    See Also
    --------
    librosa.feature.mfcc
    librosa.feature.melspectrogram
    scipy.fft.dct
    """
    if lifter > 0:
        # Reconstruct the sinusoidal lifter that was applied during MFCC
        # extraction, broadcast across the coefficient axis.
        n_coef = mfcc.shape[-2]
        coef_idx = expand_to(
            np.arange(1, 1 + n_coef, dtype=mfcc.dtype), ndim=mfcc.ndim, axes=-2
        )
        sine_lift = 1 + lifter * 0.5 * np.sin(np.pi * coef_idx / lifter)

        # Warn when the lifter contains values so close to zero that the
        # division below may underflow.
        if np.any(np.abs(sine_lift) < np.finfo(sine_lift.dtype).eps):
            warnings.warn(
                message="lifter array includes critical values that may invoke underflow.",
                category=UserWarning,
                stacklevel=2,
            )

        # Undo the liftering; tiny() guards against division by zero.
        mfcc = mfcc / (sine_lift + tiny(mfcc))

    elif lifter != 0:
        raise ParameterError("MFCC to mel lifter must be a non-negative number.")

    # Step 1: inverse DCT recovers the log-Mel spectrogram
    logmel = get_fftlib().idct(mfcc, axis=-2, type=dct_type, norm=norm, n=n_mels)

    # Step 2: map decibels back to power
    result: np.ndarray = db_to_power(logmel, ref=ref)
    return result
|
||||
|
||||
|
||||
def mfcc_to_audio(
    mfcc: np.ndarray,
    *,
    n_mels: int = 128,
    dct_type: int = 2,
    norm: Optional[str] = "ortho",
    ref: float = 1.0,
    lifter: float = 0,
    **kwargs: Any,
) -> np.ndarray:
    """Convert Mel-frequency cepstral coefficients to a time-domain audio signal

    This function is primarily a convenience wrapper for the following steps:

    1. Convert mfcc to Mel power spectrum (`mfcc_to_mel`)
    2. Convert Mel power spectrum to time-domain audio (`mel_to_audio`)

    Parameters
    ----------
    mfcc : np.ndarray [shape=(..., n_mfcc, n)]
        The Mel-frequency cepstral coefficients
    n_mels : int > 0
        The number of Mel frequencies
    dct_type : {1, 2, 3}
        Discrete cosine transform (DCT) type
        By default, DCT type-2 is used.
    norm : None or 'ortho'
        If ``dct_type`` is `2 or 3`, setting ``norm='ortho'`` uses an orthonormal
        DCT basis.
        Normalization is not supported for ``dct_type=1``.
    ref : float
        Reference power for (inverse) decibel calculation
    lifter : number >= 0
        If ``lifter>0``, apply inverse liftering (inverse cepstral filtering)::

            M[n, :] <- M[n, :] / (1 + sin(pi * (n + 1) / lifter) * lifter / 2)

    **kwargs : additional keyword arguments to pass through to `mel_to_audio`
        M : np.ndarray [shape=(..., n_mels, n), non-negative]
            The spectrogram as produced by `feature.melspectrogram`
        sr : number > 0 [scalar]
            sampling rate of the underlying signal
        n_fft : int > 0 [scalar]
            number of FFT components in the resulting STFT
        hop_length : None or int > 0
            The hop length of the STFT. If not provided, it will default to ``n_fft // 4``
        win_length : None or int > 0
            The window length of the STFT. By default, it will equal ``n_fft``
        window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
            A window specification as supported by `stft` or `istft`
        center : boolean
            If `True`, the STFT is assumed to use centered frames.
            If `False`, the STFT is assumed to use left-aligned frames.
        pad_mode : string
            If ``center=True``, the padding mode to use at the edges of the signal.
            By default, STFT uses zero padding.
        power : float > 0 [scalar]
            Exponent for the magnitude melspectrogram
        n_iter : int > 0
            The number of iterations for Griffin-Lim
        length : None or int > 0
            If provided, the output ``y`` is zero-padded or clipped to exactly ``length``
            samples.
        dtype : np.dtype
            Real numeric type for the time-domain signal. Default is 32-bit float.
        **kwargs : additional keyword arguments for Mel filter bank parameters
            fmin : float >= 0 [scalar]
                lowest frequency (in Hz)
            fmax : float >= 0 [scalar]
                highest frequency (in Hz).
                If `None`, use ``fmax = sr / 2.0``
            htk : bool [scalar]
                use HTK formula instead of Slaney

    Returns
    -------
    y : np.ndarray [shape=(..., n)]
        A time-domain signal reconstructed from `mfcc`

    See Also
    --------
    mfcc_to_mel
    mel_to_audio
    librosa.feature.mfcc
    librosa.griffinlim
    scipy.fft.dct
    """
    # Step 1: recover an approximate Mel power spectrum from the MFCCs.
    # NOTE: the lifter formula above was corrected to match `mfcc_to_mel`,
    # which divides by ``1 + sin(pi*(n+1)/lifter) * lifter / 2`` -- the
    # previous docstring mis-parenthesized the expression.
    mel_spec = mfcc_to_mel(
        mfcc, n_mels=n_mels, dct_type=dct_type, norm=norm, ref=ref, lifter=lifter
    )

    # Step 2: Griffin-Lim phase reconstruction back to the time domain.
    return mel_to_audio(mel_spec, **kwargs)
|
||||
@@ -0,0 +1,655 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Rhythmic feature extraction"""
|
||||
|
||||
import numpy as np
|
||||
import scipy
|
||||
|
||||
from .. import util
|
||||
|
||||
from .._cache import cache
|
||||
from ..core.audio import autocorrelate
|
||||
from ..core.spectrum import stft
|
||||
from ..core.convert import tempo_frequencies, time_to_frames
|
||||
from ..core.harmonic import f0_harmonics
|
||||
from ..util.exceptions import ParameterError
|
||||
from ..filters import get_window
|
||||
from typing import Optional, Callable, Any
|
||||
from .._typing import _WindowSpec
|
||||
|
||||
__all__ = ["tempogram", "fourier_tempogram", "tempo", "tempogram_ratio"]
|
||||
|
||||
|
||||
# -- Rhythmic features -- #
|
||||
def tempogram(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    win_length: int = 384,
    center: bool = True,
    window: _WindowSpec = "hann",
    norm: Optional[float] = np.inf,
) -> np.ndarray:
    """Compute the tempogram: local autocorrelation of the onset strength envelope. [#]_

    .. [#] Grosche, Peter, Meinard Müller, and Frank Kurth.
        "Cyclic tempogram - A mid-level tempo representation for music signals."
        ICASSP, 2010.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        Audio time series. Multi-channel is supported.
    sr : number > 0 [scalar]
        sampling rate of ``y``
    onset_envelope : np.ndarray [shape=(..., n) or (..., m, n)] or None
        Optional pre-computed onset strength envelope as provided by
        `librosa.onset.onset_strength`.  If multi-dimensional, tempograms
        are computed independently for each band (first dimension).
    hop_length : int > 0
        number of audio samples between successive onset measurements
    win_length : int > 0
        length of the onset autocorrelation window (in frames/onset measurements)
        The default setting (384) corresponds to ``384 * hop_length / sr ~= 8.9s``.
    center : bool
        If `True`, onset autocorrelation windows are centered.
        If `False`, windows are left-aligned.
    window : string, function, number, tuple, or np.ndarray [shape=(win_length,)]
        A window specification as in `stft`.
    norm : {np.inf, -np.inf, 0, float > 0, None}
        Normalization mode.  Set to `None` to disable normalization.

    Returns
    -------
    tempogram : np.ndarray [shape=(..., win_length, n)]
        Localized autocorrelation of the onset strength envelope.
        If given multi-band input (``onset_envelope.shape==(m,n)``) then
        ``tempogram[i]`` is the tempogram of ``onset_envelope[i]``.

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided

        if ``win_length < 1``

    See Also
    --------
    fourier_tempogram
    librosa.onset.onset_strength
    librosa.util.normalize
    librosa.stft

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('nutcracker'), duration=30)
    >>> oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
    >>> tg = librosa.feature.tempogram(onset_envelope=oenv, sr=sr,
    ...                                hop_length=512)
    """
    from ..onset import onset_strength

    if win_length < 1:
        raise ParameterError("win_length must be a positive integer")

    taper = get_window(window, win_length, fftbins=True)

    if onset_envelope is None:
        if y is None:
            raise ParameterError("Either y or onset_envelope must be provided")
        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Remember the original frame count before any padding
    n_frames = onset_envelope.shape[-1]

    if center:
        # Pad both ends of the time axis so windows can be centered;
        # linear-ramp padding tapers the envelope to zero at the edges.
        half = int(win_length // 2)
        pad_widths = [(0, 0)] * onset_envelope.ndim
        pad_widths[-1] = (half, half)
        onset_envelope = np.pad(
            onset_envelope, pad_widths, mode="linear_ramp", end_values=[0, 0]
        )

    # Slice the onset envelope into overlapping windows of length win_length
    frames = util.frame(onset_envelope, frame_length=win_length, hop_length=1)

    if center:
        # Discard the extra frames introduced by padding
        frames = frames[..., :n_frames]

    # Broadcast the taper against the frame axis explicitly
    taper = util.expand_to(taper, ndim=frames.ndim, axes=-2)

    # Window each frame, autocorrelate along the lag axis, and normalize
    return util.normalize(
        autocorrelate(frames * taper, axis=-2), norm=norm, axis=-2
    )
|
||||
|
||||
|
||||
def fourier_tempogram(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    win_length: int = 384,
    center: bool = True,
    window: _WindowSpec = "hann",
) -> np.ndarray:
    """Compute the Fourier tempogram: the short-time Fourier transform of the
    onset strength envelope. [#]_

    .. [#] Grosche, Peter, Meinard Müller, and Frank Kurth.
        "Cyclic tempogram - A mid-level tempo representation for music signals."
        ICASSP, 2010.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        Audio time series. Multi-channel is supported.
    sr : number > 0 [scalar]
        sampling rate of ``y``
    onset_envelope : np.ndarray [shape=(..., n)] or None
        Optional pre-computed onset strength envelope as provided by
        ``librosa.onset.onset_strength``.  Multi-channel is supported.
    hop_length : int > 0
        number of audio samples between successive onset measurements
    win_length : int > 0
        length of the onset window (in frames/onset measurements)
        The default setting (384) corresponds to ``384 * hop_length / sr ~= 8.9s``.
    center : bool
        If `True`, onset windows are centered.
        If `False`, windows are left-aligned.
    window : string, function, number, tuple, or np.ndarray [shape=(win_length,)]
        A window specification as in `stft`.

    Returns
    -------
    tempogram : np.ndarray [shape=(..., win_length // 2 + 1, n)]
        Complex short-time Fourier transform of the onset envelope.

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided

        if ``win_length < 1``

    See Also
    --------
    tempogram
    librosa.onset.onset_strength
    librosa.util.normalize
    librosa.stft

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('nutcracker'))
    >>> oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
    >>> ftg = librosa.feature.fourier_tempogram(onset_envelope=oenv, sr=sr,
    ...                                         hop_length=512)
    """
    from ..onset import onset_strength

    if win_length < 1:
        raise ParameterError("win_length must be a positive integer")

    if onset_envelope is None:
        if y is None:
            raise ParameterError("Either y or onset_envelope must be provided")
        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # The Fourier tempogram is simply the STFT of the onset envelope,
    # computed at unit hop so every onset frame gets a spectrum.
    return stft(
        onset_envelope,
        n_fft=win_length,
        hop_length=1,
        center=center,
        window=window,
    )
|
||||
|
||||
|
||||
@cache(level=30)
def tempo(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    tg: Optional[np.ndarray] = None,
    hop_length: int = 512,
    start_bpm: float = 120,
    std_bpm: float = 1.0,
    ac_size: float = 8.0,
    max_tempo: Optional[float] = 320.0,
    aggregate: Optional[Callable[..., Any]] = np.mean,
    prior: Optional[scipy.stats.rv_continuous] = None,
) -> np.ndarray:
    """Estimate the tempo (beats per minute).

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        audio time series. Multi-channel is supported.
    sr : number > 0 [scalar]
        sampling rate of the time series
    onset_envelope : np.ndarray [shape=(..., n)]
        pre-computed onset strength envelope
    tg : np.ndarray
        pre-computed tempogram.  If provided, then ``y`` and
        ``onset_envelope`` are ignored, and the window length is
        inferred from the shape of the tempogram.
    hop_length : int > 0 [scalar]
        hop length of the time series
    start_bpm : float [scalar]
        initial guess of the BPM
    std_bpm : float > 0 [scalar]
        standard deviation of tempo distribution
    ac_size : float > 0 [scalar]
        length (in seconds) of the auto-correlation window
    max_tempo : float > 0 [scalar, optional]
        If provided, only estimate tempo below this threshold
    aggregate : callable [optional]
        Aggregation function for estimating global tempo.
        If `None`, then tempo is estimated independently for each frame.
    prior : scipy.stats.rv_continuous [optional]
        A prior distribution over tempo (in beats per minute).
        By default, a pseudo-log-normal prior is used.
        If given, ``start_bpm`` and ``std_bpm`` will be ignored.

    Returns
    -------
    tempo : np.ndarray
        estimated tempo (beats per minute).
        If input is multi-channel, one tempo estimate per channel is provided.

    Raises
    ------
    ParameterError
        if ``start_bpm <= 0``

    See Also
    --------
    librosa.onset.onset_strength
    librosa.feature.tempogram

    Notes
    -----
    This function caches at level 30.

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('nutcracker'), duration=30)
    >>> onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    >>> librosa.feature.tempo(onset_envelope=onset_env, sr=sr)
    array([143.555])

    >>> # Frame-wise (dynamic) tempo estimates
    >>> librosa.feature.tempo(onset_envelope=onset_env, sr=sr, aggregate=None)
    array([ 89.103,  89.103, ..., 123.047, 123.047])
    """
    if start_bpm <= 0:
        raise ParameterError("start_bpm must be strictly positive")

    # Obtain a tempogram, computing one from the input signal if necessary
    if tg is not None:
        # Lag-axis length comes directly from the supplied tempogram
        n_lags = tg.shape[-2]
    else:
        n_lags = time_to_frames(ac_size, sr=sr, hop_length=hop_length).item()
        tg = tempogram(
            y=y,
            sr=sr,
            onset_envelope=onset_envelope,
            hop_length=hop_length,
            win_length=n_lags,
        )

    # For a global tempo estimate, collapse the time axis up front.
    # keepdims preserves the lag axis position for broadcasting below.
    if aggregate is not None:
        tg = aggregate(tg, axis=-1, keepdims=True)

    # help out mypy
    assert tg is not None

    # BPM value associated with each autocorrelation lag
    lag_bpms = tempo_frequencies(n_lags, hop_length=hop_length, sr=sr)

    # Log-weighting over candidate tempi: either the caller's prior,
    # or a pseudo-log-normal centered on start_bpm
    if prior is not None:
        log_weight = prior.logpdf(lag_bpms)
    else:
        log_weight = -0.5 * ((np.log2(lag_bpms) - np.log2(start_bpm)) / std_bpm) ** 2

    if max_tempo is not None:
        # lag_bpms is ordered from fast to slow tempi, so the first index
        # with bpm < max_tempo bounds the region to suppress
        cutoff = int(np.argmax(lag_bpms < max_tempo))
        log_weight[:cutoff] = -np.inf

    # Align the weighting with the tempogram's lag axis for broadcasting
    log_weight = util.expand_to(log_weight, ndim=tg.ndim, axes=-2)

    # Pick the lag with maximal (log-compressed) energy plus prior weight.
    # log1p of a scaled tempogram keeps the argmax numerically stable.
    best_lag = np.argmax(np.log1p(1e6 * tg) + log_weight, axis=-2)

    estimate: np.ndarray = np.take(lag_bpms, best_lag)
    return estimate
|
||||
|
||||
|
||||
@cache(level=40)
def tempogram_ratio(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    tg: Optional[np.ndarray] = None,
    bpm: Optional[np.ndarray] = None,
    hop_length: int = 512,
    win_length: int = 384,
    start_bpm: float = 120,
    std_bpm: float = 1.0,
    max_tempo: Optional[float] = 320.0,
    freqs: Optional[np.ndarray] = None,
    factors: Optional[np.ndarray] = None,
    aggregate: Optional[Callable[..., Any]] = None,
    prior: Optional[scipy.stats.rv_continuous] = None,
    center: bool = True,
    window: _WindowSpec = "hann",
    kind: str = "linear",
    fill_value: float = 0,
    norm: Optional[float] = np.inf,
) -> np.ndarray:
    """Tempogram ratio features, also known as spectral rhythm patterns. [1]_

    This function summarizes tempogram energy at metrically important
    multiples of the estimated tempo.  If the tempo corresponds to the
    quarter-note period, the ratios measure energy at eighth-note,
    sixteenth-note, half-note, whole-note, dotted, and triplet periods.

    By default, the 13 multiplicative factors of [2]_ are used:
    4, 8/3, 3, 2, 4/3, 3/2, 1, 2/3, 3/4, 1/2, 1/3, 3/8, 1/4
    (sixteenth note through whole note, including dotted and triplet
    subdivisions, relative to a quarter-note tempo).

    .. [1] Peeters, Geoffroy.
        "Rhythm Classification Using Spectral Rhythm Patterns."
        In ISMIR, pp. 644-647. 2005.

    .. [2] Prockup, Matthew, Andreas F. Ehmann, Fabien Gouyon, Erik M. Schmidt, and Youngmoo E. Kim.
        "Modeling musical rhythm at scale with the music genome project."
        In 2015 IEEE workshop on applications of signal processing to audio and acoustics (WASPAA), pp. 1-5. IEEE, 2015.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        audio time series
    sr : number > 0 [scalar]
        sampling rate of the time series
    onset_envelope : np.ndarray [shape=(..., n)]
        pre-computed onset strength envelope
    tg : np.ndarray
        pre-computed tempogram.  If provided, ``y`` and ``onset_envelope``
        are ignored.
    bpm : np.ndarray
        pre-computed per-frame tempo estimate, dimension-compatible with ``tg``
    hop_length : int > 0 [scalar]
        hop length of the time series
    win_length : int > 0 [scalar]
        window length for tempogram calculation
    start_bpm : float [scalar]
        initial BPM guess when ``bpm`` is not provided
    std_bpm : float > 0 [scalar]
        standard deviation of tempo distribution
    max_tempo : float > 0 [scalar, optional]
        If provided, only estimate tempo below this threshold
    freqs : np.ndarray
        Frequencies (in BPM) of the tempogram axis
    factors : np.ndarray
        Multiples of the fundamental tempo to estimate; defaults as above.
    aggregate : callable [optional]
        Aggregation function for a global ratio summary.
        If `None`, ratios are estimated independently for each frame.
    prior : scipy.stats.rv_continuous [optional]
        A prior distribution over tempo (in beats per minute).
        If given, ``start_bpm`` and ``std_bpm`` will be ignored.
    center : bool
        If `True`, onset windows are centered; if `False`, left-aligned.
    window : string, function, number, tuple, or np.ndarray [shape=(win_length,)]
        A window specification as in `stft`.
    kind : str
        Interpolation mode for measuring tempogram ratios
    fill_value : float
        Fill value when extrapolating beyond the observed frequency range
    norm : {np.inf, -np.inf, 0, float > 0, None}
        Normalization mode. Set to `None` to disable normalization.

    Returns
    -------
    tgr : np.ndarray
        The tempogram ratio for the specified factors.
        If ``aggregate`` is provided, the trailing time axis is removed;
        otherwise ratios are returned for each frame.

    See Also
    --------
    tempogram
    tempo
    librosa.f0_harmonics
    librosa.tempo_frequencies

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('sweetwaltz'))
    >>> tg = librosa.feature.tempogram(y=y, sr=sr)
    >>> tgr = librosa.feature.tempogram_ratio(tg=tg, sr=sr)
    """
    # Autocorrelation tempogram, computed on demand
    if tg is None:
        tg = tempogram(
            y=y,
            sr=sr,
            onset_envelope=onset_envelope,
            hop_length=hop_length,
            win_length=win_length,
            center=center,
            window=window,
            norm=norm,
        )

    # BPM value for each tempogram bin
    if freqs is None:
        # NOTE(review): len(tg) measures the leading axis; for multi-channel
        # tempograms the lag axis is tg.shape[-2] -- confirm intended.
        freqs = tempo_frequencies(sr=sr, n_bins=len(tg), hop_length=hop_length)

    # Frame-wise tempo estimate; aggregation is deliberately disabled here
    # so that ratios can track tempo variation over time
    if bpm is None:
        bpm = tempo(
            sr=sr,
            tg=tg,
            hop_length=hop_length,
            start_bpm=start_bpm,
            std_bpm=std_bpm,
            max_tempo=max_tempo,
            aggregate=None,
            prior=prior,
        )

    # Default metric multiples from Prockup et al., 2015
    if factors is None:
        factors = np.array(
            [4, 8 / 3, 3, 2, 4 / 3, 3 / 2, 1, 2 / 3, 3 / 4, 1 / 2, 1 / 3, 3 / 8, 1 / 4]
        )

    # Sample the tempogram at each multiple of the estimated tempo
    ratios = f0_harmonics(
        tg, freqs=freqs, f0=bpm, harmonics=factors, kind=kind, fill_value=fill_value
    )

    if aggregate is None:
        return ratios

    # Collapse the time axis for a global summary
    return aggregate(ratios, axis=-1)  # type: ignore
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Feature manipulation utilities"""
|
||||
|
||||
import numpy as np
|
||||
import scipy.signal
|
||||
from numba import jit
|
||||
|
||||
from .._cache import cache
|
||||
from ..util.exceptions import ParameterError
|
||||
from typing import Any
|
||||
|
||||
__all__ = ["delta", "stack_memory"]
|
||||
|
||||
|
||||
@cache(level=40)
def delta(
    data: np.ndarray,
    *,
    width: int = 9,
    order: int = 1,
    axis: int = -1,
    mode: str = "interp",
    **kwargs: Any,
) -> np.ndarray:
    r"""Compute delta features: local estimate of the derivative
    of the input data along the selected axis.

    Delta features are computed Savitsky-Golay filtering.

    Parameters
    ----------
    data : np.ndarray
        the input data matrix (eg, spectrogram)

    width : int, positive, odd [scalar]
        Number of frames over which to compute the delta features.
        Cannot exceed the length of ``data`` along the specified axis.

        If ``mode='interp'``, then ``width`` must be no larger than
        ``data.shape[axis]``.

    order : int > 0 [scalar]
        the order of the difference operator.
        1 for first derivative, 2 for second, etc.

    axis : int [scalar]
        the axis along which to compute deltas.
        Default is -1 (columns).

    mode : str, {'interp', 'nearest', 'mirror', 'constant', 'wrap'}
        Padding mode for estimating differences at the boundaries.

    **kwargs : additional keyword arguments
        See `scipy.signal.savgol_filter`

    Returns
    -------
    delta_data : np.ndarray [shape=(..., t)]
        delta matrix of ``data`` at specified order

    Raises
    ------
    ParameterError
        if ``width`` is even or smaller than 3;
        if ``mode='interp'`` and ``width`` exceeds the data length on ``axis``;
        or if ``order`` is not a positive integer.

    Notes
    -----
    This function caches at level 40.

    See Also
    --------
    scipy.signal.savgol_filter

    Examples
    --------
    Compute MFCC deltas, delta-deltas

    >>> y, sr = librosa.load(librosa.ex('libri1'), duration=5)
    >>> mfcc = librosa.feature.mfcc(y=y, sr=sr)
    >>> mfcc_delta = librosa.feature.delta(mfcc)
    >>> mfcc_delta2 = librosa.feature.delta(mfcc, order=2)
    """
    data = np.atleast_1d(data)

    # 'interp' mode fits polynomials within the data extent, so the
    # filter window must fit inside the filtered axis
    if mode == "interp" and width > data.shape[axis]:
        raise ParameterError(
            f"when mode='interp', width={width} "
            f"cannot exceed data.shape[axis]={data.shape[axis]}"
        )

    if width < 3 or np.mod(width, 2) != 1:
        raise ParameterError("width must be an odd integer >= 3")

    if order <= 0 or not isinstance(order, (int, np.integer)):
        raise ParameterError("order must be a positive integer")

    # `deriv` is determined by `order`; discard any caller-supplied value
    kwargs.pop("deriv", None)
    # The fitted polynomial must be of at least the derivative order
    kwargs.setdefault("polyorder", order)
    result: np.ndarray = scipy.signal.savgol_filter(
        data, width, deriv=order, axis=axis, mode=mode, **kwargs
    )
    return result
|
||||
|
||||
|
||||
@cache(level=40)
def stack_memory(
    data: np.ndarray, *, n_steps: int = 2, delay: int = 1, **kwargs: Any
) -> np.ndarray:
    """Short-term history embedding: vertically concatenate a data
    vector or matrix with delayed copies of itself.

    Each frame ``data[..., i]`` is replaced by the stack::

        [data[..., i],
         data[..., i - delay],
         ...
         data[..., i - (n_steps-1)*delay]]

    Frames with index ``i < (n_steps - 1) * delay`` are padded.
    Zero-padding is used by default; this can be changed with keyword
    arguments forwarded to `np.pad`.

    Parameters
    ----------
    data : np.ndarray [shape=(..., d, t)]
        Input data matrix.  A vector input (``data.ndim == 1``) is
        interpreted as a row matrix of shape ``(1, t)``.
    n_steps : int > 0 [scalar]
        embedding dimension: the number of steps back in time to stack
    delay : int != 0 [scalar]
        the number of columns to step.
        Positive values embed from the past (previous columns);
        negative values embed from the future (subsequent columns).
    **kwargs : additional keyword arguments
        Additional arguments to pass to `numpy.pad`

    Returns
    -------
    data_history : np.ndarray [shape=(..., m * d, t)]
        data augmented with lagged copies of itself,
        where ``m == n_steps - 1``.

    Raises
    ------
    ParameterError
        if ``n_steps < 1``, ``delay == 0``, or ``data`` has no columns.

    Notes
    -----
    This function caches at level 40.

    Examples
    --------
    >>> data = np.arange(-3, 3)
    >>> librosa.feature.stack_memory(data)
    array([[-3, -2, -1,  0,  1,  2],
           [ 0, -3, -2, -1,  0,  1]])

    >>> librosa.feature.stack_memory(data, n_steps=3, mode='reflect')
    array([[-3, -2, -1,  0,  1,  2],
           [-2, -3, -2, -1,  0,  1],
           [-1, -2, -3, -2, -1,  0]])
    """
    if n_steps < 1:
        raise ParameterError("n_steps must be a positive integer")

    if delay == 0:
        raise ParameterError("delay must be a non-zero integer")

    data = np.atleast_2d(data)
    n_frames = data.shape[-1]

    if n_frames < 1:
        raise ParameterError(
            "Cannot stack memory when input data has "
            f"no columns. Given data.shape={data.shape}"
        )
    kwargs.setdefault("mode", "constant")

    if kwargs["mode"] == "constant":
        kwargs.setdefault("constant_values", [0])

    # Only the time axis is padded; the padded frames supply the
    # out-of-range history below
    pad_widths = [(0, 0)] * data.ndim
    extent = int((n_steps - 1) * abs(delay))
    if delay > 0:
        pad_widths[-1] = (extent, 0)
    else:
        pad_widths[-1] = (0, extent)

    padded = np.pad(data, pad_widths, **kwargs)

    # Output holds n_steps copies along the feature axis,
    # trimmed back to the original frame count
    out_shape = list(padded.shape)
    out_shape[-2] *= n_steps
    out_shape[-1] = n_frames

    # Match the layout and dtype of the (padded) input
    stacked = np.empty_like(padded, shape=tuple(out_shape))

    # Fill the output in place via the numba-accelerated helper
    __stack(stacked, padded, n_steps, delay)

    return stacked
|
||||
|
||||
|
||||
@jit(nopython=True, cache=True)
def __stack(history, data, n_steps, delay):
    """Memory-stacking helper function.

    Fills ``history`` in place with ``n_steps`` copies of ``data``,
    each shifted along the time axis by a multiple of ``delay``.
    Called from `stack_memory`, which has already padded ``data`` by
    ``(n_steps - 1) * |delay|`` frames, so every slice taken below is
    in range.

    Parameters
    ----------
    history : output array (2-dimensional)
    data : pre-padded input array (2-dimensional)
    n_steps : int > 0, the number of steps to stack
    delay : int != 0, the amount of delay between steps

    Returns
    -------
    None
        Output is stored directly in the history array
    """
    # Dimension of each copy of the data
    d = data.shape[-2]

    # Total number of time-steps to output
    t = history.shape[-1]

    if delay > 0:
        for step in range(n_steps):
            # q counts down so that block 0 carries the most-delayed copy's
            # complement: padding sits at the front of `data` for delay > 0
            q = n_steps - 1 - step
            # nth block is original shifted left by n*delay steps
            history[..., step * d : (step + 1) * d, :] = data[
                ..., q * delay : q * delay + t
            ]
    else:
        # Handle the last block separately to avoid -t:0 empty slices
        history[..., -d:, :] = data[..., -t:]

        for step in range(n_steps - 1):
            # nth block is original shifted right by n*delay steps
            # (delay < 0 here, so q * delay is a negative end index)
            q = n_steps - 1 - step
            history[..., step * d : (step + 1) * d, :] = data[
                ..., -t + q * delay : q * delay
            ]
|
||||
1669
linedance-app/venv/lib/python3.12/site-packages/librosa/filters.py
Normal file
1669
linedance-app/venv/lib/python3.12/site-packages/librosa/filters.py
Normal file
File diff suppressed because it is too large
Load Diff
641
linedance-app/venv/lib/python3.12/site-packages/librosa/onset.py
Normal file
641
linedance-app/venv/lib/python3.12/site-packages/librosa/onset.py
Normal file
@@ -0,0 +1,641 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Onset detection
|
||||
===============
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
onset_detect
|
||||
onset_backtrack
|
||||
onset_strength
|
||||
onset_strength_multi
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import scipy
|
||||
|
||||
from ._cache import cache
|
||||
from . import core
|
||||
from . import util
|
||||
from .util.exceptions import ParameterError
|
||||
|
||||
from .feature.spectral import melspectrogram
|
||||
from typing import Any, Callable, Optional, Union, Sequence
|
||||
|
||||
__all__ = ["onset_detect", "onset_strength", "onset_strength_multi", "onset_backtrack"]
|
||||
|
||||
|
||||
def onset_detect(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    onset_envelope: Optional[np.ndarray] = None,
    hop_length: int = 512,
    backtrack: bool = False,
    energy: Optional[np.ndarray] = None,
    units: str = "frames",
    normalize: bool = True,
    sparse: bool = True,
    **kwargs: Any,
) -> np.ndarray:
    """Locate note onset events by picking peaks in an onset strength envelope.

    The `peak_pick` parameters were chosen by large-scale hyper-parameter
    optimization over the dataset provided by [#]_.

    .. [#] https://github.com/CPJKU/onset_db

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        audio time-series. Multi-channel is supported.
    sr : number > 0 [scalar]
        sampling rate of ``y``
    onset_envelope : np.ndarray [shape=(..., m)]
        (optional) pre-computed onset strength envelope
    hop_length : int > 0 [scalar]
        hop length (in samples)
    units : {'frames', 'samples', 'time'}
        The units to encode detected onset events in.
        By default, 'frames' are used.
    backtrack : bool
        If ``True``, detected onset events are backtracked to the nearest
        preceding minimum of ``energy``.  Useful when onsets serve as slice
        points for segmentation.

        .. note:: backtracking is only supported if ``sparse=True``.
    energy : np.ndarray [shape=(m,)] (optional)
        An energy function to use for backtracking detected onset events.
        If none is provided, then ``onset_envelope`` is used.
    normalize : bool
        If ``True`` (default), normalize the onset envelope to [0, 1]
        prior to detection, standardizing `librosa.util.peak_pick`
        parameters.  Otherwise, the envelope is used as given.
    sparse : bool
        If ``True`` (default), return an array of frame, sample, or time
        indices (per ``units=``).  If ``False``, return a dense boolean
        array where ``onsets[..., n]`` marks an onset at frame ``n``.

        .. note:: multi-channel input is only supported if ``sparse=False``.
    **kwargs : additional keyword arguments
        Additional parameters for peak picking;
        see `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray [shape=(n_onsets,) or onset_envelope.shape]
        estimated positions of detected onsets, in the requested units
        (frame indices by default).

        .. note::
            If no onset strength could be detected, onset_detect returns
            an empty array (sparse=True) or all-False array (sparse=False).

    Raises
    ------
    ParameterError
        if neither ``y`` nor ``onset_envelope`` are provided;
        if ``backtrack=True`` with ``sparse=False``;
        or if ``units`` is not one of 'frames', 'samples', or 'time'

    See Also
    --------
    onset_strength : compute onset strength per-frame
    onset_backtrack : backtracking onset events
    librosa.util.peak_pick : pick peaks from a time series

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> librosa.onset.onset_detect(y=y, sr=sr, units='time')
    array([0.07 , 0.232, 0.395, 0.604, 0.743, 0.929, 1.045, 1.115,
           1.416, 1.672, 1.881, 2.043, 2.206, 2.368, 2.554, 3.019])

    Or use a pre-computed onset envelope

    >>> o_env = librosa.onset.onset_strength(y=y, sr=sr)
    >>> onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
    """
    # Derive the onset envelope from audio if one was not supplied
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")

        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    if normalize:
        # Rescale to [0, 1] along the trailing (time) axis so that the
        # peak-picking thresholds behave consistently across inputs.
        shifted = onset_envelope - np.min(onset_envelope, keepdims=True, axis=-1)
        # Safe max-scaling: tiny() guards against division by zero
        onset_envelope = shifted / (
            np.max(shifted, keepdims=True, axis=-1) + util.tiny(shifted)
        )

    # help out mypy
    assert onset_envelope is not None

    # A silent or non-finite envelope yields no detections
    undetectable = not onset_envelope.any() or not np.all(
        np.isfinite(onset_envelope)
    )
    if undetectable:
        if sparse:
            onsets = np.array([], dtype=int)
        else:
            onsets = np.zeros_like(onset_envelope, dtype=bool)
    else:
        # Peak-picking parameters found by large-scale search
        search_defaults = {
            "pre_max": 0.03 * sr // hop_length,  # 30ms
            "post_max": 0.00 * sr // hop_length + 1,  # 0ms
            "pre_avg": 0.10 * sr // hop_length,  # 100ms
            "post_avg": 0.10 * sr // hop_length + 1,  # 100ms
            "wait": 0.03 * sr // hop_length,  # 30ms
            "delta": 0.07,
        }
        for key, value in search_defaults.items():
            kwargs.setdefault(key, value)

        # Peak pick the onset envelope
        onsets = util.peak_pick(onset_envelope, sparse=sparse, axis=-1, **kwargs)

        # Optionally roll each detection back to a preceding energy minimum
        if backtrack:
            if not sparse:
                raise ParameterError(
                    "onset backtracking is only supported if sparse=True"
                )

            if energy is None:
                energy = onset_envelope
            assert energy is not None
            onsets = onset_backtrack(onsets, energy)

    # Convert sparse frame indices to the requested units
    if sparse:
        if units == "samples":
            onsets = core.frames_to_samples(onsets, hop_length=hop_length)
        elif units == "time":
            onsets = core.frames_to_time(onsets, hop_length=hop_length, sr=sr)
        elif units != "frames":
            raise ParameterError(f"Invalid unit type: {units}")

    return onsets
|
||||
|
||||
|
||||
def onset_strength(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    S: Optional[np.ndarray] = None,
    lag: int = 1,
    max_size: int = 1,
    ref: Optional[np.ndarray] = None,
    detrend: bool = False,
    center: bool = True,
    feature: Optional[Callable] = None,
    aggregate: Optional[Union[Callable, bool]] = None,
    **kwargs: Any,
) -> np.ndarray:
    """Compute a spectral flux onset strength envelope.

    The onset strength at time ``t`` is::

        mean_f max(0, S[f, t] - ref[f, t - lag])

    where ``ref`` is ``S`` after local max filtering along the frequency
    axis [#]_.

    If a time series ``y`` is given instead of a spectrogram ``S``, the
    log-power Mel spectrogram of ``y`` is used by default.

    .. [#] Böck, Sebastian, and Gerhard Widmer.
        "Maximum filter vibrato suppression for onset detection."
        16th International Conference on Digital Audio Effects,
        Maynooth, Ireland. 2013.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        audio time series. Multi-channel is supported.
    sr : number > 0 [scalar]
        sampling rate of ``y``
    S : np.ndarray [shape=(..., d, m)]
        pre-computed (log-power) spectrogram
    lag : int > 0
        time lag used when differencing the spectrogram
    max_size : int > 0
        width (in frequency bins) of the local max filter;
        set to `1` to disable filtering
    ref : None or np.ndarray [shape=(..., d, m)]
        optional pre-computed reference spectrum of the same shape as ``S``.
        When given, it overrides the local max filtering governed by ``max_size``.
    detrend : bool [scalar]
        if True, filter the onset strength to remove the DC component
    center : bool [scalar]
        if True, shift the onset function by ``n_fft // (2 * hop_length)``
        frames, matching a centered short-time Fourier transform analysis
    feature : function
        feature extraction function, e.g. a scaled spectrogram.
        Defaults to `librosa.feature.melspectrogram` with ``fmax=sr/2``.
    aggregate : function
        aggregation function used to combine onsets across frequency bins.
        Default: `np.mean`
    **kwargs : additional keyword arguments
        passed through to ``feature()`` when ``S`` is not provided

    Returns
    -------
    onset_envelope : np.ndarray [shape=(..., m,)]
        vector containing the onset strength envelope.
        For multi-channel input, one envelope is computed per channel.

    Raises
    ------
    ParameterError
        if neither ``(y, sr)`` nor ``S`` are provided,

        or if ``lag`` or ``max_size`` are not positive integers

    See Also
    --------
    onset_detect
    onset_strength_multi
    """
    # A full-spectrum envelope must be reduced over frequency; disabling
    # aggregation is only meaningful in the multi-channel variant.
    if aggregate is False:
        raise ParameterError(
            "aggregate parameter cannot be False when computing full-spectrum onset strength."
        )

    # Delegate to the multi-channel implementation, using a single channel
    # that spans the entire frequency range.
    envelope = onset_strength_multi(
        y=y,
        sr=sr,
        S=S,
        lag=lag,
        max_size=max_size,
        ref=ref,
        detrend=detrend,
        center=center,
        feature=feature,
        aggregate=aggregate,
        channels=None,
        **kwargs,
    )

    # Drop the singleton channel axis introduced by the multi-channel call
    return envelope[..., 0, :]
|
||||
|
||||
|
||||
def onset_backtrack(events: np.ndarray, energy: np.ndarray) -> np.ndarray:
    """Roll detected onset events back to the nearest preceding local
    minimum of an energy function.

    Rather than marking an onset at its detected peak amplitude, this maps
    each event to the energy minimum that precedes it.  This is most useful
    when onsets serve as slice points for segmentation, as described by [#]_.

    .. [#] Jehan, Tristan.
        "Creating music by listening"
        Doctoral dissertation
        Massachusetts Institute of Technology, 2005.

    Parameters
    ----------
    events : np.ndarray, dtype=int
        frame indices of detected onset events, as computed by `onset_detect`
    energy : np.ndarray, shape=(m,)
        an energy function

    Returns
    -------
    events_backtracked : np.ndarray, shape=events.shape
        each input event matched to the nearest preceding minimum of ``energy``
    """
    # A frame is a local minimum when it does not exceed its predecessor
    # and lies strictly below its successor.
    interior = energy[1:-1]
    is_minimum = (interior <= energy[:-2]) & (interior < energy[2:])

    # The comparison above dropped frame 0, so shift indices up by one;
    # prepending frame 0 guarantees every event has a preceding minimum.
    minima_idx = util.fix_frames(np.flatnonzero(is_minimum) + 1, x_min=0)

    # Match each event only to minima at or before it (left matching)
    matched: np.ndarray = minima_idx[util.match_events(events, minima_idx, right=False)]
    return matched
|
||||
|
||||
|
||||
@cache(level=30)
def onset_strength_multi(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    S: Optional[np.ndarray] = None,
    n_fft: int = 2048,
    hop_length: int = 512,
    lag: int = 1,
    max_size: int = 1,
    ref: Optional[np.ndarray] = None,
    detrend: bool = False,
    center: bool = True,
    feature: Optional[Callable] = None,
    aggregate: Optional[Union[Callable, bool]] = None,
    channels: Optional[Union[Sequence[int], Sequence[slice]]] = None,
    **kwargs: Any,
) -> np.ndarray:
    """Compute a spectral flux onset strength envelope across multiple channels.

    Onset strength for channel ``i`` at time ``t`` is determined by::

        mean_{f in channels[i]} max(0, S[f, t+1] - S[f, t])

    Parameters
    ----------
    y : np.ndarray [shape=(..., n,)]
        audio time-series. Multi-channel is supported.

    sr : number > 0 [scalar]
        sampling rate of ``y``

    S : np.ndarray [shape=(..., d, m)]
        pre-computed (log-power) spectrogram

    n_fft : int > 0 [scalar]
        FFT window size for use in ``feature()`` if ``S`` is not provided.

    hop_length : int > 0 [scalar]
        hop length for use in ``feature()`` if ``S`` is not provided.

    lag : int > 0
        time lag for computing differences

    max_size : int > 0
        size (in frequency bins) of the local max filter.
        set to `1` to disable filtering.

    ref : None or np.ndarray [shape=(d, m)]
        An optional pre-computed reference spectrum, of the same shape as ``S``.
        If not provided, it will be computed from ``S``.
        If provided, it will override any local max filtering governed by ``max_size``.

    detrend : bool [scalar]
        Filter the onset strength to remove the DC component

    center : bool [scalar]
        Shift the onset function by ``n_fft // (2 * hop_length)`` frames.
        This corresponds to using a centered frame analysis in the short-time Fourier
        transform.

    feature : function
        Function for computing time-series features, eg, scaled spectrograms.
        By default, uses `librosa.feature.melspectrogram` with ``fmax=sr/2``

        Must support arguments: ``y, sr, n_fft, hop_length``

    aggregate : function or False
        Aggregation function to use when combining onsets
        at different frequency bins.

        If ``False``, then no aggregation is performed.

        Default: `np.mean`

    channels : list or None
        Array of channel boundaries or slice objects.
        If `None`, then a single channel is generated to span all bands.

    **kwargs : additional keyword arguments
        Additional parameters to ``feature()``, if ``S`` is not provided.

    Returns
    -------
    onset_envelope : np.ndarray [shape=(..., n_channels, m)]
        array containing the onset strength envelope for each specified channel

    Raises
    ------
    ParameterError
        if neither ``(y, sr)`` nor ``S`` are provided

    See Also
    --------
    onset_strength

    Notes
    -----
    This function caches at level 30.

    Examples
    --------
    First, load some audio and plot the spectrogram

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.ex('choice'), duration=5)
    >>> D = np.abs(librosa.stft(y))
    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
    >>> img1 = librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                                 y_axis='log', x_axis='time', ax=ax[0])
    >>> ax[0].set(title='Power spectrogram')
    >>> ax[0].label_outer()
    >>> fig.colorbar(img1, ax=[ax[0]], format="%+2.f dB")

    Construct a standard onset function over four sub-bands

    >>> onset_subbands = librosa.onset.onset_strength_multi(y=y, sr=sr,
    ...                                                     channels=[0, 32, 64, 96, 128])
    >>> img2 = librosa.display.specshow(onset_subbands, x_axis='time', ax=ax[1])
    >>> ax[1].set(ylabel='Sub-bands', title='Sub-band onset strength')
    >>> fig.colorbar(img2, ax=[ax[1]])
    """
    # Default feature extractor: log-power Mel spectrogram over [0, sr/2]
    if feature is None:
        feature = melspectrogram
        kwargs.setdefault("fmax", 0.5 * sr)

    # Default aggregation: mean over each channel's frequency bins
    if aggregate is None:
        aggregate = np.mean

    if not util.is_positive_int(lag):
        raise ParameterError(f"lag={lag} must be a positive integer")

    if not util.is_positive_int(max_size):
        raise ParameterError(f"max_size={max_size} must be a positive integer")

    # First, compute mel spectrogram
    if S is None:
        S = np.abs(feature(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, **kwargs))

        # Convert to dBs
        S = core.power_to_db(S)

    # Assertion to make type checking happy:
    # S is guaranteed non-None here (either passed in or computed above)
    assert S is not None

    # Ensure that S is at least 2-d
    S = np.atleast_2d(S)

    # Compute the reference spectrogram.
    # Efficiency hack: skip filtering step and pass by reference
    # if max_size will produce a no-op.
    if ref is None:
        if max_size == 1:
            ref = S
        else:
            # Local max filter along the frequency axis (second-to-last)
            ref = scipy.ndimage.maximum_filter1d(S, max_size, axis=-2)
    elif ref.shape != S.shape:
        raise ParameterError(
            f"Reference spectrum shape {ref.shape} must match input spectrum {S.shape}"
        )

    # Compute difference to the reference, spaced by lag
    onset_env = S[..., lag:] - ref[..., :-lag]

    # Discard negatives (decreasing amplitude)
    onset_env = np.maximum(0.0, onset_env)

    # Aggregate within channels
    pad = True
    if channels is None:
        # One channel spanning all frequency bins
        channels = [slice(None)]
    else:
        pad = False

    if callable(aggregate):
        onset_env = util.sync(
            onset_env, channels, aggregate=aggregate, pad=pad, axis=-2
        )

    # compensate for lag
    pad_width = lag
    if center:
        # Counter-act framing effects. Shift the onsets by n_fft / hop_length
        pad_width += n_fft // (2 * hop_length)

    # Left-pad along time so frame indices line up with the input spectrogram
    padding = [(0, 0) for _ in onset_env.shape]
    padding[-1] = (int(pad_width), 0)
    onset_env = np.pad(onset_env, padding, mode="constant")

    # remove the DC component
    if detrend:
        onset_env = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], onset_env, axis=-1)

    # Trim to match the input duration
    if center:
        onset_env = onset_env[..., : S.shape[-1]]

    return onset_env
|
||||
1414
linedance-app/venv/lib/python3.12/site-packages/librosa/segment.py
Normal file
1414
linedance-app/venv/lib/python3.12/site-packages/librosa/segment.py
Normal file
File diff suppressed because it is too large
Load Diff
2051
linedance-app/venv/lib/python3.12/site-packages/librosa/sequence.py
Normal file
2051
linedance-app/venv/lib/python3.12/site-packages/librosa/sequence.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Skip pydocstyle checks that erroneously trigger on "example"
# noqa: D405,D214,D407
"""
Utilities
=========

Array operations
----------------
.. autosummary::
    :toctree: generated/

    frame
    pad_center
    expand_to
    fix_length
    fix_frames
    index_to_slice
    softmask
    stack
    sync

    axis_sort
    normalize
    shear
    sparsify_rows

    buf_to_float
    tiny

Matching
--------
.. autosummary::
    :toctree: generated/

    match_intervals
    match_events

Miscellaneous
-------------
.. autosummary::
    :toctree: generated/

    localmax
    localmin
    peak_pick
    nnls
    cyclic_gradient
    dtype_c2r
    dtype_r2c
    count_unique
    is_unique
    abs2
    phasor


Input validation
----------------
.. autosummary::
    :toctree: generated/

    valid_audio
    valid_int
    valid_intervals
    is_positive_int


File operations
---------------
.. autosummary::
    :toctree: generated/

    example
    example_info
    list_examples
    find_files
    cite
"""

import lazy_loader as lazy

# Defer submodule imports until first attribute access: the public API is
# declared in the adjacent type stub (.pyi) and loaded lazily from it.
__getattr__, __dir__, __all__ = lazy.attach_stub(__name__, __file__)
|
||||
@@ -0,0 +1,59 @@
|
||||
from . import decorators
|
||||
from . import exceptions
|
||||
|
||||
from .files import (
|
||||
find_files as find_files,
|
||||
example as example,
|
||||
ex as ex,
|
||||
list_examples as list_examples,
|
||||
example_info as example_info,
|
||||
cite as cite,
|
||||
)
|
||||
|
||||
from .matching import (
|
||||
match_intervals as match_intervals,
|
||||
match_events as match_events,
|
||||
)
|
||||
|
||||
from .deprecation import (
|
||||
Deprecated as Deprecated,
|
||||
rename_kw as rename_kw,
|
||||
)
|
||||
|
||||
from ._nnls import (
|
||||
nnls as nnls,
|
||||
)
|
||||
|
||||
from .utils import (
|
||||
MAX_MEM_BLOCK as MAX_MEM_BLOCK,
|
||||
frame as frame,
|
||||
pad_center as pad_center,
|
||||
expand_to as expand_to,
|
||||
fix_length as fix_length,
|
||||
valid_audio as valid_audio,
|
||||
valid_int as valid_int,
|
||||
is_positive_int as is_positive_int,
|
||||
valid_intervals as valid_intervals,
|
||||
fix_frames as fix_frames,
|
||||
axis_sort as axis_sort,
|
||||
localmax as localmax,
|
||||
localmin as localmin,
|
||||
normalize as normalize,
|
||||
peak_pick as peak_pick,
|
||||
sparsify_rows as sparsify_rows,
|
||||
shear as shear,
|
||||
stack as stack,
|
||||
fill_off_diagonal as fill_off_diagonal,
|
||||
index_to_slice as index_to_slice,
|
||||
sync as sync,
|
||||
softmask as softmask,
|
||||
buf_to_float as buf_to_float,
|
||||
tiny as tiny,
|
||||
cyclic_gradient as cyclic_gradient,
|
||||
dtype_r2c as dtype_r2c,
|
||||
dtype_c2r as dtype_c2r,
|
||||
count_unique as count_unique,
|
||||
is_unique as is_unique,
|
||||
abs2 as abs2,
|
||||
phasor as phasor,
|
||||
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user