Rewrite pitch tracking based on buffer, not history

pull/357/head
nyanpasu64 2019-04-13 06:09:43 -07:00
rodzic 0a8c0e8451
commit ee4ad5e3ae
4 zmienionych plików z 105 dodań i 95 usunięć

Wyświetl plik

@ -43,7 +43,6 @@ class SpectrumConfig(KeywordAttrs):
# Time-domain history parameters # Time-domain history parameters
min_frames_between_recompute: int = 1 min_frames_between_recompute: int = 1
frames_to_lookbehind: int = 2
class DummySpectrum: class DummySpectrum:

Wyświetl plik

@ -206,24 +206,6 @@ class PerFrameCache:
# CorrelationTrigger # CorrelationTrigger
class CircularArray:
    """Ring buffer of `size` equally-shaped float arrays stored in `self.buf`.

    `self.index` is the write cursor: the slot the next `push` will overwrite.
    Slots start out zero-filled.
    """

    def __init__(self, size: int, *dims: int):
        # One row per stored entry; trailing axes are given by `dims`.
        self.size = size
        self.buf = np.zeros((size, *dims))
        self.index = 0

    def push(self, arr: np.ndarray) -> None:
        """Write `arr` into the current slot and advance the cursor, wrapping
        around at `size`. A zero-capacity array ignores every push."""
        if self.size != 0:
            slot = self.index
            self.buf[slot] = arr
            self.index = (slot + 1) % self.size

    def peek(self) -> np.ndarray:
        """Return the slot under the write cursor (the next one to be
        overwritten).

        The return value is borrowed from self.buf.
        Do NOT push to self while the borrow is alive."""
        return self.buf[self.index]
class LagPrevention(KeywordAttrs): class LagPrevention(KeywordAttrs):
max_frames: float = 1 max_frames: float = 1
transition_frames: float = 0.25 transition_frames: float = 0.25
@ -353,14 +335,8 @@ class CorrelationTrigger(MainTrigger):
subsmp_s=self._wave.smp_s / self._stride, subsmp_s=self._wave.smp_s / self._stride,
dummy_data=self._buffer, dummy_data=self._buffer,
) )
self._spectrum = self._spectrum_calc.calc_spectrum(self._buffer)
self.history = CircularArray(
self.scfg.frames_to_lookbehind, self._buffer_nsamp
)
else: else:
self._spectrum_calc = DummySpectrum() self._spectrum_calc = DummySpectrum()
self._spectrum = np.array([0])
self.history = CircularArray(0, self._buffer_nsamp)
def _calc_lag_prevention(self) -> np.ndarray: def _calc_lag_prevention(self) -> np.ndarray:
""" Returns input-data window, """ Returns input-data window,
@ -402,7 +378,7 @@ class CorrelationTrigger(MainTrigger):
def _calc_step(self) -> np.ndarray: def _calc_step(self) -> np.ndarray:
""" Step function used for approximate edge triggering. """ """ Step function used for approximate edge triggering. """
# Increasing buffer_falloff (width of history buffer) # Increasing buffer_falloff (width of buffer)
# causes buffer to affect triggering, more than the step function. # causes buffer to affect triggering, more than the step function.
# So we multiply edge_strength (step function height) by buffer_falloff. # So we multiply edge_strength (step function height) by buffer_falloff.
@ -471,13 +447,12 @@ class CorrelationTrigger(MainTrigger):
# Slope finder # Slope finder
slope_finder = self._calc_slope_finder(period) slope_finder = self._calc_slope_finder(period)
data *= window
# If pitch tracking enabled, rescale buffer to match data's pitch. # If pitch tracking enabled, rescale buffer to match data's pitch.
if self.scfg and (data != 0).any(): if self.scfg and (data != 0).any():
if isinstance(semitones, float): # Mutates self._buffer.
peak_semitones = semitones self.spectrum_rescale_buffer(data)
else:
peak_semitones = None
self.spectrum_rescale_buffer(data, peak_semitones)
self._prev_period = period self._prev_period = period
self._prev_window = window self._prev_window = window
@ -486,8 +461,7 @@ class CorrelationTrigger(MainTrigger):
window = self._prev_window window = self._prev_window
slope_finder = self._prev_slope_finder slope_finder = self._prev_slope_finder
self.history.push(data) data *= window
data *= window
prev_buffer: np.ndarray = self._buffer * self.cfg.buffer_strength prev_buffer: np.ndarray = self._buffer * self.cfg.buffer_strength
prev_buffer += self._edge_finder + slope_finder prev_buffer += self._edge_finder + slope_finder
@ -498,7 +472,7 @@ class CorrelationTrigger(MainTrigger):
else: else:
radius = None radius = None
score = correlate_offset(data, prev_buffer, radius) score = correlate_data(data, prev_buffer, radius)
peak_offset = score.peak peak_offset = score.peak
trigger = index + (stride * peak_offset) trigger = index + (stride * peak_offset)
@ -524,56 +498,48 @@ class CorrelationTrigger(MainTrigger):
return trigger return trigger
def spectrum_rescale_buffer( def spectrum_rescale_buffer(self, data: np.ndarray) -> None:
self, data: np.ndarray, peak_semitones: Optional[float] """
) -> None: - Cross-correlate the log-frequency spectrum of `data` with `buffer`.
"""Rewrites self._spectrum, and possibly rescales self._buffer.""" - Rescale `buffer` until its pitch matches `data`.
"""
# Setup
scfg = self.scfg scfg = self.scfg
N = self._buffer_nsamp N = self._buffer_nsamp
if self.frames_since_spectrum < self.scfg.min_frames_between_recompute: if self.frames_since_spectrum < self.scfg.min_frames_between_recompute:
return return
self.frames_since_spectrum = 0 self.frames_since_spectrum = 0
spectrum = self._spectrum_calc.calc_spectrum(data) calc_spectrum = self._spectrum_calc.calc_spectrum
# Compute log-frequency spectrum of `data`.
spectrum = calc_spectrum(data)
normalize_buffer(spectrum) normalize_buffer(spectrum)
# Don't normalize self._spectrum. It was already normalized when being assigned.
prev_spectrum = self._spectrum_calc.calc_spectrum(self.history.peek())
# rewrite spectrum
self._spectrum = spectrum
assert not np.any(np.isnan(spectrum)) assert not np.any(np.isnan(spectrum))
# Find spectral correlation peak, # Compute log-frequency spectrum of `self._buffer`.
# but prioritize "changing pitch by ???". prev_spectrum = calc_spectrum(self._buffer)
if peak_semitones is not None: # Don't normalize self._spectrum. It was already normalized when being assigned.
boost_x = iround(peak_semitones / 12 * scfg.notes_per_octave)
boost_y: float = scfg.pitch_estimate_boost
else:
boost_x = 0
boost_y = 1.0
# If we want to double pitch... # Rescale `self._buffer` until its pitch matches `data`.
resample_notes = correlate_offset( resample_notes = correlate_spectrum(
spectrum, spectrum, prev_spectrum, scfg.max_notes_to_resample
prev_spectrum,
scfg.max_notes_to_resample,
boost_x=boost_x,
boost_y=boost_y,
).peak ).peak
if resample_notes != 0: if resample_notes != 0:
# we must divide sampling rate by 2. # If we want to double pitch, we must divide data length by 2.
new_len = iround(N / 2 ** (resample_notes / scfg.notes_per_octave)) new_len = iround(N / 2 ** (resample_notes / scfg.notes_per_octave))
def rescale_mut(in_buf):
buf = np.interp(
np.linspace(0, 1, new_len), np.linspace(0, 1, N), in_buf
)
# assert len(buf) == new_len
buf = midpad(buf, N)
in_buf[:] = buf
# Copy+resample self._buffer. # Copy+resample self._buffer.
self._buffer = np.interp( rescale_mut(self._buffer)
np.linspace(0, 1, new_len), np.linspace(0, 1, N), self._buffer
)
# assert len(self._buffer) == new_len
self._buffer = midpad(self._buffer, N)
def _is_window_invalid(self, period: int) -> Union[bool, float]: def _is_window_invalid(self, period: int) -> Union[bool, float]:
""" Returns number of semitones, """ Returns number of semitones,
@ -629,12 +595,15 @@ class CorrelationResult:
corr: np.ndarray corr: np.ndarray
def correlate_offset( @attr.dataclass
data: np.ndarray, class InterpolatedCorrelationResult:
prev_buffer: np.ndarray, peak: float
radius: Optional[int], corr: np.ndarray
boost_x: int = 0,
boost_y: float = 1.0,
# TODO use parabolic() for added precision when trigger subsampling enabled
def correlate_data(
data: np.ndarray, prev_buffer: np.ndarray, radius: Optional[int]
) -> CorrelationResult: ) -> CorrelationResult:
""" """
This is confusing. This is confusing.
@ -663,15 +632,55 @@ def correlate_offset(
corr = corr[left:right] corr = corr[left:right]
mid = mid - left mid = mid - left
# Prioritize part of it.
corr[mid + boost_x : mid + boost_x + 1] *= boost_y
# argmax(corr) == mid + peak_offset == (data >> peak_offset) # argmax(corr) == mid + peak_offset == (data >> peak_offset)
# peak_offset == argmax(corr) - mid # peak_offset == argmax(corr) - mid
peak_offset = np.argmax(corr) - mid # type: int peak_offset = np.argmax(corr) - mid # type: int
return CorrelationResult(peak_offset, corr) return CorrelationResult(peak_offset, corr)
def correlate_spectrum(
    data: np.ndarray, prev_buffer: np.ndarray, radius: Optional[int]
) -> InterpolatedCorrelationResult:
    """Cross-correlate `data` with `prev_buffer` and locate the peak offset
    with sub-sample precision.

    data, prev_buffer: 1-D arrays; the length assertion below requires them
        to produce a correlation of length 2 * len(data) - 1.
    radius: if not None, only offsets within [-radius, +radius] of zero are
        searched (clipped to the ends of the correlation).

    Returns an InterpolatedCorrelationResult holding the fractional peak
    offset and the (possibly cropped) correlation array.
    """
    nsamp = len(data)
    full_len = 2 * nsamp - 1

    # signal.correlate returns double, not single/FLOAT
    corr = signal.correlate(data, prev_buffer)
    assert len(corr) == full_len

    # Position of the zero-offset term inside `corr`.
    zero_idx = nsamp - 1
    if radius is not None:
        lo = max(zero_idx - radius, 0)
        hi = min(zero_idx + radius + 1, full_len)
        corr = corr[lo:hi]
        # Cropping shifts every index, including the zero-offset one.
        zero_idx -= lo

    # argmax(corr) == zero_idx + peak_offset == (data >> peak_offset),
    # refined to a fractional index by parabolic interpolation.
    peak_idx = parabolic(corr, np.argmax(corr))
    return InterpolatedCorrelationResult(peak_idx - zero_idx, corr)
def parabolic(ys: np.ndarray, xint: int) -> float:
    """
    Quadratic interpolation for estimating the true position of an inter-sample maximum
    when nearby samples are known.

    ys: sampled values.
    xint: integer index of the sample to refine (normally argmax(ys)).

    Returns the fractional index of the vertex of the parabola through
    ys[xint - 1], ys[xint], ys[xint + 1]. Falls back to float(xint) when
    xint has no neighbor on one side, or when the three samples are
    collinear and so have no vertex.
    """
    if xint - 1 < 0 or xint + 1 >= len(ys):
        return float(xint)

    left = ys[xint - 1]
    mid = ys[xint]
    right = ys[xint + 1]

    # https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html
    curvature = left - 2 * mid + right
    if curvature == 0:
        # Flat or straight-line neighborhood: the formula would divide by zero
        # (NaN/inf for numpy scalars, ZeroDivisionError for Python floats).
        return float(xint)

    dx = 0.5 * (left - right) / curvature
    # Sanity check: for a genuine local maximum the vertex lies within half a
    # sample of xint.
    assert -1 < dx < 1
    return xint + dx
SIGN_AMPLIFICATION = 1000 SIGN_AMPLIFICATION = 1000

Wyświetl plik

@ -55,16 +55,21 @@ To remove DC offset from the wave, corrscope calculates the `mean` of input `dat
Corrscope then estimates the fundamental `period` of the waveform, using autocorrelation. Corrscope then estimates the fundamental `period` of the waveform, using autocorrelation.
Corrscope multiplies `data` by `data window` to taper off the edges towards zero, and avoid using data over 1 frame old.
### (optional) Pitch Tracking ### (optional) Pitch Tracking
If `Pitch Tracking` is enabled: If `Pitch Tracking` is enabled:
If `period` changes significantly, corrscope computes the spectrums of `data` and `data` from 2 frames ago, and cross-correlates them to estimate the pitch change over the last 2 frames. It then resamples (horizontally scales) `buffer` to match this pitch change. If `period` changes significantly:
- Cross-correlate the log-frequency spectrum of `data` with `buffer`.
- Rescale `buffer` until its pitch matches `data`.
Pitch Tracking may get confused when `data` moves from 1 note to another over the course of multiple frames. If the right half of `buffer` changes to a new note while the left half is still latched onto the old note, the next frame will latch onto the mistriggered right half of the buffer. To prevent issues, you should consider reducing `Buffer Responsiveness` (so `buffer` will not "learn" the wrong pitch, and instead be rescaled to align with the new note).
### Correlation Triggering (uses `buffer`) ### Correlation Triggering (uses `buffer`)
Corrscope multiplies `data` by `data window` to taper off the edges towards zero, and avoid using data over 1 frame old.
Precomputed: `edge_finder`, which is computed once and reused for every frame. Precomputed: `edge_finder`, which is computed once and reused for every frame.
Corrscope cross-correlates `data` with `buffer + edge_finder` to produce a "buffer similarity + edge" score for each possible `data` triggering location. Corrscope then picks the location in `data` with the highest score, then sets `position` to be used for rendering. Corrscope cross-correlates `data` with `buffer + edge_finder` to produce a "buffer similarity + edge" score for each possible `data` triggering location. Corrscope then picks the location in `data` with the highest score, then sets `position` to be used for rendering.

Wyświetl plik

@ -13,7 +13,8 @@ from corrscope.triggers import (
PerFrameCache, PerFrameCache,
ZeroCrossingTriggerConfig, ZeroCrossingTriggerConfig,
SpectrumConfig, SpectrumConfig,
correlate_offset, correlate_data,
correlate_spectrum,
) )
from corrscope.wave import Wave from corrscope.wave import Wave
@ -214,18 +215,23 @@ def test_post_trigger_radius():
# Test pitch-tracking (spectrum) # Test pitch-tracking (spectrum)
def test_correlate_offset(): @parametrize("correlate", [correlate_data, correlate_spectrum])
def test_correlate_offset(correlate):
""" """
Catches bug where writing N instead of Ncorr Catches bug where writing N instead of Ncorr
prevented function from returning positive numbers. prevented function from returning positive numbers.
""" """
if correlate == correlate_spectrum:
approx = lambda x: pytest.approx(x, rel=0.5)
else:
approx = lambda x: x
np.random.seed(31337) np.random.seed(31337)
# Ensure autocorrelation on random data returns peak at 0. # Ensure autocorrelation on random data returns peak at 0.
N = 100 N = 100
spectrum = np.random.random(N) spectrum = np.random.random(N)
assert correlate_offset(spectrum, spectrum, 12).peak == 0 assert correlate(spectrum, spectrum, 12).peak == approx(0)
# Ensure cross-correlation of time-shifted impulses works. # Ensure cross-correlation of time-shifted impulses works.
# Assume wave where y=[i==99]. # Assume wave where y=[i==99].
@ -236,18 +242,9 @@ def test_correlate_offset():
# We need to slide `left` to the right by 10 samples, and vice versa. # We need to slide `left` to the right by 10 samples, and vice versa.
for radius in [None, 12]: for radius in [None, 12]:
assert correlate_offset(data=left, prev_buffer=right, radius=radius).peak == 10 assert correlate(data=left, prev_buffer=right, radius=radius).peak == approx(10)
assert correlate_offset(data=right, prev_buffer=left, radius=radius).peak == -10 assert correlate(data=right, prev_buffer=left, radius=radius).peak == approx(
-10
# The correlation peak at zero-offset is small enough for boost_x to be returned.
boost_y = 1.5
ones = np.ones(N)
for boost_x in [6, -6]:
assert (
correlate_offset(
ones, ones, radius=9, boost_x=boost_x, boost_y=boost_y
).peak
== boost_x
) )