onedrop_engine/
audio.rs

1//! Audio processing and analysis.
2//!
3//! The MilkDrop preset surface (per-frame / per-pixel equations, beat-detect
4//! thresholds, custom waves' volume modulation) reads three split-band volumes:
5//! `bass`, `mid`, `treb`. They must reflect *frequency content* — kick drums
6//! push `bass`, cymbals push `treb` — not just overall loudness. The original
7//! placeholder in this file did RMS over three time-domain chunks of the input
8//! window, so all three bands tracked overall loudness and presets couldn't
9//! distinguish low- from high-frequency energy. This analyzer now runs an FFT
10//! and integrates the magnitude spectrum over the MD2-standard frequency
11//! ranges (`FFTAnalyzer::get_bass` / `_mid` / `_treble`).
12
13use crate::fft::FFTAnalyzer;
14use onedrop_renderer::AudioLevels;
15
/// Linear gain applied to each averaged FFT band magnitude before clamping.
///
/// It scales the band averages into roughly the [0, 2-ish] range MD2 presets
/// expect, where `bass > 1.5` is the conventional "loud kick" threshold for
/// `BeatDetectionMode::HardCut1`.
///
/// Calibration: a unit-amplitude sine inside the bass range averages to about
/// `0.5 / num_bass_bins`, because its single-bin peak is diluted across the
/// averaging window. For the default 1024-point FFT @ 44.1 kHz that is ~0.08,
/// so a gain of 12.5 would map such a pure tone to ~1.0. With `50.0`,
/// real-world music with a bass kick lands in the ~0.5-2.0 range, which lines
/// up reasonably with MD2's expected dynamics. May become tunable per-user
/// via the GUI later if needed.
const FFT_BAND_GAIN: f32 = 50.0;
26
/// Level the AGC tries to hold each band at on sustained input.
///
/// MD2 presets are written on the assumption that `bass`/`mid`/`treb`
/// average ~1.0 on typical content, so that `bass > 1.5` clearly means
/// "louder than usual" (kick / snare / hat) and not merely "loud track".
/// Without the AGC, quiet tracks sit around 0.2-0.4 and never trip
/// beat-reactive presets, while loud tracks pin at the 8.0 clamp and
/// saturate beat detection.
const AGC_TARGET_LEVEL: f32 = 1.0;
35
/// Retention factor of the AGC's per-band exponential running average.
///
/// Each frame the average keeps this fraction of its previous value and
/// takes the remainder from the new sample, so values closer to 1.0 mean a
/// longer memory. ~50-100 frames at 60 FPS feels right: slow enough that a
/// single kick does not dump the gain, fast enough that a track change
/// resettles within a couple of seconds.
const AGC_AVG_ALPHA: f32 = 0.99;
41
/// Lower bound of the per-band AGC gain factor.
///
/// A band driven by a sustained pure tone must not be crushed below 1/10 of
/// its raw level, so the gain is never allowed to drop under this value.
const AGC_GAIN_MIN: f32 = 0.1;

/// Upper bound of the per-band AGC gain factor.
///
/// A band that is silent (or nearly so) must not let the gain run toward
/// infinity, so the boost is capped at 10x the raw level.
const AGC_GAIN_MAX: f32 = 10.0;
49
/// Number of samples in one FFT window; kept at a power of 2.
///
/// 1024 yields ~43 Hz wide bins @ 44.1 kHz, which is enough granularity for
/// the bass band. Smaller windows under-resolve 20-250 Hz, while larger
/// windows raise latency without buying much for the 3-band split MD2 uses.
const FFT_SIZE: usize = 1024;
55
56/// Audio analyzer for extracting per-band volumes.
57pub struct AudioAnalyzer {
58    fft: FFTAnalyzer,
59
60    /// Long-term smoothed bass — the `bass_att` variable preset equations read.
61    bass_att: f32,
62
63    /// Long-term smoothed mid — the `mid_att` variable preset equations read.
64    mid_att: f32,
65
66    /// Long-term smoothed treble — the `treb_att` variable preset equations read.
67    treb_att: f32,
68
69    /// Exponential-smoothing retention factor for the `_att` channels. Closer
70    /// to 1.0 = longer history. MD2's `_att` channels are heavily smoothed
71    /// so they represent recent average loudness, not instantaneous.
72    attenuation: f32,
73
74    /// Per-band running average used by the AGC to steer the gain.
75    /// Updated each frame from the raw pre-AGC band level; the AGC
76    /// divides the target level by this to compute the gain factor.
77    /// Floor at 0.05 so the gain doesn't blow up on near-silent input.
78    agc_bass_avg: f32,
79    agc_mid_avg: f32,
80    agc_treb_avg: f32,
81}
82
83impl AudioAnalyzer {
84    /// Create a new audio analyzer.
85    pub fn new(sample_rate: f32) -> Self {
86        Self {
87            fft: FFTAnalyzer::new_or_default(FFT_SIZE, sample_rate),
88            bass_att: 0.0,
89            mid_att: 0.0,
90            treb_att: 0.0,
91            attenuation: 0.93,
92            // Seed AGC averages at the target so the gain starts at 1.0
93            // (no boost on the first frame); converges to the actual
94            // long-term level over the next ~100 frames.
95            agc_bass_avg: AGC_TARGET_LEVEL,
96            agc_mid_avg: AGC_TARGET_LEVEL,
97            agc_treb_avg: AGC_TARGET_LEVEL,
98        }
99    }
100
101    /// Analyze audio samples and return per-band volumes.
102    ///
103    /// Pipeline: FFT → raw band integral → AGC. The AGC keeps each
104    /// band hovering near `AGC_TARGET_LEVEL = 1.0` on typical content,
105    /// so `bass > 1.5` on the output unambiguously means "louder than
106    /// usual" — the convention every MD2 preset's beat-detection block
107    /// is authored against. Without the AGC, quiet tracks float around
108    /// 0.2-0.4 and never trigger `BeatDetectionMode::HardCut1`; loud
109    /// tracks pin at the 8.0 clamp and beat detection saturates.
110    pub fn analyze(&mut self, samples: &[f32]) -> AudioLevels {
111        self.fft.analyze(samples);
112
113        let raw_bass = (self.fft.get_bass() * FFT_BAND_GAIN).clamp(0.0, 8.0);
114        let raw_mid = (self.fft.get_mid() * FFT_BAND_GAIN).clamp(0.0, 8.0);
115        let raw_treb = (self.fft.get_treble() * FFT_BAND_GAIN).clamp(0.0, 8.0);
116
117        // Update the AGC running averages and derive per-band gains.
118        // Floor at 0.05 so the divide doesn't explode on silent bands.
119        self.agc_bass_avg = self.agc_bass_avg * AGC_AVG_ALPHA + raw_bass * (1.0 - AGC_AVG_ALPHA);
120        self.agc_mid_avg = self.agc_mid_avg * AGC_AVG_ALPHA + raw_mid * (1.0 - AGC_AVG_ALPHA);
121        self.agc_treb_avg = self.agc_treb_avg * AGC_AVG_ALPHA + raw_treb * (1.0 - AGC_AVG_ALPHA);
122
123        let bass_gain =
124            (AGC_TARGET_LEVEL / self.agc_bass_avg.max(0.05)).clamp(AGC_GAIN_MIN, AGC_GAIN_MAX);
125        let mid_gain =
126            (AGC_TARGET_LEVEL / self.agc_mid_avg.max(0.05)).clamp(AGC_GAIN_MIN, AGC_GAIN_MAX);
127        let treb_gain =
128            (AGC_TARGET_LEVEL / self.agc_treb_avg.max(0.05)).clamp(AGC_GAIN_MIN, AGC_GAIN_MAX);
129
130        let bass = (raw_bass * bass_gain).clamp(0.0, 8.0);
131        let mid = (raw_mid * mid_gain).clamp(0.0, 8.0);
132        let treb = (raw_treb * treb_gain).clamp(0.0, 8.0);
133
134        self.bass_att = self.bass_att * self.attenuation + bass * (1.0 - self.attenuation);
135        self.mid_att = self.mid_att * self.attenuation + mid * (1.0 - self.attenuation);
136        self.treb_att = self.treb_att * self.attenuation + treb * (1.0 - self.attenuation);
137
138        AudioLevels {
139            bass,
140            mid,
141            treb,
142            bass_att: self.bass_att,
143            mid_att: self.mid_att,
144            treb_att: self.treb_att,
145        }
146    }
147
148    /// Set attenuation factor.
149    pub fn set_attenuation(&mut self, attenuation: f32) {
150        self.attenuation = attenuation.clamp(0.0, 1.0);
151    }
152
153    /// Reset attenuated values.
154    pub fn reset(&mut self) {
155        self.bass_att = 0.0;
156        self.mid_att = 0.0;
157        self.treb_att = 0.0;
158        // Re-seed AGC averages at the target so the first frame after
159        // reset doesn't start at full boost.
160        self.agc_bass_avg = AGC_TARGET_LEVEL;
161        self.agc_mid_avg = AGC_TARGET_LEVEL;
162        self.agc_treb_avg = AGC_TARGET_LEVEL;
163    }
164}
165
166impl Default for AudioAnalyzer {
167    fn default() -> Self {
168        Self::new(44100.0)
169    }
170}
171
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use std::f32::consts::PI;

    /// Number of samples in every frame fed to the analyzer in these tests.
    const FRAME_LEN: usize = 1024;

    /// Generate `n` samples of a unit-amplitude sine at `freq_hz`, sampled
    /// at 44.1 kHz.
    fn sine(freq_hz: f32, n: usize) -> Vec<f32> {
        let mut samples = Vec::with_capacity(n);
        for i in 0..n {
            samples.push((2.0 * PI * freq_hz * i as f32 / 44100.0).sin());
        }
        samples
    }

    /// One frame of a sine at `freq_hz` with every sample scaled by
    /// `amplitude`.
    fn scaled_frame(freq_hz: f32, amplitude: f32) -> Vec<f32> {
        sine(freq_hz, FRAME_LEN)
            .into_iter()
            .map(|s| s * amplitude)
            .collect()
    }

    /// Levels reported by a fresh analyzer for a single frame of a
    /// unit-amplitude sine at `freq_hz`.
    fn first_frame_levels(freq_hz: f32) -> AudioLevels {
        let mut analyzer = AudioAnalyzer::new(44100.0);
        analyzer.analyze(&sine(freq_hz, FRAME_LEN))
    }

    /// Push the same frame through `analyzer` `frames` times, discarding
    /// the reported levels.
    fn feed(analyzer: &mut AudioAnalyzer, frame: &[f32], frames: usize) {
        for _ in 0..frames {
            analyzer.analyze(frame);
        }
    }

    #[test]
    fn test_analyze_silence() {
        let silence = vec![0.0; 1024];
        let mut analyzer = AudioAnalyzer::new(44100.0);

        let levels = analyzer.analyze(&silence);

        assert_relative_eq!(levels.bass, 0.0, epsilon = 0.01);
        assert_relative_eq!(levels.mid, 0.0, epsilon = 0.01);
        assert_relative_eq!(levels.treb, 0.0, epsilon = 0.01);
    }

    /// Frequency separation matters: a 60 Hz tone should drive `bass`
    /// dominantly, a 1 kHz tone `mid`, an 8 kHz tone `treb`. Together with
    /// the two tests below, this guards against a regression to the old
    /// time-chunked-RMS placeholder, which reported all three bands as
    /// roughly equal for any input.
    #[test]
    fn bass_tone_drives_bass_band_dominantly() {
        let levels = first_frame_levels(60.0);
        assert!(levels.bass > 5.0 * levels.mid, "{:?}", levels);
        assert!(levels.bass > 5.0 * levels.treb, "{:?}", levels);
    }

    #[test]
    fn mid_tone_drives_mid_band_dominantly() {
        let levels = first_frame_levels(1000.0);
        assert!(levels.mid > 5.0 * levels.bass, "{:?}", levels);
        assert!(levels.mid > 5.0 * levels.treb, "{:?}", levels);
    }

    #[test]
    fn treble_tone_drives_treb_band_dominantly() {
        let levels = first_frame_levels(8000.0);
        assert!(levels.treb > 5.0 * levels.bass, "{:?}", levels);
        assert!(levels.treb > 5.0 * levels.mid, "{:?}", levels);
    }

    /// A unit-amplitude bass sine is the loudest signal cpal will hand us.
    /// Real music sits ~10-20 dB below it, so a unit sine reporting ~5
    /// means typical music reports bass in the [0.5, 2.0] range MD2
    /// presets expect for "moderate" → "loud kick". This pins the gain.
    #[test]
    fn unit_bass_sine_is_within_clamp_and_above_floor() {
        let levels = first_frame_levels(80.0);
        assert!(levels.bass > 1.0, "bass too quiet: {:?}", levels.bass);
        assert!(levels.bass < 8.0, "bass clamp violated: {:?}", levels.bass);
    }

    /// On sustained input the AGC must pull the per-band level toward
    /// `AGC_TARGET_LEVEL`: a quiet but steady bass tone is boosted over
    /// many frames until the reported `bass` lands near 1.0, whatever the
    /// source amplitude. Otherwise the presets' `bass > 1.5` beat
    /// threshold would never fire on quiet tracks.
    #[test]
    fn agc_steers_quiet_band_toward_target() {
        let quiet = scaled_frame(60.0, 0.1);
        let mut analyzer = AudioAnalyzer::new(44100.0);

        // First frame: low signal × seed-gain 1.0 = low reported bass.
        let first = analyzer.analyze(&quiet);
        assert!(
            first.bass < 1.0,
            "first frame should not be boosted: {:?}",
            first.bass
        );

        // Many frames later the running average has converged on the quiet
        // level, and the gain inverts it back to ~target.
        feed(&mut analyzer, &quiet, 1000);
        let settled = analyzer.analyze(&quiet);
        assert!(
            settled.bass > 0.7,
            "AGC should boost quiet sustained tone toward target, got {}",
            settled.bass
        );
    }

    /// Mirror image of the boost case: a LOUD sustained tone must be
    /// attenuated so the reported level stays near the target. Otherwise
    /// loud tracks pin the clamp at 8.0 and beat detection saturates.
    #[test]
    fn agc_attenuates_loud_band_toward_target() {
        let loud = scaled_frame(60.0, 4.0);
        let mut analyzer = AudioAnalyzer::new(44100.0);

        feed(&mut analyzer, &loud, 1000);
        let settled = analyzer.analyze(&loud);
        assert!(
            settled.bass < 4.0,
            "AGC should attenuate loud sustained tone, got {}",
            settled.bass
        );
    }

    #[test]
    fn att_channels_smooth_across_frames() {
        let frame = sine(1000.0, FRAME_LEN);
        let mut analyzer = AudioAnalyzer::new(44100.0);

        // After a single frame, `_att` has barely accumulated (long
        // smoothing), so it still trails the instantaneous value.
        let first = analyzer.analyze(&frame);
        assert!(first.mid_att < first.mid);

        // After many frames, `_att` approaches the instantaneous value.
        feed(&mut analyzer, &frame, 200);
        let settled = analyzer.analyze(&frame);
        assert_relative_eq!(settled.mid_att, settled.mid, epsilon = 0.05);
    }
}