onedrop_engine/audio.rs
1//! Audio processing and analysis.
2//!
3//! The MilkDrop preset surface (per-frame / per-pixel equations, beat-detect
4//! thresholds, custom waves' volume modulation) reads three split-band volumes:
5//! `bass`, `mid`, `treb`. They must reflect *frequency content* — kick drums
6//! push `bass`, cymbals push `treb` — not just overall loudness. The original
7//! placeholder in this file did RMS over three time-domain chunks of the input
8//! window, so all three bands tracked overall loudness and presets couldn't
9//! distinguish low- from high-frequency energy. This analyzer now runs an FFT
10//! and integrates the magnitude spectrum over the MD2-standard frequency
11//! ranges (`FFTAnalyzer::get_bass` / `_mid` / `_treble`).
12
13use crate::fft::FFTAnalyzer;
14use onedrop_renderer::AudioLevels;
15
/// Linear gain applied to each raw FFT band average in
/// `AudioAnalyzer::analyze`, before the [0, 8] clamp and the AGC.
///
/// Maps the FFT-band averages to roughly the [0, 2-ish] range MD2 presets
/// expect (`bass > 1.5` is the conventional "loud kick" threshold for
/// `BeatDetectionMode::HardCut1`). A unit-amplitude sine in the bass range
/// produces an averaged magnitude of `~0.5 / num_bass_bins` (the FFT spreads
/// one bin's peak across the averaging window); with the default 1024-point
/// FFT @ 44.1 kHz that's ~0.08, so a gain of 12.5 would map a pure bass sine
/// to ~1.0. The value used here is 4x that (`50.0`, which puts a unit bass
/// sine at ~4.0) because real music sits well below a full-scale sine: with
/// `50.0`, real-world music with a bass kick lands in the ~0.5-2.0 range,
/// which lines up reasonably with MD2's expected dynamics. Tunable per-user
/// later via the GUI if needed.
const FFT_BAND_GAIN: f32 = 50.0;
26
/// Target band level the AGC steers each band toward. MD2 presets are
/// authored against the convention that `bass`/`mid`/`treb` average
/// ~1.0 on typical content; `bass > 1.5` then unambiguously means
/// "louder than usual" (kick / snare / hat) rather than just "loud
/// track". With AGC off, quiet tracks float around 0.2-0.4 and never
/// trigger beat-reactive presets; loud tracks pin at 8.0 (the clamp)
/// and beat detection saturates.
///
/// Also used as the seed for the per-band running averages in
/// `AudioAnalyzer::new` and `AudioAnalyzer::reset`, so the AGC gain
/// starts at `AGC_TARGET_LEVEL / AGC_TARGET_LEVEL = 1.0`.
const AGC_TARGET_LEVEL: f32 = 1.0;
35
/// Smoothing (retention) factor for the AGC's per-band running average:
/// each frame, `avg = avg * ALPHA + raw * (1 - ALPHA)`. Closer to
/// 1.0 = longer history. ~50-100 frames at 60 FPS feels right — slow
/// enough that a single kick doesn't dump the gain, fast enough that a
/// track change resettles within a couple of seconds. At 0.99 the
/// time constant is `1 / (1 - 0.99)` = 100 frames (~1.7 s at 60 FPS).
const AGC_AVG_ALPHA: f32 = 0.99;
41
/// Clamp the AGC gain to a sane range. Both bounds matter: a track
/// with absolute silence on a band shouldn't let the gain run to
/// infinity (`AGC_GAIN_MAX` caps the boost at 10x — this is the
/// binding limit, since the 0.05 running-average floor applied in
/// `analyze` would on its own still allow a 20x boost), and a band
/// that's pure-tone-driven shouldn't crush below 1/10 of the raw
/// signal (`AGC_GAIN_MIN`).
const AGC_GAIN_MIN: f32 = 0.1;
const AGC_GAIN_MAX: f32 = 10.0;
49
/// FFT window length in samples, passed to `FFTAnalyzer::new_or_default`.
/// Kept a power of 2; 1024 gives ~43 Hz bin width @ 44.1 kHz (44100 / 1024),
/// which is enough granularity for the bass band (smaller windows
/// under-resolve 20-250 Hz; larger windows raise latency without buying
/// much for the 3-band split MD2 uses).
const FFT_SIZE: usize = 1024;
55
/// Audio analyzer for extracting per-band volumes.
///
/// Holds the FFT front end plus the state that persists between calls to
/// `analyze`: the smoothed `_att` channels and the per-band running averages
/// that drive the automatic gain control (AGC).
pub struct AudioAnalyzer {
    /// FFT front end. `analyze` feeds it every sample window and reads the
    /// bass / mid / treble band values back from it.
    fft: FFTAnalyzer,

    /// Long-term smoothed bass — the `bass_att` variable preset equations read.
    /// Starts at 0.0 and is zeroed again by `reset`.
    bass_att: f32,

    /// Long-term smoothed mid — the `mid_att` variable preset equations read.
    /// Starts at 0.0 and is zeroed again by `reset`.
    mid_att: f32,

    /// Long-term smoothed treble — the `treb_att` variable preset equations read.
    /// Starts at 0.0 and is zeroed again by `reset`.
    treb_att: f32,

    /// Exponential-smoothing retention factor for the `_att` channels. Closer
    /// to 1.0 = longer history. MD2's `_att` channels are heavily smoothed
    /// so they represent recent average loudness, not instantaneous.
    /// Initialised to 0.93 by `new`; `set_attenuation` clamps it to [0, 1].
    attenuation: f32,

    /// Per-band running average used by the AGC to steer the gain.
    /// Updated each frame from the raw pre-AGC band level; the AGC
    /// divides the target level by this to compute the gain factor.
    /// A 0.05 floor is applied at the point of division (the stored
    /// average itself is not floored) so the gain doesn't blow up on
    /// near-silent input. Seeded at `AGC_TARGET_LEVEL` by `new` and `reset`.
    agc_bass_avg: f32,
    agc_mid_avg: f32,
    agc_treb_avg: f32,
}
82
83impl AudioAnalyzer {
84 /// Create a new audio analyzer.
85 pub fn new(sample_rate: f32) -> Self {
86 Self {
87 fft: FFTAnalyzer::new_or_default(FFT_SIZE, sample_rate),
88 bass_att: 0.0,
89 mid_att: 0.0,
90 treb_att: 0.0,
91 attenuation: 0.93,
92 // Seed AGC averages at the target so the gain starts at 1.0
93 // (no boost on the first frame); converges to the actual
94 // long-term level over the next ~100 frames.
95 agc_bass_avg: AGC_TARGET_LEVEL,
96 agc_mid_avg: AGC_TARGET_LEVEL,
97 agc_treb_avg: AGC_TARGET_LEVEL,
98 }
99 }
100
101 /// Analyze audio samples and return per-band volumes.
102 ///
103 /// Pipeline: FFT → raw band integral → AGC. The AGC keeps each
104 /// band hovering near `AGC_TARGET_LEVEL = 1.0` on typical content,
105 /// so `bass > 1.5` on the output unambiguously means "louder than
106 /// usual" — the convention every MD2 preset's beat-detection block
107 /// is authored against. Without the AGC, quiet tracks float around
108 /// 0.2-0.4 and never trigger `BeatDetectionMode::HardCut1`; loud
109 /// tracks pin at the 8.0 clamp and beat detection saturates.
110 pub fn analyze(&mut self, samples: &[f32]) -> AudioLevels {
111 self.fft.analyze(samples);
112
113 let raw_bass = (self.fft.get_bass() * FFT_BAND_GAIN).clamp(0.0, 8.0);
114 let raw_mid = (self.fft.get_mid() * FFT_BAND_GAIN).clamp(0.0, 8.0);
115 let raw_treb = (self.fft.get_treble() * FFT_BAND_GAIN).clamp(0.0, 8.0);
116
117 // Update the AGC running averages and derive per-band gains.
118 // Floor at 0.05 so the divide doesn't explode on silent bands.
119 self.agc_bass_avg = self.agc_bass_avg * AGC_AVG_ALPHA + raw_bass * (1.0 - AGC_AVG_ALPHA);
120 self.agc_mid_avg = self.agc_mid_avg * AGC_AVG_ALPHA + raw_mid * (1.0 - AGC_AVG_ALPHA);
121 self.agc_treb_avg = self.agc_treb_avg * AGC_AVG_ALPHA + raw_treb * (1.0 - AGC_AVG_ALPHA);
122
123 let bass_gain =
124 (AGC_TARGET_LEVEL / self.agc_bass_avg.max(0.05)).clamp(AGC_GAIN_MIN, AGC_GAIN_MAX);
125 let mid_gain =
126 (AGC_TARGET_LEVEL / self.agc_mid_avg.max(0.05)).clamp(AGC_GAIN_MIN, AGC_GAIN_MAX);
127 let treb_gain =
128 (AGC_TARGET_LEVEL / self.agc_treb_avg.max(0.05)).clamp(AGC_GAIN_MIN, AGC_GAIN_MAX);
129
130 let bass = (raw_bass * bass_gain).clamp(0.0, 8.0);
131 let mid = (raw_mid * mid_gain).clamp(0.0, 8.0);
132 let treb = (raw_treb * treb_gain).clamp(0.0, 8.0);
133
134 self.bass_att = self.bass_att * self.attenuation + bass * (1.0 - self.attenuation);
135 self.mid_att = self.mid_att * self.attenuation + mid * (1.0 - self.attenuation);
136 self.treb_att = self.treb_att * self.attenuation + treb * (1.0 - self.attenuation);
137
138 AudioLevels {
139 bass,
140 mid,
141 treb,
142 bass_att: self.bass_att,
143 mid_att: self.mid_att,
144 treb_att: self.treb_att,
145 }
146 }
147
148 /// Set attenuation factor.
149 pub fn set_attenuation(&mut self, attenuation: f32) {
150 self.attenuation = attenuation.clamp(0.0, 1.0);
151 }
152
153 /// Reset attenuated values.
154 pub fn reset(&mut self) {
155 self.bass_att = 0.0;
156 self.mid_att = 0.0;
157 self.treb_att = 0.0;
158 // Re-seed AGC averages at the target so the first frame after
159 // reset doesn't start at full boost.
160 self.agc_bass_avg = AGC_TARGET_LEVEL;
161 self.agc_mid_avg = AGC_TARGET_LEVEL;
162 self.agc_treb_avg = AGC_TARGET_LEVEL;
163 }
164}
165
166impl Default for AudioAnalyzer {
167 fn default() -> Self {
168 Self::new(44100.0)
169 }
170}
171
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use std::f32::consts::PI;

    /// Number of samples in every analysis window the tests feed in.
    const WINDOW: usize = 1024;

    /// Unit-amplitude sine at `freq_hz`, `n` samples long, sampled at 44.1 kHz.
    fn sine(freq_hz: f32, n: usize) -> Vec<f32> {
        let sample_at = |idx: usize| (2.0 * PI * freq_hz * idx as f32 / 44100.0).sin();
        (0..n).map(sample_at).collect()
    }

    /// One window of a sine at `freq_hz`, scaled to the given peak amplitude.
    fn scaled_sine(freq_hz: f32, amplitude: f32) -> Vec<f32> {
        sine(freq_hz, WINDOW)
            .into_iter()
            .map(|s| s * amplitude)
            .collect()
    }

    /// Levels reported by a fresh analyzer for its first window of a
    /// unit-amplitude tone at `freq_hz`.
    fn first_frame(freq_hz: f32) -> AudioLevels {
        AudioAnalyzer::new(44100.0).analyze(&sine(freq_hz, WINDOW))
    }

    /// Feed `window` to the analyzer `warmup_frames` times, then once more,
    /// returning the levels from that final call.
    fn settle(analyzer: &mut AudioAnalyzer, window: &[f32], warmup_frames: usize) -> AudioLevels {
        for _ in 0..warmup_frames {
            analyzer.analyze(window);
        }
        analyzer.analyze(window)
    }

    #[test]
    fn test_analyze_silence() {
        let silence = [0.0_f32; WINDOW];
        let levels = AudioAnalyzer::new(44100.0).analyze(&silence);

        for band in [levels.bass, levels.mid, levels.treb] {
            assert_relative_eq!(band, 0.0, epsilon = 0.01);
        }
    }

    /// Frequency separation is the point of the FFT path: 60 Hz must land
    /// mostly in `bass`, 1 kHz in `mid`, 8 kHz in `treb`. Guards against a
    /// return of the old time-chunked-RMS placeholder, under which every
    /// input produced three roughly equal bands.
    #[test]
    fn bass_tone_drives_bass_band_dominantly() {
        let levels = first_frame(60.0);
        assert!(levels.bass > 5.0 * levels.mid, "{:?}", levels);
        assert!(levels.bass > 5.0 * levels.treb, "{:?}", levels);
    }

    #[test]
    fn mid_tone_drives_mid_band_dominantly() {
        let levels = first_frame(1000.0);
        assert!(levels.mid > 5.0 * levels.bass, "{:?}", levels);
        assert!(levels.mid > 5.0 * levels.treb, "{:?}", levels);
    }

    #[test]
    fn treble_tone_drives_treb_band_dominantly() {
        let levels = first_frame(8000.0);
        assert!(levels.treb > 5.0 * levels.bass, "{:?}", levels);
        assert!(levels.treb > 5.0 * levels.mid, "{:?}", levels);
    }

    /// A full-scale bass sine is the loudest thing cpal will deliver, and
    /// real music sits ~10-20 dB under it. If the unit sine reads ~5,
    /// typical music falls in the [0.5, 2.0] band MD2 presets treat as
    /// "moderate" → "loud kick". This pins the gain constant.
    #[test]
    fn unit_bass_sine_is_within_clamp_and_above_floor() {
        let levels = first_frame(80.0);
        assert!(levels.bass > 1.0, "bass too quiet: {:?}", levels.bass);
        assert!(levels.bass < 8.0, "bass clamp violated: {:?}", levels.bass);
    }

    /// On sustained input the AGC has to pull the band toward
    /// `AGC_TARGET_LEVEL`: a quiet, steady bass tone gets boosted frame by
    /// frame until `bass` sits near 1.0 regardless of source amplitude.
    /// Otherwise the presets' `bass > 1.5` beat threshold can never fire
    /// on quiet tracks.
    #[test]
    fn agc_steers_quiet_band_toward_target() {
        let mut analyzer = AudioAnalyzer::new(44100.0);
        let quiet = scaled_sine(60.0, 0.1);

        // Frame one sees the seed gain of 1.0, so the level is still low.
        let first = analyzer.analyze(&quiet);
        assert!(
            first.bass < 1.0,
            "first frame should not be boosted: {:?}",
            first.bass
        );

        // Given enough frames the running average settles on the quiet
        // level and the gain inverts it back to roughly the target.
        let settled = settle(&mut analyzer, &quiet, 1000);
        assert!(
            settled.bass > 0.7,
            "AGC should boost quiet sustained tone toward target, got {}",
            settled.bass
        );
    }

    /// Mirror image of the boost case: a LOUD sustained tone has to be
    /// pulled down by the AGC so the reported level stays near the target
    /// instead of pinning the 8.0 clamp and saturating beat detection.
    #[test]
    fn agc_attenuates_loud_band_toward_target() {
        let mut analyzer = AudioAnalyzer::new(44100.0);
        let loud = scaled_sine(60.0, 4.0);

        let settled = settle(&mut analyzer, &loud, 1000);
        assert!(
            settled.bass < 4.0,
            "AGC should attenuate loud sustained tone, got {}",
            settled.bass
        );
    }

    #[test]
    fn att_channels_smooth_across_frames() {
        let mut analyzer = AudioAnalyzer::new(44100.0);
        let tone = sine(1000.0, WINDOW);

        // A single frame barely moves `_att` because of the long smoothing.
        let first = analyzer.analyze(&tone);
        assert!(first.mid_att < first.mid);

        // With many frames behind it, `_att` closes in on the live value.
        let settled = settle(&mut analyzer, &tone, 200);
        assert_relative_eq!(settled.mid_att, settled.mid, epsilon = 0.05);
    }
}
304}