onedrop_engine/
warp_eval.rs

1//! Per-vertex warp evaluation.
2//!
3//! Each frame the warp mesh's `cols × rows` vertices need their UVs warped
4//! according to the preset's per-vertex (Milkdrop "per_pixel") equations and
5//! the MilkDrop 2 zoom/rot/stretch/translate/warp formula.
6//!
7//! ## Hot path strategy
8//!
//! [`WarpExecutor`] precompiles the per-vertex equations into a
//! [`CompiledBlock`] whenever the preset's equation set changes and then
//! reuses a scratch [`MilkContext`] across the vertices of a frame: a single
//! scratch on the sequential path, and one clone per batch of vertices that
//! rayon hands to a worker on the parallel path. This replaces the previous
//! "clone evaluator state per vertex" approach, which allocated a fresh
//! `HashMapContext` (≈60 vars + math-function map) for every one of the
//! `cols × rows` vertices — 768 allocations per frame at the default 32×24
//! mesh, up to 18 432 at 192×96.
16//!
17//! Per-vertex independence (each vertex sees the same per-frame state on
18//! entry, regardless of what previous vertices wrote) is preserved by
19//! snapshotting the motion outputs (`zoom`, `zoomexp`, `rot`, `cx`, `cy`,
20//! `dx`, `dy`, `sx`, `sy`, `warp`) and the `q1..q32` channels at the start
21//! of the frame and restoring them before each vertex eval.
22
23use onedrop_eval::{CompiledBlock, MilkContext, MilkEvaluator};
24use onedrop_renderer::warp_mesh::{WarpMesh, WarpMeshVertex};
25use onedrop_renderer::warp_pipeline::WarpVertex;
26use rayon::prelude::*;
27
/// Per-vertex motion outputs read from the evaluator after running per-vertex
/// equations.
///
/// Filled by `read_motion` (which narrows each context value to `f32` and
/// substitutes a neutral default for variables the context does not return)
/// and consumed by `warp_uv_md2`.
#[derive(Clone, Copy, Debug)]
struct PerVertexMotion {
    /// Zoom base; offsets from `(cx, cy)` are divided by
    /// `zoom ^ (zoomexp ^ (2·rad − 1))`. Defaults to `1.0`.
    zoom: f32,
    /// Base of the radius-dependent exponent applied to `zoom`; values with
    /// magnitude below `1e-6` are treated as `1.0`. Defaults to `1.0`.
    zoomexp: f32,
    /// Rotation angle in radians (fed to `sin`/`cos`). Defaults to `0.0`.
    rot: f32,
    /// U coordinate of the centre used by zoom, rotation and stretch.
    /// Defaults to `0.5`.
    cx: f32,
    /// V coordinate of the centre used by zoom, rotation and stretch.
    /// Defaults to `0.5`.
    cy: f32,
    /// Horizontal translation, subtracted from U. Defaults to `0.0`.
    dx: f32,
    /// Vertical translation, subtracted from V. Defaults to `0.0`.
    dy: f32,
    /// Horizontal stretch; offsets from `cx` are divided by it (near-zero
    /// values leave U unstretched). Defaults to `1.0`.
    sx: f32,
    /// Vertical stretch; offsets from `cy` are divided by it (near-zero
    /// values leave V unstretched). Defaults to `1.0`.
    sy: f32,
    /// Amplitude of the sinusoidal warp displacement; skipped when its
    /// magnitude is at most `1e-6`. Defaults to `0.0`.
    warp: f32,
}
43
/// Number of `q*` channels we snapshot/restore between vertices. MilkDrop 2
/// exposes `q1..q32`; the underlying `MilkContext` reserves 64 slots but only
/// the first 32 are user-facing.
///
/// Doubles as the length of the snapshot array: slots `0..Q_CHANNEL_COUNT`
/// are copied through `q_get_idx` / `q_set_idx` (presumably slot 0 is `q1` —
/// confirm against `MilkContext`).
const Q_CHANNEL_COUNT: usize = 32;
48
/// Minimum compiled-equation count before we hand the per-vertex loop to
/// rayon. Empirically: presets with ~3 cheap equations stay faster
/// sequential (warp-exec time is dominated by per-thread `MilkContext`
/// clone + work-stealing overhead at ~1 ms/frame baseline cost), while
/// presets with 10+ equations see a 2-3× wall-clock win on a 12-thread
/// box. Picked 8 as the breakpoint after measuring three reference
/// presets (3 / 26 / 35 equations) on the `bench_render` tool.
///
/// The comparison is `>=` against the number of equations that compiled
/// successfully, not the number of source strings supplied, so equations
/// dropped at compile time do not count towards the threshold.
const PARALLEL_EQ_THRESHOLD: usize = 8;
57
/// Pre-compiled per-vertex equation executor.
///
/// One instance lives for the lifetime of the engine; [`set_equations`] is
/// called whenever a new preset is loaded.
///
/// [`set_equations`]: WarpExecutor::set_equations
pub struct WarpExecutor {
    /// Source equations as last passed to `set_equations`, retained so it
    /// can no-op when called with an unchanged set (common case during
    /// steady-state rendering). Holds every source string, including ones
    /// that failed to compile.
    sources: Vec<String>,
    /// Compiled equations packaged in a [`CompiledBlock`] — auto-lowers
    /// to bytecode when every node is supported (~80 % of corpus
    /// per_pixel blocks), falls back to evalexpr Node walk otherwise.
    /// Per-vertex eval is the second-densest CPU phase after wave
    /// per_point: one pass over N equations for every mesh vertex (768 at
    /// the default mesh) every frame, often in rayon. Skipping evalexpr's
    /// recursive operator dispatch on each op trims a measurable slice of
    /// `warp_compute`. May contain fewer nodes than `sources` if some
    /// equations failed to compile — failures are logged at `warn` level
    /// and then dropped, matching MD2's "never crash on a bad preset"
    /// stance.
    compiled: CompiledBlock,
}
81
82impl WarpExecutor {
83    pub fn new() -> Self {
84        Self {
85            sources: Vec::new(),
86            compiled: CompiledBlock::empty(),
87        }
88    }
89
90    /// Replace the compiled equation set. No-op when `eqs` matches the
91    /// previously compiled sources.
92    ///
93    /// `evaluator` is the per-frame evaluator; it is borrowed mutably so the
94    /// preprocess pass can register any newly seen variable names on its
95    /// context (preserving the same auto-init semantics as
96    /// [`MilkEvaluator::eval`]).
97    pub fn set_equations(&mut self, evaluator: &mut MilkEvaluator, eqs: &[String]) {
98        if self.sources.len() == eqs.len() && self.sources.iter().zip(eqs).all(|(a, b)| a == b) {
99            return;
100        }
101        self.sources = eqs.to_vec();
102        let mut nodes = Vec::with_capacity(eqs.len());
103        for eq in eqs {
104            match evaluator.compile(eq) {
105                Ok(node) => nodes.push(node),
106                Err(e) => {
107                    log::warn!("per-vertex equation compile failed: {eq:?}: {e}");
108                }
109            }
110        }
111        self.compiled = CompiledBlock::from_nodes(nodes, evaluator.context_mut());
112    }
113
114    /// Compute one [`WarpVertex`] per mesh vertex, ready to upload to the GPU.
115    ///
116    /// Pre-condition: `evaluator` has just finished its per-frame phase so
117    /// motion outputs, audio levels, and `q*` channels are up to date.
118    ///
119    /// ## Parallelism
120    ///
121    /// When the preset has any per-vertex equations, the inner loop fans
122    /// out across rayon's global thread pool: each worker gets one cloned
123    /// [`MilkContext`] (via `map_init`) and chews through its slice of the
124    /// 768-vertex mesh. Per-MD2-spec, per-vertex equations have no
125    /// cross-vertex carryover, so the parallel order is invisible from the
126    /// outside. The init clone is ~1 µs (HashMap with per-frame state plus
127    /// math-function pointers), so amortised across hundreds of vertices per
128    /// worker it's free.
129    ///
130    /// Empty-equation presets stay on the simple sequential path: nothing
131    /// to evaluate, so threading would only add overhead.
132    pub fn compute(
133        &mut self,
134        mesh: &WarpMesh,
135        evaluator: &MilkEvaluator,
136        time: f32,
137    ) -> Vec<WarpVertex> {
138        let base_motion = read_motion(evaluator.context());
139
140        if self.compiled.is_empty() {
141            // Fast path: no per-vertex equations means every vertex shares
142            // the per-frame motion. Skip the scratch context entirely.
143            return mesh
144                .vertices
145                .iter()
146                .map(|v| WarpVertex {
147                    pos_clip: v.pos_clip,
148                    uv_warp: warp_uv_md2(v.uv_orig[0], v.uv_orig[1], v.rad, &base_motion, time),
149                })
150                .collect();
151        }
152
153        let base_ctx = evaluator.context().clone();
154        let base_q = read_q_snapshot(&base_ctx);
155        let compiled = &self.compiled;
156
157        // Per-vertex body. Restores per-frame motion / q* state so the n-th
158        // vertex sees the same starting context as the 0-th (per-vertex eqs
159        // that depend on cross-vertex carryover are undefined in MilkDrop and
160        // we explicitly do not implement those semantics). Captures
161        // `base_motion`, `base_q`, `compiled`, and `time` from the enclosing
162        // scope; takes the scratch context + the source vertex.
163        //
164        // `compiled.run(scratch)` picks the bytecode VM when the
165        // per_pixel block lowered cleanly (the common case), otherwise
166        // falls back to the evalexpr Node walk. Either way per-vertex
167        // failures are silently absorbed — the vertex inherits whatever
168        // state survived.
169        let body = |scratch: &mut MilkContext, v: &WarpMeshVertex| -> WarpVertex {
170            write_motion(scratch, &base_motion);
171            write_q_snapshot(scratch, &base_q);
172
173            scratch.set("x", v.uv_orig[0] as f64);
174            scratch.set("y", v.uv_orig[1] as f64);
175            scratch.set("rad", v.rad as f64);
176            scratch.set("ang", v.ang as f64);
177
178            compiled.run(scratch);
179
180            let motion = read_motion(scratch);
181            let uv_warp = warp_uv_md2(v.uv_orig[0], v.uv_orig[1], v.rad, &motion, time);
182            WarpVertex {
183                pos_clip: v.pos_clip,
184                uv_warp,
185            }
186        };
187
188        if compiled.len() >= PARALLEL_EQ_THRESHOLD {
189            // Many or expensive equations — fan out across cores. One context
190            // clone per worker thread, amortised across its slice of vertices.
191            mesh.vertices
192                .par_iter()
193                .map_init(|| base_ctx.clone(), body)
194                .collect()
195        } else {
196            // Few cheap equations — rayon's per-frame thread-pool overhead
197            // (per-worker `MilkContext` clone + work-stealing) costs more
198            // than the per-vertex eval saves. Stay sequential.
199            let mut scratch = base_ctx;
200            mesh.vertices
201                .iter()
202                .map(|v| body(&mut scratch, v))
203                .collect()
204        }
205    }
206
207    /// Number of equations currently compiled. Test/debug helper.
208    pub fn compiled_count(&self) -> usize {
209        self.compiled.len()
210    }
211}
212
213impl Default for WarpExecutor {
214    fn default() -> Self {
215        Self::new()
216    }
217}
218
219fn read_motion(ctx: &MilkContext) -> PerVertexMotion {
220    let f = |name: &str, default: f32| ctx.get_var(name).map(|v| v as f32).unwrap_or(default);
221    PerVertexMotion {
222        zoom: f("zoom", 1.0),
223        zoomexp: f("zoomexp", 1.0),
224        rot: f("rot", 0.0),
225        cx: f("cx", 0.5),
226        cy: f("cy", 0.5),
227        dx: f("dx", 0.0),
228        dy: f("dy", 0.0),
229        sx: f("sx", 1.0),
230        sy: f("sy", 1.0),
231        warp: f("warp", 0.0),
232    }
233}
234
235fn write_motion(ctx: &mut MilkContext, m: &PerVertexMotion) {
236    ctx.set("zoom", m.zoom as f64);
237    ctx.set("zoomexp", m.zoomexp as f64);
238    ctx.set("rot", m.rot as f64);
239    ctx.set("cx", m.cx as f64);
240    ctx.set("cy", m.cy as f64);
241    ctx.set("dx", m.dx as f64);
242    ctx.set("dy", m.dy as f64);
243    ctx.set("sx", m.sx as f64);
244    ctx.set("sy", m.sy as f64);
245    ctx.set("warp", m.warp as f64);
246}
247
248fn read_q_snapshot(ctx: &MilkContext) -> [f64; Q_CHANNEL_COUNT] {
249    // q1..q32 live in MilkContext's array-backed q_vars; index by slot
250    // and skip both the `"qN"` lookup table and the trait `get` route.
251    let mut out = [0.0; Q_CHANNEL_COUNT];
252    for (i, slot) in out.iter_mut().enumerate() {
253        *slot = ctx.q_get_idx(i);
254    }
255    out
256}
257
258fn write_q_snapshot(ctx: &mut MilkContext, q: &[f64; Q_CHANNEL_COUNT]) {
259    for (i, value) in q.iter().enumerate() {
260        ctx.q_set_idx(i, *value);
261    }
262}
263
264/// MilkDrop 2 warp UV formula.
265///
266/// Operates on normalized texture coordinates `(x, y) ∈ [0, 1]²`. The output
267/// is the UV from which the fragment shader will sample `prev_main`.
268///
269/// Order of operations (matches butterchurn / projectM convention, which both
270/// derive from `vis_milk2`):
271/// 1. Logarithmic zoom around `(cx, cy)`, with curvature controlled by `zoomexp`.
272/// 2. Rotation around `(cx, cy)` by `rot` radians.
273/// 3. Anisotropic stretch around `(cx, cy)` by `(sx, sy)`.
274/// 4. Translation by `(dx, dy)` (subtractive — `dx > 0` scrolls right).
275/// 5. Optional sinusoidal warp displacement scaled by `warp`.
276fn warp_uv_md2(x: f32, y: f32, rad: f32, m: &PerVertexMotion, time: f32) -> [f32; 2] {
277    let zoomexp = if m.zoomexp.abs() < 1e-6 {
278        1.0
279    } else {
280        m.zoomexp
281    };
282    let zoom_pow = zoomexp.powf(rad * 2.0 - 1.0);
283    let zoom2 = m.zoom.powf(zoom_pow).max(1e-6);
284    let inv_zoom2 = 1.0 / zoom2;
285
286    let mut u = (x - m.cx) * inv_zoom2 + m.cx;
287    let mut v = (y - m.cy) * inv_zoom2 + m.cy;
288
289    let dx = u - m.cx;
290    let dy = v - m.cy;
291    let cos_r = m.rot.cos();
292    let sin_r = m.rot.sin();
293    u = dx * cos_r - dy * sin_r + m.cx;
294    v = dx * sin_r + dy * cos_r + m.cy;
295
296    let inv_sx = if m.sx.abs() < 1e-6 { 1.0 } else { 1.0 / m.sx };
297    let inv_sy = if m.sy.abs() < 1e-6 { 1.0 } else { 1.0 / m.sy };
298    u = (u - m.cx) * inv_sx + m.cx;
299    v = (v - m.cy) * inv_sy + m.cy;
300
301    u -= m.dx;
302    v -= m.dy;
303
304    if m.warp.abs() > 1e-6 {
305        const WARP_SCALE: f32 = 0.0035;
306        let f = m.warp * WARP_SCALE;
307        u += f * (time * 0.913 + 10.0 * y).sin();
308        v += f * (time * 0.952 + 10.0 * x).sin();
309    }
310
311    [u, v]
312}
313
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile `eqs` into a fresh executor and warp `mesh` at `time = 0`.
    fn run(mesh: &WarpMesh, eqs: &[String], evaluator: &mut MilkEvaluator) -> Vec<WarpVertex> {
        let mut exec = WarpExecutor::new();
        exec.set_equations(evaluator, eqs);
        exec.compute(mesh, evaluator, 0.0)
    }

    /// Warp `mesh` with no per-vertex equations and a uniform per-frame
    /// `zoom`. Serves as the reference for tests whose equations must end
    /// up at that same zoom on every vertex.
    fn uniform_zoom_baseline(mesh: &WarpMesh, zoom: f64) -> Vec<WarpVertex> {
        let mut evaluator = MilkEvaluator::new();
        evaluator.context_mut().set_var("zoom", zoom);
        run(mesh, &[], &mut evaluator)
    }

    /// Assert that two warp results agree vertex by vertex.
    fn assert_uvs_match(actual: &[WarpVertex], expected: &[WarpVertex]) {
        assert_eq!(actual.len(), expected.len());
        for (i, (got, want)) in actual.iter().zip(expected).enumerate() {
            let du = (got.uv_warp[0] - want.uv_warp[0]).abs();
            let dv = (got.uv_warp[1] - want.uv_warp[1]).abs();
            assert!(
                du < 1e-6 && dv < 1e-6,
                "vertex {i} differs from the baseline: {:?} vs {:?}",
                got.uv_warp,
                want.uv_warp
            );
        }
    }

    #[test]
    fn identity_when_motion_is_neutral() {
        let mesh = WarpMesh::new(4, 4, 1.0);
        let mut evaluator = MilkEvaluator::new();
        let warp = run(&mesh, &[], &mut evaluator);
        assert_eq!(warp.len(), mesh.vertices.len());
        for (i, v) in warp.iter().enumerate() {
            let orig = mesh.vertices[i].uv_orig;
            assert!(
                (v.uv_warp[0] - orig[0]).abs() < 1e-5,
                "u_warp mismatch at {i}"
            );
            assert!(
                (v.uv_warp[1] - orig[1]).abs() < 1e-5,
                "v_warp mismatch at {i}"
            );
        }
    }

    #[test]
    fn zoom_pulls_corner_uvs_toward_center() {
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        evaluator.context_mut().set_var("zoom", 2.0);
        let warp = run(&mesh, &[], &mut evaluator);

        let bl = warp[0].uv_warp;
        assert!(bl[0] > 0.0 && bl[0] < 0.5);
        assert!(bl[1] > 0.0 && bl[1] < 0.5);
    }

    #[test]
    fn rotation_swaps_axes_at_90_degrees() {
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        evaluator
            .context_mut()
            .set_var("rot", std::f64::consts::FRAC_PI_2);
        let warp = run(&mesh, &[], &mut evaluator);
        let right_mid = warp[3 + 2].uv_warp;
        let dist_from_orig = ((right_mid[0] - 1.0).powi(2) + (right_mid[1] - 0.5).powi(2)).sqrt();
        assert!(dist_from_orig > 0.4);
    }

    #[test]
    fn per_vertex_equation_modifies_motion_per_vertex() {
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        let eqs = vec!["zoom = 1 + rad".to_string()];
        let warp = run(&mesh, &eqs, &mut evaluator);

        let center = warp[3 + 1].uv_warp;
        assert!((center[0] - 0.5).abs() < 1e-4);
        assert!((center[1] - 0.5).abs() < 1e-4);

        let corner = warp[0].uv_warp;
        assert!(corner[0] > 0.0 && corner[0] < 0.5);
        assert!(corner[1] > 0.0 && corner[1] < 0.5);
    }

    #[test]
    fn vertices_are_independent_no_motion_carryover() {
        // A bug-prone case: every vertex doubles `zoom`. Without motion
        // snapshot restoration the n-th vertex would see zoom = 2^(n+1)
        // (state leaking from previous vertices). With restoration every
        // vertex starts from the per-frame zoom = 1 and ends at zoom = 2,
        // producing the same uniform warp toward the center.
        let mesh = WarpMesh::new(4, 4, 1.0);
        let mut evaluator = MilkEvaluator::new();
        let eqs = vec!["zoom = zoom * 2".to_string()];
        let warp = run(&mesh, &eqs, &mut evaluator);

        // Compare against an equation-free run with a uniform zoom of 2.
        // Re-running the same equations would not catch carryover, because
        // a leak is just as deterministic as the correct result; the
        // baseline only matches if *every* vertex ended at exactly zoom = 2.
        let expected = uniform_zoom_baseline(&mesh, 2.0);
        assert_uvs_match(&warp, &expected);

        // Spot-check: corner with rad ≈ 1 should warp toward center given
        // zoom = 2 (consistent with `zoom_pulls_corner_uvs_toward_center`).
        let bl = warp[0].uv_warp;
        assert!(bl[0] > 0.0 && bl[0] < 0.5);
        assert!(bl[1] > 0.0 && bl[1] < 0.5);
    }

    #[test]
    fn set_equations_is_idempotent_across_calls() {
        let mut evaluator = MilkEvaluator::new();
        let mut exec = WarpExecutor::new();
        let eqs = vec!["zoom = 1 + rad".to_string()];
        exec.set_equations(&mut evaluator, &eqs);
        assert_eq!(exec.compiled_count(), 1);

        // Re-setting the same equations should not recompile.
        exec.set_equations(&mut evaluator, &eqs);
        assert_eq!(exec.compiled_count(), 1);
    }

    #[test]
    fn q_channel_drives_per_vertex_zoom() {
        // The per-frame phase (simulated here by writing q1 directly on the
        // evaluator) must be visible to per-vertex equations. Without the
        // q-snapshot/restore in WarpExecutor::compute, the first vertex would
        // overwrite q1 (or its zoom dependency) for subsequent vertices.
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        evaluator.context_mut().set_var("q1", 1.0); // simulate per-frame write
        let eqs = vec!["zoom = 1 + q1".to_string()];
        let warp = run(&mesh, &eqs, &mut evaluator);

        // q1 = 1 makes zoom = 2 on every vertex, so the (0, 0) corner is
        // pulled toward the (0.5, 0.5) center.
        let bl = warp[0].uv_warp;
        assert!(
            bl[0] > 0.0 && bl[0] < 0.5,
            "q1=1 should make zoom=2 and pull bl corner; got u_warp={}",
            bl[0]
        );

        // With q1=0 the same eq reduces to zoom=1 (identity).
        evaluator.context_mut().set_var("q1", 0.0);
        let warp2 = run(&mesh, &eqs, &mut evaluator);
        let bl2 = warp2[0].uv_warp;
        assert!(
            (bl2[0] - 0.0).abs() < 1e-4,
            "q1=0 should yield zoom=1 (identity); got u_warp={}",
            bl2[0]
        );
    }

    #[test]
    fn q_snapshot_isolates_per_vertex_writes() {
        // A per-vertex eq that writes q1 must NOT contaminate the next
        // vertex's view of q1: every vertex should see the original
        // per-frame q1 value as its starting point.
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        evaluator.context_mut().set_var("q1", 0.5);
        let eqs = vec![
            "q1 = q1 * 2".to_string(),   // per-vertex doubles q1
            "zoom = 1 + q1".to_string(), // zoom depends on (modified) q1
        ];
        let warp = run(&mesh, &eqs, &mut evaluator);
        // After the first vertex, q1 in the scratch becomes 1.0. If
        // restoration didn't happen, the second vertex would see q1=1.0
        // (instead of the per-frame q1=0.5) and end at zoom=3 rather than
        // zoom=2. With restoration every vertex ends at zoom = 1 + 2·0.5 = 2,
        // i.e. exactly the equation-free uniform-zoom result.
        let expected = uniform_zoom_baseline(&mesh, 2.0);
        assert_uvs_match(&warp, &expected);
        // Per-frame q1 itself is unchanged outside the executor (executor
        // only touches its own scratch).
        assert_eq!(evaluator.context().get_var("q1"), Some(0.5));
    }

    #[test]
    fn compile_failure_is_dropped_silently() {
        let mut evaluator = MilkEvaluator::new();
        let mut exec = WarpExecutor::new();
        let eqs = vec![
            "zoom = 1 + rad".to_string(),
            "this is not valid !!!".to_string(),
            "rot = 0.5".to_string(),
        ];
        exec.set_equations(&mut evaluator, &eqs);
        // 2 of 3 compile.
        assert_eq!(exec.compiled_count(), 2);
    }
}