onedrop_engine/warp_eval.rs
1//! Per-vertex warp evaluation.
2//!
//! Each frame the warp mesh's `cols × rows` vertices need their UVs warped
//! according to the preset's per-vertex (MilkDrop "per_pixel") equations and
//! the MilkDrop 2 zoom/rot/stretch/translate/warp formula.
6//!
7//! ## Hot path strategy
8//!
//! [`WarpExecutor`] precompiles each per-vertex equation into an
//! `evalexpr::Node` once at preset load and then reuses a scratch
//! [`MilkContext`] across the vertices of a frame — **one** on the sequential
//! path, one per rayon worker on the parallel path. This replaces the previous
//! "clone evaluator state per vertex" approach, which allocated a fresh
//! `HashMapContext` (≈60 vars + math-function map) for every one of the
//! `cols × rows` vertices — 768 allocations per frame at the default 32×24
//! mesh, up to 18 432 at 192×96.
16//!
17//! Per-vertex independence (each vertex sees the same per-frame state on
18//! entry, regardless of what previous vertices wrote) is preserved by
19//! snapshotting the motion outputs (`zoom`, `zoomexp`, `rot`, `cx`, `cy`,
20//! `dx`, `dy`, `sx`, `sy`, `warp`) and the `q1..q32` channels at the start
21//! of the frame and restoring them before each vertex eval.
22
23use onedrop_eval::{CompiledBlock, MilkContext, MilkEvaluator};
24use onedrop_renderer::warp_mesh::{WarpMesh, WarpMeshVertex};
25use onedrop_renderer::warp_pipeline::WarpVertex;
26use rayon::prelude::*;
27
/// Snapshot of the motion parameters that drive the warp formula for one
/// vertex.
///
/// Filled from a `MilkContext` by `read_motion` (either the per-frame values
/// or the values left behind after the per-vertex equations ran) and consumed
/// by `warp_uv_md2`.
#[derive(Debug, Clone, Copy)]
struct PerVertexMotion {
    /// Zoom amount applied around `(cx, cy)`.
    zoom: f32,
    /// Curvature of the zoom as a function of the vertex radius.
    zoomexp: f32,
    /// Rotation around `(cx, cy)`, in radians.
    rot: f32,
    /// Horizontal coordinate of the zoom/rotation/stretch centre.
    cx: f32,
    /// Vertical coordinate of the zoom/rotation/stretch centre.
    cy: f32,
    /// Horizontal translation (subtracted from the warped U).
    dx: f32,
    /// Vertical translation (subtracted from the warped V).
    dy: f32,
    /// Horizontal stretch factor around `cx`.
    sx: f32,
    /// Vertical stretch factor around `cy`.
    sy: f32,
    /// Amplitude of the sinusoidal warp displacement.
    warp: f32,
}
43
/// How many `q*` channels are captured at the start of a frame and written
/// back before every vertex evaluation.
///
/// MilkDrop 2 exposes `q1..q32`. The underlying `MilkContext` reserves 64
/// slots, but only the first 32 are user-facing, so only those are
/// snapshotted.
const Q_CHANNEL_COUNT: usize = 32;
48
/// Smallest number of compiled equations for which the per-vertex loop is
/// handed to rayon; below it the loop runs sequentially.
///
/// Measured rationale: presets with ~3 cheap equations are faster
/// sequentially, because the per-thread `MilkContext` clone plus
/// work-stealing overhead (~1 ms/frame baseline) dominates the warp-exec
/// time. Presets with 10+ equations see a 2-3× wall-clock win on a 12-thread
/// box. The breakpoint of 8 was picked after measuring three reference
/// presets (3 / 26 / 35 equations) with the `bench_render` tool.
const PARALLEL_EQ_THRESHOLD: usize = 8;
57
58/// Pre-compiled per-vertex equation executor.
59///
60/// One instance lives for the lifetime of the engine; [`set_equations`] is
61/// called whenever a new preset is loaded.
62///
63/// [`set_equations`]: WarpExecutor::set_equations
64pub struct WarpExecutor {
65 /// Source equations as last set, retained so `set_equations` can no-op
66 /// when called with an unchanged set (common case during steady-state
67 /// rendering).
68 sources: Vec<String>,
69 /// Compiled equations packaged in a [`CompiledBlock`] — auto-lowers
70 /// to bytecode when every node is supported (~80 % of corpus
71 /// per_pixel blocks), falls back to evalexpr Node walk otherwise.
72 /// Per-vertex eval is the second-densest CPU phase after wave
73 /// per_point: 768 vertices × N equations every frame, often in
74 /// rayon. Skipping evalexpr's recursive operator dispatch on each
75 /// op trims a measurable slice of `warp_compute`. May contain fewer
76 /// nodes than `sources` if some equations failed to compile —
77 /// failures are logged and silently dropped, matching MD2's
78 /// "never crash on a bad preset" stance.
79 compiled: CompiledBlock,
80}
81
82impl WarpExecutor {
83 pub fn new() -> Self {
84 Self {
85 sources: Vec::new(),
86 compiled: CompiledBlock::empty(),
87 }
88 }
89
90 /// Replace the compiled equation set. No-op when `eqs` matches the
91 /// previously compiled sources.
92 ///
93 /// `evaluator` is the per-frame evaluator; it is borrowed mutably so the
94 /// preprocess pass can register any newly seen variable names on its
95 /// context (preserving the same auto-init semantics as
96 /// [`MilkEvaluator::eval`]).
97 pub fn set_equations(&mut self, evaluator: &mut MilkEvaluator, eqs: &[String]) {
98 if self.sources.len() == eqs.len() && self.sources.iter().zip(eqs).all(|(a, b)| a == b) {
99 return;
100 }
101 self.sources = eqs.to_vec();
102 let mut nodes = Vec::with_capacity(eqs.len());
103 for eq in eqs {
104 match evaluator.compile(eq) {
105 Ok(node) => nodes.push(node),
106 Err(e) => {
107 log::warn!("per-vertex equation compile failed: {eq:?}: {e}");
108 }
109 }
110 }
111 self.compiled = CompiledBlock::from_nodes(nodes, evaluator.context_mut());
112 }
113
114 /// Compute one [`WarpVertex`] per mesh vertex, ready to upload to the GPU.
115 ///
116 /// Pre-condition: `evaluator` has just finished its per-frame phase so
117 /// motion outputs, audio levels, and `q*` channels are up to date.
118 ///
119 /// ## Parallelism
120 ///
121 /// When the preset has any per-vertex equations, the inner loop fans
122 /// out across rayon's global thread pool: each worker gets one cloned
123 /// [`MilkContext`] (via `map_init`) and chews through its slice of the
124 /// 768-vertex mesh. Per-MD2-spec, per-vertex equations have no
125 /// cross-vertex carryover, so the parallel order is invisible from the
126 /// outside. The init clone is ~1 µs (HashMap with per-frame state plus
127 /// math-function pointers), so amortised across hundreds of vertices per
128 /// worker it's free.
129 ///
130 /// Empty-equation presets stay on the simple sequential path: nothing
131 /// to evaluate, so threading would only add overhead.
132 pub fn compute(
133 &mut self,
134 mesh: &WarpMesh,
135 evaluator: &MilkEvaluator,
136 time: f32,
137 ) -> Vec<WarpVertex> {
138 let base_motion = read_motion(evaluator.context());
139
140 if self.compiled.is_empty() {
141 // Fast path: no per-vertex equations means every vertex shares
142 // the per-frame motion. Skip the scratch context entirely.
143 return mesh
144 .vertices
145 .iter()
146 .map(|v| WarpVertex {
147 pos_clip: v.pos_clip,
148 uv_warp: warp_uv_md2(v.uv_orig[0], v.uv_orig[1], v.rad, &base_motion, time),
149 })
150 .collect();
151 }
152
153 let base_ctx = evaluator.context().clone();
154 let base_q = read_q_snapshot(&base_ctx);
155 let compiled = &self.compiled;
156
157 // Per-vertex body. Restores per-frame motion / q* state so the n-th
158 // vertex sees the same starting context as the 0-th (per-vertex eqs
159 // that depend on cross-vertex carryover are undefined in MilkDrop and
160 // we explicitly do not implement those semantics). Captures
161 // `base_motion`, `base_q`, `compiled`, and `time` from the enclosing
162 // scope; takes the scratch context + the source vertex.
163 //
164 // `compiled.run(scratch)` picks the bytecode VM when the
165 // per_pixel block lowered cleanly (the common case), otherwise
166 // falls back to the evalexpr Node walk. Either way per-vertex
167 // failures are silently absorbed — the vertex inherits whatever
168 // state survived.
169 let body = |scratch: &mut MilkContext, v: &WarpMeshVertex| -> WarpVertex {
170 write_motion(scratch, &base_motion);
171 write_q_snapshot(scratch, &base_q);
172
173 scratch.set("x", v.uv_orig[0] as f64);
174 scratch.set("y", v.uv_orig[1] as f64);
175 scratch.set("rad", v.rad as f64);
176 scratch.set("ang", v.ang as f64);
177
178 compiled.run(scratch);
179
180 let motion = read_motion(scratch);
181 let uv_warp = warp_uv_md2(v.uv_orig[0], v.uv_orig[1], v.rad, &motion, time);
182 WarpVertex {
183 pos_clip: v.pos_clip,
184 uv_warp,
185 }
186 };
187
188 if compiled.len() >= PARALLEL_EQ_THRESHOLD {
189 // Many or expensive equations — fan out across cores. One context
190 // clone per worker thread, amortised across its slice of vertices.
191 mesh.vertices
192 .par_iter()
193 .map_init(|| base_ctx.clone(), body)
194 .collect()
195 } else {
196 // Few cheap equations — rayon's per-frame thread-pool overhead
197 // (per-worker `MilkContext` clone + work-stealing) costs more
198 // than the per-vertex eval saves. Stay sequential.
199 let mut scratch = base_ctx;
200 mesh.vertices
201 .iter()
202 .map(|v| body(&mut scratch, v))
203 .collect()
204 }
205 }
206
207 /// Number of equations currently compiled. Test/debug helper.
208 pub fn compiled_count(&self) -> usize {
209 self.compiled.len()
210 }
211}
212
213impl Default for WarpExecutor {
214 fn default() -> Self {
215 Self::new()
216 }
217}
218
219fn read_motion(ctx: &MilkContext) -> PerVertexMotion {
220 let f = |name: &str, default: f32| ctx.get_var(name).map(|v| v as f32).unwrap_or(default);
221 PerVertexMotion {
222 zoom: f("zoom", 1.0),
223 zoomexp: f("zoomexp", 1.0),
224 rot: f("rot", 0.0),
225 cx: f("cx", 0.5),
226 cy: f("cy", 0.5),
227 dx: f("dx", 0.0),
228 dy: f("dy", 0.0),
229 sx: f("sx", 1.0),
230 sy: f("sy", 1.0),
231 warp: f("warp", 0.0),
232 }
233}
234
235fn write_motion(ctx: &mut MilkContext, m: &PerVertexMotion) {
236 ctx.set("zoom", m.zoom as f64);
237 ctx.set("zoomexp", m.zoomexp as f64);
238 ctx.set("rot", m.rot as f64);
239 ctx.set("cx", m.cx as f64);
240 ctx.set("cy", m.cy as f64);
241 ctx.set("dx", m.dx as f64);
242 ctx.set("dy", m.dy as f64);
243 ctx.set("sx", m.sx as f64);
244 ctx.set("sy", m.sy as f64);
245 ctx.set("warp", m.warp as f64);
246}
247
248fn read_q_snapshot(ctx: &MilkContext) -> [f64; Q_CHANNEL_COUNT] {
249 // q1..q32 live in MilkContext's array-backed q_vars; index by slot
250 // and skip both the `"qN"` lookup table and the trait `get` route.
251 let mut out = [0.0; Q_CHANNEL_COUNT];
252 for (i, slot) in out.iter_mut().enumerate() {
253 *slot = ctx.q_get_idx(i);
254 }
255 out
256}
257
258fn write_q_snapshot(ctx: &mut MilkContext, q: &[f64; Q_CHANNEL_COUNT]) {
259 for (i, value) in q.iter().enumerate() {
260 ctx.q_set_idx(i, *value);
261 }
262}
263
264/// MilkDrop 2 warp UV formula.
265///
266/// Operates on normalized texture coordinates `(x, y) ∈ [0, 1]²`. The output
267/// is the UV from which the fragment shader will sample `prev_main`.
268///
269/// Order of operations (matches butterchurn / projectM convention, which both
270/// derive from `vis_milk2`):
271/// 1. Logarithmic zoom around `(cx, cy)`, with curvature controlled by `zoomexp`.
272/// 2. Rotation around `(cx, cy)` by `rot` radians.
273/// 3. Anisotropic stretch around `(cx, cy)` by `(sx, sy)`.
274/// 4. Translation by `(dx, dy)` (subtractive — `dx > 0` scrolls right).
275/// 5. Optional sinusoidal warp displacement scaled by `warp`.
276fn warp_uv_md2(x: f32, y: f32, rad: f32, m: &PerVertexMotion, time: f32) -> [f32; 2] {
277 let zoomexp = if m.zoomexp.abs() < 1e-6 {
278 1.0
279 } else {
280 m.zoomexp
281 };
282 let zoom_pow = zoomexp.powf(rad * 2.0 - 1.0);
283 let zoom2 = m.zoom.powf(zoom_pow).max(1e-6);
284 let inv_zoom2 = 1.0 / zoom2;
285
286 let mut u = (x - m.cx) * inv_zoom2 + m.cx;
287 let mut v = (y - m.cy) * inv_zoom2 + m.cy;
288
289 let dx = u - m.cx;
290 let dy = v - m.cy;
291 let cos_r = m.rot.cos();
292 let sin_r = m.rot.sin();
293 u = dx * cos_r - dy * sin_r + m.cx;
294 v = dx * sin_r + dy * cos_r + m.cy;
295
296 let inv_sx = if m.sx.abs() < 1e-6 { 1.0 } else { 1.0 / m.sx };
297 let inv_sy = if m.sy.abs() < 1e-6 { 1.0 } else { 1.0 / m.sy };
298 u = (u - m.cx) * inv_sx + m.cx;
299 v = (v - m.cy) * inv_sy + m.cy;
300
301 u -= m.dx;
302 v -= m.dy;
303
304 if m.warp.abs() > 1e-6 {
305 const WARP_SCALE: f32 = 0.0035;
306 let f = m.warp * WARP_SCALE;
307 u += f * (time * 0.913 + 10.0 * y).sin();
308 v += f * (time * 0.952 + 10.0 * x).sin();
309 }
310
311 [u, v]
312}
313
#[cfg(test)]
mod tests {
    use super::*;

    /// Largest per-component UV difference accepted by [`assert_same_warp`].
    const UV_TOLERANCE: f32 = 1e-6;

    /// Compile `eqs` into a fresh [`WarpExecutor`] and warp `mesh` at
    /// `time = 0`.
    fn run(mesh: &WarpMesh, eqs: &[String], evaluator: &mut MilkEvaluator) -> Vec<WarpVertex> {
        let mut exec = WarpExecutor::new();
        exec.set_equations(evaluator, eqs);
        exec.compute(mesh, evaluator, 0.0)
    }

    /// Warp `mesh` with no per-vertex equations and the per-frame `zoom` set
    /// to the given value. Serves as the ground truth for "every vertex ends
    /// up with exactly this zoom".
    fn uniform_zoom_reference(mesh: &WarpMesh, zoom: f64) -> Vec<WarpVertex> {
        let mut evaluator = MilkEvaluator::new();
        evaluator.context_mut().set_var("zoom", zoom);
        run(mesh, &[], &mut evaluator)
    }

    /// Assert that two warp results agree vertex-for-vertex.
    fn assert_same_warp(actual: &[WarpVertex], expected: &[WarpVertex], what: &str) {
        assert_eq!(actual.len(), expected.len(), "{what}: vertex count differs");
        for (i, (a, e)) in actual.iter().zip(expected).enumerate() {
            let du = (a.uv_warp[0] - e.uv_warp[0]).abs();
            let dv = (a.uv_warp[1] - e.uv_warp[1]).abs();
            assert!(
                du <= UV_TOLERANCE && dv <= UV_TOLERANCE,
                "{what}: vertex {i} differs: {:?} vs {:?}",
                a.uv_warp,
                e.uv_warp
            );
        }
    }

    #[test]
    fn identity_when_motion_is_neutral() {
        let mesh = WarpMesh::new(4, 4, 1.0);
        let mut evaluator = MilkEvaluator::new();
        let warp = run(&mesh, &[], &mut evaluator);
        assert_eq!(warp.len(), mesh.vertices.len());
        for (i, v) in warp.iter().enumerate() {
            let orig = mesh.vertices[i].uv_orig;
            assert!(
                (v.uv_warp[0] - orig[0]).abs() < 1e-5,
                "u_warp mismatch at {i}"
            );
            assert!(
                (v.uv_warp[1] - orig[1]).abs() < 1e-5,
                "v_warp mismatch at {i}"
            );
        }
    }

    #[test]
    fn zoom_pulls_corner_uvs_toward_center() {
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        evaluator.context_mut().set_var("zoom", 2.0);
        let warp = run(&mesh, &[], &mut evaluator);

        let bl = warp[0].uv_warp;
        assert!(bl[0] > 0.0 && bl[0] < 0.5);
        assert!(bl[1] > 0.0 && bl[1] < 0.5);
    }

    #[test]
    fn rotation_swaps_axes_at_90_degrees() {
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        evaluator
            .context_mut()
            .set_var("rot", std::f64::consts::FRAC_PI_2);
        let warp = run(&mesh, &[], &mut evaluator);
        // Middle-row, last-column vertex: a quarter turn must move it well
        // away from its original UV of (1.0, 0.5).
        let right_mid = warp[3 + 2].uv_warp;
        let dist_from_orig = ((right_mid[0] - 1.0).powi(2) + (right_mid[1] - 0.5).powi(2)).sqrt();
        assert!(dist_from_orig > 0.4);
    }

    #[test]
    fn per_vertex_equation_modifies_motion_per_vertex() {
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        let eqs = vec!["zoom = 1 + rad".to_string()];
        let warp = run(&mesh, &eqs, &mut evaluator);

        let center = warp[3 + 1].uv_warp;
        assert!((center[0] - 0.5).abs() < 1e-4);
        assert!((center[1] - 0.5).abs() < 1e-4);

        let corner = warp[0].uv_warp;
        assert!(corner[0] > 0.0 && corner[0] < 0.5);
        assert!(corner[1] > 0.0 && corner[1] < 0.5);
    }

    #[test]
    fn vertices_are_independent_no_motion_carryover() {
        // Every vertex doubles `zoom`. Without motion snapshot restoration
        // the n-th vertex would see zoom = 2^(n+1) (state leaking from the
        // previous vertices). With restoration every vertex starts from the
        // per-frame zoom = 1 and ends at zoom = 2.
        let mesh = WarpMesh::new(4, 4, 1.0);
        let mut evaluator = MilkEvaluator::new();
        let eqs = vec!["zoom = zoom * 2".to_string()];
        let warp = run(&mesh, &eqs, &mut evaluator);

        // Compare against an equation-free run whose per-frame zoom is
        // already 2. Simply re-running the same equations would reproduce
        // any leak deterministically and so could not detect it.
        let expected = uniform_zoom_reference(&mesh, 2.0);
        assert_same_warp(&warp, &expected, "zoom carried over between vertices");

        // Spot-check: corner with rad ≈ 1 should warp toward center given
        // zoom = 2 (consistent with `zoom_pulls_corner_uvs_toward_center`).
        let bl = warp[0].uv_warp;
        assert!(bl[0] > 0.0 && bl[0] < 0.5);
        assert!(bl[1] > 0.0 && bl[1] < 0.5);
    }

    #[test]
    fn set_equations_is_idempotent_across_calls() {
        let mut evaluator = MilkEvaluator::new();
        let mut exec = WarpExecutor::new();
        let eqs = vec!["zoom = 1 + rad".to_string()];
        exec.set_equations(&mut evaluator, &eqs);
        assert_eq!(exec.compiled_count(), 1);

        // Re-setting the same equations must leave the compiled set as it
        // was (the early return itself is not observable from here).
        exec.set_equations(&mut evaluator, &eqs);
        assert_eq!(exec.compiled_count(), 1);
    }

    #[test]
    fn q_channel_drives_per_vertex_zoom() {
        // The per-frame phase (simulated here by writing q1 directly on the
        // evaluator) must be visible to per-vertex equations.
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        evaluator.context_mut().set_var("q1", 1.0); // simulate per-frame write
        let eqs = vec!["zoom = 1 + q1".to_string()];
        let warp = run(&mesh, &eqs, &mut evaluator);

        // q1 = 1 gives zoom = 2, which pulls the first (corner) vertex
        // toward the center.
        let bl = warp[0].uv_warp;
        assert!(
            bl[0] > 0.0 && bl[0] < 0.5,
            "q1=1 should make zoom=2 and pull bl corner; got u_warp={}",
            bl[0]
        );

        // With q1=0 the same eq reduces to zoom=1 (identity).
        evaluator.context_mut().set_var("q1", 0.0);
        let warp2 = run(&mesh, &eqs, &mut evaluator);
        let bl2 = warp2[0].uv_warp;
        assert!(
            (bl2[0] - 0.0).abs() < 1e-4,
            "q1=0 should yield zoom=1 (identity); got u_warp={}",
            bl2[0]
        );
    }

    #[test]
    fn q_snapshot_isolates_per_vertex_writes() {
        // A per-vertex eq that writes q1 must NOT contaminate the next
        // vertex's view of q1: every vertex should see the original
        // per-frame q1 value as its starting point.
        let mesh = WarpMesh::new(3, 3, 1.0);
        let mut evaluator = MilkEvaluator::new();
        evaluator.context_mut().set_var("q1", 0.5);
        let eqs = vec![
            "q1 = q1 * 2".to_string(),   // per-vertex doubles q1
            "zoom = 1 + q1".to_string(), // zoom depends on (modified) q1
        ];
        let warp = run(&mesh, &eqs, &mut evaluator);

        // With isolation every vertex computes q1 = 1.0 and therefore
        // zoom = 2.0. If q1 leaked, the second vertex would start from
        // q1 = 1.0 and reach zoom = 3.0, and so on. Comparing against the
        // uniform zoom = 2 reference catches that; a plain re-run would not,
        // because the leak itself is deterministic.
        let expected = uniform_zoom_reference(&mesh, 2.0);
        assert_same_warp(&warp, &expected, "q1 carried over between vertices");

        // Per-frame q1 itself is unchanged outside the executor (executor
        // only touches its own scratch).
        assert_eq!(evaluator.context().get_var("q1"), Some(0.5));
    }

    #[test]
    fn parallel_path_matches_sequential_path() {
        // One meaningful equation evaluated on the sequential path...
        let mesh = WarpMesh::new(8, 6, 1.0);
        let base = "zoom = 1 + rad".to_string();
        let mut seq_eval = MilkEvaluator::new();
        let sequential = run(&mesh, std::slice::from_ref(&base), &mut seq_eval);

        // ...versus the same equation padded with value-preserving ones
        // until the executor switches to the rayon path.
        let mut eqs = vec![base];
        eqs.resize(PARALLEL_EQ_THRESHOLD.max(2), "zoom = zoom * 1".to_string());
        let mut par_eval = MilkEvaluator::new();
        let mut exec = WarpExecutor::new();
        exec.set_equations(&mut par_eval, &eqs);
        assert!(
            exec.compiled_count() >= PARALLEL_EQ_THRESHOLD,
            "padding did not reach the parallel threshold"
        );
        let parallel = exec.compute(&mesh, &par_eval, 0.0);

        assert_same_warp(&parallel, &sequential, "parallel vs sequential path");
    }

    #[test]
    fn compile_failure_is_dropped_silently() {
        let mut evaluator = MilkEvaluator::new();
        let mut exec = WarpExecutor::new();
        let eqs = vec![
            "zoom = 1 + rad".to_string(),
            "this is not valid !!!".to_string(),
            "rot = 0.5".to_string(),
        ];
        exec.set_equations(&mut evaluator, &eqs);
        // 2 of 3 compile.
        assert_eq!(exec.compiled_count(), 2);
    }
}
504}