onedrop_eval/evaluator/joiner.rs
1//! Multi-equation join logic and top-level call interceptors.
2//!
3//! Two concerns sit here:
4//!
5//! 1. [`MilkEvaluator::eval_equation_list`] — paren-balance-aware join over a
6//! `.milk` preset's flat equation list. Consecutive equations that aren't
7//! paren-balanced (or end on a dangling binary operator, or look like a
8//! continuation of the previous line) get glued with `;` before
9//! evaluation.
10//! 2. [`MilkEvaluator::eval_processed_with_loops`] — top-level interceptor
11//! for `loop(N, body)`, `exec2`, `exec3`, and `while(body)`. evalexpr
12//! can't express these EEL2 idioms natively (their bodies are `;`-chains
13//! inside what looks like a function-call arg list), so we strip the
14//! call boundary and walk the body ourselves.
15
16use evalexpr::eval_with_context_mut;
17
18use crate::error::{EvalError, Result};
19
20use super::MilkEvaluator;
21use super::preprocess::{MAX_LOOP_ITER, strip_line_comments};
22use super::{is_ident_byte, match_close_paren, split_top_level_commas};
23
24/// Aggregate stats returned by [`MilkEvaluator::eval_equation_list`].
25///
26/// `total` is the input length so callers can compute per-equation failure
27/// rates; `failed` counts whole blocks that errored, so a 5-line loop that
28/// fails counts as one failure.
29#[derive(Debug, Default, Clone)]
30pub struct EvaluationStats {
31 pub total: usize,
32 pub failed: usize,
33 pub first_error: String,
34}
35
36impl MilkEvaluator {
37 /// Evaluate a flat list of equations, joining consecutive
38 /// paren-unbalanced lines (MD2 `loop(...)` idiom). Returns aggregate
39 /// stats; the evaluator's context retains every variable touched by
40 /// the run.
41 pub fn eval_equation_list(&mut self, eqs: &[String]) -> EvaluationStats {
42 let mut stats = EvaluationStats {
43 total: eqs.len(),
44 ..Default::default()
45 };
46 let mut buffer = String::new();
47 let mut depth = 0i32;
48 for (idx, eq) in eqs.iter().enumerate() {
49 // Strip `//` comments per-equation BEFORE the buffer-join. Each
50 // `per_frame_N=` value in MD2 is one line of EEL source, so a
51 // `//` comment runs to the end of that equation. Stripping
52 // post-join would let the `//` consume every following equation
53 // up to the end of the buffer (no `\n` is inserted by the
54 // joiner), wiping the tail of every `loop(N, …; //... ; …)`
55 // block. `preprocess_expression` still re-strips inside `eval()`
56 // for the single-equation entry point — the second pass is
57 // idempotent because the per-eq strip already left nothing to
58 // remove.
59 let stripped = strip_line_comments(eq);
60 let trimmed = stripped.trim();
61 if trimmed.is_empty() {
62 continue;
63 }
64 if !buffer.is_empty() {
65 // Always use `;` as the join separator. The dominant
66 // multi-line idiom in MD2 is `loop(N, stmt; stmt; stmt)`;
67 // with N-fold unrolling, the body is sliced out and
68 // re-evaluated as a statement chain, so `;` is what
69 // evalexpr needs to see. Skip the separator when the
70 // buffer's tail is already a paren or punctuator that
71 // doesn't need one.
72 let last = buffer.trim_end().bytes().last();
73 // Inside an unclosed `(`, a leading `+`/`-` is unambiguously
74 // binary (no statement boundary has been seen), so suppress
75 // the `;` that would otherwise be rewritten to `,` by
76 // [`super::rewriters::rewrite_semis_in_call_args`] and
77 // corrupt the call's arg count.
78 let unbalanced_binop_continuation =
79 depth > 0 && matches!(trimmed.bytes().next(), Some(b'+') | Some(b'-'));
80 // At depth 0, a leading `+`/`-` on a line that contains no
81 // top-level `=` is almost certainly an orphan binop the
82 // author split across `per_pixel_N=` lines (corpus shape:
83 // `per_pixel_4=rot=cos(rad)` / `per_pixel_5=+sin(rad)`).
84 // The `=`-presence check rules out legitimate `b = -1`
85 // standalone assignments — those still flush independently.
86 let closed_paren_orphan_continuation =
87 depth <= 0 && starts_with_plus_minus_orphan(trimmed);
88 let split_lhs_assignment = starts_with_split_assignment(trimmed);
89 // Split function-call only when the previous tail is an ident
90 // byte; a standalone `(group)` line after a `;` must still
91 // flush cleanly.
92 let split_call_paren =
93 starts_with_open_paren(trimmed) && matches!(last, Some(b) if is_ident_byte(b));
94 let needs_sep = !matches!(
95 last,
96 Some(b',')
97 | Some(b'(')
98 | Some(b';')
99 | Some(b'+')
100 | Some(b'-')
101 | Some(b'*')
102 | Some(b'/')
103 | Some(b'%')
104 ) && !starts_with_binop(trimmed)
105 && !unbalanced_binop_continuation
106 && !closed_paren_orphan_continuation
107 && !split_lhs_assignment
108 && !split_call_paren;
109 if needs_sep {
110 buffer.push(';');
111 }
112 }
113 buffer.push_str(trimmed);
114 depth = paren_balance(&buffer);
115 // Peek at the next non-empty equation. If it starts with a
116 // binary operator (`*`/`/`/`%`) or an assignment/call
117 // continuation marker, defer the flush so the joiner can stitch
118 // the orphan onto the current buffer instead of evaluating it
119 // standalone. Corpus shape:
120 //
121 // ```text
122 // per_pixel_4=rot=bnot(above(x,.5)+((y*q8)%q7))
123 // per_pixel_5=*cos(rad+3.14*if(...))*q4;
124 // ```
125 //
126 // Without the lookahead, eq #4 flushes (it's paren-balanced) and
127 // eq #5 arrives in an empty buffer and dies as "operator
128 // expected 1 args, got 0". With it, the flush is held until eq
129 // #5 has been appended and the composite block evaluates as
130 // `rot=bnot(...)*cos(...)*q4`.
131 if depth <= 0
132 && !ends_with_dangling_binop(&buffer)
133 && !next_eq_is_continuation(eqs, idx)
134 {
135 let block = sanitize_joined_block(&buffer);
136 if let Err(e) = self.eval(&block) {
137 stats.failed += 1;
138 if stats.first_error.is_empty() {
139 stats.first_error = format!("{e:?}");
140 }
141 }
142 buffer.clear();
143 }
144 }
145 if !buffer.is_empty() {
146 let block = sanitize_joined_block(&buffer);
147 if let Err(e) = self.eval(&block) {
148 stats.failed += 1;
149 if stats.first_error.is_empty() {
150 stats.first_error = format!("{e:?}");
151 }
152 }
153 }
154 stats
155 }
156
157 /// Evaluate `expr` (already preprocessed), intercepting top-level
158 /// `loop(N, body)`, `exec2(a, b)`, `exec3(a, b, c)` and
159 /// `while(body)` calls. `original` is kept for error reporting.
160 pub(super) fn eval_processed_with_loops(&mut self, expr: &str, original: &str) -> Result<f64> {
161 let trimmed = expr.trim().trim_end_matches(';').trim();
162 if trimmed.is_empty() {
163 return Ok(0.0);
164 }
165
166 if let Some((before, n_str, body_str, after)) = find_top_level_loop_call(trimmed) {
167 let before_clean = before.trim().trim_end_matches(';').trim();
168 if !before_clean.is_empty() {
169 self.eval_processed_with_loops(before_clean, original)?;
170 }
171 let n_val = self.eval_processed_with_loops(n_str.trim(), original)?;
172 let n = if n_val.is_finite() {
173 (n_val as i64).clamp(0, MAX_LOOP_ITER)
174 } else {
175 0
176 };
177 let body_clean = body_str
178 .trim()
179 .trim_start_matches(';')
180 .trim_end_matches(';')
181 .trim();
182 if !body_clean.is_empty() {
183 // Pre-compile the body once and run it N times. The body has
184 // already been through `preprocess_expression` (the caller
185 // owns the preprocessed `expr` slice this `body_clean` is
186 // sliced from), so `build_operator_tree` consumes it
187 // directly. When the body lowers cleanly to bytecode
188 // (every common init-loop shape — `gmegabuf_set(i, 0); i = i
189 // + 1`, etc.), the inner loop is just `bc.run` against the
190 // mutable context, skipping evalexpr's per-iteration parse
191 // + tree walk that otherwise dominates `loop(1_000_000, ...)`
192 // compile-time eval. Nested loop/exec/while inside the
193 // body fall through to the recursive `eval_processed_with_
194 // loops` path so their semantics stay intact.
195 if !body_has_nested_loop(body_clean)
196 && let Ok(node) =
197 evalexpr::build_operator_tree::<evalexpr::DefaultNumericTypes>(body_clean)
198 {
199 let nodes = [node];
200 if let Ok(bc) =
201 crate::bytecode::CompiledBytecode::try_compile(&nodes, &mut self.context)
202 {
203 for _ in 0..n {
204 bc.run(&mut self.context);
205 }
206 } else {
207 // Body parsed but didn't lower — replay the Node N
208 // times via the leaf evalexpr walker. Still skips
209 // the per-iteration re-parse.
210 let node = &nodes[0];
211 for _ in 0..n {
212 let _ = node.eval_with_context_mut(&mut self.context);
213 }
214 }
215 } else {
216 // Nested loop / unparseable body — fall back to the
217 // recursive interceptor per iteration.
218 for _ in 0..n {
219 self.eval_processed_with_loops(body_clean, original)?;
220 }
221 }
222 }
223 let after_clean = after.trim().trim_start_matches(';').trim();
224 if !after_clean.is_empty() {
225 return self.eval_processed_with_loops(after_clean, original);
226 }
227 return Ok(0.0);
228 }
229
230 // `exec2(a, b)` / `exec3(a, b, c)`. EEL2's sequential evaluators:
231 // each arg is evaluated against the shared context in order, and
232 // the value of the last arg is returned. Args themselves may be
233 // `;`-chains.
234 if let Some((before, args, after)) = find_top_level_exec_call(trimmed) {
235 let before_clean = before.trim().trim_end_matches(';').trim();
236 if !before_clean.is_empty() {
237 self.eval_processed_with_loops(before_clean, original)?;
238 }
239 let mut last_val = 0.0;
240 for arg in &args {
241 let arg_clean = arg
242 .trim()
243 .trim_start_matches(';')
244 .trim_end_matches(';')
245 .trim();
246 if !arg_clean.is_empty() {
247 last_val = self.eval_processed_with_loops(arg_clean, original)?;
248 }
249 }
250 let after_clean = after.trim().trim_start_matches(';').trim();
251 if !after_clean.is_empty() {
252 return self.eval_processed_with_loops(after_clean, original);
253 }
254 return Ok(last_val);
255 }
256
257 // `while(body)`. EEL2's do-while loop; we run a single pass. The
258 // corpus pattern is `while(exec2(stmts, 0))` where the second arg
259 // returns 0 to terminate after one iteration; single-pass matches
260 // that intent and avoids the risk of an unbounded loop.
261 if let Some((before, body, after)) = find_top_level_while_call(trimmed) {
262 let before_clean = before.trim().trim_end_matches(';').trim();
263 if !before_clean.is_empty() {
264 self.eval_processed_with_loops(before_clean, original)?;
265 }
266 let body_clean = body
267 .trim()
268 .trim_start_matches(';')
269 .trim_end_matches(';')
270 .trim();
271 let body_val = if !body_clean.is_empty() {
272 self.eval_processed_with_loops(body_clean, original)?
273 } else {
274 0.0
275 };
276 let after_clean = after.trim().trim_start_matches(';').trim();
277 if !after_clean.is_empty() {
278 return self.eval_processed_with_loops(after_clean, original);
279 }
280 return Ok(body_val);
281 }
282
283 // Leaf: hand the (loop-free) expression to evalexpr.
284 match eval_with_context_mut(trimmed, &mut self.context) {
285 Ok(value) => match value {
286 evalexpr::Value::Float(f) => Ok(f),
287 evalexpr::Value::Int(i) => Ok(i as f64),
288 evalexpr::Value::Boolean(b) => Ok(if b { 1.0 } else { 0.0 }),
289 evalexpr::Value::Empty => Ok(0.0),
290 _ => Err(EvalError::TypeError {
291 expected: "number".to_string(),
292 got: format!("{:?}", value),
293 }),
294 },
295 Err(e) => Err(EvalError::SyntaxError {
296 expression: original.to_string(),
297 reason: e.to_string(),
298 }),
299 }
300 }
301}
302
303/// Return `true` when the next non-empty equation after `idx` looks like a
304/// continuation of the current buffer rather than a fresh statement, so the
305/// buffer flush should be deferred until it's been appended.
306///
307/// Continuation triggers: leading `*`/`/`/`%` (orphan binop), leading
308/// `+`/`-` on a line with no top-level `=` (closed-paren orphan binop),
309/// leading `=` (not `==`) — split-LHS assignment, leading `(` — split
310/// function-call site. Blank equations are skipped first.
311fn next_eq_is_continuation(eqs: &[String], idx: usize) -> bool {
312 for next in eqs.iter().skip(idx + 1) {
313 let stripped = strip_line_comments(next);
314 let trimmed = stripped.trim();
315 if trimmed.is_empty() {
316 continue;
317 }
318 return starts_with_binop(trimmed)
319 || starts_with_plus_minus_orphan(trimmed)
320 || starts_with_split_assignment(trimmed)
321 || starts_with_open_paren(trimmed);
322 }
323 false
324}
325
326/// Collapse junk-separator pairs that the join step can produce when an
327/// equation closes a multi-line block. `;)` and `;,` are never valid in
328/// evalexpr's grammar and are always artifacts of our `;`-join over a
329/// previously-truncated equation that ended with `,`/`(`.
330pub(super) fn sanitize_joined_block(s: &str) -> String {
331 let mut out = s.replace(";)", ")").replace(";,", ",");
332 while out.contains(",;") {
333 out = out.replace(",;", ",");
334 }
335 while out.contains("(;") {
336 out = out.replace("(;", "(");
337 }
338 // Collapse `;;` runs defensively — the per-equation strip filters
339 // comment-only equations before they reach the buffer, but evalexpr's
340 // "Empty operand" diagnostics on stray `;;` are hard to debug.
341 while out.contains(";;") {
342 out = out.replace(";;", ";");
343 }
344 out
345}
346
347/// Count `(` − `)` across `s`. Byte-accurate for ASCII and conservative for
348/// UTF-8 (multibyte chars never collide with the paren ASCII codepoints).
349fn paren_balance(s: &str) -> i32 {
350 let mut depth = 0i32;
351 for b in s.bytes() {
352 match b {
353 b'(' => depth += 1,
354 b')' => depth -= 1,
355 _ => {}
356 }
357 }
358 depth
359}
360
361/// Return `true` when the first non-whitespace byte of `s` is a binary
362/// operator with no unary form (`*`, `/`, `%`). Leading `+`/`-` is excluded
363/// because they're also legitimate unary signs on a fresh statement.
364fn starts_with_binop(s: &str) -> bool {
365 let trimmed = s.trim_start();
366 let Some(c) = trimmed.bytes().next() else {
367 return false;
368 };
369 matches!(c, b'*' | b'/' | b'%')
370}
371
372/// Return `true` when the first non-whitespace byte of `s` is `+` or `-`
373/// AND `s` contains no top-level assignment `=` (single `=`, not `==` or a
374/// compound op). Corpus authors split a single arithmetic expression
375/// across `per_pixel_N=` lines whose continuation starts with `+`/`-`:
376///
377/// ```text
378/// per_pixel_4=rot=cos(rad*q4)
379/// per_pixel_5=+sin(rad*q5)
380/// ```
381///
382/// Treating these as orphan binops mirrors the existing `*`/`/`/`%`
383/// path. The "no top-level `=`" guard rules out legitimate `b = -1` /
384/// `c = +x` standalone assignments — those flush independently because
385/// their leading byte is the LHS identifier, not the sign.
386pub(super) fn starts_with_plus_minus_orphan(s: &str) -> bool {
387 let trimmed = s.trim_start();
388 let Some(c) = trimmed.bytes().next() else {
389 return false;
390 };
391 if !matches!(c, b'+' | b'-') {
392 return false;
393 }
394 !contains_top_level_assignment(trimmed)
395}
396
397/// `true` if `s` contains a top-level single `=` (assignment), as opposed
398/// to `==` / `<=` / `>=` / `!=` / `+=` / `-=` / `*=` / `/=` / `%=`.
399fn contains_top_level_assignment(s: &str) -> bool {
400 let bytes = s.as_bytes();
401 let mut depth = 0i32;
402 let mut i = 0;
403 while i < bytes.len() {
404 match bytes[i] {
405 b'(' => depth += 1,
406 b')' => depth -= 1,
407 b'=' if depth == 0 => {
408 let next = bytes.get(i + 1).copied();
409 let prev = if i > 0 { Some(bytes[i - 1]) } else { None };
410 let is_double = next == Some(b'=');
411 let is_compound = matches!(
412 prev,
413 Some(b'=' | b'<' | b'>' | b'!' | b'+' | b'-' | b'*' | b'/' | b'%')
414 );
415 if !is_double && !is_compound {
416 return true;
417 }
418 if is_double {
419 i += 1;
420 }
421 }
422 _ => {}
423 }
424 i += 1;
425 }
426 false
427}
428
429/// `true` when `s` starts with `=` (single, not `==`), indicating the
430/// previous equation's tail is the LHS of an assignment split across .milk
431/// lines.
432fn starts_with_split_assignment(s: &str) -> bool {
433 let bytes = s.trim_start().as_bytes();
434 bytes.first() == Some(&b'=') && bytes.get(1) != Some(&b'=')
435}
436
437/// `true` when `s` starts with `(`, indicating a function call whose name
438/// and arg list were split across .milk lines.
439fn starts_with_open_paren(s: &str) -> bool {
440 s.trim_start().as_bytes().first() == Some(&b'(')
441}
442
443fn ends_with_dangling_binop(s: &str) -> bool {
444 let trimmed = s.trim_end();
445 let Some(c) = trimmed.bytes().last() else {
446 return false;
447 };
448 // `(` and `,` are already covered by `paren_balance` so only watch for
449 // arithmetic operators that don't bump the paren counter.
450 if !matches!(c, b'+' | b'-' | b'*' | b'/' | b'%') {
451 return false;
452 }
453 // Reject `1e+`/`1e-` (scientific exponent) and the rare `++`/`--` by
454 // requiring the preceding non-space byte to neither match the dangling
455 // operator nor be `e`/`E`.
456 let prev = trimmed
457 .as_bytes()
458 .iter()
459 .rev()
460 .skip(1)
461 .find(|&&b| b != b' ' && b != b'\t')
462 .copied();
463 !matches!(prev, Some(p) if p == c || p == b'e' || p == b'E')
464}
465
466/// `true` if `s` contains an EEL2 control-flow builtin name (`loop`,
467/// `while`, `exec2`, `exec3`) as an identifier. Used as a cheap
468/// conservative guard before the pre-compile-the-body optimisation in
469/// the `loop(N, body)` interceptor: when the body has a nested
470/// loop/while/exec, replaying it as a single `Node` would bypass the
471/// interceptor's recursive descent and break those semantics, so we
472/// fall back to the slow path.
473///
474/// The check is byte-level and ignores word boundaries on purpose —
475/// `loops_var` or `while_count` would also match and trigger fallback.
476/// That's safe (over-conservative) and avoids a more expensive scan.
477fn body_has_nested_loop(s: &str) -> bool {
478 s.contains("loop") || s.contains("while") || s.contains("exec2") || s.contains("exec3")
479}
480
481/// Locate a top-level `loop(<n_expr>, <body>)` call in `s`. Returns
482/// `(before, n_expr, body, after)` slices into `s`, or `None` if no
483/// well-formed top-level loop exists.
484fn find_top_level_loop_call(s: &str) -> Option<(&str, &str, &str, &str)> {
485 let bytes = s.as_bytes();
486 let mut depth = 0usize;
487 let mut i = 0usize;
488 while i < bytes.len() {
489 match bytes[i] {
490 b'(' => depth += 1,
491 b')' => depth = depth.saturating_sub(1),
492 b'l' | b'L' if depth == 0 => {
493 if i + 4 <= bytes.len() && bytes[i..i + 4].eq_ignore_ascii_case(b"loop") {
494 let prev_ok = i == 0 || !is_ident_byte(bytes[i - 1]);
495 if prev_ok && (i + 4 == bytes.len() || !is_ident_byte(bytes[i + 4])) {
496 let mut j = i + 4;
497 while j < bytes.len() && matches!(bytes[j], b' ' | b'\t' | b'\n') {
498 j += 1;
499 }
500 if j < bytes.len() && bytes[j] == b'(' {
501 let open = j;
502 let close = match_close_paren(bytes, open)?;
503 let inner = &s[open + 1..close];
504 // Split on the first top-level `,`. Also accept
505 // `;` as a fallback for `loop(N; body)` typos
506 // MD2's relaxed parser forgave.
507 let split = find_top_level_comma(inner.as_bytes())
508 .or_else(|| find_top_level_semi(inner.as_bytes()))?;
509 let n_str = &inner[..split];
510 let body_str = &inner[split + 1..];
511 let before = &s[..i];
512 let after = &s[close + 1..];
513 return Some((before, n_str, body_str, after));
514 }
515 }
516 }
517 }
518 _ => {}
519 }
520 i += 1;
521 }
522 None
523}
524
525/// Locate the first top-level `,` in `bytes`. Returns the byte offset or
526/// `None` if every comma is enclosed by parens.
527fn find_top_level_comma(bytes: &[u8]) -> Option<usize> {
528 let mut depth = 0usize;
529 for (i, b) in bytes.iter().enumerate() {
530 match *b {
531 b'(' => depth += 1,
532 b')' => depth = depth.saturating_sub(1),
533 b',' if depth == 0 => return Some(i),
534 _ => {}
535 }
536 }
537 None
538}
539
540/// Fallback splitter for `loop(N; body)`-shaped corpus typos.
541fn find_top_level_semi(bytes: &[u8]) -> Option<usize> {
542 let mut depth = 0usize;
543 for (i, b) in bytes.iter().enumerate() {
544 match *b {
545 b'(' => depth += 1,
546 b')' => depth = depth.saturating_sub(1),
547 b';' if depth == 0 => return Some(i),
548 _ => {}
549 }
550 }
551 None
552}
553
554/// Locate a top-level call `name(a, b, …)` and split it into
555/// (before, args, after). Returns `None` if no qualifying call exists at
556/// depth 0.
557fn find_top_level_named_call<'a>(
558 s: &'a str,
559 name: &[u8],
560) -> Option<(&'a str, Vec<&'a str>, &'a str)> {
561 let bytes = s.as_bytes();
562 let mut depth = 0usize;
563 let mut i = 0usize;
564 while i < bytes.len() {
565 match bytes[i] {
566 b'(' => depth += 1,
567 b')' => depth = depth.saturating_sub(1),
568 c if depth == 0
569 && i + name.len() <= bytes.len()
570 && c.eq_ignore_ascii_case(&name[0])
571 && bytes[i..i + name.len()].eq_ignore_ascii_case(name) =>
572 {
573 let prev_ok = i == 0 || !is_ident_byte(bytes[i - 1]);
574 let next = i + name.len();
575 if prev_ok && (next == bytes.len() || !is_ident_byte(bytes[next])) {
576 let mut j = next;
577 while j < bytes.len() && matches!(bytes[j], b' ' | b'\t' | b'\n') {
578 j += 1;
579 }
580 if j < bytes.len() && bytes[j] == b'(' {
581 let open = j;
582 let close = match_close_paren(bytes, open)?;
583 let inner = &s[open + 1..close];
584 let args = split_top_level_commas(inner);
585 let before = &s[..i];
586 let after = &s[close + 1..];
587 return Some((before, args, after));
588 }
589 }
590 }
591 _ => {}
592 }
593 i += 1;
594 }
595 None
596}
597
598/// Find a top-level `exec2(a, b)` or `exec3(a, b, c)`.
599fn find_top_level_exec_call(s: &str) -> Option<(&str, Vec<&str>, &str)> {
600 find_top_level_named_call(s, b"exec2")
601 .filter(|(_, args, _)| args.len() == 2)
602 .or_else(|| find_top_level_named_call(s, b"exec3").filter(|(_, args, _)| args.len() == 3))
603}
604
605/// Find a top-level `while(body)`. Single-pass evaluation (see module-level
606/// docs).
607fn find_top_level_while_call(s: &str) -> Option<(&str, &str, &str)> {
608 let (before, args, after) = find_top_level_named_call(s, b"while")?;
609 if args.is_empty() {
610 return None;
611 }
612 Some((before, args[0], after))
613}