onedrop_hlsl/
lib.rs

1//! HLSL to WGSL Translation
2//!
3//! Pragmatic, MilkDrop-2-targeted translator. The MD2 user shader body lives
4//! inside a `shader_body { ... }` block, samples the previous frame via
5//! `tex2D` / `GetPixel` / `GetBlur1..3`, and uses HLSL-style typed local
6//! declarations (`float2 uv2;`). The translator turns that into a WGSL
7//! fragment-body fragment that the codegen wrapper can paste inside its
8//! `fs_main`.
9//!
10//! The rewrites are still string-driven (no AST) — but they understand the
11//! MD2 conventions enough to land the dominant cases.
12
13pub mod ast;
14pub mod lex;
15pub mod parse;
16pub mod rewrite;
17mod texture_plan;
18pub mod types;
19
20use regex::Regex;
21use std::sync::LazyLock;
22use thiserror::Error;
23
24pub use texture_plan::{
25    MAX_USER_TEXTURE_SLOTS, TextureBindingPlan, TextureSlot, UserSamplerRef, scan_user_samplers,
26    user_texture_binding_name,
27};
28pub use types::{SymbolTable, WgslType};
29
30use std::fmt::Write as _;
31use texture_plan::{noise_sampler_for, replace_texture_sampling_with_plan};
32
33// ---------------------------------------------------------------------------
34// MD2 outer-wrapper stripping
35// ---------------------------------------------------------------------------
36
/// `shader_body` keyword optionally followed by whitespace/newlines and `{`.
/// We then balance braces ourselves to recover the body — a regex alone
/// can't do nested-brace balancing reliably.
///
/// The pattern has no leading word boundary, so an identifier that merely
/// ends in `shader_body` (e.g. `my_shader_body {`) matches as well, and it
/// runs on raw text, so an occurrence inside a comment matches too.
/// [`strip_shader_body_wrapper`] only ever looks at the first match.
static SHADER_BODY_OPEN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"shader_body\s*\{").unwrap());
42
43/// MD2 ships warp/comp shaders wrapped in a `shader_body { ... }` block. The
44/// codegen wrapper pastes the user code inside its own `fs_main { ... }`, so
45/// the outer wrapper has to come off first — otherwise WGSL sees a stray
46/// identifier (`shader_body`) followed by `{` and fails with
47/// `expected assignment or increment/decrement, found "{"`.
48///
49/// If the input has no `shader_body` wrapper (synthetic test shaders, or a
50/// preset that already inlines the body), the input is returned unchanged.
51fn strip_shader_body_wrapper(src: &str) -> String {
52    let Some(open) = SHADER_BODY_OPEN.find(src) else {
53        return src.to_string();
54    };
55    let body_start = open.end();
56    let bytes = src.as_bytes();
57    let mut depth = 1usize;
58    let mut i = body_start;
59    while i < bytes.len() {
60        match bytes[i] {
61            b'{' => depth += 1,
62            b'}' => {
63                depth -= 1;
64                if depth == 0 {
65                    // Replace the whole `shader_body { ... }` span (incl. the
66                    // closing brace) with just the inner body — keeps any
67                    // trailing comment / whitespace after it intact.
68                    let mut out = String::with_capacity(src.len());
69                    out.push_str(&src[..open.start()]);
70                    out.push_str(&src[body_start..i]);
71                    out.push_str(&src[i + 1..]);
72                    return out;
73                }
74            }
75            // Skip over `//` line comments and `/* */` block comments so a
76            // brace inside a comment doesn't confuse the depth counter.
77            b'/' if i + 1 < bytes.len() && bytes[i + 1] == b'/' => {
78                while i < bytes.len() && bytes[i] != b'\n' {
79                    i += 1;
80                }
81                continue;
82            }
83            b'/' if i + 1 < bytes.len() && bytes[i + 1] == b'*' => {
84                i += 2;
85                while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
86                    i += 1;
87                }
88                i += 2;
89                continue;
90            }
91            _ => {}
92        }
93        i += 1;
94    }
95    // Unbalanced braces — leave it alone and let naga produce the error.
96    src.to_string()
97}
98
99// ---------------------------------------------------------------------------
100// Pre-compiled regex patterns
101// ---------------------------------------------------------------------------
102
/// Strips leading zeros from integer literals (HLSL allows `02` for `2`,
/// WGSL rejects them with `invalid numeric literal format`). Targets only
/// integer literals — `0.5` and `0` and `100` are untouched because the
/// pattern requires `\b0+` followed by another decimal digit (so `0.` and
/// `0)` never match). Capture group 1 is the digit that follows the zeros.
///
/// NOTE(review): `\b` also holds between `.` and `0`, so the fractional
/// part of a literal such as `0.05` matches as `05`. Whether that is harmful
/// depends on the replacement applied at the use site (not visible here) —
/// confirm it cannot turn `0.05` into `0.5`.
static LEADING_ZERO_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b0+([0-9])").unwrap());
109
/// HLSL semantic annotation: a `:` followed by optional whitespace and an
/// identifier made only of uppercase ASCII letters, digits and underscores
/// (e.g. `: POSITION`). Lowercase-leading text after a colon — such as the
/// type in a WGSL `var x: vec2<f32>` — does not match because the first
/// identifier character must be `A-Z` or `_`.
///
/// NOTE(review): presumably consumed by `replace_semantics`; confirm at the
/// use site.
static SEMANTICS_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r":\s*[A-Z_][A-Z0-9_]*").unwrap());
112
/// HLSL typed local declaration, post type-substitution. The whole `<TYPE>
/// <decls>;` statement is captured; the declarator list is then expanded
/// into one WGSL `var` per name. Examples:
///   `f32 gx1 = a;`                →  `var gx1: f32 = a;`
///   `vec2<f32> uv2;`              →  `var uv2: vec2<f32>;`
///   `vec3<f32> ret1, neu, crisp;` →  `var ret1: vec3<f32>; var neu: vec3<f32>; var crisp: vec3<f32>;`
///
/// Capture groups: 1 = leading whitespace of the line, 2 = the WGSL type
/// token, 3 = everything between the type and the terminating `;` (the
/// declarator list, initialisers included).
///
/// Anchored to the start of a (multi-line-aware) line so we don't confuse the
/// type tokens that appear as function return types or constructor names.
/// The captured group excludes both `;` (the terminator) and `{` (function
/// body open brace) — the latter stops the greedy match cold when the
/// pattern would otherwise swallow a function signature plus part of its
/// body up to the first `;` inside. The closure also calls
/// [`is_function_signature`] for the residual case where the regex still
/// matches (e.g., one-liner functions).
static LOCAL_DECL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"(?m)^(\s*)(f32|i32|bool|vec2<f32>|vec3<f32>|vec4<f32>|vec2<bool>|vec3<bool>|vec4<bool>|mat4x4<f32>|mat3x3<f32>|mat2x2<f32>)\s+([^;{]+);",
    )
    .unwrap()
});
134
/// Normalise `;<space>TYPE` to `;\nTYPE` so the line-anchored
/// [`LOCAL_DECL_REGEX`] sees each declaration on its own line. The Isosceles
/// preset (and a handful of MD2 packs that compress kaleidoscope state) put
/// multiple typed locals on a single source line:
///
/// ```text
/// float2 cntr = float2(q13,q14); float sin = q11; float cos = q12; float scale = q15;
/// ```
///
/// Without this pre-pass, only the first decl converted to a `var` — the
/// rest stayed as raw HLSL and tripped naga with
/// `expected assignment or increment/decrement; found 'sin'`. Safe for
/// `for(f32 i = 0; …)` because the first `;` inside the parens is followed
/// by an expression (the loop condition), never by a type keyword.
///
/// We also split when the type is glued to `{` (function-body opening brace,
/// conditional/loop body), and recognise vec types ending in `>`. A naive
/// `\b` boundary check fails after `vec2<f32>` because `>` is already a
/// non-word char; vec-typed declarations grouped behind any prior `; <decl>`
/// would otherwise stay un-rewritten and naga would report `expected
/// assignment or increment/decrement; found 'uvc'`. Requiring an
/// identifier-start `[A-Za-z_]` after the type+space rules out false
/// positives on constructor calls like `vec3<f32>(0)`.
///
/// Capture groups: 1 = the preceding `;` or `{`, 2 = the WGSL type token,
/// 3 = the first character of the declared identifier. Only spaces and tabs
/// (not newlines) are accepted between the pieces.
static INLINE_DECL_SPLIT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"([;{])[ \t]*(f32|i32|bool|vec2<f32>|vec3<f32>|vec4<f32>|vec2<bool>|vec3<bool>|vec4<bool>|mat4x4<f32>|mat3x3<f32>|mat2x2<f32>)[ \t]+([A-Za-z_])",
    )
    .unwrap()
});
164
/// HLSL `sampler foo;` / `texture foo;` declarations at module scope. The
/// codegen wrapper provides the actual texture/sampler bindings, so user
/// declarations are redundant — and they confuse the WGSL parser when they
/// land inside `fs_main`. Stripped out wholesale.
///
/// Also matches the **comma-list** form `sampler a, b, c;` that a number
/// of presets ship as a stylistic shortcut. A simpler regex stopping at
/// the first identifier would let the trailing comma list survive as
/// orphan statements `b; c;` that trip the WGSL parser with
/// `expected assignment or increment/decrement; found 'sampler_fw_rand01'`.
///
/// The match is line-anchored (multi-line mode, optional leading
/// whitespace) and runs through the terminating `;`. Declarations carrying
/// anything other than a plain identifier list (e.g. an initialiser block)
/// do not match.
static SAMPLER_DECL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?m)^\s*(?:sampler|texture|texture2D|texture3D|sampler2D|sampler3D)\s+[A-Za-z_][A-Za-z0-9_]*(?:\s*,\s*[A-Za-z_][A-Za-z0-9_]*)*\s*;").unwrap()
});
178
/// Preprocessor directives (`#define`, `#include`, `#pragma`, …). HLSL
/// presets occasionally use them; WGSL has no preprocessor, so we strip
/// the whole line.
///
/// Matches a single physical line whose first non-whitespace character is
/// `#`; a directive continued onto the next line with a trailing `\` is only
/// matched up to the end of its first line.
static PREPROC_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?m)^\s*#[^\n]*$").unwrap());
183
/// HLSL storage-class qualifiers `static`, `uniform` and `extern`, matched
/// as whole words together with any whitespace that follows them. The
/// pattern is not anchored to a declaration start: every whole-word
/// occurrence in the text matches.
///
/// `static const` is a common HLSL pattern for "function-scope compile-time
/// constant", but WGSL inside a function uses `let` (or `const` at module
/// scope only). This pattern covers the `static` half; the `const` half is
/// the job of [`CONST_TYPE_REGEX`]. With both gone,
/// `rewrite_local_declarations` can turn the rest into a regular `var`.
static STORAGE_CLASS_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b(static|uniform|extern)\b\s*").unwrap());
191
/// HLSL `const TYPE NAME` at start of a typed local declaration.
/// We strip `const ` only when followed by a recognised HLSL type token
/// — leaves WGSL module-level `const` (which doesn't appear in user
/// shader bodies anyway) alone.
///
/// Capture group 1 is the type token. Recognised tokens are `float` with an
/// optional width digit `1`-`4` and an optional `xN` matrix suffix, plus the
/// bare scalars `int` and `bool`. Because of the trailing `\b`, vector forms
/// of the latter two (`int2`, `bool3`, …) are not matched.
static CONST_TYPE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\bconst\s+(float[1-4]?(?:x[1-4])?|int|bool)\b").unwrap());
198
/// Postfix `<ident>++` and `<ident>--` (HLSL increment/decrement).
/// WGSL has no postfix operators; we rewrite to the equivalent
/// compound assignment `<ident> = <ident> + 1` only when the operator is
/// directly followed by `;` or `)` so expression-position uses like
/// `a[i++]` don't get mangled. Real preset pattern: `n++;` at end of a
/// per-iteration loop.
///
/// Capture groups: 1 = the identifier, 2 = the terminator (`;` or `)`).
///
/// NOTE(review): the `)` terminator is there for `for (…; …; i++)`, but it
/// equally matches a parenthesised expression use such as `f(i++)` —
/// confirm the rewrite is acceptable there.
static POSTFIX_INC_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b([A-Za-z_][A-Za-z0-9_]*)\s*\+\+\s*([;)])").unwrap());
/// Decrement counterpart of [`POSTFIX_INC_REGEX`]; same capture groups.
static POSTFIX_DEC_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b([A-Za-z_][A-Za-z0-9_]*)\s*--\s*([;)])").unwrap());
209
/// Errors reported by the HLSL → WGSL translator.
#[derive(Error, Debug)]
pub enum TranslationError {
    /// General translation failure. The payload is the detail text shown
    /// after the `Translation error: ` prefix.
    #[error("Translation error: {0}")]
    Translation(String),

    /// The input relies on an HLSL feature the translator does not handle.
    /// The payload is shown after the `Unsupported HLSL feature: ` prefix
    /// (presumably the feature's name).
    #[error("Unsupported HLSL feature: {0}")]
    Unsupported(String),
}

/// Crate-local `Result` alias whose error type is [`TranslationError`].
pub type Result<T> = std::result::Result<T, TranslationError>;
220
221/// Translate HLSL shader code to WGSL.
222///
223/// The pipeline is intentionally linear:
224/// 1. strip the MD2 `shader_body { ... }` outer wrapper,
225/// 2. type-substitute (`float4 → vec4<f32>`, …),
226/// 3. function-substitute (`lerp → mix`, `tex2D → textureSample`, …),
227/// 4. rewrite typed local declarations into WGSL `var ident: T = …;` form,
228/// 5. strip HLSL semantics (`: POSITION`, …).
229pub fn translate_shader(hlsl: &str) -> Result<String> {
230    translate_shader_with_plan(hlsl, &TextureBindingPlan::empty())
231}
232
233/// Same as [`translate_shader`], but routes unrecognised `tex2D` sampler names
234/// through the supplied [`TextureBindingPlan`]. Preset authors reference
235/// disk-loaded textures via `sampler sampler_<NAME>;` + `tex2D(sampler_<NAME>,
236/// uv)`; the renderer scans the HLSL, builds a plan that resolves each name to
237/// a slot in the comp pipeline's user-texture binding array, and threads it
238/// through here so the emitted WGSL points at the right binding.
239///
240/// Empty plan ≡ legacy behaviour: unrecognised samplers fall back to
241/// `sampler_main` with a `/*was: <name>*/` debug comment.
242pub fn translate_shader_with_plan(hlsl: &str, plan: &TextureBindingPlan) -> Result<String> {
243    // Stash the original input so the global-var hoist pass at the bottom
244    // can re-parse it for `Item::GlobalVar` names. The rewriter chain
245    // below shadows `hlsl`, and by the time we reach the hoist the AST
246    // boundary between top-level globals and shader_body locals is gone
247    // (everything is one flat translated body string).
248    let hlsl_for_ast = hlsl;
249
250    // AST-driven pre-passes. Each one parses the HLSL, walks the AST,
251    // and writes targeted text edits back; if parsing fails the input
252    // passes through unchanged. The downstream regex pipeline then sees
253    // a slightly friendlier source. Order matters: array globals must be
254    // lowered before the regex `replace_types` runs, and the binop / UV
255    // passes are stable under each other so the chain order is fixed in
256    // `rewrite::apply_all`.
257    let hlsl = rewrite::apply_all(hlsl);
258    let mut wgsl = strip_shader_body_wrapper(&hlsl);
259    wgsl = comment_out_prose_lines(&wgsl);
260    wgsl = strip_preprocessor(&wgsl);
261    wgsl = strip_sampler_declarations(&wgsl);
262    wgsl = strip_storage_class_qualifiers(&wgsl);
263    wgsl = rewrite_postfix_inc_dec(&wgsl);
264    wgsl = replace_types(&wgsl);
265    wgsl = replace_functions(&wgsl);
266    wgsl = replace_texture_sampling_with_plan(&wgsl, plan);
267    wgsl = rewrite_tex3d_calls(&wgsl);
268    wgsl = rewrite_local_declarations(&wgsl);
269    wgsl = brace_up_single_statement_blocks(&wgsl);
270    wgsl = dedup_var_declarations(&wgsl);
271    wgsl = replace_statement_commas(&wgsl);
272    wgsl = replace_semantics(&wgsl);
273
274    // Type-aware passes run last — they need the source to look like
275    // valid WGSL `var` declarations with explicit type annotations,
276    // which only the rewrites above produce. The symbol table is built
277    // once and reused so the truncation pass sees the same view of
278    // locals as the broadcast pass. The broadcast pass recurses into
279    // nested call args itself, so a single sweep is enough.
280    let table = types::SymbolTable::from_source(&wgsl);
281    wgsl = types::inject_broadcasts(&wgsl, &table);
282    wgsl = types::inject_truncations(&wgsl, &table);
283    wgsl = types::inject_assignment_coercions(&wgsl, &table);
284    // Swizzle LHS rewrite runs after assignment coercion so the LHS view
285    // is already a single bare ident statement (no nested cast inserted
286    // between target and `.xy`). Runs before validation can flag
287    // `invalid left-hand side of assignment`.
288    wgsl = types::inject_swizzle_assignments(&wgsl, &table);
289
290    // Lift module-scope user functions LAST so the function body has
291    // already been through every regex rewrite and type-aware pass
292    // (decl conversion, broadcast, truncation, assignment coercion,
293    // swizzle reconstruction). After lift, the body emits as a
294    // syntactically valid WGSL `fn` and the wrapper places it at module
295    // scope on the other side of `LIFTED_FN_SENTINEL`.
296    let lifted = lift_user_functions(&mut wgsl);
297
298    // Hoist pre-`shader_body` global variable declarations (parsed as
299    // `Item::GlobalVar` in the AST) from the fragment body to module
300    // scope. Lifted user functions (now at module scope) routinely
301    // reference these — `float3 t = float3(q20, q23, q26)` declared at
302    // the top of the preset, then used inside a lifted `float3 project(...)`
303    // would otherwise hit "no definition in scope for identifier: `t`"
304    // when the parser walks the lifted body and the surrounding fs_main
305    // locals are out of reach.
306    //
307    // Implementation: re-parse the original HLSL to collect the names
308    // of top-level `GlobalVar` items, scan the translated body for
309    // `var NAME: TYPE [= INIT];` lines whose name is in that set, emit a
310    // module-scope `var<private> NAME: TYPE;` declaration, and replace the
311    // body line with `NAME = INIT;` (or drop entirely when there's no init).
312    let hoisted_globals = hoist_global_vars(&mut wgsl, hlsl_for_ast);
313
314    let module_scope = match (lifted.is_empty(), hoisted_globals.is_empty()) {
315        (true, true) => return Ok(wgsl),
316        (false, true) => lifted,
317        (true, false) => hoisted_globals,
318        (false, false) => format!("{lifted}\n{hoisted_globals}"),
319    };
320    Ok(format!("{module_scope}\n{LIFTED_FN_SENTINEL}\n{wgsl}"))
321}
322
323/// Re-parse `hlsl` (the original input, before any rewriter touched it)
324/// and return a map of `name → wgsl_type` for top-level `Item::GlobalVar`
325/// items. Used by [`hoist_global_vars`] to decide which body-level `var`
326/// declarations belong at module scope.
327///
328/// Skips any name that collides with a WGSL builtin function. A few
329/// MD2 authors declare locals named `sin` / `cos` / `pow` / `dot` etc.
330/// (the Isosceles preset's kaleidoscope state line) — hoisting these to
331/// module scope as `var<private>` would shadow the builtin globally,
332/// breaking every other site that calls `sin(x)` as a function. Leaving
333/// them as fs_main locals preserves the per-scope shadowing the author
334/// expected.
335///
336/// Returned types are in WGSL form (`vec2<f32>`, `mat3x3<f32>`, …) so the
337/// hoist pass can compare them against the in-body `var X: T` lines and
338/// only transform exact-type matches — re-declarations like `float2
339/// rss,uv2;` at top level followed by `float3 uv2 = …;` in `shader_body`
340/// are shadowing the global and must keep their local declaration intact.
341fn collect_global_var_types(hlsl: &str) -> std::collections::HashMap<String, String> {
342    // Pre-strip storage-class qualifiers (`uniform`, `extern`, ...) the HLSL
343    // parser doesn't recognise — they're cosmetic in MD2 user shaders and
344    // would otherwise break the parse, dropping every following GlobalVar
345    // from the AST. Real-world hit: `Se7enSlasher - Texture Distortion …`
346    // has `uniform float caval; … float3 col;` and the `uniform` line
347    // failed parse, so `col` was never registered for hoisting.
348    let cleaned = strip_storage_class_qualifiers(hlsl);
349    let Ok(tu) = parse::parse_hlsl(&cleaned) else {
350        return std::collections::HashMap::new();
351    };
352    tu.items
353        .iter()
354        .filter_map(|i| match i {
355            ast::Item::GlobalVar(g) if !is_wgsl_builtin_function_name(&g.name) => {
356                hlsl_type_to_wgsl(&g.ty.name).map(|wgsl_ty| (g.name.clone(), wgsl_ty.to_string()))
357            }
358            _ => None,
359        })
360        .collect()
361}
362
/// Translate an HLSL type name into the WGSL type the hoist pass expects.
///
/// * `bool` maps to `bool`.
/// * `float2x2` / `float3x3` / `float4x4` map to the matching `matNxN<f32>`.
/// * `float`, `int`, `half` and `double` — bare or with a `2`/`3`/`4`
///   width suffix — all map to the `f32` scalar or `vecN<f32>` vector.
///
/// Anything else (user struct names, samplers, unsupported widths such as
/// `float1` or `int2x2`) returns `None`, which tells the hoist pass to leave
/// the variable alone.
fn hlsl_type_to_wgsl(name: &str) -> Option<&'static str> {
    // Spellings that do not follow the scalar-plus-width scheme.
    match name {
        "bool" => return Some("bool"),
        "float2x2" => return Some("mat2x2<f32>"),
        "float3x3" => return Some("mat3x3<f32>"),
        "float4x4" => return Some("mat4x4<f32>"),
        _ => {}
    }

    // Peel off the scalar keyword; what remains is the vector width.
    let width = ["float", "int", "half", "double"]
        .iter()
        .find_map(|scalar| name.strip_prefix(scalar))?;
    match width {
        "" => Some("f32"),
        "2" => Some("vec2<f32>"),
        "3" => Some("vec3<f32>"),
        "4" => Some("vec4<f32>"),
        _ => None,
    }
}
381
/// Report whether `name` is one of the WGSL builtin function names this
/// translator guards against shadowing.
///
/// The table is deliberately limited to the builtins MD2 user shaders
/// actually call; names outside it (nobody shadows `inverseSqrt` in
/// practice) are reported as non-builtins. The comparison is exact and
/// case-sensitive.
fn is_wgsl_builtin_function_name(name: &str) -> bool {
    const GUARDED_BUILTINS: &[&str] = &[
        "abs", "acos", "asin", "atan", "atan2", "ceil", "clamp", "cos", "cosh", "cross",
        "degrees", "distance", "dot", "exp", "exp2", "floor", "fract", "length", "log", "log2",
        "max", "min", "mix", "normalize", "pow", "radians", "reflect", "refract", "round",
        "saturate", "select", "sign", "sin", "sinh", "smoothstep", "sqrt", "step", "tan", "tanh",
        "transpose", "trunc",
    ];
    GUARDED_BUILTINS.iter().any(|&builtin| builtin == name)
}
432
433/// Hoist top-level user globals to module scope.
434///
435/// Scans `body` for `var NAME: TYPE [= INIT];` lines and, when `NAME`
436/// matches a top-level `Item::GlobalVar` from `hlsl_src` **and** the
437/// declared type matches that global's type, emits a module-scope
438/// `var<private> NAME: TYPE;` declaration. The body line is rewritten to a
439/// bare `NAME = INIT;` (assignment runs inside `fs_main` so it can still
440/// reference `q1..q32` and other `var<private>` state seeded by the
441/// wrapper) or removed entirely when there's no initializer.
442///
443/// The TYPE match is what protects a corpus shape like
444/// `float2 rs2,rs0,rss,uv2; … shader_body { float3 uv2 = …; }`: at top
445/// level `uv2` is `vec2<f32>` (the hoist target), but the in-body
446/// `var uv2: vec3<f32> = …;` line is a shadowing re-declaration of a
447/// different type and must keep its local `var` form so the in-body
448/// `uv2` ends up as a `vec3<f32>` local. Without this guard, the bare
449/// regex match would strip the `var` from the shadowing decl and the
450/// assignment would silently target the `vec2` module-scope global,
451/// producing downstream `InvalidStoreTypes` errors.
452///
453/// Returns the joined module-scope declarations (`""` if none).
454fn hoist_global_vars(body: &mut String, hlsl_src: &str) -> String {
455    let globals = collect_global_var_types(hlsl_src);
456    if globals.is_empty() {
457        return String::new();
458    }
459
460    static VAR_DECL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
461        Regex::new(
462            r"\bvar\s+([A-Za-z_][A-Za-z0-9_]*)\s*:\s*([A-Za-z_][A-Za-z0-9_<>]*)\s*(=\s*[^;]+)?;",
463        )
464        .unwrap()
465    });
466
467    let mut hoisted = String::new();
468    let mut seen: std::collections::HashSet<String> = Default::default();
469    let mut new_body = String::with_capacity(body.len());
470    let mut last_end = 0usize;
471
472    for cap in VAR_DECL_REGEX.captures_iter(body) {
473        let full = cap.get(0).unwrap();
474        let name = cap.get(1).unwrap().as_str();
475        let ty = cap.get(2).unwrap().as_str();
476        let Some(expected_ty) = globals.get(name) else {
477            continue;
478        };
479        if expected_ty.as_str() != ty {
480            // Same name, different type — the user is shadowing the
481            // global with a re-declaration. Leave the local `var X: T`
482            // intact so the shadow stays in fs_main scope.
483            continue;
484        }
485        let init = cap.get(3).map(|m| m.as_str());
486
487        // Emit one module-scope declaration per global, even if the body
488        // had two `var X: T;` lines for the same name (the dedup pass
489        // mostly handles this, but be defensive).
490        if seen.insert(name.to_string()) {
491            let _ = writeln!(&mut hoisted, "var<private> {name}: {ty};");
492        }
493
494        new_body.push_str(&body[last_end..full.start()]);
495        if let Some(init) = init {
496            // `init` is `"= <expr>"`; trim the leading `=` and whitespace.
497            let init_value = init.trim_start_matches('=').trim_start();
498            let _ = write!(&mut new_body, "{name} = {init_value};");
499        }
500        // No init → drop the body line; the module-scope `var<private>`
501        // is uninitialised and any later in-body `NAME = ...;` assigns it.
502        last_end = full.end();
503    }
504    new_body.push_str(&body[last_end..]);
505    *body = new_body;
506
507    hoisted
508}
509
/// Marker line that separates module-scope user functions (lifted by
/// [`lift_user_functions`]) from the fragment body inside the translated
/// output. The codegen wrapper splits on this marker: text before goes
/// before `fs_main`, text after goes inside it.
///
/// [`translate_shader_with_plan`] emits the marker only when there is
/// module-scope text (lifted functions and/or hoisted globals); output
/// without module-scope text contains no marker at all.
pub const LIFTED_FN_SENTINEL: &str = "// __ONEDROP_LIFTED_FNS_END__";
515
516/// Find HLSL-shaped function definitions (`<TYPE> <name>(...) { ... }`)
517/// at depth 0 in the translated body, rewrite each signature to WGSL
518/// shape (`fn name(arg: TYPE, …) -> TYPE { ... }`), and remove them from
519/// `src` in place. The lifted functions are returned as a single string
520/// concatenated in source order — `wrap_user_comp_shader_with_plan`
521/// places it before `fs_main`.
522///
523/// Preset pattern this unblocks (real example from
524/// `Geiss - Explosion 3 nz+ enscarpment via lateral hosations.milk`):
525///
526/// ```text
527/// vec2<f32> gradBlur1(vec2<f32> domain, vec2<f32> d, vec3<f32> filter) {
528///    vec3<f32> dx = (2*GetBlur1(domain + vec2<f32>(1,0)*d) - 2*GetBlur1(domain - vec2<f32>(1,0)*d));
529///    ...
530///    return 0.5 * vec2<f32>(...);
531/// }
532/// shader_body { ret += gradBlur1(uv, vec2<f32>(0.5)/aspect.xy, vec3<f32>(0.5,0.5,0.5)); }
533/// ```
534///
535/// After lifting:
536/// ```text
537/// fn gradBlur1(domain: vec2<f32>, d: vec2<f32>, filter: vec3<f32>) -> vec2<f32> {
538///    var dx: vec3<f32> = (2*GetBlur1(domain + vec2<f32>(1,0)*d) - 2*GetBlur1(domain - vec2<f32>(1,0)*d));
539///    ...
540///    return 0.5 * vec2<f32>(...);
541/// }
542/// ```
543///
544/// Functions that reference fs_main locals (`texsize`, `q1`, …) will fail
545/// to validate after lifting since those locals don't exist at module
546/// scope — we accept that limitation here; a future pass may pass them
547/// explicitly.
548fn lift_user_functions(src: &mut String) -> String {
549    let bytes = src.as_bytes();
550    let mut lifted = String::new();
551    let mut residual = String::with_capacity(src.len());
552    let mut i = 0usize;
553
554    while i < bytes.len() {
555        // Scan for a candidate function signature at the start of a line
556        // (after whitespace). We require it to begin at depth 0; the
557        // outer loop never enters function bodies because we skip past
558        // them whole when we recognise one.
559        if let Some((next_i, lifted_fn)) = try_extract_user_function(src.as_str(), i) {
560            lifted.push_str(&lifted_fn);
561            lifted.push('\n');
562            i = next_i;
563            continue;
564        }
565        residual.push(bytes[i] as char);
566        i += 1;
567    }
568
569    *src = residual;
570    lifted
571}
572
573/// Try to match a single HLSL-shaped function definition starting at byte
574/// position `start` (after leading whitespace). Returns the byte position
575/// just past the closing `}` and the rewritten WGSL function text.
576/// Returns `None` if no signature matches — caller advances by 1 byte.
577fn try_extract_user_function(src: &str, start: usize) -> Option<(usize, String)> {
578    let bytes = src.as_bytes();
579
580    // Skip leading whitespace + newlines on the current line.
581    let mut i = start;
582    while i < bytes.len() && bytes[i].is_ascii_whitespace() {
583        i += 1;
584    }
585    if i >= bytes.len() {
586        return None;
587    }
588
589    // Require we're at a line start — function defs are top-level, never
590    // mid-expression. Walk back from `i` to confirm only whitespace until
591    // a newline or start-of-input.
592    let mut back = i;
593    while back > 0 && bytes[back - 1] != b'\n' {
594        if !bytes[back - 1].is_ascii_whitespace() {
595            return None;
596        }
597        back -= 1;
598    }
599
600    // Parse the return type: one of the WGSL type tokens.
601    let (ret_type, after_ty) = parse_wgsl_function_return_type(src, i)?;
602    let mut j = after_ty;
603
604    while j < bytes.len() && bytes[j].is_ascii_whitespace() {
605        j += 1;
606    }
607
608    // Function name: identifier.
609    let name_start = j;
610    while j < bytes.len() && (bytes[j].is_ascii_alphanumeric() || bytes[j] == b'_') {
611        j += 1;
612    }
613    if j == name_start {
614        return None;
615    }
616    let name = &src[name_start..j];
617
618    while j < bytes.len() && bytes[j].is_ascii_whitespace() {
619        j += 1;
620    }
621    if j >= bytes.len() || bytes[j] != b'(' {
622        return None;
623    }
624    j += 1;
625    let params_start = j;
626
627    // Balance `(...)`.
628    let mut depth = 1i32;
629    while j < bytes.len() {
630        match bytes[j] {
631            b'(' => depth += 1,
632            b')' => {
633                depth -= 1;
634                if depth == 0 {
635                    break;
636                }
637            }
638            _ => {}
639        }
640        j += 1;
641    }
642    if j >= bytes.len() {
643        return None;
644    }
645    let params_text = &src[params_start..j];
646    j += 1;
647
648    while j < bytes.len() && bytes[j].is_ascii_whitespace() {
649        j += 1;
650    }
651    if j >= bytes.len() || bytes[j] != b'{' {
652        return None;
653    }
654    let body_open = j;
655    j += 1;
656    let mut depth = 1i32;
657    while j < bytes.len() {
658        match bytes[j] {
659            b'{' => depth += 1,
660            b'}' => {
661                depth -= 1;
662                if depth == 0 {
663                    break;
664                }
665            }
666            _ => {}
667        }
668        j += 1;
669    }
670    if j >= bytes.len() {
671        return None;
672    }
673    let body = &src[body_open..=j];
674
675    // Convert params to WGSL shape and shadow any that get reassigned in
676    // the body. HLSL value-parameters are mutable (the body's `x = pow(x, k);`
677    // overwrites them in place); WGSL parameters are `let`-bound and naga
678    // rejects the assignment with "invalid left-hand side of assignment;
679    // consider declaring `x` with `var` instead of `let`". For each param
680    // the body writes to, we rename it to `<name>_md2arg` in the signature
681    // and prepend `var <name>: <ty> = <name>_md2arg;` so existing reads /
682    // writes against `<name>` in the body resolve to a mutable local.
683    let parsed = parse_hlsl_params(params_text);
684    let mut sig_parts = Vec::with_capacity(parsed.len());
685    let mut shadow_prelude = String::new();
686    for (ty, pname) in &parsed {
687        if body_assigns_to(body, pname) {
688            let renamed = format!("{pname}_md2arg");
689            sig_parts.push(format!("{renamed}: {ty}"));
690            shadow_prelude.push_str(&format!("var {pname}: {ty} = {renamed}; "));
691        } else {
692            sig_parts.push(format!("{pname}: {ty}"));
693        }
694    }
695    let wgsl_params = sig_parts.join(", ");
696    let wgsl_fn = if shadow_prelude.is_empty() {
697        format!("fn {name}({wgsl_params}) -> {ret_type} {body}")
698    } else {
699        // body is `{ ... }` — insert shadow prelude right after the `{`.
700        let inner = &body[1..body.len() - 1];
701        format!("fn {name}({wgsl_params}) -> {ret_type} {{{shadow_prelude}{inner}}}")
702    };
703
704    Some((j + 1, wgsl_fn))
705}
706
/// Whether `body` (a `{ ... }` WGSL block as text) contains a write to the
/// bare identifier `name`.
///
/// Recognised write shapes (spaces / tabs may precede the operator):
/// - `name = …` — but not the comparison `name == …`;
/// - `name += …`, `-=`, `*=`, `/=`, `%=`;
/// - `name++` / `name--`;
/// - component writes through an access chain, e.g. `name.xy = …` or
///   `name.rgb[0] *= …` (plain and compound assignment only).
///
/// `name` has to stand alone: it is not matched inside a longer identifier
/// (`names`, `my_name`), and it is not matched as a *member* of something
/// else — `ret.x = …` writes to `ret`, not to a parameter called `x`.
/// Text inside `//` line comments is skipped because the textual scan would
/// otherwise see a commented-out assignment as a real write.
///
/// Returns `false` when `name` is empty.
fn body_assigns_to(body: &str, name: &str) -> bool {
    /// Identifier-continuation byte (`[A-Za-z0-9_]`).
    fn is_ident(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_'
    }

    /// First index at or after `at` that is not a space or tab.
    fn skip_blanks(bytes: &[u8], mut at: usize) -> usize {
        while matches!(bytes.get(at), Some(b' ' | b'\t')) {
            at += 1;
        }
        at
    }

    /// Whether a plain (`=`, not `==`) or compound (`+=`, `-=`, `*=`, `/=`,
    /// `%=`) assignment operator starts at `at`.
    fn is_assign_op(bytes: &[u8], at: usize) -> bool {
        let follows = bytes.get(at + 1).copied();
        match bytes.get(at) {
            Some(b'=') => follows != Some(b'='),
            Some(b'+' | b'-' | b'*' | b'/' | b'%') => follows == Some(b'='),
            _ => false,
        }
    }

    /// Skip a run of `.<ident>` member accesses and bracket-balanced `[…]`
    /// indices starting at `at`; returns the index just past the run.
    fn skip_access_chain(bytes: &[u8], mut at: usize) -> usize {
        loop {
            match bytes.get(at) {
                Some(b'.') => {
                    at += 1;
                    while bytes.get(at).is_some_and(|&b| is_ident(b)) {
                        at += 1;
                    }
                }
                Some(b'[') => {
                    let mut depth = 1i32;
                    at += 1;
                    while at < bytes.len() && depth > 0 {
                        match bytes[at] {
                            b'[' => depth += 1,
                            b']' => depth -= 1,
                            _ => {}
                        }
                        at += 1;
                    }
                }
                _ => return at,
            }
        }
    }

    let bytes = body.as_bytes();
    let needle = name.as_bytes();
    if needle.is_empty() {
        return false;
    }

    let mut i = 0usize;
    let mut in_line_comment = false;
    while i + needle.len() <= bytes.len() {
        let c = bytes[i];
        if in_line_comment {
            if c == b'\n' {
                in_line_comment = false;
            }
            i += 1;
            continue;
        }
        if c == b'/' && bytes.get(i + 1) == Some(&b'/') {
            in_line_comment = true;
            i += 2;
            continue;
        }

        // Left boundary: the previous byte must neither continue an
        // identifier nor be a `.` (which would make this a member name of
        // some other value rather than the bare parameter).
        let standalone_left = i == 0 || {
            let prev = bytes[i - 1];
            !(is_ident(prev) || prev == b'.')
        };
        if !(standalone_left && bytes[i..].starts_with(needle)) {
            i += 1;
            continue;
        }

        let after = i + needle.len();
        // Right boundary: not the prefix of a longer identifier.
        if !bytes.get(after).is_some_and(|&b| is_ident(b)) {
            let op_at = skip_blanks(bytes, after);
            let written = match bytes.get(op_at).copied() {
                // `<name>++` / `<name>--`
                Some(sign @ (b'+' | b'-')) if bytes.get(op_at + 1) == Some(&sign) => true,
                // `<name>.<swiz> = rhs` is a component-wise write to the
                // same value; look for the operator after the chain.
                Some(b'.') => {
                    let chain_end = skip_access_chain(bytes, op_at);
                    is_assign_op(bytes, skip_blanks(bytes, chain_end))
                }
                _ => is_assign_op(bytes, op_at),
            };
            if written {
                return true;
            }
        }
        i = after;
    }
    false
}
825
826/// Parse an HLSL parameter list into (type, name) pairs in source order.
827/// Same shape as [`convert_hlsl_params_to_wgsl`] but returns the structured
828/// form so callers can decide per-param whether to rename / shadow.
829fn parse_hlsl_params(params: &str) -> Vec<(String, String)> {
830    if params.trim().is_empty() {
831        return Vec::new();
832    }
833    let mut out = Vec::new();
834    for raw in split_top_level_commas(params) {
835        let part = raw.trim();
836        if part.is_empty() {
837            continue;
838        }
839        if let Some((ty, name)) = split_param(part) {
840            // `lift_user_functions` runs after the regex pipeline, so
841            // `params_text` already holds WGSL-shaped types (`f32`,
842            // `vec2<f32>`, …); no type rewrite needed here.
843            out.push((ty.to_string(), name.to_string()));
844        }
845    }
846    out
847}
848
/// Try to read one of the known WGSL return types starting at byte offset
/// `i` of `src`.
///
/// On success returns the canonical type spelling together with the byte
/// offset just past it. The type must not run straight into an identifier
/// character (`f32x` is not `f32`). `void` is deliberately absent from the
/// list — user comp shader functions always return a typed value in MD2.
/// An offset at or beyond the end of `src` yields `None`.
fn parse_wgsl_function_return_type(src: &str, i: usize) -> Option<(&'static str, usize)> {
    const TYPES: &[&str] = &[
        "vec4<f32>",
        "vec3<f32>",
        "vec2<f32>",
        "mat4x4<f32>",
        "mat3x3<f32>",
        "mat2x2<f32>",
        "f32",
        "i32",
        // `bool`-returning helpers (`bool mask_rectangle(...)` and friends)
        // have to lift as well; left alone, the `bool foo(...)` line lands
        // in the WGSL output and the parser flags `foo` as
        // `expected assignment or increment/decrement`.
        "bool",
    ];
    let tail = src.as_bytes().get(i..)?;
    TYPES
        .iter()
        .copied()
        .find(|ty| {
            // Right boundary: the byte after the type must not continue an
            // identifier.
            let runs_on = tail
                .get(ty.len())
                .is_some_and(|b| b.is_ascii_alphanumeric() || *b == b'_');
            tail.starts_with(ty.as_bytes()) && !runs_on
        })
        .map(|ty| (ty, i + ty.len()))
}
883
/// Separate one parameter declaration into its `(type, name)` halves.
///
/// The type may itself contain `<...>` (`vec3<f32>`), so the cut is made
/// at the *last* whitespace run outside angle brackets that is directly
/// followed by an identifier start (`[A-Za-z_]`). Both halves are trimmed.
/// Returns `None` when no such whitespace run exists or when either half
/// would be empty.
fn split_param(decl: &str) -> Option<(&str, &str)> {
    let bytes = decl.as_bytes();
    let mut angle_depth = 0i32;
    // (start of the whitespace run, start of the identifier after it)
    let mut boundary: Option<(usize, usize)> = None;
    let mut pos = 0usize;

    while let Some(&b) = bytes.get(pos) {
        let opens_gap = angle_depth == 0 && matches!(b, b' ' | b'\t' | b'\n' | b'\r');
        if opens_gap {
            // Swallow the whole whitespace run, then remember it only if
            // an identifier begins right after — later runs overwrite
            // earlier ones, so the last qualifying run wins.
            let run_end = bytes[pos..]
                .iter()
                .position(|c| !c.is_ascii_whitespace())
                .map_or(bytes.len(), |offset| pos + offset);
            let ident_follows = bytes
                .get(run_end)
                .is_some_and(|c| c.is_ascii_alphabetic() || *c == b'_');
            if ident_follows {
                boundary = Some((pos, run_end));
            }
            pos = run_end;
            continue;
        }
        if b == b'<' {
            angle_depth += 1;
        } else if b == b'>' {
            angle_depth -= 1;
        }
        pos += 1;
    }

    let (ty_end, name_start) = boundary?;
    let ty = decl[..ty_end].trim();
    let name = decl[name_start..].trim();
    (!ty.is_empty() && !name.is_empty()).then_some((ty, name))
}
921
/// Rewrite HLSL type names in `code` to their WGSL spellings.
///
/// The text is scanned once, word by word (a word being a maximal run of
/// alphanumerics and `_`), and only a word that *is* a type name gets
/// replaced. Identifiers that merely contain one — `point2`, `tint3`,
/// `floatBitsToInt` — pass through untouched, and `float4x4` can never be
/// half-eaten by the `float4` rule, so no ordering between rules is
/// needed. Everything else (operators, whitespace, comments, non-ASCII
/// text) is copied verbatim.
///
/// Mappings:
/// - `float`, `float1` → `f32`; `float2..4` → `vecN<f32>`. `float1` is
///   HLSL's alias for `float`; 33 of 168 preset comp shaders use it for
///   single-channel loads (`float1 dx = …`).
/// - `floatRxC` (R, C in 2..=4, square and non-square) → `matRxC<f32>`.
///   The corpus has many `float2x3` hits and a few `float4x3`. For the
///   non-square forms the *constructor* call still won't validate when
///   args are passed as HLSL row vectors, but the parse stage succeeds,
///   which is enough for many shaders whose matrix never actually
///   executes (dead `mul` in an unreachable branch, or matrices used only
///   for `.xy` extraction).
/// - `double*` → the same `f32` family (no `double1`). WGSL has no f64 in
///   shaders (naga blocks it on every backend we target); real presets
///   ship `double3 blur = ...` purely as a typo-grade type alias.
/// - `half*` → the same `f32` family, including `half1`. WGSL has no
///   portable f16 and MD2 user code never depends on the precision.
/// - `bool2..4` → `vecN<bool>`. Scalar `bool` already exists in WGSL.
/// - `int`, `int2..4` → `f32` / `vecN<f32>`. MD2 user shaders use `int`
///   for loop counters but freely mix them with float arithmetic
///   (`ang2 = 6.28*n/anz`), which HLSL implicitly promotes and WGSL
///   rejects (`automatic conversions cannot convert elements of i32 to
///   f32`). Presets never do real i32 work in user comp shaders (no array
///   indexing, no bitwise ops), so widening up-front costs nothing.
fn replace_types(code: &str) -> String {
    /// Word character: what an identifier or number is made of.
    fn is_word(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// WGSL spelling for `word` if it is exactly one of the HLSL type
    /// names listed on [`replace_types`]; `None` for anything else.
    fn wgsl_type_for(word: &str) -> Option<&'static str> {
        const FAMILIES: [&str; 5] = ["float", "double", "half", "bool", "int"];
        // No family name is a prefix of another, so the first hit is the
        // only possible one.
        let (family, shape) = FAMILIES
            .iter()
            .find_map(|&family| word.strip_prefix(family).map(|shape| (family, shape)))?;
        match (family, shape) {
            ("bool", "2") => Some("vec2<bool>"),
            ("bool", "3") => Some("vec3<bool>"),
            ("bool", "4") => Some("vec4<bool>"),
            // Scalar `bool` is valid WGSL already; nothing else is mapped.
            ("bool", _) => None,
            // Only `float` and `half` have the `…1` scalar alias.
            ("float" | "half", "1") => Some("f32"),
            // There are no integer matrix aliases.
            ("int", s) if s.contains('x') => None,
            (_, "") => Some("f32"),
            (_, "2") => Some("vec2<f32>"),
            (_, "3") => Some("vec3<f32>"),
            (_, "4") => Some("vec4<f32>"),
            (_, "2x2") => Some("mat2x2<f32>"),
            (_, "2x3") => Some("mat2x3<f32>"),
            (_, "2x4") => Some("mat2x4<f32>"),
            (_, "3x2") => Some("mat3x2<f32>"),
            (_, "3x3") => Some("mat3x3<f32>"),
            (_, "3x4") => Some("mat3x4<f32>"),
            (_, "4x2") => Some("mat4x2<f32>"),
            (_, "4x3") => Some("mat4x3<f32>"),
            (_, "4x4") => Some("mat4x4<f32>"),
            _ => None,
        }
    }

    let mut out = String::with_capacity(code.len());
    let mut rest = code;
    while !rest.is_empty() {
        // Copy the run of non-word text (punctuation, whitespace) as is.
        let gap_len = rest.find(is_word).unwrap_or(rest.len());
        out.push_str(&rest[..gap_len]);
        rest = &rest[gap_len..];

        // Then take one whole word and translate it if it names a type.
        let word_len = rest.find(|c: char| !is_word(c)).unwrap_or(rest.len());
        let (word, tail) = rest.split_at(word_len);
        out.push_str(wgsl_type_for(word).unwrap_or(word));
        rest = tail;
    }
    out
}
1031
1032fn replace_functions(code: &str) -> String {
1033    let mut result = code.to_string();
1034
1035    // Normalise `name (args)` → `name(args)` for known HLSL functions so
1036    // the substring-based rewrites below match. Real presets ship
1037    // `lerp (a, b, t)` and `tex2D (s, uv)` (often when the call spans
1038    // multiple lines and the author put the `(` on a fresh line).
1039    result = normalise_call_whitespace(&result);
1040
1041    // lerp → mix
1042    result = result.replace("lerp(", "mix(");
1043
1044    // Alias `sat(x)` → `saturate(x)` BEFORE the saturate→clamp rewrite
1045    // so the corpus `#define sat saturate` shortcut composes correctly.
1046    // Same word-boundary rule that `rename_word_call` enforces.
1047    result = rename_word_call(&result, "sat", "saturate");
1048
1049    // saturate(<expr>) → clamp(<expr>, 0.0, 1.0). Paren-balanced — `<expr>`
1050    // may contain nested calls like `saturate(GetBlur1(uv))`.
1051    result = rewrite_unary_call_balanced(&result, "saturate", |inner| {
1052        format!("clamp({inner}, 0.0, 1.0)")
1053    });
1054
1055    // frac → fract
1056    result = result.replace("frac(", "fract(");
1057
1058    // HLSL builtins absent or differently spelled in WGSL.
1059    //
1060    // - `sat(x)` is the HLSL alias for `saturate(x)`. The corpus also
1061    //   ships `#define sat saturate` lines (stripped by
1062    //   `strip_preprocessor`), leaving `sat(…)` references unbound.
1063    // - `rsqrt(x)` is the HLSL reciprocal-square-root; WGSL spells it
1064    //   `inverseSqrt`.
1065    // - `log10(x)` exists in HLSL but not in WGSL. Map to
1066    //   `log(x) * 0.4342944…` (`1/ln(10)`).
1067    // - `tex2Dbias(s, vec4(uv, 0, bias))` is HLSL's mip-bias sampling
1068    //   form; WGSL has `textureSampleBias`, but the corpus uses bias
1069    //   purely cosmetically (always 0 or near-0). Downgrade to plain
1070    //   `tex2D(s, uv)` by stripping the bias arg.
1071    //
1072    // All four are plain string substitutions; no paren balancing is
1073    // required because each maps to a same-shape call (or a binary
1074    // expression for `log10`).
1075    result = rename_word_call(&result, "rsqrt", "inverseSqrt");
1076    // log10(x) → log(x) * (1 / ln 10).
1077    result = rewrite_unary_call_balanced(&result, "log10", |inner| {
1078        format!("(log({inner}) * 0.43429448190325176)")
1079    });
1080    // `tex2Dbias(s, vec4(uv, mip, bias))` → `tex2D(s, uv)`. The wrapping
1081    // vec4 ctor is the dominant corpus shape; project onto its first two
1082    // components.
1083    result = rewrite_tex2dbias(&result);
1084
1085    // Normalise case-insensitive variants of MD2 builtins. Real presets
1086    // ship `tex2d(...)` instead of `tex2D(...)`; WGSL's case-sensitive
1087    // identifier resolution fails on the lowercase form.
1088    result = result.replace("tex2d(", "tex2D(");
1089    result = result.replace("tex3d(", "tex3D(");
1090
1091    // mul(matrix, vector) → matrix * vector
1092    result = rewrite_mul_balanced(&result);
1093
1094    // `mod(a, b)` → `((a) - floor((a) / (b)) * (b))`. `mod` is a WGSL
1095    // reserved keyword (HLSL uses it as the float-mod helper). Use a
1096    // paren-balanced rewrite because both arguments are expressions.
1097    result = rewrite_mod_balanced(&result);
1098
1099    // Rename WGSL-reserved identifiers that real presets use as locals
1100    // (`mod`, `filter`, `sample`). Runs AFTER `rewrite_mod_balanced` so
1101    // function-call rewrites already happened — what's left are bare
1102    // identifier references which get a trailing underscore.
1103    result = rename_reserved_identifiers(&result);
1104
1105    // Strip leading zeros from integer literals — WGSL rejects `02`.
1106    result = LEADING_ZERO_REGEX.replace_all(&result, "$1").to_string();
1107
1108    // Strip HLSL unary `+` (no-op prefix that WGSL doesn't accept).
1109    // Triggered when `+` directly follows `(`, `,`, or any binop after
1110    // optional whitespace. Pure no-op in HLSL semantics.
1111    result = strip_unary_plus(&result);
1112
1113    result
1114}
1115
/// HLSL function names that the rewrite pipeline treats specially. When a
/// preset writes `name (args)` with whitespace between the identifier and
/// `(`, downstream matches anchored on `name(` (`rename_word_call`, the
/// paren-balanced walkers) would skip the call. Collapse that whitespace
/// once up-front so every downstream pass sees the no-space form.
const KNOWN_CALL_NAMES: &[&str] = &[
    "lerp",
    "frac",
    "saturate",
    "mul",
    "tex2D",
    "tex3D",
    "pow",
    "clamp",
    "mix",
    "min",
    "max",
    "step",
    "smoothstep",
    "length",
    "distance",
    "dot",
    "normalize",
    "abs",
    "cos",
    "sin",
    "tan",
    "sqrt",
    "exp",
    "log",
    "GetPixel",
    "GetBlur1",
    "GetBlur2",
    "GetBlur3",
    "lum",
    "fract",
    "floor",
    "ceil",
    "sign",
    // Names with their own `name(`-anchored rewrite in `replace_functions`
    // (`sat`/`rsqrt` renames, the `log10` and `tex2Dbias` rewriters, the
    // lowercase `tex2d`/`tex3d` normalisation). Without the whitespace
    // collapse, `log10 (x)` and friends would reach WGSL unrewritten.
    "sat",
    "rsqrt",
    "log10",
    "tex2Dbias",
    "tex2d",
    "tex3d",
];

/// Replace `name<WS>(` with `name(` for every entry in [`KNOWN_CALL_NAMES`].
///
/// Only fires when `name` starts on a word boundary and `<WS>` is a
/// non-empty run of spaces, tabs, CR or LF (so the no-whitespace form is
/// left alone, and a name followed by whitespace but no `(` is untouched).
/// Text inside `//` and `/* */` comments is copied verbatim so
/// commented-out code stays stable.
///
/// All untouched text — including non-ASCII characters — is copied as
/// whole slices of `src`, never byte by byte, so multi-byte UTF-8
/// sequences survive intact.
fn normalise_call_whitespace(src: &str) -> String {
    fn is_call_gap(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\n' | b'\r')
    }

    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    // `src[..copied]` has been emitted already; the rest is pending and
    // gets flushed whenever a rewrite happens, and once more at the end.
    let mut copied = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        // Step over comments without inspecting their contents.
        if bytes[i] == b'/' && bytes.get(i + 1) == Some(&b'/') {
            while i < bytes.len() && bytes[i] != b'\n' {
                i += 1;
            }
            continue;
        }
        if bytes[i] == b'/' && bytes.get(i + 1) == Some(&b'*') {
            i += 2;
            while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
                i += 1;
            }
            if i + 1 < bytes.len() {
                i += 2;
            }
            continue;
        }

        // Word-boundary check on the left.
        if i > 0 && (bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_') {
            i += 1;
            continue;
        }

        // A known name directly followed by at least one whitespace byte.
        let matched = KNOWN_CALL_NAMES
            .iter()
            .map(|name| name.len())
            .zip(KNOWN_CALL_NAMES.iter())
            .find(|(len, name)| {
                bytes[i..].starts_with(name.as_bytes())
                    && bytes.get(i + len).copied().is_some_and(is_call_gap)
            })
            .map(|(len, _)| len);
        let Some(name_len) = matched else {
            i += 1;
            continue;
        };

        // Peek past the whitespace to see if the next non-WS byte is `(`.
        let name_end = i + name_len;
        let mut j = name_end;
        while j < bytes.len() && is_call_gap(bytes[j]) {
            j += 1;
        }
        if bytes.get(j) == Some(&b'(') {
            // Emit everything up to and including the name, then `(`,
            // dropping the whitespace in between.
            out.push_str(&src[copied..name_end]);
            out.push('(');
            i = j + 1;
            copied = i;
        } else {
            // Whitespace not followed by `(` — leave unchanged.
            i = name_end;
        }
    }

    out.push_str(&src[copied..]);
    out
}
1242
/// Strip HLSL unary `+` (a syntactic no-op WGSL doesn't accept).
///
/// A `+` is treated as unary — and dropped — when the last significant
/// (non-whitespace, non-comment) byte before it satisfies
/// [`is_unary_context`], i.e. the `+` sits where an operand is expected.
/// Start of input counts as such a position. Everything else is copied
/// unchanged, including non-ASCII text and the contents of `//` and
/// `/* */` comments.
///
/// A `++` that directly follows an operand is a post-increment (`n++` in a
/// `for` header) and is kept whole; without that rule its second `+` would
/// look like a unary plus after a binary one and `n++` would decay to `n+`.
///
/// Real preset pattern: `ret.x += (+dx.x - dy.x)*0.4;` — the `+` after
/// `(` is a stylistic flourish. WGSL parser rejects with
/// `expected expression, found "+"`.
fn strip_unary_plus(src: &str) -> String {
    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    // `src[..copied]` is already in `out`; pending text is flushed as a
    // whole slice each time a `+` is dropped, so multi-byte characters are
    // never split.
    let mut copied = 0usize;
    // Last significant byte seen; start-of-input behaves like `(`.
    let mut last_op = b'(';
    let mut i = 0usize;

    while i < bytes.len() {
        let b = bytes[i];

        // Inside comments: pass through and don't update `last_op`.
        if b == b'/' && bytes.get(i + 1) == Some(&b'/') {
            while i < bytes.len() && bytes[i] != b'\n' {
                i += 1;
            }
            continue;
        }
        if b == b'/' && bytes.get(i + 1) == Some(&b'*') {
            i += 2;
            while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
                i += 1;
            }
            if i + 1 < bytes.len() {
                i += 2;
            }
            continue;
        }

        if b == b'+' {
            if is_unary_context(last_op) {
                // Drop the `+`. `last_op` stays as it is — the next token
                // still sits in the same syntactic position.
                out.push_str(&src[copied..i]);
                i += 1;
                copied = i;
                continue;
            }
            if bytes.get(i + 1) == Some(&b'+') {
                // Post-increment after an operand: keep both bytes and
                // leave `last_op` on the operand, so a `+` that follows is
                // still read as binary.
                i += 2;
                continue;
            }
        }

        if !b.is_ascii_whitespace() {
            last_op = b;
        }
        i += 1;
    }

    out.push_str(&src[copied..]);
    out
}

/// Whether `b`, as the last significant byte before a `+`, means that `+`
/// stands in operand position (and is therefore a unary plus): an opening
/// bracket or brace, a separator (`,` `;`), or an operator / ternary /
/// assignment character.
fn is_unary_context(b: u8) -> bool {
    matches!(
        b,
        b'(' | b','
            | b'='
            | b'+'
            | b'-'
            | b'*'
            | b'/'
            | b'<'
            | b'>'
            | b'?'
            | b':'
            | b'['
            | b'{'
            | b';'
    )
}
1313
/// Generic paren-balanced rewriter for `<name>(<single-arg>)` calls.
///
/// Walks `src`, finds `<name>` on a word boundary immediately followed by
/// `(`, balances to the matching `)`, and replaces the whole call with
/// `make_replacement` applied to the argument text. The argument is passed
/// untrimmed; calls to the same `<name>` nested inside it are rewritten
/// first, so `log10(log10(x))` converts at both levels.
///
/// A call whose parentheses never balance is left as written. All text
/// outside rewritten calls is copied as whole slices of `src`, so
/// non-ASCII characters are preserved.
fn rewrite_unary_call_balanced<F>(src: &str, name: &str, make_replacement: F) -> String
where
    F: Fn(&str) -> String,
{
    /// Index of the `)` that closes a parenthesis opened just before
    /// `from`, or `None` if the text ends first.
    fn closing_paren(bytes: &[u8], from: usize) -> Option<usize> {
        let mut depth = 1i32;
        for (offset, &b) in bytes[from..].iter().enumerate() {
            match b {
                b'(' => depth += 1,
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(from + offset);
                    }
                }
                _ => {}
            }
        }
        None
    }

    // Takes the callback as a trait object so the recursion into the
    // argument text does not instantiate a new generic per nesting level.
    fn rewrite(src: &str, name: &[u8], make: &dyn Fn(&str) -> String) -> String {
        let bytes = src.as_bytes();
        let mut out = String::with_capacity(src.len());
        // `src[..copied]` is already in `out`.
        let mut copied = 0usize;
        let mut i = 0usize;

        while i < bytes.len() {
            let open = i + name.len();
            let on_boundary =
                i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
            let is_call =
                on_boundary && bytes[i..].starts_with(name) && bytes.get(open) == Some(&b'(');
            if is_call {
                if let Some(close) = closing_paren(bytes, open + 1) {
                    out.push_str(&src[copied..i]);
                    let arg = rewrite(&src[open + 1..close], name, make);
                    out.push_str(&make(&arg));
                    i = close + 1;
                    copied = i;
                    continue;
                }
            }
            i += 1;
        }

        out.push_str(&src[copied..]);
        out
    }

    rewrite(src, name.as_bytes(), &make_replacement)
}
1361
/// Rename `<from>(` → `<to>(` at every word boundary. Used to alias HLSL
/// builtins that WGSL spells differently (`sat` → `saturate`, `rsqrt` →
/// `inverseSqrt`).
///
/// Differs from a plain `replace`: a preset helper `frsqrt(q1)` won't pick
/// up an unwanted `frinverseSqrt(q1)` rewrite, because a non-identifier
/// byte (or start of source) is required to the left of the match. The
/// `(` must follow `<from>` directly — `from (x)` is not renamed.
///
/// Unmatched text is copied as whole slices of `src`, so non-ASCII
/// characters pass through unchanged.
fn rename_word_call(src: &str, from: &str, to: &str) -> String {
    let bytes = src.as_bytes();
    let needle = from.as_bytes();
    let mut out = String::with_capacity(src.len());
    // `src[..copied]` is already in `out`.
    let mut copied = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        let open = i + needle.len();
        let on_boundary =
            i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
        if on_boundary && bytes[i..].starts_with(needle) && bytes.get(open) == Some(&b'(') {
            out.push_str(&src[copied..i]);
            out.push_str(to);
            // Resume copying at the `(` so it is emitted with the text
            // that follows it.
            copied = open;
            i = open + 1;
        } else {
            i += 1;
        }
    }

    out.push_str(&src[copied..]);
    out
}
1389
/// `tex2Dbias(s, vec4(uv, mip, bias))` → `tex2D(s, uv)`.
///
/// Paren-balanced over both arguments. The bias component is dropped
/// (real presets use it cosmetically at 0 or near-0 — no visual delta).
/// When the second argument is not a recognised vector constructor it is
/// carried over whole.
///
/// Only a `tex2Dbias(` that starts on a word boundary and has a top-level
/// comma *inside its own parentheses* is rewritten; a call with a single
/// argument or with unbalanced parentheses is left exactly as written.
/// Text outside rewritten calls is copied as whole slices of `src`, so
/// non-ASCII characters are preserved.
fn rewrite_tex2dbias(src: &str) -> String {
    const NEEDLE: &[u8] = b"tex2Dbias(";

    /// For a call whose `(` sits just before `start`: the index of the
    /// first top-level comma and of the closing `)`. `None` if the call
    /// closes before any top-level comma or never closes.
    fn split_call_args(bytes: &[u8], start: usize) -> Option<(usize, usize)> {
        let mut depth = 1i32;
        let mut comma = None;
        for (offset, &b) in bytes[start..].iter().enumerate() {
            match b {
                b'(' => depth += 1,
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        return comma.map(|c| (c, start + offset));
                    }
                }
                b',' if depth == 1 && comma.is_none() => comma = Some(start + offset),
                _ => {}
            }
        }
        None
    }

    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    // `src[..copied]` is already in `out`.
    let mut copied = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        let on_boundary =
            i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
        let args_start = i + NEEDLE.len();
        let parsed = if on_boundary && bytes[i..].starts_with(NEEDLE) {
            split_call_args(bytes, args_start)
        } else {
            None
        };
        let Some((comma, close)) = parsed else {
            i += 1;
            continue;
        };

        let sampler = src[args_start..comma].trim();
        let arg2 = src[comma + 1..close].trim();
        // Strip a vec4(/float4(/vec3(/float3( wrapper and keep its first
        // component (`uv` for the dominant `vec4(uv, mip, bias)` shape).
        let uv = strip_first_vec_component(arg2).unwrap_or_else(|| arg2.to_string());

        out.push_str(&src[copied..i]);
        out.push_str("tex2D(");
        out.push_str(sampler);
        out.push_str(", ");
        out.push_str(&uv);
        out.push(')');
        i = close + 1;
        copied = i;
    }

    out.push_str(&src[copied..]);
    out
}

/// For a string like `vec4(uv, 0, 0.1)` or `float3(uv, 0)`, return `uv` —
/// the trimmed text up to the first top-level comma inside the
/// constructor (or the whole interior when there is no comma). Used by
/// [`rewrite_tex2dbias`] to drop the mip-bias arguments.
///
/// Returns `None` when `s` (after trimming) does not start with one of the
/// `vecN(` / `floatN(` constructor prefixes, or does not end with `)`.
fn strip_first_vec_component(s: &str) -> Option<String> {
    const CTOR_PREFIXES: [&str; 6] = ["vec4(", "vec3(", "float4(", "float3(", "float2(", "vec2("];

    let s = s.trim();
    let interior = CTOR_PREFIXES
        .iter()
        .find_map(|prefix| s.strip_prefix(prefix))?
        .strip_suffix(')')?;

    let mut depth = 0i32;
    let mut end = interior.len();
    for (i, b) in interior.bytes().enumerate() {
        match b {
            b'(' => depth += 1,
            b')' => depth -= 1,
            b',' if depth == 0 => {
                end = i;
                break;
            }
            _ => {}
        }
    }
    Some(interior[..end].trim().to_string())
}
1489
/// Rename WGSL-reserved keywords that MD2 preset authors use as ordinary
/// identifiers (`mod`, `filter`, `sample`) by appending a trailing `_`.
///
/// A keyword is renamed when it sits on a word boundary on both sides and
/// the next non-whitespace byte is not `(`. The `(` case is a function
/// call, which this pass deliberately leaves alone. Because every
/// occurrence is renamed the same way, a local `float mod = …;` and its
/// later uses all become `mod_` and stay tied together.
///
/// Text inside `//` line comments and `/* … */` block comments is passed
/// through untouched. Unchanged source is copied as whole string slices,
/// never byte-by-byte, so non-ASCII characters survive intact. An
/// unterminated block comment is treated as running to the end of input.
fn rename_reserved_identifiers(src: &str) -> String {
    const RESERVED: &[&str] = &["mod", "filter", "sample"];
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len() + 16);
    // `src[..copied]` is already in `out`; everything between `copied` and
    // the next rename is flushed as one slice. Every index used for slicing
    // sits on an ASCII byte, so it is always a valid char boundary.
    let mut copied = 0usize;
    let mut i = 0usize;
    while i < bytes.len() {
        let next = bytes.get(i + 1).copied();
        // Line comment: skip to (not past) the newline.
        if bytes[i] == b'/' && next == Some(b'/') {
            i = src[i..].find('\n').map_or(bytes.len(), |off| i + off);
            continue;
        }
        // Block comment: skip past the closing `*/`, or to the end of
        // input when the comment is never closed.
        if bytes[i] == b'/' && next == Some(b'*') {
            i = src[i + 2..]
                .find("*/")
                .map_or(bytes.len(), |off| i + 2 + off + 2);
            continue;
        }

        // Left word boundary: we must not be in the middle of an identifier.
        if i > 0 && is_ident(bytes[i - 1]) {
            i += 1;
            continue;
        }

        let hit = RESERVED.iter().copied().find(|kw| {
            let end = i + kw.len();
            if !bytes[i..].starts_with(kw.as_bytes()) {
                return false;
            }
            // Right word boundary.
            if bytes.get(end).is_some_and(|&b| is_ident(b)) {
                return false;
            }
            // A following `(` (after optional whitespace) marks a call.
            let after = bytes[end..].iter().find(|b| !b.is_ascii_whitespace());
            after != Some(&b'(')
        });

        match hit {
            Some(kw) => {
                out.push_str(&src[copied..i]);
                out.push_str(kw);
                out.push('_');
                i += kw.len();
                copied = i;
            }
            None => i += 1,
        }
    }
    out.push_str(&src[copied..]);
    out
}
1573
1574/// `mod(a, b)` → `((a) - floor((a) / (b)) * (b))`. WGSL reserves `mod` as
1575/// a keyword; HLSL uses it as the float-modulo helper. The expansion
1576/// matches HLSL's semantics (and matches GLSL's `mod`) so behaviour stays
1577/// identical.
1578///
1579/// Paren-balanced on both arguments because real presets ship
1580/// `mod(ang*16/M_PI, 1.0)` and similar — the comma sits inside the call's
1581/// top-level depth.
1582fn rewrite_mod_balanced(src: &str) -> String {
1583    rewrite_binary_call_balanced(src, "mod", |a, b| {
1584        format!("(({a}) - floor(({a}) / ({b})) * ({b}))")
1585    })
1586}
1587
/// Generic paren-balanced rewriter for two-argument calls.
///
/// Walks `src`, matches `<name>(` on a left word boundary (no whitespace
/// is accepted between the name and `(`), splits the argument list at
/// the first comma at the call's own paren depth, and replaces the whole
/// call with `make(first, second)`. Both argument texts are trimmed.
/// Anything after the first top-level comma, further commas included, is
/// the second argument.
///
/// Calls nested inside an argument (`mod(mod(a, 2), 3)`) are rewritten
/// first, so no `<name>(` is left behind in the output. Calls with no
/// top-level comma or no closing `)` are left as written.
///
/// Unchanged text is copied as string slices, so non-ASCII characters
/// (for example in comments) are preserved.
fn rewrite_binary_call_balanced<F>(src: &str, name: &str, make: F) -> String
where
    F: Fn(&str, &str) -> String,
{
    // Non-generic worker: recursing through `&dyn Fn` keeps the recursion
    // from instantiating a fresh closure type per nesting level.
    fn rewrite(src: &str, name: &[u8], make: &dyn Fn(&str, &str) -> String) -> String {
        let bytes = src.as_bytes();
        let mut out = String::with_capacity(src.len());
        // `src[..copied]` has been emitted already.
        let mut copied = 0usize;
        let mut i = 0usize;
        while i < bytes.len() {
            let at_call = bytes[i..].starts_with(name)
                && bytes.get(i + name.len()) == Some(&b'(')
                && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
            if at_call {
                let arg_start = i + name.len() + 1;
                let mut depth = 1i32;
                let mut comma = None;
                let mut close = None;
                for (j, &b) in bytes.iter().enumerate().skip(arg_start) {
                    match b {
                        b'(' => depth += 1,
                        b')' => {
                            depth -= 1;
                            if depth == 0 {
                                close = Some(j);
                                break;
                            }
                        }
                        b',' if depth == 1 && comma.is_none() => comma = Some(j),
                        _ => {}
                    }
                }
                if let (Some(c), Some(end)) = (comma, close) {
                    out.push_str(&src[copied..i]);
                    let first = rewrite(src[arg_start..c].trim(), name, make);
                    let second = rewrite(src[c + 1..end].trim(), name, make);
                    out.push_str(&make(&first, &second));
                    i = end + 1;
                    copied = i;
                    continue;
                }
                // Couldn't balance or split — leave the text untouched.
            }
            i += 1;
        }
        out.push_str(&src[copied..]);
        out
    }

    rewrite(src, name.as_bytes(), &make)
}
1642
/// `mul(a, b)` → `((a) * (b))`.
///
/// Paren-balanced on both arguments, because real shaders write
/// `mul(rotation_matrix(theta), uv)`: the split point is the first comma
/// at the call's own paren depth. `mul(` must start on a word boundary.
/// Calls with no top-level comma or no closing `)` are left as written.
///
/// A `mul` nested inside an argument (`mul(mul(a, b), c)`) is rewritten
/// as well. Unchanged text is copied as string slices, so non-ASCII
/// characters are preserved.
fn rewrite_mul_balanced(src: &str) -> String {
    const CALL: &[u8] = b"mul(";
    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    // `src[..copied]` has been emitted already.
    let mut copied = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        let at_call = bytes[i..].starts_with(CALL)
            && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
        if at_call {
            let arg_start = i + CALL.len();
            let mut depth = 1i32;
            let mut comma = None;
            let mut close = None;
            for (j, &b) in bytes.iter().enumerate().skip(arg_start) {
                match b {
                    b'(' => depth += 1,
                    b')' => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(j);
                            break;
                        }
                    }
                    b',' if depth == 1 && comma.is_none() => comma = Some(j),
                    _ => {}
                }
            }
            if let (Some(c), Some(end)) = (comma, close) {
                out.push_str(&src[copied..i]);
                // Rewrite the arguments first so nested calls are not lost.
                let a = rewrite_mul_balanced(src[arg_start..c].trim());
                let b = rewrite_mul_balanced(src[c + 1..end].trim());
                out.push_str(&format!("(({a}) * ({b}))"));
                i = end + 1;
                copied = i;
                continue;
            }
        }
        i += 1;
    }

    out.push_str(&src[copied..]);
    out
}
1691
1692fn rewrite_local_declarations(code: &str) -> String {
1693    // Split inline `; TYPE` runs onto their own lines first, so the
1694    // line-anchored LOCAL_DECL_REGEX sees every decl. See the doc on
1695    // [`INLINE_DECL_SPLIT_REGEX`] for the motivating preset. The same
1696    // pass also detaches `{TYPE` glue at the head of function/conditional
1697    // bodies.
1698    let normalised = INLINE_DECL_SPLIT_REGEX
1699        .replace_all(code, "$1\n$2 $3")
1700        .to_string();
1701    LOCAL_DECL_REGEX
1702        .replace_all(&normalised, |caps: &regex::Captures| {
1703            let indent = &caps[1];
1704            let ty = &caps[2];
1705            let decls = &caps[3];
1706
1707            // Bail when this is actually a function signature
1708            // (`vec3<f32> helper(args) { … }`). The regex greedily
1709            // swallows the body up to the first `;`; we detect the
1710            // function shape by spotting a `(` before any `=` and
1711            // return the match unchanged so [`lift_user_functions`] can
1712            // extract it intact later.
1713            if is_function_signature(decls) {
1714                return caps[0].to_string();
1715            }
1716
1717            let mut out = String::new();
1718            for (i, raw) in split_top_level_commas(decls).into_iter().enumerate() {
1719                let decl = raw.trim();
1720                if decl.is_empty() {
1721                    continue;
1722                }
1723                if i > 0 {
1724                    out.push('\n');
1725                    out.push_str(indent);
1726                } else {
1727                    out.push_str(indent);
1728                }
1729                // Each declarator is `name` or `name = init`. We split on the
1730                // first `=` so initialisers containing further `=` (rare,
1731                // mostly `a = b == c ? …` ternary which we don't otherwise
1732                // support) keep their tail intact.
1733                if let Some((name, init)) = decl.split_once('=') {
1734                    let name = name.trim();
1735                    let init = init.trim();
1736                    out.push_str(&format!("var {name}: {ty} = {init};"));
1737                } else {
1738                    out.push_str(&format!("var {decl}: {ty};"));
1739                }
1740            }
1741            out
1742        })
1743        .to_string()
1744}
1745
/// Decide whether a `decls` capture is really a function signature.
///
/// `decls` is the text the local-decl regex captured between the type and
/// the terminating `;`. The test looks only at which of `(` and `=` comes
/// first: a function header (`helper(args) { … return …`) opens its
/// parameter list before any `=`, while a variable declaration puts `=`
/// ahead of any call in its initialiser (`x = sin(0)`). Returns `false`
/// when neither character occurs.
fn is_function_signature(decls: &str) -> bool {
    let first_marker = decls.chars().find(|c| matches!(c, '=' | '('));
    first_marker == Some('(')
}
1764
/// Split a declarator list at its top-level commas.
///
/// A comma only separates declarators when it is outside every `(...)`
/// and `<...>` pair, so `vec3<f32>(0, 0, 0)` stays in one piece. The
/// pieces are returned untrimmed and always number at least one (an
/// empty input yields a single empty slice).
///
/// Note that `<` and `>` are counted wherever they appear, so a
/// comparison operator in an initialiser also shifts the angle depth.
fn split_top_level_commas(s: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let (mut parens, mut angles) = (0i32, 0i32);
    let mut piece_start = 0usize;
    for (pos, ch) in s.char_indices() {
        match ch {
            '(' => parens += 1,
            ')' => parens -= 1,
            '<' => angles += 1,
            '>' => angles -= 1,
            ',' if parens == 0 && angles == 0 => {
                pieces.push(&s[piece_start..pos]);
                piece_start = pos + 1;
            }
            _ => {}
        }
    }
    pieces.push(&s[piece_start..]);
    pieces
}
1789
1790fn strip_preprocessor(code: &str) -> String {
1791    // First, expand any `#define NAME REPLACEMENT` where REPLACEMENT is a
1792    // single identifier — common MD2 idiom for type/function aliases:
1793    // `#define MyGet GetPixel`, `#define sat saturate`, `#define vec3 float3`.
1794    // Without this expansion, `strip_preprocessor` drops the `#define` and
1795    // every later `MyGet(uv)` lands as `no definition in scope for
1796    // identifier: 'MyGet'`. We only expand the trivial single-token
1797    // replacement form — function-like macros (`#define FOO(x) bar(x,1)`)
1798    // aren't worth the parser complexity here.
1799    let expanded = expand_simple_defines(code);
1800    PREPROC_REGEX.replace_all(&expanded, "").to_string()
1801}
1802
1803/// Scan for lines of the form `#define IDENT IDENT` (whitespace-separated
1804/// single-token replacement) and substitute `from → to` everywhere else in
1805/// the source. Operates as a single pass: defines are collected first, then
1806/// applied to the rest of the source. Skips macros whose `to` looks like
1807/// anything other than a bare identifier so we don't accidentally inline
1808/// `#define K 0.5` (where the rest of the source has plain `K` in
1809/// arithmetic context — the existing fall-through preserves it as an
1810/// undefined-but-untouched identifier the user can spot in the error).
1811fn expand_simple_defines(code: &str) -> String {
1812    use regex::Regex;
1813    use std::sync::LazyLock;
1814    static DEF_RE: LazyLock<Regex> = LazyLock::new(|| {
1815        // Trailing `//comment` is common in the corpus
1816        // (`#define MyGet GetPixel //GetBlur1`) — allow it.
1817        Regex::new(
1818            r"(?m)^\s*#\s*define\s+([A-Za-z_][A-Za-z0-9_]*)\s+([A-Za-z_][A-Za-z0-9_]*)\s*(?://[^\n]*)?$",
1819        )
1820        .unwrap()
1821    });
1822    let pairs: Vec<(String, String)> = DEF_RE
1823        .captures_iter(code)
1824        .map(|c| (c[1].to_string(), c[2].to_string()))
1825        .collect();
1826    if pairs.is_empty() {
1827        return code.to_string();
1828    }
1829    let mut out = code.to_string();
1830    for (from, to) in &pairs {
1831        if from == to {
1832            continue;
1833        }
1834        let re = Regex::new(&format!(r"\b{}\b", regex::escape(from))).unwrap();
1835        out = re.replace_all(&out, to.as_str()).to_string();
1836    }
1837    out
1838}
1839
1840/// Comment out lines that look like prose (English) rather than
1841/// HLSL/WGSL code. Real preset pattern: ``comp_30=`written by martin``
1842/// — an attribution typed without a `//` prefix, threaded into the
1843/// shader body by the `.milk` parser as a literal line of code. Many
1844/// presets failed with `expected assignment or increment/decrement;
1845/// found 'by'` (or `found 'rota'`, etc.) on lines of this shape.
1846///
1847/// Heuristic: a non-empty line where every non-whitespace character is
1848/// either an ASCII letter or an apostrophe — no `;`, `=`, parens,
1849/// braces, brackets, arithmetic operators, digits, or comment markers —
1850/// and which contains at least two whitespace-separated word tokens. We
1851/// also require the first token *not* to look like a known WGSL keyword
1852/// or builtin call left-side (e.g. `for`, `if`, `var`, `return`, `tex2D`,
1853/// `mul`, …) so a line like `if cond` alone won't be commented out.
1854/// In practice prose lines always have ≥ 2 words and lack punctuation,
1855/// so the false-positive rate is effectively zero on the corpus.
1856fn comment_out_prose_lines(code: &str) -> String {
1857    let mut out = String::with_capacity(code.len() + 32);
1858    for line in code.split_inclusive('\n') {
1859        if looks_like_prose(line) {
1860            out.push_str("// ");
1861            out.push_str(line);
1862        } else {
1863            out.push_str(line);
1864        }
1865    }
1866    out
1867}
1868
/// Heuristic: does this line read as plain English rather than code?
///
/// Returns `true` only when all of the following hold for the line with
/// its trailing newline and surrounding whitespace removed:
///
/// * it is not empty and does not start with `//`, `/*` or `*`;
/// * every character that is not ASCII whitespace is an ASCII letter or
///   an apostrophe (so any `;`, `=`, digit, bracket or operator rules the
///   line out);
/// * it holds at least two such letters/apostrophes;
/// * its first whitespace-separated word is not listed in `KW`.
///
/// A single bare word therefore counts as prose too: a letters-only line
/// such as an `END`/`EOF` marker is never a valid statement. `KW` exempts
/// keywords and builtins that can legitimately open a line whose
/// remainder continues on the next line.
fn looks_like_prose(line: &str) -> bool {
    const KW: &[&str] = &[
        "for",
        "if",
        "else",
        "while",
        "do",
        "return",
        "break",
        "continue",
        "var",
        "let",
        "const",
        "static",
        "uniform",
        "extern",
        "sampler",
        "texture",
        "shader_body",
        "true",
        "false",
        "tex2D",
        "tex2d",
        "tex3D",
        "lerp",
        "mix",
        "saturate",
        "abs",
        "sin",
        "cos",
        "tan",
        "atan",
        "atan2",
        "pow",
        "sqrt",
        "log",
        "exp",
        "min",
        "max",
        "clamp",
        "length",
        "normalize",
        "dot",
        "cross",
        "mul",
        "step",
        "smoothstep",
        "frac",
        "fract",
        "floor",
        "ceil",
        "round",
        "sign",
        "any",
        "all",
        "not",
        "lum",
        "GetPixel",
        "GetMain",
        "GetBlur1",
        "GetBlur2",
        "GetBlur3",
    ];

    let text = line.trim_end_matches('\n').trim();
    // Blank lines and existing comments are never touched.
    let already_comment =
        text.starts_with("//") || text.starts_with("/*") || text.starts_with('*');
    if text.is_empty() || already_comment {
        return false;
    }

    // Any character outside letters/apostrophes/whitespace means "code".
    let mut letters = 0usize;
    for ch in text.chars().filter(|ch| !ch.is_ascii_whitespace()) {
        if !(ch.is_ascii_alphabetic() || ch == '\'') {
            return false;
        }
        letters += 1;
    }

    letters >= 2
        && text
            .split_whitespace()
            .next()
            .is_some_and(|first| !KW.contains(&first))
}
1968
1969fn strip_sampler_declarations(code: &str) -> String {
1970    SAMPLER_DECL_REGEX.replace_all(code, "").to_string()
1971}
1972
1973fn strip_storage_class_qualifiers(code: &str) -> String {
1974    let no_storage = STORAGE_CLASS_REGEX.replace_all(code, "");
1975    CONST_TYPE_REGEX.replace_all(&no_storage, "$1").to_string()
1976}
1977
1978fn rewrite_postfix_inc_dec(code: &str) -> String {
1979    let inc = POSTFIX_INC_REGEX
1980        .replace_all(code, "$1 = $1 + 1$2")
1981        .to_string();
1982    POSTFIX_DEC_REGEX
1983        .replace_all(&inc, "$1 = $1 - 1$2")
1984        .to_string()
1985}
1986
1987/// Wrap single-statement `if`/`while`/`for` bodies in `{ ... }`. WGSL requires
1988/// braces on every conditional/loop body; HLSL doesn't, and a lot of MD2
1989/// preset code uses the brace-less form (`if (cond) ret.z -= 0.5;`).
1990///
1991/// We walk the source, find each `if` / `while` / `for` keyword, balance the
1992/// condition parens, and if the next non-whitespace character isn't `{`, we
1993/// wrap from there to the next top-level `;` (tracking parens to skip nested
1994/// calls) in braces.
1995fn brace_up_single_statement_blocks(src: &str) -> String {
1996    let bytes = src.as_bytes();
1997    let mut out = String::with_capacity(src.len() + 32);
1998    let mut i = 0usize;
1999
2000    while i < bytes.len() {
2001        let kw_len = match keyword_at(bytes, i) {
2002            Some(n) => n,
2003            None => {
2004                out.push(bytes[i] as char);
2005                i += 1;
2006                continue;
2007            }
2008        };
2009
2010        // Found a keyword. Copy it verbatim, then look for the `(` that opens
2011        // the condition.
2012        out.push_str(&src[i..i + kw_len]);
2013        i += kw_len;
2014
2015        // Skip whitespace between keyword and `(`.
2016        let mut j = i;
2017        while j < bytes.len() && bytes[j].is_ascii_whitespace() {
2018            j += 1;
2019        }
2020        if j >= bytes.len() || bytes[j] != b'(' {
2021            // Not a real condition — bare `for(int i=0;...)` would have
2022            // matched the `(` immediately, so this is some unrelated token.
2023            // Continue normally.
2024            out.push_str(&src[i..j]);
2025            i = j;
2026            continue;
2027        }
2028
2029        // Balance parens on the condition.
2030        let cond_start = j;
2031        let mut depth = 0i32;
2032        while j < bytes.len() {
2033            match bytes[j] {
2034                b'(' => depth += 1,
2035                b')' => {
2036                    depth -= 1;
2037                    if depth == 0 {
2038                        j += 1;
2039                        break;
2040                    }
2041                }
2042                _ => {}
2043            }
2044            j += 1;
2045        }
2046        // Copy the whitespace + condition + `)` verbatim.
2047        out.push_str(&src[i..j]);
2048        i = j;
2049
2050        // Skip whitespace after `)`.
2051        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
2052            out.push(bytes[i] as char);
2053            i += 1;
2054        }
2055
2056        if i >= bytes.len() || bytes[i] == b'{' {
2057            // Already braced — nothing to do.
2058            continue;
2059        }
2060
2061        // Brace-less: scan to the next `;` at depth 0 (skipping inner parens
2062        // / brackets) and wrap.
2063        let body_start = i;
2064        let mut paren = 0i32;
2065        let mut bracket = 0i32;
2066        while i < bytes.len() {
2067            match bytes[i] {
2068                b'(' => paren += 1,
2069                b')' => paren -= 1,
2070                b'[' => bracket += 1,
2071                b']' => bracket -= 1,
2072                b';' if paren == 0 && bracket == 0 => {
2073                    i += 1;
2074                    break;
2075                }
2076                _ => {}
2077            }
2078            i += 1;
2079        }
2080
2081        out.push('{');
2082        out.push(' ');
2083        out.push_str(&src[body_start..i]);
2084        out.push_str(" }");
2085        // Force a `_unused` if cond_start was inside an else-if chain and we
2086        // need to suppress dead branches — not done here.
2087        let _ = cond_start;
2088    }
2089
2090    out
2091}
2092
/// Report the length of an `if` / `while` / `for` keyword starting at
/// byte `i`, or `None` when no such keyword starts there.
///
/// The keyword has to stand alone: the byte before `i` (if any) and the
/// byte right after the keyword (if any) must not be ASCII alphanumeric
/// or `_`.
fn keyword_at(bytes: &[u8], i: usize) -> Option<usize> {
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    // Left boundary: `i` must not be in the middle of an identifier.
    if i > 0 && is_ident(bytes[i - 1]) {
        return None;
    }
    let rest = &bytes[i..];
    ["if", "while", "for"]
        .iter()
        .map(|kw| kw.as_bytes())
        // Right boundary: the keyword must not run on into an identifier.
        .find(|kw| rest.starts_with(kw) && !rest.get(kw.len()).is_some_and(|&b| is_ident(b)))
        .map(|kw| kw.len())
}
2114
2115/// Rewrite `tex3D(<sampler>, <uvw>)` to a real 3D `textureSample` against
2116/// the noise-volume bindings.
2117///
2118/// An earlier implementation collapsed every `tex3D` to a 2D
2119/// `GetPixel(uvw.xy)` fallback — that kept the shader compiling but
2120/// produced wrong output for any preset that relied on noisevol's true
2121/// volumetric variance (many in-the-wild comps reference
2122/// `sampler_noisevol_hq`). Now that the renderer actually binds 3D noise
2123/// textures, the rewriter routes the known names onto them and only
2124/// falls back for unknown sampler names.
2125///
2126/// **Return type**: vec4. HLSL's `tex3D` returns float4 and presets often
2127/// store the result in `float4 noise2 = tex3D(...);` and read `.w` later —
2128/// `textureSample` of a `texture_3d<f32>` is already vec4.
2129fn rewrite_tex3d_calls(src: &str) -> String {
2130    let bytes = src.as_bytes();
2131    let mut out = String::with_capacity(src.len());
2132    let mut i = 0usize;
2133
2134    while i < bytes.len() {
2135        // Match `tex3D` on a word boundary.
2136        if i + 5 <= bytes.len() && &bytes[i..i + 5] == b"tex3D" {
2137            let prev_ok = i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
2138            let next_open = i + 5 < bytes.len() && bytes[i + 5] == b'(';
2139            if prev_ok && next_open {
2140                // Skip past `tex3D(`.
2141                let arg_start = i + 6;
2142                // Find the comma at depth 0 (separates sampler from uvw).
2143                let mut j = arg_start;
2144                let mut depth = 1i32;
2145                let mut comma = None;
2146                while j < bytes.len() && depth > 0 {
2147                    match bytes[j] {
2148                        b'(' => depth += 1,
2149                        b')' => {
2150                            depth -= 1;
2151                            if depth == 0 {
2152                                break;
2153                            }
2154                        }
2155                        b',' if depth == 1 && comma.is_none() => comma = Some(j),
2156                        _ => {}
2157                    }
2158                    j += 1;
2159                }
2160                if let Some(c) = comma
2161                    && j < bytes.len()
2162                {
2163                    let sampler = src[arg_start..c].trim();
2164                    let uvw = src[c + 1..j].trim();
2165                    let replacement = match sampler {
2166                        s if s.ends_with("noisevol_lq") => format!(
2167                            "textureSample(sampler_noisevol_lq_texture, {}, {uvw})",
2168                            noise_sampler_for(s)
2169                        ),
2170                        s if s.ends_with("noisevol_hq") => format!(
2171                            "textureSample(sampler_noisevol_hq_texture, {}, {uvw})",
2172                            noise_sampler_for(s)
2173                        ),
2174                        _ => {
2175                            // Unknown 3D sampler — keep the 2D fallback so
2176                            // the wrapper compiles instead of failing.
2177                            format!(
2178                                "vec4<f32>(GetPixel(({uvw}).xy), 1.0) /*was: tex3D({sampler})*/"
2179                            )
2180                        }
2181                    };
2182                    out.push_str(&replacement);
2183                    i = j + 1;
2184                    continue;
2185                }
2186            }
2187        }
2188        out.push(bytes[i] as char);
2189        i += 1;
2190    }
2191
2192    out
2193}
2194
2195fn replace_semantics(code: &str) -> String {
2196    SEMANTICS_REGEX.replace_all(code, "").to_string()
2197}
2198
2199/// Walk the source for `var <NAME>: <TYPE> [= INIT];` declarations; the
2200/// first time a `NAME` appears *in the current scope*, keep it; every later
2201/// declaration of the same `NAME` in the same scope becomes a plain
2202/// assignment (`NAME = INIT;`) or — if it had no initialiser — is dropped
2203/// entirely.
2204///
2205/// HLSL allows `float3 ret1 = ret1;` to shadow a previous declaration
2206/// (or just redundantly redeclare it); WGSL rejects with
2207/// `redefinition of ret1`. The presets that hit this are typically
2208/// older MD2 user-shader idioms.
2209///
2210/// Scope tracking matters here. A flat `seen` set is wrong: a global
2211/// `var tmp: f32;` plus a function-local `var tmp: f32;` inside a helper
2212/// would drop the inner decl while the helper's body still referenced
2213/// `tmp`, which then failed naga as `no definition in scope for
2214/// identifier: tmp`. We push a fresh `seen` set on every `{` and pop
2215/// on the matching `}`, so each scope dedups independently.
2216fn dedup_var_declarations(src: &str) -> String {
2217    use std::collections::HashMap;
2218    static VAR_DECL: LazyLock<Regex> = LazyLock::new(|| {
2219        // Anchor relaxed from `(?m)^\s*` to `\s*` and matched by hand
2220        // below: we still want declarations on their own line in practice,
2221        // but the scope walk needs to align byte positions with brace
2222        // boundaries, which `replace_all`'s line-anchored variant doesn't
2223        // expose. The same shape is matched, just without `^`.
2224        // Capture group 4 is the *type* (everything between `:` and the
2225        // optional `=`/`;`); a second declaration of the same name with a
2226        // different type is a deliberate shadow and must NOT be dropped.
2227        Regex::new(
2228            r"(\n[ \t]*|\A[ \t]*)(var|let)\s+([A-Za-z_][A-Za-z0-9_]*)\s*:\s*([^=;]+?)\s*(=\s*[^;]+)?;",
2229        )
2230        .unwrap()
2231    });
2232
2233    // Pre-compute scope boundaries by walking the source once and recording
2234    // `{` and `}` byte positions (ignoring those inside string/char literals
2235    // — none occur in MD2 user shaders — and inside line/block comments).
2236    let bytes = src.as_bytes();
2237    let mut depth: i32 = 0;
2238    let mut depth_at: Vec<i32> = Vec::with_capacity(bytes.len() + 1);
2239    depth_at.push(0);
2240    let mut i = 0usize;
2241    while i < bytes.len() {
2242        // Skip line comments verbatim — their `{`/`}` shouldn't count.
2243        if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'/' {
2244            while i < bytes.len() && bytes[i] != b'\n' {
2245                depth_at.push(depth);
2246                i += 1;
2247            }
2248            continue;
2249        }
2250        if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'*' {
2251            while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
2252                depth_at.push(depth);
2253                i += 1;
2254            }
2255            if i + 1 < bytes.len() {
2256                depth_at.push(depth);
2257                depth_at.push(depth);
2258                i += 2;
2259            }
2260            continue;
2261        }
2262        match bytes[i] {
2263            b'{' => depth += 1,
2264            b'}' => depth -= 1,
2265            _ => {}
2266        }
2267        depth_at.push(depth);
2268        i += 1;
2269    }
2270
2271    // Map of scope-depth → (name → declared-type). Each scope dedups
2272    // independently; when we exit a scope (depth dips), that scope's
2273    // record is wiped so a sibling block opening at the same depth
2274    // starts fresh. Storing the type lets the dedup distinguish a true
2275    // redundant re-declaration (same name, same type — drop the second
2276    // `var`) from a deliberate shadow (same name, different type —
2277    // keep both so the local-scope intent survives).
2278    let mut seen_by_depth: HashMap<i32, HashMap<String, String>> = HashMap::new();
2279    let mut last_pos: usize = 0;
2280
2281    VAR_DECL
2282        .replace_all(src, |caps: &regex::Captures| {
2283            let prefix = &caps[1];
2284            let name = &caps[3];
2285            let ty = caps[4].trim();
2286            let init = caps.get(5).map(|m| m.as_str()).unwrap_or("");
2287            let start = caps.get(0).unwrap().start();
2288            // Sweep depth_at from the previous match to here; clear the
2289            // record of every depth we exited (depth dipped below it).
2290            // This handles sibling scopes — closing one function's `}`
2291            // resets that depth before the next function's `var`s arrive.
2292            let mut min_in_window = i32::MAX;
2293            for d in &depth_at[last_pos..=start.min(depth_at.len() - 1)] {
2294                if *d < min_in_window {
2295                    min_in_window = *d;
2296                }
2297            }
2298            let cur_depth = depth_at.get(start).copied().unwrap_or(0).max(0);
2299            // Any depth strictly above the lowest point we passed through
2300            // had its scope closed at some point in the window — wipe
2301            // their records.
2302            if min_in_window < i32::MAX {
2303                seen_by_depth.retain(|d, _| *d <= min_in_window);
2304            }
2305            last_pos = caps.get(0).unwrap().end();
2306            let seen = seen_by_depth.entry(cur_depth).or_default();
2307            match seen.get(name) {
2308                Some(prev_ty) if prev_ty == ty => {
2309                    // Exact duplicate at the same scope — strip `var`.
2310                    let init_trim = init.trim_start_matches('=').trim();
2311                    if init_trim.is_empty() {
2312                        format!("{prefix}/* dropped redundant var {name} */")
2313                    } else {
2314                        format!("{prefix}{name} = {init_trim};")
2315                    }
2316                }
2317                _ => {
2318                    // First sighting OR same name with a different type
2319                    // (deliberate shadow). Keep the declaration; record
2320                    // the latest type so a third same-type repeat would
2321                    // still be deduped.
2322                    seen.insert(name.to_string(), ty.to_string());
2323                    caps[0].to_string()
2324                }
2325            }
2326        })
2327        .to_string()
2328}
2329
/// HLSL allows comma-as-statement-separator at the top of a function body:
///
/// ```hlsl
/// ret += saturate(...),
/// ret += saturate(...),
/// ret = lerp(ret, blurs, t);
/// ```
///
/// WGSL requires `;` between statements; a stray comma triggers
/// `expected ';', found ','`. At this point in the pipeline every multi-
/// name `var` declaration has already been split into one `var` per name
/// (see [`rewrite_local_declarations`]), so a comma at paren/bracket/
/// angle depth 0 that is not inside a brace initialiser list is treated
/// as a statement separator and rewritten to `;`.
///
/// Line and block comments are copied through verbatim, so a `,` inside
/// `/* */` or after `//` is never rewritten.
///
/// The scan works on bytes and only ever swaps an ASCII `,` for an ASCII
/// `;`, so non-ASCII text (for example accented words in a preset comment)
/// is preserved exactly.
///
/// NOTE(review): `<` and `>` are counted without distinguishing generic
/// brackets from comparison operators or `->`. An unbalanced one outside
/// parentheses leaves the angle counter non-zero and suppresses the rewrite
/// for the remainder of the input. That behaviour is kept as-is here.
fn replace_statement_commas(src: &str) -> String {
    let bytes = src.as_bytes();
    // Assemble the result as raw bytes. Pushing `byte as char` into a
    // `String` would re-encode every byte >= 0x80 as its own Latin-1 code
    // point and corrupt multi-byte UTF-8 sequences.
    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut paren = 0i32;
    let mut bracket = 0i32;
    let mut angle = 0i32;
    // Stack of in-flight `{` kinds: `true` = init-list (followed `=`),
    // `false` = code block. Commas inside an init-list are constructor
    // separators, not statement separators — converting them to `;`
    // corrupts shapes like `float2x2 rot = { cos(q9), sin(q9), … };`.
    let mut brace_stack: Vec<bool> = Vec::new();
    // Last non-whitespace byte seen outside comments; used to decide
    // whether a `{` opens an initialiser list.
    let mut last_sig: u8 = 0;
    let mut i = 0usize;
    while i < bytes.len() {
        let c = bytes[i];
        let next = bytes.get(i + 1).copied();

        // Line comment: copy everything up to (not including) the newline.
        if c == b'/' && next == Some(b'/') {
            let end = bytes[i..]
                .iter()
                .position(|&b| b == b'\n')
                .map_or(bytes.len(), |offset| i + offset);
            out.extend_from_slice(&bytes[i..end]);
            i = end;
            continue;
        }

        // Block comment: copy through the closing `*/` when there is one.
        // Copying bytes (rather than slicing `src`) also keeps an
        // unterminated comment ending in a multi-byte character from
        // panicking on a non-char-boundary slice.
        if c == b'/' && next == Some(b'*') {
            let start = i;
            i += 2;
            while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
                i += 1;
            }
            if i + 1 < bytes.len() {
                i += 2;
            }
            out.extend_from_slice(&bytes[start..i]);
            continue;
        }

        let in_init_list = brace_stack.last().copied().unwrap_or(false);
        match c {
            b'(' => paren += 1,
            b')' => paren -= 1,
            b'[' => bracket += 1,
            b']' => bracket -= 1,
            b'<' => angle += 1,
            b'>' => angle -= 1,
            b'{' => {
                // A brace directly after `=` starts an initialiser list; a
                // brace directly after the `{` of an initialiser list is a
                // nested initialiser row.
                let is_init = last_sig == b'=' || (in_init_list && last_sig == b'{');
                brace_stack.push(is_init);
            }
            b'}' => {
                brace_stack.pop();
            }
            b',' if paren == 0 && bracket == 0 && angle == 0 && !in_init_list => {
                // Statement separator — emit `;`.
                out.push(b';');
                i += 1;
                last_sig = b';';
                continue;
            }
            _ => {}
        }
        if !c.is_ascii_whitespace() {
            last_sig = c;
        }
        out.push(c);
        i += 1;
    }
    String::from_utf8(out).expect("only ASCII bytes are rewritten, so the output stays valid UTF-8")
}
2414
2415#[cfg(test)]
2416mod tests {
2417    use super::*;
2418    use crate::texture_plan::decompose_sampler_name;
2419
2420    #[test]
2421    fn reassigned_param_is_shadowed_with_var() {
2422        // HLSL value-parameters are mutable; WGSL params are immutable.
2423        // The lift pass must rename the param and prepend a shadowing var.
2424        let hlsl = "float2 PutDist(float x) { float tmp; x = pow(x, 2.0); \
2425                    tmp = 1 - x; return float2(tmp, x); } \
2426                    shader_body { ret = float3(PutDist(0.5), 0); }";
2427        let wgsl = translate_shader(hlsl).unwrap();
2428        // Renamed param in signature
2429        assert!(
2430            wgsl.contains("x_md2arg: f32"),
2431            "expected renamed param, got:\n{wgsl}"
2432        );
2433        // Shadowing var at body entry
2434        assert!(
2435            wgsl.contains("var x: f32 = x_md2arg"),
2436            "expected shadow var, got:\n{wgsl}"
2437        );
2438    }
2439
2440    #[test]
2441    fn unreassigned_param_left_alone() {
2442        // A param that's only read should pass through unchanged — no
2443        // unnecessary shadowing.
2444        let hlsl = "float2 Pass(float x) { return float2(x, x*2); } \
2445                    shader_body { ret = float3(Pass(0.5), 0); }";
2446        let wgsl = translate_shader(hlsl).unwrap();
2447        assert!(
2448            wgsl.contains("fn Pass(x: f32) -> vec2<f32>"),
2449            "expected unchanged signature, got:\n{wgsl}"
2450        );
2451        assert!(
2452            !wgsl.contains("x_md2arg"),
2453            "should not rename a read-only param, got:\n{wgsl}"
2454        );
2455    }
2456
2457    #[test]
2458    fn param_written_via_swizzle_is_shadowed() {
2459        // `uv.x = …` is a write through swizzle — also requires var.
2460        let hlsl = "float2 Tweak(float2 uvi) { uvi.x = uvi.x * 2; return uvi; } \
2461                    shader_body { ret = float3(Tweak(uv), 0); }";
2462        let wgsl = translate_shader(hlsl).unwrap();
2463        assert!(
2464            wgsl.contains("uvi_md2arg: vec2<f32>"),
2465            "expected renamed param, got:\n{wgsl}"
2466        );
2467        assert!(
2468            wgsl.contains("var uvi: vec2<f32> = uvi_md2arg"),
2469            "expected shadow var, got:\n{wgsl}"
2470        );
2471    }
2472
2473    #[test]
2474    fn type_replacement() {
2475        let hlsl = "float4 color = float4(1.0, 0.0, 0.0, 1.0);";
2476        let wgsl = translate_shader(hlsl).unwrap();
2477        assert!(wgsl.contains("vec4<f32>"));
2478    }
2479
2480    #[test]
2481    fn function_replacement() {
2482        let hlsl = "color = lerp(a, b, t);";
2483        let wgsl = translate_shader(hlsl).unwrap();
2484        assert!(wgsl.contains("mix"));
2485    }
2486
2487    #[test]
2488    fn saturate_replacement() {
2489        let hlsl = "color = saturate(color);";
2490        let wgsl = translate_shader(hlsl).unwrap();
2491        assert!(wgsl.contains("clamp"));
2492    }
2493
2494    #[test]
2495    fn texture_sampling_routes_through_main_sampler() {
2496        let hlsl = "color = tex2D(sampler_main, uv);";
2497        let wgsl = translate_shader(hlsl).unwrap();
2498        // The codegen wrapper exposes `sampler_main_texture` + `sampler_main`,
2499        // so every tex2D call must land on those two bindings.
2500        assert!(
2501            wgsl.contains("textureSample(sampler_main_texture, sampler_main, uv)"),
2502            "got: {wgsl}"
2503        );
2504    }
2505
2506    #[test]
2507    fn texture_sampling_unknown_samplers_still_fall_back() {
2508        // User-loaded textures and arbitrary preset sampler names that the
2509        // wrapper doesn't recognise must keep producing valid WGSL. The
2510        // translator routes them through `sampler_main` and tags the
2511        // origin name in a comment for debugging.
2512        let hlsl = "color = tex2D(sampler_clouds, uv);";
2513        let wgsl = translate_shader(hlsl).unwrap();
2514        assert!(
2515            wgsl.contains("textureSample(sampler_main_texture, sampler_main, uv)"),
2516            "got: {wgsl}"
2517        );
2518        assert!(wgsl.contains("/*was: sampler_clouds*/"));
2519    }
2520
2521    #[test]
2522    fn fw_fc_pw_pc_main_route_to_matching_sampler() {
2523        // The four MD2 sampler variants of `main` keep the main texture but
2524        // pick a different sampler binding. The translator no longer
2525        // collapses them to the plain `sampler_main` — they hit
2526        // `sampler_fw`/`_fc`/`_pw`/`_pc` directly.
2527        for (name, expected_sampler) in [
2528            ("sampler_fw_main", "sampler_fw"),
2529            ("sampler_fc_main", "sampler_fc"),
2530            ("sampler_pw_main", "sampler_pw"),
2531            ("sampler_pc_main", "sampler_pc"),
2532        ] {
2533            let hlsl = format!("color = tex2D({name}, uv);");
2534            let wgsl = translate_shader(&hlsl).unwrap();
2535            let needle = format!("textureSample(sampler_main_texture, {expected_sampler}, uv)");
2536            assert!(wgsl.contains(&needle), "for {name}, got: {wgsl}");
2537            // Recognised bindings don't emit the `/*was: …*/` debug
2538            // comment — keeps the translated WGSL tidy.
2539            assert!(
2540                !wgsl.contains(&format!("/*was: {name}*/")),
2541                "recognised variant should not carry a fallback comment"
2542            );
2543        }
2544    }
2545
2546    #[test]
2547    fn noise_2d_samplers_route_to_dedicated_textures() {
2548        for (sampler, expected_tex) in [
2549            ("sampler_noise_lq", "sampler_noise_lq_texture"),
2550            ("sampler_noise_mq", "sampler_noise_mq_texture"),
2551            ("sampler_noise_hq", "sampler_noise_hq_texture"),
2552            // Prefixed variants pick the same texture but a different
2553            // sampler — covered by `noise_2d_prefixed_picks_sampler`.
2554            ("sampler_pw_noise_lq", "sampler_noise_lq_texture"),
2555            ("sampler_fw_noise_hq", "sampler_noise_hq_texture"),
2556        ] {
2557            let hlsl = format!("color = tex2D({sampler}, uv);");
2558            let wgsl = translate_shader(&hlsl).unwrap();
2559            assert!(
2560                wgsl.contains(&format!("textureSample({expected_tex},")),
2561                "for {sampler}, got: {wgsl}"
2562            );
2563        }
2564    }
2565
2566    #[test]
2567    fn noise_2d_prefixed_picks_sampler() {
2568        let wgsl = translate_shader("color = tex2D(sampler_pw_noise_lq, uv);").unwrap();
2569        assert!(
2570            wgsl.contains("textureSample(sampler_noise_lq_texture, sampler_pw, uv)"),
2571            "got: {wgsl}"
2572        );
2573    }
2574
2575    #[test]
2576    fn tex3d_noisevol_samplers_hit_3d_texture() {
2577        // Real MD2 pattern: `tex3D(sampler_noisevol_hq, uvw)`. An earlier
2578        // implementation degraded this to a 2D GetPixel call; now it hits
2579        // the real texture_3d binding.
2580        let wgsl =
2581            translate_shader("color = tex3D(sampler_noisevol_hq, vec3<f32>(uv, time));").unwrap();
2582        assert!(
2583            wgsl.contains("textureSample(sampler_noisevol_hq_texture,"),
2584            "got: {wgsl}"
2585        );
2586        // No `GetPixel(...)` fallback should remain in the translated body.
2587        assert!(
2588            !wgsl.contains("GetPixel((vec3<f32>(uv, time)).xy)"),
2589            "fallback path should be gone: {wgsl}"
2590        );
2591    }
2592
2593    #[test]
2594    fn tex3d_unknown_sampler_keeps_fallback() {
2595        // An unrecognised tex3D sampler still degrades to the 2D fallback
2596        // so the wrapper compiles even when the author references a
2597        // texture we don't bind.
2598        let wgsl = translate_shader("color = tex3D(sampler_user_volume, uvw);").unwrap();
2599        assert!(
2600            wgsl.contains("GetPixel((uvw).xy)"),
2601            "expected fallback, got: {wgsl}"
2602        );
2603    }
2604
2605    #[test]
2606    fn shader_body_wrapper_is_stripped() {
2607        let hlsl = "shader_body\n{\n    ret = float3(1, 0, 0);\n}\n";
2608        let out = strip_shader_body_wrapper(hlsl);
2609        assert!(!out.contains("shader_body"), "wrapper not stripped: {out}");
2610        assert!(out.contains("ret = float3(1, 0, 0)"));
2611    }
2612
2613    #[test]
2614    fn shader_body_with_inner_braces_balances() {
2615        // A nested block (e.g. an `if`) must not cut the wrapper off early.
2616        let hlsl = r#"shader_body
2617{
2618    if (a > 0) {
2619        ret = float3(1, 0, 0);
2620    }
2621    ret *= 0.5;
2622}"#;
2623        let out = strip_shader_body_wrapper(hlsl);
2624        assert!(!out.contains("shader_body"));
2625        assert!(out.contains("ret *= 0.5"));
2626    }
2627
2628    #[test]
2629    fn shader_body_with_brace_in_comment_balances() {
2630        let hlsl = r#"shader_body
2631{
2632    // closing } in a comment must not end the body
2633    ret = float3(1);
2634}"#;
2635        let out = strip_shader_body_wrapper(hlsl);
2636        assert!(!out.contains("shader_body"));
2637        assert!(out.contains("ret = float3(1)"));
2638    }
2639
2640    #[test]
2641    fn no_shader_body_wrapper_is_passthrough() {
2642        let hlsl = "ret = float3(1, 0, 0);";
2643        let out = strip_shader_body_wrapper(hlsl);
2644        assert_eq!(out, hlsl);
2645    }
2646
2647    #[test]
2648    fn local_declaration_with_init_becomes_var() {
2649        let hlsl = "shader_body { float gx1 = a + b * c; ret = gx1.xxx; }";
2650        let wgsl = translate_shader(hlsl).unwrap();
2651        assert!(
2652            wgsl.contains("var gx1: f32 = a + b * c"),
2653            "expected var-form local, got: {wgsl}"
2654        );
2655    }
2656
2657    #[test]
2658    fn local_declaration_without_init_becomes_var() {
2659        // Real preset pattern: declare a vec2, then assign to it on the next
2660        // few lines (for incrementally building offset UVs). The rewriter is
2661        // line-anchored — preset bodies span multiple lines, which is the
2662        // shape it's optimised for.
2663        let hlsl = "    vec2<f32> uv2;\n    uv2 = uv;\n";
2664        // Note: `vec2<f32>` is what `replace_types` emits — here we feed it
2665        // post-substitution to assert the rewriter handles WGSL-shaped types.
2666        let wgsl = rewrite_local_declarations(hlsl);
2667        assert!(wgsl.contains("var uv2: vec2<f32>;"), "got: {wgsl}");
2668    }
2669
2670    #[test]
2671    fn local_declaration_multi_name_expands() {
2672        let hlsl = "    vec3<f32> ret1, neu, crisp, blur;\n";
2673        let wgsl = rewrite_local_declarations(hlsl);
2674        // Each name becomes its own var statement.
2675        assert!(wgsl.contains("var ret1: vec3<f32>;"), "got: {wgsl}");
2676        assert!(wgsl.contains("var neu: vec3<f32>;"));
2677        assert!(wgsl.contains("var crisp: vec3<f32>;"));
2678        assert!(wgsl.contains("var blur: vec3<f32>;"));
2679    }
2680
2681    #[test]
2682    fn inline_multi_decl_on_single_line_splits() {
2683        // Isosceles preset's kaleidoscope state line. Without the
2684        // pre-split, only `cntr` would convert; `sin`, `cos`, `scale`
2685        // would stay as raw HLSL and trip naga.
2686        //
2687        // Top-level globals (`cntr`, `scale`) get hoisted to module-
2688        // scope `var<private>` so lifted user functions can read them.
2689        // `sin` / `cos` are kept as fs_main locals (they shadow WGSL
2690        // builtins; hoisting would break every other site that calls
2691        // `sin(x)` as a function).
2692        let hlsl =
2693            "float2 cntr = float2(q13,q14); float sin = q11; float cos = q12; float scale = q15;";
2694        let wgsl = translate_shader(hlsl).unwrap();
2695        assert!(
2696            wgsl.contains("var<private> cntr: vec2<f32>;"),
2697            "got: {wgsl}"
2698        );
2699        assert!(wgsl.contains("cntr = vec2<f32>(q13,q14);"), "got: {wgsl}");
2700        assert!(wgsl.contains("var sin: f32 = q11;"), "got: {wgsl}");
2701        assert!(wgsl.contains("var cos: f32 = q12;"), "got: {wgsl}");
2702        assert!(wgsl.contains("var<private> scale: f32;"), "got: {wgsl}");
2703        assert!(wgsl.contains("scale = q15;"), "got: {wgsl}");
2704    }
2705
2706    #[test]
2707    fn inline_split_handles_vec_types_after_prior_decl() {
2708        // Regression guard — a naive `\b` boundary in
2709        // `INLINE_DECL_SPLIT_REGEX` would fail after `>` (already
2710        // non-word), so a `vec2<f32>` decl following any other
2711        // `;`-terminated decl on the same line would stay un-rewritten.
2712        // Real preset shape lifted from `MilkDrop2077.1040.milk` (the
2713        // `shadow` helper):
2714        let hlsl = "f32 dark; vec2<f32> uvc, dx;";
2715        let wgsl = rewrite_local_declarations(hlsl);
2716        assert!(wgsl.contains("var dark: f32;"), "got: {wgsl}");
2717        assert!(wgsl.contains("var uvc: vec2<f32>;"), "got: {wgsl}");
2718        assert!(wgsl.contains("var dx: vec2<f32>;"), "got: {wgsl}");
2719    }
2720
2721    #[test]
2722    fn inline_split_detaches_open_brace_glue() {
2723        // Regression guard — `{TYPE` glued at the head of a function or
2724        // conditional body wasn't split by a `;`-only separator. The
2725        // first inner decl therefore stayed attached to the brace and
2726        // `LOCAL_DECL_REGEX` (anchored at the start of a line) skipped
2727        // it. Real preset shape from `MilkDrop2077.1040.milk`'s
2728        // `MinDistB`:
2729        let hlsl = "f32 MinDistB(uvi: vec2<f32>) {f32 tmp; vec4<f32> nb;}";
2730        let wgsl = rewrite_local_declarations(hlsl);
2731        assert!(wgsl.contains("var tmp: f32;"), "got: {wgsl}");
2732        assert!(wgsl.contains("var nb: vec4<f32>;"), "got: {wgsl}");
2733    }
2734
2735    #[test]
2736    fn inline_split_skips_vec_constructor_call() {
2737        // The split must NOT fire on `; vec3<f32>(0, 0, 0)` — that's a
2738        // constructor expression, not a declaration. The regex requires
2739        // an identifier char after the type+space, so a following `(`
2740        // (constructor) is left alone.
2741        let hlsl = "ret = vec3<f32>(0.0); vec3<f32>(1.0);";
2742        let wgsl = rewrite_local_declarations(hlsl);
2743        // No newline injected before the bare constructor — the original
2744        // `;` and ` vec3<f32>(1.0);` stay on one line.
2745        assert!(!wgsl.contains(";\nvec3<f32>(1.0)"), "got: {wgsl}");
2746    }
2747
2748    #[test]
2749    fn for_loop_init_semi_does_not_split_inline_decls() {
2750        // The inline-decl splitter must not fire on the `;` inside a
2751        // `for(...)` init/cond/step. Standard for-loop shape has the
2752        // first `;` followed by an expression, never by a type keyword.
2753        // The for-int rewrite has already turned `int` into `var i: i32
2754        // =` shape by this stage, so we feed the post-rewrite shape.
2755        let hlsl = "for(var i: i32 = 0; i < 10; i = i + 1) { ret = vec3<f32>(0); }";
2756        let wgsl = rewrite_local_declarations(hlsl);
2757        // No newline injected after the first `;` (the loop condition
2758        // separator) — the `i < 10` test stays intact.
2759        assert!(wgsl.contains("i < 10"), "got: {wgsl}");
2760        assert!(
2761            !wgsl.contains(";\ni < 10"),
2762            "for-loop `;` got wrongly split: {wgsl}"
2763        );
2764    }
2765
2766    #[test]
2767    fn local_declaration_i32_loop_counter() {
2768        // `int anz = 3;` is widened to `f32` by `replace_types` (MD2 uses
2769        // ints interchangeably with floats), so the rewrite produces a
2770        // float var. At top level (no enclosing `shader_body`), it parses
2771        // as `Item::GlobalVar` and the hoist pass splits it into a
2772        // module-scope `var<private>` plus an in-body assignment.
2773        let hlsl = "    int anz = 3;\n";
2774        let wgsl = translate_shader(hlsl).unwrap();
2775        assert!(wgsl.contains("var<private> anz: f32;"), "got: {wgsl}");
2776        assert!(wgsl.contains("anz = 3;"), "got: {wgsl}");
2777    }
2778
2779    #[test]
2780    fn sampler_declaration_is_stripped() {
2781        let hlsl = "sampler sampler_pw_noise_lq;\nret = float3(1);\n";
2782        let out = strip_sampler_declarations(hlsl);
2783        assert!(!out.contains("sampler_pw_noise_lq"));
2784        assert!(out.contains("ret = float3(1)"));
2785    }
2786
2787    #[test]
2788    fn preprocessor_directives_are_stripped() {
2789        let hlsl = "#define M_PI 3.14159\n#include <something>\nret = float3(1);";
2790        let out = strip_preprocessor(hlsl);
2791        assert!(!out.contains("#define"));
2792        assert!(!out.contains("#include"));
2793        assert!(out.contains("ret = float3(1)"));
2794    }
2795
2796    #[test]
2797    fn brace_up_single_statement_if() {
2798        let src = "if (a > 0) ret = 1.0;";
2799        let out = brace_up_single_statement_blocks(src);
2800        assert!(out.contains("if (a > 0) {"), "got: {out}");
2801        assert!(out.contains("ret = 1.0;"));
2802        assert!(out.ends_with("}"));
2803    }
2804
2805    #[test]
2806    fn brace_up_leaves_already_braced_alone() {
2807        let src = "if (a > 0) { ret = 1.0; }";
2808        let out = brace_up_single_statement_blocks(src);
2809        assert_eq!(out, src);
2810    }
2811
2812    #[test]
2813    fn brace_up_skips_keyword_inside_identifier() {
2814        // `notif` and `forevermore` must not be treated as if/for keywords.
2815        let src = "var notif: f32 = 0;";
2816        let out = brace_up_single_statement_blocks(src);
2817        assert_eq!(out, src);
2818    }
2819
2820    #[test]
2821    fn brace_up_handles_nested_parens_in_condition() {
2822        let src = "if (max(a, b) > 0) ret.z -= 0.5;";
2823        let out = brace_up_single_statement_blocks(src);
2824        assert!(out.contains("if (max(a, b) > 0) {"), "got: {out}");
2825    }
2826
2827    #[test]
2828    fn brace_up_handles_function_call_in_body() {
2829        // The `;` we wrap to is the end-of-statement, not the one inside
2830        // `mix(a, b);`.
2831        let src = "if (a > 0) ret = mix(a, b, 0.5);";
2832        let out = brace_up_single_statement_blocks(src);
2833        assert!(out.contains("ret = mix(a, b, 0.5);"));
2834        assert!(out.ends_with("}"));
2835    }
2836
2837    #[test]
2838    fn tex3d_known_noisevol_hits_3d_binding() {
2839        // The pipeline binds the volume noise for real, so
2840        // `tex3D(sampler_noisevol_hq, …)` resolves to a `textureSample`
2841        // against the 3D texture instead of the legacy `GetPixel(uvw.xy)`
2842        // fallback.
2843        let src = "ret = tex3D(sampler_noisevol_hq, vec3<f32>(uv, time));";
2844        let out = rewrite_tex3d_calls(src);
2845        assert!(
2846            out.contains(
2847                "textureSample(sampler_noisevol_hq_texture, sampler_pw, vec3<f32>(uv, time))"
2848            ),
2849            "got: {out}"
2850        );
2851    }
2852
2853    #[test]
2854    fn lerp_with_whitespace_is_normalised_and_rewritten() {
2855        // Real preset pattern: a multi-line `lerp (\n  a,\n  b,\n  t)` call
2856        // becomes `mix(...)` thanks to call-whitespace normalisation.
2857        let hlsl = "ret = lerp (a, b, 0.5);";
2858        let wgsl = translate_shader(hlsl).unwrap();
2859        assert!(wgsl.contains("mix(a, b, 0.5)"), "got: {wgsl}");
2860        assert!(!wgsl.contains("lerp"));
2861    }
2862
2863    #[test]
2864    fn float2x2_becomes_mat2x2() {
2865        // HLSL float2x2 is a 2×2 matrix; the previous code only handled
2866        // 3×3 and 4×4 so `float2x2` got mangled by the `float2` rewrite
2867        // into `vec2<f32>x2` (which then failed naga's parser).
2868        let hlsl = "var m: f32 = 0; uv1 = mul(float2x2(q9,q10,-q10,q9), uv1);";
2869        let wgsl = translate_shader(hlsl).unwrap();
2870        assert!(wgsl.contains("mat2x2<f32>"), "got: {wgsl}");
2871        assert!(!wgsl.contains("vec2<f32>x2"));
2872    }
2873
2874    #[test]
2875    fn unary_plus_is_stripped_after_open_paren() {
2876        let hlsl = "ret.x += (+dx.x - dy.x)*0.4;";
2877        let wgsl = translate_shader(hlsl).unwrap();
2878        // The unary `+` after `(` must be gone; the binary `-` stays.
2879        assert!(wgsl.contains("(dx.x - dy.x)"), "got: {wgsl}");
2880    }
2881
2882    #[test]
2883    fn statement_commas_become_semicolons() {
2884        // `ret += a, ret += b;` → `ret += a; ret += b;`.
2885        let src = "ret += a, ret += b;";
2886        let out = replace_statement_commas(src);
2887        assert_eq!(out, "ret += a; ret += b;");
2888    }
2889
2890    #[test]
2891    fn dedup_var_decl_second_becomes_assignment() {
2892        let src = "var ret1: vec3<f32> = vec3<f32>(0);\nvar ret1: vec3<f32> = ret1;\n";
2893        let out = dedup_var_declarations(src);
2894        assert!(
2895            out.contains("var ret1: vec3<f32> = vec3<f32>(0);"),
2896            "got: {out}"
2897        );
2898        assert!(out.contains("ret1 = ret1;"), "got: {out}");
2899    }
2900
2901    #[test]
2902    fn postfix_increment_becomes_compound_assignment() {
2903        let src = "n++;";
2904        let out = rewrite_postfix_inc_dec(src);
2905        assert_eq!(out, "n = n + 1;");
2906    }
2907
2908    #[test]
2909    fn postfix_decrement_in_for_loop() {
2910        let src = "for (i = 10; i > 0; i--) { }";
2911        let out = rewrite_postfix_inc_dec(src);
2912        assert!(out.contains("i = i - 1)"), "got: {out}");
2913    }
2914
2915    #[test]
2916    fn postfix_increment_inside_expression_left_alone() {
2917        // We only rewrite at statement/loop-iter boundaries; bare `a + + b`
2918        // in expression position would already have whitespace separation
2919        // and not match our regex.
2920        let src = "y = a + b;";
2921        let out = rewrite_postfix_inc_dec(src);
2922        assert_eq!(out, src);
2923    }
2924
2925    #[test]
2926    fn static_const_qualifier_stripped() {
2927        let src = "static const int anz = 3;";
2928        let out = strip_storage_class_qualifiers(src);
2929        // After both passes: `static` dropped, `const int` → `int`.
2930        assert_eq!(out.trim(), "int anz = 3;");
2931    }
2932
2933    #[test]
2934    fn statement_commas_leave_call_args_alone() {
2935        // Commas inside `(...)` are call args, not statement separators.
2936        let src = "ret = mix(a, b, t);";
2937        let out = replace_statement_commas(src);
2938        assert_eq!(out, src);
2939    }
2940
2941    #[test]
2942    fn end_to_end_typical_md2_comp_shader() {
2943        // The simplest real-world comp shader pattern. After translation +
2944        // shader_body strip, the body should be syntactically WGSL-valid.
2945        let hlsl = r#"shader_body
2946{
2947    ret = tex2D(sampler_main, uv).xyz;
2948    ret *= 1.28; //gamma
2949    ret *= ret; //darken
2950}"#;
2951        let wgsl = translate_shader(hlsl).unwrap();
2952        assert!(!wgsl.contains("shader_body"));
2953        assert!(wgsl.contains("textureSample(sampler_main_texture, sampler_main, uv)"));
2954        assert!(!wgsl.contains("float"));
2955    }
2956
2957    // ---------------------------------------------------------------
2958    // User texture binding plan + scan
2959    // ---------------------------------------------------------------
2960
2961    #[test]
2962    fn scan_extracts_user_sampler_declarations() {
2963        let hlsl = "sampler sampler_clouds;\nsampler sampler_lichen;\nret = tex2D(sampler_clouds, uv).xyz;";
2964        let refs = scan_user_samplers(hlsl);
2965        let names: Vec<&str> = refs.iter().map(|r| r.full_name.as_str()).collect();
2966        assert!(names.contains(&"sampler_clouds"));
2967        assert!(names.contains(&"sampler_lichen"));
2968    }
2969
2970    #[test]
2971    fn scan_skips_builtins() {
2972        let hlsl = "sampler sampler_main;\nsampler sampler_fw_main;\nsampler sampler_noise_lq;\nsampler sampler_clouds;\nsampler sampler_blur1;";
2973        let refs = scan_user_samplers(hlsl);
2974        let names: Vec<&str> = refs.iter().map(|r| r.full_name.as_str()).collect();
2975        assert_eq!(
2976            names,
2977            vec!["sampler_clouds"],
2978            "only the user texture should survive"
2979        );
2980    }
2981
2982    #[test]
2983    fn scan_collapses_duplicates_by_full_name() {
2984        let hlsl = "sampler sampler_clouds;\nsampler sampler_clouds;\n";
2985        let refs = scan_user_samplers(hlsl);
2986        assert_eq!(refs.len(), 1);
2987    }
2988
2989    #[test]
2990    fn decompose_sampler_name_handles_filter_prefixes() {
2991        assert_eq!(
2992            decompose_sampler_name("sampler_clouds"),
2993            ("clouds".to_string(), "sampler_fw")
2994        );
2995        assert_eq!(
2996            decompose_sampler_name("sampler_fw_clouds"),
2997            ("clouds".to_string(), "sampler_fw")
2998        );
2999        assert_eq!(
3000            decompose_sampler_name("sampler_pc_clouds"),
3001            ("clouds".to_string(), "sampler_pc")
3002        );
3003        assert_eq!(
3004            decompose_sampler_name("sampler_rand02_smalltiled"),
3005            ("rand02_smalltiled".to_string(), "sampler_fw")
3006        );
3007    }
3008
3009    #[test]
3010    fn plan_empty_falls_back_to_legacy_translator() {
3011        let plan = TextureBindingPlan::empty();
3012        let wgsl = translate_shader_with_plan("color = tex2D(sampler_clouds, uv);", &plan).unwrap();
3013        // Empty plan path: the fallback `/*was: ...*/` comment must still
3014        // appear (no user routing applied).
3015        assert!(wgsl.contains("/*was: sampler_clouds*/"), "got: {wgsl}");
3016    }
3017
3018    #[test]
3019    fn plan_routes_user_sampler_to_user_binding() {
3020        let mut plan = TextureBindingPlan::empty();
3021        let slot = plan
3022            .add_slot(
3023                Some("clouds".to_string()),
3024                [256.0, 256.0, 1.0 / 256.0, 1.0 / 256.0],
3025                &[("sampler_clouds".to_string(), "sampler_fw")],
3026            )
3027            .unwrap();
3028        assert_eq!(slot, 0);
3029        let wgsl = translate_shader_with_plan("color = tex2D(sampler_clouds, uv);", &plan).unwrap();
3030        // The translator must emit the user-slot binding, not the fallback.
3031        assert!(
3032            wgsl.contains("textureSample(sampler_user_0_texture, sampler_fw, uv)"),
3033            "got: {wgsl}"
3034        );
3035        assert!(!wgsl.contains("/*was: sampler_clouds*/"));
3036    }
3037
3038    #[test]
3039    fn plan_two_aliases_share_a_slot() {
3040        // `sampler sampler_clouds` and `sampler sampler_fw_clouds` both
3041        // resolve to the same logical "clouds" texture but pick a different
3042        // sampler binding at each call site.
3043        let mut plan = TextureBindingPlan::empty();
3044        plan.add_slot(
3045            Some("clouds".to_string()),
3046            [256.0, 256.0, 1.0 / 256.0, 1.0 / 256.0],
3047            &[
3048                ("sampler_clouds".to_string(), "sampler_fw"),
3049                ("sampler_fw_clouds".to_string(), "sampler_fw"),
3050                ("sampler_pc_clouds".to_string(), "sampler_pc"),
3051            ],
3052        );
3053        assert_eq!(plan.slot_count(), 1, "all aliases must share one slot");
3054
3055        // Translating each alias keeps the slot but flips the sampler kind.
3056        let s1 =
3057            translate_shader_with_plan("ret = tex2D(sampler_fw_clouds, uv).xyz;", &plan).unwrap();
3058        let s2 =
3059            translate_shader_with_plan("ret = tex2D(sampler_pc_clouds, uv).xyz;", &plan).unwrap();
3060        assert!(s1.contains("textureSample(sampler_user_0_texture, sampler_fw, uv)"));
3061        assert!(s2.contains("textureSample(sampler_user_0_texture, sampler_pc, uv)"));
3062    }
3063
3064    // ---------------------------------------------------------------
3065    // Quick translator fixes
3066    // ---------------------------------------------------------------
3067
3068    #[test]
3069    fn mod_call_rewritten_to_float_mod_expansion() {
3070        // `mod` is a WGSL reserved keyword; HLSL uses it as the float
3071        // modulo helper. The expansion must match HLSL semantics
3072        // (= `a - floor(a/b)*b`).
3073        let wgsl = translate_shader("ret.x = mod(ang*16/M_PI, 1.0);").unwrap();
3074        assert!(
3075            wgsl.contains("floor((ang*16/M_PI) / (1.0)) * (1.0)"),
3076            "got: {wgsl}"
3077        );
3078        assert!(!wgsl.contains("mod("), "mod( call must be gone: {wgsl}");
3079    }
3080
3081    #[test]
3082    fn lowercase_tex2d_normalised_to_tex2d() {
3083        // Real preset typo: `tex2d(sampler_main, uv)`. WGSL is case-
3084        // sensitive; the wrapper exposes `textureSample` only through the
3085        // canonical `tex2D` rewrite path. Normalise before the rewrite.
3086        let wgsl = translate_shader("ret = tex2d(sampler_main, uv).xyz;").unwrap();
3087        assert!(
3088            wgsl.contains("textureSample(sampler_main_texture, sampler_main, uv)"),
3089            "got: {wgsl}"
3090        );
3091    }
3092
3093    #[test]
3094    fn lowercase_tex3d_normalised() {
3095        let wgsl =
3096            translate_shader("ret = tex3d(sampler_noisevol_hq, vec3<f32>(uv, time)).xyz;").unwrap();
3097        assert!(
3098            wgsl.contains("textureSample(sampler_noisevol_hq_texture,"),
3099            "got: {wgsl}"
3100        );
3101    }
3102
3103    // ---------------------------------------------------------------
    // Reserved-identifier renaming + user-defined function lifting
3105    // ---------------------------------------------------------------
3106
3107    #[test]
3108    fn reserved_identifier_mod_as_local_renamed() {
3109        // Real preset: `float mod = sin(...); ... q22*mod*...`. After
3110        // `mod()` function-call rewriting, the bare `mod` ident stays —
3111        // WGSL rejects it as reserved. We rename to `mod_`.
3112        let wgsl = translate_shader("float mod = sin(uv.x);\nret.y *= mod * 0.5;").unwrap();
3113        assert!(wgsl.contains("var mod_: f32"), "var rename missing: {wgsl}");
3114        assert!(
3115            wgsl.contains("mod_ * 0.5"),
3116            "reference rename missing: {wgsl}"
3117        );
3118    }
3119
3120    #[test]
3121    fn reserved_identifier_filter_renamed() {
3122        // `float3 filter` as a function param or local must become `filter_`.
3123        let wgsl =
3124            translate_shader("float3 filter = float3(1, 1, 1);\nret = filter * GetPixel(uv);")
3125                .unwrap();
3126        assert!(
3127            wgsl.contains("var filter_: vec3<f32>"),
3128            "filter var rename missing: {wgsl}"
3129        );
3130    }
3131
3132    #[test]
3133    fn user_function_is_lifted_above_body() {
3134        // Real preset pattern: a helper function declared before the
3135        // shader_body block. After translation, it should appear before
3136        // the LIFTED_FN_SENTINEL, with the WGSL `fn ... ->` signature.
3137        let hlsl = "float2 helper(float2 a, float3 b) {\n\
3138                       return a * 0.5 + b.xy;\n\
3139                    }\n\
3140                    shader_body {\n\
3141                       ret.xy = helper(uv, vec3<f32>(0));\n\
3142                    }";
3143        let wgsl = translate_shader(hlsl).unwrap();
3144        assert!(wgsl.contains(LIFTED_FN_SENTINEL));
3145        let (lifted, body) = wgsl.split_once(LIFTED_FN_SENTINEL).unwrap();
3146        // Signature converted: HLSL form gone, WGSL form present.
3147        assert!(
3148            lifted.contains("fn helper(a: vec2<f32>, b: vec3<f32>) -> vec2<f32>"),
3149            "lifted block missing canonical signature: {lifted}"
3150        );
3151        // Body uses the function; the call form itself is preserved as
3152        // an identifier reference inside fs_main.
3153        assert!(body.contains("helper("), "call site missing: {body}");
3154    }
3155
3156    #[test]
3157    fn no_lift_when_no_function_definitions() {
3158        // Bodies without module-scope function defs must NOT emit a
3159        // sentinel — preserves backwards-compatibility for the v0.17.0
3160        // text shape.
3161        let hlsl = "shader_body { ret = ret * 0.5; }";
3162        let wgsl = translate_shader(hlsl).unwrap();
3163        assert!(!wgsl.contains(LIFTED_FN_SENTINEL));
3164    }
3165
3166    #[test]
3167    fn lift_handles_zero_arg_function() {
3168        let hlsl = "float3 zero() { return vec3<f32>(0); }\n\
3169                    shader_body { ret = zero(); }";
3170        let wgsl = translate_shader(hlsl).unwrap();
3171        let (lifted, _body) = wgsl.split_once(LIFTED_FN_SENTINEL).unwrap();
3172        assert!(lifted.contains("fn zero() -> vec3<f32>"), "got: {lifted}");
3173    }
3174
3175    #[test]
3176    fn lift_geiss_explosion_3_helper_translates_cleanly() {
3177        // Reduced repro from `Geiss - Explosion 3 nz+...milk`. The
3178        // helper uses `filter` as a parameter name (WGSL-reserved) and
3179        // a local `dx` of type vec3 with a parenthesised RHS.
3180        let hlsl = "float2 gradBlur1( float2 domain, float2 d, float3 filter){\n\
3181                       float3 dx = ( 2*GetBlur1(domain + float2(1,0)*d) - 2*GetBlur1(domain-float2(1,0)*d) );\n\
3182                       return 0.5*float2(dx.x*filter.x, dx.y*filter.y) / (filter.x+filter.y+filter.z);\n\
3183                    }\n\
3184                    shader_body { ret.xy = gradBlur1(uv, vec2<f32>(0.5), vec3<f32>(0.5,0.5,0.5)); }";
3185        let wgsl = translate_shader(hlsl).unwrap();
3186        assert!(wgsl.contains(LIFTED_FN_SENTINEL), "no sentinel: {wgsl}");
3187        let (lifted, _body) = wgsl.split_once(LIFTED_FN_SENTINEL).unwrap();
3188        // Lifted body must use WGSL `var` for the local, not HLSL
3189        // `vec3<f32> dx`. This is what the parse error "expected `(`;
3190        // found `dx`" was originally about.
3191        assert!(
3192            lifted.contains("var dx: vec3<f32>"),
3193            "local-decl rewrite didn't reach lifted body: {lifted}"
3194        );
3195        // `filter` must have been renamed to avoid the WGSL keyword.
3196        assert!(
3197            lifted.contains("filter_:"),
3198            "filter param not renamed in signature: {lifted}"
3199        );
3200    }
3201
3202    #[test]
3203    fn lift_handles_function_with_nested_braces() {
3204        // A user function with an inner `{...}` block (e.g., an `if`)
3205        // must balance braces correctly and not chop off mid-body.
3206        let hlsl = "float foo(float t) {\n\
3207                       if (t > 0) { return t * 2; }\n\
3208                       return 0;\n\
3209                    }\n\
3210                    shader_body { ret.x = foo(0.5); }";
3211        let wgsl = translate_shader(hlsl).unwrap();
3212        let (lifted, _body) = wgsl.split_once(LIFTED_FN_SENTINEL).unwrap();
3213        assert!(
3214            lifted.contains("if (t > 0)"),
3215            "inner if dropped during lift: {lifted}"
3216        );
3217        assert!(lifted.contains("return 0;"));
3218    }
3219
3220    #[test]
3221    fn plan_cap_is_enforced() {
3222        let mut plan = TextureBindingPlan::empty();
3223        for i in 0..MAX_USER_TEXTURE_SLOTS {
3224            let name = format!("tex_{i}");
3225            let alias = format!("sampler_tex_{i}");
3226            plan.add_slot(Some(name), [1.0, 1.0, 1.0, 1.0], &[(alias, "sampler_fw")])
3227                .unwrap();
3228        }
3229        // One past the cap → None.
3230        let over = plan.add_slot(
3231            Some("overflow".to_string()),
3232            [1.0; 4],
3233            &[("sampler_overflow".to_string(), "sampler_fw")],
3234        );
3235        assert!(over.is_none());
3236        assert_eq!(plan.slot_count(), MAX_USER_TEXTURE_SLOTS);
3237    }
3238
3239    // ---------- non-square mat, builtin aliases, prose ----------
3240
3241    #[test]
3242    fn float2x3_maps_to_mat2x3() {
3243        // Non-square matrix types were getting mangled by the
3244        // `float2`→`vec2<f32>` substring substitution, leaving
3245        // `vec2<f32>x3` and tripping the WGSL parser with
3246        // `expected ')'; found 'x3'`. Many comp residual failures share
3247        // this root cause.
3248        let out = replace_types("float2x3 m;");
3249        assert_eq!(out, "mat2x3<f32> m;");
3250    }
3251
3252    #[test]
3253    fn float3x2_and_other_non_square_matrices_map_correctly() {
3254        for (hlsl, wgsl) in [
3255            ("float3x2", "mat3x2<f32>"),
3256            ("float4x3", "mat4x3<f32>"),
3257            ("float2x4", "mat2x4<f32>"),
3258            ("float3x4", "mat3x4<f32>"),
3259            ("float4x2", "mat4x2<f32>"),
3260        ] {
3261            let out = replace_types(&format!("{hlsl} m;"));
3262            assert_eq!(out, format!("{wgsl} m;"), "mapping {hlsl}");
3263        }
3264    }
3265
3266    #[test]
3267    fn sat_aliases_saturate() {
3268        // Corpus ships `#define sat saturate` (stripped by
3269        // `strip_preprocessor`) followed by `sat(…)` calls that would go
3270        // unbound on the WGSL side. We alias the call here.
3271        let out = replace_functions("ret = sat(x);");
3272        assert!(out.contains("clamp(x, 0.0, 1.0)"), "got: {out}");
3273    }
3274
3275    #[test]
3276    fn rsqrt_aliases_inverse_sqrt() {
3277        let out = replace_functions("ret = rsqrt(x);");
3278        assert!(out.contains("inverseSqrt(x)"), "got: {out}");
3279    }
3280
3281    #[test]
3282    fn log10_expands_to_natural_log() {
3283        let out = replace_functions("ret = log10(x);");
3284        assert!(out.contains("(log(x) * 0.43429448190325176)"), "got: {out}");
3285    }
3286
3287    #[test]
3288    fn tex2dbias_drops_mip_bias_args() {
3289        let out = replace_functions("ret = tex2Dbias(sampler_main, float4(uv, 0, 0.1));");
3290        assert!(out.contains("tex2D(sampler_main, uv)"), "got: {out}");
3291    }
3292
3293    #[test]
3294    fn multi_decl_sampler_list_stripped() {
3295        // `sampler a, b, c;` is a common HLSL stylistic shortcut. A
3296        // simpler regex matching only the first identifier would let the
3297        // rest survive as orphan statements that trip the WGSL parser.
3298        let src = "sampler sampler_fw_rand01, sampler_pw_rand02;\nret = uv.x;\n";
3299        let out = strip_sampler_declarations(src);
3300        assert!(!out.contains("sampler_fw_rand01"), "got: {out}");
3301        assert!(!out.contains("sampler_pw_rand02"), "got: {out}");
3302        assert!(out.contains("ret = uv.x"), "body lost: {out}");
3303    }
3304
3305    #[test]
3306    fn prose_line_is_commented_out() {
3307        // `written by martin` style attributions inside the shader body
3308        // get a `// ` prefix so the WGSL parser doesn't choke on
3309        // `expected assignment or increment/decrement; found 'by'`.
3310        let src = "ret = 0.5;\nwritten by martin\nuv = uv * 2.0;\n";
3311        let out = comment_out_prose_lines(src);
3312        assert!(out.contains("// written by martin"), "got: {out}");
3313        assert!(out.contains("ret = 0.5;"), "real code lost: {out}");
3314        assert!(out.contains("uv = uv * 2.0;"), "real code lost: {out}");
3315    }
3316
3317    #[test]
3318    fn prose_line_does_not_eat_keywords() {
3319        // A line starting with `for` or `if` is a control-flow construct,
3320        // not prose — never comment it out.
3321        for kw_line in ["for (var i = 0; i < 3; i = i + 1) {", "if x = 0;"] {
3322            let src = format!("{kw_line}\n");
3323            let out = comment_out_prose_lines(&src);
3324            assert_eq!(out, src, "wrongly commented out: {kw_line}");
3325        }
3326    }
3327
3328    #[test]
3329    fn bare_end_marker_commented_out() {
3330        // The MD2 corpus sometimes appends `END` (or similar single-word
3331        // markers) after `shader_body`. After `strip_shader_body_wrapper`
3332        // those lines land in the fragment body and crash the parser.
3333        let src = "ret = 0.5;\nEND\n";
3334        let out = comment_out_prose_lines(src);
3335        assert!(out.contains("// END"), "got: {out}");
3336    }
3337}