onedrop_hlsl/lib.rs
1//! HLSL to WGSL Translation
2//!
3//! Pragmatic, MilkDrop-2-targeted translator. The MD2 user shader body lives
4//! inside a `shader_body { ... }` block, samples the previous frame via
5//! `tex2D` / `GetPixel` / `GetBlur1..3`, and uses HLSL-style typed local
6//! declarations (`float2 uv2;`). The translator turns that into a WGSL
7//! fragment-body fragment that the codegen wrapper can paste inside its
8//! `fs_main`.
9//!
//! Most rewrites are still string/regex-driven, with a few AST-driven
//! pre-passes (see `rewrite::apply_all`) run first — together they understand
//! the MD2 conventions enough to land the dominant cases.
12
13pub mod ast;
14pub mod lex;
15pub mod parse;
16pub mod rewrite;
17mod texture_plan;
18pub mod types;
19
20use regex::Regex;
21use std::sync::LazyLock;
22use thiserror::Error;
23
24pub use texture_plan::{
25 MAX_USER_TEXTURE_SLOTS, TextureBindingPlan, TextureSlot, UserSamplerRef, scan_user_samplers,
26 user_texture_binding_name,
27};
28pub use types::{SymbolTable, WgslType};
29
30use std::fmt::Write as _;
31use texture_plan::{noise_sampler_for, replace_texture_sampling_with_plan};
32
33// ---------------------------------------------------------------------------
34// MD2 outer-wrapper stripping
35// ---------------------------------------------------------------------------
36
/// `shader_body` keyword optionally followed by whitespace/newlines and `{`.
/// Only the opener is matched here; we then balance braces ourselves to
/// recover the body — a regex alone can't do nested-brace balancing reliably.
///
/// NOTE(review): the pattern has no leading `\b`, so an identifier that merely
/// ends in `shader_body` (e.g. `my_shader_body {`) matches as well.
static SHADER_BODY_OPEN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"shader_body\s*\{").unwrap());
42
43/// MD2 ships warp/comp shaders wrapped in a `shader_body { ... }` block. The
44/// codegen wrapper pastes the user code inside its own `fs_main { ... }`, so
45/// the outer wrapper has to come off first — otherwise WGSL sees a stray
46/// identifier (`shader_body`) followed by `{` and fails with
47/// `expected assignment or increment/decrement, found "{"`.
48///
49/// If the input has no `shader_body` wrapper (synthetic test shaders, or a
50/// preset that already inlines the body), the input is returned unchanged.
51fn strip_shader_body_wrapper(src: &str) -> String {
52 let Some(open) = SHADER_BODY_OPEN.find(src) else {
53 return src.to_string();
54 };
55 let body_start = open.end();
56 let bytes = src.as_bytes();
57 let mut depth = 1usize;
58 let mut i = body_start;
59 while i < bytes.len() {
60 match bytes[i] {
61 b'{' => depth += 1,
62 b'}' => {
63 depth -= 1;
64 if depth == 0 {
65 // Replace the whole `shader_body { ... }` span (incl. the
66 // closing brace) with just the inner body — keeps any
67 // trailing comment / whitespace after it intact.
68 let mut out = String::with_capacity(src.len());
69 out.push_str(&src[..open.start()]);
70 out.push_str(&src[body_start..i]);
71 out.push_str(&src[i + 1..]);
72 return out;
73 }
74 }
75 // Skip over `//` line comments and `/* */` block comments so a
76 // brace inside a comment doesn't confuse the depth counter.
77 b'/' if i + 1 < bytes.len() && bytes[i + 1] == b'/' => {
78 while i < bytes.len() && bytes[i] != b'\n' {
79 i += 1;
80 }
81 continue;
82 }
83 b'/' if i + 1 < bytes.len() && bytes[i + 1] == b'*' => {
84 i += 2;
85 while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
86 i += 1;
87 }
88 i += 2;
89 continue;
90 }
91 _ => {}
92 }
93 i += 1;
94 }
95 // Unbalanced braces — leave it alone and let naga produce the error.
96 src.to_string()
97}
98
99// ---------------------------------------------------------------------------
100// Pre-compiled regex patterns
101// ---------------------------------------------------------------------------
102
/// Strips leading zeros from integer literals (HLSL allows `02` for `2`,
/// WGSL rejects them with `invalid numeric literal format`). Targets only
/// integer literals — `0.5` and `0` and `100` are untouched because the
/// pattern requires `\b0+` followed by another decimal digit (so `0.` and
/// `0)` never match). Capture group 1 is the digit that follows the run of
/// zeros consumed by `0+`.
///
/// NOTE(review): `\b` also holds between `.` and a digit, so the fractional
/// part of a literal such as `1.05` matches (`05`). Confirm the call site
/// guards against this — a plain `$1` replacement would turn `1.05` into `1.5`.
static LEADING_ZERO_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b0+([0-9])").unwrap());
109
/// HLSL semantic annotation: a `:` followed by optional whitespace and an
/// identifier made only of upper-case letters, digits and `_` (`: POSITION`,
/// `: TEXCOORD0`, …). Lower-case text after a colon — such as the type in a
/// WGSL `var x: f32` — does not match.
///
/// NOTE(review): an upper-case identifier after the `:` of a ternary
/// (`c ? a : B`) has the same shape and would match too — confirm this cannot
/// reach the call site.
static SEMANTICS_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r":\s*[A-Z_][A-Z0-9_]*").unwrap());
112
113/// HLSL typed local declaration, post type-substitution. The whole `<TYPE>
114/// <decls>;` statement is captured; the declarator list is then expanded
115/// into one WGSL `var` per name. Examples:
116/// `f32 gx1 = a;` → `var gx1: f32 = a;`
117/// `vec2<f32> uv2;` → `var uv2: vec2<f32>;`
118/// `vec3<f32> ret1, neu, crisp;` → `var ret1: vec3<f32>; var neu: vec3<f32>; var crisp: vec3<f32>;`
119///
120/// Anchored to the start of a (multi-line-aware) line so we don't confuse the
121/// type tokens that appear as function return types or constructor names.
122/// The captured group excludes both `;` (the terminator) and `{` (function
123/// body open brace) — the latter stops the greedy match cold when the
124/// pattern would otherwise swallow a function signature plus part of its
125/// body up to the first `;` inside. The closure also calls
126/// [`is_function_signature`] for the residual case where the regex still
127/// matches (e.g., one-liner functions).
static LOCAL_DECL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    // Capture groups: 1 = leading whitespace of the line, 2 = WGSL type
    // token, 3 = declarator list (everything up to, but excluding, the `;`).
    // `(?m)` makes `^` match at every line start, not just start-of-input.
    Regex::new(
        r"(?m)^(\s*)(f32|i32|bool|vec2<f32>|vec3<f32>|vec4<f32>|vec2<bool>|vec3<bool>|vec4<bool>|mat4x4<f32>|mat3x3<f32>|mat2x2<f32>)\s+([^;{]+);",
    )
    .unwrap()
});
134
135/// Normalise `;<space>TYPE` to `;\nTYPE` so the line-anchored
136/// [`LOCAL_DECL_REGEX`] sees each declaration on its own line. The Isosceles
137/// preset (and a handful of MD2 packs that compress kaleidoscope state) put
138/// multiple typed locals on a single source line:
139///
140/// ```text
141/// float2 cntr = float2(q13,q14); float sin = q11; float cos = q12; float scale = q15;
142/// ```
143///
144/// Without this pre-pass, only the first decl converted to a `var` — the
145/// rest stayed as raw HLSL and tripped naga with
146/// `expected assignment or increment/decrement; found 'sin'`. Safe for
147/// `for(f32 i = 0; …)` because the first `;` inside the parens is followed
148/// by an expression (the loop condition), never by a type keyword.
149///
150/// We also split when the type is glued to `{` (function-body opening brace,
151/// conditional/loop body), and recognise vec types ending in `>`. A naive
152/// `\b` boundary check fails after `vec2<f32>` because `>` is already a
153/// non-word char; vec-typed declarations grouped behind any prior `; <decl>`
154/// would otherwise stay un-rewritten and naga would report `expected
155/// assignment or increment/decrement; found 'uvc'`. Requiring an
156/// identifier-start `[A-Za-z_]` after the type+space rules out false
157/// positives on constructor calls like `vec3<f32>(0)`.
static INLINE_DECL_SPLIT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    // Capture groups: 1 = the preceding `;` or `{`, 2 = WGSL type token,
    // 3 = first character of the declared identifier. Only spaces and tabs
    // (`[ \t]`) are allowed between the pieces, so a match never spans lines.
    Regex::new(
        r"([;{])[ \t]*(f32|i32|bool|vec2<f32>|vec3<f32>|vec4<f32>|vec2<bool>|vec3<bool>|vec4<bool>|mat4x4<f32>|mat3x3<f32>|mat2x2<f32>)[ \t]+([A-Za-z_])",
    )
    .unwrap()
});
164
165/// HLSL `sampler foo;` / `texture foo;` declarations at module scope. The
166/// codegen wrapper provides the actual texture/sampler bindings, so user
167/// declarations are redundant — and they confuse the WGSL parser when they
168/// land inside `fs_main`. Stripped out wholesale.
169///
170/// Also matches the **comma-list** form `sampler a, b, c;` that a number
171/// of presets ship as a stylistic shortcut. A simpler regex stopping at
172/// the first identifier would let the trailing comma list survive as
173/// orphan statements `b; c;` that trip the WGSL parser with
174/// `expected assignment or increment/decrement; found 'sampler_fw_rand01'`.
static SAMPLER_DECL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    // Shape: line start, one of the sampler/texture keywords, a first
    // identifier, zero or more `, identifier` repeats, then `;`. There are no
    // capture groups — the whole match is the statement to remove.
    Regex::new(r"(?m)^\s*(?:sampler|texture|texture2D|texture3D|sampler2D|sampler3D)\s+[A-Za-z_][A-Za-z0-9_]*(?:\s*,\s*[A-Za-z_][A-Za-z0-9_]*)*\s*;").unwrap()
});
178
/// Preprocessor directives (`#define`, `#include`, `#pragma`, …). HLSL
/// presets occasionally use them; WGSL has no preprocessor, so we strip
/// the whole line.
///
/// The match runs from a line start, through optional whitespace and the `#`,
/// to the end of that line. Because `\s*` also matches newlines, blank lines
/// directly above a directive are part of the match.
static PREPROC_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?m)^\s*#[^\n]*$").unwrap());
183
/// HLSL storage-class qualifiers `static`, `uniform` and `extern`, matched as
/// whole words together with any whitespace that follows them. `static const`
/// is a common HLSL pattern for "function-scope compile-time constant", but
/// WGSL inside a function uses `let` (or `const` at module scope only).
/// Removing the qualifier lets `rewrite_local_declarations` turn the rest
/// into a regular `var`.
///
/// Note that `const` itself is not part of this pattern, and that the pattern
/// is not anchored to the start of a declaration: any whole-word occurrence
/// of the three keywords matches.
static STORAGE_CLASS_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b(static|uniform|extern)\b\s*").unwrap());
191
/// HLSL `const TYPE NAME` at start of a typed local declaration.
/// We strip `const ` only when followed by a recognised HLSL type token
/// — leaves WGSL module-level `const` (which doesn't appear in user
/// shader bodies anyway) alone.
///
/// Recognised type tokens are `float` with an optional `1`–`4` width and an
/// optional `x1`–`x4` matrix suffix, `int`, and `bool`. Capture group 1 is
/// the type token, so a replacement can keep the type and drop the `const`.
static CONST_TYPE_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\bconst\s+(float[1-4]?(?:x[1-4])?|int|bool)\b").unwrap());
198
/// Postfix `<ident>++` (HLSL increment).
/// WGSL has no postfix operators; we rewrite to the equivalent
/// compound assignment `<ident> = <ident> + 1` only at statement
/// boundaries (`;` or `)`) so expression-position uses like `a[i++]`
/// don't get mangled. Real preset pattern: `n++;` at end of a
/// per-iteration loop.
///
/// Capture groups: 1 = the identifier, 2 = the terminating `;` or `)`.
static POSTFIX_INC_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b([A-Za-z_][A-Za-z0-9_]*)\s*\+\+\s*([;)])").unwrap());
/// Postfix `<ident>--` (HLSL decrement); same shape and capture groups as
/// the increment pattern, with `--` in place of `++`.
static POSTFIX_DEC_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b([A-Za-z_][A-Za-z0-9_]*)\s*--\s*([;)])").unwrap());
209
/// Errors reported by the HLSL → WGSL translator. Each variant carries a
/// free-form message that is interpolated into the `Display` text.
#[derive(Error, Debug)]
pub enum TranslationError {
    /// General translation failure; displays as `Translation error: <msg>`.
    #[error("Translation error: {0}")]
    Translation(String),

    /// The input uses an HLSL feature the translator does not handle;
    /// displays as `Unsupported HLSL feature: <msg>`.
    #[error("Unsupported HLSL feature: {0}")]
    Unsupported(String),
}
218
/// Crate-local result alias with [`TranslationError`] as the error type.
pub type Result<T> = std::result::Result<T, TranslationError>;
220
221/// Translate HLSL shader code to WGSL.
222///
223/// The pipeline is intentionally linear:
224/// 1. strip the MD2 `shader_body { ... }` outer wrapper,
225/// 2. type-substitute (`float4 → vec4<f32>`, …),
226/// 3. function-substitute (`lerp → mix`, `tex2D → textureSample`, …),
227/// 4. rewrite typed local declarations into WGSL `var ident: T = …;` form,
228/// 5. strip HLSL semantics (`: POSITION`, …).
229pub fn translate_shader(hlsl: &str) -> Result<String> {
230 translate_shader_with_plan(hlsl, &TextureBindingPlan::empty())
231}
232
233/// Same as [`translate_shader`], but routes unrecognised `tex2D` sampler names
234/// through the supplied [`TextureBindingPlan`]. Preset authors reference
235/// disk-loaded textures via `sampler sampler_<NAME>;` + `tex2D(sampler_<NAME>,
236/// uv)`; the renderer scans the HLSL, builds a plan that resolves each name to
237/// a slot in the comp pipeline's user-texture binding array, and threads it
238/// through here so the emitted WGSL points at the right binding.
239///
240/// Empty plan ≡ legacy behaviour: unrecognised samplers fall back to
241/// `sampler_main` with a `/*was: <name>*/` debug comment.
242pub fn translate_shader_with_plan(hlsl: &str, plan: &TextureBindingPlan) -> Result<String> {
243 // Stash the original input so the global-var hoist pass at the bottom
244 // can re-parse it for `Item::GlobalVar` names. The rewriter chain
245 // below shadows `hlsl`, and by the time we reach the hoist the AST
246 // boundary between top-level globals and shader_body locals is gone
247 // (everything is one flat translated body string).
248 let hlsl_for_ast = hlsl;
249
250 // AST-driven pre-passes. Each one parses the HLSL, walks the AST,
251 // and writes targeted text edits back; if parsing fails the input
252 // passes through unchanged. The downstream regex pipeline then sees
253 // a slightly friendlier source. Order matters: array globals must be
254 // lowered before the regex `replace_types` runs, and the binop / UV
255 // passes are stable under each other so the chain order is fixed in
256 // `rewrite::apply_all`.
257 let hlsl = rewrite::apply_all(hlsl);
258 let mut wgsl = strip_shader_body_wrapper(&hlsl);
259 wgsl = comment_out_prose_lines(&wgsl);
260 wgsl = strip_preprocessor(&wgsl);
261 wgsl = strip_sampler_declarations(&wgsl);
262 wgsl = strip_storage_class_qualifiers(&wgsl);
263 wgsl = rewrite_postfix_inc_dec(&wgsl);
264 wgsl = replace_types(&wgsl);
265 wgsl = replace_functions(&wgsl);
266 wgsl = replace_texture_sampling_with_plan(&wgsl, plan);
267 wgsl = rewrite_tex3d_calls(&wgsl);
268 wgsl = rewrite_local_declarations(&wgsl);
269 wgsl = brace_up_single_statement_blocks(&wgsl);
270 wgsl = dedup_var_declarations(&wgsl);
271 wgsl = replace_statement_commas(&wgsl);
272 wgsl = replace_semantics(&wgsl);
273
274 // Type-aware passes run last — they need the source to look like
275 // valid WGSL `var` declarations with explicit type annotations,
276 // which only the rewrites above produce. The symbol table is built
277 // once and reused so the truncation pass sees the same view of
278 // locals as the broadcast pass. The broadcast pass recurses into
279 // nested call args itself, so a single sweep is enough.
280 let table = types::SymbolTable::from_source(&wgsl);
281 wgsl = types::inject_broadcasts(&wgsl, &table);
282 wgsl = types::inject_truncations(&wgsl, &table);
283 wgsl = types::inject_assignment_coercions(&wgsl, &table);
284 // Swizzle LHS rewrite runs after assignment coercion so the LHS view
285 // is already a single bare ident statement (no nested cast inserted
286 // between target and `.xy`). Runs before validation can flag
287 // `invalid left-hand side of assignment`.
288 wgsl = types::inject_swizzle_assignments(&wgsl, &table);
289
290 // Lift module-scope user functions LAST so the function body has
291 // already been through every regex rewrite and type-aware pass
292 // (decl conversion, broadcast, truncation, assignment coercion,
293 // swizzle reconstruction). After lift, the body emits as a
294 // syntactically valid WGSL `fn` and the wrapper places it at module
295 // scope on the other side of `LIFTED_FN_SENTINEL`.
296 let lifted = lift_user_functions(&mut wgsl);
297
298 // Hoist pre-`shader_body` global variable declarations (parsed as
299 // `Item::GlobalVar` in the AST) from the fragment body to module
300 // scope. Lifted user functions (now at module scope) routinely
301 // reference these — `float3 t = float3(q20, q23, q26)` declared at
302 // the top of the preset, then used inside a lifted `float3 project(...)`
303 // would otherwise hit "no definition in scope for identifier: `t`"
304 // when the parser walks the lifted body and the surrounding fs_main
305 // locals are out of reach.
306 //
307 // Implementation: re-parse the original HLSL to collect the names
308 // of top-level `GlobalVar` items, scan the translated body for
309 // `var NAME: TYPE [= INIT];` lines whose name is in that set, emit a
310 // module-scope `var<private> NAME: TYPE;` declaration, and replace the
311 // body line with `NAME = INIT;` (or drop entirely when there's no init).
312 let hoisted_globals = hoist_global_vars(&mut wgsl, hlsl_for_ast);
313
314 let module_scope = match (lifted.is_empty(), hoisted_globals.is_empty()) {
315 (true, true) => return Ok(wgsl),
316 (false, true) => lifted,
317 (true, false) => hoisted_globals,
318 (false, false) => format!("{lifted}\n{hoisted_globals}"),
319 };
320 Ok(format!("{module_scope}\n{LIFTED_FN_SENTINEL}\n{wgsl}"))
321}
322
323/// Re-parse `hlsl` (the original input, before any rewriter touched it)
324/// and return a map of `name → wgsl_type` for top-level `Item::GlobalVar`
325/// items. Used by [`hoist_global_vars`] to decide which body-level `var`
326/// declarations belong at module scope.
327///
328/// Skips any name that collides with a WGSL builtin function. A few
329/// MD2 authors declare locals named `sin` / `cos` / `pow` / `dot` etc.
330/// (the Isosceles preset's kaleidoscope state line) — hoisting these to
331/// module scope as `var<private>` would shadow the builtin globally,
332/// breaking every other site that calls `sin(x)` as a function. Leaving
333/// them as fs_main locals preserves the per-scope shadowing the author
334/// expected.
335///
336/// Returned types are in WGSL form (`vec2<f32>`, `mat3x3<f32>`, …) so the
337/// hoist pass can compare them against the in-body `var X: T` lines and
338/// only transform exact-type matches — re-declarations like `float2
339/// rss,uv2;` at top level followed by `float3 uv2 = …;` in `shader_body`
340/// are shadowing the global and must keep their local declaration intact.
341fn collect_global_var_types(hlsl: &str) -> std::collections::HashMap<String, String> {
342 // Pre-strip storage-class qualifiers (`uniform`, `extern`, ...) the HLSL
343 // parser doesn't recognise — they're cosmetic in MD2 user shaders and
344 // would otherwise break the parse, dropping every following GlobalVar
345 // from the AST. Real-world hit: `Se7enSlasher - Texture Distortion …`
346 // has `uniform float caval; … float3 col;` and the `uniform` line
347 // failed parse, so `col` was never registered for hoisting.
348 let cleaned = strip_storage_class_qualifiers(hlsl);
349 let Ok(tu) = parse::parse_hlsl(&cleaned) else {
350 return std::collections::HashMap::new();
351 };
352 tu.items
353 .iter()
354 .filter_map(|i| match i {
355 ast::Item::GlobalVar(g) if !is_wgsl_builtin_function_name(&g.name) => {
356 hlsl_type_to_wgsl(&g.ty.name).map(|wgsl_ty| (g.name.clone(), wgsl_ty.to_string()))
357 }
358 _ => None,
359 })
360 .collect()
361}
362
/// Translate an HLSL type name into the WGSL type the hoist pass works with.
///
/// * `float`, `int`, `half`, `double` → `f32`; with a `2`/`3`/`4` suffix →
///   `vec2<f32>` / `vec3<f32>` / `vec4<f32>` (every numeric base maps to an
///   `f32`-based type).
/// * `float2x2`, `float3x3`, `float4x4` → the matching `matNxN<f32>`.
/// * `bool` → `bool`.
///
/// Anything else (user struct names, samplers, …) yields `None`, which tells
/// the hoist pass to leave that global alone.
fn hlsl_type_to_wgsl(name: &str) -> Option<&'static str> {
    const NUMERIC_BASES: [&str; 4] = ["float", "int", "half", "double"];

    // Exact-name cases that don't follow the `<base><width>` scheme.
    match name {
        "bool" => return Some("bool"),
        "float2x2" => return Some("mat2x2<f32>"),
        "float3x3" => return Some("mat3x3<f32>"),
        "float4x4" => return Some("mat4x4<f32>"),
        _ => {}
    }

    // No base is a prefix of another, so at most one of them can strip.
    let width = NUMERIC_BASES
        .iter()
        .find_map(|base| name.strip_prefix(*base))?;
    match width {
        "" => Some("f32"),
        "2" => Some("vec2<f32>"),
        "3" => Some("vec3<f32>"),
        "4" => Some("vec4<f32>"),
        _ => None,
    }
}
381
/// Reports whether `name` is one of the WGSL builtin function names this
/// translator guards against shadowing.
///
/// The table is deliberately a subset: it lists the builtins MD2 user
/// shaders actually invoke. Names outside it (nobody shadows `inverseSqrt`)
/// are treated as ordinary identifiers. Matching is exact and case-sensitive.
fn is_wgsl_builtin_function_name(name: &str) -> bool {
    const GUARDED_BUILTINS: &[&str] = &[
        "abs",
        "acos",
        "asin",
        "atan",
        "atan2",
        "ceil",
        "clamp",
        "cos",
        "cosh",
        "cross",
        "degrees",
        "distance",
        "dot",
        "exp",
        "exp2",
        "floor",
        "fract",
        "length",
        "log",
        "log2",
        "max",
        "min",
        "mix",
        "normalize",
        "pow",
        "radians",
        "reflect",
        "refract",
        "round",
        "saturate",
        "select",
        "sign",
        "sin",
        "sinh",
        "smoothstep",
        "sqrt",
        "step",
        "tan",
        "tanh",
        "transpose",
        "trunc",
    ];
    GUARDED_BUILTINS.contains(&name)
}
432
433/// Hoist top-level user globals to module scope.
434///
435/// Scans `body` for `var NAME: TYPE [= INIT];` lines and, when `NAME`
436/// matches a top-level `Item::GlobalVar` from `hlsl_src` **and** the
437/// declared type matches that global's type, emits a module-scope
438/// `var<private> NAME: TYPE;` declaration. The body line is rewritten to a
439/// bare `NAME = INIT;` (assignment runs inside `fs_main` so it can still
440/// reference `q1..q32` and other `var<private>` state seeded by the
441/// wrapper) or removed entirely when there's no initializer.
442///
443/// The TYPE match is what protects a corpus shape like
444/// `float2 rs2,rs0,rss,uv2; … shader_body { float3 uv2 = …; }`: at top
445/// level `uv2` is `vec2<f32>` (the hoist target), but the in-body
446/// `var uv2: vec3<f32> = …;` line is a shadowing re-declaration of a
447/// different type and must keep its local `var` form so the in-body
448/// `uv2` ends up as a `vec3<f32>` local. Without this guard, the bare
449/// regex match would strip the `var` from the shadowing decl and the
450/// assignment would silently target the `vec2` module-scope global,
451/// producing downstream `InvalidStoreTypes` errors.
452///
453/// Returns the joined module-scope declarations (`""` if none).
454fn hoist_global_vars(body: &mut String, hlsl_src: &str) -> String {
455 let globals = collect_global_var_types(hlsl_src);
456 if globals.is_empty() {
457 return String::new();
458 }
459
460 static VAR_DECL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
461 Regex::new(
462 r"\bvar\s+([A-Za-z_][A-Za-z0-9_]*)\s*:\s*([A-Za-z_][A-Za-z0-9_<>]*)\s*(=\s*[^;]+)?;",
463 )
464 .unwrap()
465 });
466
467 let mut hoisted = String::new();
468 let mut seen: std::collections::HashSet<String> = Default::default();
469 let mut new_body = String::with_capacity(body.len());
470 let mut last_end = 0usize;
471
472 for cap in VAR_DECL_REGEX.captures_iter(body) {
473 let full = cap.get(0).unwrap();
474 let name = cap.get(1).unwrap().as_str();
475 let ty = cap.get(2).unwrap().as_str();
476 let Some(expected_ty) = globals.get(name) else {
477 continue;
478 };
479 if expected_ty.as_str() != ty {
480 // Same name, different type — the user is shadowing the
481 // global with a re-declaration. Leave the local `var X: T`
482 // intact so the shadow stays in fs_main scope.
483 continue;
484 }
485 let init = cap.get(3).map(|m| m.as_str());
486
487 // Emit one module-scope declaration per global, even if the body
488 // had two `var X: T;` lines for the same name (the dedup pass
489 // mostly handles this, but be defensive).
490 if seen.insert(name.to_string()) {
491 let _ = writeln!(&mut hoisted, "var<private> {name}: {ty};");
492 }
493
494 new_body.push_str(&body[last_end..full.start()]);
495 if let Some(init) = init {
496 // `init` is `"= <expr>"`; trim the leading `=` and whitespace.
497 let init_value = init.trim_start_matches('=').trim_start();
498 let _ = write!(&mut new_body, "{name} = {init_value};");
499 }
500 // No init → drop the body line; the module-scope `var<private>`
501 // is uninitialised and any later in-body `NAME = ...;` assigns it.
502 last_end = full.end();
503 }
504 new_body.push_str(&body[last_end..]);
505 *body = new_body;
506
507 hoisted
508}
509
/// Marker line that separates module-scope text — user functions lifted by
/// [`lift_user_functions`] and `var<private>` globals hoisted by
/// [`hoist_global_vars`] — from the fragment body inside the translated
/// output. The codegen wrapper splits on this marker: text before goes
/// before `fs_main`, text after goes inside it.
///
/// [`translate_shader_with_plan`] only emits the marker when there is
/// module-scope text; otherwise its output is the bare fragment body.
pub const LIFTED_FN_SENTINEL: &str = "// __ONEDROP_LIFTED_FNS_END__";
515
516/// Find HLSL-shaped function definitions (`<TYPE> <name>(...) { ... }`)
517/// at depth 0 in the translated body, rewrite each signature to WGSL
518/// shape (`fn name(arg: TYPE, …) -> TYPE { ... }`), and remove them from
519/// `src` in place. The lifted functions are returned as a single string
520/// concatenated in source order — `wrap_user_comp_shader_with_plan`
521/// places it before `fs_main`.
522///
523/// Preset pattern this unblocks (real example from
524/// `Geiss - Explosion 3 nz+ enscarpment via lateral hosations.milk`):
525///
526/// ```text
527/// vec2<f32> gradBlur1(vec2<f32> domain, vec2<f32> d, vec3<f32> filter) {
528/// vec3<f32> dx = (2*GetBlur1(domain + vec2<f32>(1,0)*d) - 2*GetBlur1(domain - vec2<f32>(1,0)*d));
529/// ...
530/// return 0.5 * vec2<f32>(...);
531/// }
532/// shader_body { ret += gradBlur1(uv, vec2<f32>(0.5)/aspect.xy, vec3<f32>(0.5,0.5,0.5)); }
533/// ```
534///
535/// After lifting:
536/// ```text
537/// fn gradBlur1(domain: vec2<f32>, d: vec2<f32>, filter: vec3<f32>) -> vec2<f32> {
538/// var dx: vec3<f32> = (2*GetBlur1(domain + vec2<f32>(1,0)*d) - 2*GetBlur1(domain - vec2<f32>(1,0)*d));
539/// ...
540/// return 0.5 * vec2<f32>(...);
541/// }
542/// ```
543///
544/// Functions that reference fs_main locals (`texsize`, `q1`, …) will fail
545/// to validate after lifting since those locals don't exist at module
546/// scope — we accept that limitation here; a future pass may pass them
547/// explicitly.
548fn lift_user_functions(src: &mut String) -> String {
549 let bytes = src.as_bytes();
550 let mut lifted = String::new();
551 let mut residual = String::with_capacity(src.len());
552 let mut i = 0usize;
553
554 while i < bytes.len() {
555 // Scan for a candidate function signature at the start of a line
556 // (after whitespace). We require it to begin at depth 0; the
557 // outer loop never enters function bodies because we skip past
558 // them whole when we recognise one.
559 if let Some((next_i, lifted_fn)) = try_extract_user_function(src.as_str(), i) {
560 lifted.push_str(&lifted_fn);
561 lifted.push('\n');
562 i = next_i;
563 continue;
564 }
565 residual.push(bytes[i] as char);
566 i += 1;
567 }
568
569 *src = residual;
570 lifted
571}
572
573/// Try to match a single HLSL-shaped function definition starting at byte
574/// position `start` (after leading whitespace). Returns the byte position
575/// just past the closing `}` and the rewritten WGSL function text.
576/// Returns `None` if no signature matches — caller advances by 1 byte.
577fn try_extract_user_function(src: &str, start: usize) -> Option<(usize, String)> {
578 let bytes = src.as_bytes();
579
580 // Skip leading whitespace + newlines on the current line.
581 let mut i = start;
582 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
583 i += 1;
584 }
585 if i >= bytes.len() {
586 return None;
587 }
588
589 // Require we're at a line start — function defs are top-level, never
590 // mid-expression. Walk back from `i` to confirm only whitespace until
591 // a newline or start-of-input.
592 let mut back = i;
593 while back > 0 && bytes[back - 1] != b'\n' {
594 if !bytes[back - 1].is_ascii_whitespace() {
595 return None;
596 }
597 back -= 1;
598 }
599
600 // Parse the return type: one of the WGSL type tokens.
601 let (ret_type, after_ty) = parse_wgsl_function_return_type(src, i)?;
602 let mut j = after_ty;
603
604 while j < bytes.len() && bytes[j].is_ascii_whitespace() {
605 j += 1;
606 }
607
608 // Function name: identifier.
609 let name_start = j;
610 while j < bytes.len() && (bytes[j].is_ascii_alphanumeric() || bytes[j] == b'_') {
611 j += 1;
612 }
613 if j == name_start {
614 return None;
615 }
616 let name = &src[name_start..j];
617
618 while j < bytes.len() && bytes[j].is_ascii_whitespace() {
619 j += 1;
620 }
621 if j >= bytes.len() || bytes[j] != b'(' {
622 return None;
623 }
624 j += 1;
625 let params_start = j;
626
627 // Balance `(...)`.
628 let mut depth = 1i32;
629 while j < bytes.len() {
630 match bytes[j] {
631 b'(' => depth += 1,
632 b')' => {
633 depth -= 1;
634 if depth == 0 {
635 break;
636 }
637 }
638 _ => {}
639 }
640 j += 1;
641 }
642 if j >= bytes.len() {
643 return None;
644 }
645 let params_text = &src[params_start..j];
646 j += 1;
647
648 while j < bytes.len() && bytes[j].is_ascii_whitespace() {
649 j += 1;
650 }
651 if j >= bytes.len() || bytes[j] != b'{' {
652 return None;
653 }
654 let body_open = j;
655 j += 1;
656 let mut depth = 1i32;
657 while j < bytes.len() {
658 match bytes[j] {
659 b'{' => depth += 1,
660 b'}' => {
661 depth -= 1;
662 if depth == 0 {
663 break;
664 }
665 }
666 _ => {}
667 }
668 j += 1;
669 }
670 if j >= bytes.len() {
671 return None;
672 }
673 let body = &src[body_open..=j];
674
675 // Convert params to WGSL shape and shadow any that get reassigned in
676 // the body. HLSL value-parameters are mutable (the body's `x = pow(x, k);`
677 // overwrites them in place); WGSL parameters are `let`-bound and naga
678 // rejects the assignment with "invalid left-hand side of assignment;
679 // consider declaring `x` with `var` instead of `let`". For each param
680 // the body writes to, we rename it to `<name>_md2arg` in the signature
681 // and prepend `var <name>: <ty> = <name>_md2arg;` so existing reads /
682 // writes against `<name>` in the body resolve to a mutable local.
683 let parsed = parse_hlsl_params(params_text);
684 let mut sig_parts = Vec::with_capacity(parsed.len());
685 let mut shadow_prelude = String::new();
686 for (ty, pname) in &parsed {
687 if body_assigns_to(body, pname) {
688 let renamed = format!("{pname}_md2arg");
689 sig_parts.push(format!("{renamed}: {ty}"));
690 shadow_prelude.push_str(&format!("var {pname}: {ty} = {renamed}; "));
691 } else {
692 sig_parts.push(format!("{pname}: {ty}"));
693 }
694 }
695 let wgsl_params = sig_parts.join(", ");
696 let wgsl_fn = if shadow_prelude.is_empty() {
697 format!("fn {name}({wgsl_params}) -> {ret_type} {body}")
698 } else {
699 // body is `{ ... }` — insert shadow prelude right after the `{`.
700 let inner = &body[1..body.len() - 1];
701 format!("fn {name}({wgsl_params}) -> {ret_type} {{{shadow_prelude}{inner}}}")
702 };
703
704 Some((j + 1, wgsl_fn))
705}
706
/// Whether `body` (a `{ ... }` WGSL block as text) contains a write to the
/// identifier `name`.
///
/// A write is any of:
/// * plain assignment `name = …` (but not the comparison `name == …`),
/// * compound assignment `name += …`, `-=`, `*=`, `/=`, `%=`,
/// * post-increment / decrement `name++` / `name--`,
/// * a plain or compound assignment through an access chain rooted at
///   `name`: any mix of `.member` / swizzle steps and balanced `[…]` indices,
///   e.g. `name.xy = …`, `name[0] = …`, `name[i].x += …`.
///
/// `name` must match as a whole identifier (`x` does not match inside `xy`).
/// Only spaces and tabs may sit between the identifier (or its access chain)
/// and the operator. Text inside `//` line comments is ignored so a
/// commented-out assignment does not count; `/* */` comments are not skipped.
///
/// Returns `false` for an empty `name`.
fn body_assigns_to(body: &str, name: &str) -> bool {
    // Identifier-continuation byte: `[A-Za-z0-9_]`.
    fn is_ident_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_'
    }

    // Advance past spaces and tabs (newlines are deliberately not skipped).
    fn skip_blanks(bytes: &[u8], mut pos: usize) -> usize {
        while matches!(bytes.get(pos), Some(b' ' | b'\t')) {
            pos += 1;
        }
        pos
    }

    // Advance past a run of `.ident` steps and balanced `[...]` indices.
    fn skip_access_chain(bytes: &[u8], mut pos: usize) -> usize {
        while pos < bytes.len() {
            match bytes[pos] {
                b'.' => {
                    pos += 1;
                    while pos < bytes.len() && is_ident_byte(bytes[pos]) {
                        pos += 1;
                    }
                }
                b'[' => {
                    let mut depth = 1i32;
                    pos += 1;
                    while pos < bytes.len() && depth > 0 {
                        match bytes[pos] {
                            b'[' => depth += 1,
                            b']' => depth -= 1,
                            _ => {}
                        }
                        pos += 1;
                    }
                }
                _ => break,
            }
        }
        pos
    }

    // `=` that is not the first half of `==`.
    fn is_plain_assign(bytes: &[u8], pos: usize) -> bool {
        bytes.get(pos) == Some(&b'=') && bytes.get(pos + 1) != Some(&b'=')
    }

    // One of `+= -= *= /= %=`.
    fn is_compound_assign(bytes: &[u8], pos: usize) -> bool {
        matches!(bytes.get(pos), Some(b'+' | b'-' | b'*' | b'/' | b'%'))
            && bytes.get(pos + 1) == Some(&b'=')
    }

    // `++` or `--`.
    fn is_inc_dec(bytes: &[u8], pos: usize) -> bool {
        match bytes.get(pos) {
            Some(&op) if op == b'+' || op == b'-' => bytes.get(pos + 1) == Some(&op),
            _ => false,
        }
    }

    let bytes = body.as_bytes();
    let needle = name.as_bytes();
    if needle.is_empty() {
        return false;
    }

    let mut i = 0usize;
    let mut in_line_comment = false;
    while i + needle.len() <= bytes.len() {
        let c = bytes[i];
        if in_line_comment {
            // A line comment ends at the newline.
            in_line_comment = c != b'\n';
            i += 1;
            continue;
        }
        if c == b'/' && bytes.get(i + 1) == Some(&b'/') {
            in_line_comment = true;
            i += 2;
            continue;
        }

        // Left boundary: the previous byte must not continue an identifier.
        let left_ok = i == 0 || !is_ident_byte(bytes[i - 1]);
        if !(left_ok && bytes[i..].starts_with(needle)) {
            i += 1;
            continue;
        }

        let after = i + needle.len();
        // Right boundary: the match must not be the prefix of a longer identifier.
        let right_ok = !bytes.get(after).is_some_and(|b| is_ident_byte(*b));
        if right_ok {
            let tail = skip_blanks(bytes, after);
            let written = if matches!(bytes.get(tail), Some(b'.' | b'[')) {
                // Component / element write: `name.xy = …`, `name[0] = …`,
                // `name[i].x += …`. Walk the whole chain, then look for the
                // assignment operator.
                let end = skip_blanks(bytes, skip_access_chain(bytes, tail));
                is_plain_assign(bytes, end) || is_compound_assign(bytes, end)
            } else {
                is_plain_assign(bytes, tail)
                    || is_compound_assign(bytes, tail)
                    || is_inc_dec(bytes, tail)
            };
            if written {
                return true;
            }
        }
        i += needle.len();
    }
    false
}
825
826/// Parse an HLSL parameter list into (type, name) pairs in source order.
827/// Same shape as [`convert_hlsl_params_to_wgsl`] but returns the structured
828/// form so callers can decide per-param whether to rename / shadow.
829fn parse_hlsl_params(params: &str) -> Vec<(String, String)> {
830 if params.trim().is_empty() {
831 return Vec::new();
832 }
833 let mut out = Vec::new();
834 for raw in split_top_level_commas(params) {
835 let part = raw.trim();
836 if part.is_empty() {
837 continue;
838 }
839 if let Some((ty, name)) = split_param(part) {
840 // `lift_user_functions` runs after the regex pipeline, so
841 // `params_text` already holds WGSL-shaped types (`f32`,
842 // `vec2<f32>`, …); no type rewrite needed here.
843 out.push((ty.to_string(), name.to_string()));
844 }
845 }
846 out
847}
848
/// Try to read a known WGSL return type starting at byte offset `i` of
/// `src`.
///
/// On success returns the canonical type text together with the byte
/// offset just past it. The type must not run straight into another
/// identifier character (`f32x` is not `f32`). `void` is deliberately
/// absent from the table — user comp shader functions always return a
/// typed value in MD2. An offset past the end of `src` yields `None`.
fn parse_wgsl_function_return_type(src: &str, i: usize) -> Option<(&'static str, usize)> {
    const CANDIDATES: [&str; 9] = [
        "vec4<f32>",
        "vec3<f32>",
        "vec2<f32>",
        "mat4x4<f32>",
        "mat3x3<f32>",
        "mat2x2<f32>",
        "f32",
        "i32",
        // `bool`-returning helpers (`bool mask_rectangle(...)` and friends)
        // must lift as well; otherwise the untouched `bool foo(...)` line
        // reaches the WGSL output and the parser rejects `foo` with
        // `expected assignment or increment/decrement`.
        "bool",
    ];

    let tail = src.as_bytes().get(i..)?;
    CANDIDATES.iter().copied().find_map(|candidate| {
        let after = tail.strip_prefix(candidate.as_bytes())?;
        match after.first() {
            // Runs into a longer identifier — not this type.
            Some(&c) if c.is_ascii_alphanumeric() || c == b'_' => None,
            _ => Some((candidate, i + candidate.len())),
        }
    })
}
883
/// Cut one parameter declaration into its `(type, name)` halves.
///
/// The cut is made at the last whitespace run that sits outside any
/// `<...>` and is immediately followed by an identifier start (ASCII
/// letter or `_`), so generic types such as `vec3<f32>` stay in one
/// piece. Both halves are trimmed; `None` is returned when no such run
/// exists or when either half ends up empty.
fn split_param(decl: &str) -> Option<(&str, &str)> {
    let raw = decl.as_bytes();
    let mut nesting = 0i32;
    // (start of the whitespace run, start of the identifier after it)
    let mut boundary: Option<(usize, usize)> = None;
    let mut pos = 0usize;

    while let Some(&byte) = raw.get(pos) {
        if byte == b'<' {
            nesting += 1;
        } else if byte == b'>' {
            nesting -= 1;
        } else if nesting == 0 && matches!(byte, b' ' | b'\t' | b'\n' | b'\r') {
            // Jump over the whole run; remember it only when an
            // identifier starts right behind it.
            let run_end = raw[pos..]
                .iter()
                .position(|b| !b.is_ascii_whitespace())
                .map_or(raw.len(), |offset| pos + offset);
            let starts_ident =
                matches!(raw.get(run_end), Some(&b) if b.is_ascii_alphabetic() || b == b'_');
            if starts_ident {
                boundary = Some((pos, run_end));
            }
            pos = run_end;
            continue;
        }
        pos += 1;
    }

    let (type_end, name_start) = boundary?;
    let ty = decl[..type_end].trim();
    let name = decl[name_start..].trim();
    if ty.is_empty() || name.is_empty() {
        None
    } else {
        Some((ty, name))
    }
}
921
/// Translate HLSL scalar / vector / matrix type names to WGSL.
///
/// The source is scanned once and only *whole identifiers* are replaced,
/// so a type name embedded in a longer identifier (`point2`, `tint3`,
/// `uint2`, `myfloat4`) is left alone instead of being mangled into
/// `povec2<f32>` and the like. Everything that is not a recognised type
/// name is copied through verbatim.
///
/// Mapping:
///
/// * `float`, `float1`, `half`, `half1`, `double`, `int` → `f32`
/// * `floatN` / `halfN` / `doubleN` / `intN` (N = 2..4) → `vecN<f32>`
/// * `floatRxC` / `halfRxC` / `doubleRxC` (R, C = 2..4) → `matRxC<f32>`
/// * `boolN` (N = 2..4) → `vecN<bool>`; plain `bool` already exists in
///   WGSL and is untouched.
///
/// Why everything lands in the f32 family:
///
/// * `float1` is HLSL's alias for `float`; many preset comp shaders use it
///   for single-channel loads (`float1 dx = …`).
/// * Non-square matrices keep their `RxC` digits so the WGSL *parses*; the
///   constructor call still won't validate when args are passed as HLSL
///   row vectors, but that is enough for shaders whose matrix never
///   actually executes.
/// * `double*`: WGSL has no f64 in shaders, and presets use `double3` as a
///   typo-grade alias without needing the precision.
/// * `half*`: no portable f16 in WGSL, and MD2 user code never depends on
///   the reduced precision.
/// * `int*`: presets freely mix `int` with float arithmetic, which HLSL
///   promotes implicitly and WGSL refuses; user comp shaders do no real
///   integer work, so widening to `f32` up front is harmless.
fn replace_types(code: &str) -> String {
    /// WGSL spelling for one whole HLSL type identifier, if it is one.
    fn wgsl_spelling(word: &str) -> Option<&'static str> {
        const FAMILIES: [&str; 5] = ["float", "double", "half", "bool", "int"];
        let (family, dims) = FAMILIES
            .iter()
            .find_map(|family| word.strip_prefix(*family).map(|dims| (*family, dims)))?;
        let spelling = match (family, dims) {
            ("bool", "2") => "vec2<bool>",
            ("bool", "3") => "vec3<bool>",
            ("bool", "4") => "vec4<bool>",
            // Scalar `bool` (and anything else bool-prefixed) stays as is.
            ("bool", _) => return None,
            (_, "") => "f32",
            ("float" | "half", "1") => "f32",
            (_, "2") => "vec2<f32>",
            (_, "3") => "vec3<f32>",
            (_, "4") => "vec4<f32>",
            // No integer matrix forms are translated.
            ("int", _) => return None,
            (_, "2x2") => "mat2x2<f32>",
            (_, "2x3") => "mat2x3<f32>",
            (_, "2x4") => "mat2x4<f32>",
            (_, "3x2") => "mat3x2<f32>",
            (_, "3x3") => "mat3x3<f32>",
            (_, "3x4") => "mat3x4<f32>",
            (_, "4x2") => "mat4x2<f32>",
            (_, "4x3") => "mat4x3<f32>",
            (_, "4x4") => "mat4x4<f32>",
            _ => return None,
        };
        Some(spelling)
    }

    /// Append `word`, translated when it names an HLSL type.
    fn push_word(out: &mut String, word: &str) {
        out.push_str(wgsl_spelling(word).unwrap_or(word));
    }

    // WGSL spellings are longer than HLSL ones; leave some headroom.
    let mut out = String::with_capacity(code.len() + code.len() / 4);
    // Byte offset where the identifier-like run currently being read began.
    let mut word_start: Option<usize> = None;

    for (pos, ch) in code.char_indices() {
        if ch == '_' || ch.is_alphanumeric() {
            if word_start.is_none() {
                word_start = Some(pos);
            }
            continue;
        }
        if let Some(start) = word_start.take() {
            push_word(&mut out, &code[start..pos]);
        }
        out.push(ch);
    }
    if let Some(start) = word_start {
        push_word(&mut out, &code[start..]);
    }

    out
}
1031
1032fn replace_functions(code: &str) -> String {
1033 let mut result = code.to_string();
1034
1035 // Normalise `name (args)` → `name(args)` for known HLSL functions so
1036 // the substring-based rewrites below match. Real presets ship
1037 // `lerp (a, b, t)` and `tex2D (s, uv)` (often when the call spans
1038 // multiple lines and the author put the `(` on a fresh line).
1039 result = normalise_call_whitespace(&result);
1040
1041 // lerp → mix
1042 result = result.replace("lerp(", "mix(");
1043
1044 // Alias `sat(x)` → `saturate(x)` BEFORE the saturate→clamp rewrite
1045 // so the corpus `#define sat saturate` shortcut composes correctly.
1046 // Same word-boundary rule that `rename_word_call` enforces.
1047 result = rename_word_call(&result, "sat", "saturate");
1048
1049 // saturate(<expr>) → clamp(<expr>, 0.0, 1.0). Paren-balanced — `<expr>`
1050 // may contain nested calls like `saturate(GetBlur1(uv))`.
1051 result = rewrite_unary_call_balanced(&result, "saturate", |inner| {
1052 format!("clamp({inner}, 0.0, 1.0)")
1053 });
1054
1055 // frac → fract
1056 result = result.replace("frac(", "fract(");
1057
1058 // HLSL builtins absent or differently spelled in WGSL.
1059 //
1060 // - `sat(x)` is the HLSL alias for `saturate(x)`. The corpus also
1061 // ships `#define sat saturate` lines (stripped by
1062 // `strip_preprocessor`), leaving `sat(…)` references unbound.
1063 // - `rsqrt(x)` is the HLSL reciprocal-square-root; WGSL spells it
1064 // `inverseSqrt`.
1065 // - `log10(x)` exists in HLSL but not in WGSL. Map to
1066 // `log(x) * 0.4342944…` (`1/ln(10)`).
1067 // - `tex2Dbias(s, vec4(uv, 0, bias))` is HLSL's mip-bias sampling
1068 // form; WGSL has `textureSampleBias`, but the corpus uses bias
1069 // purely cosmetically (always 0 or near-0). Downgrade to plain
1070 // `tex2D(s, uv)` by stripping the bias arg.
1071 //
1072 // All four are plain string substitutions; no paren balancing is
1073 // required because each maps to a same-shape call (or a binary
1074 // expression for `log10`).
1075 result = rename_word_call(&result, "rsqrt", "inverseSqrt");
1076 // log10(x) → log(x) * (1 / ln 10).
1077 result = rewrite_unary_call_balanced(&result, "log10", |inner| {
1078 format!("(log({inner}) * 0.43429448190325176)")
1079 });
1080 // `tex2Dbias(s, vec4(uv, mip, bias))` → `tex2D(s, uv)`. The wrapping
1081 // vec4 ctor is the dominant corpus shape; project onto its first two
1082 // components.
1083 result = rewrite_tex2dbias(&result);
1084
1085 // Normalise case-insensitive variants of MD2 builtins. Real presets
1086 // ship `tex2d(...)` instead of `tex2D(...)`; WGSL's case-sensitive
1087 // identifier resolution fails on the lowercase form.
1088 result = result.replace("tex2d(", "tex2D(");
1089 result = result.replace("tex3d(", "tex3D(");
1090
1091 // mul(matrix, vector) → matrix * vector
1092 result = rewrite_mul_balanced(&result);
1093
1094 // `mod(a, b)` → `((a) - floor((a) / (b)) * (b))`. `mod` is a WGSL
1095 // reserved keyword (HLSL uses it as the float-mod helper). Use a
1096 // paren-balanced rewrite because both arguments are expressions.
1097 result = rewrite_mod_balanced(&result);
1098
1099 // Rename WGSL-reserved identifiers that real presets use as locals
1100 // (`mod`, `filter`, `sample`). Runs AFTER `rewrite_mod_balanced` so
1101 // function-call rewrites already happened — what's left are bare
1102 // identifier references which get a trailing underscore.
1103 result = rename_reserved_identifiers(&result);
1104
1105 // Strip leading zeros from integer literals — WGSL rejects `02`.
1106 result = LEADING_ZERO_REGEX.replace_all(&result, "$1").to_string();
1107
1108 // Strip HLSL unary `+` (no-op prefix that WGSL doesn't accept).
1109 // Triggered when `+` directly follows `(`, `,`, or any binop after
1110 // optional whitespace. Pure no-op in HLSL semantics.
1111 result = strip_unary_plus(&result);
1112
1113 result
1114}
1115
/// HLSL function names that the rewrite pipeline treats specially. When a
/// preset writes `name (args)` with whitespace between the identifier and
/// `(`, downstream substring matches (`"lerp("`, paren-balanced walkers)
/// would skip the call. Collapse that whitespace once up-front so every
/// downstream pass sees the no-space form.
const KNOWN_CALL_NAMES: &[&str] = &[
    "lerp",
    "frac",
    "saturate",
    "mul",
    "tex2D",
    "tex3D",
    "pow",
    "clamp",
    "mix",
    "min",
    "max",
    "step",
    "smoothstep",
    "length",
    "distance",
    "dot",
    "normalize",
    "abs",
    "cos",
    "sin",
    "tan",
    "sqrt",
    "exp",
    "log",
    "GetPixel",
    "GetBlur1",
    "GetBlur2",
    "GetBlur3",
    "lum",
    "fract",
    "floor",
    "ceil",
    "sign",
];

/// Replace `name<WS>(` with `name(` for every entry in [`KNOWN_CALL_NAMES`].
///
/// Only fires when `name` starts on a word boundary and `<WS>` is a
/// non-empty run of spaces, tabs, `\n` or `\r` (the no-whitespace form is
/// left alone). `//` and `/* */` comments are copied through untouched so
/// commented-out code stays stable; an unterminated `/*` is treated as
/// running to the end of the input.
///
/// The output is assembled from slices of `src`, so non-ASCII text (for
/// example an accented author name in a comment) is preserved
/// byte-for-byte and no slice can land inside a multi-byte character.
fn normalise_call_whitespace(src: &str) -> String {
    /// Whitespace allowed between a call name and its `(`.
    fn is_gap(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\n' | b'\r')
    }

    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    // Start of the span of `src` that has not been copied to `out` yet.
    let mut keep_from = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        // Step over comments; they stay part of the pending verbatim span.
        if bytes[i] == b'/' {
            match bytes.get(i + 1) {
                Some(&b'/') => {
                    i = bytes[i..]
                        .iter()
                        .position(|&b| b == b'\n')
                        .map_or(bytes.len(), |offset| i + offset);
                    continue;
                }
                Some(&b'*') => {
                    // `i + 2` follows two ASCII bytes, so it is a char boundary.
                    i = src[i + 2..]
                        .find("*/")
                        .map_or(bytes.len(), |offset| i + 2 + offset + 2);
                    continue;
                }
                _ => {}
            }
        }

        // A name can only start where the previous byte is not part of an
        // identifier.
        let at_word_start =
            i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
        if !at_word_start {
            i += 1;
            continue;
        }

        // Known name directly followed by whitespace?
        let matched = KNOWN_CALL_NAMES.iter().find(|name| {
            bytes[i..].starts_with(name.as_bytes())
                && matches!(bytes.get(i + name.len()), Some(&gap) if is_gap(gap))
        });
        let name_end = match matched {
            Some(name) => i + name.len(),
            None => {
                i += 1;
                continue;
            }
        };

        // Peek past the whitespace to see if the next byte is `(`.
        let after_gap = bytes[name_end..]
            .iter()
            .position(|&b| !is_gap(b))
            .map(|offset| name_end + offset);
        match after_gap {
            Some(paren) if bytes[paren] == b'(' => {
                // Emit everything up to the end of the name, then resume
                // the verbatim span at the `(`, dropping the gap.
                out.push_str(&src[keep_from..name_end]);
                keep_from = paren;
                i = paren + 1;
            }
            // Whitespace not followed by `(` — leave unchanged.
            _ => i = name_end,
        }
    }

    out.push_str(&src[keep_from..]);
    out
}
1242
/// Strip HLSL unary `+` (a syntactic no-op WGSL doesn't accept).
///
/// A `+` is dropped when the previous significant (non-whitespace,
/// non-comment) byte is one of the bytes accepted by
/// [`is_unary_context`] — i.e. no operand precedes it. Start-of-input
/// behaves like `(`.
///
/// Two guards keep valid code intact:
///
/// * A `+` that touches another `+` (`i++`, `++k`) is an increment
///   operator, not a unary plus, and is kept. `a + +b` (with a gap) still
///   loses its second `+`.
/// * `//` and `/* */` comments are copied through and do not count as
///   context; an unterminated `/*` runs to the end of the input.
///
/// Everything except the dropped `+` bytes is copied from `src`
/// unchanged, including non-ASCII text.
///
/// Real preset pattern: `ret.x += (+dx.x - dy.x)*0.4;` — the `+` after
/// `(` is a stylistic flourish. WGSL parser rejects with
/// `expected expression, found "+"`.
fn strip_unary_plus(src: &str) -> String {
    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    let mut last_op: Option<u8> = Some(b'('); // start-of-input behaves like `(`
    // Start of the span of `src` that has not been copied to `out` yet.
    let mut keep_from = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        let b = bytes[i];

        // Comments: step over them without touching `last_op`.
        if b == b'/' {
            match bytes.get(i + 1) {
                Some(&b'/') => {
                    i = bytes[i..]
                        .iter()
                        .position(|&c| c == b'\n')
                        .map_or(bytes.len(), |offset| i + offset);
                    continue;
                }
                Some(&b'*') => {
                    // `i + 2` follows two ASCII bytes, so it is a char boundary.
                    i = src[i + 2..]
                        .find("*/")
                        .map_or(bytes.len(), |offset| i + 2 + offset + 2);
                    continue;
                }
                _ => {}
            }
        }

        if b == b'+' && last_op.is_some_and(is_unary_context) {
            let part_of_increment =
                bytes.get(i + 1) == Some(&b'+') || (i > 0 && bytes[i - 1] == b'+');
            if !part_of_increment {
                // Drop the `+`. `last_op` is left alone — the next token
                // still sits in the same syntactic position.
                out.push_str(&src[keep_from..i]);
                keep_from = i + 1;
                i += 1;
                continue;
            }
        }

        if !b.is_ascii_whitespace() {
            last_op = Some(b);
        }
        i += 1;
    }

    out.push_str(&src[keep_from..]);
    out
}

/// True when `b`, as the previous significant byte, means the next `+`
/// has no left operand: an opening bracket or brace (`(`, `[`, `{`), a
/// separator (`,`, `;`), or an operator (`=`, `+`, `-`, `*`, `/`, `<`,
/// `>`, `?`, `:`).
fn is_unary_context(b: u8) -> bool {
    matches!(
        b,
        b'(' | b','
            | b'='
            | b'+'
            | b'-'
            | b'*'
            | b'/'
            | b'<'
            | b'>'
            | b'?'
            | b':'
            | b'['
            | b'{'
            | b';'
    )
}
1313
/// Generic paren-balanced rewriter for `<name>(<single-arg>)` calls.
///
/// Walks the source, finds `<name>` on a left word boundary immediately
/// followed by `(`, balances to the matching `)`, and replaces the whole
/// call with `make_replacement` applied to the argument text (not
/// trimmed).
///
/// Calls to the same `<name>` nested inside the argument are rewritten
/// first, so `saturate(a + saturate(b))` leaves no `saturate(` behind. A
/// call whose parentheses never balance is left unchanged. All other
/// text — including non-ASCII characters — is copied from `src` verbatim.
fn rewrite_unary_call_balanced<F>(src: &str, name: &str, make_replacement: F) -> String
where
    F: Fn(&str) -> String,
{
    /// Index of the `)` closing a call whose arguments start at `from`.
    fn closing_paren(bytes: &[u8], from: usize) -> Option<usize> {
        let mut depth = 1i32;
        for (offset, &b) in bytes.get(from..)?.iter().enumerate() {
            match b {
                b'(' => depth += 1,
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(from + offset);
                    }
                }
                _ => {}
            }
        }
        None
    }

    /// Recursive worker. Takes the callback as a trait object so the
    /// recursion does not instantiate an ever-growing generic type.
    fn rewrite(src: &str, name: &[u8], make: &dyn Fn(&str) -> String) -> String {
        let bytes = src.as_bytes();
        let mut out = String::with_capacity(src.len());
        // Start of the span of `src` not yet copied to `out`.
        let mut keep_from = 0usize;
        let mut i = 0usize;

        while i < bytes.len() {
            let is_call = bytes[i..].starts_with(name)
                && bytes.get(i + name.len()) == Some(&b'(')
                && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
            if is_call {
                let arg_start = i + name.len() + 1;
                if let Some(close) = closing_paren(bytes, arg_start) {
                    out.push_str(&src[keep_from..i]);
                    let inner = rewrite(&src[arg_start..close], name, make);
                    out.push_str(&make(&inner));
                    i = close + 1;
                    keep_from = i;
                    continue;
                }
            }
            i += 1;
        }

        out.push_str(&src[keep_from..]);
        out
    }

    rewrite(src, name.as_bytes(), &make_replacement)
}
1361
/// Rename `<from>(` → `<to>(` at every word boundary. Used to alias HLSL
/// builtins that WGSL spells differently (`sat` → `saturate`, `rsqrt` →
/// `inverseSqrt`).
///
/// Differs from a plain `replace`: a preset local `frsqrt = q1` (or a
/// call `frsqrt(x)`) won't pick up an unwanted `frinverseSqrt` rewrite,
/// because a non-identifier byte (or start of source) is required to the
/// left of the match. The `(` must follow the name directly.
///
/// Unmatched text is copied from `src` as slices, so non-ASCII characters
/// pass through unchanged.
fn rename_word_call(src: &str, from: &str, to: &str) -> String {
    let bytes = src.as_bytes();
    let from_b = from.as_bytes();
    let mut out = String::with_capacity(src.len());
    // Start of the span of `src` not yet copied to `out`.
    let mut keep_from = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        let is_call = bytes[i..].starts_with(from_b)
            && bytes.get(i + from_b.len()) == Some(&b'(')
            && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
        if is_call {
            out.push_str(&src[keep_from..i]);
            out.push_str(to);
            // Resume the verbatim span at the `(` and continue behind it.
            keep_from = i + from_b.len();
            i = keep_from + 1;
            continue;
        }
        i += 1;
    }

    out.push_str(&src[keep_from..]);
    out
}
1389
/// `tex2Dbias(s, vec4(uv, mip, bias))` → `tex2D(s, uv)`.
///
/// Paren-balanced over the whole call. The first top-level comma ends the
/// sampler argument; everything after it up to the call's own `)` is the
/// coordinate argument. If that argument is a vector constructor, it is
/// reduced to its first component by [`strip_first_vec_component`];
/// otherwise it is passed through as written. The bias component is
/// dropped (real presets use it cosmetically at 0 or near-0 — no visual
/// delta).
///
/// A call with fewer than two arguments, or one whose parentheses never
/// balance, is left unchanged — the scan never reaches past the call's
/// closing `)` to borrow a comma from later code. Text outside rewritten
/// calls is copied from `src` verbatim, non-ASCII included.
fn rewrite_tex2dbias(src: &str) -> String {
    const NEEDLE: &[u8] = b"tex2Dbias(";

    /// For a call whose arguments start at `args_start`, return the
    /// positions of its first top-level comma and of its closing `)`.
    fn call_extent(bytes: &[u8], args_start: usize) -> Option<(usize, usize)> {
        let mut depth = 1i32;
        let mut comma = None;
        for (offset, &b) in bytes.get(args_start..)?.iter().enumerate() {
            match b {
                b'(' => depth += 1,
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        return comma.map(|c| (c, args_start + offset));
                    }
                }
                b',' if depth == 1 && comma.is_none() => comma = Some(args_start + offset),
                _ => {}
            }
        }
        None
    }

    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    // Start of the span of `src` not yet copied to `out`.
    let mut keep_from = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        let is_call = bytes[i..].starts_with(NEEDLE)
            && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
        if is_call {
            let args_start = i + NEEDLE.len();
            if let Some((comma, close)) = call_extent(bytes, args_start) {
                let sampler = src[args_start..comma].trim();
                let coords = src[comma + 1..close].trim();
                // Keep only the first constructor component (`uv` for the
                // dominant `vec4(uv, mip, bias)` shape).
                let uv = strip_first_vec_component(coords).unwrap_or_else(|| coords.to_string());

                out.push_str(&src[keep_from..i]);
                out.push_str("tex2D(");
                out.push_str(sampler);
                out.push_str(", ");
                out.push_str(&uv);
                out.push(')');
                i = close + 1;
                keep_from = i;
                continue;
            }
        }
        i += 1;
    }

    out.push_str(&src[keep_from..]);
    out
}

/// For a string like `vec4(uv, 0, 0.1)` or `float3(uv, 0)`, return `uv` —
/// the trimmed slice up to the first top-level comma inside the
/// constructor (the whole argument list when there is no comma). Used by
/// [`rewrite_tex2dbias`] to drop the mip-bias arguments.
///
/// Recognises the 2/3/4-component constructors in their HLSL spelling
/// (`floatN(`), their bare spelling (`vecN(`) and their WGSL spelling
/// (`vecN<f32>(`), so the result does not depend on whether type names
/// have already been translated. Returns `None` when `s` does not start
/// with one of these constructors or does not end with `)`.
fn strip_first_vec_component(s: &str) -> Option<String> {
    const CONSTRUCTORS: [&str; 9] = [
        "vec4(",
        "vec3(",
        "vec2(",
        "float4(",
        "float3(",
        "float2(",
        "vec4<f32>(",
        "vec3<f32>(",
        "vec2<f32>(",
    ];

    let s = s.trim();
    let args = CONSTRUCTORS
        .iter()
        .find_map(|ctor| s.strip_prefix(*ctor))?
        .strip_suffix(')')?;

    let mut depth = 0i32;
    let mut end = args.len();
    for (pos, b) in args.bytes().enumerate() {
        match b {
            b'(' => depth += 1,
            b')' => depth -= 1,
            b',' if depth == 0 => {
                end = pos;
                break;
            }
            _ => {}
        }
    }
    Some(args[..end].trim().to_string())
}
1489
/// Rename WGSL-reserved keywords used by MD2 preset authors as locals
/// (`mod`, `filter`, `sample`). Every occurrence that stands alone as a
/// word and is not followed (after optional whitespace) by `(` gets a
/// trailing `_`. The `(` case is a function call — `mod(…)` is expected
/// to have been rewritten by `rewrite_mod_balanced` already — and is
/// left as written.
///
/// This stays consistent across one shader: a local `float mod = …;` and
/// its later references in `+ q22*sqrt(z)/4*mod *` both become `mod_`, so
/// the renamed variable still ties together correctly.
///
/// `//` and `/* */` comments are passed through unchanged; an
/// unterminated `/*` runs to the end of the input. All untouched text is
/// copied from `src` as slices, so non-ASCII characters are preserved and
/// no slice can land inside a multi-byte character.
fn rename_reserved_identifiers(src: &str) -> String {
    const RESERVED: [&str; 3] = ["mod", "filter", "sample"];

    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len() + 16);
    // Start of the span of `src` not yet copied to `out`.
    let mut keep_from = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        // Step over comments so the renamer doesn't touch text the user
        // authored as documentation.
        if bytes[i] == b'/' {
            match bytes.get(i + 1) {
                Some(&b'/') => {
                    i = bytes[i..]
                        .iter()
                        .position(|&b| b == b'\n')
                        .map_or(bytes.len(), |offset| i + offset);
                    continue;
                }
                Some(&b'*') => {
                    // `i + 2` follows two ASCII bytes, so it is a char boundary.
                    i = src[i + 2..]
                        .find("*/")
                        .map_or(bytes.len(), |offset| i + 2 + offset + 2);
                    continue;
                }
                _ => {}
            }
        }

        // Word-boundary check on the left.
        let at_word_start =
            i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
        if !at_word_start {
            i += 1;
            continue;
        }

        let hit = RESERVED.iter().copied().find(|kw| {
            let end = i + kw.len();
            bytes[i..].starts_with(kw.as_bytes())
                // Right boundary: not the head of a longer identifier.
                && !matches!(bytes.get(end), Some(&c) if c.is_ascii_alphanumeric() || c == b'_')
                // Next significant byte `(` means a call — leave it.
                && bytes[end..].iter().find(|c| !c.is_ascii_whitespace()) != Some(&b'(')
        });

        match hit {
            Some(kw) => {
                let end = i + kw.len();
                out.push_str(&src[keep_from..end]);
                out.push('_');
                keep_from = end;
                i = end;
            }
            None => i += 1,
        }
    }

    out.push_str(&src[keep_from..]);
    out
}
1573
1574/// `mod(a, b)` → `((a) - floor((a) / (b)) * (b))`. WGSL reserves `mod` as
1575/// a keyword; HLSL uses it as the float-modulo helper. The expansion
1576/// matches HLSL's semantics (and matches GLSL's `mod`) so behaviour stays
1577/// identical.
1578///
1579/// Paren-balanced on both arguments because real presets ship
1580/// `mod(ang*16/M_PI, 1.0)` and similar — the comma sits inside the call's
1581/// top-level depth.
1582fn rewrite_mod_balanced(src: &str) -> String {
1583 rewrite_binary_call_balanced(src, "mod", |a, b| {
1584 format!("(({a}) - floor(({a}) / ({b})) * ({b}))")
1585 })
1586}
1587
/// Generic paren-balanced rewriter for two-argument calls.
///
/// Walks the source, matches `<name>(` on a left word boundary, splits
/// the argument list at its first top-level `,`, and replaces the whole
/// call with `make(a, b)`, where both arguments are trimmed. Anything
/// after that first comma — further commas included — belongs to `b`.
///
/// Calls to the same `<name>` nested inside either argument are rewritten
/// before `make` sees them, so `mod(mod(a, b), c)` is expanded at both
/// levels. A call without a top-level comma, or whose parentheses never
/// balance, is left unchanged. Text outside rewritten calls is copied
/// from `src` verbatim, non-ASCII included.
fn rewrite_binary_call_balanced<F>(src: &str, name: &str, make: F) -> String
where
    F: Fn(&str, &str) -> String,
{
    /// For a call whose arguments start at `args_start`, return the
    /// positions of its first top-level comma and of its closing `)`.
    fn call_extent(bytes: &[u8], args_start: usize) -> Option<(usize, usize)> {
        let mut depth = 1i32;
        let mut comma = None;
        for (offset, &b) in bytes.get(args_start..)?.iter().enumerate() {
            match b {
                b'(' => depth += 1,
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        return comma.map(|c| (c, args_start + offset));
                    }
                }
                b',' if depth == 1 && comma.is_none() => comma = Some(args_start + offset),
                _ => {}
            }
        }
        None
    }

    /// Recursive worker. Takes the callback as a trait object so the
    /// recursion does not instantiate an ever-growing generic type.
    fn rewrite(src: &str, name: &[u8], make: &dyn Fn(&str, &str) -> String) -> String {
        let bytes = src.as_bytes();
        let mut out = String::with_capacity(src.len());
        // Start of the span of `src` not yet copied to `out`.
        let mut keep_from = 0usize;
        let mut i = 0usize;

        while i < bytes.len() {
            let is_call = bytes[i..].starts_with(name)
                && bytes.get(i + name.len()) == Some(&b'(')
                && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
            if is_call {
                let args_start = i + name.len() + 1;
                if let Some((comma, close)) = call_extent(bytes, args_start) {
                    let a = rewrite(src[args_start..comma].trim(), name, make);
                    let b = rewrite(src[comma + 1..close].trim(), name, make);
                    out.push_str(&src[keep_from..i]);
                    out.push_str(&make(&a, &b));
                    i = close + 1;
                    keep_from = i;
                    continue;
                }
            }
            // Not a rewritable call here — leave the byte in the pending
            // verbatim span.
            i += 1;
        }

        out.push_str(&src[keep_from..]);
        out
    }

    rewrite(src, name.as_bytes(), &make)
}
1642
/// `mul(a, b)` → `((a) * (b))`.
///
/// Paren-balanced on both arguments — needed because real shaders write
/// `mul(rotation_matrix(theta), uv)`. The first comma at the call's own
/// nesting level splits the two (trimmed) arguments. `mul` must start on
/// a left word boundary and be followed directly by `(`.
///
/// `mul` calls nested inside either argument are rewritten as well, so
/// chained transforms such as `mul(mul(v, a), b)` leave no `mul(` behind.
/// A call without a top-level comma, or whose parentheses never balance,
/// is left unchanged. Text outside rewritten calls is copied from `src`
/// verbatim, non-ASCII included.
fn rewrite_mul_balanced(src: &str) -> String {
    const NAME: &[u8] = b"mul";

    /// For a call whose arguments start at `args_start`, return the
    /// positions of its first top-level comma and of its closing `)`.
    fn call_extent(bytes: &[u8], args_start: usize) -> Option<(usize, usize)> {
        let mut depth = 1i32;
        let mut comma = None;
        for (offset, &b) in bytes.get(args_start..)?.iter().enumerate() {
            match b {
                b'(' => depth += 1,
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        return comma.map(|c| (c, args_start + offset));
                    }
                }
                b',' if depth == 1 && comma.is_none() => comma = Some(args_start + offset),
                _ => {}
            }
        }
        None
    }

    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    // Start of the span of `src` not yet copied to `out`.
    let mut keep_from = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        let is_call = bytes[i..].starts_with(NAME)
            && bytes.get(i + NAME.len()) == Some(&b'(')
            && (i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_'));
        if is_call {
            let args_start = i + NAME.len() + 1;
            if let Some((comma, close)) = call_extent(bytes, args_start) {
                // Rewrite nested `mul` calls before wrapping the operands.
                let a = rewrite_mul_balanced(src[args_start..comma].trim());
                let b = rewrite_mul_balanced(src[comma + 1..close].trim());
                out.push_str(&src[keep_from..i]);
                out.push_str("((");
                out.push_str(&a);
                out.push_str(") * (");
                out.push_str(&b);
                out.push_str("))");
                i = close + 1;
                keep_from = i;
                continue;
            }
        }
        i += 1;
    }

    out.push_str(&src[keep_from..]);
    out
}
1691
1692fn rewrite_local_declarations(code: &str) -> String {
1693 // Split inline `; TYPE` runs onto their own lines first, so the
1694 // line-anchored LOCAL_DECL_REGEX sees every decl. See the doc on
1695 // [`INLINE_DECL_SPLIT_REGEX`] for the motivating preset. The same
1696 // pass also detaches `{TYPE` glue at the head of function/conditional
1697 // bodies.
1698 let normalised = INLINE_DECL_SPLIT_REGEX
1699 .replace_all(code, "$1\n$2 $3")
1700 .to_string();
1701 LOCAL_DECL_REGEX
1702 .replace_all(&normalised, |caps: ®ex::Captures| {
1703 let indent = &caps[1];
1704 let ty = &caps[2];
1705 let decls = &caps[3];
1706
1707 // Bail when this is actually a function signature
1708 // (`vec3<f32> helper(args) { … }`). The regex greedily
1709 // swallows the body up to the first `;`; we detect the
1710 // function shape by spotting a `(` before any `=` and
1711 // return the match unchanged so [`lift_user_functions`] can
1712 // extract it intact later.
1713 if is_function_signature(decls) {
1714 return caps[0].to_string();
1715 }
1716
1717 let mut out = String::new();
1718 for (i, raw) in split_top_level_commas(decls).into_iter().enumerate() {
1719 let decl = raw.trim();
1720 if decl.is_empty() {
1721 continue;
1722 }
1723 if i > 0 {
1724 out.push('\n');
1725 out.push_str(indent);
1726 } else {
1727 out.push_str(indent);
1728 }
1729 // Each declarator is `name` or `name = init`. We split on the
1730 // first `=` so initialisers containing further `=` (rare,
1731 // mostly `a = b == c ? …` ternary which we don't otherwise
1732 // support) keep their tail intact.
1733 if let Some((name, init)) = decl.split_once('=') {
1734 let name = name.trim();
1735 let init = init.trim();
1736 out.push_str(&format!("var {name}: {ty} = {init};"));
1737 } else {
1738 out.push_str(&format!("var {decl}: {ty};"));
1739 }
1740 }
1741 out
1742 })
1743 .to_string()
1744}
1745
/// Decide whether a `decls` capture is really the head of a function
/// definition rather than a declarator list.
///
/// `decls` is the text the local-declaration regex captured between the
/// type and the terminating `;`. Only the first `=` or `(` matters:
///
/// * `(` comes first → function signature (`helper(args) { … return …`),
///   returns `true`;
/// * `=` comes first → variable with an initialiser (`x = sin(0)`), returns
///   `false`;
/// * neither character occurs → plain declarator list, returns `false`.
fn is_function_signature(decls: &str) -> bool {
    let first_marker = decls.chars().find(|c| matches!(c, '=' | '('));
    first_marker == Some('(')
}
1764
/// Split a declarator list on top-level commas only.
///
/// A comma is "top-level" when it is not enclosed by `()`, `[]`, `{}` or a
/// `<...>` generic argument list, so none of these split the declarator:
/// `vec3<f32>(0, 0, 0)`, `array<f32, 2>`, `arr[idx(1, 2)]`, `{ c, s }`.
///
/// Robustness rules:
/// * Depth counters never go below zero. A stray closer — most commonly the
///   `>` of a comparison such as `a = x > 0.5, b = 1` — therefore cannot
///   switch splitting off for the rest of the list.
/// * `<` / `>` are only interpreted as generic brackets outside of
///   `()`/`[]`/`{}`; comparisons inside a call (`f(x < y)`) are ignored.
///
/// Known limitation: an unparenthesised top-level `<` comparison
/// (`a = x < y, b`) still looks like an opening generic bracket and
/// suppresses the split that follows it.
///
/// The returned slices are untrimmed and always contain at least one element
/// (the whole input when there is no top-level comma).
fn split_top_level_commas(s: &str) -> Vec<&str> {
    let mut pieces = Vec::new();
    let mut paren = 0u32;
    let mut bracket = 0u32;
    let mut brace = 0u32;
    let mut angle = 0u32;
    let mut start = 0usize;

    for (i, b) in s.bytes().enumerate() {
        let enclosed = paren + bracket + brace > 0;
        match b {
            b'(' => paren += 1,
            b')' => paren = paren.saturating_sub(1),
            b'[' => bracket += 1,
            b']' => bracket = bracket.saturating_sub(1),
            b'{' => brace += 1,
            b'}' => brace = brace.saturating_sub(1),
            b'<' if !enclosed => angle += 1,
            b'>' if !enclosed => angle = angle.saturating_sub(1),
            b',' if !enclosed && angle == 0 => {
                // `,` is ASCII, so `i` and `i + 1` are char boundaries.
                pieces.push(&s[start..i]);
                start = i + 1;
            }
            _ => {}
        }
    }
    pieces.push(&s[start..]);
    pieces
}
1789
1790fn strip_preprocessor(code: &str) -> String {
1791 // First, expand any `#define NAME REPLACEMENT` where REPLACEMENT is a
1792 // single identifier — common MD2 idiom for type/function aliases:
1793 // `#define MyGet GetPixel`, `#define sat saturate`, `#define vec3 float3`.
1794 // Without this expansion, `strip_preprocessor` drops the `#define` and
1795 // every later `MyGet(uv)` lands as `no definition in scope for
1796 // identifier: 'MyGet'`. We only expand the trivial single-token
1797 // replacement form — function-like macros (`#define FOO(x) bar(x,1)`)
1798 // aren't worth the parser complexity here.
1799 let expanded = expand_simple_defines(code);
1800 PREPROC_REGEX.replace_all(&expanded, "").to_string()
1801}
1802
1803/// Scan for lines of the form `#define IDENT IDENT` (whitespace-separated
1804/// single-token replacement) and substitute `from → to` everywhere else in
1805/// the source. Operates as a single pass: defines are collected first, then
1806/// applied to the rest of the source. Skips macros whose `to` looks like
1807/// anything other than a bare identifier so we don't accidentally inline
1808/// `#define K 0.5` (where the rest of the source has plain `K` in
1809/// arithmetic context — the existing fall-through preserves it as an
1810/// undefined-but-untouched identifier the user can spot in the error).
1811fn expand_simple_defines(code: &str) -> String {
1812 use regex::Regex;
1813 use std::sync::LazyLock;
1814 static DEF_RE: LazyLock<Regex> = LazyLock::new(|| {
1815 // Trailing `//comment` is common in the corpus
1816 // (`#define MyGet GetPixel //GetBlur1`) — allow it.
1817 Regex::new(
1818 r"(?m)^\s*#\s*define\s+([A-Za-z_][A-Za-z0-9_]*)\s+([A-Za-z_][A-Za-z0-9_]*)\s*(?://[^\n]*)?$",
1819 )
1820 .unwrap()
1821 });
1822 let pairs: Vec<(String, String)> = DEF_RE
1823 .captures_iter(code)
1824 .map(|c| (c[1].to_string(), c[2].to_string()))
1825 .collect();
1826 if pairs.is_empty() {
1827 return code.to_string();
1828 }
1829 let mut out = code.to_string();
1830 for (from, to) in &pairs {
1831 if from == to {
1832 continue;
1833 }
1834 let re = Regex::new(&format!(r"\b{}\b", regex::escape(from))).unwrap();
1835 out = re.replace_all(&out, to.as_str()).to_string();
1836 }
1837 out
1838}
1839
1840/// Comment out lines that look like prose (English) rather than
1841/// HLSL/WGSL code. Real preset pattern: ``comp_30=`written by martin``
1842/// — an attribution typed without a `//` prefix, threaded into the
1843/// shader body by the `.milk` parser as a literal line of code. Many
1844/// presets failed with `expected assignment or increment/decrement;
1845/// found 'by'` (or `found 'rota'`, etc.) on lines of this shape.
1846///
1847/// Heuristic: a non-empty line where every non-whitespace character is
1848/// either an ASCII letter or an apostrophe — no `;`, `=`, parens,
1849/// braces, brackets, arithmetic operators, digits, or comment markers —
1850/// and which contains at least two whitespace-separated word tokens. We
1851/// also require the first token *not* to look like a known WGSL keyword
1852/// or builtin call left-side (e.g. `for`, `if`, `var`, `return`, `tex2D`,
1853/// `mul`, …) so a line like `if cond` alone won't be commented out.
1854/// In practice prose lines always have ≥ 2 words and lack punctuation,
1855/// so the false-positive rate is effectively zero on the corpus.
1856fn comment_out_prose_lines(code: &str) -> String {
1857 let mut out = String::with_capacity(code.len() + 32);
1858 for line in code.split_inclusive('\n') {
1859 if looks_like_prose(line) {
1860 out.push_str("// ");
1861 out.push_str(line);
1862 } else {
1863 out.push_str(line);
1864 }
1865 }
1866 out
1867}
1868
/// Heuristic used by [`comment_out_prose_lines`]: does `line` look like free
/// text instead of shader code?
///
/// Returns `true` only when all of the following hold for the trimmed line:
///
/// * it is not empty and does not start with `//`, `/*` or `*`;
/// * every non-whitespace character is an ASCII letter or an apostrophe;
/// * it contains at least two such characters (a lone `a` is left alone,
///   a bare marker such as `END` is treated as prose);
/// * its first word is not in `KW`, the list of keywords and builtin names
///   that can legitimately lead a line whose remainder continues on the
///   next line.
fn looks_like_prose(line: &str) -> bool {
    // Leading words that mark a line as (the start of) real code.
    const KW: &[&str] = &[
        "for",
        "if",
        "else",
        "while",
        "do",
        "return",
        "break",
        "continue",
        "var",
        "let",
        "const",
        "static",
        "uniform",
        "extern",
        "sampler",
        "texture",
        "shader_body",
        "true",
        "false",
        "tex2D",
        "tex2d",
        "tex3D",
        "lerp",
        "mix",
        "saturate",
        "abs",
        "sin",
        "cos",
        "tan",
        "atan",
        "atan2",
        "pow",
        "sqrt",
        "log",
        "exp",
        "min",
        "max",
        "clamp",
        "length",
        "normalize",
        "dot",
        "cross",
        "mul",
        "step",
        "smoothstep",
        "frac",
        "fract",
        "floor",
        "ceil",
        "round",
        "sign",
        "any",
        "all",
        "not",
        "lum",
        "GetPixel",
        "GetMain",
        "GetBlur1",
        "GetBlur2",
        "GetBlur3",
    ];

    let text = line.trim_end_matches('\n').trim();

    // Blank lines and existing comments are never touched.
    let already_comment = ["//", "/*", "*"]
        .iter()
        .any(|marker| text.starts_with(*marker));
    if text.is_empty() || already_comment {
        return false;
    }

    // Anything besides letters, apostrophes and ASCII whitespace (`;`, `=`,
    // digits, parens, …) means this is code.
    let mut wordy_chars = 0usize;
    for ch in text.chars().filter(|ch| !ch.is_ascii_whitespace()) {
        if !(ch.is_ascii_alphabetic() || ch == '\'') {
            return false;
        }
        wordy_chars += 1;
    }
    if wordy_chars < 2 {
        return false;
    }

    // Stay conservative on real code: a leading keyword/builtin may be
    // continued on the following line.
    text.split_whitespace()
        .next()
        .is_some_and(|first| !KW.contains(&first))
}
1968
1969fn strip_sampler_declarations(code: &str) -> String {
1970 SAMPLER_DECL_REGEX.replace_all(code, "").to_string()
1971}
1972
1973fn strip_storage_class_qualifiers(code: &str) -> String {
1974 let no_storage = STORAGE_CLASS_REGEX.replace_all(code, "");
1975 CONST_TYPE_REGEX.replace_all(&no_storage, "$1").to_string()
1976}
1977
1978fn rewrite_postfix_inc_dec(code: &str) -> String {
1979 let inc = POSTFIX_INC_REGEX
1980 .replace_all(code, "$1 = $1 + 1$2")
1981 .to_string();
1982 POSTFIX_DEC_REGEX
1983 .replace_all(&inc, "$1 = $1 - 1$2")
1984 .to_string()
1985}
1986
/// Wrap single-statement `if`/`while`/`for` bodies in `{ ... }`. WGSL requires
/// braces on every conditional/loop body; HLSL doesn't, and a lot of MD2
/// preset code uses the brace-less form (`if (cond) ret.z -= 0.5;`).
///
/// We walk the source, find each `if` / `while` / `for` keyword (see
/// [`keyword_at`]), balance the condition parens, and if the next significant
/// character isn't `{`, we wrap the complete body statement in braces:
///
/// * a plain statement runs to the next `;` outside `()`/`[]`;
/// * a body that is itself a control statement (`if (a) if (b) x;`) is taken
///   as a whole — including a braced inner body and a trailing `else` of an
///   inner `if` — and is braced recursively;
/// * keywords inside `//` and `/* */` comments are ignored, and comments are
///   skipped while looking for delimiters.
///
/// The body of an `else` is *not* braced by this pass. Everything that is
/// not rewritten — including non-ASCII text — is copied through verbatim.
fn brace_up_single_statement_blocks(src: &str) -> String {
    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len() + 32);
    // `src[copied..i]` is text that has been scanned but not yet emitted.
    // Emitting whole `&str` slices (rather than pushing bytes as chars) is
    // what keeps multi-byte UTF-8 sequences intact.
    let mut copied = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        // Comments pass through untouched.
        if let Some(end) = brace_scan::comment_end(bytes, i) {
            i = end;
            continue;
        }
        let Some(kw_len) = keyword_at(bytes, i) else {
            i += 1;
            continue;
        };

        // The keyword must be followed (after optional whitespace) by the
        // `(` that opens its condition; otherwise it is some unrelated token.
        let open = brace_scan::skip_whitespace(bytes, i + kw_len);
        if bytes.get(open) != Some(&b'(') {
            i = open;
            continue;
        }

        let after_cond = brace_scan::balanced_end(bytes, open, b'(', b')');
        let body_start = brace_scan::skip_whitespace(bytes, after_cond);
        let first_token = brace_scan::skip_trivia(bytes, body_start);
        if first_token >= bytes.len() || bytes[first_token] == b'{' {
            // Nothing follows, or the body is already braced. Keep scanning
            // from here so statements nested inside the block are handled.
            i = first_token;
            continue;
        }

        // Brace-less body: wrap the whole statement and brace anything
        // nested inside it.
        let body_end = brace_scan::statement_end(bytes, first_token);
        out.push_str(&src[copied..body_start]);
        out.push_str("{ ");
        out.push_str(&brace_up_single_statement_blocks(&src[body_start..body_end]));
        out.push_str(" }");
        copied = body_end;
        i = body_end;
    }

    out.push_str(&src[copied..]);
    out
}

/// Byte-level scanning helpers for [`brace_up_single_statement_blocks`].
///
/// Every returned index sits on an ASCII byte, directly after one, or at the
/// end of the input, so it is always a valid `&str` slicing boundary.
mod brace_scan {
    use super::keyword_at;

    /// If a `//` or `/* */` comment starts at `i`, return the index just past
    /// it. A line comment ends before its newline; an unterminated block
    /// comment runs to the end of the input.
    pub(super) fn comment_end(bytes: &[u8], i: usize) -> Option<usize> {
        if bytes.get(i).copied() != Some(b'/') {
            return None;
        }
        match bytes.get(i + 1).copied() {
            Some(b'/') => Some(
                bytes[i..]
                    .iter()
                    .position(|&b| b == b'\n')
                    .map_or(bytes.len(), |off| i + off),
            ),
            Some(b'*') => Some(
                bytes[i + 2..]
                    .windows(2)
                    .position(|w| w == b"*/")
                    .map_or(bytes.len(), |off| i + 2 + off + 2),
            ),
            _ => None,
        }
    }

    /// Index of the first byte at or after `i` that is not ASCII whitespace.
    pub(super) fn skip_whitespace(bytes: &[u8], mut i: usize) -> usize {
        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
            i += 1;
        }
        i
    }

    /// Like [`skip_whitespace`], but also steps over comments.
    pub(super) fn skip_trivia(bytes: &[u8], mut i: usize) -> usize {
        loop {
            i = skip_whitespace(bytes, i);
            match comment_end(bytes, i) {
                Some(end) => i = end,
                None => return i,
            }
        }
    }

    /// `open` indexes an `open_ch`; returns the index just past its matching
    /// `close_ch`, or the input length when the pair is never closed.
    pub(super) fn balanced_end(bytes: &[u8], open: usize, open_ch: u8, close_ch: u8) -> usize {
        let mut depth = 0i32;
        let mut i = open;
        while i < bytes.len() {
            if let Some(end) = comment_end(bytes, i) {
                i = end;
                continue;
            }
            let b = bytes[i];
            if b == open_ch {
                depth += 1;
            } else if b == close_ch {
                depth -= 1;
                if depth == 0 {
                    return i + 1;
                }
            }
            i += 1;
        }
        bytes.len()
    }

    /// Does the word `else` start at `i`?
    fn else_at(bytes: &[u8], i: usize) -> bool {
        bytes[i..].starts_with(b"else")
            && !bytes
                .get(i + 4)
                .is_some_and(|b| b.is_ascii_alphanumeric() || *b == b'_')
    }

    /// Index just past the single statement that begins at (or after the
    /// whitespace/comments following) `start`.
    pub(super) fn statement_end(bytes: &[u8], start: usize) -> usize {
        let first = skip_trivia(bytes, start);
        if first >= bytes.len() {
            return bytes.len();
        }

        // Compound statement: runs to its matching `}`.
        if bytes[first] == b'{' {
            return balanced_end(bytes, first, b'{', b'}');
        }

        // Control statement: keyword, condition, then one nested statement.
        if let Some(kw_len) = keyword_at(bytes, first) {
            let open = skip_whitespace(bytes, first + kw_len);
            if bytes.get(open) == Some(&b'(') {
                let after_cond = balanced_end(bytes, open, b'(', b')');
                let mut end = statement_end(bytes, after_cond);
                // An `else` directly after an `if` belongs to that `if`.
                if &bytes[first..first + kw_len] == b"if" {
                    let next = skip_trivia(bytes, end);
                    if else_at(bytes, next) {
                        end = statement_end(bytes, next + 4);
                    }
                }
                return end;
            }
        }

        // Plain statement: up to the first `;` outside parens and brackets.
        let mut paren = 0i32;
        let mut bracket = 0i32;
        let mut i = first;
        while i < bytes.len() {
            if let Some(end) = comment_end(bytes, i) {
                i = end;
                continue;
            }
            match bytes[i] {
                b'(' => paren += 1,
                b')' => paren -= 1,
                b'[' => bracket += 1,
                b']' => bracket -= 1,
                b';' if paren == 0 && bracket == 0 => return i + 1,
                _ => {}
            }
            i += 1;
        }
        bytes.len()
    }
}

/// Match `if`/`while`/`for` keywords on word boundaries; return their length.
///
/// Returns `None` when the byte before `i` or the byte after the keyword is
/// an identifier character (ASCII alphanumeric or `_`), or when no keyword
/// starts at `i`.
fn keyword_at(bytes: &[u8], i: usize) -> Option<usize> {
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    // Must be at a word boundary on the left.
    if i > 0 && is_ident(bytes[i - 1]) {
        return None;
    }
    let rest = bytes.get(i..)?;
    for kw in ["if", "while", "for"] {
        let kw = kw.as_bytes();
        // Right boundary: next char must be non-identifier (or end of input).
        if rest.starts_with(kw) && !rest.get(kw.len()).is_some_and(|&b| is_ident(b)) {
            return Some(kw.len());
        }
    }
    None
}
2114
2115/// Rewrite `tex3D(<sampler>, <uvw>)` to a real 3D `textureSample` against
2116/// the noise-volume bindings.
2117///
2118/// An earlier implementation collapsed every `tex3D` to a 2D
2119/// `GetPixel(uvw.xy)` fallback — that kept the shader compiling but
2120/// produced wrong output for any preset that relied on noisevol's true
2121/// volumetric variance (many in-the-wild comps reference
2122/// `sampler_noisevol_hq`). Now that the renderer actually binds 3D noise
2123/// textures, the rewriter routes the known names onto them and only
2124/// falls back for unknown sampler names.
2125///
2126/// **Return type**: vec4. HLSL's `tex3D` returns float4 and presets often
2127/// store the result in `float4 noise2 = tex3D(...);` and read `.w` later —
2128/// `textureSample` of a `texture_3d<f32>` is already vec4.
2129fn rewrite_tex3d_calls(src: &str) -> String {
2130 let bytes = src.as_bytes();
2131 let mut out = String::with_capacity(src.len());
2132 let mut i = 0usize;
2133
2134 while i < bytes.len() {
2135 // Match `tex3D` on a word boundary.
2136 if i + 5 <= bytes.len() && &bytes[i..i + 5] == b"tex3D" {
2137 let prev_ok = i == 0 || !(bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
2138 let next_open = i + 5 < bytes.len() && bytes[i + 5] == b'(';
2139 if prev_ok && next_open {
2140 // Skip past `tex3D(`.
2141 let arg_start = i + 6;
2142 // Find the comma at depth 0 (separates sampler from uvw).
2143 let mut j = arg_start;
2144 let mut depth = 1i32;
2145 let mut comma = None;
2146 while j < bytes.len() && depth > 0 {
2147 match bytes[j] {
2148 b'(' => depth += 1,
2149 b')' => {
2150 depth -= 1;
2151 if depth == 0 {
2152 break;
2153 }
2154 }
2155 b',' if depth == 1 && comma.is_none() => comma = Some(j),
2156 _ => {}
2157 }
2158 j += 1;
2159 }
2160 if let Some(c) = comma
2161 && j < bytes.len()
2162 {
2163 let sampler = src[arg_start..c].trim();
2164 let uvw = src[c + 1..j].trim();
2165 let replacement = match sampler {
2166 s if s.ends_with("noisevol_lq") => format!(
2167 "textureSample(sampler_noisevol_lq_texture, {}, {uvw})",
2168 noise_sampler_for(s)
2169 ),
2170 s if s.ends_with("noisevol_hq") => format!(
2171 "textureSample(sampler_noisevol_hq_texture, {}, {uvw})",
2172 noise_sampler_for(s)
2173 ),
2174 _ => {
2175 // Unknown 3D sampler — keep the 2D fallback so
2176 // the wrapper compiles instead of failing.
2177 format!(
2178 "vec4<f32>(GetPixel(({uvw}).xy), 1.0) /*was: tex3D({sampler})*/"
2179 )
2180 }
2181 };
2182 out.push_str(&replacement);
2183 i = j + 1;
2184 continue;
2185 }
2186 }
2187 }
2188 out.push(bytes[i] as char);
2189 i += 1;
2190 }
2191
2192 out
2193}
2194
2195fn replace_semantics(code: &str) -> String {
2196 SEMANTICS_REGEX.replace_all(code, "").to_string()
2197}
2198
2199/// Walk the source for `var <NAME>: <TYPE> [= INIT];` declarations; the
2200/// first time a `NAME` appears *in the current scope*, keep it; every later
2201/// declaration of the same `NAME` in the same scope becomes a plain
2202/// assignment (`NAME = INIT;`) or — if it had no initialiser — is dropped
2203/// entirely.
2204///
2205/// HLSL allows `float3 ret1 = ret1;` to shadow a previous declaration
2206/// (or just redundantly redeclare it); WGSL rejects with
2207/// `redefinition of ret1`. The presets that hit this are typically
2208/// older MD2 user-shader idioms.
2209///
2210/// Scope tracking matters here. A flat `seen` set is wrong: a global
2211/// `var tmp: f32;` plus a function-local `var tmp: f32;` inside a helper
2212/// would drop the inner decl while the helper's body still referenced
2213/// `tmp`, which then failed naga as `no definition in scope for
2214/// identifier: tmp`. We push a fresh `seen` set on every `{` and pop
2215/// on the matching `}`, so each scope dedups independently.
2216fn dedup_var_declarations(src: &str) -> String {
2217 use std::collections::HashMap;
2218 static VAR_DECL: LazyLock<Regex> = LazyLock::new(|| {
2219 // Anchor relaxed from `(?m)^\s*` to `\s*` and matched by hand
2220 // below: we still want declarations on their own line in practice,
2221 // but the scope walk needs to align byte positions with brace
2222 // boundaries, which `replace_all`'s line-anchored variant doesn't
2223 // expose. The same shape is matched, just without `^`.
2224 // Capture group 4 is the *type* (everything between `:` and the
2225 // optional `=`/`;`); a second declaration of the same name with a
2226 // different type is a deliberate shadow and must NOT be dropped.
2227 Regex::new(
2228 r"(\n[ \t]*|\A[ \t]*)(var|let)\s+([A-Za-z_][A-Za-z0-9_]*)\s*:\s*([^=;]+?)\s*(=\s*[^;]+)?;",
2229 )
2230 .unwrap()
2231 });
2232
2233 // Pre-compute scope boundaries by walking the source once and recording
2234 // `{` and `}` byte positions (ignoring those inside string/char literals
2235 // — none occur in MD2 user shaders — and inside line/block comments).
2236 let bytes = src.as_bytes();
2237 let mut depth: i32 = 0;
2238 let mut depth_at: Vec<i32> = Vec::with_capacity(bytes.len() + 1);
2239 depth_at.push(0);
2240 let mut i = 0usize;
2241 while i < bytes.len() {
2242 // Skip line comments verbatim — their `{`/`}` shouldn't count.
2243 if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'/' {
2244 while i < bytes.len() && bytes[i] != b'\n' {
2245 depth_at.push(depth);
2246 i += 1;
2247 }
2248 continue;
2249 }
2250 if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'*' {
2251 while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
2252 depth_at.push(depth);
2253 i += 1;
2254 }
2255 if i + 1 < bytes.len() {
2256 depth_at.push(depth);
2257 depth_at.push(depth);
2258 i += 2;
2259 }
2260 continue;
2261 }
2262 match bytes[i] {
2263 b'{' => depth += 1,
2264 b'}' => depth -= 1,
2265 _ => {}
2266 }
2267 depth_at.push(depth);
2268 i += 1;
2269 }
2270
2271 // Map of scope-depth → (name → declared-type). Each scope dedups
2272 // independently; when we exit a scope (depth dips), that scope's
2273 // record is wiped so a sibling block opening at the same depth
2274 // starts fresh. Storing the type lets the dedup distinguish a true
2275 // redundant re-declaration (same name, same type — drop the second
2276 // `var`) from a deliberate shadow (same name, different type —
2277 // keep both so the local-scope intent survives).
2278 let mut seen_by_depth: HashMap<i32, HashMap<String, String>> = HashMap::new();
2279 let mut last_pos: usize = 0;
2280
2281 VAR_DECL
2282 .replace_all(src, |caps: ®ex::Captures| {
2283 let prefix = &caps[1];
2284 let name = &caps[3];
2285 let ty = caps[4].trim();
2286 let init = caps.get(5).map(|m| m.as_str()).unwrap_or("");
2287 let start = caps.get(0).unwrap().start();
2288 // Sweep depth_at from the previous match to here; clear the
2289 // record of every depth we exited (depth dipped below it).
2290 // This handles sibling scopes — closing one function's `}`
2291 // resets that depth before the next function's `var`s arrive.
2292 let mut min_in_window = i32::MAX;
2293 for d in &depth_at[last_pos..=start.min(depth_at.len() - 1)] {
2294 if *d < min_in_window {
2295 min_in_window = *d;
2296 }
2297 }
2298 let cur_depth = depth_at.get(start).copied().unwrap_or(0).max(0);
2299 // Any depth strictly above the lowest point we passed through
2300 // had its scope closed at some point in the window — wipe
2301 // their records.
2302 if min_in_window < i32::MAX {
2303 seen_by_depth.retain(|d, _| *d <= min_in_window);
2304 }
2305 last_pos = caps.get(0).unwrap().end();
2306 let seen = seen_by_depth.entry(cur_depth).or_default();
2307 match seen.get(name) {
2308 Some(prev_ty) if prev_ty == ty => {
2309 // Exact duplicate at the same scope — strip `var`.
2310 let init_trim = init.trim_start_matches('=').trim();
2311 if init_trim.is_empty() {
2312 format!("{prefix}/* dropped redundant var {name} */")
2313 } else {
2314 format!("{prefix}{name} = {init_trim};")
2315 }
2316 }
2317 _ => {
2318 // First sighting OR same name with a different type
2319 // (deliberate shadow). Keep the declaration; record
2320 // the latest type so a third same-type repeat would
2321 // still be deduped.
2322 seen.insert(name.to_string(), ty.to_string());
2323 caps[0].to_string()
2324 }
2325 }
2326 })
2327 .to_string()
2328}
2329
/// HLSL allows comma-as-statement-separator at the top of a function body:
///
/// ```hlsl
/// ret += saturate(...),
/// ret += saturate(...),
/// ret = lerp(ret, blurs, t);
/// ```
///
/// WGSL requires `;` between statements; a stray comma triggers
/// `expected ;`, found ','`. At this point in the pipeline every multi-
/// name `var` declaration has already been split into one `var` per name
/// (see [`rewrite_local_declarations`]), so a comma at paren/bracket-
/// depth 0 is unambiguously a statement separator. Rewrite to `;`.
///
/// Commas are left alone when they sit
/// * inside `()` or `[]`,
/// * inside a `<...>` generic argument list (`array<f32, 2>`),
/// * inside a brace-delimited initialiser list (`rot = { a, b, c, d }`),
///   including lists nested inside it (`{ {1, 2}, {3, 4} }`),
/// * inside `//` or `/* */` comments.
///
/// `<` / `>` are only read as generic brackets outside `()`/`[]`. Their
/// depth never drops below zero and is reset at every `;`, `{` and `}`, so a
/// comparison or a `->` earlier in the source cannot disable the rewrite for
/// everything that follows. Unmodified text is copied as whole `&str` slices,
/// which keeps non-ASCII characters intact.
fn replace_statement_commas(src: &str) -> String {
    let bytes = src.as_bytes();
    let mut out = String::with_capacity(src.len());
    let mut paren = 0i32;
    let mut bracket = 0i32;
    let mut angle = 0i32;
    // Stack of in-flight `{` kinds: `true` = init-list, `false` = code
    // block. Commas inside an init-list are constructor separators, not
    // statement separators — converting them to `;` corrupts shapes like
    // `float2x2 rot = { cos(q9), sin(q9), … };`.
    let mut brace_stack: Vec<bool> = Vec::new();
    // Last non-whitespace, non-comment byte seen (0 before the first one).
    let mut last_sig: u8 = 0;
    // `src[copied..i]` has been scanned but not emitted yet.
    let mut copied = 0usize;
    let mut i = 0usize;

    while i < bytes.len() {
        // Comments are skipped here and emitted verbatim with the
        // surrounding text.
        if bytes[i..].starts_with(b"//") {
            i = bytes[i..]
                .iter()
                .position(|&b| b == b'\n')
                .map_or(bytes.len(), |off| i + off);
            continue;
        }
        if bytes[i..].starts_with(b"/*") {
            i = bytes[i + 2..]
                .windows(2)
                .position(|w| w == b"*/")
                .map_or(bytes.len(), |off| i + off + 4);
            continue;
        }

        let c = bytes[i];
        let in_init_list = brace_stack.last().copied().unwrap_or(false);
        let top_level = paren == 0 && bracket == 0;
        match c {
            b'(' => paren += 1,
            b')' => paren -= 1,
            b'[' => bracket += 1,
            b']' => bracket -= 1,
            b'<' if top_level => angle += 1,
            b'>' if top_level => angle = (angle - 1).max(0),
            b'{' => {
                // A `{` opens an init-list when it follows `=` or sits
                // anywhere inside another init-list.
                brace_stack.push(in_init_list || last_sig == b'=');
                angle = 0;
            }
            b'}' => {
                brace_stack.pop();
                angle = 0;
            }
            b';' => angle = 0,
            b',' if top_level && angle == 0 && !in_init_list => {
                // Statement separator — emit `;` in place of the comma.
                out.push_str(&src[copied..i]);
                out.push(';');
                copied = i + 1;
                last_sig = b';';
                i += 1;
                continue;
            }
            _ => {}
        }
        if !c.is_ascii_whitespace() {
            last_sig = c;
        }
        i += 1;
    }

    out.push_str(&src[copied..]);
    out
}
2414
2415#[cfg(test)]
2416mod tests {
2417 use super::*;
2418 use crate::texture_plan::decompose_sampler_name;
2419
2420 #[test]
2421 fn reassigned_param_is_shadowed_with_var() {
2422 // HLSL value-parameters are mutable; WGSL params are immutable.
2423 // The lift pass must rename the param and prepend a shadowing var.
2424 let hlsl = "float2 PutDist(float x) { float tmp; x = pow(x, 2.0); \
2425 tmp = 1 - x; return float2(tmp, x); } \
2426 shader_body { ret = float3(PutDist(0.5), 0); }";
2427 let wgsl = translate_shader(hlsl).unwrap();
2428 // Renamed param in signature
2429 assert!(
2430 wgsl.contains("x_md2arg: f32"),
2431 "expected renamed param, got:\n{wgsl}"
2432 );
2433 // Shadowing var at body entry
2434 assert!(
2435 wgsl.contains("var x: f32 = x_md2arg"),
2436 "expected shadow var, got:\n{wgsl}"
2437 );
2438 }
2439
2440 #[test]
2441 fn unreassigned_param_left_alone() {
2442 // A param that's only read should pass through unchanged — no
2443 // unnecessary shadowing.
2444 let hlsl = "float2 Pass(float x) { return float2(x, x*2); } \
2445 shader_body { ret = float3(Pass(0.5), 0); }";
2446 let wgsl = translate_shader(hlsl).unwrap();
2447 assert!(
2448 wgsl.contains("fn Pass(x: f32) -> vec2<f32>"),
2449 "expected unchanged signature, got:\n{wgsl}"
2450 );
2451 assert!(
2452 !wgsl.contains("x_md2arg"),
2453 "should not rename a read-only param, got:\n{wgsl}"
2454 );
2455 }
2456
2457 #[test]
2458 fn param_written_via_swizzle_is_shadowed() {
2459 // `uv.x = …` is a write through swizzle — also requires var.
2460 let hlsl = "float2 Tweak(float2 uvi) { uvi.x = uvi.x * 2; return uvi; } \
2461 shader_body { ret = float3(Tweak(uv), 0); }";
2462 let wgsl = translate_shader(hlsl).unwrap();
2463 assert!(
2464 wgsl.contains("uvi_md2arg: vec2<f32>"),
2465 "expected renamed param, got:\n{wgsl}"
2466 );
2467 assert!(
2468 wgsl.contains("var uvi: vec2<f32> = uvi_md2arg"),
2469 "expected shadow var, got:\n{wgsl}"
2470 );
2471 }
2472
2473 #[test]
2474 fn type_replacement() {
2475 let hlsl = "float4 color = float4(1.0, 0.0, 0.0, 1.0);";
2476 let wgsl = translate_shader(hlsl).unwrap();
2477 assert!(wgsl.contains("vec4<f32>"));
2478 }
2479
2480 #[test]
2481 fn function_replacement() {
2482 let hlsl = "color = lerp(a, b, t);";
2483 let wgsl = translate_shader(hlsl).unwrap();
2484 assert!(wgsl.contains("mix"));
2485 }
2486
2487 #[test]
2488 fn saturate_replacement() {
2489 let hlsl = "color = saturate(color);";
2490 let wgsl = translate_shader(hlsl).unwrap();
2491 assert!(wgsl.contains("clamp"));
2492 }
2493
2494 #[test]
2495 fn texture_sampling_routes_through_main_sampler() {
2496 let hlsl = "color = tex2D(sampler_main, uv);";
2497 let wgsl = translate_shader(hlsl).unwrap();
2498 // The codegen wrapper exposes `sampler_main_texture` + `sampler_main`,
2499 // so every tex2D call must land on those two bindings.
2500 assert!(
2501 wgsl.contains("textureSample(sampler_main_texture, sampler_main, uv)"),
2502 "got: {wgsl}"
2503 );
2504 }
2505
2506 #[test]
2507 fn texture_sampling_unknown_samplers_still_fall_back() {
2508 // User-loaded textures and arbitrary preset sampler names that the
2509 // wrapper doesn't recognise must keep producing valid WGSL. The
2510 // translator routes them through `sampler_main` and tags the
2511 // origin name in a comment for debugging.
2512 let hlsl = "color = tex2D(sampler_clouds, uv);";
2513 let wgsl = translate_shader(hlsl).unwrap();
2514 assert!(
2515 wgsl.contains("textureSample(sampler_main_texture, sampler_main, uv)"),
2516 "got: {wgsl}"
2517 );
2518 assert!(wgsl.contains("/*was: sampler_clouds*/"));
2519 }
2520
2521 #[test]
2522 fn fw_fc_pw_pc_main_route_to_matching_sampler() {
2523 // The four MD2 sampler variants of `main` keep the main texture but
2524 // pick a different sampler binding. The translator no longer
2525 // collapses them to the plain `sampler_main` — they hit
2526 // `sampler_fw`/`_fc`/`_pw`/`_pc` directly.
2527 for (name, expected_sampler) in [
2528 ("sampler_fw_main", "sampler_fw"),
2529 ("sampler_fc_main", "sampler_fc"),
2530 ("sampler_pw_main", "sampler_pw"),
2531 ("sampler_pc_main", "sampler_pc"),
2532 ] {
2533 let hlsl = format!("color = tex2D({name}, uv);");
2534 let wgsl = translate_shader(&hlsl).unwrap();
2535 let needle = format!("textureSample(sampler_main_texture, {expected_sampler}, uv)");
2536 assert!(wgsl.contains(&needle), "for {name}, got: {wgsl}");
2537 // Recognised bindings don't emit the `/*was: …*/` debug
2538 // comment — keeps the translated WGSL tidy.
2539 assert!(
2540 !wgsl.contains(&format!("/*was: {name}*/")),
2541 "recognised variant should not carry a fallback comment"
2542 );
2543 }
2544 }
2545
2546 #[test]
2547 fn noise_2d_samplers_route_to_dedicated_textures() {
2548 for (sampler, expected_tex) in [
2549 ("sampler_noise_lq", "sampler_noise_lq_texture"),
2550 ("sampler_noise_mq", "sampler_noise_mq_texture"),
2551 ("sampler_noise_hq", "sampler_noise_hq_texture"),
2552 // Prefixed variants pick the same texture but a different
2553 // sampler — covered by `noise_2d_prefixed_picks_sampler`.
2554 ("sampler_pw_noise_lq", "sampler_noise_lq_texture"),
2555 ("sampler_fw_noise_hq", "sampler_noise_hq_texture"),
2556 ] {
2557 let hlsl = format!("color = tex2D({sampler}, uv);");
2558 let wgsl = translate_shader(&hlsl).unwrap();
2559 assert!(
2560 wgsl.contains(&format!("textureSample({expected_tex},")),
2561 "for {sampler}, got: {wgsl}"
2562 );
2563 }
2564 }
2565
2566 #[test]
2567 fn noise_2d_prefixed_picks_sampler() {
2568 let wgsl = translate_shader("color = tex2D(sampler_pw_noise_lq, uv);").unwrap();
2569 assert!(
2570 wgsl.contains("textureSample(sampler_noise_lq_texture, sampler_pw, uv)"),
2571 "got: {wgsl}"
2572 );
2573 }
2574
2575 #[test]
2576 fn tex3d_noisevol_samplers_hit_3d_texture() {
2577 // Real MD2 pattern: `tex3D(sampler_noisevol_hq, uvw)`. An earlier
2578 // implementation degraded this to a 2D GetPixel call; now it hits
2579 // the real texture_3d binding.
2580 let wgsl =
2581 translate_shader("color = tex3D(sampler_noisevol_hq, vec3<f32>(uv, time));").unwrap();
2582 assert!(
2583 wgsl.contains("textureSample(sampler_noisevol_hq_texture,"),
2584 "got: {wgsl}"
2585 );
2586 // No `GetPixel(...)` fallback should remain in the translated body.
2587 assert!(
2588 !wgsl.contains("GetPixel((vec3<f32>(uv, time)).xy)"),
2589 "fallback path should be gone: {wgsl}"
2590 );
2591 }
2592
2593 #[test]
2594 fn tex3d_unknown_sampler_keeps_fallback() {
2595 // An unrecognised tex3D sampler still degrades to the 2D fallback
2596 // so the wrapper compiles even when the author references a
2597 // texture we don't bind.
2598 let wgsl = translate_shader("color = tex3D(sampler_user_volume, uvw);").unwrap();
2599 assert!(
2600 wgsl.contains("GetPixel((uvw).xy)"),
2601 "expected fallback, got: {wgsl}"
2602 );
2603 }
2604
2605 #[test]
2606 fn shader_body_wrapper_is_stripped() {
2607 let hlsl = "shader_body\n{\n ret = float3(1, 0, 0);\n}\n";
2608 let out = strip_shader_body_wrapper(hlsl);
2609 assert!(!out.contains("shader_body"), "wrapper not stripped: {out}");
2610 assert!(out.contains("ret = float3(1, 0, 0)"));
2611 }
2612
2613 #[test]
2614 fn shader_body_with_inner_braces_balances() {
2615 // A nested block (e.g. an `if`) must not cut the wrapper off early.
2616 let hlsl = r#"shader_body
2617{
2618 if (a > 0) {
2619 ret = float3(1, 0, 0);
2620 }
2621 ret *= 0.5;
2622}"#;
2623 let out = strip_shader_body_wrapper(hlsl);
2624 assert!(!out.contains("shader_body"));
2625 assert!(out.contains("ret *= 0.5"));
2626 }
2627
2628 #[test]
2629 fn shader_body_with_brace_in_comment_balances() {
2630 let hlsl = r#"shader_body
2631{
2632 // closing } in a comment must not end the body
2633 ret = float3(1);
2634}"#;
2635 let out = strip_shader_body_wrapper(hlsl);
2636 assert!(!out.contains("shader_body"));
2637 assert!(out.contains("ret = float3(1)"));
2638 }
2639
2640 #[test]
2641 fn no_shader_body_wrapper_is_passthrough() {
2642 let hlsl = "ret = float3(1, 0, 0);";
2643 let out = strip_shader_body_wrapper(hlsl);
2644 assert_eq!(out, hlsl);
2645 }
2646
2647 #[test]
2648 fn local_declaration_with_init_becomes_var() {
2649 let hlsl = "shader_body { float gx1 = a + b * c; ret = gx1.xxx; }";
2650 let wgsl = translate_shader(hlsl).unwrap();
2651 assert!(
2652 wgsl.contains("var gx1: f32 = a + b * c"),
2653 "expected var-form local, got: {wgsl}"
2654 );
2655 }
2656
2657 #[test]
2658 fn local_declaration_without_init_becomes_var() {
2659 // Real preset pattern: declare a vec2, then assign to it on the next
2660 // few lines (for incrementally building offset UVs). The rewriter is
2661 // line-anchored — preset bodies span multiple lines, which is the
2662 // shape it's optimised for.
2663 let hlsl = " vec2<f32> uv2;\n uv2 = uv;\n";
2664 // Note: `vec2<f32>` is what `replace_types` emits — here we feed it
2665 // post-substitution to assert the rewriter handles WGSL-shaped types.
2666 let wgsl = rewrite_local_declarations(hlsl);
2667 assert!(wgsl.contains("var uv2: vec2<f32>;"), "got: {wgsl}");
2668 }
2669
#[test]
fn local_declaration_multi_name_expands() {
    let wgsl = rewrite_local_declarations(" vec3<f32> ret1, neu, crisp, blur;\n");
    // Every name in the comma-separated list gets a `var` statement of its own.
    assert!(wgsl.contains("var ret1: vec3<f32>;"), "got: {wgsl}");
    for name in ["neu", "crisp", "blur"] {
        let stmt = format!("var {name}: vec3<f32>;");
        assert!(wgsl.contains(stmt.as_str()));
    }
}
2680
#[test]
fn inline_multi_decl_on_single_line_splits() {
    // Kaleidoscope state line from the Isosceles preset. If the line were
    // not pre-split, only `cntr` would be converted and `sin`, `cos`,
    // `scale` would remain raw HLSL, which naga rejects.
    //
    // `cntr` and `scale` are top-level globals: they are hoisted to
    // module-scope `var<private>` so lifted user functions can read them.
    // `sin` and `cos` stay fs_main locals because they shadow WGSL
    // builtins; hoisting them would break every other `sin(x)` call site.
    let state_line =
        "float2 cntr = float2(q13,q14); float sin = q11; float cos = q12; float scale = q15;";
    let wgsl = translate_shader(state_line).unwrap();
    let expected_fragments = [
        "var<private> cntr: vec2<f32>;",
        "cntr = vec2<f32>(q13,q14);",
        "var sin: f32 = q11;",
        "var cos: f32 = q12;",
        "var<private> scale: f32;",
        "scale = q15;",
    ];
    for fragment in expected_fragments {
        assert!(wgsl.contains(fragment), "got: {wgsl}");
    }
}
2705
#[test]
fn inline_split_handles_vec_types_after_prior_decl() {
    // Regression guard. With a naive `\b` boundary in
    // `INLINE_DECL_SPLIT_REGEX` the match would fail after `>` (which is
    // already a non-word character), leaving a `vec2<f32>` declaration
    // un-rewritten whenever it follows another `;`-terminated declaration
    // on the same line. The shape is taken from the `shadow` helper in
    // `MilkDrop2077.1040.milk`.
    let same_line_decls = "f32 dark; vec2<f32> uvc, dx;";
    let wgsl = rewrite_local_declarations(same_line_decls);
    for stmt in ["var dark: f32;", "var uvc: vec2<f32>;", "var dx: vec2<f32>;"] {
        assert!(wgsl.contains(stmt), "got: {wgsl}");
    }
}
2720
#[test]
fn inline_split_detaches_open_brace_glue() {
    // Regression guard. A `{TYPE` glued to the start of a function or
    // conditional body was not separated when only `;` acted as a split
    // point, so the first inner declaration stayed attached to the brace
    // and the start-of-line-anchored `LOCAL_DECL_REGEX` skipped it. The
    // shape comes from `MinDistB` in `MilkDrop2077.1040.milk`.
    let glued = "f32 MinDistB(uvi: vec2<f32>) {f32 tmp; vec4<f32> nb;}";
    let wgsl = rewrite_local_declarations(glued);
    for stmt in ["var tmp: f32;", "var nb: vec4<f32>;"] {
        assert!(wgsl.contains(stmt), "got: {wgsl}");
    }
}
2734
#[test]
fn inline_split_skips_vec_constructor_call() {
    // `; vec3<f32>(0, 0, 0)` is a constructor expression, not a
    // declaration, so the splitter must leave it alone. The regex demands
    // an identifier character after the type and its trailing space; a
    // `(` in that position therefore does not match.
    let with_ctor = "ret = vec3<f32>(0.0); vec3<f32>(1.0);";
    let wgsl = rewrite_local_declarations(with_ctor);
    // The `;` and the bare ` vec3<f32>(1.0);` remain on the same line: no
    // newline has been inserted between them.
    assert!(!wgsl.contains(";\nvec3<f32>(1.0)"), "got: {wgsl}");
}
2747
#[test]
fn for_loop_init_semi_does_not_split_inline_decls() {
    // The `;` separators of a `for(...)` header must never trigger the
    // inline-declaration splitter: in the standard loop shape the first
    // `;` is followed by an expression, not by a type keyword. By this
    // stage the for-int rewrite has already produced the `var i: i32 =`
    // form, so the input is given in that post-rewrite shape.
    let looped = "for(var i: i32 = 0; i < 10; i = i + 1) { ret = vec3<f32>(0); }";
    let wgsl = rewrite_local_declarations(looped);
    // The loop condition is still present and no newline was inserted
    // after the `;` that precedes it.
    assert!(wgsl.contains("i < 10"), "got: {wgsl}");
    assert!(
        !wgsl.contains(";\ni < 10"),
        "for-loop `;` got wrongly split: {wgsl}"
    );
}
2765
#[test]
fn local_declaration_i32_loop_counter() {
    // `replace_types` widens `int anz = 3;` to `f32` (MD2 treats ints and
    // floats interchangeably), so a float variable is produced. With no
    // enclosing `shader_body` the declaration is top level: it parses as
    // `Item::GlobalVar`, and the hoist pass turns it into a module-scope
    // `var<private>` plus an assignment inside the body.
    let wgsl = translate_shader(" int anz = 3;\n").unwrap();
    for piece in ["var<private> anz: f32;", "anz = 3;"] {
        assert!(wgsl.contains(piece), "got: {wgsl}");
    }
}
2778
#[test]
fn sampler_declaration_is_stripped() {
    let with_decl = "sampler sampler_pw_noise_lq;\nret = float3(1);\n";
    let out = strip_sampler_declarations(with_decl);
    // The sampler declaration is gone; the statement after it is kept.
    assert!(!out.contains("sampler_pw_noise_lq"));
    assert!(out.contains("ret = float3(1)"));
}
2786
#[test]
fn preprocessor_directives_are_stripped() {
    let with_directives = "#define M_PI 3.14159\n#include <something>\nret = float3(1);";
    let out = strip_preprocessor(with_directives);
    // Neither directive keyword may survive; the real statement must.
    for directive in ["#define", "#include"] {
        assert!(!out.contains(directive));
    }
    assert!(out.contains("ret = float3(1)"));
}
2795
#[test]
fn brace_up_single_statement_if() {
    // A brace-less single-statement `if` gains `{ ... }` around its body.
    let out = brace_up_single_statement_blocks("if (a > 0) ret = 1.0;");
    assert!(out.contains("if (a > 0) {"), "got: {out}");
    assert!(out.contains("ret = 1.0;"));
    assert!(out.ends_with('}'));
}
2804
#[test]
fn brace_up_leaves_already_braced_alone() {
    // An `if` that already has braces must come back untouched.
    let braced = "if (a > 0) { ret = 1.0; }";
    let out = brace_up_single_statement_blocks(braced);
    assert_eq!(out, braced);
}
2811
#[test]
fn brace_up_skips_keyword_inside_identifier() {
    // `notif` merely ends in the letters `if`; it is an identifier and must
    // not be handled as the `if` keyword.
    let decl = "var notif: f32 = 0;";
    let out = brace_up_single_statement_blocks(decl);
    assert_eq!(out, decl);
}
2819
#[test]
fn brace_up_handles_nested_parens_in_condition() {
    // The condition contains a call with its own parentheses; the opening
    // brace has to land after the outer `)`.
    let out = brace_up_single_statement_blocks("if (max(a, b) > 0) ret.z -= 0.5;");
    assert!(out.contains("if (max(a, b) > 0) {"), "got: {out}");
}
2826
#[test]
fn brace_up_handles_function_call_in_body() {
    // The wrapped statement runs up to its terminating `;`; the call
    // `mix(a, b, 0.5)` in the body must stay intact inside the braces.
    let call_body = "if (a > 0) ret = mix(a, b, 0.5);";
    let out = brace_up_single_statement_blocks(call_body);
    assert!(out.contains("ret = mix(a, b, 0.5);"));
    assert!(out.ends_with('}'));
}
2836
#[test]
fn tex3d_known_noisevol_hits_3d_binding() {
    // The volume noise texture is actually bound by the pipeline, so a
    // `tex3D(sampler_noisevol_hq, …)` call becomes a `textureSample` on the
    // 3D texture rather than the legacy `GetPixel(uvw.xy)` fallback.
    let out = rewrite_tex3d_calls("ret = tex3D(sampler_noisevol_hq, vec3<f32>(uv, time));");
    let expected = "textureSample(sampler_noisevol_hq_texture, sampler_pw, vec3<f32>(uv, time))";
    assert!(out.contains(expected), "got: {out}");
}
2852
#[test]
fn lerp_with_whitespace_is_normalised_and_rewritten() {
    // Presets sometimes put whitespace between `lerp` and its `(` (e.g. a
    // multi-line `lerp (\n a,\n b,\n t)` call). Call-whitespace
    // normalisation lets such calls still be rewritten to `mix(...)`.
    let spaced_call = "ret = lerp (a, b, 0.5);";
    let wgsl = translate_shader(spaced_call).unwrap();
    assert!(wgsl.contains("mix(a, b, 0.5)"), "got: {wgsl}");
    assert!(!wgsl.contains("lerp"));
}
2862
#[test]
fn float2x2_becomes_mat2x2() {
    // HLSL `float2x2` is a 2×2 matrix. Earlier code covered only 3×3 and
    // 4×4, so the `float2` rewrite mangled `float2x2` into `vec2<f32>x2`,
    // which naga's parser then rejected.
    let rotation = "var m: f32 = 0; uv1 = mul(float2x2(q9,q10,-q10,q9), uv1);";
    let wgsl = translate_shader(rotation).unwrap();
    assert!(wgsl.contains("mat2x2<f32>"), "got: {wgsl}");
    assert!(!wgsl.contains("vec2<f32>x2"));
}
2873
#[test]
fn unary_plus_is_stripped_after_open_paren() {
    let wgsl = translate_shader("ret.x += (+dx.x - dy.x)*0.4;").unwrap();
    // The unary `+` right after `(` is removed while the binary `-` is kept.
    assert!(wgsl.contains("(dx.x - dy.x)"), "got: {wgsl}");
}
2881
#[test]
fn statement_commas_become_semicolons() {
    // A comma that separates two statements is turned into `;`:
    // `ret += a, ret += b;` → `ret += a; ret += b;`.
    let out = replace_statement_commas("ret += a, ret += b;");
    assert_eq!(out, "ret += a; ret += b;");
}
2889
#[test]
fn dedup_var_decl_second_becomes_assignment() {
    // `ret1` is declared twice: the first `var` stays, the second is
    // reduced to a plain assignment.
    let twice = "var ret1: vec3<f32> = vec3<f32>(0);\nvar ret1: vec3<f32> = ret1;\n";
    let out = dedup_var_declarations(twice);
    let first_decl = "var ret1: vec3<f32> = vec3<f32>(0);";
    assert!(out.contains(first_decl), "got: {out}");
    assert!(out.contains("ret1 = ret1;"), "got: {out}");
}
2900
#[test]
fn postfix_increment_becomes_compound_assignment() {
    // A statement-level `n++` is spelled out as `n = n + 1`.
    let out = rewrite_postfix_inc_dec("n++;");
    assert_eq!(out, "n = n + 1;");
}
2907
#[test]
fn postfix_decrement_in_for_loop() {
    // `i--` in the step position of a `for` header is rewritten as well.
    let countdown = "for (i = 10; i > 0; i--) { }";
    let out = rewrite_postfix_inc_dec(countdown);
    assert!(out.contains("i = i - 1)"), "got: {out}");
}
2914
#[test]
fn postfix_increment_inside_expression_left_alone() {
    // Only statement and loop-step positions are rewritten. An ordinary
    // binary expression (a bare `a + + b` would carry whitespace between
    // the operators) does not match the regex and passes through as is.
    let plain_expr = "y = a + b;";
    let out = rewrite_postfix_inc_dec(plain_expr);
    assert_eq!(out, plain_expr);
}
2924
#[test]
fn static_const_qualifier_stripped() {
    let out = strip_storage_class_qualifiers("static const int anz = 3;");
    // Both passes applied: `static` is dropped and `const int` becomes `int`.
    assert_eq!(out.trim(), "int anz = 3;");
}
2932
#[test]
fn statement_commas_leave_call_args_alone() {
    // Commas within `(...)` separate call arguments, not statements, so
    // the input must come back unchanged.
    let call = "ret = mix(a, b, t);";
    let out = replace_statement_commas(call);
    assert_eq!(out, call);
}
2940
#[test]
fn end_to_end_typical_md2_comp_shader() {
    // The simplest comp-shader pattern seen in real presets. Once the
    // `shader_body` wrapper is stripped and the body translated, the
    // result should be syntactically valid WGSL.
    let comp_shader = r#"shader_body
{
    ret = tex2D(sampler_main, uv).xyz;
    ret *= 1.28; //gamma
    ret *= ret; //darken
}"#;
    let wgsl = translate_shader(comp_shader).unwrap();
    assert!(!wgsl.contains("shader_body"));
    assert!(wgsl.contains("textureSample(sampler_main_texture, sampler_main, uv)"));
    assert!(!wgsl.contains("float"));
}
2956
2957 // ---------------------------------------------------------------
2958 // User texture binding plan + scan
2959 // ---------------------------------------------------------------
2960
#[test]
fn scan_extracts_user_sampler_declarations() {
    // Both declared user samplers must be reported by the scan.
    let source = "sampler sampler_clouds;\nsampler sampler_lichen;\nret = tex2D(sampler_clouds, uv).xyz;";
    let refs = scan_user_samplers(source);
    for wanted in ["sampler_clouds", "sampler_lichen"] {
        assert!(refs.iter().any(|r| r.full_name.as_str() == wanted));
    }
}
2969
#[test]
fn scan_skips_builtins() {
    // Of the five declarations only `sampler_clouds` is expected back from
    // the scan.
    let source = "sampler sampler_main;\nsampler sampler_fw_main;\nsampler sampler_noise_lq;\nsampler sampler_clouds;\nsampler sampler_blur1;";
    let refs = scan_user_samplers(source);
    let mut names: Vec<&str> = Vec::new();
    for sampler_ref in refs.iter() {
        names.push(sampler_ref.full_name.as_str());
    }
    assert_eq!(
        names,
        vec!["sampler_clouds"],
        "only the user texture should survive"
    );
}
2981
#[test]
fn scan_collapses_duplicates_by_full_name() {
    // The same sampler declared twice yields a single entry.
    let refs = scan_user_samplers("sampler sampler_clouds;\nsampler sampler_clouds;\n");
    assert_eq!(refs.len(), 1);
}
2988
#[test]
fn decompose_sampler_name_handles_filter_prefixes() {
    // (full sampler name, expected texture name, expected sampler binding)
    let cases = [
        ("sampler_clouds", "clouds", "sampler_fw"),
        ("sampler_fw_clouds", "clouds", "sampler_fw"),
        ("sampler_pc_clouds", "clouds", "sampler_pc"),
        ("sampler_rand02_smalltiled", "rand02_smalltiled", "sampler_fw"),
    ];
    for (full_name, texture, sampler) in cases {
        assert_eq!(
            decompose_sampler_name(full_name),
            (texture.to_string(), sampler)
        );
    }
}
3008
#[test]
fn plan_empty_falls_back_to_legacy_translator() {
    let no_slots = TextureBindingPlan::empty();
    let source = "color = tex2D(sampler_clouds, uv);";
    let wgsl = translate_shader_with_plan(source, &no_slots).unwrap();
    // With an empty plan no user routing is applied, so the fallback
    // `/*was: ...*/` comment still has to show up.
    assert!(wgsl.contains("/*was: sampler_clouds*/"), "got: {wgsl}");
}
3017
#[test]
fn plan_routes_user_sampler_to_user_binding() {
    let mut plan = TextureBindingPlan::empty();
    let aliases = [("sampler_clouds".to_string(), "sampler_fw")];
    let texsize = [256.0, 256.0, 1.0 / 256.0, 1.0 / 256.0];
    let slot = plan
        .add_slot(Some("clouds".to_string()), texsize, &aliases)
        .unwrap();
    assert_eq!(slot, 0);

    let wgsl = translate_shader_with_plan("color = tex2D(sampler_clouds, uv);", &plan).unwrap();
    // The call has to resolve to the user-slot binding instead of the
    // fallback path.
    let routed = "textureSample(sampler_user_0_texture, sampler_fw, uv)";
    assert!(wgsl.contains(routed), "got: {wgsl}");
    assert!(!wgsl.contains("/*was: sampler_clouds*/"));
}
3037
#[test]
fn plan_two_aliases_share_a_slot() {
    // `sampler sampler_clouds`, `sampler sampler_fw_clouds` and
    // `sampler sampler_pc_clouds` all name the same logical "clouds"
    // texture; only the sampler binding chosen at each call site differs.
    let mut plan = TextureBindingPlan::empty();
    // Fail right here, with a clear message, if the slot cannot be added;
    // the original discarded the returned `Option`.
    plan.add_slot(
        Some("clouds".to_string()),
        [256.0, 256.0, 1.0 / 256.0, 1.0 / 256.0],
        &[
            ("sampler_clouds".to_string(), "sampler_fw"),
            ("sampler_fw_clouds".to_string(), "sampler_fw"),
            ("sampler_pc_clouds".to_string(), "sampler_pc"),
        ],
    )
    .expect("an empty plan must accept its first slot");
    assert_eq!(plan.slot_count(), 1, "all aliases must share one slot");

    // Translating through each alias keeps the slot and switches only the
    // sampler kind.
    let s1 =
        translate_shader_with_plan("ret = tex2D(sampler_fw_clouds, uv).xyz;", &plan).unwrap();
    let s2 =
        translate_shader_with_plan("ret = tex2D(sampler_pc_clouds, uv).xyz;", &plan).unwrap();
    assert!(
        s1.contains("textureSample(sampler_user_0_texture, sampler_fw, uv)"),
        "got: {s1}"
    );
    assert!(
        s2.contains("textureSample(sampler_user_0_texture, sampler_pc, uv)"),
        "got: {s2}"
    );
}
3063
3064 // ---------------------------------------------------------------
3065 // Quick translator fixes
3066 // ---------------------------------------------------------------
3067
#[test]
fn mod_call_rewritten_to_float_mod_expansion() {
    // `mod` is reserved in WGSL, whereas HLSL presets call it as the float
    // modulo helper. The expansion has to follow the HLSL semantics,
    // i.e. `a - floor(a/b)*b`.
    let source = "ret.x = mod(ang*16/M_PI, 1.0);";
    let wgsl = translate_shader(source).unwrap();
    let expansion = "floor((ang*16/M_PI) / (1.0)) * (1.0)";
    assert!(wgsl.contains(expansion), "got: {wgsl}");
    assert!(!wgsl.contains("mod("), "mod( call must be gone: {wgsl}");
}
3080
#[test]
fn lowercase_tex2d_normalised_to_tex2d() {
    // Presets in the wild contain the typo `tex2d(sampler_main, uv)`. WGSL
    // is case-sensitive and `textureSample` is only reached through the
    // canonical `tex2D` rewrite, so the spelling is normalised first.
    let typo = "ret = tex2d(sampler_main, uv).xyz;";
    let wgsl = translate_shader(typo).unwrap();
    let sampled = "textureSample(sampler_main_texture, sampler_main, uv)";
    assert!(wgsl.contains(sampled), "got: {wgsl}");
}
3092
#[test]
fn lowercase_tex3d_normalised() {
    // The lowercase `tex3d` spelling must reach the 3D texture binding too.
    let typo = "ret = tex3d(sampler_noisevol_hq, vec3<f32>(uv, time)).xyz;";
    let wgsl = translate_shader(typo).unwrap();
    let sampled_prefix = "textureSample(sampler_noisevol_hq_texture,";
    assert!(wgsl.contains(sampled_prefix), "got: {wgsl}");
}
3102
3103 // ---------------------------------------------------------------
3104 // User-defined function lifting
3105 // ---------------------------------------------------------------
3106
#[test]
fn reserved_identifier_mod_as_local_renamed() {
    // Seen in a real preset: `float mod = sin(...); ... q22*mod*...`. The
    // `mod()` call rewrite leaves the bare `mod` identifier behind, and
    // WGSL rejects it as reserved, so it is renamed to `mod_`.
    let source = "float mod = sin(uv.x);\nret.y *= mod * 0.5;";
    let wgsl = translate_shader(source).unwrap();
    assert!(wgsl.contains("var mod_: f32"), "var rename missing: {wgsl}");
    assert!(
        wgsl.contains("mod_ * 0.5"),
        "reference rename missing: {wgsl}"
    );
}
3119
#[test]
fn reserved_identifier_filter_renamed() {
    // A `float3 filter` (function parameter or local) has to come out as
    // `filter_`.
    let source = "float3 filter = float3(1, 1, 1);\nret = filter * GetPixel(uv);";
    let wgsl = translate_shader(source).unwrap();
    assert!(
        wgsl.contains("var filter_: vec3<f32>"),
        "filter var rename missing: {wgsl}"
    );
}
3131
#[test]
fn user_function_is_lifted_above_body() {
    // Preset pattern: a helper function is declared ahead of the
    // `shader_body` block. After translation it must sit before
    // `LIFTED_FN_SENTINEL` and carry a WGSL `fn ... ->` signature.
    let hlsl = concat!(
        "float2 helper(float2 a, float3 b) {\n",
        "return a * 0.5 + b.xy;\n",
        "}\n",
        "shader_body {\n",
        "ret.xy = helper(uv, vec3<f32>(0));\n",
        "}",
    );
    let wgsl = translate_shader(hlsl).unwrap();
    assert!(wgsl.contains(LIFTED_FN_SENTINEL));
    let (lifted, body) = wgsl.split_once(LIFTED_FN_SENTINEL).unwrap();
    // The HLSL signature has been replaced by the WGSL one.
    let signature = "fn helper(a: vec2<f32>, b: vec3<f32>) -> vec2<f32>";
    assert!(
        lifted.contains(signature),
        "lifted block missing canonical signature: {lifted}"
    );
    // The body still calls the helper; the call form is kept as an
    // identifier reference inside fs_main.
    assert!(body.contains("helper("), "call site missing: {body}");
}
3155
#[test]
fn no_lift_when_no_function_definitions() {
    // Without any module-scope function definition no sentinel may be
    // emitted; this keeps the output backwards-compatible with the
    // v0.17.0 text shape.
    let wgsl = translate_shader("shader_body { ret = ret * 0.5; }").unwrap();
    assert!(!wgsl.contains(LIFTED_FN_SENTINEL));
}
3165
#[test]
fn lift_handles_zero_arg_function() {
    // A helper with an empty parameter list is lifted like any other.
    let hlsl = concat!(
        "float3 zero() { return vec3<f32>(0); }\n",
        "shader_body { ret = zero(); }",
    );
    let wgsl = translate_shader(hlsl).unwrap();
    let (lifted, _body) = wgsl.split_once(LIFTED_FN_SENTINEL).unwrap();
    assert!(lifted.contains("fn zero() -> vec3<f32>"), "got: {lifted}");
}
3174
#[test]
fn lift_geiss_explosion_3_helper_translates_cleanly() {
    // Reduced repro from `Geiss - Explosion 3 nz+...milk`. The helper has
    // a parameter named `filter` (reserved in WGSL) and a vec3 local `dx`
    // whose initialiser is parenthesised.
    let hlsl = concat!(
        "float2 gradBlur1( float2 domain, float2 d, float3 filter){\n",
        "float3 dx = ( 2*GetBlur1(domain + float2(1,0)*d) - 2*GetBlur1(domain-float2(1,0)*d) );\n",
        "return 0.5*float2(dx.x*filter.x, dx.y*filter.y) / (filter.x+filter.y+filter.z);\n",
        "}\n",
        "shader_body { ret.xy = gradBlur1(uv, vec2<f32>(0.5), vec3<f32>(0.5,0.5,0.5)); }",
    );
    let wgsl = translate_shader(hlsl).unwrap();
    assert!(wgsl.contains(LIFTED_FN_SENTINEL), "no sentinel: {wgsl}");
    let (lifted, _body) = wgsl.split_once(LIFTED_FN_SENTINEL).unwrap();
    // The local must be a WGSL `var`, not the HLSL-style `vec3<f32> dx`
    // that originally produced the parse error "expected `(`; found `dx`".
    assert!(
        lifted.contains("var dx: vec3<f32>"),
        "local-decl rewrite didn't reach lifted body: {lifted}"
    );
    // The `filter` parameter has to be renamed away from the WGSL keyword.
    assert!(
        lifted.contains("filter_:"),
        "filter param not renamed in signature: {lifted}"
    );
}
3201
#[test]
fn lift_handles_function_with_nested_braces() {
    // A user function containing an inner `{...}` block (here an `if`)
    // must be brace-balanced correctly and not cut off mid-body.
    let hlsl = concat!(
        "float foo(float t) {\n",
        "if (t > 0) { return t * 2; }\n",
        "return 0;\n",
        "}\n",
        "shader_body { ret.x = foo(0.5); }",
    );
    let wgsl = translate_shader(hlsl).unwrap();
    let (lifted, _body) = wgsl.split_once(LIFTED_FN_SENTINEL).unwrap();
    assert!(
        lifted.contains("if (t > 0)"),
        "inner if dropped during lift: {lifted}"
    );
    assert!(lifted.contains("return 0;"));
}
3219
#[test]
fn plan_cap_is_enforced() {
    let mut plan = TextureBindingPlan::empty();
    // Fill the plan up to the cap; every one of these insertions must
    // succeed.
    for i in 0..MAX_USER_TEXTURE_SLOTS {
        let aliases = [(format!("sampler_tex_{i}"), "sampler_fw")];
        plan.add_slot(Some(format!("tex_{i}")), [1.0, 1.0, 1.0, 1.0], &aliases)
            .unwrap();
    }
    // The next insertion is one past the cap and is refused with `None`.
    let overflow_aliases = [("sampler_overflow".to_string(), "sampler_fw")];
    let over = plan.add_slot(Some("overflow".to_string()), [1.0; 4], &overflow_aliases);
    assert!(over.is_none());
    assert_eq!(plan.slot_count(), MAX_USER_TEXTURE_SLOTS);
}
3238
3239 // ---------- non-square mat, builtin aliases, prose ----------
3240
#[test]
fn float2x3_maps_to_mat2x3() {
    // The `float2`→`vec2<f32>` substring substitution used to mangle
    // non-square matrix types into `vec2<f32>x3`, which the WGSL parser
    // rejected with `expected ')'; found 'x3'`. Many residual comp-shader
    // failures trace back to this.
    assert_eq!(replace_types("float2x3 m;"), "mat2x3<f32> m;");
}
3251
#[test]
fn float3x2_and_other_non_square_matrices_map_correctly() {
    // (HLSL matrix type, expected WGSL matrix type)
    let cases = [
        ("float3x2", "mat3x2<f32>"),
        ("float4x3", "mat4x3<f32>"),
        ("float2x4", "mat2x4<f32>"),
        ("float3x4", "mat3x4<f32>"),
        ("float4x2", "mat4x2<f32>"),
    ];
    for (hlsl, wgsl) in cases {
        let decl = format!("{hlsl} m;");
        let expected = format!("{wgsl} m;");
        assert_eq!(replace_types(&decl), expected, "mapping {hlsl}");
    }
}
3265
#[test]
fn sat_aliases_saturate() {
    // The corpus contains `#define sat saturate`, which `strip_preprocessor`
    // removes; the `sat(…)` calls that follow would otherwise be unbound
    // in WGSL, so the call is aliased here.
    let out = replace_functions("ret = sat(x);");
    assert!(out.contains("clamp(x, 0.0, 1.0)"), "got: {out}");
}
3274
#[test]
fn rsqrt_aliases_inverse_sqrt() {
    // HLSL `rsqrt` is expected to come out as WGSL `inverseSqrt`.
    let source = "ret = rsqrt(x);";
    let out = replace_functions(source);
    assert!(out.contains("inverseSqrt(x)"), "got: {out}");
}
3280
#[test]
fn log10_expands_to_natural_log() {
    // `log10(x)` is expected as `log(x)` multiplied by a constant factor.
    let source = "ret = log10(x);";
    let out = replace_functions(source);
    assert!(out.contains("(log(x) * 0.43429448190325176)"), "got: {out}");
}
3286
#[test]
fn tex2dbias_drops_mip_bias_args() {
    // `tex2Dbias(s, float4(uv, 0, bias))` is reduced to `tex2D(s, uv)`.
    let source = "ret = tex2Dbias(sampler_main, float4(uv, 0, 0.1));";
    let out = replace_functions(source);
    assert!(out.contains("tex2D(sampler_main, uv)"), "got: {out}");
}
3292
#[test]
fn multi_decl_sampler_list_stripped() {
    // `sampler a, b, c;` is a common HLSL shorthand. A regex that matched
    // only the first identifier would leave the remaining names behind as
    // orphan statements, which then trip the WGSL parser.
    let listed = "sampler sampler_fw_rand01, sampler_pw_rand02;\nret = uv.x;\n";
    let out = strip_sampler_declarations(listed);
    for name in ["sampler_fw_rand01", "sampler_pw_rand02"] {
        assert!(!out.contains(name), "got: {out}");
    }
    assert!(out.contains("ret = uv.x"), "body lost: {out}");
}
3304
#[test]
fn prose_line_is_commented_out() {
    // Attribution lines such as `written by martin` inside a shader body
    // are prefixed with `// `; otherwise the WGSL parser fails with
    // `expected assignment or increment/decrement; found 'by'`.
    let with_prose = "ret = 0.5;\nwritten by martin\nuv = uv * 2.0;\n";
    let out = comment_out_prose_lines(with_prose);
    assert!(out.contains("// written by martin"), "got: {out}");
    for code_line in ["ret = 0.5;", "uv = uv * 2.0;"] {
        assert!(out.contains(code_line), "real code lost: {out}");
    }
}
3316
#[test]
fn prose_line_does_not_eat_keywords() {
    // Lines that begin with `for` or `if` are control flow rather than
    // prose and must never be commented out.
    let keyword_lines = ["for (var i = 0; i < 3; i = i + 1) {", "if x = 0;"];
    for kw_line in keyword_lines {
        let src = format!("{kw_line}\n");
        let out = comment_out_prose_lines(&src);
        assert_eq!(out, src, "wrongly commented out: {kw_line}");
    }
}
3327
#[test]
fn bare_end_marker_commented_out() {
    // Some MD2 presets append `END` (or a similar one-word marker) after
    // `shader_body`. Once `strip_shader_body_wrapper` has run, such a line
    // ends up in the fragment body and crashes the parser.
    let with_marker = "ret = 0.5;\nEND\n";
    let out = comment_out_prose_lines(with_marker);
    assert!(out.contains("// END"), "got: {out}");
}
3337}