onedrop_hlsl/rewrite/
comma_paren.rs

1//! Pass: lower C-style comma operator inside parenthesised expressions.
2//!
3//! HLSL's `(a, b, c)` is the comma operator — evaluate left-to-right, yield
4//! the rightmost operand. The MD2 corpus uses this sparingly but it surfaces
5//! in shapes like:
6//!
7//! ```hlsl
8//! uv2 = uv + texsize.zx * (q3, q3);
9//! ```
10//!
11//! (a redundant-but-valid double of `q3`; HLSL treats the parens as scalar
12//! `q3` so the surrounding `texsize.zx * scalar` is well-typed). WGSL has
13//! no comma operator — the parens parse `q3, q3` as two arguments and the
14//! containing `*` rejects with `expected ')'; found ','`.
15//!
//! The pass is purely token-driven: detect every `(` whose previous
//! non-whitespace token is *not* `Ident`, `Keyword`, `RParen`, `RBracket`
//! or `Gt` (those are call / index / cast / re-emitted `vec3<f32>(…)`
//! constructor contexts where commas separate arguments, not operator
//! operands), then scan for top-level commas inside the matching pair and
//! emit a single edit that strips every token up to and including the
//! final top-level comma.
22//!
23//! Skipping `Keyword` covers the `for(init; cond; step)` / `if(...)` /
24//! `while(...)` / `do {...} while(...)` / `switch(...)` / `return (...)` /
25//! cast contexts in one rule. The cost is missing the (very rare)
26//! `return (a, b);` idiom; we judged that acceptable next to the
27//! breakage potential of rewriting commas inside `for(int i = 0, j = 0;
28//! ...)` init lists.
29//!
30//! Found in 17 warp + 2 comp presets on the 2 000-sample, all of which
31//! presented as the `expected ')'; found ';'` cluster — once the comma
32//! op is collapsed to its rightmost operand the surrounding expression
33//! type-checks and naga validates.
34//!
35//! Idempotent: a second run sees no top-level commas inside any paren
36//! expression and emits no edits.
37
38use super::*;
39use crate::lex::{Token, TokenKind, tokenize};
40
41pub(crate) fn rewrite_comma_paren(src: &str) -> String {
42    let Ok(tokens) = tokenize(src) else {
43        return src.to_string();
44    };
45    let mut edits = Vec::new();
46    collect_edits(&tokens, &mut edits);
47    apply_edits(src, &mut edits)
48}
49
50/// For every paren-expression `(a, b, …, z)` (i.e. `(` not preceded by an
51/// identifier / keyword / closing bracket), emit one edit removing
52/// `a, b, …,` so only `z` remains inside the parens.
53///
54/// Nested paren-expressions with their own commas (`(a, (b, c), d)`) emit
55/// only the *outermost* edit — the inner edit would overlap the outer
56/// and get dropped by `apply_edits`'s overlap guard, leaving inner
57/// rewrites half-done. Tracking absorbed-children per paren state keeps
58/// the outer edit clean.
59fn collect_edits(tokens: &[Token], edits: &mut Vec<TextEdit>) {
60    let mut stack: Vec<ParenState> = Vec::new();
61
62    for i in 0..tokens.len() {
63        let t = &tokens[i];
64        match &t.kind {
65            TokenKind::LParen => {
66                let is_call = i > 0 && is_call_like_prev(&tokens[i - 1].kind);
67                stack.push(ParenState {
68                    is_call,
69                    open_end: t.span.end,
70                    last_comma_end: None,
71                    pending: Vec::new(),
72                });
73            }
74            TokenKind::Comma => {
75                if let Some(top) = stack.last_mut()
76                    && !top.is_call
77                {
78                    top.last_comma_end = Some(t.span.end);
79                }
80            }
81            TokenKind::RParen => {
82                if let Some(top) = stack.pop() {
83                    let own_edit = if !top.is_call && top.last_comma_end.is_some() {
84                        Some(TextEdit {
85                            start: top.open_end,
86                            end: top.last_comma_end.unwrap(),
87                            replacement: String::new(),
88                        })
89                    } else {
90                        None
91                    };
92                    // If this paren emits its own edit, it subsumes every
93                    // pending child edit (any nested comma-op gets
94                    // collapsed alongside). Otherwise propagate the
95                    // children up unchanged.
96                    let to_publish: Vec<TextEdit> = if let Some(e) = own_edit {
97                        vec![e]
98                    } else {
99                        top.pending
100                    };
101                    match stack.last_mut() {
102                        Some(parent) => parent.pending.extend(to_publish),
103                        None => edits.extend(to_publish),
104                    }
105                }
106            }
107            _ => {}
108        }
109    }
110    // Anything left pending at top level (no enclosing paren) flushes here.
111    while let Some(top) = stack.pop() {
112        match stack.last_mut() {
113            Some(parent) => parent.pending.extend(top.pending),
114            None => edits.extend(top.pending),
115        }
116    }
117}
118
119struct ParenState {
120    is_call: bool,
121    open_end: u32,
122    last_comma_end: Option<u32>,
123    /// Edits emitted by paren-expressions strictly inside this one. If
124    /// this paren ends up emitting its own outer edit they get dropped;
125    /// otherwise they propagate up to the parent.
126    pending: Vec<TextEdit>,
127}
128
129/// `(` is part of a call / index / cast context when preceded by one of
130/// these tokens. Everything else is treated as a paren-expression where
131/// inner top-level commas would be the C-style comma operator.
132///
133/// `Gt` is included because the HLSL grammar has no generic syntax, but
134/// internally-emitted WGSL-shape source (`vec3<f32>(a, b, c)`) tokenises
135/// the `>` as `Gt` immediately before the constructor `(`. Treating
136/// `>(` as call-like keeps the comma-paren pass idempotent on previously
137/// re-emitted source, at the negligible cost of skipping the (corpus-
138/// absent) `a > (b, c)` operator-then-paren-expr shape.
139fn is_call_like_prev(kind: &TokenKind) -> bool {
140    matches!(
141        kind,
142        TokenKind::Ident
143            | TokenKind::Keyword(_)
144            | TokenKind::RParen
145            | TokenKind::RBracket
146            | TokenKind::Gt
147    )
148}
149
#[cfg(test)]
mod tests {
    use super::*;
    use crate::translate_shader;

    /// Asserts that the pass hands `src` back byte-for-byte unchanged.
    #[track_caller]
    fn assert_untouched(src: &str) {
        assert_eq!(rewrite_comma_paren(src), src);
    }

    #[test]
    fn paren_expr_comma_drops_left_operand() {
        let result =
            rewrite_comma_paren("shader_body { float2 uv2 = uv + texsize.zx*(q3,q3); }");
        // `(q3,q3)` collapses to `(q3)`: everything between the `(` and
        // the final comma is removed (any whitespace after that comma
        // would survive; downstream is whitespace-tolerant).
        assert!(!result.contains(','), "comma not removed: {result}");
        assert!(
            result.contains("q3)") && !result.contains("q3,"),
            "expected rightmost `q3` only, got: {result}"
        );
    }

    #[test]
    fn function_call_args_left_alone() {
        // The `(` of `mix(` follows an `Ident`, so every argument comma
        // must be preserved.
        assert_untouched("shader_body { float3 c = mix(a, b, 0.5); }");
    }

    #[test]
    fn vec_constructor_args_left_alone() {
        // `float2(...)` is a call/constructor, not a paren-expression.
        assert_untouched("shader_body { float2 v = float2(q3, q3); }");
    }

    #[test]
    fn for_init_with_comma_left_alone() {
        // In `for (int i = 0, j = 0; …)` the `(` follows Keyword(For), so
        // the init-list comma is out of scope for this pass.
        assert_untouched("shader_body { for (int i = 0, j = 0; i < 4; i++) { } }");
    }

    #[test]
    fn nested_paren_comma() {
        // In `(a, (b, c), d)` both the outer and the inner parens carry
        // top-level commas. The inner `(b, c)` sits entirely inside the
        // span the outer edit deletes, so only the outer edit matters and
        // the expression collapses to `( d)`.
        let result = rewrite_comma_paren("shader_body { float x = (a, (b, c), d); }");
        assert!(!result.contains(','), "stale comma: {result}");
        assert!(result.contains("d)"), "got: {result}");
        assert!(!result.contains("a,"), "outer LHS not stripped: {result}");
    }

    #[test]
    fn multiple_paren_exprs_independent() {
        // Sibling paren-expressions are each collapsed on their own.
        let result = rewrite_comma_paren("shader_body { float x = (a, b) + (c, d, e); }");
        assert!(!result.contains(','), "stale comma: {result}");
        assert!(result.contains("b)") && result.contains("e)"), "got: {result}");
        assert!(!result.contains("a,") && !result.contains("c,"), "got: {result}");
    }

    #[test]
    fn cast_followed_by_paren_left_alone() {
        // In `(float)(x, y)` the second `(` follows an `RParen`, which is
        // on the call-like list, so it is skipped. (Rare in MD2; harmless
        // to skip if it ever appeared.)
        assert_untouched("shader_body { float v = (float)(x, y); }");
    }

    #[test]
    fn translate_roundtrip_emits_wgsl_paren_singleton() {
        // End-to-end: the LuxXx warp shape compiles after the pass.
        let hlsl = r#"
shader_body {
    float q3 = 0.1;
    float2 uv = float2(0, 0);
    float4 texsize = float4(1, 1, 1, 1);
    float2 uv2 = uv + texsize.zx*(q3, q3);
}
"#;
        let wgsl = translate_shader(hlsl).expect("translates");
        // After the `*` only the rightmost `q3` is left inside the
        // parens, with or without the space that followed the comma.
        assert!(
            wgsl.contains("texsize.zx*( q3)") || wgsl.contains("texsize.zx*(q3)"),
            "expected collapsed comma op, got:\n{wgsl}"
        );
        assert!(!wgsl.contains("q3, q3"), "comma not removed:\n{wgsl}");
    }

    #[test]
    fn idempotent_on_single_operand_parens() {
        // A comma-free paren-expression is a fixed point of the pass.
        let src = "shader_body { float x = (a); }";
        let first = rewrite_comma_paren(src);
        assert_eq!(first, src);
        let second = rewrite_comma_paren(&first);
        assert_eq!(first, second);
    }

    #[test]
    fn wgsl_shape_vec_constructor_left_alone() {
        // Under the HLSL grammar `vec3<f32>(1, 0, 0)` tokenises as
        // `vec3 < f32 > ( 1 , 0 , 0 )`, so the `(` follows `Gt`. The
        // `Gt`-skip rule keeps the constructor's commas; without it a
        // re-parse of in-flight rewritten source would corrupt
        // `vec3<f32>(a, b, c)` into `vec3<f32>(c)`.
        assert_untouched("shader_body { ret = vec3<f32>(1, 0, 0); }");
    }

    #[test]
    fn comment_with_comma_inside_paren_is_not_a_comma_op() {
        // Comments are stripped at lex time, so the `,` inside the `//`
        // line never reaches the token stream and the paren counts as a
        // single-operand expression.
        assert_untouched(
            r#"shader_body { float x = (
    a // hello, world
); }"#,
        );
    }
}