onedrop_hlsl/rewrite/comma_paren.rs
1//! Pass: lower C-style comma operator inside parenthesised expressions.
2//!
3//! HLSL's `(a, b, c)` is the comma operator — evaluate left-to-right, yield
4//! the rightmost operand. The MD2 corpus uses this sparingly but it surfaces
5//! in shapes like:
6//!
7//! ```hlsl
8//! uv2 = uv + texsize.zx * (q3, q3);
9//! ```
10//!
11//! (a redundant-but-valid double of `q3`; HLSL treats the parens as scalar
12//! `q3` so the surrounding `texsize.zx * scalar` is well-typed). WGSL has
13//! no comma operator — the parens parse `q3, q3` as two arguments and the
14//! containing `*` rejects with `expected ')'; found ','`.
15//!
//! The pass is purely token-driven: detect every `(` whose previous
//! non-whitespace token is *not* `Ident`, `Keyword`, `RParen`, `RBracket`
//! or `Gt` (those are call / index / cast / `vec3<f32>(…)` constructor
//! contexts where commas separate arguments, not operator operands), then
//! scan for top-level commas inside the matching pair and emit a single
//! edit that strips every token up to and including the final top-level
//! comma.
22//!
23//! Skipping `Keyword` covers the `for(init; cond; step)` / `if(...)` /
24//! `while(...)` / `do {...} while(...)` / `switch(...)` / `return (...)` /
25//! cast contexts in one rule. The cost is missing the (very rare)
26//! `return (a, b);` idiom; we judged that acceptable next to the
27//! breakage potential of rewriting commas inside `for(int i = 0, j = 0;
28//! ...)` init lists.
29//!
30//! Found in 17 warp + 2 comp presets on the 2 000-sample, all of which
31//! presented as the `expected ')'; found ';'` cluster — once the comma
32//! op is collapsed to its rightmost operand the surrounding expression
33//! type-checks and naga validates.
34//!
35//! Idempotent: a second run sees no top-level commas inside any paren
36//! expression and emits no edits.
37
38use super::*;
39use crate::lex::{Token, TokenKind, tokenize};
40
41pub(crate) fn rewrite_comma_paren(src: &str) -> String {
42 let Ok(tokens) = tokenize(src) else {
43 return src.to_string();
44 };
45 let mut edits = Vec::new();
46 collect_edits(&tokens, &mut edits);
47 apply_edits(src, &mut edits)
48}
49
50/// For every paren-expression `(a, b, …, z)` (i.e. `(` not preceded by an
51/// identifier / keyword / closing bracket), emit one edit removing
52/// `a, b, …,` so only `z` remains inside the parens.
53///
54/// Nested paren-expressions with their own commas (`(a, (b, c), d)`) emit
55/// only the *outermost* edit — the inner edit would overlap the outer
56/// and get dropped by `apply_edits`'s overlap guard, leaving inner
57/// rewrites half-done. Tracking absorbed-children per paren state keeps
58/// the outer edit clean.
59fn collect_edits(tokens: &[Token], edits: &mut Vec<TextEdit>) {
60 let mut stack: Vec<ParenState> = Vec::new();
61
62 for i in 0..tokens.len() {
63 let t = &tokens[i];
64 match &t.kind {
65 TokenKind::LParen => {
66 let is_call = i > 0 && is_call_like_prev(&tokens[i - 1].kind);
67 stack.push(ParenState {
68 is_call,
69 open_end: t.span.end,
70 last_comma_end: None,
71 pending: Vec::new(),
72 });
73 }
74 TokenKind::Comma => {
75 if let Some(top) = stack.last_mut()
76 && !top.is_call
77 {
78 top.last_comma_end = Some(t.span.end);
79 }
80 }
81 TokenKind::RParen => {
82 if let Some(top) = stack.pop() {
83 let own_edit = if !top.is_call && top.last_comma_end.is_some() {
84 Some(TextEdit {
85 start: top.open_end,
86 end: top.last_comma_end.unwrap(),
87 replacement: String::new(),
88 })
89 } else {
90 None
91 };
92 // If this paren emits its own edit, it subsumes every
93 // pending child edit (any nested comma-op gets
94 // collapsed alongside). Otherwise propagate the
95 // children up unchanged.
96 let to_publish: Vec<TextEdit> = if let Some(e) = own_edit {
97 vec![e]
98 } else {
99 top.pending
100 };
101 match stack.last_mut() {
102 Some(parent) => parent.pending.extend(to_publish),
103 None => edits.extend(to_publish),
104 }
105 }
106 }
107 _ => {}
108 }
109 }
110 // Anything left pending at top level (no enclosing paren) flushes here.
111 while let Some(top) = stack.pop() {
112 match stack.last_mut() {
113 Some(parent) => parent.pending.extend(top.pending),
114 None => edits.extend(top.pending),
115 }
116 }
117}
118
119struct ParenState {
120 is_call: bool,
121 open_end: u32,
122 last_comma_end: Option<u32>,
123 /// Edits emitted by paren-expressions strictly inside this one. If
124 /// this paren ends up emitting its own outer edit they get dropped;
125 /// otherwise they propagate up to the parent.
126 pending: Vec<TextEdit>,
127}
128
129/// `(` is part of a call / index / cast context when preceded by one of
130/// these tokens. Everything else is treated as a paren-expression where
131/// inner top-level commas would be the C-style comma operator.
132///
133/// `Gt` is included because the HLSL grammar has no generic syntax, but
134/// internally-emitted WGSL-shape source (`vec3<f32>(a, b, c)`) tokenises
135/// the `>` as `Gt` immediately before the constructor `(`. Treating
136/// `>(` as call-like keeps the comma-paren pass idempotent on previously
137/// re-emitted source, at the negligible cost of skipping the (corpus-
138/// absent) `a > (b, c)` operator-then-paren-expr shape.
139fn is_call_like_prev(kind: &TokenKind) -> bool {
140 matches!(
141 kind,
142 TokenKind::Ident
143 | TokenKind::Keyword(_)
144 | TokenKind::RParen
145 | TokenKind::RBracket
146 | TokenKind::Gt
147 )
148}
149
#[cfg(test)]
mod tests {
    use super::*;
    use crate::translate_shader;

    #[test]
    fn paren_expr_comma_drops_left_operand() {
        let input = "shader_body { float2 uv2 = uv + texsize.zx*(q3,q3); }";
        let out = rewrite_comma_paren(input);
        // `(q3,q3)` must shrink to `(q3)`: no comma anywhere, and the
        // surviving operand sits directly before the `)`.
        assert!(!out.contains(','), "comma not removed: {out}");
        assert!(
            out.contains("q3)") && !out.contains("q3,"),
            "expected rightmost `q3` only, got: {out}"
        );
    }

    #[test]
    fn function_call_args_left_alone() {
        // The `(` of `mix(` follows an `Ident`, so its commas are argument
        // separators and the source must come back untouched.
        let input = "shader_body { float3 c = mix(a, b, 0.5); }";
        let out = rewrite_comma_paren(input);
        assert_eq!(out, input);
    }

    #[test]
    fn vec_constructor_args_left_alone() {
        // `float2(...)` is a constructor call — same rule as a function call.
        let input = "shader_body { float2 v = float2(q3, q3); }";
        let out = rewrite_comma_paren(input);
        assert_eq!(out, input);
    }

    #[test]
    fn for_init_with_comma_left_alone() {
        // In `for (int i = 0, j = 0; …)` the `(` follows `Keyword(For)`;
        // the init-list comma must not be rewritten.
        let input = "shader_body { for (int i = 0, j = 0; i < 4; i++) { } }";
        let out = rewrite_comma_paren(input);
        assert_eq!(out, input);
    }

    #[test]
    fn nested_paren_comma() {
        // `(a, (b, c), d)`: both the outer and the inner pair contain
        // top-level commas, but the inner pair lies entirely inside the text
        // the outer edit removes. Only the outer edit is emitted and the
        // result is `(d)` (modulo whitespace).
        let input = "shader_body { float x = (a, (b, c), d); }";
        let out = rewrite_comma_paren(input);
        assert!(!out.contains(','), "stale comma: {out}");
        assert!(out.contains("d)"), "got: {out}");
        assert!(!out.contains("a,"), "outer LHS not stripped: {out}");
    }

    #[test]
    fn multiple_paren_exprs_independent() {
        // Sibling paren-expressions are each collapsed on their own.
        let input = "shader_body { float x = (a, b) + (c, d, e); }";
        let out = rewrite_comma_paren(input);
        assert!(!out.contains(','), "stale comma: {out}");
        assert!(out.contains("b)") && out.contains("e)"), "got: {out}");
        assert!(!out.contains("a,") && !out.contains("c,"), "got: {out}");
    }

    #[test]
    fn cast_followed_by_paren_left_alone() {
        // `(float)(x, y)`: the second `(` follows an `RParen`, which is on
        // the call-like list, so the pass skips it. (Rare in MD2; skipping
        // is harmless.)
        let input = "shader_body { float v = (float)(x, y); }";
        let out = rewrite_comma_paren(input);
        assert_eq!(out, input);
    }

    #[test]
    fn translate_roundtrip_emits_wgsl_paren_singleton() {
        // End-to-end check through `translate_shader` on the LuxXx warp
        // shape that motivated the pass.
        let hlsl = r#"
shader_body {
 float q3 = 0.1;
 float2 uv = float2(0, 0);
 float4 texsize = float4(1, 1, 1, 1);
 float2 uv2 = uv + texsize.zx*(q3, q3);
}
"#;
        let wgsl = translate_shader(hlsl).expect("translates");
        // After the `*` only the rightmost `q3` may remain in the parens;
        // a leading space left over from the removed operand is tolerated.
        assert!(
            wgsl.contains("texsize.zx*( q3)") || wgsl.contains("texsize.zx*(q3)"),
            "expected collapsed comma op, got:\n{wgsl}"
        );
        assert!(!wgsl.contains("q3, q3"), "comma not removed:\n{wgsl}");
    }

    #[test]
    fn idempotent_on_single_operand_parens() {
        // A paren-expression without commas is a fixed point of the pass.
        let input = "shader_body { float x = (a); }";
        let first = rewrite_comma_paren(input);
        assert_eq!(first, input);
        let second = rewrite_comma_paren(&first);
        assert_eq!(first, second);
    }

    #[test]
    fn wgsl_shape_vec_constructor_left_alone() {
        // Under the HLSL grammar `vec3<f32>(1, 0, 0)` tokenises as
        // `vec3 < f32 > ( 1 , 0 , 0 )`, so the `(` follows `Gt`. Because
        // `Gt` is treated as call-like the constructor's commas survive;
        // otherwise re-parsing already-rewritten source would corrupt
        // `vec3<f32>(a, b, c)` into `vec3<f32>(c)`.
        let input = "shader_body { ret = vec3<f32>(1, 0, 0); }";
        let out = rewrite_comma_paren(input);
        assert_eq!(out, input);
    }

    #[test]
    fn comment_with_comma_inside_paren_is_not_a_comma_op() {
        // Comments are stripped by the lexer, so the `,` inside the `//`
        // line never becomes a `Comma` token and the paren-expression is
        // seen as having a single operand.
        let input = r#"shader_body { float x = (
 a // hello, world
); }"#;
        let out = rewrite_comma_paren(input);
        assert_eq!(out, input);
    }
}
283}