onedrop_renderer/
custom_shape.rs

//! Custom-shape (`shapecode_N`) pass.
//!
//! Up to four user-defined shapes per preset. Each shape spawns
//! `num_inst` instances (1..1024), each drawn as a fan of `sides`
//! triangles around `(x, y)` at radius `rad`, rotated by `ang`. The
//! engine runs the shape's per-frame equations once per instance,
//! collects the resulting state into a flat storage buffer, and
//! issues one fill draw call per shape; an optional outline pass
//! follows for `b_thick_outline = 1`.
//!
//! Why a storage buffer + instanced draw (vs. a CPU-expanded vertex
//! stream like custom waves): a single shape with `num_inst = 1024` and
//! `sides = 64` would emit `1024 × 64 × 3 ≈ 200K vertices` per frame,
//! so the upload bandwidth would dominate the work. With instancing the
//! upload is one struct (~96 bytes) per instance, and the GPU expands
//! the geometry from `(@builtin(vertex_index), @builtin(instance_index))`.
//!
//! Textured mode: instead of a solid colour, the fragment shader
//! samples `prev_texture` (the post-warp / post-wave output of the
//! previous frame) at a UV warped by `tex_zoom` / `tex_ang`. The output
//! is multiplied by the centre→edge gradient so the user's RGB still
//! tints the result.
23
24use bytemuck::{Pod, Zeroable};
25use wgpu::util::DeviceExt;
26
27/// GPU-side per-instance record. `repr(C)` + `Pod` for direct
28/// `bytemuck::cast_slice` upload to a storage buffer. Each field is a
29/// `vec4`-aligned 16-byte block so the WGSL `array<ShapeInstance>` layout
30/// matches without an explicit `align(16)` shim.
31#[repr(C)]
32#[derive(Debug, Clone, Copy, PartialEq, Pod, Zeroable, Default)]
33pub struct CustomShapeInstance {
34    /// `[center_x, center_y, radius_x, radius_y]` — center is
35    /// clip-space (-1..1); radius is in clip units before the shader's
36    /// aspect correction.
37    pub center_radius: [f32; 4],
38    /// `[angle (radians), tex_zoom, tex_ang (radians), sides (f32 → u32)]`.
39    pub angle_zoom_ang_sides: [f32; 4],
40    pub color_center: [f32; 4],
41    pub color_edge: [f32; 4],
42    pub border_color: [f32; 4],
43    /// `[border_size, flag_bits, pad, pad]` where `flag_bits` is:
44    /// - bit 0: `textured`
45    /// - bit 1: `additive`
46    /// - bit 2: `thick_outline`
47    pub border_flags: [f32; 4],
48}
49
/// Bit set of per-instance options, stored in
/// `CustomShapeInstance::border_flags[1]`. The consumer is
/// `custom_shape.wgsl`, which tests individual bits (`flag_bits & 1u`, …).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct ShapeFlags(u32);

impl ShapeFlags {
    /// Bit 0 — `textured`.
    pub const TEXTURED: Self = Self(1 << 0);
    /// Bit 1 — `additive`.
    pub const ADDITIVE: Self = Self(1 << 1);
    /// Bit 2 — `thick_outline`.
    pub const THICK_OUTLINE: Self = Self(1 << 2);

    /// The set with no bits raised.
    pub const fn empty() -> Self {
        Self(0)
    }

    /// Raw bit pattern of this set.
    pub const fn bits(self) -> u32 {
        self.0
    }

    /// `true` when every bit raised in `other` is also raised in `self`.
    /// An empty `other` is therefore always contained.
    pub fn contains(self, other: Self) -> bool {
        // No bit of `other` may be missing from `self`.
        (other.0 & !self.0) == 0
    }

    /// Encode the bit pattern as `f32` for the all-`f32` storage buffer
    /// layout. The `u32 → f32` conversion is exact for values up to
    /// 2^24, which covers every flag combination defined here.
    pub fn pack(self) -> f32 {
        self.bits() as f32
    }
}

/// Union of two flag sets.
impl std::ops::BitOr for ShapeFlags {
    type Output = Self;

    fn bitor(self, rhs: Self) -> Self {
        let Self(left) = self;
        let Self(right) = rhs;
        Self(left | right)
    }
}

/// In-place union of two flag sets.
impl std::ops::BitOrAssign for ShapeFlags {
    fn bitor_assign(&mut self, rhs: Self) {
        *self = Self(self.0 | rhs.0);
    }
}
88
/// One dispatch unit: a contiguous run of instances in the storage
/// buffer that belong to the same shape and share `sides` + blend mode.
///
/// The renderer issues one fill draw per batch over the instance range
/// `start_instance .. start_instance + instance_count`, plus a second
/// draw over the same range when `thick_outline` is set.
#[derive(Debug, Clone, Copy)]
pub struct CustomShapeBatch {
    /// Index of the first instance of this batch in the storage buffer.
    pub start_instance: u32,
    /// Number of consecutive instances in this batch.
    pub instance_count: u32,
    /// Effective sides for the draw call's vertex count: `sides * 3`
    /// triangle-list vertices per instance. The renderer clamps this to
    /// `[3, MAX_SHAPE_SIDES]` when it issues the draw.
    pub sides: u32,
    /// `true` → additive blend pipeline, `false` → alpha.
    pub additive: bool,
    /// `true` → also dispatch the outline pass for this batch using the
    /// same instance range. The outline is drawn as a triangle list
    /// (`sides * 6` vertices per instance, one quad per perimeter
    /// segment), not as a line strip.
    pub thick_outline: bool,
}
104
/// Upper bound on the number of sides drawn per shape instance.
///
/// The MD2 preset format allows up to 100 sides; this renderer stops at
/// 64 so the per-instance vertex count stays manageable: 64 × 3 = 192
/// vertices per instance, and with 1024 instances that is roughly 200K
/// vertices at most for a single shape.
pub const MAX_SHAPE_SIDES: u32 = 64;
109
/// Capacity of the per-frame instance buffer, in instances.
///
/// Four shapes at the MD2 maximum of 1024 instances each need 4 096
/// slots; 704 more are added as headroom, for a total of 4 800.
pub const MAX_CUSTOM_SHAPE_INSTANCES: usize = 4 * 1024 + 704;
113
114#[repr(C)]
115#[derive(Debug, Clone, Copy, Pod, Zeroable)]
116struct ShapeUniforms {
117    aspect_pad: [f32; 4],
118}
119
120/// GPU pass. Owns the per-frame instance storage buffer + the four
121/// pipelines (fill × alpha/additive, outline × alpha/additive). The
122/// fragment shader picks textured vs. solid via a per-instance flag bit,
123/// so we don't need a separate textured pipeline.
124pub struct CustomShapeRenderer {
125    fill_pipelines: [wgpu::RenderPipeline; 2],
126    outline_pipelines: [wgpu::RenderPipeline; 2],
127    bind_group: wgpu::BindGroup,
128    bgl: wgpu::BindGroupLayout,
129    instance_buffer: wgpu::Buffer,
130    uniform_buffer: wgpu::Buffer,
131    sampler: wgpu::Sampler,
132    instance_count: u32,
133    batches: Vec<CustomShapeBatch>,
134}
135
/// Slot of a two-element `[alpha, additive]` pipeline array for the
/// given blend mode: `false` → 0, `true` → 1.
#[inline]
fn additive_index(additive: bool) -> usize {
    usize::from(additive)
}
140
141impl CustomShapeRenderer {
142    pub fn new(
143        device: &wgpu::Device,
144        format: wgpu::TextureFormat,
145        prev_texture_view: &wgpu::TextureView,
146    ) -> Self {
147        let shader = crate::pipeline_helpers::load_wgsl(
148            device,
149            "Custom Shape Shader",
150            include_str!("../shaders/custom_shape.wgsl"),
151        );
152
153        // Zero-initialised storage buffer. We always upload the
154        // exact span the current frame uses, so any tail past
155        // `instance_count` reads stale data — but no batch ever
156        // points there.
157        let initial = vec![CustomShapeInstance::default(); MAX_CUSTOM_SHAPE_INSTANCES];
158        let instance_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
159            label: Some("Custom Shape Instances"),
160            contents: bytemuck::cast_slice(&initial),
161            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
162        });
163
164        let uniform_buffer = device.create_buffer(&wgpu::BufferDescriptor {
165            label: Some("Custom Shape Uniforms"),
166            size: std::mem::size_of::<ShapeUniforms>() as u64,
167            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
168            mapped_at_creation: false,
169        });
170
171        let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
172            label: Some("Custom Shape Sampler"),
173            address_mode_u: wgpu::AddressMode::Repeat,
174            address_mode_v: wgpu::AddressMode::Repeat,
175            mag_filter: wgpu::FilterMode::Linear,
176            min_filter: wgpu::FilterMode::Linear,
177            ..Default::default()
178        });
179
180        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
181            label: Some("Custom Shape BGL"),
182            entries: &[
183                wgpu::BindGroupLayoutEntry {
184                    binding: 0,
185                    visibility: wgpu::ShaderStages::VERTEX,
186                    ty: wgpu::BindingType::Buffer {
187                        ty: wgpu::BufferBindingType::Storage { read_only: true },
188                        has_dynamic_offset: false,
189                        min_binding_size: None,
190                    },
191                    count: None,
192                },
193                wgpu::BindGroupLayoutEntry {
194                    binding: 1,
195                    visibility: wgpu::ShaderStages::VERTEX_FRAGMENT,
196                    ty: wgpu::BindingType::Buffer {
197                        ty: wgpu::BufferBindingType::Uniform,
198                        has_dynamic_offset: false,
199                        min_binding_size: None,
200                    },
201                    count: None,
202                },
203                wgpu::BindGroupLayoutEntry {
204                    binding: 2,
205                    visibility: wgpu::ShaderStages::FRAGMENT,
206                    ty: wgpu::BindingType::Texture {
207                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
208                        view_dimension: wgpu::TextureViewDimension::D2,
209                        multisampled: false,
210                    },
211                    count: None,
212                },
213                wgpu::BindGroupLayoutEntry {
214                    binding: 3,
215                    visibility: wgpu::ShaderStages::FRAGMENT,
216                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
217                    count: None,
218                },
219            ],
220        });
221
222        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
223            label: Some("Custom Shape BG"),
224            layout: &bgl,
225            entries: &[
226                wgpu::BindGroupEntry {
227                    binding: 0,
228                    resource: instance_buffer.as_entire_binding(),
229                },
230                wgpu::BindGroupEntry {
231                    binding: 1,
232                    resource: uniform_buffer.as_entire_binding(),
233                },
234                wgpu::BindGroupEntry {
235                    binding: 2,
236                    resource: wgpu::BindingResource::TextureView(prev_texture_view),
237                },
238                wgpu::BindGroupEntry {
239                    binding: 3,
240                    resource: wgpu::BindingResource::Sampler(&sampler),
241                },
242            ],
243        });
244
245        let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
246            label: Some("Custom Shape Layout"),
247            bind_group_layouts: &[Some(&bgl)],
248            immediate_size: 0,
249        });
250
251        let make_pipeline = |label: &str,
252                             topology: wgpu::PrimitiveTopology,
253                             vs_entry: &str,
254                             fs_entry: &str,
255                             additive: bool|
256         -> wgpu::RenderPipeline {
257            let blend = crate::pipeline_helpers::blend_state_for(additive);
258            device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
259                label: Some(label),
260                layout: Some(&layout),
261                vertex: wgpu::VertexState {
262                    module: &shader,
263                    entry_point: Some(vs_entry),
264                    buffers: &[],
265                    compilation_options: Default::default(),
266                },
267                fragment: Some(wgpu::FragmentState {
268                    module: &shader,
269                    entry_point: Some(fs_entry),
270                    targets: &[Some(wgpu::ColorTargetState {
271                        format,
272                        blend: Some(blend),
273                        write_mask: wgpu::ColorWrites::ALL,
274                    })],
275                    compilation_options: Default::default(),
276                }),
277                primitive: wgpu::PrimitiveState {
278                    topology,
279                    ..Default::default()
280                },
281                depth_stencil: None,
282                multisample: wgpu::MultisampleState::default(),
283                multiview_mask: None,
284                cache: None,
285            })
286        };
287
288        let fill_pipelines = [
289            make_pipeline(
290                "Shape Fill Alpha",
291                wgpu::PrimitiveTopology::TriangleList,
292                "vs_main",
293                "fs_main",
294                false,
295            ),
296            make_pipeline(
297                "Shape Fill Additive",
298                wgpu::PrimitiveTopology::TriangleList,
299                "vs_main",
300                "fs_main",
301                true,
302            ),
303        ];
304        let outline_pipelines = [
305            make_pipeline(
306                "Shape Outline Alpha",
307                wgpu::PrimitiveTopology::TriangleList,
308                "vs_outline",
309                "fs_outline",
310                false,
311            ),
312            make_pipeline(
313                "Shape Outline Additive",
314                wgpu::PrimitiveTopology::TriangleList,
315                "vs_outline",
316                "fs_outline",
317                true,
318            ),
319        ];
320
321        Self {
322            fill_pipelines,
323            outline_pipelines,
324            bind_group,
325            bgl,
326            instance_buffer,
327            uniform_buffer,
328            sampler,
329            instance_count: 0,
330            batches: Vec::new(),
331        }
332    }
333
334    /// Re-bind `prev_texture_view` after a resize / texture realloc. The
335    /// instance + uniform buffers persist, only the texture view
336    /// changes.
337    pub fn rebind_prev_texture(
338        &mut self,
339        device: &wgpu::Device,
340        prev_texture_view: &wgpu::TextureView,
341    ) {
342        self.bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
343            label: Some("Custom Shape BG (rebind)"),
344            layout: &self.bgl,
345            entries: &[
346                wgpu::BindGroupEntry {
347                    binding: 0,
348                    resource: self.instance_buffer.as_entire_binding(),
349                },
350                wgpu::BindGroupEntry {
351                    binding: 1,
352                    resource: self.uniform_buffer.as_entire_binding(),
353                },
354                wgpu::BindGroupEntry {
355                    binding: 2,
356                    resource: wgpu::BindingResource::TextureView(prev_texture_view),
357                },
358                wgpu::BindGroupEntry {
359                    binding: 3,
360                    resource: wgpu::BindingResource::Sampler(&self.sampler),
361                },
362            ],
363        });
364    }
365
366    /// Replace the per-frame instance stream + batch list. Aspect ratio
367    /// goes into the uniform buffer so the shader can correct the X
368    /// radius without each instance carrying a copy.
369    pub fn update(
370        &mut self,
371        queue: &wgpu::Queue,
372        instances: &[CustomShapeInstance],
373        batches: &[CustomShapeBatch],
374        aspect: f32,
375    ) {
376        let u = ShapeUniforms {
377            aspect_pad: [aspect.max(1e-3), 0.0, 0.0, 0.0],
378        };
379        queue.write_buffer(&self.uniform_buffer, 0, bytemuck::bytes_of(&u));
380
381        let n = instances.len().min(MAX_CUSTOM_SHAPE_INSTANCES);
382        if instances.len() > MAX_CUSTOM_SHAPE_INSTANCES {
383            log::warn!(
384                "custom-shape instance stream truncated: {} > cap {}",
385                instances.len(),
386                MAX_CUSTOM_SHAPE_INSTANCES
387            );
388        }
389        if n > 0 {
390            queue.write_buffer(
391                &self.instance_buffer,
392                0,
393                bytemuck::cast_slice(&instances[..n]),
394            );
395        }
396        self.instance_count = n as u32;
397        self.batches.clear();
398        for b in batches {
399            if b.start_instance + b.instance_count <= self.instance_count {
400                self.batches.push(*b);
401            }
402        }
403    }
404
405    /// Issue fill + (optional) outline draw calls for each batch. Loads
406    /// existing contents (no clear); meant to overlay on the warp +
407    /// wave output.
408    pub fn render(&self, encoder: &mut wgpu::CommandEncoder, view: &wgpu::TextureView) {
409        if self.batches.is_empty() || self.instance_count == 0 {
410            return;
411        }
412        let mut rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
413            label: Some("Custom Shape Pass"),
414            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
415                view,
416                depth_slice: None,
417                resolve_target: None,
418                ops: wgpu::Operations {
419                    load: wgpu::LoadOp::Load,
420                    store: wgpu::StoreOp::Store,
421                },
422            })],
423            depth_stencil_attachment: None,
424            timestamp_writes: None,
425            occlusion_query_set: None,
426            multiview_mask: None,
427        });
428        rp.set_bind_group(0, &self.bind_group, &[]);
429
430        for b in &self.batches {
431            if b.instance_count == 0 {
432                continue;
433            }
434            let sides = b.sides.clamp(3, MAX_SHAPE_SIDES);
435            let fill_pipe = &self.fill_pipelines[additive_index(b.additive)];
436            rp.set_pipeline(fill_pipe);
437            let end_instance = b.start_instance + b.instance_count;
438            rp.draw(0..(sides * 3), b.start_instance..end_instance);
439
440            if b.thick_outline {
441                let outline_pipe = &self.outline_pipelines[additive_index(b.additive)];
442                rp.set_pipeline(outline_pipe);
443                // TriangleList: 6 vertices per perimeter segment build a
444                // screen-space quad of `OUTLINE_THICKNESS` perpendicular
445                // to the segment. `sides * 6` total — `sides` segments
446                // closing the perimeter (segment `sides - 1` wraps
447                // vertex `sides - 1 → 0` via the shader's modulo).
448                rp.draw(0..(sides * 6), b.start_instance..end_instance);
449            }
450        }
451    }
452
453    pub fn instance_count(&self) -> u32 {
454        self.instance_count
455    }
456
457    pub fn batch_count(&self) -> usize {
458        self.batches.len()
459    }
460}
461
/// Map an MD2 preset-space point to clip space.
///
/// Preset space has its origin at the top-left with both axes in
/// `[0, 1]`; clip space has its origin at the centre with both axes in
/// `[-1, 1]` and Y pointing up. So `(0, 0)` becomes `(-1, 1)` and
/// `(1, 1)` becomes `(1, -1)`. The arithmetic is done in `f64` and the
/// result narrowed to `f32`. Uses the same convention as
/// `custom_wave::preset_xy_to_clip`.
#[inline]
pub fn preset_xy_to_clip(x: f64, y: f64) -> [f32; 2] {
    let clip_x = x * 2.0 - 1.0;
    // Flip Y: preset Y grows downwards, clip Y grows upwards.
    let clip_y = 1.0 - y * 2.0;
    [clip_x as f32, clip_y as f32]
}
469
470/// Build a `CustomShapeInstance` from MD2-space eval output. `rad` is
471/// converted from preset units (roughly [0, 1] for full-screen) to
472/// clip-space half-extent (radius 0.5 = quarter-screen-width before
473/// aspect correction).
474#[allow(clippy::too_many_arguments)]
475pub fn instance_from_md2_state(
476    x: f64,
477    y: f64,
478    rad: f64,
479    ang: f64,
480    tex_zoom: f64,
481    tex_ang: f64,
482    sides: u32,
483    color_center: [f32; 4],
484    color_edge: [f32; 4],
485    border_color: [f32; 4],
486    border_size: f32,
487    flags: ShapeFlags,
488) -> CustomShapeInstance {
489    let [cx, cy] = preset_xy_to_clip(x, y);
490    // MD2's `rad` is a screen-relative half-extent already. Map 0..1 →
491    // 0..2 clip units (full screen) without further scaling.
492    let r = (rad as f32) * 2.0;
493    CustomShapeInstance {
494        center_radius: [cx, cy, r, r],
495        angle_zoom_ang_sides: [ang as f32, tex_zoom as f32, tex_ang as f32, sides as f32],
496        color_center,
497        color_edge,
498        border_color,
499        border_flags: [border_size, flags.pack(), 0.0, 0.0],
500    }
501}
502
#[cfg(test)]
mod tests {
    use super::*;

    /// This module and `custom_wave` each own a copy of the mapping so
    /// the renderer can be sliced apart later; the copies must agree.
    #[test]
    fn preset_xy_to_clip_matches_wave_convention() {
        let centre_here = preset_xy_to_clip(0.5, 0.5);
        let centre_wave = crate::custom_wave::preset_xy_to_clip(0.5, 0.5);
        assert_eq!(centre_here, centre_wave);

        let corner_here = preset_xy_to_clip(0.0, 1.0);
        let corner_wave = crate::custom_wave::preset_xy_to_clip(0.0, 1.0);
        assert_eq!(corner_here, corner_wave);
    }

    /// Flags passed to the builder land in `border_flags[1]`, and the
    /// side count lands in the last lane of `angle_zoom_ang_sides`.
    #[test]
    fn instance_from_md2_state_packs_flags() {
        let requested = ShapeFlags::TEXTURED | ShapeFlags::ADDITIVE;
        let inst = instance_from_md2_state(
            0.5,
            0.5,
            0.1,
            0.0,
            1.0,
            0.0,
            6,
            [1.0, 0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0, 1.0],
            [1.0, 1.0, 1.0, 1.0],
            0.0,
            requested,
        );

        let packed = inst.border_flags[1] as u32;
        assert_ne!(packed & ShapeFlags::TEXTURED.bits(), 0);
        assert_ne!(packed & ShapeFlags::ADDITIVE.bits(), 0);
        assert_eq!(packed & ShapeFlags::THICK_OUTLINE.bits(), 0);
        assert_eq!(inst.angle_zoom_ang_sides[3], 6.0);
    }

    /// Packing to `f32` and casting back recovers the original bits.
    #[test]
    fn shape_flags_round_trip_through_f32() {
        let recovered = ShapeFlags::THICK_OUTLINE.pack() as u32;
        assert_eq!(recovered, ShapeFlags::THICK_OUTLINE.bits());
    }

    /// Alpha is slot 0, additive is slot 1.
    #[test]
    fn additive_index_table() {
        assert_eq!(additive_index(false), 0);
        assert_eq!(additive_index(true), 1);
    }
}
557}