onedrop_renderer/
sprite_pipeline.rs

1//! Sprite (§5) GPU pipeline.
2//!
3//! Renders one textured quad per active sprite into `render_texture`,
4//! between the custom-shape pass and the borders pass. Two pipelines
5//! cover the two blend modes the engine emits ([`SpriteBlendKind`] —
6//! alpha or additive). Each sprite draw owns its own bind group
7//! recreated per frame so we can bind a different texture per draw
8//! without an atlas (sprites in the wild are few, ~handful per
9//! frame, so per-draw bind groups are cheap).
10//!
11//! The renderer drives this through [`SpritePipeline::record`], which
12//! takes a slice of [`SpriteDrawCmd`] produced by
13//! [`crate::sprite_pipeline::SpritePool::build_draw_commands`]. The
14//! pool resolves the engine-side `texture_index` to a real
15//! [`wgpu::TextureView`], applies aspect ratio correction, and packs
16//! the per-sprite uniform.
17
18use std::path::PathBuf;
19use std::sync::Arc;
20
21use crate::pipeline_helpers::{blend_state_for, load_wgsl};
22
/// Upper bound on the number of sprites drawn in one sprite pass.
///
/// [`SpritePipeline::record`] truncates longer command lists to this
/// count, and the pipeline's uniform buffer holds exactly this many
/// slots. The active list is pruned every tick, so the cap only bites
/// during a key-mash spawn burst; 64 sits well above MD2's practical
/// limit (~5-10).
pub const MAX_ACTIVE_SPRITES: usize = 64;
27
/// Selects which of the two sprite render pipelines a draw uses.
///
/// Mirrors `onedrop_engine::SpriteBlendMode`, but is declared here so
/// the renderer compiles without an engine-side dependency cycle.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpriteBlendKind {
    /// Draw with the alpha-blend pipeline.
    Alpha,
    /// Draw with the additive-blend pipeline.
    Additive,
}
36
37/// Per-sprite uniform pushed to the GPU each draw. 48 bytes; the
38/// pipeline allocates `MAX_ACTIVE_SPRITES * 256 B` total uniform
39/// storage so each draw can dynamic-offset into its own slot.
40#[repr(C)]
41#[derive(Debug, Clone, Copy, Default)]
42pub struct SpriteUniform {
43    /// `(center_x, center_y, size_x, size_y)` in mixed units —
44    /// centre is `[0, 1]` screen coords (top-left origin), size is
45    /// half-width / half-height in clip-space units (the shader
46    /// adds and subtracts them around the centre).
47    center_size: [f32; 4],
48    /// `(rot, _, _, _)` — three slots reserved for `flipx/flipy/_`.
49    rot_pad: [f32; 4],
50    /// Tint (`r, g, b, a`), premultiplied by `a` host-side.
51    rgba: [f32; 4],
52}
53
54// SAFETY: `SpriteUniform` is `#[repr(C)]` with only POD members.
55unsafe impl bytemuck::Pod for SpriteUniform {}
56unsafe impl bytemuck::Zeroable for SpriteUniform {}
57
58/// One per-frame sprite the engine asks the renderer to draw. POD —
59/// no GPU resources, just numbers. The renderer resolves the
60/// `texture_index` through its [`SpritePool`] and builds the
61/// [`SpriteDrawCmd`] internally each frame.
62#[derive(Debug, Clone, Copy)]
63pub struct SpriteFrame {
64    pub texture_index: u32,
65    pub x: f32,
66    pub y: f32,
67    pub sx: f32,
68    pub sy: f32,
69    pub rot: f32,
70    pub rgba: [f32; 4],
71    pub blend: SpriteBlendKind,
72    pub burn: bool,
73}
74
75/// One sprite ready to draw. The renderer assembles these from the
76/// engine's [`SpriteFrame`] list + the sprite [`SpritePool`].
77pub struct SpriteDrawCmd<'a> {
78    pub uniform: SpriteUniform,
79    pub texture_view: &'a wgpu::TextureView,
80    pub blend: SpriteBlendKind,
81    /// Burn-pending: caller treats it identically (one draw); the
82    /// flag is forwarded for future fence/persistence hooks (none
83    /// today — the quad already lands in `render_texture` which is
84    /// the feedback target, so "burning into the background" is
85    /// implicit).
86    pub burn: bool,
87}
88
89/// One GPU-resident sprite texture.
90pub struct SpriteTexture {
91    pub texture: wgpu::Texture,
92    pub view: wgpu::TextureView,
93    pub width: u32,
94    pub height: u32,
95    /// Name the engine resolves against — typically the canonical
96    /// stem of the source filename.
97    pub name: String,
98}
99
100/// Sprite texture pool: one texture per sprite def, in load order.
101/// Engine-side `SpriteRenderInstance::texture_index` indexes into
102/// `textures` directly. Missing / undecodable files get a 1×1
103/// transparent fallback so the bind group always populates.
104pub struct SpritePool {
105    textures: Vec<Arc<SpriteTexture>>,
106    fallback: Arc<SpriteTexture>,
107    sampler: wgpu::Sampler,
108}
109
110impl SpritePool {
111    /// Build an empty pool with just the transparent fallback. Used
112    /// by tests and as the renderer default when no `MILK_IMG.INI`
113    /// has been loaded yet.
114    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue) -> Self {
115        let fallback = Arc::new(make_fallback(device, queue));
116        let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
117            label: Some("Sprite Sampler"),
118            address_mode_u: wgpu::AddressMode::ClampToEdge,
119            address_mode_v: wgpu::AddressMode::ClampToEdge,
120            mag_filter: wgpu::FilterMode::Linear,
121            min_filter: wgpu::FilterMode::Linear,
122            ..Default::default()
123        });
124        Self {
125            textures: Vec::new(),
126            fallback,
127            sampler,
128        }
129    }
130
131    /// Borrow the GPU sampler for this pool. The sprite pipeline binds
132    /// it identically across every draw.
133    pub fn sampler(&self) -> &wgpu::Sampler {
134        &self.sampler
135    }
136
137    /// Number of loaded sprite textures.
138    pub fn len(&self) -> usize {
139        self.textures.len()
140    }
141
142    pub fn is_empty(&self) -> bool {
143        self.textures.is_empty()
144    }
145
146    /// Resolve a sprite by index. Returns the fallback for any
147    /// out-of-range or missing entry — keeps the per-frame draw path
148    /// branchless on the renderer side.
149    pub fn get_or_fallback(&self, idx: u32) -> &Arc<SpriteTexture> {
150        self.textures.get(idx as usize).unwrap_or(&self.fallback)
151    }
152
153    /// Reload the pool from a directory of image files, indexed by
154    /// the engine's `SpriteDef` list. Missing files / decode failures
155    /// emit a single warn log and substitute the fallback so the
156    /// engine's `texture_index` mapping stays dense.
157    ///
158    /// `def_imgs` is the parallel list of `img=` filenames from the
159    /// `MILK_IMG.INI`; `dir` is the directory those filenames resolve
160    /// against (e.g. `~/.local/share/onedrop/sprites`).
161    pub fn load_from_defs(
162        &mut self,
163        device: &wgpu::Device,
164        queue: &wgpu::Queue,
165        dir: &std::path::Path,
166        def_imgs: &[String],
167    ) {
168        self.textures.clear();
169        for img in def_imgs {
170            let path = dir.join(img);
171            match load_sprite_file(device, queue, &path) {
172                Ok(tex) => self.textures.push(Arc::new(tex)),
173                Err(e) => {
174                    log::warn!(
175                        "sprite texture {} unloadable ({}); using fallback",
176                        path.display(),
177                        e
178                    );
179                    self.textures.push(Arc::clone(&self.fallback));
180                }
181            }
182        }
183    }
184}
185
/// Default search paths for sprite assets, in priority order. The
/// engine extends this with config-driven dirs.
///
/// When `HOME` is set and non-empty, three directories below it are
/// listed (`.local/share/onedrop/sprites`, `.config/onedrop/sprites`,
/// `Music/milkdrop/sprites`). When `XDG_DATA_HOME` holds an absolute
/// path, `<XDG_DATA_HOME>/onedrop/sprites` is appended after them.
///
/// Empty values, and a relative `XDG_DATA_HOME`, are ignored: joining
/// onto them would produce relative paths that silently resolve
/// against the current working directory. The XDG Base Directory
/// specification likewise requires empty or relative values to be
/// treated as unset.
pub fn default_sprite_dirs() -> Vec<PathBuf> {
    // Sub-directories probed below the user's home directory.
    const HOME_RELATIVE: [&str; 3] = [
        ".local/share/onedrop/sprites",
        ".config/onedrop/sprites",
        "Music/milkdrop/sprites",
    ];

    let mut out = Vec::new();

    let home = std::env::var_os("HOME")
        .filter(|value| !value.is_empty())
        .map(PathBuf::from);
    if let Some(home) = home {
        out.extend(HOME_RELATIVE.iter().map(|rel| home.join(rel)));
    }

    // An empty `PathBuf` is not absolute, so this filter also covers
    // the "set but empty" case.
    let xdg = std::env::var_os("XDG_DATA_HOME")
        .map(PathBuf::from)
        .filter(|path| path.is_absolute());
    if let Some(xdg) = xdg {
        out.push(xdg.join("onedrop/sprites"));
    }

    out
}
201
/// Return a copy of the first entry in `dirs` that is an existing
/// directory, or `None` when no entry qualifies.
pub fn pick_first_existing(dirs: &[PathBuf]) -> Option<PathBuf> {
    for candidate in dirs {
        if candidate.is_dir() {
            return Some(candidate.clone());
        }
    }
    None
}
206
207/// Sprite GPU pipeline. Owns two `RenderPipeline`s (alpha + additive)
208/// and a per-slot uniform buffer; the renderer constructs / drives it
209/// once per chain.
210pub struct SpritePipeline {
211    pipeline_alpha: wgpu::RenderPipeline,
212    pipeline_additive: wgpu::RenderPipeline,
213    bgl: wgpu::BindGroupLayout,
214    /// `MAX_ACTIVE_SPRITES * UNIFORM_STRIDE` bytes; each draw writes
215    /// at its slot's offset. UNIFORM_STRIDE is rounded up to the
216    /// device's min uniform-buffer offset alignment so dynamic
217    /// offsets land on a valid boundary.
218    uniform_buffer: wgpu::Buffer,
219    uniform_stride: u64,
220}
221
222impl SpritePipeline {
223    pub fn new(device: &wgpu::Device, format: wgpu::TextureFormat) -> Self {
224        let shader = load_wgsl(
225            device,
226            "Sprite Shader",
227            include_str!("../shaders/sprite.wgsl"),
228        );
229
230        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
231            label: Some("Sprite BGL"),
232            entries: &[
233                wgpu::BindGroupLayoutEntry {
234                    binding: 0,
235                    visibility: wgpu::ShaderStages::VERTEX_FRAGMENT,
236                    ty: wgpu::BindingType::Buffer {
237                        ty: wgpu::BufferBindingType::Uniform,
238                        has_dynamic_offset: true,
239                        min_binding_size: wgpu::BufferSize::new(
240                            std::mem::size_of::<SpriteUniform>() as u64,
241                        ),
242                    },
243                    count: None,
244                },
245                wgpu::BindGroupLayoutEntry {
246                    binding: 1,
247                    visibility: wgpu::ShaderStages::FRAGMENT,
248                    ty: wgpu::BindingType::Texture {
249                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
250                        view_dimension: wgpu::TextureViewDimension::D2,
251                        multisampled: false,
252                    },
253                    count: None,
254                },
255                wgpu::BindGroupLayoutEntry {
256                    binding: 2,
257                    visibility: wgpu::ShaderStages::FRAGMENT,
258                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
259                    count: None,
260                },
261            ],
262        });
263
264        let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
265            label: Some("Sprite Layout"),
266            bind_group_layouts: &[Some(&bgl)],
267            immediate_size: 0,
268        });
269
270        let make_pipeline = |label: &str, additive: bool| -> wgpu::RenderPipeline {
271            let blend = blend_state_for(additive);
272            device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
273                label: Some(label),
274                layout: Some(&layout),
275                vertex: wgpu::VertexState {
276                    module: &shader,
277                    entry_point: Some("vs_main"),
278                    buffers: &[],
279                    compilation_options: Default::default(),
280                },
281                fragment: Some(wgpu::FragmentState {
282                    module: &shader,
283                    entry_point: Some("fs_main"),
284                    targets: &[Some(wgpu::ColorTargetState {
285                        format,
286                        blend: Some(blend),
287                        write_mask: wgpu::ColorWrites::ALL,
288                    })],
289                    compilation_options: Default::default(),
290                }),
291                primitive: wgpu::PrimitiveState {
292                    topology: wgpu::PrimitiveTopology::TriangleList,
293                    ..Default::default()
294                },
295                depth_stencil: None,
296                multisample: wgpu::MultisampleState::default(),
297                multiview_mask: None,
298                cache: None,
299            })
300        };
301
302        let pipeline_alpha = make_pipeline("Sprite Pipeline (alpha)", false);
303        let pipeline_additive = make_pipeline("Sprite Pipeline (additive)", true);
304
305        // Round each slot to the device's min uniform-buffer alignment
306        // (typically 256 bytes; we hard-code the conservative number
307        // because querying `Limits` from the pipeline is awkward).
308        let uniform_stride = 256u64;
309        let uniform_buffer = device.create_buffer(&wgpu::BufferDescriptor {
310            label: Some("Sprite Uniforms"),
311            size: uniform_stride * MAX_ACTIVE_SPRITES as u64,
312            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
313            mapped_at_creation: false,
314        });
315
316        Self {
317            pipeline_alpha,
318            pipeline_additive,
319            bgl,
320            uniform_buffer,
321            uniform_stride,
322        }
323    }
324
325    /// Record one sprite-pass draw set into the encoder. `cmds`
326    /// must be `<= MAX_ACTIVE_SPRITES`; longer slices are truncated
327    /// to keep the per-frame allocation budget predictable.
328    pub fn record(
329        &self,
330        encoder: &mut wgpu::CommandEncoder,
331        queue: &wgpu::Queue,
332        device: &wgpu::Device,
333        target: &wgpu::TextureView,
334        sampler: &wgpu::Sampler,
335        cmds: &[SpriteDrawCmd<'_>],
336    ) {
337        if cmds.is_empty() {
338            return;
339        }
340        let n = cmds.len().min(MAX_ACTIVE_SPRITES);
341        // Stage uniforms into one contiguous write so the GPU only
342        // sees one CPU→GPU copy. Stride = 256, so the per-slot data
343        // occupies the first `sizeof(SpriteUniform)` bytes and the
344        // padding stays zero.
345        let mut staging = vec![0u8; self.uniform_stride as usize * n];
346        for (i, cmd) in cmds.iter().take(n).enumerate() {
347            let start = i * self.uniform_stride as usize;
348            let bytes = bytemuck::bytes_of(&cmd.uniform);
349            staging[start..start + bytes.len()].copy_from_slice(bytes);
350        }
351        queue.write_buffer(&self.uniform_buffer, 0, &staging);
352
353        // Build a separate bind group per draw — different textures
354        // mean we can't share. Allocations are scoped to this frame
355        // (the bind groups drop when `cmds` goes out of scope), and
356        // wgpu's BG creation is cheap.
357        let mut bind_groups: Vec<wgpu::BindGroup> = Vec::with_capacity(n);
358        for cmd in cmds.iter().take(n) {
359            bind_groups.push(
360                device.create_bind_group(&wgpu::BindGroupDescriptor {
361                    label: Some("Sprite BG (per-draw)"),
362                    layout: &self.bgl,
363                    entries: &[
364                        wgpu::BindGroupEntry {
365                            binding: 0,
366                            resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
367                                buffer: &self.uniform_buffer,
368                                offset: 0,
369                                size: wgpu::BufferSize::new(
370                                    std::mem::size_of::<SpriteUniform>() as u64
371                                ),
372                            }),
373                        },
374                        wgpu::BindGroupEntry {
375                            binding: 1,
376                            resource: wgpu::BindingResource::TextureView(cmd.texture_view),
377                        },
378                        wgpu::BindGroupEntry {
379                            binding: 2,
380                            resource: wgpu::BindingResource::Sampler(sampler),
381                        },
382                    ],
383                }),
384            );
385        }
386
387        let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
388            label: Some("Sprite Pass"),
389            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
390                view: target,
391                resolve_target: None,
392                ops: wgpu::Operations {
393                    load: wgpu::LoadOp::Load,
394                    store: wgpu::StoreOp::Store,
395                },
396                depth_slice: None,
397            })],
398            depth_stencil_attachment: None,
399            timestamp_writes: None,
400            occlusion_query_set: None,
401            multiview_mask: None,
402        });
403
404        for (i, cmd) in cmds.iter().take(n).enumerate() {
405            let pipeline = match cmd.blend {
406                SpriteBlendKind::Alpha => &self.pipeline_alpha,
407                SpriteBlendKind::Additive => &self.pipeline_additive,
408            };
409            pass.set_pipeline(pipeline);
410            pass.set_bind_group(
411                0,
412                Some(&bind_groups[i]),
413                &[i as u32 * self.uniform_stride as u32],
414            );
415            pass.draw(0..6, 0..1);
416        }
417    }
418}
419
420/// Build a [`SpriteUniform`] from raw engine-side numbers + texture
421/// dimensions + render aspect ratio. Pulled out so the renderer's
422/// `update_sprites` step can produce a single contiguous list of
423/// uniforms for upload + the matching draw-cmd list without
424/// recomputing aspect ratios per call.
425#[allow(clippy::too_many_arguments)]
426pub fn build_sprite_uniform(
427    x: f32,
428    y: f32,
429    sx: f32,
430    sy: f32,
431    rot: f32,
432    rgba: [f32; 4],
433    texture_w: u32,
434    texture_h: u32,
435    render_w: u32,
436    render_h: u32,
437) -> SpriteUniform {
438    // MD2 sizes: `sx = sy = 1` should make the sprite fill ~1/3 of
439    // the shorter render axis, matching the historical default. We
440    // pick `0.33` as the half-extent (so a full sprite occupies
441    // 0.66 of the axis); preset-side scales scale around that.
442    let base = 0.33;
443    let tex_aspect = (texture_w.max(1) as f32) / (texture_h.max(1) as f32);
444    let render_aspect = (render_w.max(1) as f32) / (render_h.max(1) as f32);
445    // Quad half-size in clip space, baked aspect-correct.
446    let size_x = sx * base * tex_aspect / render_aspect;
447    let size_y = sy * base;
448    SpriteUniform {
449        center_size: [x, y, size_x, size_y],
450        rot_pad: [rot, 0.0, 0.0, 0.0],
451        rgba,
452    }
453}
454
455fn make_fallback(device: &wgpu::Device, queue: &wgpu::Queue) -> SpriteTexture {
456    upload_sprite_rgba8(device, queue, "__fallback_transparent", 1, 1, &[0, 0, 0, 0])
457}
458
459fn load_sprite_file(
460    device: &wgpu::Device,
461    queue: &wgpu::Queue,
462    path: &std::path::Path,
463) -> Result<SpriteTexture, image::ImageError> {
464    let img = image::open(path)?;
465    let rgba = img.to_rgba8();
466    let (w, h) = rgba.dimensions();
467    let name = path
468        .file_stem()
469        .and_then(|s| s.to_str())
470        .map(|s| s.to_ascii_lowercase())
471        .unwrap_or_default();
472    Ok(upload_sprite_rgba8(device, queue, &name, w, h, &rgba))
473}
474
475fn upload_sprite_rgba8(
476    device: &wgpu::Device,
477    queue: &wgpu::Queue,
478    name: &str,
479    width: u32,
480    height: u32,
481    bytes: &[u8],
482) -> SpriteTexture {
483    let texture = device.create_texture(&wgpu::TextureDescriptor {
484        label: Some(&format!("Sprite {name}")),
485        size: wgpu::Extent3d {
486            width,
487            height,
488            depth_or_array_layers: 1,
489        },
490        mip_level_count: 1,
491        sample_count: 1,
492        dimension: wgpu::TextureDimension::D2,
493        format: wgpu::TextureFormat::Rgba8Unorm,
494        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
495        view_formats: &[],
496    });
497    queue.write_texture(
498        wgpu::TexelCopyTextureInfo {
499            texture: &texture,
500            mip_level: 0,
501            origin: wgpu::Origin3d::ZERO,
502            aspect: wgpu::TextureAspect::All,
503        },
504        bytes,
505        wgpu::TexelCopyBufferLayout {
506            offset: 0,
507            bytes_per_row: Some(width * 4),
508            rows_per_image: Some(height),
509        },
510        wgpu::Extent3d {
511            width,
512            height,
513            depth_or_array_layers: 1,
514        },
515    );
516    let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
517    SpriteTexture {
518        texture,
519        view,
520        width,
521        height,
522        name: name.to_string(),
523    }
524}
525
#[cfg(test)]
mod tests {
    use super::*;

    /// Comparison tolerance for the float assertions below.
    const EPS: f32 = 1e-6;

    #[test]
    fn build_uniform_packs_aspect_correctly() {
        // A 100×50 sprite (2:1) on a 1000×500 target (2:1) at
        // sx = sy = 1: the two aspect ratios cancel, so
        // size_x == size_y * (2/1) / (2/1) == size_y.
        let uniform =
            build_sprite_uniform(0.5, 0.5, 1.0, 1.0, 0.0, [1.0; 4], 100, 50, 1000, 500);
        let [_, _, size_x, size_y] = uniform.center_size;
        assert!((size_x - size_y).abs() < EPS);
    }

    #[test]
    fn build_uniform_handles_zero_dims() {
        // Degenerate texture / render dimensions must not produce an
        // infinite or NaN size.
        let uniform = build_sprite_uniform(0.5, 0.5, 1.0, 1.0, 0.0, [1.0; 4], 0, 0, 0, 0);
        assert!(uniform.center_size[2].is_finite());
        assert!(uniform.center_size[3].is_finite());
    }

    #[test]
    fn build_uniform_writes_rot_to_first_lane() {
        let uniform =
            build_sprite_uniform(0.5, 0.5, 1.0, 1.0, 0.7, [1.0; 4], 100, 100, 100, 100);
        assert!((uniform.rot_pad[0] - 0.7).abs() < EPS);
    }
}