From 1937105ec37952047bb6f0baa4ae8f251c5abf6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Wed, 11 Oct 2023 16:39:47 +0200 Subject: [PATCH] Get the `fine` CPU shader running --- shader/writeback.wgsl | 25 +++ src/cpu_dispatch.rs | 25 ++- src/cpu_shader/fine.rs | 29 +++- src/cpu_shader/mod.rs | 1 + src/engine.rs | 22 ++- src/lib.rs | 14 +- src/render.rs | 25 ++- src/shaders.rs | 33 ++-- src/wgpu_engine.rs | 386 +++++++++++++++++++++++++++++------------ 9 files changed, 415 insertions(+), 145 deletions(-) create mode 100644 shader/writeback.wgsl diff --git a/shader/writeback.wgsl b/shader/writeback.wgsl new file mode 100644 index 00000000..8a61bcf2 --- /dev/null +++ b/shader/writeback.wgsl @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// Writes an array to a texture. + +#import config + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var source: array; + +@group(0) @binding(2) +var output: texture_storage_2d; + +@compute @workgroup_size(1) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let row = global_id.y * config.target_width; + let pixel = source[row + global_id.x]; + textureStore(output, vec2(global_id.xy), unpack4x8unorm(pixel)); +} diff --git a/src/cpu_dispatch.rs b/src/cpu_dispatch.rs index 01c28c31..44b776ff 100644 --- a/src/cpu_dispatch.rs +++ b/src/cpu_dispatch.rs @@ -10,12 +10,14 @@ use std::{ use bytemuck::Pod; +use crate::ImageProxy; + #[derive(Clone, Copy)] pub enum CpuBinding<'a> { Buffer(&'a [u8]), BufferRW(&'a RefCell>), - #[allow(unused)] - Texture(&'a CpuTexture), + Texture(&'a [u8]), + TextureRW(&'a RefCell), } pub enum TypedBufGuard<'a, T: ?Sized> { @@ -109,7 +111,14 @@ impl<'a> CpuBinding<'a> { #[allow(unused)] pub fn as_tex(&self) -> &CpuTexture { match self { - CpuBinding::Texture(t) => t, + CpuBinding::Texture(t) => todo!(), + _ => panic!("resource type mismatch"), + } + } + + pub fn as_tex_mut(&self) -> RefMut { + match self { + CpuBinding::TextureRW(t) => t.borrow_mut(), _ => panic!("resource type mismatch"), } } @@ -122,3 +131,13 @@ pub struct CpuTexture { // In RGBA format. May expand in the future. pub pixels: Vec, } + +impl CpuTexture { + pub fn new(img: &ImageProxy) -> Self { + CpuTexture { + width: img.width as usize, + height: img.height as usize, + pixels: vec![0; img.width as usize * img.height as usize], + } + } +} diff --git a/src/cpu_shader/fine.rs b/src/cpu_shader/fine.rs index c64c8762..8267e5ea 100644 --- a/src/cpu_shader/fine.rs +++ b/src/cpu_shader/fine.rs @@ -1,9 +1,9 @@ // Copyright 2023 The Vello authors // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense -use vello_encoding::{ConfigUniform, PathSegment, Tile}; +use vello_encoding::{ConfigUniform, PathSegment}; -use crate::cpu_dispatch::CpuTexture; +use crate::cpu_dispatch::{CpuBinding, CpuTexture}; use super::{CMD_COLOR, CMD_END, CMD_FILL, CMD_JUMP, CMD_SOLID, PTCL_INITIAL_ALLOC}; @@ -111,7 +111,6 @@ fn fill_path(area: &mut [f32], segments: &[PathSegment], fill: &CmdFill, x_tile: #[allow(unused)] fn fine_main( config: &ConfigUniform, - tiles: &[Tile], segments: &[PathSegment], output: &mut CpuTexture, ptcl: &[u32], @@ -154,7 +153,7 @@ fn fine_main( for a in &mut area { *a = 1.0; } - cmd_ix += 2; + cmd_ix += 1; } CMD_COLOR => { let color = read_color(ptcl, cmd_ix); @@ -177,12 +176,28 @@ fn fine_main( } // Write tile (in rgba) for y in 0..TILE_HEIGHT { - let base = - output.width * (tile_y as usize * TILE_HEIGHT + y) + tile_x as usize * TILE_WIDTH; + let base = config.target_width as usize * (tile_y as usize * TILE_HEIGHT + y) + + tile_x as usize * TILE_WIDTH; for x in 0..TILE_WIDTH { let rgba32 = pack4x8unorm(rgba[y * TILE_WIDTH + x]); - output.pixels[base + x] = rgba32; + // TODO: Fix out of bounds + //output.pixels[base + x] = rgba32; + if let Some(p) = output.pixels.get_mut(base + x) { + *p = rgba32; + } } } } } + +pub fn fine(_n_wg: u32, resources: &[CpuBinding]) { + let config = resources[0].as_typed(); + let segments = resources[1].as_slice(); + let ptcl = resources[2].as_slice(); + let info = resources[3].as_slice(); + let mut output = resources[4].as_tex_mut(); + //let gradients = resources[4].as_tex(); + //let image_atlas = resources[5].as_tex(); + + fine_main(&config, &segments, &mut output, &ptcl, &info); +} diff --git a/src/cpu_shader/mod.rs b/src/cpu_shader/mod.rs index 16d261f6..9bb19946 100644 --- a/src/cpu_shader/mod.rs +++ b/src/cpu_shader/mod.rs @@ -34,6 +34,7 @@ pub use clip_reduce::clip_reduce; pub use coarse::coarse; pub use draw_leaf::draw_leaf; pub use draw_reduce::draw_reduce; +pub use fine::fine; pub use flatten::flatten; pub use path_count::path_count; pub use path_count_setup::path_count_setup; diff --git a/src/engine.rs b/src/engine.rs index a122e353..d108c8dd 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -48,12 +48,20 @@ pub enum ImageFormat { Bgra8, } +#[derive(Clone, Copy, PartialEq)] +pub enum ImageAccess { + Read, + Full, + WriteOnce, +} + #[derive(Clone, Copy)] pub struct ImageProxy { pub width: u32, pub height: u32, pub format: ImageFormat, pub id: Id, + pub access: ImageAccess, } #[derive(Clone, Copy)] @@ -73,6 +81,11 @@ pub enum Command { Dispatch(ShaderId, (u32, u32, u32), Vec), DispatchIndirect(ShaderId, BufProxy, u64, Vec), Download(BufProxy), + Writeback { + image: ImageProxy, + shader: ShaderId, + config: ResourceProxy, + }, Clear(BufProxy, u64, Option), FreeBuf(BufProxy), FreeImage(ImageProxy), @@ -121,7 +134,7 @@ impl Recording { data: impl Into>, ) -> ImageProxy { let data = data.into(); - let image_proxy = ImageProxy::new(width, height, format); + let image_proxy = ImageProxy::new(width, height, format, ImageAccess::Read); self.push(Command::UploadImage(image_proxy, data)); image_proxy } @@ -219,13 +232,14 @@ impl ImageFormat { } impl ImageProxy { - pub fn new(width: u32, height: u32, format: ImageFormat) -> Self { + pub fn new(width: u32, height: u32, format: ImageFormat, access: ImageAccess) -> Self { let id = Id::next(); ImageProxy { width, height, format, id, + access, } } } @@ -235,8 +249,8 @@ impl ResourceProxy { Self::Buf(BufProxy::new(size, name)) } - pub fn new_image(width: u32, height: u32, format: ImageFormat) -> Self { - Self::Image(ImageProxy::new(width, height, format)) + pub fn new_image(width: u32, height: u32, format: ImageFormat, access: ImageAccess) -> Self { + Self::Image(ImageProxy::new(width, height, format, access)) } pub fn as_buf(&self) -> Option<&BufProxy> { diff --git a/src/lib.rs b/src/lib.rs index 053f30af..2f14124b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,7 +47,7 @@ pub use engine::{ }; pub use shaders::FullShaders; #[cfg(feature = "wgpu")] -use wgpu_engine::{ExternalResource, WgpuEngine}; +use wgpu_engine::{ExternalResource, TransientBindMap, WgpuEngine}; /// Temporary export, used in with_winit for stats pub use vello_encoding::BumpAllocators; @@ -73,7 +73,7 @@ enum AaConfig { /// Configuration of antialiasing. Currently this is static, but could be switched to /// a launch option or even finer-grained. -const ANTIALIASING: AaConfig = AaConfig::Msaa16; +const ANTIALIASING: AaConfig = AaConfig::Area; /// Renders a scene into a texture or surface. #[cfg(feature = "wgpu")] @@ -154,12 +154,13 @@ impl Renderer { *target.as_image().unwrap(), texture, )]; + let mut transient_map = TransientBindMap::new(&external_resources); self.engine.run_recording( device, queue, &recording, - &external_resources, "render_to_texture", + &mut transient_map, #[cfg(feature = "wgpu-profiler")] &mut self.profiler, )?; @@ -274,12 +275,14 @@ impl Renderer { let recording = render.render_encoding_coarse(encoding, &self.shaders, params, robust); let target = render.out_image(); let bump_buf = render.bump_buf(); + let external_resources = [ExternalResource::Image(target, texture)]; + let mut transient_map = TransientBindMap::new(&external_resources); self.engine.run_recording( device, queue, &recording, - &[], "t_async_coarse", + &mut transient_map, #[cfg(feature = "wgpu-profiler")] &mut self.profiler, )?; @@ -303,13 +306,12 @@ impl Renderer { // Maybe clear to reuse allocation? let mut recording = Recording::default(); render.record_fine(&self.shaders, &mut recording); - let external_resources = [ExternalResource::Image(target, texture)]; self.engine.run_recording( device, queue, &recording, - &external_resources, "t_async_fine", + &mut transient_map, #[cfg(feature = "wgpu-profiler")] &mut self.profiler, )?; diff --git a/src/render.rs b/src/render.rs index 0bb65795..f299a227 100644 --- a/src/render.rs +++ b/src/render.rs @@ -1,7 +1,7 @@ //! Take an encoded scene and create a graph to render it use crate::{ - engine::{BufProxy, ImageFormat, ImageProxy, Recording, ResourceProxy}, + engine::{BufProxy, ImageAccess, ImageFormat, ImageProxy, Recording, ResourceProxy}, shaders::FullShaders, AaConfig, RenderParams, Scene, ANTIALIASING, }; @@ -85,7 +85,7 @@ impl Render { let mut packed = vec![]; let (layout, ramps, images) = resolver.resolve(encoding, &mut packed); let gradient_image = if ramps.height == 0 { - ResourceProxy::new_image(1, 1, ImageFormat::Rgba8) + ResourceProxy::new_image(1, 1, ImageFormat::Rgba8, ImageAccess::Full) } else { let data: &[u8] = bytemuck::cast_slice(ramps.data); ResourceProxy::Image(recording.upload_image( @@ -96,9 +96,14 @@ impl Render { )) }; let image_atlas = if images.images.is_empty() { - ImageProxy::new(1, 1, ImageFormat::Rgba8) + ImageProxy::new(1, 1, ImageFormat::Rgba8, ImageAccess::Full) } else { - ImageProxy::new(images.width, images.height, ImageFormat::Rgba8) + ImageProxy::new( + images.width, + images.height, + ImageFormat::Rgba8, + ImageAccess::Full, + ) }; for image in images.images { recording.write_image( @@ -390,7 +395,12 @@ impl Render { recording.free_resource(draw_monoid_buf); recording.free_resource(bin_header_buf); recording.free_resource(path_buf); - let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8); + let out_image = ImageProxy::new( + params.width, + params.height, + ImageFormat::Rgba8, + ImageAccess::WriteOnce, + ); self.fine_wg_count = Some(wg_counts.fine); self.fine_resources = Some(FineResources { config_buf, @@ -456,6 +466,11 @@ impl Render { ); } } + recording.push(crate::Command::Writeback { + image: fine.out_image, + config: fine.config_buf, + shader: shaders.writeback, + }); recording.free_resource(fine.config_buf); recording.free_resource(fine.tile_buf); recording.free_resource(fine.segments_buf); diff --git a/src/shaders.rs b/src/shaders.rs index de23e077..9d1777d5 100644 --- a/src/shaders.rs +++ b/src/shaders.rs @@ -18,19 +18,18 @@ mod preprocess; -use std::collections::HashSet; - +use crate::engine::ShaderId; #[cfg(feature = "wgpu")] -use wgpu::Device; - -use crate::{ - cpu_shader, - engine::{BindType, Error, ImageFormat, ShaderId}, +use { + crate::wgpu_engine::WgpuEngine, + crate::{ + cpu_shader, + engine::{BindType, Error, ImageFormat}, + }, + std::collections::HashSet, + wgpu::Device, }; -#[cfg(feature = "wgpu")] -use crate::wgpu_engine::WgpuEngine; - macro_rules! shader { ($name:expr) => {&{ let shader = include_str!(concat!( @@ -79,6 +78,7 @@ pub struct FullShaders { pub path_tiling_setup: ShaderId, pub path_tiling: ShaderId, pub fine: ShaderId, + pub writeback: ShaderId, // 2-level dispatch works for CPU pathtag scan even for large // inputs, 3-level is not yet implemented. pub pathtag_is_cpu: bool, @@ -114,12 +114,16 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result = None; // Uncomment this to force use of GPU shaders from the specified shader and later even // if `engine.use_cpu` is specified. //let force_gpu_from = Some("binning"); + // Use the GPU for the fine shader for now as the CPU shader is incomplete. + let force_gpu_from = Some("fine"); + macro_rules! add_shader { ($name:ident, $bindings:expr, $defines:expr, $cpu:expr) => {{ if force_gpu_from == Some(stringify!($name)) { @@ -282,7 +286,7 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result add_shader!( fine, @@ -300,6 +304,12 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result Result), +} + #[derive(Default)] struct BindMap { buf_map: HashMap, - image_map: HashMap, + image_map: HashMap, pending_clears: HashSet, } @@ -111,10 +117,9 @@ struct ResourcePool { /// `run_recording()`, including external resources and also buffer /// uploads. #[derive(Default)] -struct TransientBindMap<'a> { +pub struct TransientBindMap<'a> { bufs: HashMap>, - // TODO: create transient image type - images: HashMap, + images: HashMap>, } enum TransientBuf<'a> { @@ -122,6 +127,15 @@ enum TransientBuf<'a> { Gpu(&'a Buffer), } +enum TransientImage<'a> { + Cpu(&'a [u8]), + CpuOwned { + data: RefCell, + view: &'a TextureView, + }, + Gpu(&'a TextureView), +} + impl WgpuEngine { pub fn new(use_cpu: bool) -> WgpuEngine { Self { @@ -251,18 +265,17 @@ impl WgpuEngine { }) } - pub fn run_recording( + pub fn run_recording<'a>( &mut self, device: &Device, queue: &Queue, - recording: &Recording, - external_resources: &[ExternalResource], + recording: &'a Recording, label: &'static str, + transient_map: &mut TransientBindMap<'a>, #[cfg(feature = "wgpu-profiler")] profiler: &mut wgpu_profiler::GpuProfiler, ) -> Result<(), Error> { let mut free_bufs: HashSet = Default::default(); let mut free_images: HashSet = Default::default(); - let mut transient_map = TransientBindMap::new(external_resources); let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor { label: Some(label) }); @@ -297,6 +310,9 @@ impl WgpuEngine { self.bind_map.insert_buf(buf_proxy, buf); } Command::UploadImage(image_proxy, bytes) => { + transient_map + .images + .insert(image_proxy.id, TransientImage::Cpu(bytes)); let format = image_proxy.format.to_wgpu(); let block_size = format .block_size(None) @@ -347,31 +363,95 @@ impl WgpuEngine { self.bind_map .insert_image(image_proxy.id, texture, texture_view) } - Command::WriteImage(proxy, [x, y, width, height], data) => { - if let Ok((texture, _)) = self.bind_map.get_or_create_image(*proxy, device) { - let format = proxy.format.to_wgpu(); - let block_size = format - .block_size(None) - .expect("ImageFormat must have a valid block size"); - queue.write_texture( - wgpu::ImageCopyTexture { - texture, - mip_level: 0, - origin: wgpu::Origin3d { x: *x, y: *y, z: 0 }, - aspect: TextureAspect::All, - }, - &data[..], - wgpu::ImageDataLayout { - offset: 0, - bytes_per_row: Some(*width * block_size), - rows_per_image: None, + Command::Writeback { + image, + shader, + config, + } => { + transient_map.prepare_proxy( + &mut self.bind_map, + &mut self.pool, + device, + queue, + &mut encoder, + config, + ); + + if let Some(TransientImage::CpuOwned { data, view }) = + transient_map.images.get(&image.id) + { + let data = &*data.borrow(); + + let wgpu_shader = + if let ShaderKind::Wgpu(shader) = self.shaders[shader.0].select() { + shader + } else { + panic!("expected GPU shader") + }; + let buf = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: data.pixels.len() as u64 * 4, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + queue.write_buffer(&buf, 0, bytemuck::cast_slice(&data.pixels)); + + let entries = [ + transient_map.create_bind_entry(&self.bind_map, 0, config), + wgpu::BindGroupEntry { + binding: 1, + resource: buf.as_entire_binding(), }, - wgpu::Extent3d { - width: *width, - height: *height, - depth_or_array_layers: 1, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::TextureView(view), }, - ); + ]; + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &wgpu_shader.bind_group_layout, + entries: &entries, + }); + let mut cpass = encoder.begin_compute_pass(&Default::default()); + #[cfg(feature = "wgpu-profiler")] + profiler.begin_scope("writeback", &mut cpass, device); + cpass.set_pipeline(&wgpu_shader.pipeline); + cpass.set_bind_group(0, &bind_group, &[]); + cpass.dispatch_workgroups(data.width as u32, data.height as u32, 1); + #[cfg(feature = "wgpu-profiler")] + profiler.end_scope(&mut cpass); + } + } + Command::WriteImage(proxy, [x, y, width, height], data) => { + if let Ok(image) = self.bind_map.get_or_create_image(*proxy, device) { + match image { + MaterializedImage::Gpu((texture, _)) => { + let format = proxy.format.to_wgpu(); + let block_size = format + .block_size(None) + .expect("ImageFormat must have a valid block size"); + queue.write_texture( + wgpu::ImageCopyTexture { + texture, + mip_level: 0, + origin: wgpu::Origin3d { x: *x, y: *y, z: 0 }, + aspect: TextureAspect::All, + }, + &data[..], + wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: Some(*width * block_size), + rows_per_image: None, + }, + wgpu::Extent3d { + width: *width, + height: *height, + depth_or_array_layers: 1, + }, + ); + } + MaterializedImage::Cpu(..) => todo!(), + } } } Command::Dispatch(shader_id, wg_size, bindings) => { @@ -512,10 +592,9 @@ impl WgpuEngine { } } for id in free_images { - if let Some((texture, view)) = self.bind_map.image_map.remove(&id) { + if let Some(image) = self.bind_map.image_map.remove(&id) { // TODO: have a pool to avoid needless re-allocation - drop(texture); - drop(view); + drop(image); } } Ok(()) @@ -549,6 +628,14 @@ impl BindMap { }) } + /// Get a image, only if it's on GPU. + fn get_gpu_image(&self, id: Id) -> Option<&(Texture, TextureView)> { + self.image_map.get(&id).and_then(|b| match &b { + MaterializedImage::Gpu(b) => Some(b), + _ => None, + }) + } + /// Get a CPU buffer. /// /// Panics if buffer is not present or is on GPU. @@ -559,6 +646,16 @@ impl BindMap { } } + /// Get a CPU image. + /// + /// Panics if image is not present or is on GPU. + fn get_cpu_image(&self, id: Id) -> CpuBinding { + match &self.image_map[&id] { + MaterializedImage::Cpu(b) => CpuBinding::TextureRW(b), + _ => panic!("getting cpu image, but it's on gpu"), + } + } + fn materialize_cpu_buf(&mut self, buf: &BufProxy) { self.buf_map.entry(buf.id).or_insert_with(|| { let buffer = MaterializedBuffer::Cpu(RefCell::new(vec![0; buf.size as usize])); @@ -570,8 +667,15 @@ impl BindMap { }); } + fn materialize_cpu_image(&mut self, img: &ImageProxy) { + self.image_map + .entry(img.id) + .or_insert_with(|| MaterializedImage::Cpu(RefCell::new(CpuTexture::new(img)))); + } + fn insert_image(&mut self, id: Id, image: Texture, image_view: TextureView) { - self.image_map.insert(id, (image, image_view)); + self.image_map + .insert(id, MaterializedImage::Gpu((image, image_view))); } fn get_buf(&mut self, proxy: BufProxy) -> Option<&BindMapBuffer> { @@ -582,7 +686,7 @@ impl BindMap { &mut self, proxy: ImageProxy, device: &Device, - ) -> Result<&(Texture, TextureView), Error> { + ) -> Result<&MaterializedImage, Error> { match self.image_map.entry(proxy.id) { Entry::Occupied(occupied) => Ok(occupied.into_mut()), Entry::Vacant(vacant) => { @@ -611,7 +715,7 @@ impl BindMap { array_layer_count: None, format: Some(proxy.format.to_wgpu()), }); - Ok(vacant.insert((texture, texture_view))) + Ok(vacant.insert(MaterializedImage::Gpu((texture, texture_view)))) } } } @@ -688,9 +792,28 @@ impl BindMapBuffer { } } +impl MaterializedImage { + // Upload a image from CPU to GPU if needed. + // + // Note data flow is one way only, from CPU to GPU. Once this method is + // called, the image is no longer materialized on CPU, and cannot be + // accessed from a CPU shader. + fn upload_if_needed( + &mut self, + _proxy: &ImageProxy, + _device: &Device, + _queue: &Queue, + _pool: &mut ResourcePool, + ) { + if let MaterializedImage::Cpu(_cpu_buf) = &self { + todo!() + } + } +} + impl<'a> TransientBindMap<'a> { /// Create new transient bind map, seeded from external resources - fn new(external_resources: &'a [ExternalResource]) -> Self { + pub fn new(external_resources: &'a [ExternalResource]) -> Self { let mut bufs = HashMap::default(); let mut images = HashMap::default(); for resource in external_resources { @@ -698,8 +821,8 @@ impl<'a> TransientBindMap<'a> { ExternalResource::Buf(proxy, gpu_buf) => { bufs.insert(proxy.id, TransientBuf::Gpu(gpu_buf)); } - ExternalResource::Image(proxy, gpu_image) => { - images.insert(proxy.id, *gpu_image); + ExternalResource::Image(proxy, view) => { + images.insert(proxy.id, TransientImage::Gpu(view)); } } } @@ -721,49 +844,47 @@ impl<'a> TransientBindMap<'a> { } } - #[allow(clippy::too_many_arguments)] - fn create_bind_group( + fn prepare_proxy( &mut self, bind_map: &mut BindMap, pool: &mut ResourcePool, device: &Device, queue: &Queue, encoder: &mut CommandEncoder, - layout: &BindGroupLayout, - bindings: &[ResourceProxy], - ) -> Result { - for proxy in bindings { - match proxy { - ResourceProxy::Buf(proxy) => { - if self.bufs.contains_key(&proxy.id) { - continue; - } - match bind_map.buf_map.entry(proxy.id) { - Entry::Vacant(v) => { - // TODO: only some buffers will need indirect, but does it hurt? - let usage = BufferUsages::COPY_SRC - | BufferUsages::COPY_DST - | BufferUsages::STORAGE - | BufferUsages::INDIRECT; - let buf = pool.get_buf(proxy.size, proxy.name, usage, device); - if bind_map.pending_clears.remove(&proxy.id) { - encoder.clear_buffer(&buf, 0, None); - } - v.insert(BindMapBuffer { - buffer: MaterializedBuffer::Gpu(buf), - label: proxy.name, - }); - } - Entry::Occupied(mut o) => { - o.get_mut().upload_if_needed(proxy, device, queue, pool) + proxy: &ResourceProxy, + ) { + match proxy { + ResourceProxy::Buf(proxy) => { + if self.bufs.contains_key(&proxy.id) { + return; + } + match bind_map.buf_map.entry(proxy.id) { + Entry::Vacant(v) => { + // TODO: only some buffers will need indirect, but does it hurt? + let usage = BufferUsages::COPY_SRC + | BufferUsages::COPY_DST + | BufferUsages::STORAGE + | BufferUsages::INDIRECT; + let buf = pool.get_buf(proxy.size, proxy.name, usage, device); + if bind_map.pending_clears.remove(&proxy.id) { + encoder.clear_buffer(&buf, 0, None); } + v.insert(BindMapBuffer { + buffer: MaterializedBuffer::Gpu(buf), + label: proxy.name, + }); } - } - ResourceProxy::Image(proxy) => { - if self.images.contains_key(&proxy.id) { - continue; + Entry::Occupied(mut o) => { + o.get_mut().upload_if_needed(proxy, device, queue, pool) } - if let Entry::Vacant(v) = bind_map.image_map.entry(proxy.id) { + } + } + ResourceProxy::Image(proxy) => { + if self.images.contains_key(&proxy.id) { + return; + } + match bind_map.image_map.entry(proxy.id) { + Entry::Vacant(v) => { let format = proxy.format.to_wgpu(); let texture = device.create_texture(&wgpu::TextureDescriptor { label: None, @@ -789,39 +910,67 @@ impl<'a> TransientBindMap<'a> { array_layer_count: None, format: Some(proxy.format.to_wgpu()), }); - v.insert((texture, texture_view)); + v.insert(MaterializedImage::Gpu((texture, texture_view))); + } + Entry::Occupied(mut o) => { + o.get_mut().upload_if_needed(proxy, device, queue, pool) } } } } - let entries = bindings - .iter() - .enumerate() - .map(|(i, proxy)| match proxy { - ResourceProxy::Buf(proxy) => { - let buf = match self.bufs.get(&proxy.id) { - Some(TransientBuf::Gpu(b)) => b, - _ => bind_map.get_gpu_buf(proxy.id).unwrap(), - }; - Ok(wgpu::BindGroupEntry { - binding: i as u32, - resource: buf.as_entire_binding(), - }) + } + + fn create_bind_entry<'b>( + &'b self, + bind_map: &'b BindMap, + binding: u32, + proxy: &ResourceProxy, + ) -> wgpu::BindGroupEntry<'b> { + match proxy { + ResourceProxy::Buf(proxy) => { + let buf = match self.bufs.get(&proxy.id) { + Some(TransientBuf::Gpu(b)) => b, + _ => bind_map.get_gpu_buf(proxy.id).unwrap(), + }; + wgpu::BindGroupEntry { + binding, + resource: buf.as_entire_binding(), } - ResourceProxy::Image(proxy) => { - let view = self - .images - .get(&proxy.id) - .copied() - .or_else(|| bind_map.image_map.get(&proxy.id).map(|v| &v.1)) - .unwrap(); - Ok(wgpu::BindGroupEntry { - binding: i as u32, - resource: wgpu::BindingResource::TextureView(view), - }) + } + ResourceProxy::Image(proxy) => { + let view = match self.images.get(&proxy.id) { + Some(TransientImage::Gpu(view)) => *view, + _ => &bind_map.get_gpu_image(proxy.id).unwrap().1, + }; + wgpu::BindGroupEntry { + binding, + resource: wgpu::BindingResource::TextureView(view), } - }) - .collect::, Error>>()?; + } + } + } + + #[allow(clippy::too_many_arguments)] + fn create_bind_group( + &mut self, + bind_map: &mut BindMap, + pool: &mut ResourcePool, + device: &Device, + queue: &Queue, + encoder: &mut CommandEncoder, + layout: &BindGroupLayout, + bindings: &[ResourceProxy], + ) -> Result { + for proxy in bindings { + self.prepare_proxy(bind_map, pool, device, queue, encoder, proxy); + } + + let bind_map = &mut *bind_map; + let entries: Vec<_> = bindings + .iter() + .enumerate() + .map(|(i, proxy)| self.create_bind_entry(bind_map, i as u32, proxy)) + .collect(); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { label: None, layout, @@ -830,11 +979,11 @@ impl<'a> TransientBindMap<'a> { Ok(bind_group) } - fn create_cpu_resources( - &self, - bind_map: &'a mut BindMap, + fn create_cpu_resources<'b>( + &'b mut self, + bind_map: &'b mut BindMap, bindings: &[ResourceProxy], - ) -> Vec { + ) -> Vec> { // First pass is mutable; create buffers as needed for resource in bindings { match resource { @@ -843,7 +992,22 @@ impl<'a> TransientBindMap<'a> { Some(TransientBuf::Gpu(_)) => panic!("buffer was already materialized on GPU"), _ => bind_map.materialize_cpu_buf(buf), }, - ResourceProxy::Image(_) => todo!(), + ResourceProxy::Image(img) => match self.images.get(&img.id) { + Some(TransientImage::Cpu(..) | TransientImage::CpuOwned { .. }) => (), + Some(TransientImage::Gpu(view)) => { + if img.access == ImageAccess::WriteOnce { + let id = img.id; + let image = TransientImage::CpuOwned { + data: RefCell::new(CpuTexture::new(img)), + view, + }; + self.images.insert(id, image); + } else { + panic!("image was already materialized on GPU") + } + } + _ => bind_map.materialize_cpu_image(img), + }, }; } // Second pass takes immutable references @@ -854,7 +1018,11 @@ impl<'a> TransientBindMap<'a> { Some(TransientBuf::Cpu(b)) => CpuBinding::Buffer(b), _ => bind_map.get_cpu_buf(buf.id), }, - ResourceProxy::Image(_) => todo!(), + ResourceProxy::Image(img) => match self.images.get(&img.id) { + Some(TransientImage::Cpu(b)) => CpuBinding::Texture(b), + Some(TransientImage::CpuOwned { data, .. }) => CpuBinding::TextureRW(data), + _ => bind_map.get_cpu_image(img.id), + }, }) .collect() }