Skip to content

Commit

Permalink
Get the fine CPU shader running
Browse files Browse the repository at this point in the history
  • Loading branch information
Zoxc committed Nov 1, 2023
1 parent ddca7c5 commit 0747e52
Show file tree
Hide file tree
Showing 9 changed files with 405 additions and 133 deletions.
25 changes: 25 additions & 0 deletions shader/writeback.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Writes an array to a texture.

#import config

@group(0) @binding(0)
var<uniform> config: Config;

@group(0) @binding(1)
var<storage> source: array<u32>;

@group(0) @binding(2)
var output: texture_storage_2d<rgba8unorm, write>;

@compute @workgroup_size(1)
fn main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
let row = global_id.y * config.target_width;
let pixel = source[row + global_id.x];
textureStore(output, vec2<i32>(global_id.xy), unpack4x8unorm(pixel));
}
25 changes: 22 additions & 3 deletions src/cpu_dispatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ use std::{

use bytemuck::Pod;

use crate::ImageProxy;

#[derive(Clone, Copy)]
pub enum CpuBinding<'a> {
Buffer(&'a [u8]),
BufferRW(&'a RefCell<Vec<u8>>),
#[allow(unused)]
Texture(&'a CpuTexture),
Texture(&'a [u8]),
TextureRW(&'a RefCell<CpuTexture>),
}

pub enum TypedBufGuard<'a, T: ?Sized> {
Expand Down Expand Up @@ -109,7 +111,14 @@ impl<'a> CpuBinding<'a> {
#[allow(unused)]
pub fn as_tex(&self) -> &CpuTexture {
match self {
CpuBinding::Texture(t) => t,
CpuBinding::Texture(t) => todo!(),
_ => panic!("resource type mismatch"),
}
}

pub fn as_tex_mut(&self) -> RefMut<CpuTexture> {
match self {
CpuBinding::TextureRW(t) => t.borrow_mut(),
_ => panic!("resource type mismatch"),
}
}
Expand All @@ -122,3 +131,13 @@ pub struct CpuTexture {
// In RGBA format. May expand in the future.
pub pixels: Vec<u32>,
}

impl CpuTexture {
pub fn new(img: &ImageProxy) -> Self {
CpuTexture {
width: img.width as usize,
height: img.height as usize,
pixels: vec![0; img.width as usize * img.height as usize],
}
}
}
29 changes: 22 additions & 7 deletions src/cpu_shader/fine.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// Copyright 2023 The Vello authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

use vello_encoding::{ConfigUniform, PathSegment, Tile};
use vello_encoding::{ConfigUniform, PathSegment};

use crate::cpu_dispatch::CpuTexture;
use crate::cpu_dispatch::{CpuBinding, CpuTexture};

use super::{CMD_COLOR, CMD_END, CMD_FILL, CMD_JUMP, CMD_SOLID, PTCL_INITIAL_ALLOC};

Expand Down Expand Up @@ -111,7 +111,6 @@ fn fill_path(area: &mut [f32], segments: &[PathSegment], fill: &CmdFill, x_tile:
#[allow(unused)]
fn fine_main(
config: &ConfigUniform,
tiles: &[Tile],
segments: &[PathSegment],
output: &mut CpuTexture,
ptcl: &[u32],
Expand Down Expand Up @@ -154,7 +153,7 @@ fn fine_main(
for a in &mut area {
*a = 1.0;
}
cmd_ix += 2;
cmd_ix += 1;
}
CMD_COLOR => {
let color = read_color(ptcl, cmd_ix);
Expand All @@ -177,12 +176,28 @@ fn fine_main(
}
// Write tile (in rgba)
for y in 0..TILE_HEIGHT {
let base =
output.width * (tile_y as usize * TILE_HEIGHT + y) + tile_x as usize * TILE_WIDTH;
let base = config.target_width as usize * (tile_y as usize * TILE_HEIGHT + y)
+ tile_x as usize * TILE_WIDTH;
for x in 0..TILE_WIDTH {
let rgba32 = pack4x8unorm(rgba[y * TILE_WIDTH + x]);
output.pixels[base + x] = rgba32;
// TODO: Fix out of bounds
//output.pixels[base + x] = rgba32;
if let Some(p) = output.pixels.get_mut(base + x) {
*p = rgba32;
}
}
}
}
}

pub fn fine(_n_wg: u32, resources: &[CpuBinding]) {
let config = resources[0].as_typed();
let segments = resources[1].as_slice();
let ptcl = resources[2].as_slice();
let info = resources[3].as_slice();
let mut output = resources[4].as_tex_mut();
//let gradients = resources[4].as_tex();
//let image_atlas = resources[5].as_tex();

fine_main(&config, &segments, &mut output, &ptcl, &info);
}
1 change: 1 addition & 0 deletions src/cpu_shader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub use clip_reduce::clip_reduce;
pub use coarse::coarse;
pub use draw_leaf::draw_leaf;
pub use draw_reduce::draw_reduce;
pub use fine::fine;
pub use flatten::flatten;
pub use path_count::path_count;
pub use path_count_setup::path_count_setup;
Expand Down
22 changes: 18 additions & 4 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,20 @@ pub enum ImageFormat {
Bgra8,
}

#[derive(Clone, Copy, PartialEq)]
pub enum ImageAccess {
Read,
Full,
WriteOnce,
}

#[derive(Clone, Copy)]
pub struct ImageProxy {
pub width: u32,
pub height: u32,
pub format: ImageFormat,
pub id: Id,
pub access: ImageAccess,
}

#[derive(Clone, Copy)]
Expand All @@ -73,6 +81,11 @@ pub enum Command {
Dispatch(ShaderId, (u32, u32, u32), Vec<ResourceProxy>),
DispatchIndirect(ShaderId, BufProxy, u64, Vec<ResourceProxy>),
Download(BufProxy),
Writeback {
image: ImageProxy,
shader: ShaderId,
config: ResourceProxy,
},
Clear(BufProxy, u64, Option<NonZeroU64>),
FreeBuf(BufProxy),
FreeImage(ImageProxy),
Expand Down Expand Up @@ -121,7 +134,7 @@ impl Recording {
data: impl Into<Vec<u8>>,
) -> ImageProxy {
let data = data.into();
let image_proxy = ImageProxy::new(width, height, format);
let image_proxy = ImageProxy::new(width, height, format, ImageAccess::Read);
self.push(Command::UploadImage(image_proxy, data));
image_proxy
}
Expand Down Expand Up @@ -219,13 +232,14 @@ impl ImageFormat {
}

impl ImageProxy {
pub fn new(width: u32, height: u32, format: ImageFormat) -> Self {
pub fn new(width: u32, height: u32, format: ImageFormat, access: ImageAccess) -> Self {
let id = Id::next();
ImageProxy {
width,
height,
format,
id,
access,
}
}
}
Expand All @@ -235,8 +249,8 @@ impl ResourceProxy {
Self::Buf(BufProxy::new(size, name))
}

pub fn new_image(width: u32, height: u32, format: ImageFormat) -> Self {
Self::Image(ImageProxy::new(width, height, format))
pub fn new_image(width: u32, height: u32, format: ImageFormat, access: ImageAccess) -> Self {
Self::Image(ImageProxy::new(width, height, format, access))
}

pub fn as_buf(&self) -> Option<&BufProxy> {
Expand Down
11 changes: 7 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub use engine::{
BufProxy, Command, Id, ImageFormat, ImageProxy, Recording, ResourceProxy, ShaderId,
};
pub use shaders::FullShaders;
use wgpu_engine::TransientBindMap;
#[cfg(feature = "wgpu")]
use wgpu_engine::{ExternalResource, WgpuEngine};

Expand Down Expand Up @@ -154,12 +155,13 @@ impl Renderer {
*target.as_image().unwrap(),
texture,
)];
let mut transient_map = TransientBindMap::new(&external_resources);
self.engine.run_recording(
device,
queue,
&recording,
&external_resources,
"render_to_texture",
&mut transient_map,
#[cfg(feature = "wgpu-profiler")]
&mut self.profiler,
)?;
Expand Down Expand Up @@ -274,12 +276,14 @@ impl Renderer {
let recording = render.render_encoding_coarse(encoding, &self.shaders, params, robust);
let target = render.out_image();
let bump_buf = render.bump_buf();
let external_resources = [ExternalResource::Image(target, texture)];
let mut transient_map = TransientBindMap::new(&external_resources);
self.engine.run_recording(
device,
queue,
&recording,
&[],
"t_async_coarse",
&mut transient_map,
#[cfg(feature = "wgpu-profiler")]
&mut self.profiler,
)?;
Expand All @@ -303,13 +307,12 @@ impl Renderer {
// Maybe clear to reuse allocation?
let mut recording = Recording::default();
render.record_fine(&self.shaders, &mut recording);
let external_resources = [ExternalResource::Image(target, texture)];
self.engine.run_recording(
device,
queue,
&recording,
&external_resources,
"t_async_fine",
&mut transient_map,
#[cfg(feature = "wgpu-profiler")]
&mut self.profiler,
)?;
Expand Down
25 changes: 20 additions & 5 deletions src/render.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Take an encoded scene and create a graph to render it

use crate::{
engine::{BufProxy, ImageFormat, ImageProxy, Recording, ResourceProxy},
engine::{BufProxy, ImageAccess, ImageFormat, ImageProxy, Recording, ResourceProxy},
shaders::FullShaders,
AaConfig, RenderParams, Scene, ANTIALIASING,
};
Expand Down Expand Up @@ -85,7 +85,7 @@ impl Render {
let mut packed = vec![];
let (layout, ramps, images) = resolver.resolve(encoding, &mut packed);
let gradient_image = if ramps.height == 0 {
ResourceProxy::new_image(1, 1, ImageFormat::Rgba8)
ResourceProxy::new_image(1, 1, ImageFormat::Rgba8, ImageAccess::Full)
} else {
let data: &[u8] = bytemuck::cast_slice(ramps.data);
ResourceProxy::Image(recording.upload_image(
Expand All @@ -96,9 +96,14 @@ impl Render {
))
};
let image_atlas = if images.images.is_empty() {
ImageProxy::new(1, 1, ImageFormat::Rgba8)
ImageProxy::new(1, 1, ImageFormat::Rgba8, ImageAccess::Full)
} else {
ImageProxy::new(images.width, images.height, ImageFormat::Rgba8)
ImageProxy::new(
images.width,
images.height,
ImageFormat::Rgba8,
ImageAccess::Full,
)
};
for image in images.images {
recording.write_image(
Expand Down Expand Up @@ -390,7 +395,12 @@ impl Render {
recording.free_resource(draw_monoid_buf);
recording.free_resource(bin_header_buf);
recording.free_resource(path_buf);
let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8);
let out_image = ImageProxy::new(
params.width,
params.height,
ImageFormat::Rgba8,
ImageAccess::WriteOnce,
);
self.fine_wg_count = Some(wg_counts.fine);
self.fine_resources = Some(FineResources {
config_buf,
Expand Down Expand Up @@ -456,6 +466,11 @@ impl Render {
);
}
}
recording.push(crate::Command::Writeback {
image: fine.out_image,
config: fine.config_buf,
shader: shaders.writeback,
});
recording.free_resource(fine.config_buf);
recording.free_resource(fine.tile_buf);
recording.free_resource(fine.segments_buf);
Expand Down
14 changes: 13 additions & 1 deletion src/shaders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ pub struct FullShaders {
pub path_tiling_setup: ShaderId,
pub path_tiling: ShaderId,
pub fine: ShaderId,
pub writeback: ShaderId,
// 2-level dispatch works for CPU pathtag scan even for large
// inputs, 3-level is not yet implemented.
pub pathtag_is_cpu: bool,
Expand Down Expand Up @@ -114,12 +115,16 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result<FullShad

let mut force_gpu = false;

#[allow(unused_variables)]
let force_gpu_from: Option<&str> = None;

// Uncomment this to force use of GPU shaders from the specified shader and later even
// if `engine.use_cpu` is specified.
//let force_gpu_from = Some("binning");

// Use the GPU for the fine shader for now as the CPU shader is incomplete.
let force_gpu_from = Some("fine");

macro_rules! add_shader {
($name:ident, $bindings:expr, $defines:expr, $cpu:expr) => {{
if force_gpu_from == Some(stringify!($name)) {
Expand Down Expand Up @@ -282,7 +287,7 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result<FullShad
ImageRead(ImageFormat::Rgba8),
],
&full_config,
CpuShaderType::Missing
CpuShaderType::Present(cpu_shader::fine)
),
_ => add_shader!(
fine,
Expand All @@ -300,6 +305,12 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result<FullShad
CpuShaderType::Missing
),
};
let writeback = add_shader!(
writeback,
[Uniform, BufReadOnly, Image(ImageFormat::Rgba8)],
&empty,
CpuShaderType::Missing
);
Ok(FullShaders {
pathtag_reduce,
pathtag_reduce2,
Expand All @@ -321,6 +332,7 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result<FullShad
path_tiling_setup,
path_tiling,
fine,
writeback,
pathtag_is_cpu: engine.use_cpu,
})
}
Expand Down
Loading

0 comments on commit 0747e52

Please sign in to comment.