Skip to content

Commit 2223f6e

Browse files
authored
Meshlet fix software rasterization (#16049)
# Objective 1. Prevent weird glitches with stray pixels scattered around the scene ![image](https://github.com/user-attachments/assets/f12adb38-5996-4dc7-bea6-bd326b7317e1) 2. Prevent weird glitchy full-screen triangles that pop up and destroy perf (SW rasterizing huge triangles is slow) ![image](https://github.com/user-attachments/assets/d3705427-13a5-47bc-a54b-756f0409da0b) ## Solution 1. Use floating-point math in the SW rasterizer bounding box calculation to handle negative vertices, and add backface culling 2. Force hardware raster for clusters that clip the near plane, and let the hardware rasterizer handle the clipping. I also adjusted the SW rasterizer threshold to < 64 pixels (a little bit better perf in my test scene, but I still need to do a more comprehensive test), and enabled backface culling for the hardware raster pipeline. ## Testing - Did you test these changes? If so, how? - Yes, on an example scene. Issues no longer occur. - Are there any parts that need more testing? - No. - How can other people (reviewers) test your changes? Is there anything specific they need to know? - Run the meshlet example.
1 parent fe4f44b commit 2223f6e

File tree

3 files changed

+42
-39
lines changed

3 files changed

+42
-39
lines changed

crates/bevy_pbr/src/meshlet/cull_clusters.wgsl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,13 @@ fn cull_clusters(
126126
aabb_width_pixels = (aabb.z - aabb.x) * view.viewport.z;
127127
aabb_height_pixels = (aabb.w - aabb.y) * view.viewport.w;
128128
#endif
129-
let cluster_is_small = all(vec2(aabb_width_pixels, aabb_height_pixels) < vec2(32.0)); // TODO: Nanite does something different. Come up with my own heuristic.
129+
let cluster_is_small = all(vec2(aabb_width_pixels, aabb_height_pixels) < vec2(64.0));
130+
131+
// Let the hardware rasterizer handle near-plane clipping
132+
let not_intersects_near_plane = dot(view.frustum[4u], culling_bounding_sphere_center) > culling_bounding_sphere_radius;
130133

131-
// TODO: Also check if needs depth clipping
132134
var buffer_slot: u32;
133-
if cluster_is_small {
135+
if cluster_is_small && not_intersects_near_plane {
134136
// Append this cluster to the list for software rasterization
135137
buffer_slot = atomicAdd(&meshlet_software_raster_indirect_args.x, 1u);
136138
} else {

crates/bevy_pbr/src/meshlet/pipelines.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ impl FromWorld for MeshletPipelines {
249249
topology: PrimitiveTopology::TriangleList,
250250
strip_index_format: None,
251251
front_face: FrontFace::Ccw,
252-
cull_mode: None,
252+
cull_mode: Some(Face::Back),
253253
unclipped_depth: false,
254254
polygon_mode: PolygonMode::Fill,
255255
conservative: false,
@@ -292,7 +292,7 @@ impl FromWorld for MeshletPipelines {
292292
topology: PrimitiveTopology::TriangleList,
293293
strip_index_format: None,
294294
front_face: FrontFace::Ccw,
295-
cull_mode: None,
295+
cull_mode: Some(Face::Back),
296296
unclipped_depth: false,
297297
polygon_mode: PolygonMode::Fill,
298298
conservative: false,
@@ -336,7 +336,7 @@ impl FromWorld for MeshletPipelines {
336336
topology: PrimitiveTopology::TriangleList,
337337
strip_index_format: None,
338338
front_face: FrontFace::Ccw,
339-
cull_mode: None,
339+
cull_mode: Some(Face::Back),
340340
unclipped_depth: false,
341341
polygon_mode: PolygonMode::Fill,
342342
conservative: false,

crates/bevy_pbr/src/meshlet/visibility_buffer_software_raster.wgsl

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
/// Compute shader for rasterizing small clusters into a visibility buffer.
2222

23-
// TODO: Subpixel precision and top-left rule
23+
// TODO: Fixed-point math and top-left rule
2424

2525
var<workgroup> viewport_vertices: array<vec3f, 255>;
2626

@@ -79,98 +79,99 @@ fn rasterize_cluster(
7979
let vertex_2 = viewport_vertices[vertex_ids[0]];
8080
let packed_ids = (cluster_id << 7u) | triangle_id;
8181

82-
// Compute triangle bounding box
83-
let min_x = u32(min3(vertex_0.x, vertex_1.x, vertex_2.x));
84-
let min_y = u32(min3(vertex_0.y, vertex_1.y, vertex_2.y));
85-
var max_x = u32(ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x)));
86-
var max_y = u32(ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y)));
87-
max_x = min(max_x, u32(view.viewport.z) - 1u);
88-
max_y = min(max_y, u32(view.viewport.w) - 1u);
89-
if any(vec2(min_x, min_y) > vec2(max_x, max_y)) { return; }
82+
// Backface culling
83+
let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy);
84+
if triangle_double_area <= 0.0 { return; }
9085

9186
// Setup triangle gradients
9287
let w_x = vec3(vertex_1.y - vertex_2.y, vertex_2.y - vertex_0.y, vertex_0.y - vertex_1.y);
9388
let w_y = vec3(vertex_2.x - vertex_1.x, vertex_0.x - vertex_2.x, vertex_1.x - vertex_0.x);
94-
let triangle_double_area = edge_function(vertex_0.xy, vertex_1.xy, vertex_2.xy); // TODO: Reuse earlier calculations and take advantage of summing to 1
9589
let vertices_z = vec3(vertex_0.z, vertex_1.z, vertex_2.z) / triangle_double_area;
9690
let z_x = dot(vertices_z, w_x);
9791
let z_y = dot(vertices_z, w_y);
9892

93+
// Compute triangle bounding box
94+
var min_x = floor(min3(vertex_0.x, vertex_1.x, vertex_2.x));
95+
var min_y = floor(min3(vertex_0.y, vertex_1.y, vertex_2.y));
96+
var max_x = ceil(max3(vertex_0.x, vertex_1.x, vertex_2.x));
97+
var max_y = ceil(max3(vertex_0.y, vertex_1.y, vertex_2.y));
98+
min_x = max(min_x, 0.0);
99+
min_y = max(min_y, 0.0);
100+
max_x = min(max_x, view.viewport.z - 1.0);
101+
max_y = min(max_y, view.viewport.w - 1.0);
102+
99103
// Setup initial triangle equations
100-
let starting_pixel = vec2(f32(min_x), f32(min_y)) + 0.5;
104+
let starting_pixel = vec2(min_x, min_y) + 0.5;
101105
var w_row = vec3(
102-
// TODO: Reuse earlier calculations and take advantage of summing to 1
103106
edge_function(vertex_1.xy, vertex_2.xy, starting_pixel),
104107
edge_function(vertex_2.xy, vertex_0.xy, starting_pixel),
105108
edge_function(vertex_0.xy, vertex_1.xy, starting_pixel),
106109
);
107110
var z_row = dot(vertices_z, w_row);
108-
let view_width = u32(view.viewport.z);
109-
var frag_coord_1d_row = min_y * view_width;
110111

111112
// Rasterize triangle
112-
if subgroupAny(max_x - min_x > 4u) {
113+
if subgroupAny(max_x - min_x > 4.0) {
113114
// Scanline setup
114115
let edge_012 = -w_x;
115116
let open_edge = edge_012 < vec3(0.0);
116117
let inverse_edge_012 = select(1.0 / edge_012, vec3(1e8), edge_012 == vec3(0.0));
117-
let max_x_diff = vec3<f32>(max_x - min_x);
118-
for (var y = min_y; y <= max_y; y++) {
118+
let max_x_diff = vec3(max_x - min_x);
119+
for (var y = min_y; y <= max_y; y += 1.0) {
119120
// Calculate start and end X interval for pixels in this row within the triangle
120121
let cross_x = w_row * inverse_edge_012;
121122
let min_x2 = select(vec3(0.0), cross_x, open_edge);
122123
let max_x2 = select(cross_x, max_x_diff, open_edge);
123-
var x0 = u32(ceil(max3(min_x2[0], min_x2[1], min_x2[2])));
124-
var x1 = u32(min3(max_x2[0], max_x2[1], max_x2[2]));
124+
var x0 = ceil(max3(min_x2[0], min_x2[1], min_x2[2]));
125+
var x1 = min3(max_x2[0], max_x2[1], max_x2[2]);
125126

126-
var w = w_row + w_x * f32(x0);
127-
var z = z_row + z_x * f32(x0);
127+
var w = w_row + w_x * x0;
128+
var z = z_row + z_x * x0;
128129
x0 += min_x;
129130
x1 += min_x;
130131

131132
// Iterate scanline X interval
132-
for (var x = x0; x <= x1; x++) {
133+
for (var x = x0; x <= x1; x += 1.0) {
133134
// Check if point at pixel is within triangle (TODO: this shouldn't be needed, but there's bugs without it)
134135
if min3(w[0], w[1], w[2]) >= 0.0 {
135-
write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids);
136+
write_visibility_buffer_pixel(x, y, z, packed_ids);
136137
}
137138

138-
// Increment edge functions along the X-axis
139+
// Increment triangle equations along the X-axis
139140
w += w_x;
140141
z += z_x;
141142
}
142143

143-
// Increment edge functions along the Y-axis
144+
// Increment triangle equations along the Y-axis
144145
w_row += w_y;
145146
z_row += z_y;
146-
frag_coord_1d_row += view_width;
147147
}
148148
} else {
149149
// Iterate over every pixel in the triangle's bounding box
150-
for (var y = min_y; y <= max_y; y++) {
150+
for (var y = min_y; y <= max_y; y += 1.0) {
151151
var w = w_row;
152152
var z = z_row;
153153

154-
for (var x = min_x; x <= max_x; x++) {
154+
for (var x = min_x; x <= max_x; x += 1.0) {
155155
// Check if point at pixel is within triangle
156156
if min3(w[0], w[1], w[2]) >= 0.0 {
157-
write_visibility_buffer_pixel(frag_coord_1d_row + x, z, packed_ids);
157+
write_visibility_buffer_pixel(x, y, z, packed_ids);
158158
}
159159

160-
// Increment edge functions along the X-axis
160+
// Increment triangle equations along the X-axis
161161
w += w_x;
162162
z += z_x;
163163
}
164164

165-
// Increment edge functions along the Y-axis
165+
// Increment triangle equations along the Y-axis
166166
w_row += w_y;
167167
z_row += z_y;
168-
frag_coord_1d_row += view_width;
169168
}
170169
}
171170
}
172171

173-
fn write_visibility_buffer_pixel(frag_coord_1d: u32, z: f32, packed_ids: u32) {
172+
fn write_visibility_buffer_pixel(x: f32, y: f32, z: f32, packed_ids: u32) {
173+
let frag_coord_1d = u32(y * view.viewport.z + x);
174+
174175
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
175176
let depth = bitcast<u32>(z);
176177
let visibility = (u64(depth) << 32u) | u64(packed_ids);

0 commit comments

Comments
 (0)