lvp: port ray intersection routines from nir_builder to C
The lavapipe ray intersection routines are pretty hairy, and it's easy to get lost in the nir_builder code when trying to understand (let alone modify) the math. This MR rewrites them in C, using vtn_bindgen2 to build drop-in replacements for the existing nir_builder routines.
/**
 * Test a ray against both child boxes of a BVH box node.
 *
 * node      Box node whose two children (bounds[] / children[]) are tested.
 * ray_tmax  Upper limit on the entry distance; a box entered at or beyond
 *           it is not reported.
 * origin    Ray origin.
 * dir       Ray direction; only used to find components that are zero.
 * inv_dir   Per-component multiplier applied to (bound - origin);
 *           presumably the reciprocal of dir (confirm with the caller).
 *
 * Returns the two child indices ordered by entry distance, nearest first.
 * A slot whose child was not hit holds 0xffffffff.
 */
uint2
lvp_build_intersect_ray_box(global struct lvp_bvh_box_node *node,
                            float ray_tmax, float3 origin, float3 dir,
                            float3 inv_dir)
{
   /* Both slots start out as "miss": invalid index, infinite distance. */
   uint2 hit_children = 0xffffffffu;
   float2 hit_t = INFINITY;

   /* Components in which the ray does not move use FLT_MAX as multiplier. */
   inv_dir = (dir == 0) ? FLT_MAX : inv_dir;

   for (int child = 0; child < 2; child++) {
      float3 box_lo = unpack_lvp_vec3(node->bounds[child].min);
      float3 box_hi = unpack_lvp_vec3(node->bounds[child].max);

      /* Ray parameters at which each pair of slab planes is crossed. */
      float3 t_lo = (box_lo - origin) * inv_dir;
      float3 t_hi = (box_hi - origin) * inv_dir;

      float3 t_near = min(t_lo, t_hi);
      float3 t_far = max(t_lo, t_hi);

      /* The ray is inside the box between the last entry and first exit. */
      float t_enter = MAX3(t_near.x, t_near.y, t_near.z);
      float t_exit = MIN3(t_far.x, t_far.y, t_far.z);

      /* An aabb whose min.x is NaN is inactive. NaN in any other component
       * is UB, so nothing else has to be checked.
       * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
       */
      if (isnan(box_lo.x)) {
         continue;
      }

      /* The exit must not lie before the entry or behind the origin. */
      if (!(t_exit >= max(0.0f, t_enter))) {
         continue;
      }

      /* The entry must lie before the current ray limit. */
      if (!(t_enter < ray_tmax)) {
         continue;
      }

      hit_children[child] = node->children[child];
      hit_t[child] = t_enter;
   }

   /* Put the closer child first; ties keep the stored order. */
   if (hit_t.y < hit_t.x) {
      return hit_children.yx;
   }

   return hit_children.xy;
}
Look at how legible that is!
Compare it with the existing nir_builder implementation that it replaces:
/**
 * Emit NIR that tests a ray against the two child boxes of a BVH box node.
 *
 * b          Builder the instructions are emitted into.
 * node_addr  Address of a struct lvp_bvh_box_node; fields are read with
 *            global loads at offsetof()-derived offsets from it.
 * ray_tmax   Upper limit on the entry distance (tmin must be below it).
 * origin     Ray origin (3 components, subtracted from the box bounds).
 * dir        Ray direction; only used to detect components equal to 0.
 * inv_dir    Per-component multiplier applied to (bound - origin);
 *            presumably the reciprocal of dir (confirm with the caller).
 *
 * Returns a 2-component value loaded from the "child_indices" variable:
 * the indices of the children that were hit, with the two entries swapped
 * when the second child's entry distance is smaller than the first's.
 * An entry whose child was not hit keeps its initial value 0xffffffff.
 */
static nir_def *
lvp_build_intersect_ray_box(nir_builder *b, nir_def *node_addr, nir_def *ray_tmax,
                            nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
   const struct glsl_type *vec2_type = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   const struct glsl_type *uvec2_type = glsl_vector_type(GLSL_TYPE_UINT, 2);
   /* Per-child entry distance, initialised to INFINITY ("no hit").
    * NOTE(review): the write mask 0xf names four components while the
    * variables have two; presumably the extra bits are ignored (confirm).
    */
   nir_variable *distances =
      nir_variable_create(b->shader, nir_var_shader_temp, vec2_type, "distances");
   nir_store_var(b, distances, nir_imm_vec2(b, INFINITY, INFINITY), 0xf);
   /* Per-child node index, initialised to 0xffffffff ("no child"). */
   nir_variable *child_indices =
      nir_variable_create(b->shader, nir_var_shader_temp, uvec2_type, "child_indices");
   nir_store_var(b, child_indices, nir_imm_ivec2(b, 0xffffffffu, 0xffffffffu), 0xf);
   /* Use FLT_MAX as the multiplier for every component where dir is 0. */
   inv_dir = nir_bcsel(b, nir_feq_imm(b, dir, 0), nir_imm_float(b, FLT_MAX), inv_dir);
   /* The loop runs at build time, so the test is emitted once per child. */
   for (int i = 0; i < 2; i++) {
      const uint32_t child_offset = offsetof(struct lvp_bvh_box_node, children[i]);
      /* Offsets of the x component of the min and max corner of child i. */
      const uint32_t coord_offsets[2] = {
         offsetof(struct lvp_bvh_box_node, bounds[i].min.x),
         offsetof(struct lvp_bvh_box_node, bounds[i].max.x),
      };
      /* One 32-bit value: the index of child i. */
      nir_def *child_index =
         nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset));
      /* Three 32-bit values each: [0] = min corner, [1] = max corner. */
      nir_def *node_coords[2] = {
         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0])),
         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1])),
      };
      /* If x of the aabb min is NaN, then this is an inactive aabb.
       * We don't need to care about any other components being NaN as that is UB.
       * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
       */
      nir_def *min_x = nir_channel(b, node_coords[0], 0);
      nir_def *min_x_is_not_nan =
         nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */
      /* (corner - origin) * inv_dir for the min and max corner. */
      nir_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
      nir_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);
      /* tmin: largest, over x/y/z, of the per-axis minimum of the two bounds. */
      nir_def *tmin =
         nir_fmax(b,
            nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
               nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
            nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
      /* tmax: smallest, over x/y/z, of the per-axis maximum of the two bounds. */
      nir_def *tmax =
         nir_fmin(b,
            nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
               nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
            nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
      /* Hit when the box is active, tmax >= max(0, tmin) and tmin < ray_tmax. */
      nir_push_if(b,
         nir_iand(b, min_x_is_not_nan,
            nir_iand(b, nir_fge(b, tmax, nir_fmax(b, nir_imm_float(b, 0.0f), tmin)),
               nir_flt(b, tmin, ray_tmax))));
      {
         /* The value is replicated into both lanes; the write mask 1u << i
          * restricts the store to component i.
          */
         nir_def *new_child_indices[2] = {child_index, child_index};
         nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 2), 1u << i);
         nir_def *new_distances[2] = {tmin, tmin};
         nir_store_var(b, distances, nir_vec(b, new_distances, 2), 1u << i);
      }
      nir_pop_if(b, NULL);
   }
   /* Read both results back, then swap the indices if child 1 has the
    * smaller distance. With equal distances the order is left as stored.
    */
   nir_def *ssa_distances = nir_load_var(b, distances);
   nir_def *ssa_indices = nir_load_var(b, child_indices);
   nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, 1), nir_channel(b, ssa_distances, 0)));
   {
      nir_store_var(b, child_indices,
         nir_vec2(b, nir_channel(b, ssa_indices, 1), nir_channel(b, ssa_indices, 0)),
         0b11);
   }
   nir_pop_if(b, NULL);
   /* Reload after the conditional store so the swapped order is returned. */
   return nir_load_var(b, child_indices);
}