Skip to content

lvp: port ray intersection routines from nir_builder to C

Alyssa Rosenzweig requested to merge alyssa/mesa:bindgen+lvp into main

The lavapipe ray intersection routines are pretty hairy and it's easy to get lost in the nir_builder when trying to understand (let alone modify) the math. This MR rewrites them in C, using vtn_bindgen2 to let us build drop-in replacements for the existing nir_builder routines.

/**
 * Test a ray against the two child bounding boxes stored in a BVH box node.
 *
 * node:     box node whose bounds[0..1] and children[0..1] are examined
 * ray_tmax: a box is only accepted when its entry distance is below this
 * origin:   ray origin
 * dir:      ray direction
 * inv_dir:  presumably the per-component reciprocal of dir (not verifiable
 *           from here); components whose dir is exactly zero are overridden
 *           with FLT_MAX
 *
 * Returns both child indices with the smaller entry distance first. A slot
 * holds 0xffffffff when the corresponding box was rejected; rejected slots
 * keep a distance of INFINITY and therefore sort after accepted ones.
 */
uint2
lvp_build_intersect_ray_box(global struct lvp_bvh_box_node *node,
                            float ray_tmax, float3 origin, float3 dir,
                            float3 inv_dir)
{
   float2 hit_t = INFINITY;
   uint2 hit_child = 0xffffffffu;

   /* Replace the reciprocal on axes where the direction is exactly zero. */
   inv_dir = (dir == 0) ? FLT_MAX : inv_dir;

   for (int i = 0; i < 2; i++) {
      float3 box_lo = unpack_lvp_vec3(node->bounds[i].min);
      float3 box_hi = unpack_lvp_vec3(node->bounds[i].max);

      /* If x of the aabb min is NaN, then this is an inactive aabb.
       * We don't need to care about any other components being NaN as that is
       * UB.
       * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
       */
      if (isnan(box_lo.x))
         continue;

      /* Ray parameters at which each axis crosses the two box planes. */
      float3 t_lo = (box_lo - origin) * inv_dir;
      float3 t_hi = (box_hi - origin) * inv_dir;

      float3 t_near = min(t_lo, t_hi);
      float3 t_far = max(t_lo, t_hi);

      /* Latest entry and earliest exit across the three axes. */
      float t_enter = MAX3(t_near.x, t_near.y, t_near.z);
      float t_exit = MIN3(t_far.x, t_far.y, t_far.z);

      /* Written as positive comparisons so a NaN operand rejects the box. */
      bool overlaps = t_exit >= max(0.0f, t_enter);
      bool in_range = t_enter < ray_tmax;

      if (overlaps && in_range) {
         hit_child[i] = node->children[i];
         hit_t[i] = t_enter;
      }
   }

   /* Put the nearer child first. */
   if (hit_t.y < hit_t.x)
      return hit_child.yx;

   return hit_child.xy;
}

Look at how legible that is!

compare:

/**
 * Emit NIR that tests a ray against the two child boxes of the
 * lvp_bvh_box_node located at node_addr.
 *
 * b:         builder the instructions are emitted into
 * node_addr: global address of the box node
 * ray_tmax:  a box is only accepted when its entry distance is below this
 * origin:    ray origin
 * dir:       ray direction
 * inv_dir:   presumably the per-component reciprocal of dir (not verifiable
 *            from here); components whose dir equals zero are overridden
 *            with FLT_MAX
 *
 * Returns a load of the two-component "child_indices" variable: both child
 * indices with the smaller entry distance first. A slot holds 0xffffffff
 * when the corresponding box was rejected.
 */
static nir_def *
lvp_build_intersect_ray_box(nir_builder *b, nir_def *node_addr, nir_def *ray_tmax,
                            nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
   /* Results are kept in variables because they are written under emitted
    * control flow. Defaults: distance = INFINITY, index = 0xffffffff.
    */
   nir_variable *dist_var =
      nir_variable_create(b->shader, nir_var_shader_temp,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 2), "distances");
   nir_store_var(b, dist_var, nir_imm_vec2(b, INFINITY, INFINITY), 0xf);

   nir_variable *index_var =
      nir_variable_create(b->shader, nir_var_shader_temp,
                          glsl_vector_type(GLSL_TYPE_UINT, 2), "child_indices");
   nir_store_var(b, index_var, nir_imm_ivec2(b, 0xffffffffu, 0xffffffffu), 0xf);

   /* Replace the reciprocal on axes where the direction is zero. */
   nir_def *dir_is_zero = nir_feq_imm(b, dir, 0);
   inv_dir = nir_bcsel(b, dir_is_zero, nir_imm_float(b, FLT_MAX), inv_dir);

   for (int i = 0; i < 2; i++) {
      const uint32_t index_offset = offsetof(struct lvp_bvh_box_node, children[i]);
      const uint32_t min_offset = offsetof(struct lvp_bvh_box_node, bounds[i].min.x);
      const uint32_t max_offset = offsetof(struct lvp_bvh_box_node, bounds[i].max.x);

      nir_def *child_index =
         nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, index_offset));

      /* Each bound is fetched as three 32-bit components starting at .x. */
      nir_def *box_min =
         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, min_offset));
      nir_def *box_max =
         nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, max_offset));

      /* If x of the aabb min is NaN, then this is an inactive aabb.
       * We don't need to care about any other components being NaN as that is UB.
       * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
       */
      nir_def *min_x = nir_channel(b, box_min, 0);
      nir_def *min_x_is_nan = nir_fneu(b, min_x, min_x); /* NaN != NaN -> true */
      nir_def *box_is_active = nir_inot(b, min_x_is_nan);

      /* Ray parameters at which each axis crosses the two box planes. */
      nir_def *bound0 = nir_fmul(b, nir_fsub(b, box_min, origin), inv_dir);
      nir_def *bound1 = nir_fmul(b, nir_fsub(b, box_max, origin), inv_dir);

      /* tmin = max over axes of min(bound0, bound1), folded x, y, z. */
      nir_def *tmin = NULL;
      for (unsigned c = 0; c < 3; c++) {
         nir_def *axis_lo =
            nir_fmin(b, nir_channel(b, bound0, c), nir_channel(b, bound1, c));
         tmin = tmin ? nir_fmax(b, tmin, axis_lo) : axis_lo;
      }

      /* tmax = min over axes of max(bound0, bound1), folded x, y, z. */
      nir_def *tmax = NULL;
      for (unsigned c = 0; c < 3; c++) {
         nir_def *axis_hi =
            nir_fmax(b, nir_channel(b, bound0, c), nir_channel(b, bound1, c));
         tmax = tmax ? nir_fmin(b, tmax, axis_hi) : axis_hi;
      }

      /* Accept when: active && tmax >= max(0, tmin) && tmin < ray_tmax. */
      nir_def *clamped_tmin = nir_fmax(b, nir_imm_float(b, 0.0f), tmin);
      nir_def *overlaps = nir_fge(b, tmax, clamped_tmin);
      nir_def *in_range = nir_flt(b, tmin, ray_tmax);
      nir_def *accept = nir_iand(b, box_is_active, nir_iand(b, overlaps, in_range));

      nir_push_if(b, accept);
      {
         /* The write mask selects component i of each replicated vector. */
         const unsigned slot_mask = 1u << i;

         nir_def *index_pair[2] = {child_index, child_index};
         nir_store_var(b, index_var, nir_vec(b, index_pair, 2), slot_mask);

         nir_def *dist_pair[2] = {tmin, tmin};
         nir_store_var(b, dist_var, nir_vec(b, dist_pair, 2), slot_mask);
      }
      nir_pop_if(b, NULL);
   }

   nir_def *ssa_distances = nir_load_var(b, dist_var);
   nir_def *ssa_indices = nir_load_var(b, index_var);

   /* Swap the indices when child 1 is nearer than child 0. */
   nir_def *second_is_nearer =
      nir_flt(b, nir_channel(b, ssa_distances, 1), nir_channel(b, ssa_distances, 0));

   nir_push_if(b, second_is_nearer);
   {
      nir_def *swapped =
         nir_vec2(b, nir_channel(b, ssa_indices, 1), nir_channel(b, ssa_indices, 0));
      nir_store_var(b, index_var, swapped, 0x3);
   }
   nir_pop_if(b, NULL);

   return nir_load_var(b, index_var);
}

Merge request reports

Loading