Skip to content

Draft: Add clc support for panfrost and move afbc packing shaders to cl

Antonino Maniscalco requested to merge antonino/mesa:panclc into main

This MR ports (mostly copy&paste) Alyssa's work for using CL for internal driver shaders on the Asahi driver to Panfrost. It also includes a port of the AFBC packing shader to CL (WIP: there is still some NIR that I intend to port).

I've encountered strange behavior with CL shaders:

/*
 * Variant 1: body addresses are computed with 64-bit integer arithmetic and
 * only then cast to word pointers.
 *
 * Reads the uint4 entry at index src_idx from src, writes a (possibly
 * rewritten) copy of it to index dst_idx in dst, and copies the associated
 * body data from src to dst in 32-bit words.
 *
 *   dst, dst_idx   - destination buffer and uint4 index of the entry to write
 *   hdr_sz         - added to the metadata offset to locate the body in dst
 *   src, src_idx   - source buffer and uint4 index of the entry to read
 *   metadata,
 *   meta_idx       - table and index of the entry giving the body's
 *                    destination offset and size
 *   align          - not referenced in this body
 */
void
libpan_copy_superblock(uint8_t *dst, uint32_t dst_idx, uint64_t hdr_sz,
                   uint8_t *src, uint32_t src_idx,
                   struct pan_afbc_block_info *metadata, uint32_t meta_idx,
                   uint32_t align)
{

   /* Load the source entry; hdr.x is used as the byte offset of the body
    * relative to src. */
   uint4 hdr = ((uint4*)src)[src_idx];
   uint64_t src_bodyptr = (uint64_t)src + hdr.x;

   /* The body's destination offset is the metadata offset plus hdr_sz. */
   struct pan_afbc_block_info meta_entry = metadata[meta_idx];
   uint64_t dst_body_base_ptr = meta_entry.offset + hdr_sz;
   uint64_t dst_bodyptr = (uint64_t) dst + dst_body_base_ptr;
   uint32_t size = meta_entry.size;

   /* Build an entry whose first component is the new body offset; the 64-bit
    * offset is narrowed to a 32-bit vector component here. An entry whose
    * first component is 0 is written through unchanged. */
   uint4 hdr2 = (uint4)(dst_body_base_ptr, hdr.yzw);
   hdr = hdr.x == 0 ? hdr : hdr2;
   ((uint4 *)dst)[dst_idx] = hdr;

   /* Copy the body one 32-bit word at a time. size / 4 rounds down, so any
    * trailing size % 4 bytes are not copied.
    * NOTE(review): the word accesses assume both computed addresses are
    * 4-byte aligned - confirm against the producer of hdr.x and metadata. */
   uint32_t *wdst = (uint32_t*)dst_bodyptr;
   uint32_t *wsrc = (uint32_t*)src_bodyptr;
   for (uint32_t offset = 0; offset < size / 4; offset += 1) {
      wdst[offset] = wsrc[offset];
   }
}
/*
 * Variant 2: same steps as the integer-address version, but the body
 * addresses are computed with uint8_t pointer arithmetic and then assigned
 * to uint32_t pointers without an explicit cast.
 *
 * Reads the uint4 entry at index src_idx from src, writes a (possibly
 * rewritten) copy of it to index dst_idx in dst, and copies the associated
 * body data from src to dst in 32-bit words.
 *
 *   dst, dst_idx   - destination buffer and uint4 index of the entry to write
 *   hdr_sz         - added to the metadata offset to locate the body in dst
 *   src, src_idx   - source buffer and uint4 index of the entry to read
 *   metadata,
 *   meta_idx       - table and index of the entry giving the body's
 *                    destination offset and size
 *   align          - not referenced in this body
 */
void
libpan_copy_superblock(uint8_t *dst, uint32_t dst_idx, uint64_t hdr_sz,
                   uint8_t *src, uint32_t src_idx,
                   struct pan_afbc_block_info *metadata, uint32_t meta_idx,
                   uint32_t align)
{

   /* Load the source entry; hdr.x is used as the byte offset of the body
    * relative to src. */
   uint4 hdr = ((uint4*)src)[src_idx];
   uint8_t *src_bodyptr = src + hdr.x;

   /* The body's destination offset is the metadata offset plus hdr_sz. */
   struct pan_afbc_block_info meta_entry = metadata[meta_idx];
   uint64_t dst_body_base_ptr = meta_entry.offset + hdr_sz;
   uint8_t *dst_bodyptr = dst + dst_body_base_ptr;
   uint32_t size = meta_entry.size;

   /* Build an entry whose first component is the new body offset; the 64-bit
    * offset is narrowed to a 32-bit vector component here. An entry whose
    * first component is 0 is written through unchanged. */
   uint4 hdr2 = (uint4)(dst_body_base_ptr, hdr.yzw);
   hdr = hdr.x == 0 ? hdr : hdr2;
   ((uint4 *)dst)[dst_idx] = hdr;

   /* Copy the body one 32-bit word at a time. size / 4 rounds down, so any
    * trailing size % 4 bytes are not copied.
    * NOTE(review): these two initialisations convert uint8_t * to uint32_t *
    * implicitly (no cast), which is an incompatible-pointer-type conversion.
    * The word accesses also assume 4-byte alignment of both pointers. Either
    * may be relevant to this variant misbehaving - TODO confirm by inspecting
    * the generated NIR. */
   uint32_t *wdst = dst_bodyptr;
   uint32_t *wsrc = src_bodyptr;
   for (uint32_t offset = 0; offset < size / 4; offset += 1) {
      wdst[offset] = wsrc[offset];
   }
}

The first version above works; the second one doesn't. I'm not sure whether it is just UB due to unaligned pointers or whether I'm missing/messing up some NIR pass on CL shaders.

Edited by Antonino Maniscalco

Merge request reports