Commit 66427494 authored by Jason Ekstrand's avatar Jason Ekstrand Committed by Marge Bot
Browse files

intel/dev: Add a max_cs_workgroup_threads field



This is distinct form max_cs_threads because it also encodes
restrictions about the way we use GPGPU/COMPUTE_WALKER.  This gets rid
of the MIN2(64, devinfo->max_cs_threads) we have scattered all over the
driver and puts it in a central place.
Reviewed-by: Caio Oliveira's avatarCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <mesa/mesa!11861>
parent 915e5a8c
......@@ -545,8 +545,7 @@ crocus_get_compute_param(struct pipe_screen *pscreen,
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
const uint32_t max_invocations = 32 * max_threads;
const uint32_t max_invocations = 32 * devinfo->max_cs_workgroup_threads;
if (devinfo->ver < 7)
return 0;
......
......@@ -516,9 +516,7 @@ iris_get_compute_param(struct pipe_screen *pscreen,
struct iris_screen *screen = (struct iris_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
/* Limit max_threads to 64 for the GPGPU_WALKER command. */
const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
const uint32_t max_invocations = 32 * max_threads;
const uint32_t max_invocations = 32 * devinfo->max_cs_workgroup_threads;
#define RET(x) do { \
if (ret) \
......
......@@ -10099,7 +10099,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
prog_data->local_size[2];
/* Limit max_threads to 64 for the GPGPU_WALKER command */
const uint32_t max_threads = MIN2(64, compiler->devinfo->max_cs_threads);
const uint32_t max_threads = compiler->devinfo->max_cs_workgroup_threads;
min_dispatch_width = util_next_power_of_two(
MAX2(8, DIV_ROUND_UP(local_workgroup_size, max_threads)));
assert(min_dispatch_width <= 32);
......@@ -10316,8 +10316,7 @@ brw_cs_simd_size_for_group_size(const struct intel_device_info *devinfo,
if ((INTEL_DEBUG & DEBUG_DO32) && (mask & simd32))
return 32;
/* Limit max_threads to 64 for the GPGPU_WALKER command */
const uint32_t max_threads = MIN2(64, devinfo->max_cs_threads);
const uint32_t max_threads = devinfo->max_cs_workgroup_threads;
if ((mask & simd8) && group_size <= 8 * max_threads) {
/* Prefer SIMD16 if can do without spilling. Matches logic in
......
......@@ -1228,6 +1228,21 @@ getparam(int fd, uint32_t param, int *value)
return true;
}
static void
update_cs_workgroup_threads(struct intel_device_info *devinfo)
{
/* GPGPU_WALKER::ThreadWidthCounterMaximum is U6-1 so the most threads we
* can program is 64 without going up to a rectangular group. This only
* impacts Haswell and TGL which have higher thread counts.
*
* INTERFACE_DESCRIPTOR_DATA::NumberofThreadsinGPGPUThreadGroup on Xe-HP+
* is 10 bits so we have no such restrictions.
*/
devinfo->max_cs_workgroup_threads =
devinfo->verx10 >= 125 ? devinfo->max_cs_threads :
MIN2(devinfo->max_cs_threads, 64);
}
bool
intel_get_device_info_from_pci_id(int pci_id,
struct intel_device_info *devinfo)
......@@ -1302,6 +1317,8 @@ intel_get_device_info_from_pci_id(int pci_id,
if (devinfo->verx10 == 0)
devinfo->verx10 = devinfo->ver * 10;
update_cs_workgroup_threads(devinfo);
devinfo->chipset_id = pci_id;
return true;
}
......@@ -1434,6 +1451,8 @@ fixup_chv_device_info(struct intel_device_info *devinfo)
if (max_cs_threads > devinfo->max_cs_threads)
devinfo->max_cs_threads = max_cs_threads;
update_cs_workgroup_threads(devinfo);
/* Braswell is even more annoying. Its marketing name isn't determinable
* from the PCI ID and is also dependent on fusing.
*/
......
......@@ -214,6 +214,17 @@ struct intel_device_info
*/
unsigned max_cs_threads;
/**
* Maximum number of threads per workgroup supported by the GPGPU_WALKER or
* COMPUTE_WALKER command.
*
* This may be smaller than max_cs_threads as it takes into account added
* restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads
* expresses the total parallelism of the GPU, this expresses the maximum
* number of threads we can dispatch in a single workgroup.
*/
unsigned max_cs_workgroup_threads;
struct {
/**
* Fixed size of the URB.
......
......@@ -1899,8 +1899,7 @@ void anv_GetPhysicalDeviceProperties(
pdevice->has_bindless_images && pdevice->has_a64_buffer_access
? UINT32_MAX : MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
/* Limit max_threads to 64 for the GPGPU_WALKER command */
const uint32_t max_workgroup_size = 32 * MIN2(64, devinfo->max_cs_threads);
const uint32_t max_workgroup_size = 32 * devinfo->max_cs_workgroup_threads;
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
......@@ -2537,8 +2536,7 @@ void anv_GetPhysicalDeviceProperties2(
STATIC_ASSERT(8 <= BRW_SUBGROUP_SIZE && BRW_SUBGROUP_SIZE <= 32);
props->minSubgroupSize = 8;
props->maxSubgroupSize = 32;
/* Limit max_threads to 64 for the GPGPU_WALKER command. */
props->maxComputeWorkgroupSubgroups = MIN2(64, pdevice->info.max_cs_threads);
props->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
break;
}
......
......@@ -840,14 +840,8 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
/* Maximum number of scalar compute shader invocations that can be run in
* parallel in the same subslice assuming SIMD32 dispatch.
*
* We don't advertise more than 64 threads, because we are limited to 64 by
* our usage of thread_width_max in the gpgpu walker command. This only
* currently impacts Haswell, which otherwise might be able to advertise 70
* threads. With SIMD32 and 64 threads, Haswell still provides twice the
* required the number of invocation needed for ARB_compute_shader.
*/
const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
const unsigned max_threads = devinfo->max_cs_workgroup_threads;
const uint32_t max_invocations = 32 * max_threads;
ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment