Skip to content
Snippets Groups Projects
Commit cfb7fbc0 authored by Lionel Landwerlin's avatar Lionel Landwerlin Committed by unerlige
Browse files

lib/i915/perf-config: extend the device info


This will allow equations to check for finer information on the
topology. Also add EuDualSubslicesSlice0123Count.

v2: Since the patches are now split, update version from 1.2.0 -> 1.3.0
v3: s/DIV_ROUND_UP/_DIV_ROUND_UP/ to fix compile warning

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
parent b951bd7d
No related branches found
No related tags found
No related merge requests found
...@@ -169,6 +169,7 @@ class Gen: ...@@ -169,6 +169,7 @@ class Gen:
"$EuSlicesTotalCount": { 'c': "perf->devinfo.n_eu_slices" }, "$EuSlicesTotalCount": { 'c': "perf->devinfo.n_eu_slices" },
"$EuSubslicesTotalCount": { 'c': "perf->devinfo.n_eu_sub_slices" }, "$EuSubslicesTotalCount": { 'c': "perf->devinfo.n_eu_sub_slices" },
"$EuDualSubslicesTotalCount": { 'c': "perf->devinfo.n_eu_sub_slices" }, "$EuDualSubslicesTotalCount": { 'c': "perf->devinfo.n_eu_sub_slices" },
"$EuDualSubslicesSlice0123Count": { 'c': "perf->devinfo.n_eu_sub_slices_half_slices" },
"$EuThreadsCount": { 'c': "perf->devinfo.eu_threads_count" }, "$EuThreadsCount": { 'c': "perf->devinfo.eu_threads_count" },
"$SliceMask": { 'c': "perf->devinfo.slice_mask" }, "$SliceMask": { 'c': "perf->devinfo.slice_mask" },
"$DualSubsliceMask": { 'c': "perf->devinfo.subslice_mask" }, "$DualSubsliceMask": { 'c': "perf->devinfo.subslice_mask" },
......
...@@ -155,6 +155,10 @@ intel_perf_for_devinfo(uint32_t device_id, ...@@ -155,6 +155,10 @@ intel_perf_for_devinfo(uint32_t device_id,
{ {
const struct intel_device_info *devinfo = intel_get_device_info(device_id); const struct intel_device_info *devinfo = intel_get_device_info(device_id);
struct intel_perf *perf; struct intel_perf *perf;
uint32_t subslice_mask_len;
uint32_t eu_mask_len;
uint32_t half_max_subslices;
uint64_t half_subslices_mask;
int bits_per_subslice; int bits_per_subslice;
if (!devinfo) if (!devinfo)
...@@ -182,6 +186,25 @@ intel_perf_for_devinfo(uint32_t device_id, ...@@ -182,6 +186,25 @@ intel_perf_for_devinfo(uint32_t device_id,
"%s", devinfo->codename); "%s", devinfo->codename);
} }
/* Store i915 topology. */
perf->devinfo.max_slices = topology->max_slices;
perf->devinfo.max_subslices_per_slice = topology->max_subslices;
perf->devinfo.max_eu_per_subslice = topology->max_eus_per_subslice;
subslice_mask_len =
topology->max_slices * topology->subslice_stride;
assert(sizeof(perf->devinfo.subslice_masks) >= subslice_mask_len);
memcpy(perf->devinfo.subslice_masks,
&topology->data[topology->subslice_offset],
subslice_mask_len);
eu_mask_len = topology->eu_stride *
topology->max_subslices * topology->max_slices;
assert(sizeof(perf->devinfo.eu_masks) >= eu_mask_len);
memcpy(perf->devinfo.eu_masks,
&topology->data[topology->eu_offset],
eu_mask_len);
/* On Gen11+ the equations from the xml files expect an 8bits /* On Gen11+ the equations from the xml files expect an 8bits
* mask per subslice, versus only 3bits on prior Gens. * mask per subslice, versus only 3bits on prior Gens.
*/ */
...@@ -207,6 +230,14 @@ intel_perf_for_devinfo(uint32_t device_id, ...@@ -207,6 +230,14 @@ intel_perf_for_devinfo(uint32_t device_id,
perf->devinfo.n_eu_slices = __builtin_popcount(perf->devinfo.slice_mask); perf->devinfo.n_eu_slices = __builtin_popcount(perf->devinfo.slice_mask);
perf->devinfo.n_eu_sub_slices = __builtin_popcount(perf->devinfo.subslice_mask); perf->devinfo.n_eu_sub_slices = __builtin_popcount(perf->devinfo.subslice_mask);
/* Compute number of subslices/dualsubslices in first half of
* the GPU.
*/
half_max_subslices = topology->max_subslices / 2;
half_subslices_mask = perf->devinfo.subslice_mask &
((1 << half_max_subslices) - 1);
perf->devinfo.n_eu_sub_slices_half_slices = __builtin_popcount(half_subslices_mask);
/* Valid on most generations except Gen9LP. */ /* Valid on most generations except Gen9LP. */
perf->devinfo.eu_threads_count = 7; perf->devinfo.eu_threads_count = 7;
......
...@@ -32,7 +32,11 @@ extern "C" { ...@@ -32,7 +32,11 @@ extern "C" {
#include "igt_list.h" #include "igt_list.h"
struct intel_device_info; #define _DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
#define INTEL_DEVICE_MAX_SLICES (6) /* Maximum on gfx10 */
#define INTEL_DEVICE_MAX_SUBSLICES (8) /* Maximum on gfx11 */
#define INTEL_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
struct intel_perf_devinfo { struct intel_perf_devinfo {
char devname[20]; char devname[20];
...@@ -62,12 +66,66 @@ struct intel_perf_devinfo { ...@@ -62,12 +66,66 @@ struct intel_perf_devinfo {
uint64_t n_eu_slices; uint64_t n_eu_slices;
/* Total number of subslices/dualsubslices */ /* Total number of subslices/dualsubslices */
uint64_t n_eu_sub_slices; uint64_t n_eu_sub_slices;
/* Number of subslices/dualsubslices in the first half of the
* slices.
*/
uint64_t n_eu_sub_slices_half_slices;
/* Mask of available subslices/dualsubslices */ /* Mask of available subslices/dualsubslices */
uint64_t subslice_mask; uint64_t subslice_mask;
/* Mask of available slices */ /* Mask of available slices */
uint64_t slice_mask; uint64_t slice_mask;
/* Number of threads in one EU */ /* Number of threads in one EU */
uint64_t eu_threads_count; uint64_t eu_threads_count;
/**
* Maximum number of slices present on this device (can be more than
* num_slices if some slices are fused).
*/
uint16_t max_slices;
/**
* Maximum number of subslices per slice present on this device (can be more
* than the maximum value in the num_subslices[] array if some subslices are
* fused).
*/
uint16_t max_subslices_per_slice;
/**
* Stride to access subslice_masks[].
*/
uint16_t subslice_slice_stride;
/**
* Maximum number of EUs per subslice (can be more than
* num_eu_per_subslice if some EUs are fused off).
*/
uint16_t max_eu_per_subslice;
/**
* Strides to access eu_masks[].
*/
uint16_t eu_slice_stride;
uint16_t eu_subslice_stride;
/**
* A bit mask of the slices available.
*/
uint8_t slice_masks[_DIV_ROUND_UP(INTEL_DEVICE_MAX_SLICES, 8)];
/**
* An array of bit mask of the subslices available, use subslice_slice_stride
* to access this array.
*/
uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES *
_DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)];
/**
* An array of bit mask of EUs available, use eu_slice_stride &
* eu_subslice_stride to access this array.
*/
uint8_t eu_masks[INTEL_DEVICE_MAX_SLICES *
INTEL_DEVICE_MAX_SUBSLICES *
_DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)];
}; };
typedef enum { typedef enum {
...@@ -217,6 +275,31 @@ struct intel_perf { ...@@ -217,6 +275,31 @@ struct intel_perf {
struct drm_i915_perf_record_header; struct drm_i915_perf_record_header;
struct drm_i915_query_topology_info; struct drm_i915_query_topology_info;
/**
 * Test the bit for @slice in devinfo->slice_masks[].
 *
 * The mask is a byte array holding one bit per slice, eight slices per
 * byte. No range check is done on @slice; the caller must pass an index
 * that falls inside the array.
 *
 * Returns true when the bit for @slice is set, false otherwise.
 */
static inline bool
intel_perf_devinfo_slice_available(const struct intel_perf_devinfo *devinfo,
				   int slice)
{
	const int byte_idx = slice / 8;
	const unsigned int bit = 1U << (slice % 8);

	return (devinfo->slice_masks[byte_idx] & bit) != 0;
}
/**
 * Test the bit for (@slice, @subslice) in devinfo->subslice_masks[].
 *
 * Each slice owns devinfo->subslice_slice_stride bytes in the array;
 * within that span there is one bit per subslice, eight per byte. No
 * range check is done on the indices.
 *
 * Returns true when the bit for @subslice is set, false otherwise.
 */
static inline bool
intel_perf_devinfo_subslice_available(const struct intel_perf_devinfo *devinfo,
				      int slice, int subslice)
{
	const int byte_idx = slice * devinfo->subslice_slice_stride +
			     subslice / 8;
	const unsigned int bit = 1U << (subslice % 8);

	return (devinfo->subslice_masks[byte_idx] & bit) != 0;
}
/**
 * Test the bit for (@slice, @subslice, @eu) in devinfo->eu_masks[].
 *
 * The byte offset of a subslice's EU bits is
 * slice * eu_slice_stride + subslice * eu_subslice_stride; from there
 * the array holds one bit per EU, eight per byte. No range check is
 * done on the indices.
 *
 * Returns true when the bit for @eu is set, false otherwise.
 */
static inline bool
intel_perf_devinfo_eu_available(const struct intel_perf_devinfo *devinfo,
				int slice, int subslice, int eu)
{
	/* Start of this subslice's EU bytes inside eu_masks[]. */
	unsigned base = slice * devinfo->eu_slice_stride +
			subslice * devinfo->eu_subslice_stride;
	const unsigned int bit = 1U << (eu % 8);

	return (devinfo->eu_masks[base + eu / 8] & bit) != 0;
}
struct intel_perf *intel_perf_for_fd(int drm_fd); struct intel_perf *intel_perf_for_fd(int drm_fd);
struct intel_perf *intel_perf_for_devinfo(uint32_t device_id, struct intel_perf *intel_perf_for_devinfo(uint32_t device_id,
uint32_t revision, uint32_t revision,
......
...@@ -341,7 +341,7 @@ pkgconf.set('prefix', get_option('prefix')) ...@@ -341,7 +341,7 @@ pkgconf.set('prefix', get_option('prefix'))
pkgconf.set('exec_prefix', '${prefix}') pkgconf.set('exec_prefix', '${prefix}')
pkgconf.set('libdir', '${prefix}/@0@'.format(get_option('libdir'))) pkgconf.set('libdir', '${prefix}/@0@'.format(get_option('libdir')))
pkgconf.set('includedir', '${prefix}/@0@'.format(get_option('includedir'))) pkgconf.set('includedir', '${prefix}/@0@'.format(get_option('includedir')))
pkgconf.set('i915_perf_version', '1.2.0') pkgconf.set('i915_perf_version', '1.3.0')
configure_file( configure_file(
input : 'i915-perf.pc.in', input : 'i915-perf.pc.in',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment