......@@ -65,10 +65,16 @@ NUMBER_OF_RETRIES_TIMEOUT_DETECTION = int(getenv("LAVA_NUMBER_OF_RETRIES_TIMEOUT
# How many attempts should be made when a timeout happens during LAVA device boot.
NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))
# Helper constants to colorize the job output
CONSOLE_LOG_COLOR_GREEN = "\x1b[1;32m"
CONSOLE_LOG_COLOR_RED = "\x1b[1;38;5;197m"
CONSOLE_LOG_COLOR_RESET = "\x1b[0m"
def print_log(msg):
print("{}: {}".format(datetime.now(), msg))
def fatal_err(msg):
print_log(msg)
sys.exit(1)
......@@ -234,6 +240,8 @@ def _call_proxy(fn, *args):
class LAVAJob:
color_status_map = {"pass": CONSOLE_LOG_COLOR_GREEN}
def __init__(self, proxy, definition):
self.job_id = None
self.proxy = proxy
......@@ -241,9 +249,11 @@ class LAVAJob:
self.last_log_line = 0
self.last_log_time = None
self.is_finished = False
self.status = "created"
def heartbeat(self):
self.last_log_time = datetime.now()
self.status = "running"
def validate(self) -> Optional[dict]:
"""Returns a dict with errors, if the validation fails.
......@@ -294,6 +304,27 @@ class LAVAJob:
f"Could not get LAVA job logs. Reason: {mesa_ci_err}"
) from mesa_ci_err
def parse_job_result_from_log(self, lava_lines: list[dict[str, str]]) -> None:
"""Use the console log to catch if the job has completed successfully or
not.
Returns true only the job finished by looking into the log result
parsing.
"""
log_lines = [l["msg"] for l in lava_lines if l["lvl"] == "target"]
for line in log_lines:
if result := re.search(r"hwci: mesa: (\S*)", line):
self.is_finished = True
self.status = result.group(1)
color = LAVAJob.color_status_map.get(self.status, CONSOLE_LOG_COLOR_RED)
print_log(
f"{color}"
f"LAVA Job finished with result: {self.status}"
f"{CONSOLE_LOG_COLOR_RESET}"
)
# We reached the end of the log; the hwci script has finished.
break
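As an aside, a minimal sketch of what this parsing does with a sample log entry (the entry below is illustrative, not taken from a real job):
    import re
    sample = [{"lvl": "target", "msg": "hwci: mesa: pass"}]
    for line in (entry["msg"] for entry in sample if entry["lvl"] == "target"):
        if result := re.search(r"hwci: mesa: (\S*)", line):
            print(result.group(1))  # prints "pass", which becomes the job status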
def find_exception_from_metadata(metadata, job_id):
if "result" not in metadata or metadata["result"] != "fail":
......@@ -320,34 +351,17 @@ def find_exception_from_metadata(metadata, job_id):
return metadata
def get_job_results(proxy, job_id, test_suite):
def find_lava_error(job) -> None:
# Look for infrastructure errors; raising an exception here lets
# retriable_follow_job retry the job.
results_yaml = _call_proxy(proxy.results.get_testjob_results_yaml, job_id)
results_yaml = _call_proxy(job.proxy.results.get_testjob_results_yaml, job.job_id)
results = yaml.load(results_yaml, Loader=loader(False))
for res in results:
metadata = res["metadata"]
find_exception_from_metadata(metadata, job_id)
find_exception_from_metadata(metadata, job.job_id)
results_yaml = _call_proxy(
proxy.results.get_testsuite_results_yaml, job_id, test_suite
)
results: list = yaml.load(results_yaml, Loader=loader(False))
if not results:
raise MesaCIException(
f"LAVA: no result for test_suite '{test_suite}'"
)
for metadata in results:
test_case = metadata["name"]
result = metadata["metadata"]["result"]
print_log(
f"LAVA: result for test_suite '{test_suite}', "
f"test_case '{test_case}': {result}"
)
if result != "pass":
return False
return True
# If we reach this point, the job ended without a hwci script result
# and no LAVA infrastructure problem was found
job.status = "fail"
def show_job_data(job):
......@@ -362,8 +376,8 @@ def parse_lava_lines(new_lines) -> list[str]:
if line["lvl"] in ["results", "feedback"]:
continue
elif line["lvl"] in ["warning", "error"]:
prefix = "\x1b[1;38;5;197m"
suffix = "\x1b[0m"
prefix = CONSOLE_LOG_COLOR_RED
suffix = CONSOLE_LOG_COLOR_RESET
elif line["lvl"] == "input":
prefix = "$ "
suffix = ""
......@@ -376,7 +390,7 @@ def parse_lava_lines(new_lines) -> list[str]:
return parsed_lines
def fetch_logs(job, max_idle_time):
def fetch_logs(job, max_idle_time) -> None:
# Poll to check for new logs, assuming that a prolonged period of
# silence means that the device has died and we should try it again
if datetime.now() - job.last_log_time > max_idle_time:
......@@ -398,16 +412,18 @@ def fetch_logs(job, max_idle_time):
# Retry the log fetching several times before exposing the error.
for _ in range(5):
with contextlib.suppress(MesaCIParseException):
new_lines = job.get_logs()
new_log_lines = job.get_logs()
break
else:
raise MesaCIParseException
parsed_lines = parse_lava_lines(new_lines)
parsed_lines = parse_lava_lines(new_log_lines)
for line in parsed_lines:
print_log(line)
job.parse_job_result_from_log(new_log_lines)
def follow_job_execution(job):
try:
......@@ -429,16 +445,22 @@ def follow_job_execution(job):
fetch_logs(job, max_idle_time)
show_job_data(job)
return get_job_results(job.proxy, job.job_id, "0_mesa")
# Mesa developers expect a simple pass/fail job result.
# Any other status at this point most likely indicates a LAVA
# infrastructure error.
if job.status not in ["pass", "fail"]:
find_lava_error(job)
def retriable_follow_job(proxy, job_definition):
def retriable_follow_job(proxy, job_definition) -> LAVAJob:
retry_count = NUMBER_OF_RETRIES_TIMEOUT_DETECTION
for attempt_no in range(1, retry_count + 2):
job = LAVAJob(proxy, job_definition)
try:
return follow_job_execution(job)
follow_job_execution(job)
return job
except MesaCIException as mesa_exception:
print_log(mesa_exception)
job.cancel()
......@@ -477,8 +499,8 @@ def main(args):
if args.validate_only:
return
has_job_passed = retriable_follow_job(proxy, job_definition)
exit_code = 0 if has_job_passed else 1
finished_job = retriable_follow_job(proxy, job_definition)
exit_code = 0 if finished_job.status == "pass" else 1
sys.exit(exit_code)
......
......@@ -187,6 +187,7 @@
image: registry.freedesktop.org/mupuf/valve-infra/mesa-trigger:2022-03-03.2
extends:
- .use-debian/x86_test-vk
timeout: 1h 40m
variables:
# No need by default to pull the whole repo
GIT_STRATEGY: none
......@@ -198,13 +199,13 @@
B2C_LOG_LEVEL: 6
B2C_POWEROFF_DELAY: 15
B2C_SESSION_END_REGEX: '^.*It''s now safe to turn off your computer\r$'
B2C_SESSION_REBOOT_REGEX: 'GPU hang detected!'
B2C_TIMEOUT_BOOT_MINUTES: 240
B2C_TIMEOUT_BOOT_RETRIES: 2
B2C_SESSION_REBOOT_REGEX: '(GPU hang detected!|\*ERROR\* ring [^\s]+ timeout|The CS has been cancelled because the context is lost)'
B2C_TIMEOUT_BOOT_MINUTES: 45
B2C_TIMEOUT_BOOT_RETRIES: 1
B2C_TIMEOUT_FIRST_MINUTES: 5
B2C_TIMEOUT_FIRST_RETRIES: 3
B2C_TIMEOUT_MINUTES: 2
B2C_TIMEOUT_OVERALL_MINUTES: 240
B2C_TIMEOUT_OVERALL_MINUTES: 90
B2C_TIMEOUT_RETRIES: 0
# As noted in the top description, we make a distinction between the
......
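As an aside, the widened B2C_SESSION_REBOOT_REGEX can be sanity-checked against representative kernel messages; a minimal standalone sketch (the sample lines are illustrative, not captured from a real session):
    import re
    reboot_regex = (
        r"(GPU hang detected!|\*ERROR\* ring [^\s]+ timeout"
        r"|The CS has been cancelled because the context is lost)"
    )
    samples = [
        "[drm:amdgpu_job_timedout] *ERROR* ring gfx_0.0.0 timeout",
        "The CS has been cancelled because the context is lost",
    ]
    assert all(re.search(reboot_regex, s) for s in samples)  # both trigger a reboot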
......@@ -38,7 +38,6 @@ from lava.lava_job_submitter import (
NUMBER_OF_RETRIES_TIMEOUT_DETECTION,
LAVAJob,
follow_job_execution,
get_job_results,
hide_sensitive_data,
retriable_follow_job,
)
......@@ -46,8 +45,12 @@ from lava.lava_job_submitter import (
NUMBER_OF_MAX_ATTEMPTS = NUMBER_OF_RETRIES_TIMEOUT_DETECTION + 1
def jobs_logs_response(finished=False, msg=None, lvl="target") -> Tuple[bool, str]:
def jobs_logs_response(finished=False, msg=None, lvl="target", result=None) -> Tuple[bool, str]:
timed_msg = {"dt": str(datetime.now()), "msg": "New message", "lvl": lvl}
if result:
timed_msg["lvl"] = "target"
timed_msg["msg"] = f"hwci: mesa: {result}"
logs = [timed_msg] if msg is None else msg
return finished, yaml.safe_dump(logs)
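For reference, a quick sketch of the helper's shape when asked for a final result line (using the definition above):
    finished, raw_yaml = jobs_logs_response(finished=True, result="pass")
    # finished is True; raw_yaml decodes to a single entry:
    # [{"dt": "<timestamp>", "msg": "hwci: mesa: pass", "lvl": "target"}]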
......@@ -131,7 +134,7 @@ def level_generator():
# Tests all known levels by default
yield from cycle(("results", "feedback", "warning", "error", "debug", "target"))
def generate_n_logs(n=1, tick_fn: Union[Generator, Iterable[int], int]=1, level_fn=level_generator):
def generate_n_logs(n=1, tick_fn: Union[Generator, Iterable[int], int]=1, level_fn=level_generator, result="pass"):
"""Simulate a log partitionated in n components"""
level_gen = level_fn()
......@@ -153,7 +156,7 @@ def generate_n_logs(n=1, tick_fn: Union[Generator, Iterable[int], int]=1, level_
yield jobs_logs_response(finished=False, msg=[], lvl=level)
time_travel.tick(tick_sec)
yield jobs_logs_response(finished=True)
yield jobs_logs_response(finished=True, result=result)
NETWORK_EXCEPTION = xmlrpc.client.ProtocolError("", 0, "test", {})
......@@ -192,7 +195,7 @@ PROXY_SCENARIOS = {
},
),
"no retries, but testsuite fails": (
generate_n_logs(n=1, tick_fn=0),
generate_n_logs(n=1, tick_fn=0, result="fail"),
does_not_raise(),
False,
{
......@@ -202,7 +205,7 @@ PROXY_SCENARIOS = {
},
),
"no retries, one testsuite fails": (
generate_n_logs(n=1, tick_fn=0),
generate_n_logs(n=1, tick_fn=0, result="fail"),
does_not_raise(),
False,
{
......@@ -220,7 +223,7 @@ PROXY_SCENARIOS = {
),
# If a protocol error happens, _call_proxy will retry without affecting timeouts
"unstable connection, ProtocolError followed by final message": (
(NETWORK_EXCEPTION, jobs_logs_response(finished=True)),
(NETWORK_EXCEPTION, jobs_logs_response(finished=True, result="pass")),
does_not_raise(),
True,
{},
......@@ -247,8 +250,8 @@ def test_retriable_follow_job(
):
with expectation:
proxy = mock_proxy(side_effect=side_effect, **proxy_args)
result = retriable_follow_job(proxy, "")
assert job_result == result
job: LAVAJob = retriable_follow_job(proxy, "")
assert job_result == (job.status == "pass")
WAIT_FOR_JOB_SCENARIOS = {
......@@ -270,7 +273,7 @@ def test_simulate_a_long_wait_to_start_a_job(
mock_proxy_waiting_time,
):
start_time = datetime.now()
result = retriable_follow_job(
job: LAVAJob = retriable_follow_job(
mock_proxy_waiting_time(
frozen_time, side_effect=side_effect, wait_time=wait_time
),
......@@ -280,7 +283,7 @@ def test_simulate_a_long_wait_to_start_a_job(
end_time = datetime.now()
delta_time = end_time - start_time
assert has_finished == result
assert has_finished == (job.status == "pass")
assert delta_time.total_seconds() >= wait_time
......@@ -316,11 +319,6 @@ def test_hide_sensitive_data(input, expectation, tag):
assert result == expectation
def test_get_job_results(mock_proxy):
proxy = mock_proxy()
get_job_results(proxy, 1, "0_mesa")
CORRUPTED_LOG_SCENARIOS = {
"too much subsequent corrupted data": (
[(False, "{'msg': 'Incomplete}")] * 100 + [jobs_logs_response(True)],
......
......@@ -143,7 +143,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virg
GL 4.1, GLSL 4.10 --- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virgl, zink, d3d12
GL_ARB_ES2_compatibility DONE (freedreno, i965, nv50, softpipe, v3d, vc4, panfrost, lima)
GL_ARB_get_program_binary DONE (freedreno, 0 or 1 binary formats)
GL_ARB_get_program_binary DONE (freedreno, v3d, 0 or 1 binary formats)
GL_ARB_separate_shader_objects DONE (all drivers)
GL_ARB_shader_precision DONE (i965/gen7+, all drivers that support GLSL 4.10)
GL_ARB_vertex_attrib_64bit DONE (i965/gen7+, softpipe, )
......
WGL_ARB_create_context_robustness
d3d12 ARB_robust_buffer_access_behavior
VK_EXT_robustness2 for lavapipe
VK_EXT_image_2d_view_of_3d on RADV
......@@ -221,28 +221,32 @@ CHIPSET(0x4907, sg1, "SG1", "Intel(R) Graphics")
CHIPSET(0x4908, dg1, "DG1", "Intel(R) Graphics")
CHIPSET(0x4909, dg1, "DG1", "Intel(R) Graphics")
/* Waiting on i915 upstream support */
#if 0
CHIPSET(0x4f80, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x4f81, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x4f82, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x4f83, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x4f84, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x4f87, dg2_g11, "DG2", "Intel(R) Graphics")
CHIPSET(0x4f88, dg2_g11, "DG2", "Intel(R) Graphics")
/* Commented devices are waiting on i915 upstream support */
/* CHIPSET(0x4f80, dg2_g10, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x4f81, dg2_g10, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x4f82, dg2_g10, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x4f83, dg2_g10, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x4f84, dg2_g10, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x4f87, dg2_g11, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x4f88, dg2_g11, "DG2", "Intel(R) Graphics") */
CHIPSET(0x5690, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x5691, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x5692, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x5693, dg2_g11, "DG2", "Intel(R) Graphics")
CHIPSET(0x5694, dg2_g11, "DG2", "Intel(R) Graphics")
CHIPSET(0x5695, dg2_g11, "DG2", "Intel(R) Graphics")
CHIPSET(0x56a0, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x56a1, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x56a2, dg2_g10, "DG2", "Intel(R) Graphics")
CHIPSET(0x56a5, dg2_g11, "DG2", "Intel(R) Graphics")
CHIPSET(0x56a6, dg2_g11, "DG2", "Intel(R) Graphics")
CHIPSET(0x5696, dg2_g12, "DG2", "Intel(R) Graphics")
CHIPSET(0x5697, dg2_g12, "DG2", "Intel(R) Graphics")
/* CHIPSET(0x56a0, dg2_g10, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x56a1, dg2_g10, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x56a2, dg2_g10, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x56a3, dg2_g12, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x56a4, dg2_g12, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x56a5, dg2_g11, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x56a6, dg2_g11, "DG2", "Intel(R) Graphics") */
CHIPSET(0x56b0, dg2_g11, "DG2", "Intel(R) Graphics")
CHIPSET(0x56b1, dg2_g11, "DG2", "Intel(R) Graphics")
CHIPSET(0x56c0, dg2_g10, "ATS-M", "Intel(R) Graphics")
CHIPSET(0x56c1, dg2_g11, "ATS-M", "Intel(R) Graphics")
#endif
/* CHIPSET(0x56b1, dg2_g11, "DG2", "Intel(R) Graphics") */
CHIPSET(0x56b2, dg2_g12, "DG2", "Intel(R) Graphics")
/* CHIPSET(0x56b3, dg2_g12, "DG2", "Intel(R) Graphics") */
/* CHIPSET(0x56c0, dg2_g10, "ATS-M", "Intel(R) Graphics") */
/* CHIPSET(0x56c1, dg2_g11, "ATS-M", "Intel(R) Graphics") */
......@@ -671,10 +671,6 @@ if vdpau_drivers_path == ''
vdpau_drivers_path = join_paths(get_option('libdir'), 'vdpau')
endif
if with_gallium_zink
dep_vulkan = dependency('vulkan')
endif
dep_dxheaders = null_dep
if with_gallium_d3d12 or with_microsoft_clc or with_microsoft_vk
dep_dxheaders = dependency('directx-headers', required : false)
......
......@@ -70,7 +70,9 @@ radv_stoney_vkcts:amd64:
variables:
# FIXME: Create this baseline for KABINI
GPU_VERSION: radv-kabini-aco
B2C_TIMEOUT_BOOT_MINUTES: 360
B2C_TIMEOUT_OVERALL_MINUTES: 360
B2C_TIMEOUT_BOOT_RETRIES: 0
B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=8g'
tags:
- amdgpu:codename:KABINI
......@@ -88,13 +90,15 @@ radv_stoney_vkcts:amd64:
GPU_VERSION: radv-stoney-aco
# Note, it only has a wee 32g disk!
B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=8g'
B2C_TIMEOUT_BOOT_MINUTES: 240
B2C_TIMEOUT_OVERALL_MINUTES: 240
B2C_TIMEOUT_BOOT_RETRIES: 0
tags:
- amdgpu:codename:STONEY
deqp-polaris10-valve:
extends:
- .deqp-test-valve
timeout: 3h 10m
variables:
GPU_VERSION: radv-polaris10-aco
FDO_CI_CONCURRENT: 16
......@@ -107,7 +111,6 @@ deqp-polaris10-valve:
deqp-vega10-valve:
extends:
- .deqp-test-valve
timeout: 3h 10m
variables:
GPU_VERSION: radv-vega10-aco
FDO_CI_CONCURRENT: 16
......@@ -118,7 +121,6 @@ deqp-vega10-valve:
deqp-renoir-valve:
extends:
- .deqp-test-valve
timeout: 2h 10m
variables:
GPU_VERSION: radv-renoir-aco
FDO_CI_CONCURRENT: 24
......@@ -129,7 +131,6 @@ deqp-renoir-valve:
deqp-navi10-valve:
extends:
- .deqp-test-valve
timeout: 2h 10m
variables:
GPU_VERSION: radv-navi10-aco
FDO_CI_CONCURRENT: 24
......@@ -144,7 +145,6 @@ deqp-navi10-valve:
deqp-navi21-valve:
extends:
- .deqp-test-valve
timeout: 2h 10m
variables:
GPU_VERSION: radv-sienna_cichlid-aco
FDO_CI_CONCURRENT: 16
......@@ -155,13 +155,12 @@ deqp-navi21-valve:
deqp-vangogh-valve:
extends:
- .deqp-test-valve
timeout: 7h 10m
timeout: 2h 10m
variables:
GPU_VERSION: radv-vangogh-aco
FDO_CI_CONCURRENT: 8
B2C_TIMEOUT_BOOT_MINUTES: 420 # 7 hours
B2C_TIMEOUT_OVERALL_MINUTES: 420 # 7 hours
B2C_TIMEOUT_MINUTES: 3
B2C_TIMEOUT_BOOT_MINUTES: 65
B2C_TIMEOUT_OVERALL_MINUTES: 120
B2C_KERNEL_CMDLINE_EXTRAS: 'b2c.swap=16g'
tags:
- amdgpu:codename:VANGOGH
......
......@@ -92,7 +92,7 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav
break;
case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
if (info->chip_class >= GFX11)
if (info->gfx_level >= GFX11)
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 256;
else
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 1024;
......@@ -126,7 +126,7 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav
*
* For shader-db stats, set num_vgprs that the hw actually uses.
*/
if (info->chip_class == GFX10_3) {
if (info->gfx_level == GFX10_3) {
conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
}
......
......@@ -67,7 +67,7 @@ struct ac_ib_parser {
unsigned num_dw;
const int *trace_ids;
unsigned trace_id_count;
enum chip_class chip_class;
enum amd_gfx_level gfx_level;
ac_debug_addr_callback addr_callback;
void *addr_callback_data;
......@@ -109,12 +109,12 @@ static void print_named_value(FILE *file, const char *name, uint32_t value, int
print_value(file, value, bits);
}
static const struct si_reg *find_register(enum chip_class chip_class, unsigned offset)
static const struct si_reg *find_register(enum amd_gfx_level gfx_level, unsigned offset)
{
const struct si_reg *table;
unsigned table_size;
switch (chip_class) {
switch (gfx_level) {
case GFX11:
table = gfx11_reg_table;
table_size = ARRAY_SIZE(gfx11_reg_table);
......@@ -154,17 +154,17 @@ static const struct si_reg *find_register(enum chip_class chip_class, unsigned o
return NULL;
}
const char *ac_get_register_name(enum chip_class chip_class, unsigned offset)
const char *ac_get_register_name(enum amd_gfx_level gfx_level, unsigned offset)
{
const struct si_reg *reg = find_register(chip_class, offset);
const struct si_reg *reg = find_register(gfx_level, offset);
return reg ? sid_strings + reg->name_offset : "(no name)";
}
void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, uint32_t value,
void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, unsigned offset, uint32_t value,
uint32_t field_mask)
{
const struct si_reg *reg = find_register(chip_class, offset);
const struct si_reg *reg = find_register(gfx_level, offset);
if (reg) {
const char *reg_name = sid_strings + reg->name_offset;
......@@ -252,7 +252,7 @@ static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset
}
for (i = 0; i < count; i++)
ac_dump_reg(f, ib->chip_class, reg + i * 4, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, reg + i * 4, ac_ib_get(ib), ~0);
}
static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
......@@ -297,30 +297,30 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
break;
case PKT3_ACQUIRE_MEM:
ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
if (ib->chip_class >= GFX10)
ac_dump_reg(f, ib->chip_class, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
if (ib->gfx_level >= GFX10)
ac_dump_reg(f, ib->gfx_level, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
break;
case PKT3_SURFACE_SYNC:
if (ib->chip_class >= GFX7) {
ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
if (ib->gfx_level >= GFX7) {
ac_dump_reg(f, ib->gfx_level, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
} else {
ac_dump_reg(f, ib->chip_class, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
}
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
break;
case PKT3_EVENT_WRITE: {
uint32_t event_dw = ac_ib_get(ib);
ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
ac_dump_reg(f, ib->gfx_level, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1);
......@@ -332,7 +332,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
}
case PKT3_EVENT_WRITE_EOP: {
uint32_t event_dw = ac_ib_get(ib);
ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
ac_dump_reg(f, ib->gfx_level, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
......@@ -352,10 +352,10 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
}
case PKT3_RELEASE_MEM: {
uint32_t event_dw = ac_ib_get(ib);
if (ib->chip_class >= GFX10) {
ac_dump_reg(f, ib->chip_class, R_490_RELEASE_MEM_OP, event_dw, ~0u);
if (ib->gfx_level >= GFX10) {
ac_dump_reg(f, ib->gfx_level, R_490_RELEASE_MEM_OP, event_dw, ~0u);
} else {
ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
ac_dump_reg(f, ib->gfx_level, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
......@@ -387,52 +387,52 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
break;
case PKT3_DRAW_INDEX_AUTO:
ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
break;
case PKT3_DRAW_INDEX_2:
ac_dump_reg(f, ib->chip_class, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
break;
case PKT3_INDEX_TYPE:
ac_dump_reg(f, ib->chip_class, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
break;
case PKT3_NUM_INSTANCES:
ac_dump_reg(f, ib->chip_class, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
break;
case PKT3_WRITE_DATA:
ac_dump_reg(f, ib->chip_class, R_370_CONTROL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_370_CONTROL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0);
/* The payload is written automatically */
break;
case PKT3_CP_DMA:
ac_dump_reg(f, ib->chip_class, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_415_COMMAND, ac_ib_get(ib), ~0);
break;
case PKT3_DMA_DATA:
ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_415_COMMAND, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, R_415_COMMAND, ac_ib_get(ib), ~0);
break;
case PKT3_INDIRECT_BUFFER_SI:
case PKT3_INDIRECT_BUFFER_CONST:
case PKT3_INDIRECT_BUFFER_CIK: {
uint32_t base_lo_dw = ac_ib_get(ib);
ac_dump_reg(f, ib->chip_class, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
ac_dump_reg(f, ib->gfx_level, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
uint32_t base_hi_dw = ac_ib_get(ib);
ac_dump_reg(f, ib->chip_class, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
ac_dump_reg(f, ib->gfx_level, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
uint32_t control_dw = ac_ib_get(ib);
ac_dump_reg(f, ib->chip_class, R_3F2_IB_CONTROL, control_dw, ~0);
ac_dump_reg(f, ib->gfx_level, R_3F2_IB_CONTROL, control_dw, ~0);
if (!ib->addr_callback)
break;
......@@ -590,7 +590,7 @@ static void format_ib_output(FILE *f, char *out)
* \param f file
* \param ib_ptr IB
* \param num_dw size of the IB
* \param chip_class chip class
* \param gfx_level gfx level
* \param trace_ids the last trace IDs that are known to have been reached
* and executed by the CP, typically read from a buffer
* \param trace_id_count The number of entries in the trace_ids array.
......@@ -599,7 +599,7 @@ static void format_ib_output(FILE *f, char *out)
* \param addr_callback_data user data for addr_callback
*/
void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_ids,
unsigned trace_id_count, enum chip_class chip_class,
unsigned trace_id_count, enum amd_gfx_level gfx_level,
ac_debug_addr_callback addr_callback, void *addr_callback_data)
{
struct ac_ib_parser ib = {0};
......@@ -607,7 +607,7 @@ void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_i
ib.num_dw = num_dw;
ib.trace_ids = trace_ids;
ib.trace_id_count = trace_id_count;
ib.chip_class = chip_class;
ib.gfx_level = gfx_level;
ib.addr_callback = addr_callback;
ib.addr_callback_data = addr_callback_data;
......@@ -637,7 +637,7 @@ void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_i
* \param f file
* \param ib IB
* \param num_dw size of the IB
* \param chip_class chip class
* \param gfx_level gfx level
* \param trace_ids the last trace IDs that are known to have been reached
* and executed by the CP, typically read from a buffer
* \param trace_id_count The number of entries in the trace_ids array.
......@@ -646,12 +646,12 @@ void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_i
* \param addr_callback_data user data for addr_callback
*/
void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsigned trace_id_count,
const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback,
const char *name, enum amd_gfx_level gfx_level, ac_debug_addr_callback addr_callback,
void *addr_callback_data)
{
fprintf(f, "------------------ %s begin ------------------\n", name);
ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count, chip_class, addr_callback,
ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count, gfx_level, addr_callback,
addr_callback_data);
fprintf(f, "------------------- %s end -------------------\n\n", name);
......@@ -660,11 +660,11 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsign
/**
* Parse dmesg and return TRUE if a VM fault has been detected.
*
* \param chip_class chip class
* \param gfx_level gfx level
* \param old_dmesg_timestamp previous dmesg timestamp parsed at init time
* \param out_addr detected VM fault addr
*/
bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timestamp,
bool ac_vm_fault_occured(enum amd_gfx_level gfx_level, uint64_t *old_dmesg_timestamp,
uint64_t *out_addr)
{
#ifdef _WIN32
......@@ -722,7 +722,7 @@ bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timesta
const char *header_line, *addr_line_prefix, *addr_line_format;
if (chip_class >= GFX9) {
if (gfx_level >= GFX9) {
/* Match this:
* ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
* ..: at page 0x0000000219f8f000 from 27
......@@ -802,7 +802,7 @@ static int compare_wave(const void *p1, const void *p2)
}
/* Return wave information. "waves" should be a large enough array. */
unsigned ac_get_wave_info(enum chip_class chip_class,
unsigned ac_get_wave_info(enum amd_gfx_level gfx_level,
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP])
{
#ifdef _WIN32
......@@ -811,7 +811,7 @@ unsigned ac_get_wave_info(enum chip_class chip_class,
char line[2000], cmd[128];
unsigned num_waves = 0;
sprintf(cmd, "umr -O halt_waves -wa %s", chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
sprintf(cmd, "umr -O halt_waves -wa %s", gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
FILE *p = popen(cmd, "r");
if (!p)
......
......@@ -56,20 +56,20 @@ struct ac_wave_info {
typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);
const char *ac_get_register_name(enum chip_class chip_class, unsigned offset);
void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, uint32_t value,
const char *ac_get_register_name(enum amd_gfx_level gfx_level, unsigned offset);
void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, unsigned offset, uint32_t value,
uint32_t field_mask);
void ac_parse_ib_chunk(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
unsigned trace_id_count, enum chip_class chip_class,
unsigned trace_id_count, enum amd_gfx_level gfx_level,
ac_debug_addr_callback addr_callback, void *addr_callback_data);
void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsigned trace_id_count,
const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback,
const char *name, enum amd_gfx_level gfx_level, ac_debug_addr_callback addr_callback,
void *addr_callback_data);
bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timestamp,
bool ac_vm_fault_occured(enum amd_gfx_level gfx_level, uint64_t *old_dmesg_timestamp,
uint64_t *out_addr);
unsigned ac_get_wave_info(enum chip_class chip_class,
unsigned ac_get_wave_info(enum amd_gfx_level gfx_level,
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);
#ifdef __cplusplus
......
......@@ -63,7 +63,7 @@ struct radeon_info {
uint32_t pci_id;
uint32_t pci_rev_id;
enum radeon_family family;
enum chip_class chip_class;
enum amd_gfx_level gfx_level;
uint32_t family_id;
uint32_t chip_external_rev;
uint32_t clock_crystal_freq;
......@@ -253,7 +253,7 @@ void ac_compute_driver_uuid(char *uuid, size_t size);
void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size);
void ac_print_gpu_info(struct radeon_info *info, FILE *f);
int ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family);
int ac_get_gs_table_depth(enum amd_gfx_level gfx_level, enum radeon_family family);
void ac_get_raster_config(struct radeon_info *info, uint32_t *raster_config_p,
uint32_t *raster_config_1_p, uint32_t *se_tile_repeat_p);
void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
......
......@@ -37,7 +37,7 @@ ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_
bool
ac_nir_lower_indirect_derefs(nir_shader *shader,
enum chip_class chip_class)
enum amd_gfx_level gfx_level)
{
bool progress = false;
......@@ -49,7 +49,7 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
glsl_get_natural_size_align_bytes);
/* LLVM doesn't support VGPR indexing on GFX9. */
bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
bool llvm_has_working_vgpr_indexing = gfx_level != GFX9;
/* TODO: Indirect indexing of GS inputs is unimplemented.
*
......
......@@ -72,7 +72,7 @@ ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
void
ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
enum chip_class chip_class,
enum amd_gfx_level gfx_level,
bool tes_reads_tessfactors,
uint64_t tes_inputs_read,
uint64_t tes_patch_inputs_read,
......@@ -88,17 +88,17 @@ ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
void
ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
enum chip_class chip_class,
enum amd_gfx_level gfx_level,
unsigned num_reserved_es_outputs);
void
ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
enum chip_class chip_class,
enum amd_gfx_level gfx_level,
unsigned num_reserved_es_outputs);
bool
ac_nir_lower_indirect_derefs(nir_shader *shader,
enum chip_class chip_class);
enum amd_gfx_level gfx_level);
void
ac_nir_lower_ngg_nogs(nir_shader *shader,
......@@ -127,6 +127,19 @@ void
ac_nir_lower_ngg_ms(nir_shader *shader,
unsigned wave_size);
void
ac_nir_apply_first_task_to_task_shader(nir_shader *shader);
void
ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
unsigned task_payload_entry_bytes,
unsigned task_num_entries);
void
ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
unsigned task_payload_entry_bytes,
unsigned task_num_entries);
nir_ssa_def *
ac_nir_cull_triangle(nir_builder *b,
nir_ssa_def *initially_accepted,
......
......@@ -42,7 +42,7 @@
typedef struct {
/* Which hardware generation we're dealing with */
enum chip_class chip_class;
enum amd_gfx_level gfx_level;
/* Number of ES outputs for which memory should be reserved.
* When compacted, this should be the number of linked ES outputs.
......@@ -127,7 +127,7 @@ lower_es_output_store(nir_builder *b,
b->cursor = nir_before_instr(instr);
nir_ssa_def *io_off = nir_build_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u);
if (st->chip_class <= GFX8) {
if (st->gfx_level <= GFX8) {
/* GFX6-8: ES is a separate HW stage, data is passed from ES to GS in VRAM. */
nir_ssa_def *ring = nir_build_load_ring_esgs_amd(b);
nir_ssa_def *es2gs_off = nir_build_load_ring_es2gs_offset_amd(b);
......@@ -193,11 +193,11 @@ gs_per_vertex_input_offset(nir_builder *b,
nir_intrinsic_instr *instr)
{
nir_src *vertex_src = nir_get_io_arrayed_index_src(instr);
nir_ssa_def *vertex_offset = st->chip_class >= GFX9
nir_ssa_def *vertex_offset = st->gfx_level >= GFX9
? gs_per_vertex_input_vertex_offset_gfx9(b, vertex_src)
: gs_per_vertex_input_vertex_offset_gfx6(b, vertex_src);
unsigned base_stride = st->chip_class >= GFX9 ? 1 : 64 /* Wave size on GFX6-8 */;
unsigned base_stride = st->gfx_level >= GFX9 ? 1 : 64 /* Wave size on GFX6-8 */;
nir_ssa_def *io_off = nir_build_calc_io_offset(b, instr, nir_imm_int(b, base_stride * 4u), base_stride);
nir_ssa_def *off = nir_iadd(b, io_off, vertex_offset);
return nir_imul_imm(b, off, 4u);
......@@ -212,7 +212,7 @@ lower_gs_per_vertex_input_load(nir_builder *b,
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
nir_ssa_def *off = gs_per_vertex_input_offset(b, st, intrin);
if (st->chip_class >= GFX9)
if (st->gfx_level >= GFX9)
return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off,
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
......@@ -230,11 +230,11 @@ filter_load_per_vertex_input(const nir_instr *instr, UNUSED const void *state)
void
ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
enum chip_class chip_class,
enum amd_gfx_level gfx_level,
unsigned num_reserved_es_outputs)
{
lower_esgs_io_state state = {
.chip_class = chip_class,
.gfx_level = gfx_level,
.num_reserved_es_outputs = num_reserved_es_outputs,
};
......@@ -246,11 +246,11 @@ ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
void
ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
enum chip_class chip_class,
enum amd_gfx_level gfx_level,
unsigned num_reserved_es_outputs)
{
lower_esgs_io_state state = {
.chip_class = chip_class,
.gfx_level = gfx_level,
.num_reserved_es_outputs = num_reserved_es_outputs,
};
......
/*
* Copyright © 2022 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "ac_nir.h"
#include "nir_builder.h"
#include "amdgfxregs.h"
#include "u_math.h"
/*
* These NIR passes are used to lower NIR cross-stage I/O intrinsics
* between task and mesh shader stages into the memory accesses
* that actually happen on the HW.
*
*/
typedef struct {
unsigned payload_entry_bytes;
unsigned draw_entry_bytes;
unsigned num_entries;
} lower_tsms_io_state;
typedef struct {
nir_ssa_def *hw_workgroup_id;
nir_ssa_def *api_workgroup_id;
} add_first_task_to_workgroup_id_state;
static bool filter_workgroup_id(const nir_instr *instr,
UNUSED const void *state)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
return intrin->intrinsic == nir_intrinsic_load_workgroup_id;
}
static nir_ssa_def *
replace_workgroup_id_use_first_task(nir_builder *b,
nir_instr *instr,
void *state)
{
add_first_task_to_workgroup_id_state *s = (add_first_task_to_workgroup_id_state *) state;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
assert(s->hw_workgroup_id);
if (s->hw_workgroup_id == &intrin->dest.ssa)
return NULL;
return s->api_workgroup_id;
}
void
ac_nir_apply_first_task_to_task_shader(nir_shader *shader)
{
/* The draw packets on RDNA2 GPUs don't support adding an offset to the task shader
* workgroups, so we have to emulate the firstTask feature for NV_mesh_shader.
*
* 1. Pass the address of the IB (indirect buffer) from the NV_mesh_shader draw call
* to the shader in an SGPR argument (2 SGPRs for address, 1 SGPR for stride).
* 2. Create a descriptor for the IB in the shader.
* 3. Load the firstTask value from the IB
* 4. Add the firstTask value to the workgroup ID and use the result instead of the
* workgroup ID generated by the HW.
*
* NOTE: This pass must run _before_ lowering the task shader outputs to memory
* accesses. The lowering uses the workgroup ID and that must be unchanged
* because it has to be the real HW workgroup ID.
*/
/* If the shader doesn't use workgroup ID, nothing to do here. */
if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID))
return;
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
nir_builder builder;
nir_builder *b = &builder; /* This is to avoid the & */
nir_builder_init(b, impl);
b->cursor = nir_before_cf_list(&impl->body);
/* This is the stride passed to vkCmdDrawMeshTasksIndirectNV */
nir_ssa_def *ib_stride = nir_load_task_ib_stride(b);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_ssa_def *first_task = NULL;
/* If the stride is zero, we assume that firstTask is also 0. */
nir_if *if_stride = nir_push_if(b, nir_ine(b, ib_stride, zero));
{
/* Address of the IB (indirect buffer) used by the current draw call. */
nir_ssa_def *ib_addr = nir_load_task_ib_addr(b);
/* Compose a 64-bit address from the IB address. */
nir_ssa_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ib_addr, 0), nir_channel(b, ib_addr, 1));
/* The IB needs to be addressed by draw ID * stride. */
addr = nir_iadd(b, addr, nir_u2u64(b, nir_imul(b, nir_load_draw_id(b), ib_stride)));
/* Byte offset of the firstTask field in VkDrawMeshTasksIndirectCommandNV. */
addr = nir_iadd_imm(b, addr, 4);
first_task = nir_build_load_global(b, 1, 32, addr, .access = ACCESS_NON_WRITEABLE | ACCESS_COHERENT);
}
nir_pop_if(b, if_stride);
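/* Merge the two paths: the loaded firstTask on the then-path, zero otherwise. */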
first_task = nir_if_phi(b, first_task, zero);
/* NV_mesh_shader workgroups are 1 dimensional so we only care about X here. */
nir_ssa_def *hw_workgroup_id = nir_load_workgroup_id(b, 32);
nir_ssa_def *api_workgroup_id_x = nir_iadd(b, nir_channel(b, hw_workgroup_id, 0), first_task);
nir_ssa_def *api_workgroup_id = nir_vec3(b, api_workgroup_id_x, zero, zero);
add_first_task_to_workgroup_id_state state = {
.hw_workgroup_id = hw_workgroup_id,
.api_workgroup_id = api_workgroup_id,
};
nir_shader_lower_instructions(shader,
filter_workgroup_id,
replace_workgroup_id_use_first_task,
&state);
nir_validate_shader(shader, "after including firstTask in the task shader workgroup ID");
}
static nir_ssa_def *
task_workgroup_index(nir_builder *b,
lower_tsms_io_state *s)
{
nir_ssa_def *id = nir_load_workgroup_id(b, 32);
/* NV_mesh_shader: workgroups are always 1D, so index is the same as ID.x */
return nir_channel(b, id, 0);
}
static nir_ssa_def *
task_ring_entry_index(nir_builder *b,
lower_tsms_io_state *s)
{
/* Task shader ring_entry shader argument:
*
* - It's a copy of write_ptr[31:0] from the task control buffer.
* - The same value (which is the initial value at dispatch)
* seems to be copied to all workgroups in the same dispatch,
* therefore a workgroup index needs to be added.
* - write_ptr must be initialized to num_entries, so ring_entry must be
* ANDed with num_entries - 1 to get the correct index.
* Note that num_entries must be a power of two.
*/
nir_ssa_def *ring_entry = nir_load_task_ring_entry_amd(b);
nir_ssa_def *idx = nir_iadd_nuw(b, ring_entry, task_workgroup_index(b, s));
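/* Wrap to the ring size; e.g. with num_entries = 256, an unwrapped
* index of 300 maps to entry 44.
*/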
return nir_iand_imm(b, idx, s->num_entries - 1);
}
static nir_ssa_def *
task_draw_ready_bit(nir_builder *b,
lower_tsms_io_state *s)
{
/* Value of the ready bit is 1 for odd and 0 for even passes through the draw ring.
*
* The ring_entry is a copy of the write_ptr. We use that to determine whether
* the current pass through the draw ring is odd or even, so we can write the
* correct value to the draw ready bit.
*
* This tells the firmware that it can now start launching mesh shader workgroups.
* The encoding of the last dword of the draw ring entry is:
* - bit 0: Draw ready bit.
* Its meaning flips on every pass through the entry.
* - bit 1: Packet end bit.
* The firmware uses this to mark the entry after the last one
* used by the current task dispatch.
* - bits [2:31] unused.
*
* Task shaders MUST write the draw ready bit to the draw ring
* before they finish. The firmware waits for the shader to write
* this bit before it reads the mesh dispatch size to launch the
* mesh shader workgroups.
*
* If the task shader doesn't write this bit, the HW hangs.
*/
nir_ssa_def *ring_entry = nir_load_task_ring_entry_amd(b);
nir_ssa_def *workgroup_index = task_workgroup_index(b, s);
nir_ssa_def *idx = nir_iadd_nuw(b, ring_entry, workgroup_index);
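/* Extract bit log2(num_entries) of the unwrapped index: it flips each time
* the write pointer wraps around the ring, which is exactly the parity of
* the current pass.
*/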
return nir_ubfe(b, idx, nir_imm_int(b, util_bitcount(s->num_entries - 1)), nir_imm_int(b, 1));
}
static nir_ssa_def *
mesh_ring_entry_index(nir_builder *b,
lower_tsms_io_state *s)
{
/* Mesh shader ring_entry shader argument:
*
* - It's a copy of the read_ptr[31:0] from the task control buffer.
* - All workgroups in the same task->mesh dispatch get the same value,
* which is fine because they need to read the same entry.
* - read_ptr must be initialized to num_entries, so ring_entry must be
* ANDed with num_entries - 1 to get the correct index.
* Note that num_entries must be a power of two.
*/
return nir_iand_imm(b, nir_load_task_ring_entry_amd(b), s->num_entries - 1);
}
static void
task_write_draw_ring(nir_builder *b,
nir_ssa_def *store_val,
unsigned const_off,
lower_tsms_io_state *s)
{
nir_ssa_def *ptr = task_ring_entry_index(b, s);
nir_ssa_def *ring = nir_load_ring_task_draw_amd(b);
nir_ssa_def *scalar_off = nir_imul_imm(b, ptr, s->draw_entry_bytes);
nir_ssa_def *vector_off = nir_imm_int(b, 0);
nir_store_buffer_amd(b, store_val, ring, vector_off, scalar_off,
.base = const_off, .memory_modes = nir_var_shader_out);
}
static bool
filter_task_output_or_payload(const nir_instr *instr,
UNUSED const void *state)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
return intrin->intrinsic == nir_intrinsic_store_output ||
intrin->intrinsic == nir_intrinsic_store_task_payload ||
intrin->intrinsic == nir_intrinsic_load_task_payload;
}
static nir_ssa_def *
lower_task_output_store(nir_builder *b,
nir_intrinsic_instr *intrin,
lower_tsms_io_state *s)
{
/* NV_mesh_shader:
* Task shaders should only have 1 output: TASK_COUNT
* which is the number of launched mesh shader workgroups in 1D.
*
* Task count is one dimensional, but the HW needs X, Y, Z.
* Use the shader's value for X, and write Y=1, Z=1.
*/
nir_ssa_def *store_val = nir_vec3(b, intrin->src[0].ssa,
nir_imm_int(b, 1),
nir_imm_int(b, 1));
task_write_draw_ring(b, store_val, 0, s);
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
}
static nir_ssa_def *
lower_task_payload_store(nir_builder *b,
nir_intrinsic_instr *intrin,
lower_tsms_io_state *s)
{
unsigned write_mask = nir_intrinsic_write_mask(intrin);
unsigned base = nir_intrinsic_base(intrin);
nir_ssa_def *store_val = intrin->src[0].ssa;
nir_ssa_def *addr = intrin->src[1].ssa;
nir_ssa_def *ring = nir_load_ring_task_payload_amd(b);
nir_ssa_def *ptr = task_ring_entry_index(b, s);
nir_ssa_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
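/* ring_off (the scalar offset) selects this workgroup's payload ring entry;
* addr then addresses bytes within that entry.
*/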
nir_store_buffer_amd(b, store_val, ring, addr, ring_off, .base = base,
.write_mask = write_mask,
.memory_modes = nir_var_mem_task_payload);
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
}