Anv: starfield create compute pipeline times out.
Conclusion: wine/vkd3d is not giving fs_reg_alloc::assign_regs
enough time. The two solutions that could be pursued are making register allocation faster or having wine allow more time. Are there any other suggestions, or should this be closed?
Requires !25512 (merged)
export ENABLE_VK_LAYER_VALVE_cheako_shader_capture_1=1
export VK_LOADER_DEBUG=error,warn,layer
export VKD3D_FEATURE_LEVEL=12_1
export VKD3D_SHADER_MODEL=6_6
export VK_INSTANCE_LAYERS="${VK_INSTANCE_LAYERS}${VK_INSTANCE_LAYERS:+:}VK_LAYER_LUNARG_api_dump"
System specs: #9814
Edit: Forgot about this:
cheako@mx1:~$ cat .drirc
<?xml version="1.0" standalone="yes"?>
<driconf>
<device driver="anv">
<application name="Starfield" executable="Starfield.exe">
<!-- option name="force_vk_vendor" value="0x1002" /-->
<option name="shader_spilling_rate" value="15" />
</application>
</device>
</driconf>
shaders.tar.xz: This should be enough to reproduce the issue. Next, I'll try to turn this into a standalone application.
Edit: Forgot to add the stderr log: log.err
// Copyright (C) 2023 Michael Mestnik <cheako@mikemestnik.net>
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use core::slice::SlicePattern;
use std::fs::File;
#[allow(unused_imports)]
use std::hash::{Hash, Hasher};
use std::io::Write;
use std::sync::atomic::Ordering;
use std::sync::{mpsc, Arc};
use std::{panic::catch_unwind, sync::atomic::AtomicUsize};
use std::{slice, thread};
use ash::vk::{
self, AllocationCallbacks, ComputePipelineCreateInfo, Device, Pipeline, PipelineCache,
PipelineShaderStageCreateFlags, ShaderModule, ShaderModuleCreateFlags, ShaderStageFlags,
};
/// Process-wide counter bumped once per `create_compute_pipelines` call; the
/// value fetched for a call is used as the prefix of that call's dump file names.
static CTR: AtomicUsize = AtomicUsize::new(0);
/// Snapshot of one pipeline shader stage, built by `process_stage_create_info`.
struct ShaderData(
    /// SPIR-V words returned by `shader_module::get_spirv` for the stage's module.
    Arc<[u32]>,
    /// Shader-module create flags returned alongside the SPIR-V by `get_spirv`.
    ShaderModuleCreateFlags,
    /// The stage's shader module handle (`PipelineShaderStageCreateInfo::module`).
    ShaderModule,
    /// The stage bit (`PipelineShaderStageCreateInfo::stage`).
    ShaderStageFlags,
    /// The stage's create flags (`PipelineShaderStageCreateInfo::flags`).
    PipelineShaderStageCreateFlags,
    /// Entry-point name copied from `PipelineShaderStageCreateInfo::p_name`.
    Box<str>,
    /// `true` when `p_specialization_info` was non-null for the stage.
    bool,
);
impl std::fmt::Debug for ShaderData {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// let mut a = std::collections::hash_map::DefaultHasher::new();
f.debug_struct("ShaderData")
.field("module_crate_flags", &self.1.as_raw())
.field("stage_flags", &self.3.as_raw())
.field("state_create_flags", &self.4.as_raw())
.field("name", &self.5)
.field("bool", &self.6)
.finish()
}
}
/// Everything the watchdog thread needs to dump the shaders of one
/// `create_compute_pipelines` call.
#[derive(Debug)]
struct MyDumper {
    /// Value fetched from `CTR` for this call; used in the dump file names.
    ctr: usize,
    /// One entry per captured create info: the shader stage snapshot paired
    /// with the create info's pipeline layout.
    create_infos: Box<[(ShaderData, vk::PipelineLayout)]>,
}
/// Builds a [`ShaderData`] snapshot for one shader stage.
///
/// Returns `None` when `shader_module::get_spirv` has nothing recorded for the
/// stage's module.
///
/// `stage_create_info.p_name` must point to a valid NUL-terminated string.
/// Bytes that are not valid UTF-8 are replaced with U+FFFD rather than
/// panicking, since the name is only used for diagnostics.
fn process_stage_create_info(
    stage_create_info: &vk::PipelineShaderStageCreateInfo,
) -> Option<ShaderData> {
    let (spirv, shader_flags) = super::shader_module::get_spirv(&stage_create_info.module)?;
    // SAFETY: the caller hands us a stage create info whose `p_name` is a
    // valid NUL-terminated C string (see the function documentation).
    let entry_point: Box<str> = unsafe { std::ffi::CStr::from_ptr(stage_create_info.p_name) }
        .to_string_lossy()
        .into();
    Some(ShaderData(
        spirv,
        shader_flags,
        stage_create_info.module,
        stage_create_info.stage,
        stage_create_info.flags,
        entry_point,
        // Only the presence of specialization info is recorded, so a null
        // check is enough and the pointer is never dereferenced.
        !stage_create_info.p_specialization_info.is_null(),
    ))
}
pub(crate) unsafe extern "system" fn create_compute_pipelines(
device: Device,
pipeline_cache: PipelineCache,
create_info_count: u32,
p_create_infos: *const ComputePipelineCreateInfo,
p_allocator: *const AllocationCallbacks,
p_pipelines: *mut Pipeline,
) -> vk::Result {
let result = catch_unwind(|| {
let create_infos = unsafe { slice::from_raw_parts(p_create_infos, create_info_count as _) };
let (tx, rx) = mpsc::channel();
let ctr = CTR.fetch_add(1, Ordering::SeqCst);
let my_data = MyDumper {
ctr,
create_infos: create_infos
.iter()
.flat_map(|create_info| {
if !create_info.p_next.is_null() {
dbg!("ComputePipelineCreateInfo has next".len());
}
Some(create_info.stage)
.iter()
.filter_map(process_stage_create_info)
.map(|x| (x, create_info.layout))
.take_while(|_| create_info.p_next.is_null())
.next()
})
.collect(),
};
let _ = thread::spawn(move || {
if rx.recv_timeout(std::time::Duration::from_secs(15))
== Err(mpsc::RecvTimeoutError::Timeout)
{
let _ = dbg!(&my_data);
let ctr = my_data.ctr;
my_data.create_infos.iter().enumerate().for_each(|x| {
let mut f = File::create(format!("/tmp/{}-{}.bin", &ctr, x.0)).unwrap();
let data =
x.1 .0
.0
.iter()
.copied()
.flat_map(u32::to_ne_bytes)
.collect::<Box<_>>();
let _ = f.write_all(data.as_slice());
});
};
});
let result = unsafe {
super::DEVICE
.read()
.unwrap()
.get(&device)
.unwrap()
.device
.create_compute_pipelines(pipeline_cache, create_infos, p_allocator.as_ref())
};
let _ = tx.send(());
let (x, ret) = match result.map(|x| (x, vk::Result::SUCCESS)) {
Ok(x) => x,
Err(x) => x,
};
for (i, pipeline) in x.into_iter().take(create_infos.len()).enumerate() {
unsafe { *p_pipelines.add(i) = pipeline }
}
ret
});
result.unwrap()
}
Edited by Michael Mestnik