Commit bdca7756 authored by Alyssa Rosenzweig 💜

Merge branch 'upstream'

parents 1c2e2a93 93675145
......@@ -72,7 +72,9 @@ F: src/loader/
EGL
R: Eric Engestrom <eric@engestrom.ch>
R: Emil Velikov <emil.l.velikov@gmail.com>
F: src/egl/
F: include/EGL/
HAIKU
R: Alexander von Gluck IV <kallisti5@unixzen.com>
......@@ -136,3 +138,8 @@ F: src/gallium/drivers/freedreno/
GLX
R: Adam Jackson <ajax@redhat.com>
F: src/glx/
VULKAN
R: Eric Engestrom <eric@engestrom.ch>
F: src/vulkan/
F: include/vulkan/
......@@ -3053,7 +3053,7 @@ AC_SUBST([XVMC_MAJOR], 1)
AC_SUBST([XVMC_MINOR], 0)
AC_SUBST([XA_MAJOR], 2)
AC_SUBST([XA_MINOR], 4)
AC_SUBST([XA_MINOR], 5)
AC_SUBST([XA_PATCH], 0)
AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_PATCH")
......
......@@ -1327,6 +1327,7 @@ struct __DRIdri2ExtensionRec {
#define __DRI_IMAGE_FOURCC_NV16 0x3631564e
#define __DRI_IMAGE_FOURCC_YUYV 0x56595559
#define __DRI_IMAGE_FOURCC_UYVY 0x59565955
#define __DRI_IMAGE_FOURCC_AYUV 0x56555941
#define __DRI_IMAGE_FOURCC_YVU410 0x39555659
#define __DRI_IMAGE_FOURCC_YVU411 0x31315659
......@@ -1353,6 +1354,7 @@ struct __DRIdri2ExtensionRec {
#define __DRI_IMAGE_COMPONENTS_Y_UV 0x3004
#define __DRI_IMAGE_COMPONENTS_Y_XUXV 0x3005
#define __DRI_IMAGE_COMPONENTS_Y_UXVX 0x3008
#define __DRI_IMAGE_COMPONENTS_AYUV 0x3009
#define __DRI_IMAGE_COMPONENTS_R 0x3006
#define __DRI_IMAGE_COMPONENTS_RG 0x3007
......
......@@ -789,7 +789,7 @@ endif
# Check for generic C arguments
c_args = []
foreach a : ['-Wall', '-Werror=implicit-function-declaration',
foreach a : ['-Werror=implicit-function-declaration',
'-Werror=missing-prototypes', '-Werror=return-type',
'-fno-math-errno',
'-fno-trapping-math', '-Qunused-arguments']
......@@ -811,7 +811,7 @@ endif
# Check for generic C++ arguments
cpp_args = []
foreach a : ['-Wall', '-Werror=return-type',
foreach a : ['-Werror=return-type',
'-fno-math-errno', '-fno-trapping-math',
'-Qunused-arguments']
if cpp.has_argument(a)
......@@ -907,8 +907,9 @@ if not cc.links('''#include <stdint.h>
int main() {
return __sync_add_and_fetch(&v, (uint64_t)1);
}''',
dependencies : dep_atomic,
name : 'GCC 64bit atomics')
pre_args += '-DMISSING_64_BIT_ATOMICS'
pre_args += '-DMISSING_64BIT_ATOMICS'
endif
# TODO: shared/static? Is this even worth doing?
......@@ -1319,13 +1320,6 @@ if with_platform_wayland
'linux-dmabuf', 'linux-dmabuf-unstable-v1.xml'
)
pre_args += ['-DHAVE_WAYLAND_PLATFORM', '-DWL_HIDE_DEPRECATED']
else
prog_wl_scanner = []
wl_scanner_arg = ''
dep_wl_protocols = null_dep
dep_wayland_client = null_dep
dep_wayland_server = null_dep
wayland_dmabuf_xml = ''
endif
dep_x11 = null_dep
......
......@@ -1483,6 +1483,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
int i;
struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
unsigned num_bpp64_colorbufs = 0;
/* this may happen for inherited secondary recording */
if (!framebuffer)
......@@ -1506,6 +1507,9 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
radv_emit_fb_color_state(cmd_buffer, i, att, image, layout);
radv_load_color_clear_metadata(cmd_buffer, image, i);
if (image->surface.bpe >= 8)
num_bpp64_colorbufs++;
}
if(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
......@@ -1541,6 +1545,23 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
S_028208_BR_X(framebuffer->width) |
S_028208_BR_Y(framebuffer->height));
if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
uint8_t watermark = 4; /* Default value for VI. */
/* For optimal DCC performance. */
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
if (num_bpp64_colorbufs >= 5) {
watermark = 8;
} else {
watermark = 6;
}
}
radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
S_028424_OVERWRITE_COMBINER_WATERMARK(watermark));
}
if (cmd_buffer->device->dfsm_allowed) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
......@@ -3541,8 +3562,13 @@ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
/* Index & Vertex buffer don't change context regs, and pipeline is handled later. */
used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_PIPELINE);
/* Index, vertex and streamout buffers don't change context regs, and
* pipeline is handled later.
*/
used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER |
RADV_CMD_DIRTY_VERTEX_BUFFER |
RADV_CMD_DIRTY_STREAMOUT_BUFFER |
RADV_CMD_DIRTY_PIPELINE);
/* Assume all state changes except these two can imply context rolls. */
if (cmd_buffer->state.dirty & used_states)
......
......@@ -2046,16 +2046,15 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
max_offchip_buffers = max_offchip_buffers_per_se *
device->physical_device->rad_info.max_se;
switch (device->tess_offchip_block_dw_size) {
default:
assert(0);
/* fall through */
case 8192:
offchip_granularity = V_03093C_X_8K_DWORDS;
break;
case 4096:
/* Hawaii has a bug with offchip buffers > 256 that can be worked
* around by setting 4K granularity.
*/
if (device->tess_offchip_block_dw_size == 4096) {
assert(device->physical_device->rad_info.family == CHIP_HAWAII);
offchip_granularity = V_03093C_X_4K_DWORDS;
break;
} else {
assert(device->tess_offchip_block_dw_size == 8192);
offchip_granularity = V_03093C_X_8K_DWORDS;
}
switch (device->physical_device->rad_info.chip_class) {
......
......@@ -15,8 +15,8 @@ build_buffer_fill_shader(struct radv_device *dev)
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......@@ -67,8 +67,8 @@ build_buffer_copy_shader(struct radv_device *dev)
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......
......@@ -60,8 +60,8 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......@@ -289,8 +289,8 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......@@ -719,8 +719,8 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......@@ -1139,8 +1139,8 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.descriptor_set = 0;
output_img->data.binding = 0;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......
......@@ -81,8 +81,8 @@ build_color_shaders(struct nir_shader **out_vs,
"v_layer");
vs_out_layer->data.location = VARYING_SLOT_LAYER;
vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0);
nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
......@@ -470,8 +470,8 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs
"v_layer");
vs_out_layer->data.location = VARYING_SLOT_LAYER;
vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0);
nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
......
......@@ -58,8 +58,8 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......
......@@ -99,8 +99,8 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......
......@@ -1814,6 +1814,10 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
nir_lower_io_arrays_to_elements(ordered_shaders[i],
ordered_shaders[i - 1]);
if (nir_link_constant_varyings(ordered_shaders[i],
ordered_shaders[i - 1]))
radv_optimize_nir(ordered_shaders[i - 1], false, false);
nir_remove_dead_variables(ordered_shaders[i],
nir_var_shader_out);
nir_remove_dead_variables(ordered_shaders[i - 1],
......@@ -2701,7 +2705,7 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs,
const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
const VkConservativeRasterizationModeEXT mode =
radv_get_conservative_raster_mode(vkraster);
uint32_t pa_sc_conservative_rast = 0;
uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
......@@ -3371,14 +3375,8 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
else
ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
ia_multi_vgt_param.partial_es_wave = false;
if (pipeline->device->has_distributed_tess) {
if (radv_pipeline_has_gs(pipeline)) {
if (device->physical_device->rad_info.chip_class <= VI)
ia_multi_vgt_param.partial_es_wave = true;
}
}
/* GS requirement. */
ia_multi_vgt_param.partial_es_wave = false;
if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= VI)
if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
ia_multi_vgt_param.partial_es_wave = true;
......@@ -3425,6 +3423,9 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
if (device->has_distributed_tess) {
if (radv_pipeline_has_gs(pipeline)) {
if (device->physical_device->rad_info.chip_class <= VI)
ia_multi_vgt_param.partial_es_wave = true;
if (device->physical_device->rad_info.family == CHIP_TONGA ||
device->physical_device->rad_info.family == CHIP_FIJI ||
device->physical_device->rad_info.family == CHIP_POLARIS10 ||
......
......@@ -153,8 +153,8 @@ build_occlusion_query_shader(struct radv_device *device) {
nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
nir_builder_instr_insert(&b, &src_buf->instr);
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......@@ -343,8 +343,8 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
nir_builder_instr_insert(&b, &src_buf->instr);
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
......
......@@ -278,8 +278,7 @@ si_emit_graphics(struct radv_physical_device *physical_device,
radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
if (physical_device->rad_info.num_good_compute_units /
(physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) {
if (physical_device->rad_info.num_good_cu_per_sh <= 4) {
/* Too few available compute units per SH. Disallowing
* VS to run on CU0 could hurt us more than late VS
* allocation would help.
......@@ -306,9 +305,6 @@ si_emit_graphics(struct radv_physical_device *physical_device,
if (physical_device->rad_info.chip_class >= VI) {
uint32_t vgt_tess_distribution;
radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
S_028424_OVERWRITE_COMBINER_WATERMARK(4));
vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
S_028B50_ACCUM_TRI(11) |
......
......@@ -166,6 +166,7 @@ int test_optpass(int argc, char **argv)
int loop = 0;
int shader_type = GL_VERTEX_SHADER;
int quiet = 0;
int error;
const struct option optpass_opts[] = {
{ "input-ir", no_argument, &input_format_ir, 1 },
......@@ -264,9 +265,11 @@ int test_optpass(int argc, char **argv)
printf("--\n");
}
error = state->error;
ralloc_free(state);
ralloc_free(shader);
return state->error;
return error;
}
......@@ -2083,6 +2083,9 @@ typedef struct nir_shader_compiler_options {
*/
bool fdot_replicates;
/** lowers ffloor to fsub+ffract: */
bool lower_ffloor;
/** lowers ffract to fsub+ffloor: */
bool lower_ffract;
......@@ -2905,6 +2908,7 @@ typedef struct nir_lower_tex_options {
unsigned lower_y_u_v_external;
unsigned lower_yx_xuxv_external;
unsigned lower_xy_uxvx_external;
unsigned lower_ayuv_external;
/**
* To emulate certain texture wrap modes, this can be used
......@@ -3013,7 +3017,15 @@ typedef struct nir_lower_bitmap_options {
void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);
bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset);
bool nir_lower_to_source_mods(nir_shader *shader);
/**
 * Bit flags passed to nir_lower_to_source_mods() to select which classes
 * of source modifiers the pass may fold.
 */
typedef enum {
   /** Allow lowering of integer source modifiers. */
   nir_lower_int_source_mods = 0x1,
   /** Allow lowering of float source modifiers. */
   nir_lower_float_source_mods = 0x2,
   /** Both of the flags above. */
   nir_lower_all_source_mods = nir_lower_int_source_mods |
                               nir_lower_float_source_mods
} nir_lower_to_source_mods_flags;
bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);
bool nir_lower_gs_intrinsics(nir_shader *shader);
......
......@@ -55,11 +55,28 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
return &load->dest.ssa;
}
<%
def sysval_decl_list(opcode):
    """Return the extra C parameter declaration for a sysval builder.

    When the opcode has at least one index, the result is
    ', unsigned <name>' where <name> is the first index lower-cased, ready
    to be appended after 'nir_builder *build'.  Otherwise it is the empty
    string, so the generated signature takes no extra parameter.
    """
    res = ''
    if opcode.indices:
        res += ', unsigned ' + opcode.indices[0].lower()
    return res
def sysval_arg_list(opcode):
    """Return the index argument forwarded to nir_load_system_value().

    When the opcode has at least one index, the first index name
    (lower-cased) is forwarded, matching the parameter declared by
    sysval_decl_list().  Otherwise the literal '0' is passed.
    """
    args = []
    if opcode.indices:
        args.append(opcode.indices[0].lower())
    else:
        args.append('0')
    return ', '.join(args)
%>
% for name, opcode in filter(lambda v: v[1].sysval, sorted(INTR_OPCODES.items())):
static inline nir_ssa_def *
nir_${name}(nir_builder *build)
nir_${name}(nir_builder *build${sysval_decl_list(opcode)})
{
return nir_load_system_value(build, nir_intrinsic_${name}, 0);
return nir_load_system_value(build, nir_intrinsic_${name},
${sysval_arg_list(opcode)});
}
% endfor
......
......@@ -196,9 +196,12 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
}
static uint8_t
get_interp_type(nir_variable *var, bool default_to_smooth_interp)
get_interp_type(nir_variable *var, const struct glsl_type *type,
bool default_to_smooth_interp)
{
if (var->data.interpolation != INTERP_MODE_NONE)
if (glsl_type_is_integer(type))
return INTERP_MODE_FLAT;
else if (var->data.interpolation != INTERP_MODE_NONE)
return var->data.interpolation;
else if (default_to_smooth_interp)
return INTERP_MODE_SMOOTH;
......@@ -253,7 +256,7 @@ get_slot_component_masks_and_interp_types(struct exec_list *var_list,
unsigned comps_slot2 = 0;
for (unsigned i = 0; i < slots; i++) {
interp_type[location + i] =
get_interp_type(var, default_to_smooth_interp);
get_interp_type(var, type, default_to_smooth_interp);
interp_loc[location + i] = get_interp_loc(var);
if (dual_slot) {
......@@ -425,7 +428,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
continue;
bool found_new_offset = false;
uint8_t interp = get_interp_type(var, default_to_smooth_interp);
uint8_t interp = get_interp_type(var, type, default_to_smooth_interp);
for (; cursor[interp] < 32; cursor[interp]++) {
uint8_t cursor_used_comps = comps[cursor[interp]];
......
......@@ -173,8 +173,7 @@ lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,
for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
if (ucp_enables & (1 << plane)) {
nir_ssa_def *ucp =
nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, plane);
nir_ssa_def *ucp = nir_load_user_clip_plane(&b, plane);
/* calculate clipdist[plane] - dot(ucp, cv): */
clipdist[plane] = nir_fdot4(&b, ucp, cv);
......
......@@ -31,12 +31,24 @@
static nir_ssa_def*
build_local_group_size(nir_builder *b)
{
nir_const_value local_size;
memset(&local_size, 0, sizeof(local_size));
local_size.u32[0] = b->shader->info.cs.local_size[0];
local_size.u32[1] = b->shader->info.cs.local_size[1];
local_size.u32[2] = b->shader->info.cs.local_size[2];
return nir_build_imm(b, 3, 32, local_size);
nir_ssa_def *local_size;
/*
* If the local work group size is variable it can't be lowered at this
* point, but its intrinsic can still be used.
*/
if (b->shader->info.cs.local_size_variable) {
local_size = nir_load_local_group_size(b);
} else {
nir_const_value local_size_const;
memset(&local_size_const, 0, sizeof(local_size_const));
local_size_const.u32[0] = b->shader->info.cs.local_size[0];
local_size_const.u32[1] = b->shader->info.cs.local_size[1];
local_size_const.u32[2] = b->shader->info.cs.local_size[2];
local_size = nir_build_imm(b, 3, 32, local_size_const);
}
return local_size;
}
static bool
......
......@@ -261,7 +261,8 @@ sample_plane(nir_builder *b, nir_tex_instr *tex, int plane)
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v)
nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
nir_ssa_def *a)
{
nir_const_value m[3] = {
{ .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } },
......@@ -281,7 +282,7 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));
nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));
nir_ssa_def *result = nir_vec4(b, red, green, blue, nir_imm_float(b, 1.0f));
nir_ssa_def *result = nir_vec4(b, red, green, blue, a);
nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
}
......@@ -297,7 +298,8 @@ lower_y_uv_external(nir_builder *b, nir_tex_instr *tex)
convert_yuv_to_rgb(b, tex,
nir_channel(b, y, 0),
nir_channel(b, uv, 0),
nir_channel(b, uv, 1));
nir_channel(b, uv, 1),
nir_imm_float(b, 1.0f));
}
static void
......@@ -312,7 +314,8 @@ lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex)
convert_yuv_to_rgb(b, tex,
nir_channel(b, y, 0),
nir_channel(b, u, 0),
nir_channel(b, v, 0));
nir_channel(b, v, 0),
nir_imm_float(b, 1.0f));
}
static void
......@@ -326,7 +329,8 @@ lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
convert_yuv_to_rgb(b, tex,
nir_channel(b, y, 0),
nir_channel(b, xuxv, 1),
nir_channel(b, xuxv, 3));
nir_channel(b, xuxv, 3),
nir_imm_float(b, 1.0f));
}
static void
......@@ -340,7 +344,22 @@ lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex)
convert_yuv_to_rgb(b, tex,
nir_channel(b, y, 1),
nir_channel(b, uxvx, 0),
nir_channel(b, uxvx, 2));
nir_channel(b, uxvx, 2),
nir_imm_float(b, 1.0f));
}
/**
 * Lowers a texture fetch from a single-plane AYUV external image to RGBA.
 *
 * Plane 0 is sampled once and its channels are handed to
 * convert_yuv_to_rgb(): channel 2 as Y, channel 1 as U, channel 0 as V and
 * channel 3 as the alpha value.
 */
static void
lower_ayuv_external(nir_builder *b, nir_tex_instr *tex)
{
   nir_ssa_def *texel;

   /* Everything built below is inserted right after the tex instruction. */
   b->cursor = nir_after_instr(&tex->instr);
   texel = sample_plane(b, tex, 0);

   /* The channel extracts stay inline as call arguments, as in the
    * original, so the order in which they are emitted is not changed.
    */
   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, texel, 2),  /* Y */
                      nir_channel(b, texel, 1),  /* U */
                      nir_channel(b, texel, 0),  /* V */
                      nir_channel(b, texel, 3)); /* A */
}
/*
......@@ -788,6 +807,11 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
progress = true;
}
if ((1 << tex->texture_index) & options->lower_ayuv_external) {
lower_ayuv_external(b, tex);
progress = true;
}
if (sat_mask) {
saturate_src(b, tex, sat_mask);
progress = true;
......
......@@ -34,7 +34,8 @@
*/
static bool
nir_lower_to_source_mods_block(nir_block *block)
nir_lower_to_source_mods_block(nir_block *block,
nir_lower_to_source_mods_flags options)
{
bool progress = false;
......@@ -58,10 +59,14 @@ nir_lower_to_source_mods_block(nir_block *block)
switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[i])) {
case nir_type_float:
if (!(options & nir_lower_float_source_mods))
continue;
if (parent->op != nir_op_fmov)
continue;
break;
case nir_type_int:
if (!(options & nir_lower_int_source_mods))
continue;
if (parent->op != nir_op_imov)
continue;
break;
......@@ -97,33 +102,41 @@ nir_lower_to_source_mods_block(nir_block *block)
progress = true;
}
switch (alu->op) {
case nir_op_fsat:
alu->op = nir_op_fmov;
alu->dest.saturate = true;
break;
case nir_op_ineg:
alu->op = nir_op_imov;
alu->src[0].negate = !alu->src[0].negate;
break;
case nir_op_fneg:
alu->op = nir_op_fmov;
alu->src[0].negate = !alu->src[0].negate;
break;
case nir_op_iabs:
alu->op = nir_op_imov;
alu->src[0].abs = true;
alu->src[0].negate = false;
break;
case nir_op_fabs:
alu->op = nir_op_fmov;
alu->src[0].abs = true;
alu->src[0].negate = false;
break;
default:
break;
if (options & nir_lower_float_source_mods) {
switch (alu->op) {
case nir_op_fsat:
alu->op = nir_op_fmov;
alu->dest.saturate = true;