Commit bd8e84eb authored by Daniel Schürmann, committed by Marge Bot
Browse files

nir: replace .lower_sub with .has_fsub and .has_isub



This allows more fine-grained control over whether
a backend supports each of these instructions.
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <!6597>
parent b3ce55b4
Pipeline #254257 waiting for manual action with stages
......@@ -78,6 +78,8 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
.has_fsub = true,
.has_isub = true,
.use_scoped_barrier = true,
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
......
......@@ -3252,6 +3252,8 @@ const nir_shader_compiler_options v3d_nir_options = {
.lower_wpos_pntc = true,
.lower_rotate = true,
.lower_to_scalar = true,
.has_fsub = true,
.has_isub = true,
};
/**
......
......@@ -250,6 +250,8 @@ const nir_shader_compiler_options v3dv_nir_options = {
.lower_wpos_pntc = true,
.lower_rotate = true,
.lower_to_scalar = true,
.has_fsub = true,
.has_isub = true,
.vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
* needs to be supported */
.lower_interpolate_at = true,
......
......@@ -3137,8 +3137,6 @@ typedef struct nir_shader_compiler_options {
bool lower_fneg;
/** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
bool lower_ineg;
/** lowers fsub and isub to fadd+fneg and iadd+ineg. */
bool lower_sub;
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
bool lower_scmp;
......@@ -3359,6 +3357,14 @@ typedef struct nir_shader_compiler_options {
* to imul with masked inputs and iadd */
bool has_umad24;
/** Backend supports fsub. If not set, fsub is automatically lowered to
* fadd(x, fneg(y)). If true, the driver should call nir_opt_algebraic_late()
* so that fadd(x, fneg(y)) is recombined into fsub. */
bool has_fsub;
/** Backend supports isub. If not set, isub is automatically lowered to
* iadd(x, ineg(y)). If true, the driver should call nir_opt_algebraic_late()
* so that iadd(x, ineg(y)) is recombined into isub. */
bool has_isub;
/* Whether to generate only scoped_barrier intrinsics instead of the set of
* memory and control barrier intrinsics based on GLSL.
*/
......
......@@ -2106,9 +2106,9 @@ late_optimizations = [
(('fneg', ('fneg', a)), a),
# Subtractions get lowered during optimization, so we need to recombine them
(('fadd', 'a', ('fneg', 'b')), ('fsub', 'a', 'b'), '!options->lower_sub'),
(('fadd', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'),
(('fneg', a), ('fmul', a, -1.0), 'options->lower_fneg'),
(('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), '!options->lower_sub || options->lower_ineg'),
(('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), 'options->has_isub || options->lower_ineg'),
(('ineg', a), ('isub', 0, a), 'options->lower_ineg'),
(('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
(('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'),
......
......@@ -72,6 +72,8 @@ static const nir_shader_compiler_options options = {
.lower_rotate = true,
.lower_to_scalar = true,
.has_imul24 = true,
.has_fsub = true,
.has_isub = true,
.lower_wpos_pntc = true,
.lower_cs_local_index_from_id = true,
......@@ -125,6 +127,8 @@ static const nir_shader_compiler_options options_a6xx = {
.vectorize_io = true,
.lower_to_scalar = true,
.has_imul24 = true,
.has_fsub = true,
.has_isub = true,
.max_unroll_iterations = 32,
.lower_wpos_pntc = true,
.lower_cs_local_index_from_id = true,
......
......@@ -2703,7 +2703,6 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
.lower_flrp64 = true,
.lower_fmod = true,
.lower_rotate = true,
.lower_sub = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.use_interpolated_input_intrinsics = true,
......
......@@ -1033,7 +1033,6 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
screen->options = (nir_shader_compiler_options) {
.lower_fpow = true,
.lower_sub = true,
.lower_ftrunc = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
......
......@@ -45,6 +45,8 @@ static const nir_shader_compiler_options options = {
.lower_rotate = true,
.lower_vector_cmp = true,
.lower_fdph = true,
.has_fsub = true,
.has_isub = true,
};
const nir_shader_compiler_options *
......
......@@ -50,7 +50,6 @@ static const nir_shader_compiler_options vs_nir_options = {
.lower_fdiv = true,
.lower_fmod = true,
.lower_fsqrt = true,
.lower_sub = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
/* could be implemented by clamp */
......@@ -68,7 +67,6 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_fpow = true,
.lower_fdiv = true,
.lower_fmod = true,
.lower_sub = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_fsign = true,
......
......@@ -547,7 +547,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_fsat = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_sub = true,
.lower_fdot = true,
.lower_fdph = true,
.lower_ffma16 = true,
......
......@@ -3234,7 +3234,6 @@ nvir_nir_shader_compiler_options(int chipset)
op.lower_mul_high = false;
op.lower_fneg = false;
op.lower_ineg = false;
op.lower_sub = true;
op.lower_scmp = true; // TODO: not implemented yet
op.lower_vector_cmp = false;
op.lower_bitops = false;
......
......@@ -932,7 +932,6 @@ static const nir_shader_compiler_options nir_options = {
.lower_fpow = false,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_sub = true,
.lower_ffract = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,
......
......@@ -1192,7 +1192,9 @@ const struct nir_shader_compiler_options r600_nir_fs_options = {
.vectorize_io = true,
.has_umad24 = true,
.has_umul24 = true,
.use_interpolated_input_intrinsics = true
.use_interpolated_input_intrinsics = true,
.has_fsub = true,
.has_isub = true,
};
const struct nir_shader_compiler_options r600_nir_options = {
......@@ -1214,6 +1216,8 @@ const struct nir_shader_compiler_options r600_nir_options = {
.vectorize_io = true,
.has_umad24 = true,
.has_umul24 = true,
.has_fsub = true,
.has_isub = true,
};
......
......@@ -926,7 +926,6 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
.lower_fdiv = true,
.lower_bitfield_insert_to_bitfield_select = true,
.lower_bitfield_extract = true,
.lower_sub = true,
/* |---------------------------------- Performance & Availability --------------------------------|
* |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
* Arch | F32,F16,F64 | F32,F16 | F32,F16 |F32,F16,F64 | F32,F16 | F32,F16 |PK_FMAC_F16|F16,F32,F64
......
......@@ -82,7 +82,6 @@ static const nir_shader_compiler_options sp_compiler_options = {
.lower_flrp64 = true,
.lower_fmod = true,
.lower_rotate = true,
.lower_sub = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.use_interpolated_input_intrinsics = true,
......
......@@ -2190,6 +2190,8 @@ static const nir_shader_compiler_options nir_options = {
.lower_umax = true,
.lower_umin = true,
.lower_isign = true,
.has_fsub = true,
.has_isub = true,
.max_unroll_iterations = 32,
};
......
......@@ -224,6 +224,8 @@ zink_screen_init_compiler(struct zink_screen *screen)
.use_scoped_barrier = true,
.lower_int64_options = 0,
.lower_doubles_options = ~nir_lower_fp64_full_software,
.has_fsub = true,
.has_isub = true,
};
screen->nir_options = default_options;
......
......@@ -30,7 +30,6 @@
#include "util/debug.h"
#define COMMON_OPTIONS \
.lower_sub = true, \
.lower_fdiv = true, \
.lower_scmp = true, \
.lower_flrp16 = true, \
......
......@@ -98,6 +98,8 @@ nir_options = {
.lower_pack_32_2x16_split = true,
.lower_unpack_64_2x32_split = true,
.lower_unpack_32_2x16_split = true,
.has_fsub = true,
.has_isub = true,
.use_scoped_barrier = true,
.vertex_id_zero_based = true,
.lower_base_vertex = true,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment