nir_opt_access.c 11.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/*
 * Copyright © 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

24
#include "nir.h"
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57

/* This pass optimizes GL access qualifiers. So far it does two things:
 *
 * - Infer readonly when it's missing.
 * - Infer ACCESS_CAN_REORDER when the following are true:
 *   - Either there are no writes, or ACCESS_NON_WRITEABLE and ACCESS_RESTRICT
 *     are both set. In either case there are no writes to the underlying
 *     memory.
 *   - If ACCESS_COHERENT is set, then there must be no memory barriers
 *     involving the access. Coherent accesses may return different results
 *     before and after barriers.
 *   - ACCESS_VOLATILE is not set.
 *
 * If these conditions are true, then image and buffer reads may be treated as
 * if they were uniform buffer reads, i.e. they may be arbitrarily moved,
 * combined, rematerialized etc.
 */

/* Shader-wide facts collected by gather_intrinsic() and later consulted when
 * deciding which access qualifiers can be inferred.
 */
struct access_state {
   /* Pointer set of nir_variables that gather_intrinsic() saw written: SSBO
    * variables, and uniform-mode image variables.
    */
   struct set *vars_written;
   /* True once a store/atomic on a non-buffer image has been seen. */
   bool images_written;
   /* True once a store/atomic on an SSBO or a buffer image has been seen. */
   bool buffers_written;
   /* True once a barrier that is treated as covering images has been seen. */
   bool image_barriers;
   /* True once a barrier that is treated as covering buffers has been seen. */
   bool buffer_barriers;
};

static void
gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
{
   nir_variable *var;
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
58 59 60 61
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
62 63 64 65 66 67 68 69 70 71 72 73
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
      var = nir_intrinsic_get_var(instr, 0);

      /* In OpenGL, buffer images use normal buffer objects, whereas other
       * image types use textures which cannot alias with buffer objects.
       * Therefore we have to group buffer samplers together with SSBO's.
       */
74 75
      if (glsl_get_sampler_dim(glsl_without_array(var->type)) ==
          GLSL_SAMPLER_DIM_BUF)
76 77 78 79 80 81 82 83 84 85
         state->buffers_written = true;
      else
         state->images_written = true;

      if (var->data.mode == nir_var_uniform)
         _mesa_set_add(state->vars_written, var);
      break;

   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_atomic_add:
86 87 88 89
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
      if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_BUF)
         state->buffers_written = true;
      else
         state->images_written = true;
      break;

   case nir_intrinsic_store_deref:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
      var = nir_intrinsic_get_var(instr, 0);
      if (var->data.mode != nir_var_mem_ssbo)
         break;

      _mesa_set_add(state->vars_written, var);
      state->buffers_written = true;
123
      break;
124 125 126 127 128 129 130 131 132 133 134 135 136 137

   case nir_intrinsic_memory_barrier:
      state->buffer_barriers = true;
      state->image_barriers = true;
      break;

   case nir_intrinsic_memory_barrier_buffer:
      state->buffer_barriers = true;
      break;

   case nir_intrinsic_memory_barrier_image:
      state->image_barriers = true;
      break;

138
   case nir_intrinsic_scoped_barrier:
139 140 141
      /* TODO: Could be more granular if we had nir_var_mem_image. */
      if (nir_intrinsic_memory_modes(instr) & (nir_var_mem_ubo |
                                               nir_var_mem_ssbo |
142 143
                                               nir_var_uniform |
                                               nir_var_mem_global)) {
144 145 146 147 148
         state->buffer_barriers = true;
         state->image_barriers = true;
      }
      break;

149 150 151 152 153 154 155 156 157 158 159 160 161 162
   default:
      break;
   }
}

/* Marks `var` as ACCESS_NON_WRITEABLE when it is an SSBO or a uniform-mode
 * image variable that was never recorded in state->vars_written.
 *
 * Returns true if the variable's access flags were changed.
 */
static bool
process_variable(struct access_state *state, nir_variable *var)
{
   const bool is_ssbo = var->data.mode == nir_var_mem_ssbo;
   const bool is_image_uniform =
      var->data.mode == nir_var_uniform && glsl_type_is_image(var->type);

   /* Only SSBOs and image uniforms are candidates. */
   if (!is_ssbo && !is_image_uniform)
      return false;

   /* Ignore variables we've already marked */
   if (var->data.access & ACCESS_CAN_REORDER)
      return false;

   /* Nothing to infer if the qualifier is already there. */
   if (var->data.access & ACCESS_NON_WRITEABLE)
      return false;

   /* A recorded write means the variable cannot be made read-only. */
   if (_mesa_set_search(state->vars_written, var))
      return false;

   var->data.access |= ACCESS_NON_WRITEABLE;
   return true;
}

/* Decides whether a load with the given `access` qualifiers may be tagged
 * ACCESS_CAN_REORDER.
 *
 * is_buffer selects which "written" flag applies (buffers vs. images), and
 * is_ssbo selects which "barrier" flag applies.
 */
static bool
can_reorder(struct access_state *state, enum gl_access_qualifier access,
            bool is_buffer, bool is_ssbo)
{
   bool memory_written;
   if (is_buffer)
      memory_written = state->buffers_written;
   else
      memory_written = state->images_written;

   const bool readonly_and_restrict =
      (access & ACCESS_NON_WRITEABLE) && (access & ACCESS_RESTRICT);

   /* Bail unless we can guarantee the underlying memory is never written:
    * either nothing of this kind is written at all, or this access is both
    * non-writeable and restrict.
    */
   if (memory_written && !readonly_and_restrict)
      return false;

   if (access & ACCESS_VOLATILE)
      return false;

   /* Note: memoryBarrierBuffer() is only guaranteed to flush buffer
    * variables and not imageBuffer's, so the barrier flag is chosen from the
    * GL-level type (is_ssbo) rather than from is_buffer.
    */
   bool barrier_seen;
   if (is_ssbo)
      barrier_seen = state->buffer_barriers;
   else
      barrier_seen = state->image_barriers;

   /* Coherent accesses may observe different values across a barrier. */
   return !barrier_seen || !(access & ACCESS_COHERENT);
}

/* Helper for process_intrinsic(): computes the access flags that can be added
 * to a load and writes them back in one go.
 *
 * known_readonly says whether ACCESS_NON_WRITEABLE may be added; is_buffer and
 * is_ssbo are forwarded to can_reorder(). The caller must already have
 * rejected loads that carry ACCESS_CAN_REORDER.
 *
 * Returns true if the intrinsic's access flags changed.
 */
static bool
infer_load_access(struct access_state *state, nir_intrinsic_instr *instr,
                  bool known_readonly, bool is_buffer, bool is_ssbo)
{
   const enum gl_access_qualifier before = nir_intrinsic_access(instr);
   enum gl_access_qualifier after = before;

   if (known_readonly)
      after |= ACCESS_NON_WRITEABLE;

   /* The reorder check must see the possibly updated read-only bit. */
   if (can_reorder(state, after, is_buffer, is_ssbo))
      after |= ACCESS_CAN_REORDER;

   if (after == before)
      return false;

   nir_intrinsic_set_access(instr, after);
   return true;
}

/* Tries to add ACCESS_NON_WRITEABLE and ACCESS_CAN_REORDER to a single load
 * intrinsic (bindless image load, SSBO deref load, or image deref load).
 *
 * Returns true if the intrinsic's access flags changed.
 */
static bool
process_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_bindless_image_load: {
      if (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER)
         return false;

      const bool is_buffer =
         nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_BUF;
      const bool any_written =
         is_buffer ? state->buffers_written : state->images_written;

      /* We have less information about bindless intrinsics, since we can't
       * always trace uses back to the variable. Read-only is therefore only
       * inferred when nothing of the same kind is written at all.
       */
      return infer_load_access(state, instr, !any_written, is_buffer, false);
   }

   case nir_intrinsic_load_deref:
   case nir_intrinsic_image_deref_load: {
      nir_variable *var = nir_intrinsic_get_var(instr, 0);
      const bool is_ssbo = var->data.mode == nir_var_mem_ssbo;

      /* Plain deref loads are only interesting when they read an SSBO. */
      if (instr->intrinsic == nir_intrinsic_load_deref && !is_ssbo)
         return false;

      if (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER)
         return false;

      const bool is_buffer =
         is_ssbo ||
         glsl_get_sampler_dim(glsl_without_array(var->type)) ==
            GLSL_SAMPLER_DIM_BUF;

      /* Pick up the qualifier if the whole variable is marked
       * non-writeable.
       */
      const bool var_readonly =
         (var->data.access & ACCESS_NON_WRITEABLE) != 0;

      return infer_load_access(state, instr, var_readonly, is_buffer, is_ssbo);
   }

   default:
      return false;
   }
}

/* Runs process_intrinsic() over every intrinsic in `impl`.
 *
 * When at least one intrinsic changed, block-index, dominance, live-SSA-def
 * and loop-analysis metadata are marked as preserved. Returns whether
 * anything changed.
 */
static bool
opt_access_impl(struct access_state *state,
                nir_function_impl *impl)
{
   bool changed = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         if (process_intrinsic(state, nir_instr_as_intrinsic(instr)))
            changed = true;
      }
   }

   if (!changed)
      return false;

   nir_metadata_preserve(impl,
                         nir_metadata_block_index |
                         nir_metadata_dominance |
                         nir_metadata_live_ssa_defs |
                         nir_metadata_loop_analysis);
   return true;
}

bool
305
nir_opt_access(nir_shader *shader)
306 307 308 309 310
{
   struct access_state state = {
      .vars_written = _mesa_pointer_set_create(NULL),
   };

311
   bool var_progress = false;
312 313 314 315 316 317 318 319 320 321 322 323 324 325
   bool progress = false;

   nir_foreach_function(func, shader) {
      if (func->impl) {
         nir_foreach_block(block, func->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_intrinsic)
                  gather_intrinsic(&state, nir_instr_as_intrinsic(instr));
            }
         }
      }
   }

   nir_foreach_variable(var, &shader->uniforms)
326
      var_progress |= process_variable(&state, var);
327 328 329 330

   nir_foreach_function(func, shader) {
      if (func->impl) {
         progress |= opt_access_impl(&state, func->impl);
331 332 333 334 335 336 337 338 339

         /* If we make a change to the uniforms, update all the impls. */
         if (var_progress) {
            nir_metadata_preserve(func->impl,
                                  nir_metadata_block_index |
                                  nir_metadata_dominance |
                                  nir_metadata_live_ssa_defs |
                                  nir_metadata_loop_analysis);
         }
340 341 342
      }
   }

343 344
   progress |= var_progress;

345 346 347
   _mesa_set_destroy(state.vars_written, NULL);
   return progress;
}