etnaviv_emit.c 31.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/*
 * Copyright (c) 2014-2015 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */

#include "etnaviv_emit.h"

#include "etnaviv_blend.h"
#include "etnaviv_compiler.h"
#include "etnaviv_context.h"
#include "etnaviv_rasterizer.h"
#include "etnaviv_resource.h"
#include "etnaviv_rs.h"
#include "etnaviv_screen.h"
#include "etnaviv_shader.h"
#include "etnaviv_texture.h"
#include "etnaviv_translate.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"
#include "etnaviv_zsa.h"
#include "hw/common.xml.h"
#include "hw/state.xml.h"
44
#include "hw/state_blt.xml.h"
45
46
47
48
49
50
51
52
53
54
55
56
57
#include "util/u_math.h"

/* Queue a STALL command (queues 2 words) */
static inline void
CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
{
   etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
   etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
}

void
etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
{
58
59
   bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);
   etna_cmd_stream_reserve(stream, blt ? 8 : 4);
60

61
62
63
64
65
66
   if (blt) {
      etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
      etna_cmd_stream_emit(stream, 1);
   }

   /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
67
68
69
70
71
72
73
74
75
76
77
   etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
   etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));

   if (from == SYNC_RECIPIENT_FE) {
      /* if the frontend is to be stalled, queue a STALL frontend command */
      CMD_STALL(stream, from, to);
   } else {
      /* otherwise, load the STALL token state */
      etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
      etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
   }
78
79
80
81
82

   if (blt) {
      etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
      etna_cmd_stream_emit(stream, 0);
   }
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
}

#define EMIT_STATE(state_name, src_value) \
   etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_FIXP(state_name, src_value) \
   etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_RELOC(state_name, src_value) \
   etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)

#define ETNA_3D_CONTEXT_SIZE  (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */

static unsigned
required_stream_size(struct etna_context *ctx)
{
   unsigned size = ETNA_3D_CONTEXT_SIZE;

   /* stall + flush */
   size += 2 + 4;

   /* vertex elements */
   size += ctx->vertex_elements->num_elements + 1;

   /* uniforms - worst case (2 words per uniform load) */
Jonathan Marek's avatar
Jonathan Marek committed
108
109
   size += ctx->shader.vs->uniforms.imm_count * 2;
   size += ctx->shader.fs->uniforms.imm_count * 2;
110
111
112
113
114
115
116
117
118
119
120
121
122
123

   /* shader */
   size += ctx->shader_state.vs_inst_mem_size + 1;
   size += ctx->shader_state.ps_inst_mem_size + 1;

   /* DRAW_INDEXED_PRIMITIVES command */
   size += 6;

   /* reserve for alignment etc. */
   size += 64;

   return size;
}

124
125
126
127
128
129
130
131
132
133
134
/* Emit state that only exists on HALTI5+ */
static void
emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
{
   struct etna_cmd_stream *stream = ctx->stream;
   uint32_t dirty = ctx->dirty;
   struct etna_coalesce coalesce;

   etna_coalesce_start(stream, &coalesce);
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /* Magic states (load balancing, inter-unit sync, buffers) */
135
      /*007C4*/ EMIT_STATE(FE_HALTI5_ID_CONFIG, ctx->shader_state.FE_HALTI5_ID_CONFIG);
136
137
138
139
140
141
142
143
144
145
146
147
      /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));
      /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));
      for (int x = 0; x < 4; ++x) {
         /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 4; ++x) {
         /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
148
149
      /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
      /*00A94*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
150
      /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
151
152
      /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
      /*01084*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
      /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
   }
   etna_coalesce_end(stream, &coalesce);
}

/* Emit state that no longer exists on HALTI5 */
static void
emit_pre_halti5_state(struct etna_context *ctx)
{
   struct etna_cmd_stream *stream = ctx->stream;
   uint32_t dirty = ctx->dirty;
   struct etna_coalesce coalesce;

   etna_coalesce_start(stream, &coalesce);
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 4; ++x) {
        /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 4; ++x) {
        /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      for (int x = 0; x < 10; ++x) {
         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
      for (int x = 0; x < 4; ++x) {
         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
      }
      for (int x = 0; x < 16; ++x) {
         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
      }
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
204
      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
205
206
207
      for (int x = 0; x < 2; ++x) {
         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
      }
208
      /*03834*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS2, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
209
210
211
212
   }
   etna_coalesce_end(stream, &coalesce);
}

213
214
215
216
217
218
219
220
/* Weave state before draw operation. This function merges all the compiled
 * state blocks under the context into one device register state. Parts of
 * this state that are changed since last call (dirty) will be uploaded as
 * state changes in the command buffer. */
void
etna_emit_state(struct etna_context *ctx)
{
   struct etna_cmd_stream *stream = ctx->stream;
221
   struct etna_screen *screen = ctx->screen;
222
223
   unsigned ccw = ctx->rasterizer->front_ccw;

224
225
226
227
228
229
230
231
232
233

   /* Pre-reserve the command buffer space which we are likely to need.
    * This must cover all the state emitted below, and the following
    * draw command. */
   etna_cmd_stream_reserve(stream, required_stream_size(ctx));

   uint32_t dirty = ctx->dirty;

   /* Pre-processing: see what caches we need to flush before making state changes. */
   uint32_t to_flush = 0;
234
   if (unlikely(dirty & (ETNA_DIRTY_BLEND)))
235
      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
236
237
   if (unlikely(dirty & ETNA_DIRTY_ZSA))
      to_flush |= VIVS_GL_FLUSH_CACHE_DEPTH;
238
239
240
241
242
243
244
245
246
247
248
249
   if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))
      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
      to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
   if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
      to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;

   if (to_flush) {
      etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
      etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
   }

250
251
252
253
254
   /* Flush TS cache before changing TS configuration. */
   if (unlikely(dirty & ETNA_DIRTY_TS)) {
      etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
   }

255
256
257
258
   /* Update vertex elements. This is different from any of the other states, in that
    * a) the number of vertex elements written matters: so write only active ones
    * b) the vertex element states must all be written: do not skip entries that stay the same */
   if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
259
      if (screen->specs.halti >= 5) {
260
261
262
263
         /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
            ctx->vertex_elements->num_elements,
            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
         /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
264
265
            ctx->vertex_elements->num_elements,
            ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
266
267
268
269
270
271
272
273
         /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
            ctx->vertex_elements->num_elements,
            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
      } else {
         /* Special case: vertex elements must always be sent in full if changed */
         /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
            ctx->vertex_elements->num_elements,
            ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
274
         if (screen->specs.halti >= 2) {
275
276
277
278
            /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
               ctx->vertex_elements->num_elements,
               ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
         }
279
      }
280
   }
281
282
283
   unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
                           ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
                           : ctx->shader_state.VS_OUTPUT_COUNT;
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319

   /* The following code is originally generated by gen_merge_state.py, to
    * emit state in increasing order of address (this makes it possible to merge
    * consecutive register updates into one SET_STATE command)
    *
    * There have been some manual changes, where the weaving operation is not
    * simply bitwise or:
    * - scissor fixp
    * - num vertex elements
    * - scissor handling
    * - num samplers
    * - texture lod
    * - ETNA_DIRTY_TS
    * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
    * change anyway
    * - PS / framebuffer interaction for MSAA
    * - move update of GL_MULTI_SAMPLE_CONFIG first
    * - add unlikely()/likely()
    */
   struct etna_coalesce coalesce;

   etna_coalesce_start(stream, &coalesce);

   /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
    * directly
    *    or indirectly */
   /* multi sample config is set first, and outside of the normal sorting
    * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
    * possibly PS.TEMP_REGISTER_CONTROL).
    */
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
      uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
      val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;

      /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
   }
320
   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
321
322
323
324
325
326
      /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
      /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
   }
   if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
      /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
   }
327
   if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
328
      if (screen->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
329
330
331
332
333
334
335
336
         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
            /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
         }
         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
               /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
            }
         }
337
      } else if(screen->specs.stream_count > 1) { /* hw w/ multiple vertex streams */
338
339
         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
            /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
340
         }
341
342
343
344
345
346
347
348
         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
               /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
            }
         }
      } else { /* hw w/ single vertex stream */
         /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
         /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
349
350
      }
   }
351
   /* gallium has instance divisor as part of elements state */
352
   if ((dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) && screen->specs.halti >= 2) {
353
354
355
356
357
      for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) {
         /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x), ctx->vertex_elements->NFE_VERTEX_STREAMS_VERTEX_DIVISOR[x]);
      }
   }

358
359
   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {

360
      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
   }
   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
      /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
      /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
   }
   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
      /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
      /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
      /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
      /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
      /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
      /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
      /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
      /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
      uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
      /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
      /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
      /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
   }
396
   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {
397
398
399
400
      /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, ctx->clipping.minx << 16);
      /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, ctx->clipping.miny << 16);
      /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_SCISSOR_MARGIN_RIGHT);
      /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_SCISSOR_MARGIN_BOTTOM);
401
402
403
404
405
406
407
408
   }
   if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
      struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

      /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
      /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
      /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
   }
409
   if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {
410
411
      /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_CLIP_MARGIN_RIGHT);
      /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_CLIP_MARGIN_BOTTOM);
412
   }
413
414
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
415
416
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
417
      /*00E08*/ EMIT_STATE(RA_EARLY_DEPTH, etna_zsa_state(ctx->zsa)->RA_DEPTH_CONFIG);
418
419
420
421
422
423
424
425
426
427
428
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
      /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
      /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
                           ctx->framebuffer.msaa_mode
                              ? ctx->shader_state.PS_INPUT_COUNT_MSAA
                              : ctx->shader_state.PS_INPUT_COUNT);
      /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
                           ctx->framebuffer.msaa_mode
                              ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
                              : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
429
430
      /*01010*/ EMIT_STATE(PS_CONTROL, ctx->framebuffer.PS_CONTROL);
      /*01030*/ EMIT_STATE(PS_CONTROL_EXT, ctx->framebuffer.PS_CONTROL_EXT);
431
   }
432
433
   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SHADER))) {
      /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, (etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG |
434
                                             ctx->framebuffer.PE_DEPTH_CONFIG));
435
436
437
438
439
440
441
442
   }
   if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
      /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
      /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
      /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);

443
      if (screen->specs.pixel_pipes == 1) {
444
445
446
447
448
         /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
      }

      /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
   }
449
450
451

   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP[ccw];
452
453
      /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
   }
454
455
456
   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG[ccw];
      /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG[ccw]);
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
   }
   if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
      /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
      /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
      uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
      /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
      uint32_t val;
      /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
       * as a mask to enable the bits from blend PE_COLOR_FORMAT */
      val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
              VIVS_PE_COLOR_FORMAT_OVERWRITE);
      val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
      val &= ctx->framebuffer.PE_COLOR_FORMAT;
      /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
   }
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
480
      if (screen->specs.pixel_pipes == 1) {
481
482
483
         /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
484
      } else if (screen->specs.pixel_pipes == 2) {
485
486
487
488
489
490
491
492
493
494
         /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
         /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
         /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
         /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
         /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
         /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
      } else {
         abort();
      }
   }
495
496
497
   if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_ZSA))) {
      uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT;
      /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, val | ctx->stencil_ref.PE_STENCIL_CONFIG_EXT[ccw]);
498
   }
499
500
501
502
   if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
      /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);
   }
503
504
505
506
507
508
   if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
      struct etna_blend_state *blend = etna_blend_state(ctx->blend);
      for (int x = 0; x < 2; ++x) {
         /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
      }
   }
509
510
   if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR)) &&
       VIV_FEATURE(screen, chipMinorFeatures1, HALF_FLOAT)) {
511
512
513
         /*014B0*/ EMIT_STATE(PE_ALPHA_COLOR_EXT0, ctx->blend_color.PE_ALPHA_COLOR_EXT0);
         /*014B4*/ EMIT_STATE(PE_ALPHA_COLOR_EXT1, ctx->blend_color.PE_ALPHA_COLOR_EXT1);
   }
514
515
516
   if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
      /*014B8*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT2, etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT2[ccw]);
   }
517
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER)) && screen->specs.halti >= 3)
518
      /*014BC*/ EMIT_STATE(PE_MEM_CONFIG, ctx->framebuffer.PE_MEM_CONFIG);
519
520
521
522
523
524
525
526
   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
      /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
      /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
      /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
      /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
      /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
      /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
      /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
527
      /*016BC*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE_EXT, ctx->framebuffer.TS_COLOR_CLEAR_VALUE_EXT);
528
529
530
531
532
533
534
   }
   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
      /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
   }
   etna_coalesce_end(stream, &coalesce);
   /* end only EMIT_STATE */

535
   /* Emit strongly architecture-specific state */
536
   if (screen->specs.halti >= 5)
537
538
539
540
      emit_halti5_only_state(ctx, vs_output_count);
   else
      emit_pre_halti5_state(ctx);

541
542
543
544
545
546
547
548
   /* Beginning from Halti0 some of the new shader and sampler states are not
    * self-synchronizing anymore. Thus we need to stall the FE on PE completion
    * before loading the new states to avoid corrupting the state of the
    * in-flight draw.
    */
   if (screen->specs.halti >= 0 &&
       (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF |
                      ETNA_DIRTY_SAMPLERS | ETNA_DIRTY_SAMPLER_VIEWS)))
549
550
      etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

551
552
   ctx->emit_texture_state(ctx);

553
554
555
556
557
558
559
560
561
562
563
564
565
566
   /* We need to update the uniform cache only if one of the following bits are
    * set in ctx->dirty:
    * - ETNA_DIRTY_SHADER
    * - ETNA_DIRTY_CONSTBUF
    * - uniforms_dirty_bits
    *
    * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In
    * all
    * other cases we can load on the changed uniforms.
    */
   static const uint32_t uniform_dirty_bits =
      ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;

   /**** Large dynamically-sized state ****/
567
   bool do_uniform_flush = screen->specs.halti < 5;
568
569
570
   if (dirty & (ETNA_DIRTY_SHADER)) {
      /* Special case: a new shader was loaded; simply re-load all uniforms and
       * shader code at once */
571
572
573
574
575
576
577
578
      /* This sequence is special, do not change ordering unless necessary. According to comment
         snippets in the Vivante kernel driver a process called "steering" goes on while programming
         shader state. This (as I understand it) means certain unified states are "steered"
         toward a specific shader unit (VS/PS/...) based on either explicit flags in register
         00860, or what other state is written before "auto-steering". So this means some
         state can legitimately be programmed multiple times.
       */

579
      if (screen->specs.halti >= 5) { /* ICACHE (HALTI5) */
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
         assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
         /* Set icache (VS) */
         etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
         etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
         assert(ctx->shader_state.VS_INST_ADDR.bo);
         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
         etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);

         /* Set icache (PS) */
         etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
         etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
         assert(ctx->shader_state.PS_INST_ADDR.bo);
         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
         etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);

      } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
         /* ICACHE (pre-HALTI5) */
601
         assert(screen->specs.has_icache && screen->specs.has_shader_range_registers);
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
         /* Set icache (VS) */
         etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
               VIVS_VS_ICACHE_CONTROL_ENABLE |
               VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
         assert(ctx->shader_state.VS_INST_ADDR.bo);
         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);

         /* Set icache (PS) */
         etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
               VIVS_VS_ICACHE_CONTROL_ENABLE |
               VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
         assert(ctx->shader_state.PS_INST_ADDR.bo);
         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
      } else {
         /* Upload shader directly, first flushing and disabling icache if
          * supported on this hw */
620
         if (screen->specs.has_icache) {
621
622
623
624
            etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
                  VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
                  VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
         }
625
         if (screen->specs.has_shader_range_registers) {
626
627
628
629
            etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
            etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
                                        0x100);
         }
630
         etna_set_state_multi(stream, screen->specs.vs_offset,
631
632
                              ctx->shader_state.vs_inst_mem_size,
                              ctx->shader_state.VS_INST_MEM);
633
         etna_set_state_multi(stream, screen->specs.ps_offset,
634
635
636
                              ctx->shader_state.ps_inst_mem_size,
                              ctx->shader_state.PS_INST_MEM);
      }
637

638
      if (screen->specs.has_unified_uniforms) {
639
         etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
640
         etna_set_state(stream, VIVS_PS_UNIFORM_BASE, screen->specs.max_vs_uniforms);
641
      }
642
643
644

      if (do_uniform_flush)
         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
Jonathan Marek's avatar
Jonathan Marek committed
645

646
      etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);
Jonathan Marek's avatar
Jonathan Marek committed
647

648
649
      if (do_uniform_flush)
         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
Jonathan Marek's avatar
Jonathan Marek committed
650

651
      etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
652

653
      if (screen->specs.halti >= 5) {
654
655
656
657
658
         /* HALTI5 needs to be prompted to pre-fetch shaders */
         etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
         etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
         etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
      }
659
   } else {
660
      /* ideally this cache would only be flushed if there are VS uniform changes */
661
662
      if (do_uniform_flush)
         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
Jonathan Marek's avatar
Jonathan Marek committed
663
664

      if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
665
         etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);
666

667
      /* ideally this cache would only be flushed if there are PS uniform changes */
668
669
      if (do_uniform_flush)
         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
Jonathan Marek's avatar
Jonathan Marek committed
670
671

      if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
672
         etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
673
674
675
676
677
678
   }
/**** End of state update ****/
#undef EMIT_STATE
#undef EMIT_STATE_FIXP
#undef EMIT_STATE_RELOC
   ctx->dirty = 0;
679
   ctx->dirty_sampler_views = 0;
680
}