Commit 99132849 authored by Marek Olšák

radeonsi: move SI and CIK+ SDMA code into 1 common function for cleanups


Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
parent 3c265c25
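
The merge point is a new si_sdma_copy_buffer() in si_dma_cs.c that handles the GFX6 (SI) and GFX7+ (CIK and newer) paths internally, so buffer copies no longer have to go through the per-chip sctx->dma_copy hook. Its prototype, as declared in si_pipe.h further down:

	void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
				 struct pipe_resource *src, uint64_t dst_offset,
				 uint64_t src_offset, uint64_t size);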
@@ -19,7 +19,6 @@ C_SOURCES := \
 	si_cp_dma.c \
 	si_debug.c \
 	si_descriptors.c \
-	si_dma.c \
 	si_dma_cs.c \
 	si_fence.c \
 	si_get.c \
......
@@ -26,58 +26,6 @@
 #include "sid.h"
 #include "si_pipe.h"
 
-static void cik_sdma_copy_buffer(struct si_context *ctx,
-				 struct pipe_resource *dst,
-				 struct pipe_resource *src,
-				 uint64_t dst_offset,
-				 uint64_t src_offset,
-				 uint64_t size)
-{
-	struct radeon_cmdbuf *cs = ctx->sdma_cs;
-	unsigned i, ncopy, csize;
-	unsigned align = ~0u;
-	struct si_resource *sdst = si_resource(dst);
-	struct si_resource *ssrc = si_resource(src);
-
-	/* Mark the buffer range of destination as valid (initialized),
-	 * so that transfer_map knows it should wait for the GPU when mapping
-	 * that range. */
-	util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
-		       dst_offset + size);
-
-	dst_offset += sdst->gpu_address;
-	src_offset += ssrc->gpu_address;
-
-	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
-
-	/* Align copy size to dw if src/dst address are dw aligned */
-	if ((src_offset & 0x3) == 0 &&
-	    (dst_offset & 0x3) == 0 &&
-	    size > 4 &&
-	    (size & 3) != 0) {
-		align = ~0x3u;
-		ncopy++;
-	}
-
-	si_need_dma_space(ctx, ncopy * 7, sdst, ssrc);
-
-	for (i = 0; i < ncopy; i++) {
-		csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size;
-		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
-						CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
-						0));
-		radeon_emit(cs, ctx->chip_class >= GFX9 ? csize - 1 : csize);
-		radeon_emit(cs, 0); /* src/dst endian swap */
-		radeon_emit(cs, src_offset);
-		radeon_emit(cs, src_offset >> 32);
-		radeon_emit(cs, dst_offset);
-		radeon_emit(cs, dst_offset >> 32);
-		dst_offset += csize;
-		src_offset += csize;
-		size -= csize;
-	}
-}
-
 static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
 {
 	width = u_minify(width, level);
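
The align/ncopy++ trick in the removed function above (kept verbatim in the unified function further down) deserves a note: when both addresses are dword-aligned but the size is not, every chunk is rounded down to a whole number of dwords and the 1-3 leftover bytes go out as one extra byte-sized packet, hence the ncopy++. A minimal standalone sketch of just that chunking logic; the packet-size cap here is an assumed placeholder, not the CIK_SDMA_COPY_MAX_SIZE value from sid.h:

	#include <stdint.h>
	#include <stdio.h>

	/* Placeholder cap for illustration only; the driver uses
	 * CIK_SDMA_COPY_MAX_SIZE from sid.h. */
	#define MAX_COPY_SIZE (1ull << 20)

	static uint64_t min2(uint64_t a, uint64_t b) { return a < b ? a : b; }

	/* Print the chunk sizes the CIK+ copy loop would emit. */
	static void show_chunks(uint64_t size, int dword_aligned_addrs)
	{
		/* ~0 keeps every byte; ~3 rounds each chunk down to a dword. */
		uint64_t align = (dword_aligned_addrs && size > 4 && (size & 3))
				 ? ~0x3ull : ~0ull;

		while (size) {
			uint64_t csize = size >= 4 ? min2(size & align, MAX_COPY_SIZE)
						   : size;
			printf("chunk: %llu bytes\n", (unsigned long long)csize);
			size -= csize;
		}
	}

	int main(void)
	{
		show_chunks(10, 1); /* prints 8, then 2: aligned bulk + byte tail */
		return 0;
	}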
@@ -680,17 +628,13 @@ static void cik_sdma_copy(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
+	assert(src->target != PIPE_BUFFER);
+
 	if (!sctx->sdma_cs ||
 	    src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
 	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
 		goto fallback;
 
-	/* If src is a buffer and dst is a texture, we are uploading metadata. */
-	if (src->target == PIPE_BUFFER) {
-		cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
-		return;
-	}
-
 	/* SDMA causes corruption. See:
 	 *    https://bugs.freedesktop.org/show_bug.cgi?id=110575
 	 *    https://bugs.freedesktop.org/show_bug.cgi?id=110635
......
@@ -34,7 +34,6 @@ files_libradeonsi = files(
 	'si_cp_dma.c',
 	'si_debug.c',
 	'si_descriptors.c',
-	'si_dma.c',
 	'si_dma_cs.c',
 	'si_fence.c',
 	'si_get.c',
......
@@ -1212,7 +1212,6 @@ static void si_blit(struct pipe_context *ctx,
 	 * on failure (recursion).
 	 */
 	if (dst->surface.is_linear &&
-	    sctx->dma_copy &&
 	    util_can_blit_via_copy_region(info, false)) {
 		sctx->dma_copy(ctx, info->dst.resource, info->dst.level,
 			       info->dst.box.x, info->dst.box.y,
......
@@ -503,9 +503,9 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
 				       box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT)));
 		if (staging) {
 			/* Copy the VRAM buffer to the staging buffer. */
-			sctx->dma_copy(ctx, &staging->b.b, 0,
-				       box->x % SI_MAP_BUFFER_ALIGNMENT,
-				       0, 0, resource, 0, box);
+			si_sdma_copy_buffer(sctx, &staging->b.b, resource,
+					    box->x % SI_MAP_BUFFER_ALIGNMENT,
+					    box->x, box->width);
 
 			data = si_buffer_map_sync_with_rings(sctx, staging,
 							     usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
......
si_dma.c (deleted in full by this commit):

/*
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
* Copyright 2018 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sid.h"
#include "si_pipe.h"
#include "util/format/u_format.h"
static void si_dma_copy_buffer(struct si_context *ctx,
			       struct pipe_resource *dst,
			       struct pipe_resource *src,
			       uint64_t dst_offset,
			       uint64_t src_offset,
			       uint64_t size)
{
	struct radeon_cmdbuf *cs = ctx->sdma_cs;
	unsigned i, ncopy, count, max_size, sub_cmd, shift;
	struct si_resource *sdst = si_resource(dst);
	struct si_resource *ssrc = si_resource(src);

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += sdst->gpu_address;
	src_offset += ssrc->gpu_address;

	/* see whether we should use the dword-aligned or byte-aligned copy */
	if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
		sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
		shift = 2;
		max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE;
	} else {
		sub_cmd = SI_DMA_COPY_BYTE_ALIGNED;
		shift = 0;
		max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
	}

	ncopy = DIV_ROUND_UP(size, max_size);
	si_need_dma_space(ctx, ncopy * 5, sdst, ssrc);

	for (i = 0; i < ncopy; i++) {
		count = MIN2(size, max_size);
		radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
					      count >> shift));
		radeon_emit(cs, dst_offset);
		radeon_emit(cs, src_offset);
		radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
		radeon_emit(cs, (src_offset >> 32UL) & 0xff);
		dst_offset += count;
		src_offset += count;
		size -= count;
	}
}

static void si_dma_copy(struct pipe_context *ctx,
			struct pipe_resource *dst,
			unsigned dst_level,
			unsigned dstx, unsigned dsty, unsigned dstz,
			struct pipe_resource *src,
			unsigned src_level,
			const struct pipe_box *src_box)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->sdma_cs == NULL ||
	    src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
		goto fallback;
	}

	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
		si_dma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
		return;
	}

	/* SI SDMA image copies are unimplemented. */
fallback:
	si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
				src, src_level, src_box);
}

void si_init_dma_functions(struct si_context *sctx)
{
	sctx->dma_copy = si_dma_copy;
}
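
Note how the GFX6 path above encodes the transfer length in the packet header itself (SI_DMA_PACKET(..., count >> shift)): the dword-aligned sub-command counts dwords (shift = 2) while the byte-aligned one counts bytes (shift = 0), each with its own per-packet cap. A small sketch of just that mode selection, with made-up size limits standing in for the SI_DMA_COPY_MAX_* constants from sid.h:

	#include <stdint.h>

	/* Assumed placeholder limits, not the driver's values. */
	#define MAX_DWORD_ALIGNED_BYTES 0x3fffcu
	#define MAX_BYTE_ALIGNED_BYTES  0xffffu

	struct gfx6_copy_mode {
		unsigned shift;    /* packet count = bytes >> shift */
		unsigned max_size; /* per-packet byte cap */
	};

	static struct gfx6_copy_mode pick_copy_mode(uint64_t dst_offset,
						    uint64_t src_offset,
						    uint64_t size)
	{
		/* All three must be dword multiples for the fast sub-command. */
		if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4))
			return (struct gfx6_copy_mode){ 2, MAX_DWORD_ALIGNED_BYTES };
		return (struct gfx6_copy_mode){ 0, MAX_BYTE_ALIGNED_BYTES };
	}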
@@ -125,6 +125,95 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 	}
 }
 
+void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
+			 struct pipe_resource *src, uint64_t dst_offset,
+			 uint64_t src_offset, uint64_t size)
+{
+	struct radeon_cmdbuf *cs = sctx->sdma_cs;
+	unsigned i, ncopy, csize;
+	struct si_resource *sdst = si_resource(dst);
+	struct si_resource *ssrc = si_resource(src);
+
+	if (!cs ||
+	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+	    src->flags & PIPE_RESOURCE_FLAG_SPARSE) {
+		si_copy_buffer(sctx, dst, src, dst_offset, src_offset, size);
+		return;
+	}
+
+	/* Mark the buffer range of destination as valid (initialized),
+	 * so that transfer_map knows it should wait for the GPU when mapping
+	 * that range. */
+	util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
+		       dst_offset + size);
+
+	dst_offset += sdst->gpu_address;
+	src_offset += ssrc->gpu_address;
+
+	if (sctx->chip_class == GFX6) {
+		unsigned max_size, sub_cmd, shift;
+
+		/* see whether we should use the dword-aligned or byte-aligned copy */
+		if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
+			sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
+			shift = 2;
+			max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE;
+		} else {
+			sub_cmd = SI_DMA_COPY_BYTE_ALIGNED;
+			shift = 0;
+			max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
+		}
+
+		ncopy = DIV_ROUND_UP(size, max_size);
+		si_need_dma_space(sctx, ncopy * 5, sdst, ssrc);
+
+		for (i = 0; i < ncopy; i++) {
+			csize = MIN2(size, max_size);
+			radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
+						      csize >> shift));
+			radeon_emit(cs, dst_offset);
+			radeon_emit(cs, src_offset);
+			radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
+			radeon_emit(cs, (src_offset >> 32UL) & 0xff);
+			dst_offset += csize;
+			src_offset += csize;
+			size -= csize;
+		}
+		return;
+	}
+
+	/* The following code is for CI and later. */
+	unsigned align = ~0u;
+	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
+
+	/* Align copy size to dw if src/dst address are dw aligned */
+	if ((src_offset & 0x3) == 0 &&
+	    (dst_offset & 0x3) == 0 &&
+	    size > 4 &&
+	    (size & 3) != 0) {
+		align = ~0x3u;
+		ncopy++;
+	}
+
+	si_need_dma_space(sctx, ncopy * 7, sdst, ssrc);
+
+	for (i = 0; i < ncopy; i++) {
+		csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size;
+		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+						CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
+						0));
+		radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize);
+		radeon_emit(cs, 0); /* src/dst endian swap */
+		radeon_emit(cs, src_offset);
+		radeon_emit(cs, src_offset >> 32);
+		radeon_emit(cs, dst_offset);
+		radeon_emit(cs, dst_offset >> 32);
+		dst_offset += csize;
+		src_offset += csize;
+		size -= csize;
+	}
+}
+
 void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
 		       struct si_resource *dst, struct si_resource *src)
 {
......
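
With both generations folded into one function, callers need neither a pipe_box nor the dma_copy hook, as the updated call sites below show. A hypothetical wrapper, for illustration, that copies a whole buffer and relies on the si_copy_buffer fallback shown above when SDMA is unavailable:

	/* Hypothetical helper, not part of the commit. */
	static void copy_whole_buffer(struct si_context *sctx,
				      struct pipe_resource *dst,
				      struct pipe_resource *src)
	{
		/* si_sdma_copy_buffer falls back to si_copy_buffer (CP DMA)
		 * internally when sdma_cs is NULL or either resource is sparse. */
		si_sdma_copy_buffer(sctx, dst, src, 0, 0, src->width0);
	}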
@@ -119,14 +119,12 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 	ctx->sdma_uploads_in_progress = true;
 	for (unsigned i = 0; i < ctx->num_sdma_uploads; i++) {
 		struct si_sdma_upload *up = &ctx->sdma_uploads[i];
-		struct pipe_box box;
 
 		assert(up->src_offset % 4 == 0 && up->dst_offset % 4 == 0 &&
 		       up->size % 4 == 0);
 
-		u_box_1d(up->src_offset, up->size, &box);
-		ctx->dma_copy(&ctx->b, &up->dst->b.b, 0, up->dst_offset, 0, 0,
-			      &up->src->b.b, 0, &box);
+		si_sdma_copy_buffer(ctx, &up->dst->b.b, &up->src->b.b,
+				    up->dst_offset, up->src_offset, up->size);
 	}
 	ctx->sdma_uploads_in_progress = false;
 
 	si_unref_sdma_uploads(ctx);
......
@@ -595,7 +595,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	if (sctx->chip_class >= GFX7)
 		cik_init_sdma_functions(sctx);
 	else
-		si_init_dma_functions(sctx);
+		sctx->dma_copy = si_resource_copy_region;
 
 	if (sscreen->debug_flags & DBG(FORCE_SDMA))
 		sctx->b.resource_copy_region = sctx->dma_copy;
......
@@ -1363,14 +1363,12 @@ void si_check_vm_faults(struct si_context *sctx,
 			struct radeon_saved_cs *saved, enum ring_type ring);
 bool si_replace_shader(unsigned num, struct si_shader_binary *binary);
 
-/* si_dma.c */
-void si_init_dma_functions(struct si_context *sctx);
-
 /* si_dma_cs.c */
 void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst,
 			   uint64_t offset);
 void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 			  uint64_t offset, uint64_t size, unsigned clear_value);
+void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
+			 struct pipe_resource *src, uint64_t dst_offset,
+			 uint64_t src_offset, uint64_t size);
 void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
 		       struct si_resource *dst, struct si_resource *src);
 void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
......
@@ -191,9 +191,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
 			} else if (test_sdma) {
 				/* SDMA */
 				if (is_copy) {
-					struct pipe_box box;
-					u_box_1d(0, size, &box);
-					sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box);
+					si_sdma_copy_buffer(sctx, dst, src, 0, 0, size);
 				} else {
 					si_sdma_clear_buffer(sctx, dst, 0, size, clear_value);
 				}
......
@@ -1513,14 +1513,12 @@ si_texture_create_object(struct pipe_screen *screen,
 		/* Copy the staging buffer to the buffer backing the texture. */
 		struct si_context *sctx = (struct si_context*)sscreen->aux_context;
-		struct pipe_box box;
-		u_box_1d(0, buf->b.b.width0, &box);
 
 		assert(tex->surface.dcc_retile_map_offset <= UINT_MAX);
 		simple_mtx_lock(&sscreen->aux_context_lock);
-		sctx->dma_copy(&sctx->b, &tex->buffer.b.b, 0,
-			       tex->surface.dcc_retile_map_offset, 0, 0,
-			       &buf->b.b, 0, &box);
+		si_sdma_copy_buffer(sctx, &tex->buffer.b.b, &buf->b.b,
+				    tex->surface.dcc_retile_map_offset,
+				    0, buf->b.b.width0);
 		sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
 		simple_mtx_unlock(&sscreen->aux_context_lock);
......