Commit 277632d8 authored by Nan Hai Zou's avatar Nan Hai Zou

965 xvmc, current only frame based DCT_type support

parent 6b6be2b2
......@@ -65,7 +65,9 @@ INTEL_XVMC_SRCS = \
i830_hwmc.h \
i830_hwmc.c \
i915_hwmc.c \
i915_hwmc.h
i915_hwmc.h \
i965_hwmc.c \
i965_hwmc.h
intel_drv_la_SOURCES = \
......@@ -135,7 +137,6 @@ INTEL_G4A = \
exa_wm_src_sample_argb.g4a \
exa_wm_src_sample_a.g4a \
exa_wm_src_sample_planar.g4a \
exa_wm_src_data.g4a \
exa_wm_mask_affine.g4a \
exa_wm_mask_projective.g4a \
exa_wm_mask_sample_argb.g4a \
......
......@@ -51,17 +51,14 @@ Bool intel_xvmc_probe(ScrnInfoPtr pScrn)
{
I830Ptr pI830 = I830PTR(pScrn);
Bool ret = FALSE;
if (!pI830->XvMCEnabled)
if (!pI830->XvMCEnabled)
return FALSE;
if (IS_I9XX(pI830)) {
if (!IS_I965G(pI830))
ret = intel_xvmc_set_driver(&i915_xvmc_driver);
/*
else
ret = intel_xvmc_set_driver(&i965_xvmc_driver);
*/
} else {
ErrorF("Your chipset doesn't support XvMC.\n");
return FALSE;
......@@ -124,7 +121,7 @@ Bool intel_xvmc_init_batch(ScrnInfoPtr pScrn)
I830Ptr pI830 = I830PTR(pScrn);
if (!i830_allocate_xvmc_buffer(pScrn, "[XvMC] batch buffer",
&(xvmc_driver->batch), 8 * 1024,
&(xvmc_driver->batch), 16 * 1024,
ALIGN_BOTH_ENDS))
return FALSE;
......
......@@ -70,7 +70,8 @@ struct intel_xvmc_command {
unsigned int subPicNo;
unsigned int flags;
unsigned int real_id;
unsigned int pad[6];
unsigned int surf_offset;
unsigned int pad[5];
};
#ifdef _INTEL_XVMC_SERVER_
......@@ -91,7 +92,7 @@ struct intel_xvmc_driver {
extern struct intel_xvmc_driver *xvmc_driver;
extern struct intel_xvmc_driver i915_xvmc_driver;
/* extern struct intel_xvmc_driver i965_xvmc_driver; */
extern struct intel_xvmc_driver i965_xvmc_driver;
extern Bool intel_xvmc_set_driver(struct intel_xvmc_driver *);
extern Bool intel_xvmc_probe(ScrnInfoPtr);
......
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author:
* Zou Nan hai <nanhai.zou@intel.com>
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <X11/extensions/Xv.h>
#include <X11/extensions/XvMC.h>
#include <fourcc.h>
#include <errno.h>
#include "i830.h"
#include "i830_dri.h"
#define _INTEL_XVMC_SERVER_
#include "i830_hwmc.h"
#include "i965_hwmc.h"
/* Row stride, in bytes, used for a plane that is w pixels wide (no padding). */
#define STRIDE(w) (w)
/*
 * Size in bytes of a w x h image that needs h rows of (w + w/2) bytes,
 * i.e. 1.5 bytes per pixel as in planar YUV 4:2:0.
 * Every argument is parenthesized so that expressions such as
 * SIZE_YUV420(a + b, c + d) expand correctly.
 */
#define SIZE_YUV420(w, h) ((h) * (STRIDE(w) + STRIDE((w) >> 1)))
/* The Xv adaptor's original PutImage hook; saved by init() and chained to by put_image(). */
static PutImageFuncPtr XvPutImage;
/*
 * Allocate an X-major tiled buffer of `size` bytes with the given pitch and
 * publish it through drmAddMap() as a DRM_AGP map.
 *
 * On success fills in mem->buffer, mem->handle, mem->size and mem->offset
 * and returns Success.  On failure returns BadAlloc; if the allocation
 * succeeded but the mapping failed, the allocation is released again.
 *
 * NOTE(review): the `alignment` parameter is not used; GTT_PAGE_SIZE is
 * always passed to the allocator -- confirm whether that is intended.
 */
static int alloc_drm_memory_tiled(ScrnInfoPtr pScrn,
        struct drm_memory_block *mem,
        char *name, size_t size, unsigned long pitch, unsigned long alignment)
{
    I830Ptr i830 = I830PTR(pScrn);
    int map_status;

    mem->buffer = i830_allocate_memory_tiled(pScrn, name, size, pitch,
                                             GTT_PAGE_SIZE, ALIGN_BOTH_ENDS,
                                             TILE_XMAJOR);
    if (mem->buffer == NULL) {
        ErrorF("Fail to alloc \n");
        return BadAlloc;
    }

    /* The map address is the buffer offset rebased onto LinearAddr. */
    map_status = drmAddMap(i830->drmSubFD,
                           (drm_handle_t)(mem->buffer->offset + i830->LinearAddr),
                           size, DRM_AGP, 0,
                           (drmAddress)&mem->handle);
    if (map_status < 0) {
        ErrorF("Fail to map %d \n", errno);
        i830_free_memory(pScrn, mem->buffer);
        return BadAlloc;
    }

    mem->size = size;
    mem->offset = mem->buffer->offset;
    return Success;
}
/*
 * Allocate a linear buffer of `size` bytes and publish it through
 * drmAddMap() as a DRM_AGP map.
 *
 * On success fills in mem->buffer, mem->handle, mem->size and mem->offset
 * and returns Success.  On failure returns BadAlloc; if the allocation
 * succeeded but the mapping failed, the allocation is released again.
 */
static int alloc_drm_memory(ScrnInfoPtr pScrn,
        struct drm_memory_block *mem,
        char *name, size_t size)
{
    I830Ptr i830 = I830PTR(pScrn);
    int map_status;

    mem->buffer = i830_allocate_memory(pScrn, name, size,
                                       GTT_PAGE_SIZE, ALIGN_BOTH_ENDS);
    if (mem->buffer == NULL) {
        ErrorF("Fail to alloc \n");
        return BadAlloc;
    }

    /* The map address is the buffer offset rebased onto LinearAddr. */
    map_status = drmAddMap(i830->drmSubFD,
                           (drm_handle_t)(mem->buffer->offset + i830->LinearAddr),
                           size, DRM_AGP, 0,
                           (drmAddress)&mem->handle);
    if (map_status < 0) {
        ErrorF("Fail to map %d \n", errno);
        i830_free_memory(pScrn, mem->buffer);
        return BadAlloc;
    }

    mem->size = size;
    mem->offset = mem->buffer->offset;
    return Success;
}
/*
 * Undo alloc_drm_memory()/alloc_drm_memory_tiled(): remove the DRM map
 * identified by mem->handle, then release the underlying allocation.
 */
static void free_drm_memory(ScrnInfoPtr pScrn,
        struct drm_memory_block *mem)
{
    I830Ptr i830 = I830PTR(pScrn);

    drmRmMap(i830->drmSubFD, mem->handle);
    i830_free_memory(pScrn, mem->buffer);
}
/*
 * XvMC CreateContext hook.
 *
 * Builds the per-context private data: batch buffer description, a static
 * buffer of I965_MC_STATIC_BUFFER_SIZE bytes and a block buffer sized from
 * the context dimensions.  Two copies of the private struct are made:
 *   - *private / *num_privates: handed back to the caller as an array of
 *     CARD32 words (ownership passes to the caller; presumably the XvMC
 *     extension frees it after replying to the client -- confirm);
 *   - context->driver_priv: the server-side copy released by
 *     destroy_context().
 *
 * Returns Success, or BadAlloc with every partially acquired resource
 * released again.
 */
static int create_context(ScrnInfoPtr pScrn,
        XvMCContextPtr context, int *num_privates, CARD32 **private)
{
    struct i965_xvmc_context *private_context, *context_dup;
    I830Ptr I830 = I830PTR(pScrn);
    DRIInfoPtr driinfo = I830->pDRIInfo;
    /* Number of 16x16 macroblocks covering the picture, rounded up. */
    unsigned int blocknum =
        (((context->width + 15)/16)*((context->height+15)/16));
    /*
     * 6 blocks of 64 shorts per macroblock (presumably 4 luma + 2 chroma
     * 8x8 coefficient blocks), rounded up to a 4 KiB multiple.
     */
    unsigned int blocksize = 6*blocknum*64*sizeof(short);
    blocksize = (blocksize + 4095)&(~4095);

    if ((private_context = Xcalloc(sizeof(*private_context))) == NULL) {
        ErrorF("XVMC Can not allocate private context\n");
        return BadAlloc;
    }

    if ((context_dup = Xcalloc(sizeof(*private_context))) == NULL) {
        ErrorF("XVMC Can not allocate private context\n");
        goto err_free_private;
    }

    private_context->comm.type = xvmc_driver->flag;
    private_context->comm.sarea_size = driinfo->SAREASize;
    private_context->comm.batchbuffer.offset = xvmc_driver->batch->offset;
    private_context->comm.batchbuffer.size = xvmc_driver->batch->size;
    private_context->comm.batchbuffer.handle = xvmc_driver->batch_handle;

    if (alloc_drm_memory(pScrn, &private_context->static_buffer,
                "XVMC static buffers",
                I965_MC_STATIC_BUFFER_SIZE)) {
        ErrorF("Unable to allocate and map static buffer for XVMC\n");
        goto err_free_dup;
    }

    if (alloc_drm_memory(pScrn, &private_context->blocks,
                "XVMC blocks", blocksize)) {
        ErrorF("Unable to allocate and map block buffer for XVMC\n");
        goto err_free_static;
    }

    *num_privates = sizeof(*private_context)/sizeof(CARD32);
    *private = (CARD32 *)private_context;
    /* Snapshot taken after both buffers exist so the copy can free them. */
    memcpy(context_dup, private_context, sizeof(*private_context));
    context->driver_priv = context_dup;

    return Success;

err_free_static:
    free_drm_memory(pScrn, &private_context->static_buffer);
err_free_dup:
    Xfree(context_dup);
err_free_private:
    Xfree(private_context);
    return BadAlloc;
}
/*
 * XvMC DestroyContext hook: releases everything create_context() attached
 * to the server-side private copy -- the static buffer, the block buffer
 * (previously leaked) and the private struct itself.
 */
static void destroy_context(ScrnInfoPtr pScrn, XvMCContextPtr context)
{
    struct i965_xvmc_context *private_context = context->driver_priv;

    /* Nothing to do if the context never got its private data. */
    if (private_context == NULL)
        return;

    free_drm_memory(pScrn, &private_context->static_buffer);
    free_drm_memory(pScrn, &private_context->blocks);
    Xfree(private_context);
    context->driver_priv = NULL;
}
/*
 * XvMC CreateSurface hook.
 *
 * Claims the first free slot in the owning context's surface table,
 * allocates the private surface record plus a page-rounded buffer of
 * SIZE_YUV420(width, height) bytes, and returns a copy of the record to
 * the caller through *priv / *num_priv (as an array of CARD32 words).
 *
 * The surface is published in the context table and in
 * surface->driver_priv only after every allocation has succeeded, so a
 * failure neither leaks memory nor leaves a half-built surface occupying
 * a slot.
 *
 * Returns Success, or BadAlloc when no slot is free or an allocation fails.
 */
static int create_surface(ScrnInfoPtr pScrn, XvMCSurfacePtr surface,
        int *num_priv, CARD32 **priv)
{
    XvMCContextPtr ctx = surface->context;
    struct i965_xvmc_surface *priv_surface, *surface_dup;
    struct i965_xvmc_context *priv_ctx = ctx->driver_priv;
    size_t bufsize = SIZE_YUV420(ctx->width, ctx->height);
    int i;

    /* Find the first unused slot. */
    for (i = 0; i < I965_MAX_SURFACES; i++) {
        if (priv_ctx->surfaces[i] == NULL)
            break;
    }
    if (i >= I965_MAX_SURFACES) {
        ErrorF("I965 XVMC too many surfaces in one context\n");
        return BadAlloc;
    }

    priv_surface = Xcalloc(sizeof(*priv_surface));
    if (priv_surface == NULL)
        return BadAlloc;

    surface_dup = Xcalloc(sizeof(*priv_surface));
    if (surface_dup == NULL) {
        Xfree(priv_surface);
        return BadAlloc;
    }

    priv_surface->no = i;
    /* The server-side record's own address is exported as the handle. */
    priv_surface->handle = priv_surface;

    if (alloc_drm_memory(pScrn, &priv_surface->buffer,
                "surface buffer\n", (bufsize+0xFFF)&~(0xFFF))) {
        ErrorF("Unable to allocate surface buffer\n");
        Xfree(surface_dup);
        Xfree(priv_surface);
        return BadAlloc;
    }

    /* Everything succeeded: publish the surface. */
    priv_ctx->surfaces[i] = surface->driver_priv = priv_surface;

    memcpy(surface_dup, priv_surface, sizeof(*priv_surface));
    *num_priv = sizeof(*priv_surface)/sizeof(CARD32);
    *priv = (CARD32 *)surface_dup;

    return Success;
}
/*
 * XvMC DestroySurface hook (the misspelled name is what the adaptor table
 * references).  Clears the surface's slot in its context, then releases
 * the surface buffer and the private record.
 */
static void destory_surface(ScrnInfoPtr pScrn, XvMCSurfacePtr surface)
{
    struct i965_xvmc_surface *doomed = surface->driver_priv;
    struct i965_xvmc_context *owner = surface->context->driver_priv;

    /* Free the slot so create_surface() can reuse it. */
    owner->surfaces[doomed->no] = NULL;

    free_drm_memory(pScrn, &doomed->buffer);
    Xfree(doomed);
}
/*
 * XvMC CreateSubpicture hook.  Subpictures are not implemented: nothing is
 * allocated, *num_priv and *priv are left untouched, and Success is
 * reported unconditionally.
 */
static int create_subpicture(ScrnInfoPtr pScrn, XvMCSubpicturePtr subpicture,
        int *num_priv, CARD32 **priv)
{
    (void)pScrn;
    (void)subpicture;
    (void)num_priv;
    (void)priv;

    return Success;
}
/*
 * XvMC DestroySubpicture hook.  Intentionally empty: create_subpicture()
 * allocates nothing, so there is nothing to release.
 */
static void destroy_subpicture(ScrnInfoPtr pScrn, XvMCSubpicturePtr subpicture)
{
    (void)pScrn;
    (void)subpicture;
}
/*
 * PutImage wrapper installed over the Xv adaptor's own hook by init().
 *
 * For id == FOURCC_XVMC the image data is not pixel data but a
 * struct intel_xvmc_command; the surface to display lives in video memory
 * at FbBase + cmd->surf_offset and is forwarded to the original PutImage
 * as FOURCC_I420.  Every other image id is forwarded unchanged.
 *
 * Returns the status of the chained PutImage call (previously the status
 * was dropped and Success was always returned).
 */
static int put_image(ScrnInfoPtr pScrn,
        short src_x, short src_y,
        short drw_x, short drw_y, short src_w,
        short src_h, short drw_w, short drw_h,
        int id, unsigned char *buf, short width,
        short height, Bool sync, RegionPtr clipBoxes, pointer data,
        DrawablePtr pDraw)
{
    I830Ptr pI830 = I830PTR(pScrn);

    if (id == FOURCC_XVMC) {
        struct intel_xvmc_command *cmd = (struct intel_xvmc_command *)buf;

        /*
         * NOTE(review): surf_offset is read from the caller-supplied image
         * buffer and is not range-checked against the framebuffer mapping
         * before use -- consider validating it.
         */
        buf = pI830->FbBase + cmd->surf_offset;
        id = FOURCC_I420;
    }

    return XvPutImage(pScrn, src_x, src_y, drw_x, drw_y, src_w, src_h,
                      drw_w, drw_h, id, buf, width, height, sync, clipBoxes,
                      data, pDraw);
}
/*
 * Driver init hook: sets up the XvMC batch buffer and, once that works,
 * interposes put_image() in front of the adaptor's PutImage, remembering
 * the original in XvPutImage.  Returns TRUE on success, FALSE if the batch
 * buffer could not be initialised (the adaptor is then left untouched).
 */
static Bool init(ScrnInfoPtr screen_info, XF86VideoAdaptorPtr adaptor)
{
    Bool batch_ready = intel_xvmc_init_batch(screen_info);

    if (batch_ready) {
        XvPutImage = adaptor->PutImage;
        adaptor->PutImage = put_image;
        return TRUE;
    }

    ErrorF("[XvMC] fail to init batch buffer\n");
    return FALSE;
}
/* Driver fini hook.  Currently a no-op: nothing set up by init() is torn down here. */
static void fini(ScrnInfoPtr screen_info)
{
    (void)screen_info;
}
/*
 * Surface type advertised for MPEG-2 motion compensation on 4:2:0 YV12
 * surfaces.
 * NOTE(review): the initializers are positional; their meaning depends on
 * the member order of XF86MCSurfaceInfoRec (declared outside this file,
 * presumably id, chroma format, color description, max surface size,
 * max subpicture size, mc type, flags, subpicture list) -- confirm
 * against xf86xvmc.h before reordering or inserting values.
 */
static XF86MCSurfaceInfoRec yv12_mpeg2_surface =
{
FOURCC_YV12,
XVMC_CHROMA_FORMAT_420,
0,
1920,
1080,
1920,
1080,
XVMC_MPEG_2|XVMC_MOCOMP,
/* Not advertised for now: XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING */
XVMC_INTRA_UNSIGNED,
/* No subpicture list yet (would be &yv12_subpicture_list) */
NULL
};
/*
 * Surface type advertised for MPEG-1 motion compensation on 4:2:0 YV12
 * surfaces; identical to yv12_mpeg2_surface except for XVMC_MPEG_1.
 * NOTE(review): the initializers are positional; their meaning depends on
 * the member order of XF86MCSurfaceInfoRec (declared outside this file) --
 * confirm against xf86xvmc.h before reordering or inserting values.
 */
static XF86MCSurfaceInfoRec yv12_mpeg1_surface =
{
FOURCC_YV12,
XVMC_CHROMA_FORMAT_420,
0,
1920,
1080,
1920,
1080,
XVMC_MPEG_1|XVMC_MOCOMP,
/* Not advertised for now: XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING
(only XVMC_INTRA_UNSIGNED is set) */
XVMC_INTRA_UNSIGNED,
/* No subpicture list yet (would be &yv12_subpicture_list) */
NULL
};
/* Surface types exposed by the adaptor below; its num_surfaces is derived from this array's length. */
static XF86MCSurfaceInfoPtr surface_info[] = {
&yv12_mpeg2_surface,
&yv12_mpeg1_surface
};
/*
 * XvMC adaptor description for i965: the supported surface types plus the
 * context/surface/subpicture hooks implemented in this file.
 * NOTE(review): .name presumably has to match the name of the Xv adaptor
 * this XvMC adaptor is paired with -- confirm before changing the string.
 */
static XF86MCAdaptorRec adaptor = {
.name = "Intel(R) Textured Video",
.num_surfaces = sizeof(surface_info)/sizeof(surface_info[0]),
.surfaces = surface_info,
.CreateContext = create_context,
.DestroyContext = destroy_context,
.CreateSurface = create_surface,
/* sic: the function is spelled destory_surface in this file */
.DestroySurface = destory_surface,
.CreateSubpicture = create_subpicture,
.DestroySubpicture = destroy_subpicture
};
/*
 * The i965 XvMC driver instance (declared extern in i830_hwmc.h).
 * .flag is copied into each context's comm.type by create_context().
 */
struct intel_xvmc_driver i965_xvmc_driver = {
.name = "i965_xvmc",
.adaptor = &adaptor,
.flag = XVMC_I965_MPEG2_MC,
.init = init,
.fini = fini
};
/* Size in bytes (512 KiB) of the per-context static buffer allocated by create_context(). */
#define I965_MC_STATIC_BUFFER_SIZE (1024*512)
/* Number of slots in a context's surface table, i.e. the most surfaces one context can hold. */
#define I965_MAX_SURFACES 32
/* Forward declaration; only a pointer to the allocation record is stored here. */
struct _i830_memory;
/*
 * One driver memory allocation together with the DRM map that exposes it.
 * Filled in by alloc_drm_memory()/alloc_drm_memory_tiled() in i965_hwmc.c.
 */
struct drm_memory_block {
struct _i830_memory *buffer; /* allocation returned by the i830 memory allocator */
drm_handle_t handle; /* map handle filled in by drmAddMap(), passed to drmRmMap() */
drmAddress ptr; /* not written by the server code visible here; presumably the client-side mapping -- confirm */
size_t size; /* requested size in bytes */
unsigned long offset; /* copy of buffer->offset */
};
/*
 * Private record of one XvMC surface.  create_surface() keeps one copy on
 * the server and returns a second copy to its caller as CARD32 words.
 */
struct i965_xvmc_surface {
struct drm_memory_block buffer; /* surface pixel storage */
unsigned int no; /* index of this surface in the context's surfaces[] table */
void *handle; /* set by create_surface() to the address of the server-side record */
};
/*
 * Private record of one XvMC context.  create_context() keeps one copy on
 * the server and returns a second copy to its caller as CARD32 words.
 */
struct i965_xvmc_context {
struct _intel_xvmc_common comm; /* type, SAREA size and batch buffer description */
struct drm_memory_block static_buffer; /* I965_MC_STATIC_BUFFER_SIZE bytes */
struct drm_memory_block blocks; /* block buffer sized from the context dimensions */
struct i965_xvmc_surface *surfaces[I965_MAX_SURFACES]; /* NULL entries are free slots */
};
......@@ -16,6 +16,8 @@ libIntelXvMC_la_SOURCES = intel_xvmc.c \
i915_program.h \
i915_xvmc.c \
i915_xvmc.h \
i965_xvmc.c \
i965_xvmc.h \
intel_batchbuffer.c \
intel_batchbuffer.h \
xf86dri.c \
......@@ -25,4 +27,46 @@ libIntelXvMC_la_SOURCES = intel_xvmc.c \
libIntelXvMC_la_CFLAGS = @XORG_CFLAGS@ @DRI_CFLAGS@ @XVMCLIB_CFLAGS@ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0
libIntelXvMC_la_LDFLAGS = -version-number 1:0:0
libIntelXvMC_la_LIBADD = @DRI_LIBS@
INTEL_G4A = \
forward.g4a \
backward.g4a \
f_b.g4a \
ipicture.g4a \
null.g4a
INTEL_G4I = \
motion.g4i \
avg.g4i \
correct.g4i \
media_read.g4i \
media_read_avg_x0y1.g4i \
media_read_avg_x1y0.g4i \
media_read_avg_x1y1.g4i
INTEL_G4B = \
forward.g4b \
backward.g4b \
ipicture.g4b \
null.g4b \
f_b.g4b
EXTRA_DIST = \
$(INTEL_G4A) \
$(INTEL_G4I) \
$(INTEL_G4B)
if HAVE_GEN4ASM
SUFFIXES = .g4a .g4b
.g4a.g4b:
m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m
$(INTEL_G4B): $(INTEL_G4I)
BUILT_SOURCES= $(INTEL_G4B)
clean-local:
-rm -f $(INTEL_G4B)
endif
endif
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author:
* Zou Nan hai <nanhai.zou@intel.com>
*/
/* Average two predictions, with saturation.
* Intended result: f = (f1 + f2 + 1)/2;
* NOTE(review): the code below adds the two inputs and shifts right by one
* without adding the rounding 1 -- confirm whether the +1 is missing.
* Inputs: 64 bytes in g4/g5 and 64 bytes in g10/g11; result replaces g4/g5.
* g6-g9 are used as scratch.
*/
/* widen the bytes of g4/g5 to words in g6-g9 */
mov(16) g6.0<1>UW g4.0<16,16,1>UB {align1};
mov(16) g7.0<1>UW g4.16<16,16,1>UB {align1};
mov(16) g8.0<1>UW g5.0<16,16,1>UB {align1};
mov(16) g9.0<1>UW g5.16<16,16,1>UB {align1};
/* add the matching bytes of g10/g11 */
add(16) g6.0<1>UW g6.0<16,16,1>UW g10.0<16,16,1>UB {align1};
add(16) g7.0<1>UW g7.0<16,16,1>UW g10.16<16,16,1>UB {align1};
add(16) g8.0<1>UW g8.0<16,16,1>UW g11.0<16,16,1>UB {align1};
add(16) g9.0<1>UW g9.0<16,16,1>UW g11.16<16,16,1>UB {align1};
/* halve each sum, saturating, and store it in the low byte of its word */
shr.sat(16) g6.0<2>UB g6.0<16,16,1>UW 1UW {align1};
shr.sat(16) g7.0<2>UB g7.0<16,16,1>UW 1UW {align1};
shr.sat(16) g8.0<2>UB g8.0<16,16,1>UW 1UW {align1};
shr.sat(16) g9.0<2>UB g9.0<16,16,1>UW 1UW {align1};
/* pack the low bytes back into g4/g5 */
mov(16) g4.0<1>UB g6.0<16,16,2>UB{align1};
mov(16) g4.16<1>UB g7.0<16,16,2>UB{align1};
mov(16) g5.0<1>UB g8.0<16,16,2>UB{align1};
mov(16) g5.16<1>UB g9.0<16,16,2>UB{align1};
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author:
* Zou Nan hai <nanhai.zou@intel.com>
*
*/
/* Shader for backward-predicted motion compensation.
* For each 8x8 block of a macroblock (Y0..Y3, then U and V) it runs
* motion.g4i (presumably fetching the prediction -- confirm), runs
* correct.g4i guarded by the matching coded_block_pattern bit, and writes
* g4/g5 to the destination surface.
*/
mov (2) g1.16<1>UD g1.24<2,2,1>UD {align1};
/* save payload */
mov (8) g3.0<1>UD g1.0<8,8,1>UD {align1};
/* 8x8 media read/write payload */
mov (1) g1.8<1>UD 0x0070007UD {align1};
/* save payload, again */
mov (8) g2.0<1>UD g1.0<8,8,1>UD {align1};
define(`input_surface', `7')
define(`reg1', `g4')
define(`reg2', `g5')
define(`mv1', `g3.16')
define(`mv2', `g3.20')
/* Y0 */
/* (x', y') = (x, y) + (motion_vector.x >> 1, motion_vector.y >> 1) */
asr (2) g1.16<1>D g3.16<2,2,1>D 1D {align1};
add (2) g2.0<1>UD g3.0<2,2,1>UD g1.16<2,2,1>D {align1};
include(`motion.g4i')
/* if (mb->coded_block_pattern & 0x20) */
and.z (1) null g3.12<1,1,1>UD 0x20UD {align1};
/* add IDCT data */
include(`correct.g4i')
/* write Y0 to dest */
mov (16) m1.0<1>UD g4.0<8,8,1>UD {align1 compr};
send (16) 0 acc0<1>UW g1<8,8,1>UW write(0, 0, 2, 0) mlen 3 rlen 0 {align1};
/* Y1 */
/* (x', y') = (x+8, y) + (motion_vector.x >> 1, motion_vector.y >> 1) */
add (1) g1.0<1>UD g3.0<1,1,1>UD 8D {align1};
add (1) g1.4<1>UD g3.4<1,1,1>UD 0D {align1};
/* NOTE(review): Y1 and Y3 use add.sat here while Y0 and Y2 use plain add -- confirm this is intended */
add.sat (2) g2.0<1>UD g1.0<2,2,1>UD g1.16<2,2,1>D {align1};
include(`motion.g4i')
/* if (mb->coded_block_pattern & 0x10) */
and.z (1) null g3.12<1,1,1>UD 0x10UD {align1};
/* add IDCT data */
include(`correct.g4i')
/* write Y1 to dest */
mov (16) m1.0<1>UD g4.0<8,8,1>UD {align1 compr};
send (16) 0 acc0<1>UW g1<8,8,1>UW write(0, 0, 2, 0) mlen 3 rlen 0 {align1};
/* Y2 */
/* (x', y') = (x, y+8) + (motion_vector.x >> 1, motion_vector.y >> 1) */
add (1) g1.0<1>UD g3.0<1,1,1>UD 0D {align1};
add (1) g1.4<1>UD g3.4<1,1,1>UD 8D {align1};
add (2) g2.0<1>UD g1.0<2,2,1>UD g1.16<2,2,1>D {align1};
include(`motion.g4i')
/* if (mb->coded_block_pattern & 0x8) */
and.z (1) null g3.12<1,1,1>UD 0x8UD {align1};
/* add IDCT data */
include(`correct.g4i')
/* write Y2 to dest */
mov (16) m1.0<1>UD g4.0<8,8,1>UD {align1 compr};
send (16) 0 acc0<1>UW g1<8,8,1>UW write(0, 0, 2, 0) mlen 3 rlen 0 {align1};
/* Y3 */
/* (x', y') = (x+8, y+8) + (motion_vector.x >> 1, motion_vector.y >> 1) */
add (2) g1.0<1>UD g3.0<2,2,1>UD 8D {align1};
add.sat (2) g2.0<1>UD g1.0<2,2,1>UD g1.16<2,2,1>D {align1};
include(`motion.g4i')
/* if (mb->coded_block_pattern & 0x4) */
and.z (1) null g3.12<1,1,1>UD 0x4UD {align1};
/* add IDCT data */
include(`correct.g4i')
/* write Y3 to dest */
mov (16) m1.0<1>UD g4.0<8,8,1>UD {align1 compr};
send (16) 0 acc0<1>UW g1<8,8,1>UW write(0, 0, 2, 0) mlen 3 rlen 0 {align1};
/* Chroma: block origin and motion vector are both halved first */
/* (x, y) = (x >> 1, y >> 1); motion_vector = motion_vector >> 1 */
/* (x', y') = (x, y) + (motion_vector.x >> 1, motion_vector.y >> 1) */
shr (2) g1.0<1>UD g3.0<2,2,1>UD 1UD {align1};
asr (2) g3.16<1>D g3.16<2,2,1>D 1D {align1};
asr (2) g1.16<1>D g3.16<2,2,1>D 1D {align1};
add (2) g2.0<1>UD g1.0<2,2,1>UD g1.16<2,2,1>D {align1};
/* U */
define(`input_surface', `8')
include(`motion.g4i')
/* if (mb->coded_block_pattern & 0x2) */
and.z (1) null g3.12<1,1,1>UD 0x2UD {align1};
/* add IDCT data */
include(`correct.g4i')
/* write U to dest */
mov (16) m1.0<1>UD g4.0<8,8,1>UD {align1 compr};
send (16) 0 acc0<1>UW g1<8,8,1>UW write(1, 0, 2, 0) mlen 3 rlen 0 { align1 };
/* V */
/* (x', y') = (x, y) + (motion_vector.x >> 1, motion_vector.y >> 1) */
add (2) g2.0<1>UD g1.0<2,2,1>UD g1.16<2,2,1>D {align1};
define(`input_surface', `9')
include(`motion.g4i')
/* if (mb->coded_block_pattern & 0x1) */
and.z (1) null g3.12<1,1,1>UD 0x1UD {align1};
/* add IDCT data */
include(`correct.g4i')
/* write V to dest */
mov (16) m1.0<1>UD g4.0<8,8,1>UD {align1 compr};
send (16) 0 acc0<1>UW g1<8,8,1>UW write(2, 0, 2, 0) mlen 3 rlen 0 { align1 };
/* kill the thread */
send (16) 0 null g0<8,8,1>UW
thread_spawner(0, 0, 0) mlen 1 rlen 0 { align1 EOT};
This diff is collapsed.
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author:
* Zou Nan hai <nanhai.zou@intel.com>
*/
/* Read one block of IDCT data using OWORD reads,
add it to the bytes held in g4/g5 and saturate the result.
The whole fragment is skipped when flag f0 is set; backward.g4a
sets the flag with and.z on the coded_block_pattern bit just before
including this file.
*/
(f0) jmpi out;
/* XXX: the 8-OWORD read does not work as expected,
* so four reads of one register (32 bytes) each are used instead.
* FIXME.
*/
/* start at the current IDCT data offset kept in g3.8 */
add (1) g2.4<1>UD g3.8<1,1,1>UD 0UD{align1};
send (16) 0 g6.0<1>UD g2<8,8,1>UW read(3, 0, 2, 0) mlen 1 rlen 1 { align1 };
add (1) g2.4<1>UD g2.4<1,1,1>UD 32UD {align1};
send (16) 0 g7.0<1>UD g2<8,8,1>UW read(3, 0, 2, 0) mlen 1 rlen 1 { align1 };
add (1) g2.4<1>UD g2.4<1,1,1>UD 32UD {align1};
send (16) 0 g8.0<1>UD g2<8,8,1>UW read(3, 0, 2, 0) mlen 1 rlen 1 { align1 };
add (1) g2.4<1>UD g2.4<1,1,1>UD 32UD {align1};
send (16) 0 g9.0<1>UD g2<8,8,1>UW read(3, 0, 2, 0) mlen 1 rlen 1 { align1 };
/* advance the saved offset by 128 bytes, past the data just read */
add (1) g3.8<1>UD g3.8<1,1,1>UD 128UD {align1};
/* add the bytes of g4/g5 to the signed words just read */
add (16) g6.0<1>W g6.0<16,16,1>W g4.0<16,16,1>UB {align1};
add (16) g7.0<1>W g7.0<16,16,1>W g4.16<16,16,1>UB {align1};
add (16) g8.0<1>W g8.0<16,16,1>W g5.0<16,16,1>UB {align1};
add (16) g9.0<1>W g9.0<16,16,1>W g5.16<16,16,1>UB {align1};
/* saturate each word to an unsigned byte, stored in the low byte of its word */
mov.sat (16) g6.0<2>UB g6.0<16,16,1>W {align1};
mov.sat (16) g7.0<2>UB g7.0<16,16,1>W {align1};
mov.sat (16) g8.0<2>UB g8.0<16,16,1>W {align1};
mov.sat (16) g9.0<2>UB g9.0<16,16,1>W {align1};
/* pack the low bytes back into g4/g5 */
mov (16) g4.0<1>UB g6.0<16,16,2>UB {align1};
mov (16) g4.16<1>UB g7.0<16,16,2>UB {align1};
mov (16) g5.0<1>UB g8.0<16,16,2>UB {align1};
mov (16) g5.16<1>UB g9.0<16,16,2>UB {align1};
out:
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*