Commit ebcb64fd authored by Nan Hai Zou's avatar Nan Hai Zou

XvMC VLD extension support for G4X

parent e6af995b
......@@ -271,6 +271,8 @@ AC_OUTPUT([
uxa/Makefile
src/Makefile
src/xvmc/Makefile
src/xvmc/shader/Makefile
src/xvmc/shader/vld/Makefile
src/bios_reader/Makefile
src/ch7017/Makefile
src/ch7xxx/Makefile
......
......@@ -64,6 +64,8 @@ Bool intel_xvmc_probe(ScrnInfoPtr pScrn)
if (IS_I9XX(pI830)) {
if (IS_I915(pI830))
ret = intel_xvmc_set_driver(&i915_xvmc_driver);
else if (IS_G4X(pI830))
ret = intel_xvmc_set_driver(&vld_xvmc_driver);
else
ret = intel_xvmc_set_driver(&i965_xvmc_driver);
} else {
......
......@@ -100,6 +100,7 @@ struct intel_xvmc_driver {
extern struct intel_xvmc_driver *xvmc_driver;
extern struct intel_xvmc_driver i915_xvmc_driver;
extern struct intel_xvmc_driver i965_xvmc_driver;
extern struct intel_xvmc_driver vld_xvmc_driver;
extern Bool intel_xvmc_set_driver(struct intel_xvmc_driver *);
extern Bool intel_xvmc_probe(ScrnInfoPtr);
......
......@@ -41,36 +41,14 @@
/* Row pitch used for a plane that is w pixels wide; currently the width itself. */
#define STRIDE(w) (w)
/*
 * Buffer size for a w x h image: h rows, each made of one full-width stride
 * plus one half-width stride.  Both arguments are parenthesized so that
 * expression arguments (e.g. SIZE_YUV420(w, a + b)) expand correctly.
 */
#define SIZE_YUV420(w, h) ((h) * (STRIDE(w) + STRIDE((w) >> 1)))
#define VLD_MAX_SLICE_LEN (32*1024)
static PutImageFuncPtr XvPutImage;
#ifndef XVMC_VLD
#define XVMC_VLD 0x00020000
#endif
#if 0
static int alloc_drm_memory_tiled(ScrnInfoPtr pScrn,
struct drm_memory_block *mem,
char *name, size_t size, unsigned long pitch, unsigned long alignment)
{
I830Ptr pI830 = I830PTR(pScrn);
if ((mem->buffer = i830_allocate_memory(pScrn,
name, size, pitch,
GTT_PAGE_SIZE, ALIGN_BOTH_ENDS, TILE_XMAJOR)) == NULL) {
ErrorF("Fail to alloc \n");
return BadAlloc;
}
static PutImageFuncPtr XvPutImage;
if (drmAddMap(pI830->drmSubFD,
(drm_handle_t)(mem->buffer->offset + pI830->LinearAddr),
size, DRM_AGP, 0,
(drmAddress)&mem->handle) < 0) {
ErrorF("Fail to map %d \n", errno);
i830_free_memory(pScrn, mem->buffer);
return BadAlloc;
}
mem->size = size;
mem->offset = mem->buffer->offset;
return Success;
}
#endif
static int alloc_drm_memory(ScrnInfoPtr pScrn,
struct drm_memory_block *mem,
......@@ -109,57 +87,69 @@ static void free_drm_memory(ScrnInfoPtr pScrn,
static int create_context(ScrnInfoPtr pScrn,
XvMCContextPtr context, int *num_privates, CARD32 **private)
{
struct i965_xvmc_context *private_context, *context_dup;
I830Ptr I830 = I830PTR(pScrn);
DRIInfoPtr driinfo = I830->pDRIInfo;
unsigned int blocknum =
(((context->width + 15)/16)*((context->height+15)/16));
unsigned int blocksize = 6*blocknum*64*sizeof(short);
blocksize = (blocksize + 4095)&(~4095);
if ((private_context = Xcalloc(sizeof(*private_context))) == NULL) {
ErrorF("XVMC Can not allocate private context\n");
return BadAlloc;
}
struct i965_xvmc_context *private_context, *context_dup;
I830Ptr I830 = I830PTR(pScrn);
DRIInfoPtr driinfo = I830->pDRIInfo;
unsigned int blocknum =
(((context->width + 15)/16)*((context->height+15)/16));
unsigned int blocksize = 6*blocknum*64*sizeof(short);
blocksize = (blocksize + 4095)&(~4095);
if ((private_context = Xcalloc(sizeof(*private_context))) == NULL) {
ErrorF("XVMC Can not allocate private context\n");
return BadAlloc;
}
if ((context_dup = Xcalloc(sizeof(*private_context))) == NULL) {
ErrorF("XVMC Can not allocate private context\n");
return BadAlloc;
}
if ((context_dup = Xcalloc(sizeof(*private_context))) == NULL) {
ErrorF("XVMC Can not allocate private context\n");
return BadAlloc;
}
private_context->is_g4x = IS_G4X(I830);
private_context->is_965_q = IS_965_Q(I830);
private_context->comm.type = xvmc_driver->flag;
private_context->comm.sarea_size = driinfo->SAREASize;
private_context->comm.batchbuffer.offset = xvmc_driver->batch->offset;
private_context->comm.batchbuffer.size = xvmc_driver->batch->size;
private_context->comm.batchbuffer.handle = xvmc_driver->batch_handle;
private_context->is_g4x = IS_G4X(I830);
private_context->is_965_q = IS_965_Q(I830);
private_context->comm.type = xvmc_driver->flag;
private_context->comm.sarea_size = driinfo->SAREASize;
private_context->comm.batchbuffer.offset = xvmc_driver->batch->offset;
private_context->comm.batchbuffer.size = xvmc_driver->batch->size;
private_context->comm.batchbuffer.handle = xvmc_driver->batch_handle;
if (alloc_drm_memory(pScrn, &private_context->static_buffer,
if (alloc_drm_memory(pScrn, &private_context->static_buffer,
"XVMC static buffers",
I965_MC_STATIC_BUFFER_SIZE)) {
ErrorF("Unable to allocate and map static buffer for XVMC\n");
return BadAlloc;
}
ErrorF("Unable to allocate and map static buffer for XVMC\n");
return BadAlloc;
}
if (alloc_drm_memory(pScrn, &private_context->blocks,
if (alloc_drm_memory(pScrn, &private_context->blocks,
"XVMC blocks", blocksize)) {
ErrorF("Unable to allocate and map block buffer for XVMC\n");
ErrorF("Unable to allocate and map block buffer for XVMC\n");
return BadAlloc;
}
if (IS_G4X(I830)) {
if (alloc_drm_memory(pScrn, &private_context->slice,
"XVMC vld slice", VLD_MAX_SLICE_LEN)) {
ErrorF("Unable to allocate and vld slice buffer for XVMC\n");
return BadAlloc;
}
*num_privates = sizeof(*private_context)/sizeof(CARD32);
*private = (CARD32 *)private_context;
memcpy(context_dup, private_context, sizeof(*private_context));
context->driver_priv = context_dup;
}
return Success;
*num_privates = sizeof(*private_context)/sizeof(CARD32);
*private = (CARD32 *)private_context;
memcpy(context_dup, private_context, sizeof(*private_context));
context->driver_priv = context_dup;
return Success;
}
/*
 * Release the driver-side state attached to an XvMC context.
 *
 * pScrn:   screen the context was created on.
 * context: XvMC context whose driver_priv holds the i965_xvmc_context copy
 *          stored by create_context().
 *
 * Frees every DRM memory block that create_context() allocates: the static
 * buffer and the block buffer unconditionally, and the VLD slice buffer on
 * G4X only.  The private context structure itself is freed last.
 */
static void destroy_context(ScrnInfoPtr pScrn, XvMCContextPtr context)
{
    struct i965_xvmc_context *private_context;
    I830Ptr pI830 = I830PTR(pScrn);

    private_context = context->driver_priv;
    free_drm_memory(pScrn, &private_context->static_buffer);
    /* The block buffer is allocated for every context, so it must be
     * released here as well; otherwise it leaks on each destroy. */
    free_drm_memory(pScrn, &private_context->blocks);
    /* The slice buffer only exists when the context was created on G4X. */
    if (IS_G4X(pI830))
	free_drm_memory(pScrn, &private_context->slice);
    Xfree(private_context);
}
......@@ -167,6 +157,7 @@ static int create_surface(ScrnInfoPtr pScrn, XvMCSurfacePtr surface,
int *num_priv, CARD32 **priv)
{
XvMCContextPtr ctx = surface->context;
struct i965_xvmc_surface *priv_surface, *surface_dup;
struct i965_xvmc_context *priv_ctx = ctx->driver_priv;
size_t bufsize = SIZE_YUV420(ctx->width, ctx->height);
......@@ -223,6 +214,7 @@ static int create_subpicture(ScrnInfoPtr pScrn, XvMCSubpicturePtr subpicture,
static void destroy_subpicture(ScrnInfoPtr pScrn, XvMCSubpicturePtr subpicture)
{
}
static int put_image(ScrnInfoPtr pScrn,
short src_x, short src_y,
short drw_x, short drw_y, short src_w,
......@@ -258,6 +250,20 @@ static void fini(ScrnInfoPtr screen_info)
{
}
/*
 * Surface description advertised for MPEG-2 decoding at VLD level.
 * NOTE(review): the initializer is positional; the per-field notes below
 * assume the XF86MCSurfaceInfoRec member order from xf86xvmc.h, which is not
 * visible in this file -- confirm against that header.
 */
static XF86MCSurfaceInfoRec yv12_mpeg2_vld_surface =
{
FOURCC_YV12, /* surface type id: YV12 */
XVMC_CHROMA_FORMAT_420, /* chroma format */
0, /* presumably color description (unused) */
1936, /* presumably maximum surface width */
1096, /* presumably maximum surface height */
1920, /* presumably maximum subpicture width */
1080, /* presumably maximum subpicture height */
XVMC_MPEG_2|XVMC_VLD, /* MPEG-2 with the VLD acceleration level bit set */
XVMC_INTRA_UNSIGNED, /* surface flags */
NULL /* no compatible subpicture list */
};
static XF86MCSurfaceInfoRec yv12_mpeg2_surface =
{
FOURCC_YV12,
......@@ -297,6 +303,23 @@ static XF86MCSurfaceInfoPtr surface_info[] = {
&yv12_mpeg1_surface
};
/* Surface list for the VLD adaptor: a single entry, the MPEG-2 VLD YV12 surface. */
static XF86MCSurfaceInfoPtr surface_info_vld[] = { &yv12_mpeg2_vld_surface };
/*
 * XvMC adaptor record used by vld_xvmc_driver.  It exposes only the surfaces
 * listed in surface_info_vld and wires the context, surface and subpicture
 * callbacks defined in this file.
 */
static XF86MCAdaptorRec adaptor_vld = {
    .name              = "Intel(R) Textured Video",
    .surfaces          = surface_info_vld,
    .num_surfaces      = sizeof surface_info_vld / sizeof surface_info_vld[0],
    .CreateContext     = create_context,
    .DestroyContext    = destroy_context,
    .CreateSurface     = create_surface,
    /* (sic) identifier kept exactly as this file spells it */
    .DestroySurface    = destory_surface,
    .CreateSubpicture  = create_subpicture,
    .DestroySubpicture = destroy_subpicture,
};
static XF86MCAdaptorRec adaptor = {
.name = "Intel(R) Textured Video",
.num_surfaces = sizeof(surface_info)/sizeof(surface_info[0]),
......@@ -318,3 +341,11 @@ struct intel_xvmc_driver i965_xvmc_driver = {
.fini = fini
};
/*
 * Driver record for the VLD decode path.  It reports the
 * XVMC_I965_MPEG2_VLD type flag, publishes adaptor_vld, and reuses this
 * file's init/fini hooks.
 */
struct intel_xvmc_driver vld_xvmc_driver = {
    .name    = "xvmc_vld",
    .flag    = XVMC_I965_MPEG2_VLD,
    .adaptor = &adaptor_vld,
    .init    = init,
    .fini    = fini,
};
#define I965_MC_STATIC_BUFFER_SIZE (1024*512)
#define I965_MAX_SURFACES 32
#define I965_MAX_SURFACES 12
struct _i830_memory;
struct drm_memory_block {
struct _i830_memory *buffer;
......@@ -19,6 +19,7 @@ struct i965_xvmc_context {
struct _intel_xvmc_common comm;
struct drm_memory_block static_buffer;
struct drm_memory_block blocks;
struct drm_memory_block slice;
struct i965_xvmc_surface *surfaces[I965_MAX_SURFACES];
unsigned int is_g4x:1;
unsigned int is_965_q:1;
......
......@@ -365,106 +365,3 @@ void i810_free_privContext(i810XvMCContext *pI810XvMC);
void dp(unsigned int *address, unsigned int i);
#endif
......@@ -2,6 +2,7 @@ if XVMC
lib_LTLIBRARIES=libI810XvMC.la libIntelXvMC.la
endif
SUBDIRS = shader
libI810XvMC_la_SOURCES = I810XvMC.c \
I810XvMC.h
......@@ -19,6 +20,8 @@ libIntelXvMC_la_SOURCES = intel_xvmc.c \
i915_xvmc.h \
i965_xvmc.c \
i965_xvmc.h \
xvmc_vld.c \
xvmc_vld.h \
intel_batchbuffer.c \
intel_batchbuffer.h \
xf86dri.c \
......
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -96,13 +96,6 @@ static const uint32_t dual_prime_igd_kernel_static[][4]= {
#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
#define VFE_GENERIC_MODE 0x0
#define VFE_VLD_MODE 0x1
#define VFE_IS_MODE 0x2
#define VFE_AVC_MC_MODE 0x4
#define VFE_AVC_IT_MODE 0x7
#define VFE_VC1_IT_MODE 0x7
#define MAX_SURFACE_NUM 10
#define DESCRIPTOR_NUM 12
......
......@@ -337,8 +337,10 @@ _X_EXPORT Status XvMCCreateContext(Display *display, XvPortID port,
case XVMC_I965_MPEG2_MC:
xvmc_driver = &i965_xvmc_mc_driver;
break;
case XVMC_I945_MPEG2_VLD:
case XVMC_I965_MPEG2_VLD:
xvmc_driver = &xvmc_vld_driver;
break;
case XVMC_I945_MPEG2_VLD:
default:
XVMC_ERR("unimplemented xvmc type %d", comm->type);
XFree(priv_data);
......@@ -1159,3 +1161,47 @@ _X_EXPORT Status XvMCGetAttribute(Display *display, XvMCContext *context,
{
return Success;
}
/*
 * XvMCBeginSurface - forward a begin-surface request to the active driver.
 *
 * All arguments are passed unchanged to the driver's begin_surface hook.
 *
 * Returns Success when the hook returns zero.  Returns BadValue when the
 * hook reports failure, or when no driver is active or the active driver
 * does not provide the hook.
 */
_X_EXPORT Status XvMCBeginSurface(Display *display, XvMCContext *context,
	XvMCSurface *target,
	XvMCSurface *past,
	XvMCSurface *future,
	const XvMCMpegControl *control)
{
    /* begin_surface is an optional hook: refuse the call rather than jump
     * through a NULL pointer when the current driver leaves it unset. */
    if (!xvmc_driver || !xvmc_driver->begin_surface) {
	XVMC_ERR("BeginSurface not supported by current driver");
	return BadValue;
    }
    if (xvmc_driver->begin_surface(display, context,
		target, past, future, control)) {
	XVMC_ERR("BeginSurface fail\n");
	return BadValue;
    }
    return Success;
}
/*
 * XvMCLoadQMatrix - forward a quantiser-matrix load to the active driver.
 *
 * display, context and qmx are passed unchanged to the driver's
 * load_qmatrix hook.
 *
 * Returns Success when the hook returns zero.  Returns BadValue when the
 * hook reports failure, or when no driver is active or the active driver
 * does not provide the hook.
 */
_X_EXPORT Status XvMCLoadQMatrix(Display *display, XvMCContext *context,
	const XvMCQMatrix *qmx)
{
    /* load_qmatrix is an optional hook: refuse the call rather than jump
     * through a NULL pointer when the current driver leaves it unset. */
    if (!xvmc_driver || !xvmc_driver->load_qmatrix) {
	XVMC_ERR("LoadQMatrix not supported by current driver");
	return BadValue;
    }
    if (xvmc_driver->load_qmatrix(display, context, qmx)) {
	XVMC_ERR("LoadQMatrix fail\n");
	return BadValue;
    }
    return Success;
}
/*
 * XvMCPutSlice - forward a slice buffer to the active driver.
 *
 * slice and nbytes are the caller's buffer and its length; they are handed
 * to the driver's put_slice hook without modification.
 *
 * Returns Success when the hook returns zero.  Returns BadValue when the
 * hook reports failure, or when no driver is active or the active driver
 * does not provide the hook.
 */
_X_EXPORT Status XvMCPutSlice(Display *display, XvMCContext *context,
	char *slice, int nbytes)
{
    /* put_slice is an optional hook: refuse the call rather than jump
     * through a NULL pointer when the current driver leaves it unset. */
    if (!xvmc_driver || !xvmc_driver->put_slice) {
	XVMC_ERR("PutSlice not supported by current driver");
	return BadValue;
    }
    /* The public entry point takes char *, the driver hook takes
     * unsigned char *; convert explicitly instead of relying on an
     * implicit incompatible-pointer conversion. */
    if (xvmc_driver->put_slice(display, context,
		(unsigned char *)slice, nbytes)) {
	XVMC_ERR("PutSlice fail\n");
	return BadValue;
    }
    return Success;
}
/*
 * XvMCPutSlice2 - forward a slice buffer plus its slice code to the active
 * driver.
 *
 * slice, nbytes and slice_code are handed to the driver's put_slice2 hook
 * without modification.
 *
 * Returns Success when the hook returns zero.  Returns BadValue when the
 * hook reports failure, or when no driver is active or the active driver
 * does not provide the hook.
 */
_X_EXPORT Status XvMCPutSlice2(Display *display, XvMCContext *context,
	char *slice, int nbytes, int slice_code)
{
    /* put_slice2 is an optional hook: refuse the call rather than jump
     * through a NULL pointer when the current driver leaves it unset. */
    if (!xvmc_driver || !xvmc_driver->put_slice2) {
	XVMC_ERR("PutSlice2 not supported by current driver");
	return BadValue;
    }
    /* The public entry point takes char *, the driver hook takes
     * unsigned char *; convert explicitly instead of relying on an
     * implicit incompatible-pointer conversion. */
    if (xvmc_driver->put_slice2(display, context,
		(unsigned char *)slice, nbytes, slice_code)) {
	XVMC_ERR("PutSlice2 fail\n");
	return BadValue;
    }
    return Success;
}
......@@ -51,6 +51,7 @@
#include <X11/extensions/Xvlib.h>
#include <X11/extensions/XvMC.h>
#include <X11/extensions/XvMClib.h>
#include <X11/extensions/vldXvMC.h>
#include <drm_sarea.h>
#include "xf86dri.h"
......@@ -197,11 +198,23 @@ typedef struct _intel_xvmc_driver {
Status (*get_surface_status)(Display *display, XvMCSurface *surface, int *stat);
/* XXX more for vld */
Status (*begin_surface)(Display *display, XvMCContext *context,
XvMCSurface *target_surface,
XvMCSurface *past_surface,
XvMCSurface *future_surface,
const XvMCMpegControl *control);
Status (*load_qmatrix)(Display *display, XvMCContext *context,
const XvMCQMatrix *qmx);
Status (*put_slice)(Display *display, XvMCContext *context,
unsigned char *slice, int bytes);
Status (*put_slice2)(Display *display, XvMCContext *context,
unsigned char *slice, int bytes, int slice_code);
} intel_xvmc_driver_t, *intel_xvmc_driver_ptr;
extern struct _intel_xvmc_driver i915_xvmc_mc_driver;
extern struct _intel_xvmc_driver i965_xvmc_mc_driver;
extern struct _intel_xvmc_driver xvmc_vld_driver;
extern struct _intel_xvmc_driver *xvmc_driver;
#define SET_BLOCKED_SIGSET() do { \
......@@ -262,4 +275,12 @@ extern void intel_xvmc_dump_render(XvMCContext *context, unsigned int picture_st
unsigned int num_macroblocks, unsigned int first_macroblock,
XvMCMacroBlockArray *macroblock_array, XvMCBlockArray *blocks);
/*
 * VFE mode selector values, shared by the XvMC client-side drivers.
 * NOTE(review): VFE_AVC_IT_MODE and VFE_VC1_IT_MODE are both defined as 0x7;
 * confirm against the hardware documentation whether the VC1 value is
 * intentional.
 */
#define VFE_GENERIC_MODE 0x0
#define VFE_VLD_MODE 0x1
#define VFE_IS_MODE 0x2
#define VFE_AVC_MC_MODE 0x4
#define VFE_AVC_IT_MODE 0x7
#define VFE_VC1_IT_MODE 0x7
#endif
INTEL_G4A = ipicture.g4a \
lib.g4a \
frame_forward.g4a \
frame_backward.g4a \
frame_f_b.g4a \
field_forward.g4a \
field_backward.g4a \
field_f_b.g4a
INTEL_G4B = ipicture.g4b \
lib.g4b \
frame_forward.g4b \
frame_backward.g4b \
frame_f_b.g4b \
field_forward.g4b \
field_backward.g4b \
field_f_b.g4b
# Rules below are only active under the HAVE_GEN4ASM conditional
# (presumably set by configure when intel-gen4asm is found -- confirm).
if HAVE_GEN4ASM
SUFFIXES = .g4a .g4b
# Suffix rule: expand the .g4a source with m4 into a temporary .g4m file,
# assemble that with intel-gen4asm, then delete the temporary file.
.g4a.g4b:
m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m
# Every assembled shader depends on the shared include files.
# NOTE(review): INTEL_G4I is not defined in the visible part of this file;
# confirm it is set elsewhere, otherwise this dependency list is empty.
$(INTEL_G4B): $(INTEL_G4I)
BUILT_SOURCES= $(INTEL_G4B)
# Remove the generated binaries on "make clean".
clean-local:
-rm -f $(INTEL_G4B)
endif
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author:
* Zou Nan hai <nanhai.zou@intel.com>
* Yan Li <li.l.yan@intel.com>
* Liu Xi bin<xibin.liu@intel.com>
*/
/* GRF allocation:
g1~g30: constant buffer
g1~g2:intra IQ matrix
g3~g4:non intra IQ matrix
g5~g20:IDCT table
g31: thread payload
g32: message descriptor for reading reference data
g58~g81:reference data
g82: thread payload backup
g83~g106:IDCT data */
// Entry: copy the saved x/y from the payload backup (g82) into g31, which is
// used further down as the source payload of the write sends.
mov (2) g31.0<1>UD g82.12<2,2,1>UW {align1}; //restore x and y
// Test bit 0x20 of g82.2; a non-zero result predicates the jump to the
// field-DCT variant below.
and.nz (1) null g82.2<1,1,1>UW 0x20UW {align1}; //dct_type
(f0) jmpi field_dct;
// Frame DCT path: add IDCT rows g83..g98, in order, to reference rows
// g58..g73.  The reference operand reads every other byte (<16,16,2>UB) and
// the 16-bit sums overwrite the same register.
add (16) g58.0<1>W g83.0<16,16,1>W g58.0<16,16,2>UB {align1};
add (16) g59.0<1>W g84.0<16,16,1>W g59.0<16,16,2>UB {align1};
add (16) g60.0<1>W g85.0<16,16,1>W g60.0<16,16,2>UB {align1};
add (16) g61.0<1>W g86.0<16,16,1>W g61.0<16,16,2>UB {align1};
add (16) g62.0<1>W g87.0<16,16,1>W g62.0<16,16,2>UB {align1};
add (16) g63.0<1>W g88.0<16,16,1>W g63.0<16,16,2>UB {align1};
add (16) g64.0<1>W g89.0<16,16,1>W g64.0<16,16,2>UB {align1};
add (16) g65.0<1>W g90.0<16,16,1>W g65.0<16,16,2>UB {align1};
add (16) g66.0<1>W g91.0<16,16,1>W g66.0<16,16,2>UB {align1};
add (16) g67.0<1>W g92.0<16,16,1>W g67.0<16,16,2>UB {align1};
add (16) g68.0<1>W g93.0<16,16,1>W g68.0<16,16,2>UB {align1};
add (16) g69.0<1>W g94.0<16,16,1>W g69.0<16,16,2>UB {align1};
add (16) g70.0<1>W g95.0<16,16,1>W g70.0<16,16,2>UB {align1};
add (16) g71.0<1>W g96.0<16,16,1>W g71.0<16,16,2>UB {align1};
add (16) g72.0<1>W g97.0<16,16,1>W g72.0<16,16,2>UB {align1};
add (16) g73.0<1>W g98.0<16,16,1>W g73.0<16,16,2>UB {align1};
// Skip the field-DCT variant and go straight to the write-out.
jmpi write_back;
// Field DCT path: same additions as the frame path, but the IDCT rows are
// interleaved.  Even-numbered destination rows (g58, g60, ...) take
// g83..g90 and odd-numbered rows (g59, g61, ...) take g91..g98.
field_dct:
add (16) g58.0<1>W g83.0<16,16,1>W g58.0<16,16,2>UB {align1};
add (16) g59.0<1>W g91.0<16,16,1>W g59.0<16,16,2>UB {align1};
add (16) g60.0<1>W g84.0<16,16,1>W g60.0<16,16,2>UB {align1};
add (16) g61.0<1>W g92.0<16,16,1>W g61.0<16,16,2>UB {align1};
add (16) g62.0<1>W g85.0<16,16,1>W g62.0<16,16,2>UB {align1};
add (16) g63.0<1>W g93.0<16,16,1>W g63.0<16,16,2>UB {align1};
add (16) g64.0<1>W g86.0<16,16,1>W g64.0<16,16,2>UB {align1};
add (16) g65.0<1>W g94.0<16,16,1>W g65.0<16,16,2>UB {align1};
add (16) g66.0<1>W g87.0<16,16,1>W g66.0<16,16,2>UB {align1};
add (16) g67.0<1>W g95.0<16,16,1>W g67.0<16,16,2>UB {align1};
add (16) g68.0<1>W g88.0<16,16,1>W g68.0<16,16,2>UB {align1};
add (16) g69.0<1>W g96.0<16,16,1>W g69.0<16,16,2>UB {align1};
add (16) g70.0<1>W g89.0<16,16,1>W g70.0<16,16,2>UB {align1};
add (16) g71.0<1>W g97.0<16,16,1>W g71.0<16,16,2>UB {align1};
add (16) g72.0<1>W g90.0<16,16,1>W g72.0<16,16,2>UB {align1};
add (16) g73.0<1>W g98.0<16,16,1>W g73.0<16,16,2>UB {align1};
// Luma write-out, reached from both the frame and field paths.
write_back:
// Third dword of the g31 payload; presumably the block width/height minus
// one (15 x 15) for the write message -- confirm against the hardware docs.
mov (1) g31.8<1>UD 0x00F000FUD {align1};
// Saturate each 16-bit sum to a byte, stored at every other byte (<2>UB) of
// the same register.
mov.sat (16) g58.0<2>UB g58.0<16,16,1>W {align1};
mov.sat (16) g59.0<2>UB g59.0<16,16,1>W {align1};
mov.sat (16) g60.0<2>UB g60.0<16,16,1>W {align1};
mov.sat (16) g61.0<2>UB g61.0<16,16,1>W {align1};
mov.sat (16) g62.0<2>UB g62.0<16,16,1>W {align1};
mov.sat (16) g63.0<2>UB g63.0<16,16,1>W {align1};
mov.sat (16) g64.0<2>UB g64.0<16,16,1>W {align1};
mov.sat (16) g65.0<2>UB g65.0<16,16,1>W {align1};
mov.sat (16) g66.0<2>UB g66.0<16,16,1>W {align1};
mov.sat (16) g67.0<2>UB g67.0<16,16,1>W {align1};
mov.sat (16) g68.0<2>UB g68.0<16,16,1>W {align1};
mov.sat (16) g69.0<2>UB g69.0<16,16,1>W {align1};
mov.sat (16) g70.0<2>UB g70.0<16,16,1>W {align1};
mov.sat (16) g71.0<2>UB g71.0<16,16,1>W {align1};
mov.sat (16) g72.0<2>UB g72.0<16,16,1>W {align1};
mov.sat (16) g73.0<2>UB g73.0<16,16,1>W {align1};
// Pack the saturated bytes: two 16-byte rows per message register, m1..m8.
mov (16) m1.0<1>UB g58.0<16,16,2>UB {align1};
mov (16) m1.16<1>UB g59.0<16,16,2>UB {align1};
mov (16) m2.0<1>UB g60.0<16,16,2>UB {align1};
mov (16) m2.16<1>UB g61.0<16,16,2>UB {align1};
mov (16) m3.0<1>UB g62.0<16,16,2>UB {align1};
mov (16) m3.16<1>UB g63.0<16,16,2>UB {align1};
mov (16) m4.0<1>UB g64.0<16,16,2>UB {align1};
mov (16) m4.16<1>UB g65.0<16,16,2>UB {align1};
mov (16) m5.0<1>UB g66.0<16,16,2>UB {align1};
mov (16) m5.16<1>UB g67.0<16,16,2>UB {align1};
mov (16) m6.0<1>UB g68.0<16,16,2>UB {align1};
mov (16) m6.16<1>UB g69.0<16,16,2>UB {align1};
mov (16) m7.0<1>UB g70.0<16,16,2>UB {align1};
mov (16) m7.16<1>UB g71.0<16,16,2>UB {align1};
mov (16) m8.0<1>UB g72.0<16,16,2>UB {align1};
mov (16) m8.16<1>UB g73.0<16,16,2>UB {align1};
// Write message: g31 payload plus m1..m8 (mlen 9), no response expected.
// The first write() argument is 0 here; presumably it selects the Y
// surface -- confirm.
send (16) 0 acc0<1>UW g31<8,8,1>UW write(0,0,2,0) mlen 9 rlen 0 {align1};
// Chroma write-out: the same add / saturate / pack / send sequence as luma,
// on four rows per plane.
//U
// Third dword of the g31 payload; presumably width/height minus one (7 x 7)
// for the chroma block -- confirm.
mov (1) g31.8<1>UD 0x0070007UD { align1 };
// Halve the x and y held in g31 for the chroma planes.
shr (2) g31.0<1>UD g31.0<2,2,1>UD 1D {align1};
// Add IDCT rows g99..g102 to reference rows g74..g77 (read as UW here).
add (16) g74.0<1>W g99.0<16,16,1>W g74.0<16,16,1>UW {align1};
add (16) g75.0<1>W g100.0<16,16,1>W g75.0<16,16,1>UW {align1};
add (16) g76.0<1>W g101.0<16,16,1>W g76.0<16,16,1>UW {align1};
add (16) g77.0<1>W g102.0<16,16,1>W g77.0<16,16,1>UW {align1};
mov.sat (16) g74.0<2>UB g74.0<16,16,1>W {align1};
mov.sat (16) g75.0<2>UB g75.0<16,16,1>W {align1};
mov.sat (16) g76.0<2>UB g76.0<16,16,1>W {align1};
mov.sat (16) g77.0<2>UB g77.0<16,16,1>W {align1};
mov (16) m1.0<1>UB g74.0<16,16,2>UB {align1};
mov (16) m1.16<1>UB g75.0<16,16,2>UB {align1};
mov (16) m2.0<1>UB g76.0<16,16,2>UB {align1};
mov (16) m2.16<1>UB g77.0<16,16,2>UB {align1};
// g31 payload plus m1..m2 (mlen 3); first write() argument is 1.
send (16) 0 acc0<1>UW g31<8,8,1>UW write(1, 0, 2, 0) mlen 3 rlen 0 { align1 };
//V
// Add IDCT rows g103..g106 to reference rows g78..g81.
// NOTE(review): the destination type is UW here but W in the U pass, while
// the following mov.sat reads the result as W -- confirm this is intended.
add (16) g78.0<1>UW g103.0<16,16,1>W g78.0<16,16,1>UW {align1};
add (16) g79.0<1>UW g104.0<16,16,1>W g79.0<16,16,1>UW {align1};
add (16) g80.0<1>UW g105.0<16,16,1>W g80.0<16,16,1>UW {align1};
add (16) g81.0<1>UW g106.0<16,16,1>W g81.0<16,16,1>UW {align1};
mov.sat (16) g78.0<2>UB g78.0<16,16,1>W {align1};
mov.sat (16) g79.0<2>UB g79.0<16,16,1>W {align1};
mov.sat (16) g80.0<2>UB g80.0<16,16,1>W {align1};
mov.sat (16) g81.0<2>UB g81.0<16,16,1>W {align1};
mov (16) m1.0<1>UB g78.0<16,16,2>UB {align1};
mov (16) m1.16<1>UB g79.0<16,16,2>UB {align1};
mov (16) m2.0<1>UB g80.0<16,16,2>UB {align1};
mov (16) m2.16<1>UB g81.0<16,16,2>UB {align1};
// g31 payload plus m1..m2 (mlen 3); first write() argument is 2.
send (16) 0 acc0<1>UW g31<8,8,1>UW write(2, 0, 2, 0) mlen 3 rlen 0 { align1 };
// Final message to the thread spawner, flagged EOT to end this thread.
send (16) 0 acc0<1>UW g0<8,8,1>UW
thread_spawner(0, 0, 0) mlen 1 rlen 0 { align1 EOT};
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author:
* Zou Nan hai <nanhai.zou@intel.com>
* Yan Li <li.l.yan@intel.com>
* Liu Xi bin<xibin.liu@intel.com>
*/
/* GRF allocation:
g1~g30: constant buffer
g1~g2:intra IQ matrix
g3~g4:non intra IQ matrix
g5~g20:IDCT table
g112~g115: intra IQ matrix in UW format (in order to use instruction compression), copied from g1~g2
g[a0.0]:DCT data of a block
g125: ip before jump
if(v==0 && u==0 && intra_mb)
F''[v][u] = QF[v][u] * intra_dc_mult
else
F''[v][u] = (QF[v][u]*W[w][v][u]*quantiser_scale*2)/32
*/
DO_IQ_INTRA:
add (1) a0.0<1>UD a0.0<1,1,1>UD 0x00200020UD {align1};
mov (1) g111.0<1>W g[a0.0]<1,1,1>W {align1};