Commit 5799a933 authored by Martin Eikermann's avatar Martin Eikermann Committed by Sebastian Dröge

[MOVED FROM BAD 02/56] gst/deinterlace2/: Add a deinterlacer plugin based on...

[MOVED FROM BAD 02/56] gst/deinterlace2/: Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer, which was relicensed to LGPL f...

Original commit message from CVS:
Based on a patch by: Martin Eikermann <meiker at upb dot de>
* gst/deinterlace2/Makefile.am:
* gst/deinterlace2/gstdeinterlace2.c:
(gst_deinterlace2_method_get_type),
(gst_deinterlace2_fields_get_type),
(gst_deinterlace2_field_layout_get_type),
(gst_deinterlace2_base_init), (gst_deinterlace2_class_init),
(gst_deinterlace2_init), (gst_deinterlace2_set_method),
(gst_deinterlace2_set_property), (gst_deinterlace2_get_property),
(gst_deinterlace2_finalize), (gst_deinterlace2_pop_history),
(gst_deinterlace2_head_history), (gst_deinterlace2_push_history),
(gst_deinterlace2_deinterlace_scanlines), (gst_deinterlace2_chain),
(gst_deinterlace2_setcaps), (gst_deinterlace2_sink_event),
(gst_deinterlace2_change_state), (gst_deinterlace2_src_event),
(gst_deinterlace2_src_query), (gst_deinterlace2_src_query_types),
(plugin_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/greedy.c: (copy_scanline),
(deinterlace_greedy_packed422_scanline_mmxext),
(dscaler_greedyl_get_method):
* gst/deinterlace2/tvtime/greedyh.asm:
* gst/deinterlace2/tvtime/greedyh.c:
(deinterlace_frame_di_greedyh), (dscaler_greedyh_get_method),
(greedyh_init), (greedyh_filter_mmx), (greedyh_filter_3dnow),
(greedyh_filter_sse):
* gst/deinterlace2/tvtime/greedyh.h:
* gst/deinterlace2/tvtime/greedyhmacros.h:
* gst/deinterlace2/tvtime/mmx.h:
* gst/deinterlace2/tvtime/plugins.h:
* gst/deinterlace2/tvtime/speedtools.h:
* gst/deinterlace2/tvtime/speedy.c: (multiply_alpha), (clip255),
(comb_factor_packed422_scanline_mmx),
(diff_factor_packed422_scanline_c),
(diff_factor_packed422_scanline_mmx),
(diff_packed422_block8x8_mmx), (diff_packed422_block8x8_c),
(packed444_to_packed422_scanline_c),
(packed422_to_packed444_scanline_c),
(packed422_to_packed444_rec601_scanline_c),
(vfilter_chroma_121_packed422_scanline_mmx),
(vfilter_chroma_121_packed422_scanline_c),
(vfilter_chroma_332_packed422_scanline_mmx),
(vfilter_chroma_332_packed422_scanline_c),
(kill_chroma_packed422_inplace_scanline_mmx),
(kill_chroma_packed422_inplace_scanline_c),
(invert_colour_packed422_inplace_scanline_mmx),
(invert_colour_packed422_inplace_scanline_c),
(mirror_packed422_inplace_scanline_c),
(interpolate_packed422_scanline_c),
(convert_uyvy_to_yuyv_scanline_mmx),
(convert_uyvy_to_yuyv_scanline_c),
(interpolate_packed422_scanline_mmx),
(interpolate_packed422_scanline_mmxext),
(blit_colour_packed422_scanline_c),
(blit_colour_packed422_scanline_mmx),
(blit_colour_packed422_scanline_mmxext),
(blit_colour_packed4444_scanline_c),
(blit_colour_packed4444_scanline_mmx),
(blit_colour_packed4444_scanline_mmxext), (small_memcpy),
(speedy_memcpy_c), (speedy_memcpy_mmx), (speedy_memcpy_mmxext),
(blit_packed422_scanline_c), (blit_packed422_scanline_mmx),
(blit_packed422_scanline_mmxext),
(composite_colour4444_alpha_to_packed422_scanline_c),
(composite_colour4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_alpha_to_packed422_scanline_c),
(composite_packed4444_alpha_to_packed422_scanline_mmxext),
(composite_packed4444_to_packed422_scanline_c),
(composite_packed4444_to_packed422_scanline_mmxext),
(composite_alphamask_to_packed4444_scanline_c),
(composite_alphamask_to_packed4444_scanline_mmxext),
(composite_alphamask_alpha_to_packed4444_scanline_c),
(premultiply_packed4444_scanline_c),
(premultiply_packed4444_scanline_mmxext),
(blend_packed422_scanline_c), (blend_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_mmxext),
(quarter_blit_vertical_packed422_scanline_c),
(subpix_blit_vertical_packed422_scanline_c),
(a8_subpix_blit_scanline_c), (myround), (init_RGB_to_YCbCr_tables),
(init_YCbCr_to_RGB_tables), (rgb24_to_packed444_rec601_scanline_c),
(rgba32_to_packed4444_rec601_scanline_c),
(packed444_to_rgb24_rec601_scanline_c),
(packed444_to_nonpremultiplied_packed4444_scanline_c),
(aspect_adjust_packed4444_scanline_c), (setup_speedy_calls),
(speedy_get_accel):
* gst/deinterlace2/tvtime/speedy.h:
* gst/deinterlace2/tvtime/sse.h:
* gst/deinterlace2/tvtime/tomsmocomp.c: (Fieldcopy),
(deinterlace_frame_di_tomsmocomp), (dscaler_tomsmocomp_get_method),
(tomsmocomp_init), (tomsmocomp_filter_mmx),
(tomsmocomp_filter_3dnow), (tomsmocomp_filter_sse):
* gst/deinterlace2/tvtime/tomsmocomp.h:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
* gst/deinterlace2/tvtime/vfir.c: (deinterlace_line),
(deinterlace_scanline_vfir), (copy_scanline),
(dscaler_vfir_get_method):
* gst/deinterlace2/tvtime/x86-64_macros.inc:
Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer,
which was relicensed to LGPL for GStreamer and in theory provides
better and faster results than the simple deinterlace element.
Fixes bug #163578.
Ported to GStreamer 0.10 but still not enabled or included in the
build system by default, because of bad artefacts caused by a bug
somewhere and because it can currently only be built on x86/amd64 and
requires special CFLAGS. Will be fixed soon.
parent 0518c150
// -*- c++ -*-
// Center-pixel search step (YUY2).
// Searches just the center pixel, in both the old and new fields, but
// takes averages. This is an even pixel address, so any chroma match
// will be used.
// Zero motion is the preferred result, so everything found previously is
// first biased up by a little (a saturating add of _ONES), to be adjusted
// for later, before the center candidate is merged in.
#ifdef IS_SSE2
"paddusb "_ONES", %%xmm7\n\t" // bias toward no motion
#else
"paddusb "_ONES", %%mm7\n\t" // bias toward no motion (mm7: best ratings so far)
#endif
// Both operands use the same offset (base + XCX) from the two weave-field
// pointers held in XDI and XSI.
MERGE4PIXavg("(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")") // center, in old and new
// -*- c++ -*-
#ifdef IS_SSE2
//sse2 code deleted for now
#else
// Version for non-SSE2
// Choose the output pixels (left in mm0) from the bob value in mm6 and the
// best weave pixels in mm5, using the ratings in mm7.
#ifdef SKIP_SEARCH
"movq %%mm6, %%mm0\n\t" // just use the results of our weird bob
#else
// JA 9/Dec/2002
// failed experiment
// but leave in placeholder for me to play about
#ifdef DONT_USE_STRANGE_BOB
// Use the best weave where the weave difference is small, as that
// means the image is still or moving cleanly.
// If there is motion we will clip, which will catch anything.
// NOTE(review): the original comment said "diffs less than 10", but the
// code below subtracts _FOURS, not _TENS.
"psubusb "_FOURS", %%mm7\n\t" // saturating subtract: bytes become zero where weave diff <= _FOURS
"pxor %%mm0, %%mm0\n\t"
"pcmpeqb %%mm0, %%mm7\n\t" // all ff where weave better, else 00
"pcmpeqb %%mm7, %%mm0\n\t" // all ff where bob better, else 00
"pand %%mm6, %%mm0\n\t" // use bob for these pixel values
"pand %%mm5, %%mm7\n\t" // use weave for these
"por %%mm7, %%mm0\n\t" // combine both
#else
// Use the better of bob or weave
// pminub mm4, TENS // the most we care about
V_PMINUB ("%%mm4", _TENS, "%%mm0") // the most we care about
"psubusb %%mm4, %%mm7\n\t" // forgive that much from weave est?
"psubusb "_FOURS", %%mm7\n\t" // bias it a bit toward weave
"pxor %%mm0, %%mm0\n\t"
"pcmpeqb %%mm0, %%mm7\n\t" // all ff where weave better, else 00
"pcmpeqb %%mm7, %%mm0\n\t" // all ff where bob better, else 00
"pand %%mm6, %%mm0\n\t" // use bob for these pixel values
"pand %%mm5, %%mm7\n\t" // use weave for these
"por %%mm7, %%mm0\n\t" // combine both
#endif
// Clamp the chosen pixels into [Min_Vals, Max_Vals].
// pminub mm0, Max_Vals // but clip to catch the stray error
V_PMINUB ("%%mm0", _Max_Vals, "%%mm1") // but clip to catch the stray error
// pmaxub mm0, Min_Vals
V_PMAXUB ("%%mm0", _Min_Vals)
#endif
MOVX" "_pDest", %%"XAX"\n\t"
// Store the finished pixels (mm0) for the current offset XDX.
// On entry XAX holds pDest (loaded just above) and XBX/XCX hold the bob
// source pointer and src_pitch2.
#ifdef USE_VERTICAL_FILTER
// Vertical filter: blend the result with the bob line above and the bob
// line below, and write the two blends to two consecutive destination
// lines (pDest and pDest + dst_pitchw).
"movq %%mm0, %%mm1\n\t"
// pavgb mm0, qword ptr["XBX"]
V_PAVGB ("%%mm0", "(%%"XBX")", "%%mm2", _ShiftMask)
// movntq qword ptr["XAX"+"XDX"], mm0
// Register names need "%%" inside an extended-asm template; a single "%"
// is parsed as an operand reference.
V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
// pavgb mm1, qword ptr["XBX"+"XCX"]
V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask)
// Step the destination pointer (XAX) to the next output line. XBX must
// not be touched here: it is the bob source pointer that the loop keeps
// advancing, and XAX is reloaded from pDest on every iteration anyway.
// ADDX keeps the operand size matched to the "long" pitch on both 32-bit
// and 64-bit builds, and the "\n\t" keeps this instruction separate from
// the store that follows.
ADDX" "_dst_pitchw", %%"XAX"\n\t"
// movntq qword ptr["XAX"+"XDX"], mm1
V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1")
#else
// movntq qword ptr["XAX"+"XDX"], mm0
V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
#endif
LEAX" 8(%%"XDX"), %%"XDX"\n\t" // bump offset pointer
CMPX" "_Last8", %%"XDX"\n\t" // done with line?
"jb 1b\n\t" // y
#endif
// Restore XBX from oldbx. XBX is saved and restored by hand (see the
// "-fPIC" save at the top of the asm block) rather than being listed as
// a clobber.
MOVX" "_oldbx", %%"XBX"\n\t"
: /* no outputs */
// Input operands. Their order defines the operand numbers %0..%18 that the
// _pBob .. _oldbx string macros expand to, so the two lists must be kept
// in sync.
// NOTE(review): oldbx is written by the asm (XBX is stored into it) even
// though it is declared here as an input-only "m" operand.
: "m"(pBob),
"m"(src_pitch2),
"m"(ShiftMask),
"m"(pDest),
"m"(dst_pitchw),
"m"(Last8),
"m"(pSrc),
"m"(pSrcP),
"m"(pBobP),
"m"(DiffThres),
"m"(Min_Vals),
"m"(Max_Vals),
"m"(FOURS),
"m"(TENS),
"m"(ONES),
"m"(UVMask),
"m"(Max_Mov),
"m"(YMask),
"m"(oldbx)
// Clobbers: the general registers used by the loop, the x87 stack on
// ARCH_386 builds, all eight MMX registers, memory and the flags.
: XAX, XCX, XDX, XSI, XDI,
#ifdef ARCH_386
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
#endif
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
"memory", "cc"
);
// adjust for next line
pSrc += src_pitch2;
pSrcP += src_pitch2;
pDest += dst_pitch2;
pBob += src_pitch2;
pBobP += src_pitch2;
}
return 0;
// -*- c++ -*-
// Searches 2 pixels to the left and right, in both the old
// and new fields, but takes averages. These are even
// pixel addresses. Chroma match will be used. (YUY2)
// Addressing: a displacement of +/-4 bytes is 2 pixels in 2-byte-per-pixel
// YUY2; base alone is the line above, base + XCX the center line and
// base + 2*XCX the line below. Each candidate pairs a position in the XDI
// field with the diagonally opposite position in the XSI field.
MERGE4PIXavg("-4(%%"XDI")", "4(%%"XSI", %%"XCX", 2)") // up left, down right
MERGE4PIXavg("4(%%"XDI")", "-4(%%"XSI", %%"XCX", 2)") // up right, down left
MERGE4PIXavg("-4(%%"XDI", %%"XCX")", "4(%%"XSI", %%"XCX")") // left, right
MERGE4PIXavg("4(%%"XDI", %%"XCX")", "-4(%%"XSI", %%"XCX")") // right, left
MERGE4PIXavg("-4(%%"XDI", %%"XCX", 2)", "4(%%"XSI")") // down left, up right
MERGE4PIXavg("4(%%"XDI", %%"XCX", 2)", "-4(%%"XSI")") // down right, up left
// -*- c++ -*-
// Searches 4 pixels to the left and right, in both the old
// and new fields, but takes averages. These are even
// pixel addresses. Chroma match will be used. (YUY2)
// Addressing: a displacement of +/-8 bytes is 4 pixels in 2-byte-per-pixel
// YUY2; base alone is the line above, base + XCX the center line and
// base + 2*XCX the line below.
MERGE4PIXavg("-8(%%"XDI")", "8(%%"XSI", %%"XCX", 2)") // up left, down right
MERGE4PIXavg("8(%%"XDI")", "-8(%%"XSI", %%"XCX", 2)") // up right, down left
MERGE4PIXavg("-8(%%"XDI", %%"XCX")", "8(%%"XSI", %%"XCX")") // left, right
MERGE4PIXavg("8(%%"XDI", %%"XCX")", "-8(%%"XSI", %%"XCX")") // right, left
MERGE4PIXavg("-8(%%"XDI", %%"XCX", 2)", "8(%%"XSI")") // down left, up right
MERGE4PIXavg("8(%%"XDI", %%"XCX", 2)", "-8(%%"XSI")") // down right, up left
// -*- c++ -*-
// Searches 1 pixel to the left and right, in both the old
// and new fields, but takes averages. These are odd
// pixel addresses. Any chroma match will not be used. (YUY2)
// Only the four diagonal candidates are listed here (+/-2 bytes = 1 pixel);
// the two same-line candidates come from the include at the end.
MERGE4PIXavg("-2(%%"XDI")", "2(%%"XSI", %%"XCX", 2)") // up left, down right
MERGE4PIXavg("2(%%"XDI")", "-2(%%"XSI", %%"XCX", 2)") // up right, down left
MERGE4PIXavg("-2(%%"XDI", %%"XCX", 2)", "2(%%"XSI")") // down left, up right
MERGE4PIXavg("2(%%"XDI", %%"XCX", 2)", "-2(%%"XSI")") // down right, up left
#include "SearchLoopOddA2.inc"
// Searches 1 pixel to the left and right, in both the old
// and new fields, but takes averages. These are odd
// pixel addresses. Any chroma match will not be used. (YUY2)
// Same-line candidates only: both operands address the center line
// (base + XCX), offset by +/-2 bytes in opposite directions.
MERGE4PIXavg("-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right
MERGE4PIXavg("2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left
// -*- c++ -*-
// Searches 3 pixels to the left and right, in both the old
// and new fields, but takes averages. These are odd
// pixel addresses. Any chroma match will not be used. (YUY2)
// Addressing: a displacement of +/-6 bytes is 3 pixels in 2-byte-per-pixel
// YUY2; base alone is the line above, base + XCX the center line and
// base + 2*XCX the line below.
MERGE4PIXavg("-6(%%"XDI")", "6(%%"XSI", %%"XCX", 2)") // up left, down right
MERGE4PIXavg("6(%%"XDI")", "-6(%%"XSI", %%"XCX", 2)") // up right, down left
MERGE4PIXavg("-6(%%"XDI", %%"XCX")", "6(%%"XSI", %%"XCX")") // left, right
MERGE4PIXavg("6(%%"XDI", %%"XCX")", "-6(%%"XSI", %%"XCX")") // right, left
MERGE4PIXavg("-6(%%"XDI", %%"XCX", 2)", "6(%%"XSI")") // down left, up right
MERGE4PIXavg("6(%%"XDI", %%"XCX", 2)", "-6(%%"XSI")") // down right, up left
// Searches 1 pixel to the left and right, in both the old
// and new fields, but takes v-half pel averages. These are odd
// pixel addresses. Any chroma match will not be used. (YUY2)
__asm
{
MERGE4PIXavgH("XDI"-2, "XDI"+"XCX"-2, "XSI"+"XCX"+2, "XSI"+2*"XCX"+2) // up left, down right
MERGE4PIXavgH("XDI"+2, "XDI"+"XCX"+2, "XSI"+"XCX"-2, "XSI"+2*"XCX"-2) // up right, down left
MERGE4PIXavgH("XDI"+2*"XCX"-2, "XDI"+"XCX"-2, "XSI"+"XCX"+2, "XSI"+2) // down left, up right
MERGE4PIXavgH("XDI"+2*"XCX"+2, "XDI"+"XCX"+2, "XSI"+"XCX"-2, "XSI"-2) // down right, up left
}
// Searches 1 pixel to the left and right, in both the old
// and new fields, but takes vertical averages. These are odd
// pixel addresses. Any chroma match will not be used. (YUY2)
// All four operands of each call address the center line (base + XCX);
// the outer two are offset by +/-2 bytes, the inner two are not offset.
MERGE4PIXavgH("-2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right
MERGE4PIXavgH("2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left
// -*- c++ -*-
// Working pointers; they are initialised below and advanced by one
// line pitch per loop iteration.
unsigned char* pDest;
const unsigned char* pSrcP;
const unsigned char* pSrc;
const unsigned char* pBob;
const unsigned char* pBobP;
// Packed per-byte constants, passed to the asm block as "m" operands.
int64_t Max_Mov = 0x0404040404040404ull;
int64_t DiffThres = 0x0f0f0f0f0f0f0f0full;
int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma
int64_t UVMask = 0xff00ff00ff00ff00ull; // keeps only chroma
int64_t TENS = 0x0a0a0a0a0a0a0a0aull;
int64_t FOURS = 0x0404040404040404ull;
int64_t ONES = 0x0101010101010101ull;
int64_t Min_Vals = 0x0000000000000000ull;
int64_t Max_Vals = 0x0000000000000000ull;
int64_t ShiftMask = 0xfefffefffefffeffull; // mask operand handed to V_PAVGB
// long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
// saves a lot of xor's to delete 64bit garbage.
#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
long src_pitch2 = src_pitch; // even & odd lines are not interleaved in DScaler
#else
long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth
#endif
long dst_pitch2 = 2 * dst_pitch;
long y;
#ifdef IS_SSE2
long Last8 = (rowsize-16); // ofs to last 16 bytes in row for SSE2
#else
long Last8 = (rowsize-8); // ofs to last 8 bytes in row
#endif
long dst_pitchw = dst_pitch; // local storage so the asm can reference it
// Initial pointer positions for the first processed line.
pSrc = pWeaveSrc; // points 1 weave line above
pSrcP = pWeaveSrcP; // likewise, for the second weave source
// The starting destination line depends on whether output is
// double-resized and whether the vertical filter writes two lines at once.
#ifdef DBL_RESIZE
#ifdef USE_VERTICAL_FILTER
pDest = pWeaveDest + dst_pitch2;
#else
pDest = pWeaveDest + 3*dst_pitch;
#endif
#else
#ifdef USE_VERTICAL_FILTER
pDest = pWeaveDest + dst_pitch;
#else
pDest = pWeaveDest + dst_pitch2;
#endif
#endif
// With top field first the bob sources start one src_pitch2 further on.
if (TopFirst)
{
pBob = pCopySrc + src_pitch2; // remember one weave line just copied previously
pBobP = pCopySrcP + src_pitch2;
}
else
{
pBob = pCopySrc;
pBobP = pCopySrcP;
}
// Symbolic names for the asm operand numbers. Each macro expands to the
// "%N" reference of the matching entry in the asm statement's input operand
// list, so the numbering here must follow that list's order exactly.
// Guarded so the definitions are only made once.
#ifndef _pBob
#define _pBob "%0"
#define _src_pitch2 "%1"
#define _ShiftMask "%2"
#define _pDest "%3"
#define _dst_pitchw "%4"
#define _Last8 "%5"
#define _pSrc "%6"
#define _pSrcP "%7"
#define _pBobP "%8"
#define _DiffThres "%9"
#define _Min_Vals "%10"
#define _Max_Vals "%11"
#define _FOURS "%12"
#define _TENS "%13"
#define _ONES "%14"
#define _UVMask "%15"
#define _Max_Mov "%16"
#define _YMask "%17"
#define _oldbx "%18"
#endif
long oldbx;
for (y=1; y < FldHeight-1; y++)
{
// pretend it's indented -->>
__asm__ __volatile__
(
// Loop general reg usage
//
// XAX - pBobP, then pDest
// XBX - pBob
// XCX - src_pitch2
// XDX - current offset
// XDI - prev weave pixels, 1 line up
// XSI - next weave pixels, 1 line up
// Save "XBX" (-fPIC)
MOVX" %%"XBX", "_oldbx"\n\t"
#ifdef IS_SSE2
// sse2 code deleted for now
#else
// simple bob first 8 bytes
MOVX" "_pBob", %%"XBX"\n\t"
MOVX" "_src_pitch2", %%"XCX"\n\t"
// Simple bob for the first and last 8 bytes of the line, which the main
// search loop does not cover. On entry XBX = pBob and XCX = src_pitch2.
#ifdef USE_VERTICAL_FILTER
// Vertical filter: write two output lines, blended 1/4 and 3/4 of the
// way between the bob line at XBX and the one at XBX + XCX.
"movq (%%"XBX"), %%mm0\n\t"
"movq (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"]
"movq %%mm0, %%mm2\n\t"
V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between
V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way
V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way
MOVX" "_pDest", %%"XDI"\n\t"
MOVX" "_dst_pitchw", %%"XAX"\n\t"
V_MOVNTQ ("(%%"XDI")", "%%mm0")
V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1
// simple bob last 8 bytes
MOVX" "_Last8", %%"XDX"\n\t"
LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" // ["XBX"+"XDX"]
"movq (%%"XSI"), %%mm0\n\t"
"movq (%%"XSI", %%"XCX"), %%mm1\n\t" // qword ptr["XSI"+"XCX"]
"movq %%mm0, %%mm2\n\t"
V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between
V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way
V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way
ADDX" %%"XDX", %%"XDI"\n\t" // last 8 bytes of dest
// The destination must be the memory at (XDI), matching the first-8-bytes
// store above; a bare register is not a valid movntq destination.
V_MOVNTQ ("(%%"XDI")", "%%mm0")
V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1)
#else
// No vertical filter: one output line, the average of the two bob lines.
"movq (%%"XBX"), %%mm0\n\t"
// pavgb mm0, qword ptr["XBX"+"XCX"]
V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask)
MOVX" "_pDest", %%"XDI"\n\t"
V_MOVNTQ ("(%%"XDI")", "%%mm0")
// simple bob last 8 bytes
MOVX" "_Last8", %%"XDX"\n\t"
LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"]
"movq (%%"XSI"), %%mm0\n\t"
// pavgb mm0, qword ptr["XSI"+"XCX"]
V_PAVGB ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask)
V_MOVNTQ ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0)
#endif
// now loop and get the middle qwords
// XSI/XDI walk the two weave sources, XDX is the byte offset into the
// line (the first 8 bytes were already handled above).
MOVX" "_pSrc", %%"XSI"\n\t"
MOVX" "_pSrcP", %%"XDI"\n\t"
MOVX" $8, %%"XDX"\n\t" // curr offset into all lines
"1:\n\t"
// Per iteration: reload pBobP into XAX and add the current offset; step
// the weave pointers and the bob pointer (XBX) on by 8 bytes.
MOVX" "_pBobP", %%"XAX"\n\t"
ADDX" $8, %%"XDI"\n\t"
ADDX" $8, %%"XSI"\n\t"
ADDX" $8, %%"XBX"\n\t"
ADDX" %%"XDX", %%"XAX"\n\t"
#ifdef USE_STRANGE_BOB
#include "StrangeBob.inc"
#else
#include "WierdBob.inc"
#endif
// For non-SSE2:
// through out most of the rest of this loop we will maintain
// mm4 our min bob value
// mm5 best weave pixels so far
// mm6 our max Bob value
// mm7 best weighted pixel ratings so far
// We will keep a slight bias to using the weave pixels
// from the current location, by rating them by the min distance
// from the Bob value instead of the avg distance from that value.
// our best and only rating so far
"pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet
#endif
// -*- c++ -*-
// Searches the center vertical line above center and below, in both the old
// and new fields, but takes averages. These are even pixel addresses.
// No horizontal displacement: base alone is the line above the center and
// base + 2*XCX the line below it; each call pairs opposite lines of the
// two fields.
MERGE4PIXavg("(%%"XDI", %%"XCX", 2)", "(%%"XSI")") // down, up
MERGE4PIXavg("(%%"XDI")", "(%%"XSI", %%"XCX", 2)") // up, down
// -*- c++ -*-
// Searches the center vertical line above center and below, in both the old
// and new fields, but takes averages. These are even pixel addresses.
// Four-operand variant: each call passes an outer line and the center line
// (base + XCX) of the XDI field, then the center line and the opposite
// outer line of the XSI field.
MERGE4PIXavgH("(%%"XDI", %%"XCX", 2)", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI")") // down, up
MERGE4PIXavgH("(%%"XDI")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI", %%"XCX", 2)") // up, down
// -*- c++ -*-
// First, get and save our possible Bob values
// Assume our pixels are laid out as follows, with x the calculated bob value
// and the other pixels are from the current field
//
// j a b c k current field
// x calculated line
// m d e f n current field
//
// we calc the bob value luma value as:
// if |j - n| < Thres && |a - m| > Thres
// avg(j,n)
// end if
// if |k - m| < Thres && |c - n| > Thres
// avg(k,m)
// end if
// if |c - d| < Thres && |b - f| > Thres
// avg(c,d)
// end if
// if |a - f| < Thres && |b - d| > Thres
// avg(a,f)
// end if
// if |b - e| < Thres
// avg(b,e)
// end if
// pickup any thing not yet set with avg(b,e)
// j, n
// Candidate avg(j,n): accepted where |j - n| is within Thres and
// |a - m| exceeds Thres.
// Accumulators, cleared here before the first candidate:
//   mm5 = mask of bytes already decided
//   mm6 = chosen bob values
//   mm7 = difference of the chosen pair
"pxor %%mm5, %%mm5\n\t"
"pxor %%mm6, %%mm6\n\t"
"pxor %%mm7, %%mm7\n\t"
"movq -2(%%"XBX"), %%mm0\n\t" // value a (top line, 1 pixel left)
"movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value m (bottom line, 2 pixels left)
"movq %%mm0, %%mm3\n\t"
"psubusb %%mm1, %%mm3\n\t"
"psubusb %%mm0, %%mm1\n\t"
"por %%mm1, %%mm3\n\t" // abs(a,m)
"psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(a,m) > Thres else 0
"pxor %%mm4, %%mm4\n\t"
"pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(a,m) < Thres, else 00
"pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(a,m) > Thres, else 00
"movq -4(%%"XBX"), %%mm0\n\t" // value j
"movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n
"movq %%mm0, %%mm2\n\t"
// Use the V_PAVGB wrapper like the k/m and c/d candidates do, instead of a
// raw pavgb, so this average follows whatever the build selects for
// V_PAVGB. mm3 is free as scratch here: its old value is no longer needed
// and it is overwritten on the next line.
V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n)
"movq %%mm0, %%mm3\n\t"
"psubusb %%mm1, %%mm0\n\t"
"psubusb %%mm3, %%mm1\n\t"
"por %%mm1, %%mm0\n\t" // abs(j,n)
"movq %%mm0, %%mm1\n\t"
"psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(j,n) > Thres else 0
"pxor %%mm3, %%mm3\n\t"
"pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(j,n) < Thres, else 00
"pand %%mm4, %%mm1\n\t" // mm1 = accept mask: both conditions hold
"pand %%mm1, %%mm2\n\t" // keep avg(j,n) only where accepted
"pand %%mm1, %%mm0\n\t" // keep abs(j,n) only where accepted
// Fold this candidate into the accumulators.
"movq %%mm1, %%mm3\n\t"
"pxor %%mm5, %%mm3\n\t"
"pand %%mm3, %%mm6\n\t"
"pand %%mm3, %%mm7\n\t"
"pand %%mm3, %%mm5\n\t"
"por %%mm1, %%mm5\n\t"
"por %%mm2, %%mm6\n\t"
"por %%mm0, %%mm7\n\t"
// k & m
"movq 2(%%"XBX"), %%mm0\n\t" // value c from top left
"movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom right
"movq %%mm0, %%mm3\n\t"
"psubusb %%mm1, %%mm3\n\t"
"psubusb %%mm0, %%mm1\n\t"
"por %%mm1, %%mm3\n\t" // abs(c,n)
"psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(c,n) > Thres else 0
"pxor %%mm4, %%mm4\n\t"
"pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(c,n) < Thres, else 00
"pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(c,n) > Thres, else 00
"movq 4(%%"XBX"), %%mm0\n\t" // value k
"movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value m
"movq %%mm0, %%mm2\n\t"
V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(k,m)
"movq %%mm0, %%mm3\n\t"
"psubusb %%mm1, %%mm0\n\t"
"psubusb %%mm3, %%mm1\n\t"
"por %%mm1, %%mm0\n\t" // abs(k,m)
"movq %%mm0, %%mm1\n\t"
"psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(k,m) > Thres else 0
"pxor %%mm3, %%mm3\n\t"
"pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(k,m) < Thres, else 00
"pand %%mm4, %%mm1\n\t"
"pand %%mm1, %%mm2\n\t"
"pand %%mm1, %%mm0\n\t"
"movq %%mm1, %%mm3\n\t"
"pxor %%mm5, %%mm3\n\t"
"pand %%mm3, %%mm6\n\t"
"pand %%mm3, %%mm7\n\t"
"pand %%mm3, %%mm5\n\t"
"por %%mm1, %%mm5\n\t"
"por %%mm2, %%mm6\n\t"
"por %%mm0, %%mm7\n\t"
// c & d
"movq (%%"XBX"), %%mm0\n\t" // value b from top left
"movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right
"movq %%mm0, %%mm3\n\t"
"psubusb %%mm1, %%mm3\n\t"
"psubusb %%mm0, %%mm1\n\t"
"por %%mm1, %%mm3\n\t" // abs(b,f)
"psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(b,f) > Thres else 0
"pxor %%mm4, %%mm4\n\t"
"pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(b,f) < Thres, else 00
"pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(b,f) > Thres, else 00
"movq 2(%%"XBX"), %%mm0\n\t" // value c
"movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value d
"movq %%mm0, %%mm2\n\t"
V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(c,d)
"movq %%mm0, %%mm3\n\t"
"psubusb %%mm1, %%mm0\n\t"
"psubusb %%mm3, %%mm1\n\t"
"por %%mm1, %%mm0\n\t" // abs(c,d)
"movq %%mm0, %%mm1\n\t"
"psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(c,d) > Thres else 0
"pxor %%mm3, %%mm3\n\t"
"pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(c,d) < Thres, else 00
"pand %%mm4, %%mm1\n\t"
"pand %%mm1, %%mm2\n\t"
"pand %%mm1, %%mm0\n\t"
"movq %%mm1, %%mm3\n\t"
"pxor %%mm5, %%mm3\n\t"
"pand %%mm3, %%mm6\n\t"
"pand %%mm3, %%mm7\n\t"
"pand %%mm3, %%mm5\n\t"
"por %%mm1, %%mm5\n\t"
"por %%mm2, %%mm6\n\t"
"por %%mm0, %%mm7\n\t"
// a & f
"movq (%%"XBX"), %%mm0\n\t" // value b from top left
"movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value d from bottom right
"movq %%mm0, %%mm3\n\t"