Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
GStreamer
gst-plugins-good
Commits
536ff477
Commit
536ff477
authored
Jun 16, 2020
by
Vivia Nikolaidou
🦆
Browse files
deinterlace: Add yadif ASM optimisations
Measured to be about 3.4x faster than C Part-of: <
gstreamer/gst-plugins-good!621
>
parent
ef78014d
Pipeline
#161470
waiting for manual action with stages
in 28 seconds
Changes
7
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
gst/deinterlace/meson.build
View file @
536ff477
...
...
@@ -33,8 +33,60 @@ else
copy : true)
endif
# Objects produced by nasm from the yadif assembly. Stays an empty list when
# nasm is not available, so the library() call can list it unconditionally.
asm_gen_objs = []
if have_nasm
# Object file suffix expected by the host toolchain.
if host_system == 'windows'
outputname = '@PLAINNAME@.obj'
else
outputname = '@PLAINNAME@.o'
endif
# Tell the assembly whether it is being built as position-independent code.
if get_option('b_staticpic')
asm_pic_def = '-DPIC'
else
asm_pic_def = '-UPIC'
endif
# Assembly has to be told when the symbols have to be prefixed with _
if cc.symbols_have_underscore_prefix()
asm_prefix_def = '-DPREFIX'
else
asm_prefix_def = '-UPREFIX'
endif
# NOTE(review): the architecture define and the 64-bit object formats below
# are hard-coded; presumably have_nasm is only true on x86-64 hosts -- confirm
# where have_nasm is set.
asm_arch_def = '-DARCH_X86_64=1'
# Pick the nasm output (object) format for the host system.
if host_system == 'windows'
asm_outformat = 'win64'
elif ['darwin', 'ios'].contains(host_system)
asm_outformat = 'macho64'
elif host_system.endswith('bsd')
# NOTE(review): 'aoutb' is nasm's BSD a.out format; confirm it is the intended
# choice for 64-bit BSD hosts rather than 'elf64'.
asm_outformat = 'aoutb'
else
asm_outformat = 'elf64'
endif
asm_x = files('x86/yadif.asm',
'x86/x86inc.asm')
asm_stackalign_def = '-DSTACK_ALIGNMENT=64'
# Searched (in addition to the current source dir) for %include files.
asm_incdir = 'x86'
message('Nasm configured on x86-64')
# One nasm invocation per input file, producing one object per input.
asm_gen = generator(nasm,
output: outputname,
arguments: ['-I@CURRENT_SOURCE_DIR@',
'-I@CURRENT_SOURCE_DIR@/@0@/'.format(asm_incdir),
asm_arch_def,
asm_stackalign_def,
asm_pic_def,
asm_prefix_def,
'-f', asm_outformat,
'-o', '@OUTPUT@',
'@INPUT@'])
asm_gen_objs = asm_gen.process(asm_x)
endif
gstdeinterlace = library('gstdeinterlace',
interlace_sources, orc_c, orc_h,
interlace_sources,
asm_gen_objs,
orc_c, orc_h,
c_args : gst_plugins_good_args,
include_directories : [configinc],
dependencies : [orc_dep, gstbase_dep, gstvideo_dep],
...
...
gst/deinterlace/x86/x86inc.asm
0 → 100644
View file @
536ff477
This diff is collapsed.
Click to expand it.
gst/deinterlace/x86/yadif.asm
0 → 100644
View file @
536ff477
;*****************************************************************************
;* x86-optimized functions for yadif filter
;* Copyright (C) 2020 Vivia Nikolaidou <vivia.nikolaidou@ltnglobal.com>
;*
;* Based on libav's vf_yadif.asm file
;* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
;* Copyright (c) 2013 Daniel Kang <daniel.d.kang@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
SECTION_RODATA

; 16 bytes of value 1 (used to round a byte average down)
pb_1: times 16 db 1
; 8 words of value 1
pw_1: times  8 dw 1

SECTION .text
; ABS1 a, tmp
; Replaces every signed word in %1 with its absolute value.
; %2 is a scratch register; it is clobbered on the non-SSSE3 paths and left
; untouched on the SSSE3 path.
%macro ABS1 2
%if cpuflag(ssse3)
    pabsw     %1, %1
%elif cpuflag(mmxext)
    ; a, tmp: tmp = 0 - a, a = max(a, -a)
    pxor      %2, %2
    psubw     %2, %1
    pmaxsw    %1, %2
%else
    ; a, tmp: tmp = (0 > a) ? 0xFFFF : 0, a = (a ^ tmp) - tmp
    pxor      %2, %2
    pcmpgtw   %2, %1
    pxor      %1, %2
    psubw     %1, %2
%endif
%endmacro
; CHECK off_t, off_b
; Scores one diagonal candidate for the spatial predictor.
;   %1 = byte offset applied to tzeroq, %2 = byte offset applied to bzeroq.
;   FILTER_HEAD invokes it with (-2, 0), (-3, 1), (0, -2) and (1, -3), i.e.
;   %1 = j - 1 and %2 = -j - 1 for j = -1, -2, 1, 2.
; Expects m7 = 0. Clobbers m2, m3, m4, m5.
; On exit: m2 = score (words), m5 = candidate prediction (words).
%macro CHECK 2
    ; m2 = t0[x + %1 + k] for byte lane k
    movu      m2, [tzeroq+%1]
    ; m3 = b0[x + %2 + k] for byte lane k
    movu      m3, [bzeroq+%2]
    ; m4 = m5 = copy of the t0 bytes
    mova      m4, m2
    mova      m5, m2
    ; m4 = xor(t0 bytes, b0 bytes)
    pxor      m4, m3
    ; m5 = average of t0 and b0 bytes, rounded up
    pavgb     m5, m3
    ; keep only the lowest bit of the xor: 1 where pavgb rounded up
    pand      m4, [pb_1]
    ; m5 = average rounded down
    psubusb   m5, m4
    ; shift right by 1 byte so lane k holds the middle pixel of the triple
    psrldq    m5, 1
    ; m7 = 0
    ; Interleave low-order bytes with 0
    ; so one pixel doesn't spill into the next one
    punpcklbw m5, m7
    ; m4 = t0 bytes (reset)
    mova      m4, m2
    ; m2 = saturating (t0 - b0)
    psubusb   m2, m3
    ; m3 = saturating (b0 - t0)
    psubusb   m3, m4
    ; m2 = FFABS(t0 - b0) per byte
    pmaxub    m2, m3
    ; m3 = m4 = copies of the absolute differences
    mova      m3, m2
    mova      m4, m2
    ; m3 = absolute differences one pixel to the right
    psrldq    m3, 1
    ; m4 = absolute differences two pixels to the right
    psrldq    m4, 2
    ; prevent pixel spilling for all of them
    punpcklbw m2, m7
    punpcklbw m3, m7
    punpcklbw m4, m7
    paddw     m2, m3
    ; m2 = score = sum of the three neighbouring absolute differences
    paddw     m2, m4
%endmacro
; CHECK1
; Applies the candidate produced by CHECK where it beats the current best.
; In:  m0 = spatial_score, m1 = spatial_pred, m2 = score, m5 = candidate pred.
; Out: m0, m1 updated per word; m6 = mask (0xFFFF where the candidate won).
; Clobbers m3, m5.
%macro CHECK1 0
    ; m0 was spatial_score
    ; m1 was spatial_pred
    mova      m3, m0
    ; compare for greater than: spatial_score > score
    ; each word will be all ones or all zeros
    pcmpgtw   m3, m2
    ; if (score < spatial_score) spatial_score = score;
    pminsw    m0, m2
    ; m6 = the mask
    mova      m6, m3
    ; m5 = candidate, becomes 0 where spatial_pred should NOT change
    pand      m5, m3
    ; and-not: m3 = old spatial_pred, becomes 0 where it SHOULD change
    pandn     m3, m1
    ; m3 = put them together in an OR
    por       m3, m5
    ; and put it in spatial_pred
    mova      m1, m3
%endmacro
; CHECK2
; Same selection as CHECK1, but the candidate may only win in words where
; the preceding CHECK1 also won.
; In:  m0 = spatial_score, m1 = spatial_pred, m2 = score, m5 = candidate pred,
;      m6 = mask left by CHECK1.
; Clobbers m2, m3, m5, m6.
%macro CHECK2 0
    ; m6 is the mask from CHECK1: 0xFFFF -> 0, 0 -> 1
    paddw     m6, [pw_1]
    ; shift words left by 14: 0 stays 0, 1 becomes 0x4000
    ; (reuses the mask instead of recomputing it)
    psllw     m6, 14
    ; add the penalty to score (saturating) where CHECK1 did not win
    paddsw    m2, m6
    ; same as CHECK1
    mova      m3, m0
    pcmpgtw   m3, m2
    pminsw    m0, m2
    pand      m5, m3
    pandn     m3, m1
    por       m3, m5
    mova      m1, m3
%endmacro
; LOAD dst, src
; Loads the low half-register of bytes from %2 into %1 and widens them to
; words by interleaving with m7, which must hold 0.
; %1 must not be m7: interleaving m7 with itself duplicates each byte instead
; of zero-extending it.
%macro LOAD 2
    movh      %1, %2
    punpcklbw %1, m7
%endmacro
; FILTER_HEAD
; Common first part of the yadif line filter for mmsize/2 pixels.
; Stack use: [rsp + 0] = d, [rsp + 16] = diff.
; Out: m0 = spatial_score, m1 = spatial_pred, m6 = diff, m7 = 0.
; Clobbers m2-m5.
%macro FILTER_HEAD 0
    ; m7 = 0
    pxor      m7, m7
    ; m0 = c
    LOAD      m0, [tzeroq]
    ; m1 = e
    LOAD      m1, [bzeroq]
    ; m3 = mp row
    LOAD      m3, [mpq]
    ; m2 = mone row
    LOAD      m2, [moneq]
    ; m4 = mp row
    mova      m4, m3
    ; m3 = mone + mp
    paddw     m3, m2
    ; m3 = d
    psraw     m3, 1
    ; rsp + 0 = d
    mova      [rsp + 0], m3
    ; m2 = mone - mp
    psubw     m2, m4
    ; m2 = temporal_diff0 (m4 is temporary)
    ABS1      m2, m4
    ; m3 = t2
    LOAD      m3, [ttwoq]
    ; m4 = b2
    LOAD      m4, [btwoq]
    ; m3 = t2 - c
    psubw     m3, m0
    ; m4 = b2 - e
    psubw     m4, m1
    ; m3 = ABS(t2 - c)
    ABS1      m3, m5
    ; m4 = ABS(b2 - e)
    ABS1      m4, m5
    paddw     m3, m4
    ; m2 = temporal_diff0 >> 1
    psrlw     m2, 1
    ; m3 = temporal_diff1
    psrlw     m3, 1
    ; m2 = max(temporal_diff0 >> 1, temporal_diff1)
    pmaxsw    m2, m3
    ; m3 = tp2
    LOAD      m3, [tptwoq]
    ; m4 = bp2
    LOAD      m4, [bptwoq]
    psubw     m3, m0
    psubw     m4, m1
    ABS1      m3, m5
    ABS1      m4, m5
    paddw     m3, m4
    ; m3 = temporal_diff2
    psrlw     m3, 1
    ; m2 = diff (for real)
    pmaxsw    m2, m3
    ; rsp + 16 = diff
    mova      [rsp + 16], m2
    ; m1 = e + c
    paddw     m1, m0
    ; m0 = 2c
    paddw     m0, m0
    ; m0 = 2c - (e + c) = c - e
    psubw     m0, m1
    ; m1 = spatial_pred = (c + e) >> 1
    psrlw     m1, 1
    ; m0 = FFABS(c - e)
    ABS1      m0, m2
    ; m2 = t0[x-1] (packed bytes, unaligned load)
    movu      m2, [tzeroq - 1]
    ; m3 = b0[x-1]
    movu      m3, [bzeroq - 1]
    ; m4 = t0[x-1]
    mova      m4, m2
    ; m2 = saturating t0[x-1] - b0[x-1], unsigned packed
    psubusb   m2, m3
    ; m3 = saturating b0[x-1] - t0[x-1], unsigned packed
    psubusb   m3, m4
    ; m2 = abs(t0[x-1] - b0[x-1])
    pmaxub    m2, m3
%if mmsize == 16
    ; m3 = m2 shifted right by 2 bytes
    ; pixel jump: go from x-1 to x+1
    mova      m3, m2
    psrldq    m3, 2
%else
    pshufw    m3, m2, q0021
%endif
    ; m7 = 0
    ; unpack and interleave low-order bytes
    ; to prevent pixel spilling when adding
    punpcklbw m2, m7
    punpcklbw m3, m7
    paddw     m0, m2
    paddw     m0, m3
    ; m0 = spatial_score
    psubw     m0, [pw_1]

    CHECK     -2, 0
    CHECK1
    CHECK     -3, 1
    CHECK2
    CHECK     0, -2
    CHECK1
    CHECK     1, -3
    CHECK2

    ; now m0 = spatial_score, m1 = spatial_pred
    ; m6 = diff
    mova      m6, [rsp + 16]
%endmacro
; FILTER_TAIL
; Clamps spatial_pred (m1) to [d - diff, d + diff], stores mmsize/2 output
; bytes at dstq and advances every line pointer by mmsize/2.
; In: m1 = spatial_pred, m6 = diff, [rsp] = d. Clobbers m1, m2, m3.
%macro FILTER_TAIL 0
    ; m2 = d
    mova      m2, [rsp]
    ; m3 = d
    mova      m3, m2
    ; m2 = d - diff
    psubw     m2, m6
    ; m3 = d + diff
    paddw     m3, m6
    ; m1 = max(spatial_pred, d - diff)
    pmaxsw    m1, m2
    ; m1 = min(d + diff, max(spatial_pred, d - diff))
    ; m1 = spatial_pred
    pminsw    m1, m3
    ; pack the signed words into unsigned bytes with saturation
    packuswb  m1, m1

    ; dst = spatial_pred
    movh      [dstq], m1
    ; advance by half the register size (one word lane per output pixel)
    add       dstq,   mmsize/2
    add       tzeroq, mmsize/2
    add       bzeroq, mmsize/2
    add       moneq,  mmsize/2
    add       mpq,    mmsize/2
    add       ttwoq,  mmsize/2
    add       btwoq,  mmsize/2
    add       tptwoq, mmsize/2
    add       bptwoq, mmsize/2
    add       ttoneq, mmsize/2
    add       ttpq,   mmsize/2
    add       bboneq, mmsize/2
    add       bbpq,   mmsize/2
%endmacro
; FILTER_MODE0
; Loop body for the mode that applies the extra spatial interlacing check:
; widens diff to max(diff, min, -max) before the final clamp in FILTER_TAIL.
; Processes mmsize/2 pixels per iteration until the width counter (r13m)
; is no longer positive.
%macro FILTER_MODE0 0
.loop0:
    FILTER_HEAD
    ; m2 = tt1
    LOAD      m2, [ttoneq]
    ; m4 = ttp
    LOAD      m4, [ttpq]
    ; m3 = bb1
    LOAD      m3, [bboneq]
    ; m5 = bbp
    LOAD      m5, [bbpq]
    paddw     m2, m4
    paddw     m3, m5
    ; m2 = b
    psrlw     m2, 1
    ; m3 = f
    psrlw     m3, 1
    ; m4 = c
    LOAD      m4, [tzeroq]
    ; m5 = d
    mova      m5, [rsp]
    ; m7 = e
    ; LOAD widens by interleaving with m7 (= 0), so it cannot target m7
    ; directly: punpcklbw m7, m7 would duplicate each byte instead of
    ; zero-extending it. m0 (spatial_score) is dead here and is overwritten
    ; with d below, so widen through it and copy.
    LOAD      m0, [bzeroq]
    mova      m7, m0
    ; m2 = b - c
    psubw     m2, m4
    ; m3 = f - e
    psubw     m3, m7
    ; m0 = d
    mova      m0, m5
    ; m5 = d - c
    psubw     m5, m4
    ; m0 = d - e
    psubw     m0, m7
    ; m4 = b - c
    mova      m4, m2
    ; m2 = FFMIN(b-c, f-e)
    pminsw    m2, m3
    ; m3 = FFMAX(f-e, b-c)
    pmaxsw    m3, m4
    ; m2 = FFMAX(d-c, FFMIN(b-c, f-e))
    pmaxsw    m2, m5
    ; m3 = FFMIN(d-c, FFMAX(f-e, b-c))
    pminsw    m3, m5
    ; m2 = max
    pmaxsw    m2, m0
    ; m3 = min
    pminsw    m3, m0
    ; m4 = 0
    pxor      m4, m4
    ; m6 = MAX(diff, min)
    pmaxsw    m6, m3
    ; m4 = -max
    psubw     m4, m2
    ; m6 = diff = MAX(diff, min, -max)
    pmaxsw    m6, m4

    FILTER_TAIL
    ; r13m = w
    sub       DWORD r13m, mmsize/2
    jg        .loop0
%endmacro
; FILTER_MODE2
; Loop body for the mode without the extra spatial interlacing check:
; FILTER_HEAD followed directly by the clamp/store in FILTER_TAIL.
; Processes mmsize/2 pixels per iteration until the width counter (r13m)
; is no longer positive.
%macro FILTER_MODE2 0
.loop2:
    FILTER_HEAD
    FILTER_TAIL
    ; r13m = w
    sub       DWORD r13m, mmsize/2
    jg        .loop2
%endmacro
; YADIF_ADD3
; Advances the destination and all twelve source line pointers by 3 bytes.
%macro YADIF_ADD3 0
    ; start 3 pixels later
    add       dstq,   3
    add       tzeroq, 3
    add       bzeroq, 3
    add       moneq,  3
    add       mpq,    3
    add       ttwoq,  3
    add       btwoq,  3
    add       tptwoq, 3
    add       bptwoq, 3
    add       ttoneq, 3
    add       ttpq,   3
    add       bboneq, 3
    add       bbpq,   3
%endmacro
; cglobal foo, 2,3,7,0x40, dst, src, tmp
; declares a function (foo) that automatically loads two arguments (dst and
; src) into registers, uses one additional register (tmp) plus 7 vector
; registers (m0-m6) and allocates 0x40 bytes of stack space.

; Emits yadif_filter_line_mode0 for the currently selected instruction set:
; 13 pointer arguments loaded into registers, 14 general-purpose registers,
; 8 vector registers and 80 bytes of stack. The width argument (w) is not
; loaded into a register; the loop reads it through r13m.
%macro YADIF_MODE0 0
cglobal yadif_filter_line_mode0, 13, 14, 8, 80, dst, tzero, bzero, mone, mp, \
                                        ttwo, btwo, tptwo, bptwo, ttone, \
                                        ttp, bbone, bbp, w

    YADIF_ADD3
    FILTER_MODE0
    RET
%endmacro
; Emits yadif_filter_line_mode2 for the currently selected instruction set.
; Same register and stack layout as yadif_filter_line_mode0; the width
; argument (w) is read through r13m by the loop.
%macro YADIF_MODE2 0
cglobal yadif_filter_line_mode2, 13, 14, 8, 80, dst, tzero, bzero, mone, mp, \
                                        ttwo, btwo, tptwo, bptwo, ttone, \
                                        ttp, bbone, bbp, w

    YADIF_ADD3
    FILTER_MODE2
    RET
%endmacro
; declares two functions for ssse3, and two for sse2
INIT_XMM ssse3
YADIF_MODE0
YADIF_MODE2
INIT_XMM sse2
YADIF_MODE0
YADIF_MODE2
gst/deinterlace/yadif.c
View file @
536ff477
...
...
@@ -31,6 +31,7 @@
#include
<gst/gst.h>
#ifdef HAVE_ORC
#include
<orc/orc.h>
#include
<orc/orcsse.h>
#endif
#include
"gstdeinterlacemethod.h"
#include
"yadif.h"
...
...
@@ -86,6 +87,41 @@ static void
filter_scanline_yadif_packed_3
(
GstDeinterlaceSimpleMethod
*
self
,
guint8
*
out
,
const
GstDeinterlaceScanlineData
*
scanlines
,
guint
size
);
static
void
filter_line_c_planar_mode0
(
void
*
ORC_RESTRICT
dst
,
const
void
*
ORC_RESTRICT
tzero
,
const
void
*
ORC_RESTRICT
bzero
,
const
void
*
ORC_RESTRICT
mone
,
const
void
*
ORC_RESTRICT
mp
,
const
void
*
ORC_RESTRICT
ttwo
,
const
void
*
ORC_RESTRICT
btwo
,
const
void
*
ORC_RESTRICT
tptwo
,
const
void
*
ORC_RESTRICT
bptwo
,
const
void
*
ORC_RESTRICT
ttone
,
const
void
*
ORC_RESTRICT
ttp
,
const
void
*
ORC_RESTRICT
bbone
,
const
void
*
ORC_RESTRICT
bbp
,
int
w
);
static
void
filter_line_c_planar_mode2
(
void
*
ORC_RESTRICT
dst
,
const
void
*
ORC_RESTRICT
tzero
,
const
void
*
ORC_RESTRICT
bzero
,
const
void
*
ORC_RESTRICT
mone
,
const
void
*
ORC_RESTRICT
mp
,
const
void
*
ORC_RESTRICT
ttwo
,
const
void
*
ORC_RESTRICT
btwo
,
const
void
*
ORC_RESTRICT
tptwo
,
const
void
*
ORC_RESTRICT
bptwo
,
const
void
*
ORC_RESTRICT
ttone
,
const
void
*
ORC_RESTRICT
ttp
,
const
void
*
ORC_RESTRICT
bbone
,
const
void
*
ORC_RESTRICT
bbp
,
int
w
);
static
void
(
*
filter_mode2
)
(
void
*
ORC_RESTRICT
dst
,
const
void
*
ORC_RESTRICT
tzero
,
const
void
*
ORC_RESTRICT
bzero
,
const
void
*
ORC_RESTRICT
mone
,
const
void
*
ORC_RESTRICT
mp
,
const
void
*
ORC_RESTRICT
ttwo
,
const
void
*
ORC_RESTRICT
btwo
,
const
void
*
ORC_RESTRICT
tptwo
,
const
void
*
ORC_RESTRICT
bptwo
,
const
void
*
ORC_RESTRICT
ttone
,
const
void
*
ORC_RESTRICT
ttp
,
const
void
*
ORC_RESTRICT
bbone
,
const
void
*
ORC_RESTRICT
bbp
,
int
w
);
static
void
(
*
filter_mode0
)
(
void
*
ORC_RESTRICT
dst
,
const
void
*
ORC_RESTRICT
tzero
,
const
void
*
ORC_RESTRICT
bzero
,
const
void
*
ORC_RESTRICT
mone
,
const
void
*
ORC_RESTRICT
mp
,
const
void
*
ORC_RESTRICT
ttwo
,
const
void
*
ORC_RESTRICT
btwo
,
const
void
*
ORC_RESTRICT
tptwo
,
const
void
*
ORC_RESTRICT
bptwo
,
const
void
*
ORC_RESTRICT
ttone
,
const
void
*
ORC_RESTRICT
ttp
,
const
void
*
ORC_RESTRICT
bbone
,
const
void
*
ORC_RESTRICT
bbp
,
int
w
);
static
void
copy_scanline
(
GstDeinterlaceSimpleMethod
*
self
,
guint8
*
out
,
const
GstDeinterlaceScanlineData
*
scanlines
,
guint
size
)
...
...
@@ -139,36 +175,31 @@ static void
dism_class
->
interpolate_scanline_nv21
=
filter_scanline_yadif_semiplanar
;
}
static
void
gst_deinterlace_method_yadif_init
(
GstDeinterlaceMethodYadif
*
self
)
{
}
#define FFABS(a) ABS(a)
#define FFMIN(a,b) MIN(a,b)
#define FFMAX(a,b) MAX(a,b)
#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
#define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
#define CHECK(j)\
{ int score = FFABS(s
->t0[x - colors2 * (1 + (j))] - s->b0[x - colors2 * (1 - (j))
])\
+ FFABS(s
->t0
[x
+
colors2 * (j)] - s->b0
[x
-
colors2 * (j)
])\
+ FFABS(s
->t0[x + colors2 * (1 + (j))] - s->b0[x + colors2 * (1 - (j))
]);\
#define CHECK(j
1, j2, j3
)\
{ int score = FFABS(s
tzero[x - j1] - sbzero[x - j2
])\
+ FFABS(s
tzero
[x +
j3] - sbzero
[x -
j3
])\
+ FFABS(s
tzero[x + j1] - sbzero[x + j2
]);\
if (score < spatial_score) {\
spatial_score= score;\
spatial_pred= (s
->t0
[x
+
colors2 * ((j))] + s->b0[x - colors2 * (j)
])>>1;\
spatial_pred= (s
tzero
[x +
j3] + sbzero[x - j3
])>>1;\
/* The is_not_edge argument here controls when the code will enter a branch
* which reads up to and including x-3 and x+3. */
#define FILTER(start, end, is_not_edge) \
for (x = start; x < end; x++) { \
int c = s
->t0
[x]; \
int d = (s
->m1
[x] + s
->
mp[x])>>1; \
int e = s
->b0
[x]; \
int temporal_diff0 = FFABS(s
->m1
[x] - s
->
mp[x]); \
int temporal_diff1 =(FFABS(s
->t2
[x] - c) + FFABS(s
->b2
[x] - e) )>>1; \
int temporal_diff2 =(FFABS(s
->tp2
[x] - c) + FFABS(s
->bp2
[x] - e) )>>1; \
int c = s
tzero
[x]; \
int d = (s
mone
[x] + smp[x])>>1; \
int e = s
bzero
[x]; \
int temporal_diff0 = FFABS(s
mone
[x] - smp[x]); \
int temporal_diff1 =(FFABS(s
ttwo
[x] - c) + FFABS(s
btwo
[x] - e) )>>1; \
int temporal_diff2 =(FFABS(s
tptwo
[x] - c) + FFABS(s
bptwo
[x] - e) )>>1; \
int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \
int spatial_pred = (c+e) >> 1; \
int colors2 = colors; \
...
...
@@ -177,15 +208,21 @@ gst_deinterlace_method_yadif_init (GstDeinterlaceMethodYadif * self)
colors2 = 2; \
\
if (is_not_edge) {\
int spatial_score = FFABS(s->t0[x-colors2] - s->b0[x-colors2]) + FFABS(c-e) \
+ FFABS(s->t0[x+colors2] - s->b0[x+colors2]); \
CHECK(-1) CHECK(-2) }} }} \
CHECK( 1) CHECK( 2) }} }} \
int spatial_score = FFABS(stzero[x-colors2] - sbzero[x-colors2]) + FFABS(c-e) \
+ FFABS(stzero[x+colors2] - sbzero[x+colors2]); \
int twice_colors2 = colors2 << 1; \
int minus_colors2 = -colors2; \
int thrice_colors2 = colors2 * 3; \
int minus2_colors2 = colors2 * -2; \
CHECK(0, twice_colors2, minus_colors2) \
CHECK(-colors2, thrice_colors2, minus2_colors2) }} }} \
CHECK(twice_colors2, 0, colors2) \
CHECK(thrice_colors2, minus_colors2, twice_colors2) }} }} \
}\
\
if (!(mode&2)) { \
int b = (s
->tt1
[x] + s
->
ttp[x])>>1; \
int f = (s
->bb1
[x] + s
->
bbp[x])>>1; \
int b = (s
ttone
[x] + sttp[x])>>1; \
int f = (s
bbone
[x] + sbbp[x])>>1; \
int max = FFMAX3(d - e, d - c, FFMIN(b - c, f - e)); \
int min = FFMIN3(d - e, d - c, FFMAX(b - c, f - e)); \
\
...
...
@@ -197,16 +234,20 @@ gst_deinterlace_method_yadif_init (GstDeinterlaceMethodYadif * self)
else if (spatial_pred < d - diff) \
spatial_pred = d - diff; \
\
dst[x] = spatial_pred; \
s
dst[x] = spatial_pred; \
\
}
ALWAYS_INLINE
static
void
filter_line_c
(
guint8
*
dst
,
const
GstDeinterlaceScanlineData
*
s
,
int
start
,
int
end
,
int
mode
,
int
colors
,
int
y_alternates_every
)
filter_line_c
(
guint8
*
sdst
,
const
guint8
*
stzero
,
const
guint8
*
sbzero
,
const
guint8
*
smone
,
const
guint8
*
smp
,
const
guint8
*
sttwo
,
const
guint8
*
sbtwo
,
const
guint8
*
stptwo
,
const
guint8
*
sbptwo
,
const
guint8
*
sttone
,
const
guint8
*
sttp
,
const
guint8
*
sbbone
,
const
guint8
*
sbbp
,
int
w
,
int
colors
,
int
y_alternates_every
,
int
start
,
int
end
,
int
mode
)
{
int
x
;
/* The function is called for processing the middle
* pixels of each line, excluding 3 at each end.
* This allows the FILTER macro to be
...
...
@@ -218,9 +259,74 @@ filter_line_c (guint8 * dst,