/* | |
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com> | |
* | |
* This file is part of FFmpeg. | |
* | |
* FFmpeg is free software; you can redistribute it and/or | |
* modify it under the terms of the GNU Lesser General Public | |
* License as published by the Free Software Foundation; either | |
* version 2.1 of the License, or (at your option) any later version. | |
* | |
* FFmpeg is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
* Lesser General Public License for more details. | |
* | |
* You should have received a copy of the GNU Lesser General Public | |
* License along with FFmpeg; if not, write to the Free Software | |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
*/ | |
#include "libavutil/arm/asm.S" | |
/*
 * alias name, tgt, set=1
 *
 * Create or drop a symbolic register name.
 *   name  the alias to define or release
 *   tgt   register (or existing alias) that name stands for; only used
 *         when the alias is being created
 *   set   non-zero: emit "name .req tgt"; zero: emit ".unreq name"
 */
.macro alias name, tgt, set=1
    .if \set == 0
        .unreq      \name
    .else
        \name       .req    \tgt
    .endif
.endm
/*
 * Name the two halves of every quad register: qN_l -> d(2N) and
 * qN_h -> d(2N+1) for N = 0..15.
 *
 * alias_dw_all qw, dw_l, dw_h
 *   qw    index of the quad register to process
 *   dw_l  index of the d register aliased as q<qw>_l
 *   dw_h  index of the d register aliased as q<qw>_h
 *
 * The macro re-invokes itself with qw + 1 (and both d indices + 2) while
 * qw is below 15. The %(expr) arguments are evaluated to numbers before
 * substitution, which is an .altmacro feature; that is why the definition
 * and its single invocation sit between .altmacro and .noaltmacro.
 */
.altmacro
.macro alias_dw_all qw, dw_l, dw_h
        alias       q\qw\()_h, d\dw_h
        alias       q\qw\()_l, d\dw_l
    .if \qw < 15
        alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
    .endif
.endm
        alias_dw_all 0, 0, 1
.noaltmacro
/*
 * alias_qw name, qw, set=1
 *
 * Alias a quad register together with its two halves:
 *   name   -> qw
 *   name_l -> qw_l
 *   name_h -> qw_h
 * set is forwarded unchanged to alias (non-zero creates, zero releases).
 */
.macro alias_qw name, qw, set=1
        alias       \name, \qw, \set
        alias       \name\()_l, \qw\()_l, \set
        alias       \name\()_h, \qw\()_h, \set
.endm
/*
 * Function entry: save r4-r12 and lr (10 words), then q4-q7, written here
 * as the equivalent d8-d15 (64 bytes). The resulting 104-byte frame is what
 * load_arg assumes when it addresses the stack arguments.
 */
.macro prologue
        push        {r4-r12, lr}
        vpush       {d8-d15}
.endm
/*
 * Function exit: restore q4-q7 (written as d8-d15) and r4-r12, and return
 * by popping the saved lr straight into pc. Mirrors prologue in reverse
 * order.
 */
.macro epilogue
        vpop        {d8-d15}
        pop         {r4-r12, pc}
.endm
/*
 * load_arg reg, ix
 *
 * Load the ix-th function argument (0-based, ix >= 4) from the stack into
 * reg. Must be used after prologue: the offset skips the 10 pushed core
 * registers (4 bytes each) and the 4 pushed quad registers (16 bytes each),
 * then indexes the stack arguments, the first of which is argument 4.
 */
.macro load_arg reg, ix
        ldr         \reg, [sp, #(4 * 10 + 16 * 4 + 4 * (\ix - 4))]
.endm
/* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
 *               int width, int height,
 *               int y_stride, int c_stride, int src_stride,
 *               int32_t coeff_table[9]);
 */
/*
 * alias_loop_420sp set=1
 *
 * Create (set != 0) or release (set == 0) the core-register names used by
 * loop_420sp. Some names deliberately share a register:
 *   header      shares r3 with width
 *   c_padding   shares r6 with c_stride
 *   y1          shares r12 with coeff_table
 * so the earlier value is gone once the later name is written.
 */
.macro alias_loop_420sp set=1
        /* r0-r3: arguments passed in registers */
        alias       src,         r0,       \set
        alias       src0,        src,      \set
        alias       y,           r1,       \set
        alias       y0,          y,        \set
        alias       chroma,      r2,       \set
        alias       width,       r3,       \set
        alias       header,      width,    \set

        /* r4-r7: arguments fetched from the stack */
        alias       height,      r4,       \set
        alias       y_stride,    r5,       \set
        alias       c_stride,    r6,       \set
        alias       c_padding,   c_stride, \set
        alias       src_stride,  r7,       \set

        /* r8-r12: values derived inside the loop */
        alias       y0_end,      r8,       \set
        alias       src_padding, r9,       \set
        alias       y_padding,   r10,      \set
        alias       src1,        r11,      \set
        alias       y1,          r12,      \set
        alias       coeff_table, r12,      \set
.endm
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Emit the exported function <s_fmt>_to_<d_fmt>_neon_<precision>, which
 * walks a packed 4-bytes-per-pixel source two rows at a time and hands each
 * step to the kernel macro.
 *   s_fmt      source format tag, forwarded to kernel
 *   d_fmt      destination format tag, forwarded to kernel
 *   init       macro invoked once with the coefficient-table pointer
 *   kernel     macro invoked as
 *              kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma [, count]
 *   precision  suffix appended to the function name
 *
 * Per row pair: if width is not a multiple of 16, the kernel is first run
 * once with count = width & 15, then repeatedly without count until y0
 * reaches y0_end.
 * NOTE(review): the counted step is always followed by at least one full
 * step, so this appears to require width >= 16 - confirm with the callers.
 * NOTE(review): height is decremented by 2 per pass, so an even height is
 * presumably expected - confirm with the callers.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision
function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
        prologue
        alias_loop_420sp

        /* arguments 4..8 are on the stack */
        load_arg    height,      4
        load_arg    y_stride,    5
        load_arg    c_stride,    6
        load_arg    src_stride,  7
        load_arg    coeff_table, 8
        /* coeff_table shares r12 with y1, so it must be consumed here */
        \init       coeff_table

        /* bytes to skip at the end of each row of every plane */
        sub         y_padding,   y_stride,   width
        sub         c_padding,   c_stride,   width
        sub         src_padding, src_stride, width, LSL #2
        add         y0_end,      y0,         width
        /* header replaces width (same register): leftover columns mod 16 */
        and         header,      width,      #15
        add         y1,          y0,         y_stride
        add         src1,        src0,       src_stride
0:
        /* partial step first, only when there are leftover columns */
        cmp         header,      #0
        beq         1f
        \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header
1:
        \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma
        cmp         y0,          y0_end
        /* y0 and y0_end are addresses: compare them as unsigned values */
        blo         1b
2:
        /* y1/src1 now sit at the end of the second row; step to the next pair */
        add         y0,          y1,         y_padding
        add         y0_end,      y1,         y_stride
        add         chroma,      chroma,     c_padding
        add         src0,        src1,       src_padding
        add         y1,          y0,         y_stride
        add         src1,        src0,       src_stride
        subs        height,      height,     #2
        bgt         0b

        epilogue
        alias_loop_420sp 0
endfunc
.endm
/*
 * Horizontal pairwise add of one row: adjacent u8 samples in r8x16, g8x16
 * and b8x16 are summed into u16 lanes, overwriting r16x8, g16x8 and b16x8.
 */
.macro downsample
        vpaddl.u8   r16x8, r8x16
        vpaddl.u8   g16x8, g8x16
        vpaddl.u8   b16x8, b8x16
.endm
/*
 * Accumulate and right shift by 2: pairwise-add the u8 samples of the
 * current row into the existing u16 sums in r16x8, g16x8 and b16x8, then
 * apply a rounding right shift by 2 to each sum.
 */
.macro downsample_ars2
        vpadal.u8   r16x8, r8x16
        vpadal.u8   g16x8, g8x16
        vpadal.u8   b16x8, b8x16

        vrshr.u16   r16x8, r16x8, #2
        vrshr.u16   g16x8, g16x8, #2
        vrshr.u16   b16x8, b16x8, #2
.endm
/*
 * store_y8_16x1 dst, count
 *
 * Store the quad register y8x16 (16 bytes) at dst.
 *   count omitted: dst is advanced past the stored data (write-back).
 *   count given:   the same 16 bytes are stored, but dst is advanced by
 *                  count bytes instead.
 */
.macro store_y8_16x1 dst, count
    .ifnc "\count",""
        vstmia      \dst, {y8x16}
        add         \dst, \dst, \count
    .else
        vstmia      \dst!, {y8x16}
    .endif
.endm
/*
 * store_chroma_nv12_8x1 dst, count
 *
 * Store u8x8 and v8x8 interleaved at dst, U first.
 *   count omitted: dst is advanced past the stored data (write-back).
 *   count given:   dst is post-incremented by the register count instead.
 */
.macro store_chroma_nv12_8x1 dst, count
    .ifnc "\count",""
        vst2.i8     {u8x8, v8x8}, [\dst], \count
    .else
        vst2.i8     {u8x8, v8x8}, [\dst]!
    .endif
.endm
/*
 * store_chroma_nv21_8x1 dst, count
 *
 * Store v8x8 and u8x8 interleaved at dst, V first.
 *   count omitted: dst is advanced past the stored data (write-back).
 *   count given:   dst is post-incremented by the register count instead.
 */
.macro store_chroma_nv21_8x1 dst, count
    .ifnc "\count",""
        vst2.i8     {v8x8, u8x8}, [\dst], \count
    .else
        vst2.i8     {v8x8, u8x8}, [\dst]!
    .endif
.endm
/*
 * load_8888_16x1 a, b, c, d, src, count
 *
 * De-interleave 16 four-byte pixels from src into the quad registers
 * <a>8x16, <b>8x16, <c>8x16 and <d>8x16; a..d name the bytes of a pixel in
 * memory order. Two vld4.8 loads are used: the first fills the _l halves,
 * the second the _h halves.
 *   count omitted: src ends up 64 bytes further on (both loads write back).
 *   count given:   the same data is loaded, but src ends up advanced by
 *                  4 * count bytes: the second load does not write back,
 *                  the 32 bytes of the first are undone, then count << 2
 *                  is added.
 */
.macro load_8888_16x1 a, b, c, d, src, count
        /* first 8 pixels: identical in both variants */
        vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    .ifnc "\count",""
        vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
        sub         \src, \src, #32
        add         \src, \src, \count, LSL #2
    .else
        vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
    .endif
.endm
/*
 * load_rgbx_16x1 src, count
 * Load 16 pixels whose bytes are ordered r, g, b, x in memory; src and the
 * optional count are passed through to load_8888_16x1.
 */
.macro load_rgbx_16x1 src, count
        load_8888_16x1 r, g, b, x, \src, \count
.endm
/*
 * load_bgrx_16x1 src, count
 * Load 16 pixels whose bytes are ordered b, g, r, x in memory; src and the
 * optional count are passed through to load_8888_16x1.
 */
.macro load_bgrx_16x1 src, count
        load_8888_16x1 b, g, r, x, \src, \count
.endm
/*
 * alias_src_rgbx set=1
 * Create (set != 0) or release (set == 0) the source-channel register
 * names for r, g, b, x byte order by delegating to alias_src_8888.
 */
.macro alias_src_rgbx set=1
        alias_src_8888 r, g, b, x, \set
.endm
/*
 * alias_src_bgrx set=1
 * Create (set != 0) or release (set == 0) the source-channel register
 * names for b, g, r, x byte order by delegating to alias_src_8888.
 */
.macro alias_src_bgrx set=1
        alias_src_8888 b, g, r, x, \set
.endm
/*
 * alias_dst_nv12 set=1
 * Create (set != 0) or release (set == 0) the chroma output names with U
 * in the low half and V in the high half of c8x8x2.
 * NOTE(review): c8x8x2_l / c8x8x2_h are not defined in this part of the
 * file; presumably the including file provides them - confirm.
 */
.macro alias_dst_nv12 set=1
        alias       u8x8, c8x8x2_l, \set
        alias       v8x8, c8x8x2_h, \set
.endm
/*
 * alias_dst_nv21 set=1
 * Create (set != 0) or release (set == 0) the chroma output names with V
 * in the low half and U in the high half of c8x8x2.
 * NOTE(review): c8x8x2_l / c8x8x2_h are not defined in this part of the
 * file; presumably the including file provides them - confirm.
 */
.macro alias_dst_nv21 set=1
        alias       v8x8, c8x8x2_l, \set
        alias       u8x8, c8x8x2_h, \set
.endm
/*
 * Common aliases: fixed NEON register assignments.
 *   d0, d1, d2  one register per input channel (R, G, B); s16 lanes 0, 1, 2
 *               are named for the Y, U and V outputs respectively
 *   d3          chroma bias, shared by BIAS_U and BIAS_V
 *   q2          luma bias
 * NOTE(review): how these registers are filled is not visible here;
 * presumably the init macro passed to loop_420sp loads them from
 * coeff_table - confirm.
 */
        alias       CO_R,   d0
CO_RY   .dn         d0.s16[0]
CO_RU   .dn         d0.s16[1]
CO_RV   .dn         d0.s16[2]

        alias       CO_G,   d1
CO_GY   .dn         d1.s16[0]
CO_GU   .dn         d1.s16[1]
CO_GV   .dn         d1.s16[2]

        alias       CO_B,   d2
CO_BY   .dn         d2.s16[0]
CO_BU   .dn         d2.s16[1]
CO_BV   .dn         d2.s16[2]

        alias       BIAS_U, d3
        alias       BIAS_V, BIAS_U
        alias       BIAS_Y, q2
/*
 * alias_src_8888 a, b, c, d, set
 *
 * q3-q6 hold 16 four-byte source pixels, one quad register per byte
 * position. a..d are the channel letters in memory order; each gets the
 * names <letter>8x16, <letter>8x16_l and <letter>8x16_h via alias_qw.
 * set has no default here: non-zero creates the names, zero releases them.
 */
.macro alias_src_8888 a, b, c, d, set
        alias_qw    \a\()8x16, q3, \set
        alias_qw    \b\()8x16, q4, \set
        alias_qw    \c\()8x16, q5, \set
        alias_qw    \d\()8x16, q6, \set
.endm
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
 *
 * Convert one block of 16 columns x 2 rows.
 *   rgb_fmt     source format tag; selects alias_src_<rgb_fmt> and
 *               load_<rgb_fmt>_16x1
 *   yuv_fmt     destination format tag; selects alias_dst_<yuv_fmt> and
 *               store_chroma_<yuv_fmt>_8x1
 *   rgb0, rgb1  source pointers for the upper and lower row
 *   y0, y1      luma destination pointers for the upper and lower row
 *   chroma      interleaved chroma destination pointer
 *   count       optional; when given, it is forwarded to every load and
 *               store so the pointers advance by count instead of a full
 *               block
 * All pointers are advanced by the load and store macros.
 * compute_y_16x1 and compute_chroma_8x1 are not defined in this part of
 * the file; they are expected from the including file.
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
        alias_src_\rgb_fmt
        alias_dst_\yuv_fmt

        /* upper row: load, start the horizontal sums, emit luma */
        load_\rgb_fmt\()_16x1 \rgb0, \count
        downsample
        compute_y_16x1
        store_y8_16x1 \y0, \count

        /* lower row: load, finish the 2x2 sums and shift, emit luma */
        load_\rgb_fmt\()_16x1 \rgb1, \count
        downsample_ars2
        compute_y_16x1
        store_y8_16x1 \y1, \count

        /* chroma from the downsampled r16x8 / g16x8 / b16x8 values */
        compute_chroma_8x1 u, U
        compute_chroma_8x1 v, V
        store_chroma_\yuv_fmt\()_8x1 \chroma, \count

        /* release the names in reverse order of creation */
        alias_dst_\yuv_fmt 0
        alias_src_\rgb_fmt 0
.endm