This commit is contained in:
Anthony Minessale 2019-09-11 15:53:30 +00:00 committed by Andrey Volk
parent 6175c55b2f
commit 1b1c66aae4
45 changed files with 3684 additions and 574 deletions

View File

@ -69,6 +69,7 @@ cc_library {
// with libyuv (b/37646797)
cc_library_static {
name: "libyuv_static",
vendor_available: true,
whole_static_libs: ["libyuv"],
}

File diff suppressed because it is too large

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1724
Version: 1735
License: BSD
License File: LICENSE

View File

@ -100,4 +100,8 @@ Inverting can be achieved with almost any libyuv function by passing a negative
I420Mirror and ARGBMirror can also be used to rotate by 180 degrees by passing a negative height.
# Cropping - Vertical Flip
When cropping from a subsampled format like NV21, the method of setting the start pointers won't work for an odd crop start y on the UV plane.
If the height after cropping will be odd, invert the source: point to the last row, negate the strides, and pass a negative height, which
will re-invert the image as the conversion outputs.
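For example, a minimal sketch of this idiom (assuming an NV12 source and the NV12ToI420 signature; variable names are illustrative):

    // Point at the last rows and negate the strides to present an inverted view.
    const uint8_t* flip_y = src_y + (height - 1) * src_stride_y;
    const uint8_t* flip_uv = src_uv + ((height + 1) / 2 - 1) * src_stride_uv;
    // Passing a negative height makes the conversion re-invert, so the output
    // lands upright while the UV plane starts on a whole row.
    NV12ToI420(flip_y, -src_stride_y, flip_uv, -src_stride_uv,
               dst_y, dst_stride_y, dst_u, dst_stride_u,
               dst_v, dst_stride_v, width, -height);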

View File

@ -226,6 +226,28 @@ int UYVYToI420(const uint8_t* src_uyvy,
int width,
int height);
// Convert AYUV to NV12.
LIBYUV_API
int AYUVToNV12(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert AYUV to NV21.
LIBYUV_API
int AYUVToNV21(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
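// Usage sketch for the AYUV entry points above (buffer sizes are illustrative
// assumptions; AYUV is 4 bytes per pixel, NV12/NV21 chroma is half height):
//   int w = 640, h = 480;
//   uint8_t* ayuv = (uint8_t*)malloc(w * h * 4);
//   uint8_t* y = (uint8_t*)malloc(w * h);
//   uint8_t* uv = (uint8_t*)malloc(w * ((h + 1) / 2));
//   AYUVToNV12(ayuv, w * 4, y, w, uv, w, w, h);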
// Convert M420 to I420.
LIBYUV_API
int M420ToI420(const uint8_t* src_m420,
@ -322,6 +344,19 @@ int RGB24ToI420(const uint8_t* src_rgb24,
int width,
int height);
// RGB little endian (bgr in memory) to J420.
LIBYUV_API
int RGB24ToJ420(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB big endian (rgb in memory) to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw,
@ -374,14 +409,21 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
int width,
int height);
// RGB little endian (bgr in memory) to J400.
LIBYUV_API
int RGB24ToJ400(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
#ifdef HAVE_JPEG
// src_mjpg is a pointer to raw JPEG bytes in memory
// src_size_mjpg is the size of the JPEG in bytes
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToI420(const uint8_t* src_mjpg,
size_t src_size_mjpg,
int MJPGToI420(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
@ -395,8 +437,8 @@ int MJPGToI420(const uint8_t* src_mjpg,
// JPEG to NV21
LIBYUV_API
int MJPGToNV21(const uint8_t* src_mjpg,
size_t src_size_mjpg,
int MJPGToNV21(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
@ -408,8 +450,8 @@ int MJPGToNV21(const uint8_t* src_mjpg,
// Query size of MJPG in pixels.
LIBYUV_API
int MJPGSize(const uint8_t* src_mjpg,
size_t src_size_mjpg,
int MJPGSize(const uint8_t* sample,
size_t sample_size,
int* width,
int* height);
#endif
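// Call sketch (hedged: assumes the remaining MJPGToI420 parameters follow the
// src_width/height and dst_width/height comments above; error handling and
// buffer allocation omitted):
//   int jw = 0, jh = 0;
//   if (MJPGSize(jpeg, jpeg_size, &jw, &jh) == 0) {
//     MJPGToI420(jpeg, jpeg_size, dst_y, jw, dst_u, (jw + 1) / 2,
//                dst_v, (jw + 1) / 2, jw, jh, /* capture size */
//                jw, jh);                     /* no clipping */
//   }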

View File

@ -256,6 +256,7 @@ int NV21ToARGB(const uint8_t* src_y,
int height);
// Convert NV12 to ABGR.
LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@ -298,6 +299,17 @@ int NV21ToRGB24(const uint8_t* src_y,
int width,
int height);
// Convert NV21 to YUV24.
LIBYUV_API
int NV21ToYUV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_yuv24,
int dst_stride_yuv24,
int width,
int height);
// Convert NV12 to RAW.
LIBYUV_API
int NV12ToRAW(const uint8_t* src_y,
@ -627,8 +639,8 @@ int AR30ToAB30(const uint8_t* src_ar30,
// src_width/height provided by capture
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToARGB(const uint8_t* src_mjpg,
size_t src_size_mjpg,
int MJPGToARGB(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_argb,
int dst_stride_argb,
int src_width,

View File

@ -250,6 +250,28 @@ int ARGBToNV21(const uint8_t* src_argb,
int width,
int height);
// Convert ABGR To NV12.
LIBYUV_API
int ABGRToNV12(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert ABGR To NV21.
LIBYUV_API
int ABGRToNV21(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Convert ARGB To NV21.
LIBYUV_API
int ARGBToNV21(const uint8_t* src_argb,

View File

@ -26,7 +26,7 @@ namespace libyuv {
extern "C" {
#endif
LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg_size);
LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size);
#ifdef __cplusplus
} // extern "C"

View File

@ -105,6 +105,15 @@ void MergeUVPlane(const uint8_t* src_u,
int width,
int height);
// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Split interleaved RGB plane into separate R, G and B planes.
LIBYUV_API
void SplitRGBPlane(const uint8_t* src_rgb,
@ -224,6 +233,19 @@ int UYVYToNV12(const uint8_t* src_uyvy,
int width,
int height);
// Convert NV21 to NV12.
LIBYUV_API
int NV21ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
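// Usage sketch (illustrative strides; per the implementation, dst_y may be
// NULL to convert chroma only, since Y is copied and the VU bytes swapped):
//   NV21ToNV12(src_y, width, src_vu, width,
//              dst_y, width, dst_uv, width, width, height);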
LIBYUV_API
int YUY2ToY(const uint8_t* src_yuy2,
int src_stride_yuy2,

View File

@ -49,6 +49,24 @@ int I420Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
// Rotate I444 frame.
LIBYUV_API
int I444Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum RotationMode mode);
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,

View File

@ -275,6 +275,7 @@ extern "C" {
#define HAS_I422TOAR30ROW_SSSE3
#define HAS_MERGERGBROW_SSSE3
#define HAS_SPLITRGBROW_SSSE3
#define HAS_SWAPUVROW_SSSE3
#endif
// The following are available for AVX2 gcc/clang x86 platforms:
@ -283,6 +284,8 @@ extern "C" {
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ABGRTOAR30ROW_AVX2
#define HAS_ABGRTOUVROW_AVX2
#define HAS_ABGRTOYROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2
@ -295,6 +298,9 @@ extern "C" {
#define HAS_I422TOYUY2ROW_AVX2
#define HAS_MERGEUVROW_16_AVX2
#define HAS_MULTIPLYROW_16_AVX2
#define HAS_SWAPUVROW_AVX2
// TODO(fbarchard): Fix AVX2 version of YUV24
// #define HAS_NV21TOYUV24ROW_AVX2
#endif
// The following are available for AVX512 clang x86 platforms:
@ -330,6 +336,9 @@ extern "C" {
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_AYUVTOUVROW_NEON
#define HAS_AYUVTOVUROW_NEON
#define HAS_AYUVTOYROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_BYTETOFLOATROW_NEON
@ -355,6 +364,7 @@ extern "C" {
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV21TORGB24ROW_NEON
#define HAS_NV21TOYUV24ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON
@ -370,6 +380,7 @@ extern "C" {
#define HAS_SETROW_NEON
#define HAS_SPLITRGBROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_SWAPUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
@ -815,6 +826,10 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width);
void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
@ -899,6 +914,8 @@ void UYVYToARGBRow_MSA(const uint8_t* src_uyvy,
void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ABGRToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@ -927,7 +944,7 @@ void ARGBToUV444Row_MSA(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_MSA(const uint8_t* src_argb0,
void ARGBToUVRow_MSA(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -936,7 +953,7 @@ void ARGBToUV444Row_MMI(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_MMI(const uint8_t* src_argb0,
void ARGBToUVRow_MMI(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -986,32 +1003,32 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
void ARGBToUVJRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
void BGRAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
void ABGRToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
void RGBAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
void RGB24ToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RAWToUVRow_MSA(const uint8_t* src_rgb0,
void RAWToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1026,32 +1043,32 @@ void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
void ARGBToUVJRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
void BGRAToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
void ABGRToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
void RGBAToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
void RGB24ToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RAWToUVRow_MMI(const uint8_t* src_rgb0,
void RAWToUVRow_MMI(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1083,29 +1100,29 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
uint8_t* dst_y,
int width);
void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void BGRAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RAWToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RGB24ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void BGRAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RAWToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
@ -1156,37 +1173,42 @@ void ARGB4444ToYRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1196,6 +1218,11 @@ void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_Any_AVX2(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
@ -1383,47 +1410,47 @@ void ARGB4444ToUVRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_C(const uint8_t* src_rgb0,
void ARGBToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_C(const uint8_t* src_rgb0,
void ARGBToUVJRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_C(const uint8_t* src_rgb0,
void ARGBToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_C(const uint8_t* src_rgb0,
void ARGBToUVJRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_C(const uint8_t* src_rgb0,
void BGRAToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVRow_C(const uint8_t* src_rgb0,
void ABGRToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGBAToUVRow_C(const uint8_t* src_rgb0,
void RGBAToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB24ToUVRow_C(const uint8_t* src_rgb0,
void RGB24ToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RAWToUVRow_C(const uint8_t* src_rgb0,
void RAWToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -2183,6 +2210,10 @@ void NV21ToRGB24Row_C(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToYUV24Row_C(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width);
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
@ -2349,6 +2380,10 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width);
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb565,
@ -2554,6 +2589,10 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width);
void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@ -3027,6 +3066,10 @@ void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToYUV24Row_Any_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width);
void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@ -3344,6 +3387,40 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_C(const uint8_t* src_ayuv,
int stride_ayuv,
uint8_t* dst_uv,
int width);
void AYUVToVURow_C(const uint8_t* src_ayuv,
int stride_ayuv,
uint8_t* dst_vu,
int width);
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
int stride_ayuv,
uint8_t* dst_uv,
int width);
void AYUVToVURow_NEON(const uint8_t* src_ayuv,
int stride_ayuv,
uint8_t* dst_vu,
int width);
void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_Any_NEON(const uint8_t* src_ayuv,
int stride_ayuv,
uint8_t* dst_uv,
int width);
void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv,
int stride_ayuv,
uint8_t* dst_vu,
int width);
void I422ToYUY2Row_C(const uint8_t* src_y,
const uint8_t* src_u,

View File

@ -97,6 +97,54 @@ int I420Scale_16(const uint16_t* src_y,
int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:4:4 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce ever better
// quality image, at further expense of speed.
// Returns 0 if successful.
LIBYUV_API
int I444Scale(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
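// Usage sketch: halve a 4:4:4 frame with bilinear filtering (strides are
// illustrative, equal to the widths for tightly packed planes):
//   I444Scale(src_y, sw, src_u, sw, src_v, sw, sw, sh,
//             dst_y, sw / 2, dst_u, sw / 2, dst_v, sw / 2,
//             sw / 2, sh / 2, kFilterBilinear);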
LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1724
#define LIBYUV_VERSION 1735
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -1,3 +0,0 @@
set noparent
agable@chromium.org
phoglund@chromium.org

View File

@ -1,15 +0,0 @@
# Copyright 2018 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
def CheckChangeOnUpload(input_api, output_api):
return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)
def CheckChangeOnCommit(input_api, output_api):
return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)

View File

@ -1 +0,0 @@
This directory contains configuration files for infra services.

View File

@ -1,50 +0,0 @@
# Commit Queue configuration file. The documentation of the format can be found
# at http://luci-config.appspot.com/schemas/projects/refs:cq.cfg.
version: 1
cq_status_url: "https://chromium-cq-status.appspot.com"
git_repo_url: "https://chromium.googlesource.com/libyuv/libyuv.git"
gerrit {}
verifiers {
gerrit_cq_ability {
committer_list: "project-libyuv-committers"
dry_run_access_list: "project-libyuv-tryjob-access"
}
try_job {
buckets {
name: "luci.libyuv.try"
builders { name: "win" }
builders { name: "win_rel" }
builders { name: "win_x64_rel" }
builders { name: "win_clang" }
builders { name: "win_clang_rel" }
builders { name: "win_x64_clang_rel" }
builders { name: "mac" }
builders { name: "mac_rel" }
builders { name: "mac_asan" }
builders { name: "ios" }
builders { name: "ios_rel" }
builders { name: "ios_arm64" }
builders { name: "ios_arm64_rel" }
builders { name: "linux" }
builders { name: "linux_rel" }
builders {
name: "linux_gcc"
experiment_percentage: 100
}
builders { name: "linux_tsan2" }
builders { name: "linux_asan" }
builders { name: "linux_msan" }
builders { name: "linux_ubsan" }
builders { name: "linux_ubsan_vptr" }
builders { name: "android" }
builders { name: "android_rel" }
builders { name: "android_arm64" }
builders { name: "android_x86" }
builders { name: "android_x64" }
}
}
}

View File

@ -69,13 +69,13 @@ static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
return FOURCC_BGRA;
}
if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
if (argb[3] != 255) { // Fourth byte is not Alpha of 255, so not BGRA.
return FOURCC_ARGB;
}
if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255.
return FOURCC_BGRA;
}
if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255.
if (argb[7] != 255) { // Second pixel fourth byte is not Alpha of 255.
return FOURCC_ARGB;
}
argb += 8;

View File

@ -880,6 +880,144 @@ int UYVYToI420(const uint8_t* src_uyvy,
return 0;
}
// Convert AYUV to NV12.
LIBYUV_API
int AYUVToNV12(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y;
void (*AYUVToUVRow)(const uint8_t* src_ayuv, int src_stride_ayuv,
uint8_t* dst_uv, int width) = AYUVToUVRow_C;
void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
AYUVToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv;
src_stride_ayuv = -src_stride_ayuv;
}
// Placeholders for future Intel code.
#if defined(HAS_AYUVTOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
AYUVToUVRow = AYUVToUVRow_Any_SSE2;
AYUVToYRow = AYUVToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
AYUVToUVRow = AYUVToUVRow_SSE2;
AYUVToYRow = AYUVToYRow_SSE2;
}
}
#endif
#if defined(HAS_AYUVTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
AYUVToUVRow = AYUVToUVRow_Any_AVX2;
AYUVToYRow = AYUVToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
AYUVToUVRow = AYUVToUVRow_AVX2;
AYUVToYRow = AYUVToYRow_AVX2;
}
}
#endif
#if defined(HAS_AYUVTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
AYUVToYRow = AYUVToYRow_Any_NEON;
AYUVToUVRow = AYUVToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
AYUVToYRow = AYUVToYRow_NEON;
AYUVToUVRow = AYUVToUVRow_NEON;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
AYUVToUVRow(src_ayuv, src_stride_ayuv, dst_uv, width);
AYUVToYRow(src_ayuv, dst_y, width);
AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
src_ayuv += src_stride_ayuv * 2;
dst_y += dst_stride_y * 2;
dst_uv += dst_stride_uv;
}
if (height & 1) {
AYUVToUVRow(src_ayuv, 0, dst_uv, width);
AYUVToYRow(src_ayuv, dst_y, width);
}
return 0;
}
// Convert AYUV to NV21.
LIBYUV_API
int AYUVToNV21(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height) {
int y;
void (*AYUVToVURow)(const uint8_t* src_ayuv, int src_stride_ayuv,
uint8_t* dst_vu, int width) = AYUVToVURow_C;
void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
AYUVToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv;
src_stride_ayuv = -src_stride_ayuv;
}
// Placeholders for future Intel code.
#if defined(HAS_AYUVTOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
AYUVToVURow = AYUVToVURow_Any_SSE2;
AYUVToYRow = AYUVToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
AYUVToVURow = AYUVToVURow_SSE2;
AYUVToYRow = AYUVToYRow_SSE2;
}
}
#endif
#if defined(HAS_AYUVTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
AYUVToVURow = AYUVToVURow_Any_AVX2;
AYUVToYRow = AYUVToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
AYUVToVURow = AYUVToVURow_AVX2;
AYUVToYRow = AYUVToYRow_AVX2;
}
}
#endif
#if defined(HAS_AYUVTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
AYUVToYRow = AYUVToYRow_Any_NEON;
AYUVToVURow = AYUVToVURow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
AYUVToYRow = AYUVToYRow_NEON;
AYUVToVURow = AYUVToVURow_NEON;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
AYUVToVURow(src_ayuv, src_stride_ayuv, dst_vu, width);
AYUVToYRow(src_ayuv, dst_y, width);
AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
src_ayuv += src_stride_ayuv * 2;
dst_y += dst_stride_y * 2;
dst_vu += dst_stride_vu;
}
if (height & 1) {
AYUVToVURow(src_ayuv, 0, dst_vu, width);
AYUVToYRow(src_ayuv, dst_y, width);
}
return 0;
}
// Convert ARGB to I420.
LIBYUV_API
int ARGBToI420(const uint8_t* src_argb,
@ -1446,6 +1584,155 @@ int RGB24ToI420(const uint8_t* src_rgb24,
return 0;
}
// TODO(fbarchard): Use Matrix version to implement I420 and J420.
// Convert RGB24 to J420.
LIBYUV_API
int RGB24ToJ420(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height) {
int y;
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVJRow_C;
void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
RGB24ToYJRow_C;
#else
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RGB24ToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
#endif
if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON;
RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToYJRow = RGB24ToYJRow_NEON;
if (IS_ALIGNED(width, 16)) {
RGB24ToUVJRow = RGB24ToUVJRow_NEON;
}
}
}
#elif defined(HAS_RGB24TOYJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGB24ToUVJRow = RGB24ToUVJRow_Any_MSA;
RGB24ToYJRow = RGB24ToYJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
RGB24ToYJRow = RGB24ToYJRow_MSA;
RGB24ToUVJRow = RGB24ToUVJRow_MSA;
}
}
#elif defined(HAS_RGB24TOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB24ToUVJRow = RGB24ToUVJRow_Any_MMI;
RGB24ToYJRow = RGB24ToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
RGB24ToYJRow = RGB24ToYJRow_MMI;
if (IS_ALIGNED(width, 16)) {
RGB24ToUVJRow = RGB24ToUVJRow_MMI;
}
}
}
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVJRow = ARGBToUVJRow_AVX2;
ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
#endif
{
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width);
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
ARGBToUVJRow(row, 0, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width);
#endif
}
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
free_aligned_buffer_64(row);
#endif
}
return 0;
}
// Convert RAW to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw,
@ -2082,6 +2369,124 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
return 0;
}
// Convert RGB24 to J400.
LIBYUV_API
int RGB24ToJ400(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height) {
int y;
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) =
RGB24ToYJRow_C;
#else
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RGB24ToARGBRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
#endif
if (!src_rgb24 || !dst_yj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToYJRow = RGB24ToYJRow_NEON;
}
}
#elif defined(HAS_RGB24TOYJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGB24ToYJRow = RGB24ToYJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
RGB24ToYJRow = RGB24ToYJRow_MSA;
}
}
#elif defined(HAS_RGB24TOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB24ToYJRow = RGB24ToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
RGB24ToYJRow = RGB24ToYJRow_MMI;
}
}
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYJRow = ARGBToYJRow_AVX2;
}
}
#endif
#endif
{
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
RGB24ToYJRow(src_rgb24, dst_yj, width);
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_yj + dst_stride_yj, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
ARGBToYJRow(row, dst_yj, width);
ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width);
#endif
src_rgb24 += src_stride_rgb24 * 2;
dst_yj += dst_stride_yj * 2;
}
if (height & 1) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
RGB24ToYJRow(src_rgb24, dst_yj, width);
#else
RGB24ToARGBRow(src_rgb24, row, width);
ARGBToYJRow(row, dst_yj, width);
#endif
}
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
free_aligned_buffer_64(row);
#endif
}
return 0;
}
static void SplitPixels(const uint8_t* src_u,
int src_pixel_stride_uv,
uint8_t* dst_u,

View File

@ -1793,8 +1793,9 @@ int NV21ToARGB(const uint8_t* src_y,
}
// Convert NV12 to ABGR.
// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix.
// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix.
// To swap the UV use NV12 instead of NV21.
LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@ -1998,6 +1999,54 @@ int NV21ToRAW(const uint8_t* src_y,
dst_stride_raw, &kYvuI601Constants, width, height);
}
// Convert NV21 to YUV24
int NV21ToYUV24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_yuv24,
int dst_stride_yuv24,
int width,
int height) {
int y;
void (*NV21ToYUV24Row)(const uint8_t* src_y, const uint8_t* src_vu,
uint8_t* dst_yuv24, int width) = NV21ToYUV24Row_C;
if (!src_y || !src_vu || !dst_yuv24 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_yuv24 = dst_yuv24 + (height - 1) * dst_stride_yuv24;
dst_stride_yuv24 = -dst_stride_yuv24;
}
#if defined(HAS_NV21TOYUV24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
NV21ToYUV24Row = NV21ToYUV24Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
NV21ToYUV24Row = NV21ToYUV24Row_NEON;
}
}
#endif
#if defined(HAS_NV21TOYUV24ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
NV21ToYUV24Row = NV21ToYUV24Row_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
NV21ToYUV24Row = NV21ToYUV24Row_AVX2;
}
}
#endif
for (y = 0; y < height; ++y) {
NV21ToYUV24Row(src_y, src_vu, dst_yuv24, width);
dst_yuv24 += dst_stride_yuv24;
src_y += src_stride_y;
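// NV21 chroma is subsampled 2x vertically, so step the VU row only after
// every other Y row.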
if (y & 1) {
src_vu += src_stride_vu;
}
}
return 0;
}
// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8_t* src_m420,

View File

@ -572,6 +572,326 @@ int ARGBToNV21(const uint8_t* src_argb,
return 0;
}
LIBYUV_API
int ABGRToNV12(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ABGRToUVRow_C;
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
ABGRToYRow_C;
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_abgr || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_SSSE3;
ABGRToYRow = ABGRToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToUVRow = ABGRToUVRow_Any_AVX2;
ABGRToYRow = ABGRToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_AVX2;
ABGRToYRow = ABGRToYRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYRow = ABGRToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToUVRow = ABGRToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYRow = ABGRToYRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToUVRow = ABGRToUVRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToYRow = ABGRToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_MMI;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToUVRow = ABGRToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_MMI;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_MERGEUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MergeUVRow_ = MergeUVRow_Any_MSA;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_MSA;
}
}
#endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow_ = MergeUVRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
MergeUVRow_ = MergeUVRow_MMI;
}
}
#endif
{
// Allocate a row of U and a row of V.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
ABGRToYRow(src_abgr, dst_y, width);
ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
src_abgr += src_stride_abgr * 2;
dst_y += dst_stride_y * 2;
dst_uv += dst_stride_uv;
}
if (height & 1) {
ABGRToUVRow(src_abgr, 0, row_u, row_v, width);
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
ABGRToYRow(src_abgr, dst_y, width);
}
free_aligned_buffer_64(row_u);
}
return 0;
}
// Same as NV12 but U and V swapped.
LIBYUV_API
int ABGRToNV21(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
uint8_t* dst_u, uint8_t* dst_v, int width) =
ABGRToUVRow_C;
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
ABGRToYRow_C;
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_vu, int width) = MergeUVRow_C;
if (!src_abgr || !dst_y || !dst_vu || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_SSSE3;
ABGRToYRow = ABGRToYRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToUVRow = ABGRToUVRow_Any_AVX2;
ABGRToYRow = ABGRToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_AVX2;
ABGRToYRow = ABGRToYRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYRow = ABGRToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToUVRow = ABGRToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYRow = ABGRToYRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ABGRToYRow = ABGRToYRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToUVRow = ABGRToUVRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
ABGRToUVRow = ABGRToUVRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToYRow = ABGRToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_MMI;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ABGRToUVRow = ABGRToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_MMI;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUVRow_ = MergeUVRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_MERGEUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MergeUVRow_ = MergeUVRow_Any_MSA;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_MSA;
}
}
#endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow_ = MergeUVRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
MergeUVRow_ = MergeUVRow_MMI;
}
}
#endif
{
// Allocate a row of U and a row of V.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
ABGRToYRow(src_abgr, dst_y, width);
ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
src_abgr += src_stride_abgr * 2;
dst_y += dst_stride_y * 2;
dst_vu += dst_stride_vu;
}
if (height & 1) {
ABGRToUVRow(src_abgr, 0, row_u, row_v, width);
MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
ABGRToYRow(src_abgr, dst_y, width);
}
free_aligned_buffer_64(row_u);
}
return 0;
}
// Convert ARGB to YUY2.
LIBYUV_API
int ARGBToYUY2(const uint8_t* src_argb,

View File

@ -25,7 +25,8 @@
#endif
#endif
struct FILE; // For jpeglib.h.
#include <stdio.h> // For jpeglib.h.
// C++ build requires extern C for jpeg internals.
#ifdef __cplusplus
@ -427,7 +428,15 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
}
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
cinfo->src->next_input_byte += num_bytes;
jpeg_source_mgr* src = cinfo->src;
size_t bytes = static_cast<size_t>(num_bytes);
if (bytes > src->bytes_in_buffer) {
src->next_input_byte = nullptr;
src->bytes_in_buffer = 0;
} else {
src->next_input_byte += bytes;
src->bytes_in_buffer -= bytes;
}
}
void term_source(j_decompress_ptr cinfo) {

View File

@ -47,7 +47,8 @@ LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg) {
// ERROR: Invalid jpeg size: src_size_mjpg
return LIBYUV_FALSE;
}
if (src_mjpg[0] != 0xff || src_mjpg[1] != 0xd8) { // SOI marker
// SOI marker
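// A valid stream begins with SOI (0xff 0xd8), and every following marker
// also starts with 0xff, so the third byte must be 0xff as well.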
if (src_mjpg[0] != 0xff || src_mjpg[1] != 0xd8 || src_mjpg[2] != 0xff) {
// ERROR: Invalid jpeg initial start code
return LIBYUV_FALSE;
}

View File

@ -440,7 +440,6 @@ void MergeUVPlane(const uint8_t* src_u,
int y;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
// Coalesce rows.
// Negative height means invert the image.
if (height < 0) {
height = -height;
@ -504,6 +503,87 @@ void MergeUVPlane(const uint8_t* src_u,
}
}
// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height) {
int y;
void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
SwapUVRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * src_stride_uv;
src_stride_uv = -src_stride_uv;
}
// Coalesce rows.
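// When both planes are contiguous, the whole image can be processed as a
// single long row.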
if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
width *= height;
height = 1;
src_stride_uv = dst_stride_vu = 0;
}
#if defined(HAS_SWAPUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SwapUVRow = SwapUVRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
SwapUVRow = SwapUVRow_SSSE3;
}
}
#endif
#if defined(HAS_SWAPUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
SwapUVRow = SwapUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
SwapUVRow = SwapUVRow_AVX2;
}
}
#endif
#if defined(HAS_SWAPUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SwapUVRow = SwapUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
SwapUVRow = SwapUVRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
SwapUVRow(src_uv, dst_vu, width);
src_uv += src_stride_uv;
dst_vu += dst_stride_vu;
}
}
// Convert NV21 to NV12.
LIBYUV_API
int NV21ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_vu || !dst_uv || width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
halfheight);
return 0;
}
// Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API

View File

@ -481,6 +481,66 @@ int I420Rotate(const uint8_t* src_y,
return -1;
}
LIBYUV_API
int I444Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height,
enum libyuv::RotationMode mode) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
switch (mode) {
case libyuv::kRotate0:
// copy frame
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate90:
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate270:
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
default:
break;
}
return -1;
}
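// Usage sketch (assumption: for kRotate90/kRotate270 the destination planes
// are sized and strided for the rotated frame, i.e. height x width):
//   I444Rotate(src_y, w, src_u, w, src_v, w,
//              dst_y, h, dst_u, h, dst_v, h, w, h, kRotate90);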
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
int src_stride_y,

View File

@ -286,7 +286,12 @@ ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#ifdef HAS_MERGEUVROW_MMI
ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7)
#endif
#ifdef HAS_NV21TOYUV24ROW_NEON
ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV21TOYUV24ROW_AVX2
ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
#endif
// Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
@ -575,6 +580,9 @@ ANY11(RAWToRGB24Row_Any_MMI, RAWToRGB24Row_MMI, 0, 3, 3, 3)
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_AVX2
ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
@ -702,6 +710,18 @@ ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#ifdef HAS_UYVYTOYROW_MMI
ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
#endif
#ifdef HAS_AYUVTOYROW_NEON
ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_SWAPUVROW_SSSE3
ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15)
#endif
#ifdef HAS_SWAPUVROW_AVX2
ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31)
#endif
#ifdef HAS_SWAPUVROW_NEON
ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
@ -1256,6 +1276,9 @@ ANY13(SplitRGBRow_Any_MMI, SplitRGBRow_MMI, 3, 3)
#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVROW_AVX2
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
@ -1381,6 +1404,37 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
#endif
#undef ANY12S
// Any 1 to 1 with source stride (2 rows of source). Outputs UV plane.
// A 128-byte row allows for 32 AVX ARGB pixels.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu, \
int width) { \
SIMD_ALIGNED(uint8_t temp[128 * 3]); \
memset(temp, 0, 128 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, n); \
} \
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
BPP); \
memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
} \
ANY_SIMD(temp, 128, temp + 256, MASK + 1); \
memcpy(dst_vu + (n >> 1) * 2, temp + 256, SS(r, 1) * 2); \
}
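// Note: this follows the standard ANY remainder pattern: the aligned bulk of
// the row goes straight to the SIMD kernel, the tail pixels (and the matching
// bytes of the second source row) are copied into a zeroed aligned scratch
// buffer, the kernel runs once more on a full MASK + 1 chunk, and only the
// valid output bytes are copied back, so the SIMD kernels never need a
// scalar tail loop.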
#ifdef HAS_AYUVTOVUROW_NEON
ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@ -3231,6 +3231,107 @@ void GaussCol_C(const uint16_t* src0,
}
}
// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_C(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
dst_yuv24[0] = src_vu[0]; // V
dst_yuv24[1] = src_vu[1]; // U
dst_yuv24[2] = src_y[0]; // Y0
dst_yuv24[3] = src_vu[0]; // V
dst_yuv24[4] = src_vu[1]; // U
dst_yuv24[5] = src_y[1]; // Y1
src_y += 2;
src_vu += 2;
dst_yuv24 += 6; // Advance 2 pixels.
}
if (width & 1) {
dst_yuv24[0] = src_vu[0]; // V
dst_yuv24[1] = src_vu[1]; // U
dst_yuv24[2] = src_y[0]; // Y0
}
}
// Filter 2 rows of AYUV UV's (444) into UV (420).
void AYUVToUVRow_C(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_uv,
int width) {
// Output a row of UV values, filtering 2x2 rows of AYUV.
int x;
for (x = 0; x < width; x += 2) {
dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
src_ayuv[src_stride_ayuv + 5] + 2) >>
2;
dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
src_ayuv[src_stride_ayuv + 4] + 2) >>
2;
src_ayuv += 8;
dst_uv += 2;
}
if (width & 1) {
dst_uv[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
src_ayuv[src_stride_ayuv + 0] + 2) >>
2;
dst_uv[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
src_ayuv[src_stride_ayuv + 1] + 2) >>
2;
}
}
// Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_vu,
int width) {
// Output a row of VU values, filtering 2x2 rows of AYUV.
int x;
for (x = 0; x < width; x += 2) {
dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
src_ayuv[src_stride_ayuv + 4] + 2) >>
2;
dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
src_ayuv[src_stride_ayuv + 5] + 2) >>
2;
src_ayuv += 8;
dst_vu += 2;
}
if (width & 1) {
dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
src_ayuv[src_stride_ayuv + 0] + 2) >>
2;
dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
src_ayuv[src_stride_ayuv + 1] + 2) >>
2;
}
}
// Copy row of AYUV Y's into Y
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
// Output a row of Y values.
int x;
for (x = 0; x < width; ++x) {
dst_y[x] = src_ayuv[2]; // v,u,y,a
src_ayuv += 4;
}
}
// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
int x;
for (x = 0; x < width; ++x) {
uint8_t u = src_uv[0];
uint8_t v = src_uv[1];
dst_vu[0] = v;
dst_vu[1] = u;
src_uv += 2;
dst_vu += 2;
}
}
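// A sketch of how a plane-level converter can drive the row functions above
// to produce NV21 output (full-resolution Y plus 2x2-subsampled VU). This
// mirrors the shape of libyuv's plane loops, but it is a simplified
// illustration, not the library's actual AYUVToNV21 implementation:
static void AYUVToNV21Planes_C(const uint8_t* src_ayuv, int src_stride_ayuv,
                               uint8_t* dst_y, int dst_stride_y,
                               uint8_t* dst_vu, int dst_stride_vu, int width,
                               int height) {
  int y;
  for (y = 0; y < height - 1; y += 2) {
    AYUVToYRow_C(src_ayuv, dst_y, width);
    AYUVToYRow_C(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
    AYUVToVURow_C(src_ayuv, src_stride_ayuv, dst_vu, width);
    src_ayuv += src_stride_ayuv * 2;
    dst_y += dst_stride_y * 2;
    dst_vu += dst_stride_vu;
  }
  if (height & 1) {
    AYUVToYRow_C(src_ayuv, dst_y, width);
    AYUVToVURow_C(src_ayuv, 0, dst_vu, width);  // average last row with itself
  }
}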
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

View File

@@ -1154,6 +1154,48 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
}
#endif // HAS_ARGBTOYROW_AVX2
#ifdef HAS_ABGRTOYROW_AVX2
// Convert 32 ABGR pixels (128 bytes) to 32 Y values.
void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vmovdqu %5,%%ymm6 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n"
"vmovdqu 0x40(%0),%%ymm2 \n"
"vmovdqu 0x60(%0),%%ymm3 \n"
"vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
"vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
"lea 0x80(%0),%0 \n"
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
"vpsrlw $0x7,%%ymm0,%%ymm0 \n"
"vpsrlw $0x7,%%ymm2,%%ymm2 \n"
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y
"vmovdqu %%ymm0,(%1) \n"
"lea 0x20(%1),%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "m"(kABGRToY), // %3
"m"(kAddY16), // %4
"m"(kPermdARGBToY_AVX) // %5
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ABGRTOYROW_AVX2
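// The AVX2 kernel above computes BT.601 studio-swing luma: a weighted sum of
// B, G, R followed by a +16 offset. A scalar model of the same math, using
// the 8-bit fixed-point coefficients from libyuv's C reference; the function
// name is illustrative, and the vector path's 7-bit vpmaddubsw rounding can
// differ by a small amount:
static uint8_t ABGRPixelToY(uint8_t r, uint8_t g, uint8_t b) {
  // 0x1080 folds in the +16 offset (16 << 8) plus 0x80 for rounding.
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}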
#ifdef HAS_ARGBTOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@@ -1328,6 +1370,69 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
}
#endif // HAS_ARGBTOUVROW_AVX2
#ifdef HAS_ABGRTOUVROW_AVX2
void ABGRToUVRow_AVX2(const uint8_t* src_abgr0,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
asm volatile(
"vbroadcastf128 %5,%%ymm5 \n"
"vbroadcastf128 %6,%%ymm6 \n"
"vbroadcastf128 %7,%%ymm7 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n"
"vmovdqu 0x40(%0),%%ymm2 \n"
"vmovdqu 0x60(%0),%%ymm3 \n"
"vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
"vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
"vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
"vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
"lea 0x80(%0),%0 \n"
"vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
"vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
"vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
"vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
"vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
"vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
"vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
"vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
"vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
"vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
"vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
"vpsraw $0x8,%%ymm1,%%ymm1 \n"
"vpsraw $0x8,%%ymm0,%%ymm0 \n"
"vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpshufb %8,%%ymm0,%%ymm0 \n"
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
"vextractf128 $0x0,%%ymm0,(%1) \n"
"vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
"lea 0x10(%1),%1 \n"
"sub $0x20,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_abgr0), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
: "r"((intptr_t)(src_stride_abgr)), // %4
"m"(kAddUV128), // %5
"m"(kABGRToV), // %6
"m"(kABGRToU), // %7
"m"(kShufARGBToUV_AVX) // %8
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
}
#endif // HAS_ABGRTOUVROW_AVX2
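// The UV kernel above first averages each 2x2 block of pixels (the vpavgb of
// two rows plus the shufps/vpavgb pair), then applies the BT.601 chroma
// matrix with a +128 bias. A scalar model of the per-block math, with
// coefficients as in libyuv's C reference; the helper name is illustrative:
static void ABGRBlockToUV(uint8_t r, uint8_t g, uint8_t b,  // 2x2 averages
                          uint8_t* u, uint8_t* v) {
  // 0x8080 folds in the +128 bias (128 << 8) plus 0x80 for rounding.
  *u = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
  *v = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}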
#ifdef HAS_ARGBTOUVJROW_AVX2
void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
int src_stride_argb,
@@ -5238,7 +5343,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
,
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
);
}
#endif // HAS_ARGBMULTIPLYROW_AVX2
@@ -6120,24 +6225,24 @@ void I422ToYUY2Row_SSE2(const uint8_t* src_y,
int width) {
asm volatile(
"sub %1,%2 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"movq (%1),%%xmm2 \n"
"movq 0x00(%1,%2,1),%%xmm1 \n"
"add $0x8,%1 \n"
"punpcklbw %%xmm1,%%xmm2 \n"
"movdqu (%0),%%xmm0 \n"
"add $0x10,%0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
"movdqu %%xmm0,(%3) \n"
"movdqu %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x10,%4 \n"
"jg 1b \n"
"1: \n"
"movq (%1),%%xmm2 \n"
"movq 0x00(%1,%2,1),%%xmm1 \n"
"add $0x8,%1 \n"
"punpcklbw %%xmm1,%%xmm2 \n"
"movdqu (%0),%%xmm0 \n"
"add $0x10,%0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
"movdqu %%xmm0,(%3) \n"
"movdqu %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x10,%4 \n"
"jg 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -6156,24 +6261,24 @@ void I422ToUYVYRow_SSE2(const uint8_t* src_y,
int width) {
asm volatile(
"sub %1,%2 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"movq (%1),%%xmm2 \n"
"movq 0x00(%1,%2,1),%%xmm1 \n"
"add $0x8,%1 \n"
"punpcklbw %%xmm1,%%xmm2 \n"
"movdqu (%0),%%xmm0 \n"
"movdqa %%xmm2,%%xmm1 \n"
"add $0x10,%0 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
"movdqu %%xmm1,(%3) \n"
"movdqu %%xmm2,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x10,%4 \n"
"jg 1b \n"
"1: \n"
"movq (%1),%%xmm2 \n"
"movq 0x00(%1,%2,1),%%xmm1 \n"
"add $0x8,%1 \n"
"punpcklbw %%xmm1,%%xmm2 \n"
"movdqu (%0),%%xmm0 \n"
"movdqa %%xmm2,%%xmm1 \n"
"add $0x10,%0 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
"movdqu %%xmm1,(%3) \n"
"movdqu %%xmm2,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x10,%4 \n"
"jg 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -6192,27 +6297,27 @@ void I422ToYUY2Row_AVX2(const uint8_t* src_y,
int width) {
asm volatile(
"sub %1,%2 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"vpmovzxbw (%1),%%ymm1 \n"
"vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
"add $0x10,%1 \n"
"vpsllw $0x8,%%ymm2,%%ymm2 \n"
"vpor %%ymm1,%%ymm2,%%ymm2 \n"
"vmovdqu (%0),%%ymm0 \n"
"add $0x20,%0 \n"
"vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n"
"vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n"
"vextractf128 $0x0,%%ymm1,(%3) \n"
"vextractf128 $0x0,%%ymm2,0x10(%3) \n"
"vextractf128 $0x1,%%ymm1,0x20(%3) \n"
"vextractf128 $0x1,%%ymm2,0x30(%3) \n"
"lea 0x40(%3),%3 \n"
"sub $0x20,%4 \n"
"jg 1b \n"
"vzeroupper \n"
"1: \n"
"vpmovzxbw (%1),%%ymm1 \n"
"vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
"add $0x10,%1 \n"
"vpsllw $0x8,%%ymm2,%%ymm2 \n"
"vpor %%ymm1,%%ymm2,%%ymm2 \n"
"vmovdqu (%0),%%ymm0 \n"
"add $0x20,%0 \n"
"vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n"
"vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n"
"vextractf128 $0x0,%%ymm1,(%3) \n"
"vextractf128 $0x0,%%ymm2,0x10(%3) \n"
"vextractf128 $0x1,%%ymm1,0x20(%3) \n"
"vextractf128 $0x1,%%ymm2,0x30(%3) \n"
"lea 0x40(%3),%3 \n"
"sub $0x20,%4 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -6231,27 +6336,27 @@ void I422ToUYVYRow_AVX2(const uint8_t* src_y,
int width) {
asm volatile(
"sub %1,%2 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"vpmovzxbw (%1),%%ymm1 \n"
"vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
"add $0x10,%1 \n"
"vpsllw $0x8,%%ymm2,%%ymm2 \n"
"vpor %%ymm1,%%ymm2,%%ymm2 \n"
"vmovdqu (%0),%%ymm0 \n"
"add $0x20,%0 \n"
"vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n"
"vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n"
"vextractf128 $0x0,%%ymm1,(%3) \n"
"vextractf128 $0x0,%%ymm2,0x10(%3) \n"
"vextractf128 $0x1,%%ymm1,0x20(%3) \n"
"vextractf128 $0x1,%%ymm2,0x30(%3) \n"
"lea 0x40(%3),%3 \n"
"sub $0x20,%4 \n"
"jg 1b \n"
"vzeroupper \n"
"1: \n"
"vpmovzxbw (%1),%%ymm1 \n"
"vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
"add $0x10,%1 \n"
"vpsllw $0x8,%%ymm2,%%ymm2 \n"
"vpor %%ymm1,%%ymm2,%%ymm2 \n"
"vmovdqu (%0),%%ymm0 \n"
"add $0x20,%0 \n"
"vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n"
"vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n"
"vextractf128 $0x0,%%ymm1,(%3) \n"
"vextractf128 $0x0,%%ymm2,0x10(%3) \n"
"vextractf128 $0x1,%%ymm1,0x20(%3) \n"
"vextractf128 $0x1,%%ymm2,0x30(%3) \n"
"lea 0x40(%3),%3 \n"
"sub $0x20,%4 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
@@ -6669,6 +6774,186 @@ void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
}
#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
#ifdef HAS_NV21TOYUV24ROW_AVX2
// begin NV21ToYUV24Row_AVX2 constants
static const ulvec8 kBLEND0 = {0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80, 0x00,
0x80, 0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80,
0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80,
0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00};
static const ulvec8 kBLEND1 = {0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00,
0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80};
static const ulvec8 kBLEND2 = {0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80,
0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00,
0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00};
static const ulvec8 kSHUF0 = {0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d,
0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05,
0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d,
0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05};
static const ulvec8 kSHUF1 = {0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02,
0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80,
0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02,
0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80};
static const ulvec8 kSHUF2 = {0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80,
0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f,
0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80,
0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f};
static const ulvec8 kSHUF3 = {0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80,
0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80,
0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80,
0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80};
static const ulvec8 kSHUF4 = {0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80,
0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a,
0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80,
0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a};
static const ulvec8 kSHUF5 = {0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07,
0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80,
0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07,
0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80};
// NV21ToYUV24Row_AVX2
void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width) {
uint8_t* src_y_ptr;
uint64_t src_offset = 0;
uint64_t width64;
width64 = width;
src_y_ptr = (uint8_t*)src_y;
asm volatile(
"vmovdqu %5, %%ymm0 \n" // init blend value
"vmovdqu %6, %%ymm1 \n" // init blend value
"vmovdqu %7, %%ymm2 \n" // init blend value
// "sub $0x20, %3 \n" //sub 32 from width for final loop
LABELALIGN
"1: \n" // label 1
"vmovdqu (%0,%4), %%ymm3 \n" // src_y
"vmovdqu 1(%1,%4), %%ymm4 \n" // src_uv+1
"vmovdqu (%1), %%ymm5 \n" // src_uv
"vpshufb %8, %%ymm3, %%ymm13 \n" // y, kSHUF0 for shuf
"vpshufb %9, %%ymm4, %%ymm14 \n" // uv+1, kSHUF1 for
// shuf
"vpshufb %10, %%ymm5, %%ymm15 \n" // uv, kSHUF2 for
// shuf
"vpshufb %11, %%ymm3, %%ymm3 \n" // y kSHUF3 for shuf
"vpshufb %12, %%ymm4, %%ymm4 \n" // uv+1 kSHUF4 for
// shuf
"vpblendvb %%ymm0, %%ymm14, %%ymm13, %%ymm12 \n" // blend 0
"vpblendvb %%ymm0, %%ymm13, %%ymm14, %%ymm14 \n" // blend 0
"vpblendvb %%ymm2, %%ymm15, %%ymm12, %%ymm12 \n" // blend 2
"vpblendvb %%ymm1, %%ymm15, %%ymm14, %%ymm13 \n" // blend 1
"vpshufb %13, %%ymm5, %%ymm15 \n" // shuffle const
"vpor %%ymm4, %%ymm3, %%ymm5 \n" // get results
"vmovdqu %%ymm12, 0x20(%2) \n" // store dst_yuv+20h
"vpor %%ymm15, %%ymm5, %%ymm3 \n" // get results
"add $0x20, %4 \n" // add to src buffer
// ptr
"vinserti128 $0x1, %%xmm3, %%ymm13, %%ymm4 \n" // insert
"vperm2i128 $0x31, %%ymm13, %%ymm3, %%ymm5 \n" // insert
"vmovdqu %%ymm4, (%2) \n" // store dst_yuv
"vmovdqu %%ymm5, 0x40(%2) \n" // store dst_yuv+40h
"add $0x60,%2 \n" // add to dst buffer
// ptr
// "cmp %3, %4 \n" //(width64 -
// 32 bytes) and src_offset
"sub $0x20,%3 \n" // 32 pixels per loop
"jg 1b \n"
"vzeroupper \n" // sse-avx2
// transitions
: "+r"(src_y), //%0
"+r"(src_vu), //%1
"+r"(dst_yuv24), //%2
"+r"(width64), //%3
"+r"(src_offset) //%4
: "m"(kBLEND0), //%5
"m"(kBLEND1), //%6
"m"(kBLEND2), //%7
"m"(kSHUF0), //%8
"m"(kSHUF1), //%9
"m"(kSHUF2), //%10
"m"(kSHUF3), //%11
"m"(kSHUF4), //%12
"m"(kSHUF5) //%13
: "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm12",
"xmm13", "xmm14", "xmm15");
}
#endif // HAS_NV21TOYUV24ROW_AVX2
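// The kernel above is built from two byte primitives. C models of both for
// one 16-byte lane follow (vpshufb and vpblendvb operate on each 128-bit
// lane of a ymm register independently; these helpers are illustrative, not
// libyuv code). pshufb: out[i] = src[table[i] & 15], or 0 when the table
// byte has its high bit set, which is why the kSHUF tables use 0x80 for a
// "hole". pblendvb: out[i] = (mask[i] high bit) ? b[i] : a[i].
static void Pshufb16_C(const uint8_t* src, const uint8_t* table,
                       uint8_t* out) {
  int i;
  for (i = 0; i < 16; ++i) {
    out[i] = (table[i] & 0x80) ? 0 : src[table[i] & 15];
  }
}
static void Pblendvb16_C(const uint8_t* a, const uint8_t* b,
                         const uint8_t* mask, uint8_t* out) {
  int i;
  for (i = 0; i < 16; ++i) {
    out[i] = (mask[i] & 0x80) ? b[i] : a[i];
  }
}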
#ifdef HAS_SWAPUVROW_SSSE3
// Shuffle table for reversing the bytes.
static const uvec8 kShuffleUVToVU = {1u, 0u, 3u, 2u, 5u, 4u, 7u, 6u,
9u, 8u, 11u, 10u, 13u, 12u, 15u, 14u};
// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
"movdqu %3,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
"pshufb %%xmm5,%%xmm0 \n"
"pshufb %%xmm5,%%xmm1 \n"
"movdqu %%xmm0,(%1) \n"
"movdqu %%xmm1,0x10(%1) \n"
"lea 0x20(%1),%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_vu), // %1
"+r"(width) // %2
: "m"(kShuffleUVToVU) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_SWAPUVROW_SSSE3
#ifdef HAS_SWAPUVROW_AVX2
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm5 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n"
"lea 0x40(%0),%0 \n"
"vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
"vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
"vmovdqu %%ymm0,(%1) \n"
"vmovdqu %%ymm1,0x20(%1) \n"
"lea 0x40(%1),%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_uv), // %0
"+r"(dst_vu), // %1
"+r"(width) // %2
: "m"(kShuffleUVToVU) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif // HAS_SWAPUVROW_AVX2
#endif // defined(__x86_64__) || defined(__i386__)
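// Both SwapUV kernels above are one table-driven byte shuffle: output byte i
// is input byte kShuffleUVToVU[i], which swaps every U,V pair in place. The
// AVX2 version broadcasts the 16-byte table to both lanes because vpshufb
// shuffles each 128-bit lane independently. A plain C equivalent for one
// 16-byte vector (illustrative only):
static void SwapUV16_C(const uint8_t* src_uv, uint8_t* dst_vu) {
  int i;
  for (i = 0; i < 16; i += 2) {
    dst_vu[i] = src_uv[i + 1];  // V first
    dst_vu[i + 1] = src_uv[i];  // then U
  }
}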
#ifdef __cplusplus

View File

@@ -561,7 +561,7 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
"+r"(width) // %3 // Output registers
: // Input registers
: "cc", "memory", "q0", "q1" // Clobber List
);
);
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
@@ -582,7 +582,7 @@ void MergeUVRow_NEON(const uint8_t* src_u,
"+r"(width) // %3 // Output registers
: // Input registers
: "cc", "memory", "q0", "q1" // Clobber List
);
);
}
// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b.
@@ -607,7 +607,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
"+r"(width) // %4
: // Input registers
: "cc", "memory", "d0", "d1", "d2" // Clobber List
);
);
}
// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
@@ -632,7 +632,7 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
"+r"(width) // %4
: // Input registers
: "cc", "memory", "q0", "q1", "q2" // Clobber List
);
);
}
// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15.
@@ -648,7 +648,7 @@ void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
"+r"(width) // %2 // Output registers
: // Input registers
: "cc", "memory", "q0", "q1" // Clobber List
);
);
}
// SetRow writes 'width' bytes using an 8 bit value repeated.
@@ -761,7 +761,7 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
);
);
}
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
@@ -778,7 +778,7 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
);
);
}
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
@@ -795,7 +795,7 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3" // Clobber List
);
);
}
#define RGB565TOARGB \
@@ -826,7 +826,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
);
);
}
#define ARGB1555TOARGB \
@@ -872,7 +872,7 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
);
);
}
#define ARGB4444TOARGB \
@@ -901,7 +901,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2" // Clobber List
);
);
}
void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
@@ -919,7 +919,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
);
);
}
void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
@@ -935,7 +935,7 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
);
);
}
void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
@@ -950,7 +950,7 @@ void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1" // Clobber List
);
);
}
void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
@@ -965,7 +965,7 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1" // Clobber List
);
);
}
void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
@@ -985,7 +985,7 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
"+r"(width) // %3
:
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
);
);
}
void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
@@ -1005,7 +1005,7 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
"+r"(width) // %3
:
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
);
);
}
void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
@@ -1032,7 +1032,7 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
"d7" // Clobber List
);
);
}
void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
@@ -1059,7 +1059,7 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
"d7" // Clobber List
);
);
}
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
@@ -1081,7 +1081,7 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb,
"+r"(width) // %2
: "r"(shuffler) // %3
: "cc", "memory", "q0", "q1", "q2" // Clobber List
);
);
}
void I422ToYUY2Row_NEON(const uint8_t* src_y,
@@ -1241,7 +1241,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
);
);
}
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@@ -2564,7 +2564,7 @@ void SobelXRow_NEON(const uint8_t* src_y0,
: "r"(2), // %5
"r"(6) // %6
: "cc", "memory", "q0", "q1" // Clobber List
);
);
}
// SobelY as a matrix is
@@ -2601,7 +2601,7 @@ void SobelYRow_NEON(const uint8_t* src_y0,
: "r"(1), // %4
"r"(6) // %5
: "cc", "memory", "q0", "q1" // Clobber List
);
);
}
// %y passes a float as a scalar vector for vector * scalar multiply.
@@ -2685,6 +2685,205 @@ void ByteToFloatRow_NEON(const uint8_t* src,
: "cc", "memory", "q1", "q2", "q3");
}
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_NEON(const uint16_t* src0,
const uint16_t* src1,
const uint16_t* src2,
const uint16_t* src3,
const uint16_t* src4,
uint32_t* dst,
int width) {
asm volatile(
"vmov.u16 d6, #4 \n" // constant 4
"vmov.u16 d7, #6 \n" // constant 6
"1: \n"
"vld1.16 {q1}, [%0]! \n" // load 8 samples, 5 rows
"vld1.16 {q2}, [%4]! \n"
"vaddl.u16 q0, d2, d4 \n" // * 1
"vaddl.u16 q1, d3, d5 \n" // * 1
"vld1.16 {q2}, [%1]! \n"
"vmlal.u16 q0, d4, d6 \n" // * 4
"vmlal.u16 q1, d5, d6 \n" // * 4
"vld1.16 {q2}, [%2]! \n"
"vmlal.u16 q0, d4, d7 \n" // * 6
"vmlal.u16 q1, d5, d7 \n" // * 6
"vld1.16 {q2}, [%3]! \n"
"vmlal.u16 q0, d4, d6 \n" // * 4
"vmlal.u16 q1, d5, d6 \n" // * 4
"subs %6, %6, #8 \n" // 8 processed per loop
"vst1.32 {q0, q1}, [%5]! \n" // store 8 samples
"bgt 1b \n"
: "+r"(src0), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(src4), // %4
"+r"(dst), // %5
"+r"(width) // %6
:
: "cc", "memory", "q0", "q1", "q2", "q3");
}
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
const uint32_t* src1 = src + 1;
const uint32_t* src2 = src + 2;
const uint32_t* src3 = src + 3;
asm volatile(
"vmov.u32 q10, #4 \n" // constant 4
"vmov.u32 q11, #6 \n" // constant 6
"1: \n"
"vld1.32 {q0, q1}, [%0]! \n" // load 12 source samples
"vld1.32 {q2}, [%0] \n"
"vadd.u32 q0, q0, q1 \n" // * 1
"vadd.u32 q1, q1, q2 \n" // * 1
"vld1.32 {q2, q3}, [%2]! \n"
"vmla.u32 q0, q2, q11 \n" // * 6
"vmla.u32 q1, q3, q11 \n" // * 6
"vld1.32 {q2, q3}, [%1]! \n"
"vld1.32 {q8, q9}, [%3]! \n"
"vadd.u32 q2, q2, q8 \n" // add rows for * 4
"vadd.u32 q3, q3, q9 \n"
"vmla.u32 q0, q2, q10 \n" // * 4
"vmla.u32 q1, q3, q10 \n" // * 4
"subs %5, %5, #8 \n" // 8 processed per loop
"vqshrn.u32 d0, q0, #8 \n" // round and pack
"vqshrn.u32 d1, q1, #8 \n"
"vst1.u16 {q0}, [%4]! \n" // store 8 samples
"bgt 1b \n"
: "+r"(src), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(dst), // %4
"+r"(width) // %5
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
}
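// The two kernels above implement a separable 5-tap Gaussian with taps
// [1, 4, 6, 4, 1]: the column pass widens 16-bit samples into 32-bit sums,
// and the row pass applies the same taps horizontally and shifts right by 8,
// since the combined weight is 16 * 16 = 256. A scalar sketch for a single
// output sample (illustrative, not libyuv's C reference):
static uint32_t GaussCol1(const uint16_t* s0, const uint16_t* s1,
                          const uint16_t* s2, const uint16_t* s3,
                          const uint16_t* s4, int x) {
  return s0[x] + 4 * s1[x] + 6 * s2[x] + 4 * s3[x] + s4[x];
}
static uint16_t GaussRow1(const uint32_t* src, int x) {
  uint32_t sum =
      src[x] + 4 * src[x + 1] + 6 * src[x + 2] + 4 * src[x + 3] + src[x + 4];
  return (uint16_t)(sum >> 8);  // NEON uses vqshrn, a saturating shift
}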
// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width) {
asm volatile(
"1: \n"
"vld1.8 {q2}, [%0]! \n" // load 16 Y values
"vld2.8 {d0, d2}, [%1]! \n" // load 8 VU values
"vmov d1, d0 \n"
"vzip.u8 d0, d1 \n" // VV
"vmov d3, d2 \n"
"vzip.u8 d2, d3 \n" // UU
"subs %3, %3, #16 \n" // 16 pixels per loop
"vst3.8 {d0, d2, d4}, [%2]! \n" // store 16 YUV pixels
"vst3.8 {d1, d3, d5}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_vu), // %1
"+r"(dst_yuv24), // %2
"+r"(width) // %3
:
: "cc", "memory", "q0", "q1", "q2");
}
void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_uv,
int width) {
asm volatile(
"add %1, %0, %1 \n" // src_stride + src_AYUV
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV
// pixels.
"vpaddl.u8 q0, q0 \n" // V 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // U 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more AYUV
// pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 AYUV
// pixels.
"vpadal.u8 q0, q4 \n" // V 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // U 16 bytes -> 8 shorts.
"vqrshrun.s16 d1, q0, #2 \n" // 2x2 average
"vqrshrun.s16 d0, q1, #2 \n"
"subs %3, %3, #16 \n" // 16 processed per loop.
"vst2.8 {d0, d1}, [%2]! \n" // store 8 pixels UV.
"bgt 1b \n"
: "+r"(src_ayuv), // %0
"+r"(src_stride_ayuv), // %1
"+r"(dst_uv), // %2
"+r"(width) // %3
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
}
void AYUVToVURow_NEON(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_vu,
int width) {
asm volatile(
"add %1, %0, %1 \n" // src_stride + src_AYUV
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV
// pixels.
"vpaddl.u8 q0, q0 \n" // V 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // U 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more AYUV
// pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 AYUV
// pixels.
"vpadal.u8 q0, q4 \n" // V 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // U 16 bytes -> 8 shorts.
"vqrshrun.s16 d0, q0, #2 \n" // 2x2 average
"vqrshrun.s16 d1, q1, #2 \n"
"subs %3, %3, #16 \n" // 16 processed per loop.
"vst2.8 {d0, d1}, [%2]! \n" // store 8 pixels VU.
"bgt 1b \n"
: "+r"(src_ayuv), // %0
"+r"(src_stride_ayuv), // %1
"+r"(dst_vu), // %2
"+r"(width) // %3
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
}
// Copy row of AYUV Y's into Y.
// Similar to ARGBExtractAlphaRow_NEON
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
asm volatile(
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV pixels
"subs %2, %2, #16 \n" // 16 processed per loop
"vst1.8 {q2}, [%1]! \n" // store 16 Y's.
"bgt 1b \n"
: "+r"(src_ayuv), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3");
}
// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
"1: \n"
"vld2.8 {d0, d2}, [%0]! \n" // load 16 UV values
"vld2.8 {d1, d3}, [%0]! \n"
"vorr.u8 q2, q0, q0 \n" // move U after V
"subs %2, %2, #16 \n" // 16 pixels per loop
"vst2.8 {q1, q2}, [%1]! \n" // store 16 VU pixels
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_vu), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2");
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
#ifdef __cplusplus

View File

@@ -608,7 +608,7 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
"+r"(width) // %3 // Output registers
: // Input registers
: "cc", "memory", "v0", "v1" // Clobber List
);
);
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
@@ -629,7 +629,7 @@ void MergeUVRow_NEON(const uint8_t* src_u,
"+r"(width) // %3 // Output registers
: // Input registers
: "cc", "memory", "v0", "v1" // Clobber List
);
);
}
// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b.
@@ -653,7 +653,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
"+r"(width) // %4
: // Input registers
: "cc", "memory", "v0", "v1", "v2" // Clobber List
);
);
}
// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
@@ -677,7 +677,7 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
"+r"(width) // %4
: // Input registers
: "cc", "memory", "v0", "v1", "v2" // Clobber List
);
);
}
// Copy multiple of 32.
@@ -693,7 +693,7 @@ void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
"+r"(width) // %2 // Output registers
: // Input registers
: "cc", "memory", "v0", "v1" // Clobber List
);
);
}
// SetRow writes 'width' bytes using an 8 bit value repeated.
@@ -800,7 +800,7 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
"+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
);
);
}
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
@@ -818,7 +818,7 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
);
);
}
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
@@ -835,7 +835,7 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
);
);
}
#define RGB565TOARGB \
@@ -867,7 +867,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List
);
);
}
#define ARGB1555TOARGB \
@@ -924,7 +924,7 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
);
}
#define ARGB4444TOARGB \
@@ -955,7 +955,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
);
);
}
void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
@@ -973,7 +973,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
"+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
);
);
}
void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
@@ -990,7 +990,7 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List
);
);
}
void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
@@ -1005,7 +1005,7 @@ void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1" // Clobber List
);
);
}
void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
@@ -1020,7 +1020,7 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1" // Clobber List
);
);
}
void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
@@ -1040,7 +1040,7 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
"+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
);
}
void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
@@ -1060,7 +1060,7 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
"+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
);
}
void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
@@ -1087,7 +1087,7 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7" // Clobber List
);
);
}
void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
@@ -1114,7 +1114,7 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7" // Clobber List
);
);
}
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
@@ -1135,7 +1135,7 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb,
"+r"(width) // %2
: "r"(shuffler) // %3
: "cc", "memory", "v0", "v1", "v2" // Clobber List
);
);
}
void I422ToYUY2Row_NEON(const uint8_t* src_y,
@@ -1298,7 +1298,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
);
}
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@@ -1863,7 +1863,7 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
"v28"
);
);
}
void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
@@ -2611,7 +2611,7 @@ void SobelXRow_NEON(const uint8_t* src_y0,
: "r"(2LL), // %5
"r"(6LL) // %6
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
);
}
// SobelY as a matrix is
@@ -2648,7 +2648,7 @@ void SobelYRow_NEON(const uint8_t* src_y0,
: "r"(1LL), // %4
"r"(6LL) // %5
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
);
);
}
// Caveat - rounds float to half float whereas scaling version truncates.
@@ -2876,6 +2876,115 @@ void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}
// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
int width) {
asm volatile(
"1: \n"
"ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values
"ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU values
"zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values
"zip1 v1.16b, v1.16b, v1.16b \n" // replicate U values
"subs %w3, %w3, #16 \n" // 16 pixels per loop
"st3 {v0.16b,v1.16b,v2.16b}, [%2], #48 \n" // store 16 YUV pixels
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_vu), // %1
"+r"(dst_yuv24), // %2
"+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2");
}
void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_uv,
int width) {
const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv;
asm volatile(
"1: \n"
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 ayuv
"uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts.
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
"uadalp v0.8h, v4.16b \n" // V 16 bytes -> 8 shorts.
"uadalp v1.8h, v5.16b \n" // U 16 bytes -> 8 shorts.
"uqrshrn v3.8b, v0.8h, #2 \n" // 2x2 average
"uqrshrn v2.8b, v1.8h, #2 \n"
"subs %w3, %w3, #16 \n" // 16 processed per loop.
"st2 {v2.8b,v3.8b}, [%2], #16 \n" // store 8 pixels UV.
"b.gt 1b \n"
: "+r"(src_ayuv), // %0
"+r"(src_ayuv_1), // %1
"+r"(dst_uv), // %2
"+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}
void AYUVToVURow_NEON(const uint8_t* src_ayuv,
int src_stride_ayuv,
uint8_t* dst_vu,
int width) {
const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv;
asm volatile(
"1: \n"
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
// pixels.
"uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts.
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
"uadalp v0.8h, v4.16b \n" // V 16 bytes -> 8 shorts.
"uadalp v1.8h, v5.16b \n" // U 16 bytes -> 8 shorts.
"uqrshrn v0.8b, v0.8h, #2 \n" // 2x2 average
"uqrshrn v1.8b, v1.8h, #2 \n"
"subs %w3, %w3, #16 \n" // 16 processed per loop.
"st2 {v0.8b,v1.8b}, [%2], #16 \n" // store 8 pixels VU.
"b.gt 1b \n"
: "+r"(src_ayuv), // %0
"+r"(src_ayuv_1), // %1
"+r"(dst_vu), // %2
"+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}
// Copy row of AYUV Y's into Y
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
asm volatile(
"1: \n"
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
// pixels
"subs %w2, %w2, #16 \n" // 16 pixels per loop
"st1 {v2.16b}, [%1], #16 \n" // store 16 Y pixels
"b.gt 1b \n"
: "+r"(src_ayuv), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3");
}
// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile(
"1: \n"
"ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 UV values
"orr v2.16b, v0.16b, v0.16b \n" // move U after V
"subs %w2, %w2, #16 \n" // 16 pixels per loop
"st2 {v1.16b, v2.16b}, [%1], #32 \n" // store 16 VU pixels
"b.gt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_vu), // %1
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2");
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus

View File

@@ -1594,9 +1594,9 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // width
vbroadcastf128 ymm5, xmmword ptr kAddUV128
vbroadcastf128 ymm6, xmmword ptr kARGBToV
vbroadcastf128 ymm7, xmmword ptr kARGBToU
vbroadcastf128 ymm5, xmmword ptr kAddUVJ128
vbroadcastf128 ymm6, xmmword ptr kARGBToVJ
vbroadcastf128 ymm7, xmmword ptr kARGBToUJ
sub edi, edx // stride from u to v
convertloop:
@@ -4222,7 +4222,7 @@ __declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
add ecx, 4 - 1
jl convertloop1b
// 1 pixel loop.
// 1 pixel loop.
convertloop1:
movd xmm3, [eax] // src argb
lea eax, [eax + 4]
@@ -5360,7 +5360,7 @@ void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
add ecx, 4 - 1
jl l1b
// 1 pixel loop
// 1 pixel loop
l1:
movdqu xmm0, [eax]
psubd xmm0, [eax + edx * 4]
@@ -5448,9 +5448,9 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
add ecx, 4 - 1
jl l1b
// 1 pixel loop
// 1 pixel loop
l1:
movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes.
movd xmm2, dword ptr [eax] // 1 argb pixel
lea eax, [eax + 4]
punpcklbw xmm2, xmm1
punpcklwd xmm2, xmm1
@@ -5534,7 +5534,7 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb,
add ecx, 4 - 1
jl l1b
// 1 pixel loop
// 1 pixel loop
l1:
cvttps2dq xmm0, xmm2 // x, y float to int
packssdw xmm0, xmm0 // x, y as shorts

View File

@@ -1788,6 +1788,75 @@ int I420Scale_16(const uint16_t* src_y,
return 0;
}
// Scale an I444 image.
// This function in turn calls a scaling function for each plane.
LIBYUV_API
int I444Scale(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
dst_width, dst_height, filtering);
ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
dst_width, dst_height, filtering);
return 0;
}
LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
dst_width, dst_height, filtering);
ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
dst_width, dst_height, filtering);
return 0;
}
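// Usage sketch for the new I444Scale: halve a 640x480 I444 frame with
// bilinear filtering. I444 planes share the luma dimensions, so each plane
// gets the same stride; the helper name, buffers, and sizes here are
// illustrative.
static int HalveI444(const uint8_t* src_y, const uint8_t* src_u,
                     const uint8_t* src_v, uint8_t* dst_y, uint8_t* dst_u,
                     uint8_t* dst_v) {
  return I444Scale(src_y, 640, src_u, 640, src_v, 640, 640, 480, dst_y, 320,
                   dst_u, 320, dst_v, 320, 320, 240, kFilterBilinear);
}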
// Deprecated api
LIBYUV_API
int Scale(const uint8_t* src_y,

View File

@@ -483,7 +483,7 @@ void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf0), // %0
"m"(kShuf1), // %1
"m"(kShuf2) // %2
);
);
asm volatile(
LABELALIGN
@@ -521,7 +521,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf01), // %0
"m"(kShuf11), // %1
"m"(kShuf21) // %2
);
);
asm volatile(
"movdqa %0,%%xmm5 \n" // kMadd01
"movdqa %1,%%xmm0 \n" // kMadd11
@@ -530,7 +530,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kMadd01), // %0
"m"(kMadd11), // %1
"m"(kRound34) // %2
);
);
asm volatile(
LABELALIGN
@@ -587,7 +587,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf01), // %0
"m"(kShuf11), // %1
"m"(kShuf21) // %2
);
);
asm volatile(
"movdqa %0,%%xmm5 \n" // kMadd01
"movdqa %1,%%xmm0 \n" // kMadd11
@@ -596,7 +596,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kMadd01), // %0
"m"(kMadd11), // %1
"m"(kRound34) // %2
);
);
asm volatile(
@@ -690,7 +690,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
"m"(kShufAb1), // %1
"m"(kShufAb2), // %2
"m"(kScaleAb2) // %3
);
);
asm volatile(
LABELALIGN
@@ -734,7 +734,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShufAc), // %0
"m"(kShufAc3), // %1
"m"(kScaleAc33) // %2
);
);
asm volatile(
LABELALIGN
@@ -1272,7 +1272,7 @@ void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
:
: "m"(kShuffleColARGB), // %0
"m"(kShuffleFractions) // %1
);
);
asm volatile(
"movd %5,%%xmm2 \n"

View File

@@ -40,7 +40,7 @@ void ScaleRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "q0", "q1" // Clobber List
);
);
}
// Read 32x1 average down and write 16x1.
@@ -61,7 +61,7 @@ void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "q0", "q1" // Clobber List
);
);
}
// Read 32x2 average down and write 16x1.
@@ -92,7 +92,7 @@ void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %3
:
: "q0", "q1", "q2", "q3" // Clobber List
);
);
}
void ScaleRowDown4_NEON(const uint8_t* src_ptr,
@@ -523,7 +523,7 @@ void ScaleAddRow_NEON(const uint8_t* src_ptr,
"+r"(src_width) // %2
:
: "memory", "cc", "q0", "q1", "q2" // Clobber List
);
);
}
// TODO(Yang Zhang): Investigate less load instructions for
@@ -705,7 +705,7 @@ void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
);
}
// 46: f964 018d vld4.32 {d16,d18,d20,d22}, [r4]!
@@ -734,7 +734,7 @@ void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
);
}
void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,

View File

@@ -38,7 +38,7 @@ void ScaleRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "v0", "v1" // Clobber List
);
);
}
// Read 32x1 average down and write 16x1.
@@ -60,7 +60,7 @@ void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "v0", "v1" // Clobber List
);
);
}
// Read 32x2 average down and write 16x1.
@@ -89,7 +89,7 @@ void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %3
:
: "v0", "v1", "v2", "v3" // Clobber List
);
);
}
void ScaleRowDown4_NEON(const uint8_t* src_ptr,
@@ -534,7 +534,7 @@ void ScaleAddRow_NEON(const uint8_t* src_ptr,
"+r"(src_width) // %2
:
: "memory", "cc", "v0", "v1", "v2" // Clobber List
);
);
}
// TODO(Yang Zhang): Investigate less load instructions for
@@ -719,7 +719,7 @@ void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
);
}
void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
@@ -742,7 +742,7 @@ void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
"+r"(dst_width) // %2
:
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
);
}
void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
@@ -991,7 +991,7 @@ void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
"+r"(dst_width) // %3
:
: "v0", "v1", "v2", "v3" // Clobber List
);
);
}
// Read 8x2 upsample with filtering and write 16x1.
@@ -1041,7 +1041,7 @@ void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
"r"(14LL) // %5
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
"v19" // Clobber List
);
);
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

View File

@@ -37,7 +37,7 @@ CHROMIUM_LOG_TEMPLATE = CHROMIUM_SRC_URL + '/+log/%s'
CHROMIUM_FILE_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s/%s'
COMMIT_POSITION_RE = re.compile('^Cr-Commit-Position: .*#([0-9]+).*$')
CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'(\d+)\'$')
CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'([0-9a-z]+)\'$')
ROLL_BRANCH_NAME = 'roll_chromium_revision'
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

View File

@@ -15,10 +15,13 @@
#include "../unit_test/unit_test.h"
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/compare_row.h" /* For HammingDistance_C */
#include "libyuv/cpu_id.h"
#include "libyuv/video_common.h"
#ifdef ENABLE_ROW_TESTS
#include "libyuv/compare_row.h" /* For HammingDistance_C */
#endif
namespace libyuv {
// hash seed of 5381 recommended.
@@ -206,6 +209,7 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) {
free_aligned_buffer_page_end(src_a);
}
#ifdef ENABLE_ROW_TESTS
TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
const int kMaxWidth = 4096 * 3;
align_buffer_page_end(src_a, kMaxWidth);
@@ -403,6 +407,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
}
#endif // ENABLE_ROW_TESTS
TEST_F(LibYUVCompareTest, TestHammingDistance) {
align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_);

View File

@@ -12,8 +12,6 @@
#include <stdlib.h>
#include <time.h>
#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
@@ -29,6 +27,10 @@
#include "libyuv/rotate.h"
#include "libyuv/video_common.h"
#ifdef ENABLE_ROW_TESTS
#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
#endif
#if defined(__arm__) || defined(__aarch64__)
// arm version subsamples by summing 4 pixels then multiplying by matrix with
// 4x smaller coefficients which are rounded to nearest integer.
@@ -37,6 +39,11 @@
#define ARM_YUV_ERROR 0
#endif
// Some functions fail on big endian. Enable these tests on all CPUs except PowerPC.
#if !defined(__powerpc__)
#define LITTLE_ENDIAN_TEST 1
#endif
namespace libyuv {
// Alias to copy pixels as is
@@ -311,10 +318,10 @@ int I400ToNV21(const uint8_t* src_y,
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
@@ -329,21 +336,21 @@ int I400ToNV21(const uint8_t* src_y,
} \
memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_uv_c, 2, \
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_uv_opt, 102, \
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
} \
int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \
@@ -357,12 +364,12 @@ int I400ToNV21(const uint8_t* src_y,
} \
EXPECT_LE(max_diff, 1); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
int abs_diff = \
abs(static_cast<int>( \
dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \
dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
static_cast<int>( \
dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \
dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
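// Why the dst_uv expressions changed from SUBSAMPLE(kWidth * 2, SUBSAMP_X)
// to SUBSAMPLE(kWidth, SUBSAMP_X) * 2: with the test harness's round-up
// macro SUBSAMPLE(v, a) = ((v) + (a) - 1) / (a), the two forms disagree for
// odd widths. A worked example for kWidth = 5, SUBSAMP_X = 2:
//   SUBSAMPLE(5 * 2, 2) = 10 / 2 = 5 bytes per UV row
//   SUBSAMPLE(5, 2) * 2 = 3 * 2  = 6 bytes per UV row
// Only the second form matches 3 interleaved UV pairs, so the old form
// under-sized the chroma buffer by one byte per row whenever width was odd.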
@@ -395,6 +402,100 @@ TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2)
TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2)
TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, \
OFF, DOY) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_uv_c, 2 * SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_uv_opt, 2 * SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
(fastrand() & 0xff); \
} \
} \
memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_uv_c, 2, \
2 * SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_uv_opt, 102, \
2 * SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_uv + OFF, \
2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_c : NULL, kWidth, \
dst_uv_c, 2 * SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_uv + OFF, \
2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_opt : NULL, \
kWidth, dst_uv_opt, 2 * SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, \
NEG kHeight); \
} \
int max_diff = 0; \
if (DOY) { \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
static_cast<int>(dst_y_opt[i * kWidth + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \
EXPECT_LE(max_diff, 1); \
} \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
int abs_diff = \
abs(static_cast<int>( \
dst_uv_c[i * 2 * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
static_cast<int>( \
dst_uv_opt[i * 2 * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \
EXPECT_LE(max_diff, 1); \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
free_aligned_buffer_page_end(dst_uv_opt); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
}
#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0, 1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1, \
1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0)
TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \
DOY) \
@@ -585,13 +686,15 @@ TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1)
TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1)
TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
#endif
TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1)
@@ -608,8 +711,10 @@ TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
#endif
#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, DIFF, N, NEG, OFF, ATTEN) \
@@ -680,8 +785,8 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)
TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
W1280, DIFF, N, NEG, OFF) \
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
BPP_B, W1280, DIFF, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
@ -716,9 +821,9 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \
memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \
memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \
FMT_B##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
kHeight); \
FMT_B##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
kHeight); \
int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \
@ -740,25 +845,29 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_ - 4, DIFF, _Any, +, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_, DIFF, _Unaligned, +, 1) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_, DIFF, _Invert, -, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
DIFF) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_ - 4, DIFF, _Any, +, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, DIFF, _Unaligned, +, 1) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, DIFF, _Invert, -, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, DIFF, _Opt, +, 0)
TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, RGB24, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RGB24, 3, 2)
TESTBIPLANARTOB(NV12, 2, 2, RAW, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RAW, 3, 2)
TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
#ifdef LITTLE_ENDIAN_TEST
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
#endif
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
#ifdef DO_THREE_PLANES
// Do 3 allocations for YUV; conventional but slower.
@@ -885,26 +994,30 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, DIFF, _Opt, +, 0)
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9.
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
#ifdef LITTLE_ENDIAN_TEST
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
#endif
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, ARM_YUV_ERROR)
#ifdef LITTLE_ENDIAN_TEST
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
#endif
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \
@@ -976,8 +1089,12 @@ TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, DIFF, N, NEG, OFF) \
@@ -1069,45 +1186,58 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
HEIGHT_B, DIFF)
// TODO(fbarchard): make ARM version of C code that matches NEON.
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
#endif
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
#endif
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
#endif
TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
#endif
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
#endif
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0)
TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
#endif
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0)
TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, DIFF, N, NEG, OFF) \
@@ -1204,7 +1334,9 @@ TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, DIFF)
#ifdef LITTLE_ENDIAN_TEST
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
#endif
#define TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_ATOB##_Symetric##N) { \
@@ -1291,6 +1423,7 @@ TEST_F(LibYUVConvertTest, ValidateJpeg) {
// EOI, SOI. Expect pass.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
orig_pixels[2] = 0xff;
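// Every JPEG marker begins with 0xff, so seeding byte 2 keeps the byte after
// SOI looking like the start of the next marker, which the validator now
// appears to require (an assumption inferred from this change).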
orig_pixels[kSize - kOff + 0] = 0xff;
orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
for (int times = 0; times < benchmark_iterations_; ++times) {
@@ -1317,6 +1450,7 @@ TEST_F(LibYUVConvertTest, ValidateJpegLarge) {
// EOI, SOI. Expect pass.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
orig_pixels[2] = 0xff;
orig_pixels[kSize - kOff + 0] = 0xff;
orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
for (int times = 0; times < benchmark_iterations_; ++times) {
@@ -1350,6 +1484,7 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) {
// SOI but no EOI. Expect fail.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
orig_pixels[2] = 0xff;
for (int times = 0; times < benchmark_iterations_; ++times) {
EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
}
@@ -1367,22 +1502,24 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) {
TEST_F(LibYUVConvertTest, FuzzJpeg) {
// SOI but no EOI. Expect fail.
for (int times = 0; times < benchmark_iterations_; ++times) {
const int kSize = fastrand() % 5000 + 2;
const int kSize = fastrand() % 5000 + 3;
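// Minimum size is now 3 bytes: SOI (0xff 0xd8) plus the 0xff seeded below as
// the opening byte of the next marker.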
align_buffer_page_end(orig_pixels, kSize);
MemRandomize(orig_pixels, kSize);
// Add SOI so frame will be scanned.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
orig_pixels[2] = 0xff;
orig_pixels[kSize - 1] = 0xff;
ValidateJpeg(orig_pixels, kSize); // Failure normally expected.
ValidateJpeg(orig_pixels,
kSize); // Failure normally expected.
free_aligned_buffer_page_end(orig_pixels);
}
}
// Test data created in GIMP. In export jpeg, disable thumbnails etc,
// choose a subsampling, and use low quality (50) to keep size small.
// Generated with xxd -i test.jpg
// Test data created in GIMP. In export jpeg, disable
// thumbnails etc, choose a subsampling, and use low quality
// (50) to keep size small. Generated with xxd -i test.jpg
// test 0 is J400
static const uint8_t kTest0Jpg[] = {
0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
@@ -1984,8 +2121,8 @@ TEST_F(LibYUVConvertTest, TestMJPGInfo) {
EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen));
EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen));
EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen));
EXPECT_EQ(1,
ShowJPegInfo(kTest4Jpg, kTest4JpgLen)); // Valid but unsupported.
EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg,
kTest4JpgLen)); // Valid but unsupported.
}
#endif // HAVE_JPEG
@@ -2296,8 +2433,9 @@ TEST_F(LibYUVConvertTest, TestDither) {
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
#endif
#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
TEST_F(LibYUVConvertTest, NAME) { \
const int kWidth = benchmark_width_; \
@@ -2437,10 +2575,12 @@ TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2)
#endif
TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4)
TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4)
TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4)
@@ -2574,6 +2714,7 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
_Opt, +, 0, FMT_C, BPP_C)
// Caveat: Destination needs to be 4 bytes
#ifdef LITTLE_ENDIAN_TEST
TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
@@ -2582,6 +2723,7 @@ TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4)
TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4)
TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4)
TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4)
#endif
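// Note: AR30/AB30 pack three 10-bit channels plus a 2-bit alpha into one
// little-endian 32-bit word, hence the LITTLE_ENDIAN_TEST guard here.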
TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
// 2x2 frames
@@ -2753,12 +2895,16 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2)
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2)
#endif
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2)
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 2)
#endif
static int Clamp(int y) {
if (y < 0) {
@@ -2903,7 +3049,8 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) {
}
// Test 10 bit YUV to 10 bit RGB
// Caveat: Result is near due to float rounding in expected result.
// Caveat: Result is near due to float rounding in expected
// result.
TEST_F(LibYUVConvertTest, TestH010ToAR30) {
const int kSize = 1024;
int histogram_b[1024];
@@ -2966,7 +3113,8 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) {
}
// Test 10 bit YUV to 10 bit RGB
// Caveat: Result is near due to float rounding in expected result.
// Caveat: Result is near due to float rounding in expected
// result.
TEST_F(LibYUVConvertTest, TestH010ToAB30) {
const int kSize = 1024;
int histogram_b[1024];
View File
@@ -16,10 +16,14 @@
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/scale.h"
#ifdef ENABLE_ROW_TESTS
#include "libyuv/scale_row.h"
#endif
namespace libyuv {
#ifdef ENABLE_ROW_TESTS
TEST_F(LibYUVBaseTest, TestFixedDiv) {
int num[1280];
int div[1280];
@@ -151,5 +155,6 @@ TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) {
EXPECT_NEAR(result_c[j], result_opt[j], 1);
}
}
#endif // ENABLE_ROW_TESTS
} // namespace libyuv
View File
@@ -12,8 +12,6 @@
#include <stdlib.h>
#include <time.h>
// row.h defines SIMD_ALIGNED, overriding unit_test.h
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
#include "../unit_test/unit_test.h"
#include "libyuv/compare.h"
@@ -25,6 +23,12 @@
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#ifdef ENABLE_ROW_TESTS
// row.h defines SIMD_ALIGNED, overriding unit_test.h
// TODO(fbarchard): Remove row.h from unittests. Test public functions.
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
#endif
namespace libyuv {
TEST_F(LibYUVPlanarTest, TestAttenuate) {
@@ -2321,7 +2325,8 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
}
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 4);
align_buffer_page_end(dst_pixels_opt, kPixels);
align_buffer_page_end(dst_pixels_c, kPixels);
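The (benchmark_width_ * benchmark_height_ + 15) & ~15 idiom now used throughout these tests rounds the pixel count up to the next multiple of 16 so row kernels can process whole SIMD-width blocks. A quick illustration (RoundUp16 is an illustrative name, not a library function):
// Add 15, then clear the low 4 bits: 1 -> 16, 16 -> 16, 17 -> 32.
static inline int RoundUp16(int n) { return (n + 15) & ~15; }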
@@ -2349,7 +2354,8 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
}
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(orig_pixels, kPixels);
align_buffer_page_end(dst_pixels_opt, kPixels * 4);
align_buffer_page_end(dst_pixels_c, kPixels * 4);
@@ -2482,7 +2488,8 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(tmp_pixels_u, kPixels);
align_buffer_page_end(tmp_pixels_v, kPixels);
@@ -2526,7 +2533,8 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(tmp_pixels_u, kPixels);
align_buffer_page_end(tmp_pixels_v, kPixels);
@@ -2568,8 +2576,39 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
free_aligned_buffer_page_end(dst_pixels_c);
}
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
MemRandomize(src_pixels, kPixels * 2);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);
MaskCpuFlags(disable_cpu_flags_);
SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
benchmark_width_ * 2, benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
benchmark_width_ * 2, benchmark_width_, benchmark_height_);
}
for (int i = 0; i < kPixels * 2; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
free_aligned_buffer_page_end(src_pixels);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);
@@ -2617,7 +2656,8 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
}
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);
@@ -2666,7 +2706,8 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
// TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
@@ -2710,7 +2751,8 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
@@ -2746,7 +2788,8 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
#endif // HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels);
align_buffer_page_end(dst_pixels_y_c, kPixels);
@@ -2776,6 +2819,7 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT16TO8ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
@@ -2821,9 +2865,11 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
#endif // HAS_CONVERT16TO8ROW_AVX2
#endif // ENABLE_ROW_TESTS
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
@@ -2855,6 +2901,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT8TO16ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
@@ -3186,7 +3233,8 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
}
GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
@@ -3239,7 +3287,8 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
&orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
@@ -3267,4 +3316,36 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
TEST_F(LibYUVPlanarTest, SwapUVRow) {
const int kPixels = benchmark_width_ * benchmark_height_;
void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
SwapUVRow_C;
align_buffer_page_end(src_pixels_vu, kPixels * 2);
align_buffer_page_end(dst_pixels_uv, kPixels * 2);
MemRandomize(src_pixels_vu, kPixels * 2);
memset(dst_pixels_uv, 1, kPixels * 2);
#if defined(HAS_SWAPUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SwapUVRow = SwapUVRow_Any_NEON;
if (IS_ALIGNED(kPixels, 16)) {
SwapUVRow = SwapUVRow_NEON;
}
}
#endif
for (int j = 0; j < benchmark_iterations_; j++) {
SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
}
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
}
free_aligned_buffer_page_end(src_pixels_vu);
free_aligned_buffer_page_end(dst_pixels_uv);
}
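The expectations above pin down the row kernel's contract: the two bytes of every 2-byte chroma pair are swapped. A minimal C sketch consistent with that contract (SwapUVRow_Sketch is illustrative, not necessarily the library's SwapUVRow_C):
// Swap the bytes within each UV pair (VU in, UV out, or vice versa).
static void SwapUVRow_Sketch(const uint8_t* src_vu, uint8_t* dst_uv,
                             int width) {
  for (int i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = src_vu[2 * i + 1];
    dst_uv[2 * i + 1] = src_vu[2 * i + 0];
  }
}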
#endif
} // namespace libyuv
View File
@@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
benchmark_cpu_info_);
}
static void I444TestRotate(int src_width,
int src_height,
int dst_width,
int dst_height,
libyuv::RotationMode mode,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (src_width < 1) {
src_width = 1;
}
if (src_height == 0) {
src_height = 1;
}
if (dst_width < 1) {
dst_width = 1;
}
if (dst_height < 1) {
dst_height = 1;
}
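// I444 has no chroma subsampling, so each UV plane matches the Y plane size.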
int src_i444_y_size = src_width * Abs(src_height);
int src_i444_uv_size = src_width * Abs(src_height);
int src_i444_size = src_i444_y_size + src_i444_uv_size * 2;
align_buffer_page_end(src_i444, src_i444_size);
for (int i = 0; i < src_i444_size; ++i) {
src_i444[i] = fastrand() & 0xff;
}
int dst_i444_y_size = dst_width * dst_height;
int dst_i444_uv_size = dst_width * dst_height;
int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2;
align_buffer_page_end(dst_i444_c, dst_i444_size);
align_buffer_page_end(dst_i444_opt, dst_i444_size);
memset(dst_i444_c, 2, dst_i444_size);
memset(dst_i444_opt, 3, dst_i444_size);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width,
dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width,
src_width, src_height, mode);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations; ++i) {
I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size,
dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size,
dst_width, src_width, src_height, mode);
}
// Rotation should be exact.
for (int i = 0; i < dst_i444_size; ++i) {
EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]);
}
free_aligned_buffer_page_end(dst_i444_c);
free_aligned_buffer_page_end(dst_i444_opt);
free_aligned_buffer_page_end(src_i444);
}
TEST_F(LibYUVRotateTest, I444Rotate0_Opt) {
I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
benchmark_height_, kRotate0, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I444Rotate90_Opt) {
I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
benchmark_width_, kRotate90, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I444Rotate180_Opt) {
I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
benchmark_height_, kRotate180, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, I444Rotate270_Opt) {
I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
benchmark_width_, kRotate270, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
// TODO(fbarchard): Remove odd width tests.
// Odd width tests work but are disabled because they use C code and can be
// exercised by passing an odd width on the command line or via environment variable.
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) {
I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) {
I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) {
I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) {
I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_);
}
static void NV12TestRotate(int src_width,
int src_height,
int dst_width,
View File
@@ -259,7 +259,7 @@ static int ARGBClipTestFilter(int src_width,
TEST_FACTOR(2, 1, 2)
TEST_FACTOR(4, 1, 4)
TEST_FACTOR(8, 1, 8)
// TEST_FACTOR(8, 1, 8) Disabled for benchmark performance.
TEST_FACTOR(3by4, 3, 4)
TEST_FACTOR(3by8, 3, 8)
TEST_FACTOR(3, 1, 3)
View File
@@ -14,7 +14,10 @@
#include "../unit_test/unit_test.h"
#include "libyuv/cpu_id.h"
#include "libyuv/scale.h"
#ifdef ENABLE_ROW_TESTS
#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
#endif
#define STRINGIZE(line) #line
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
@@ -22,14 +25,14 @@
namespace libyuv {
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
static int TestFilter(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
static int I420TestFilter(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
@@ -141,14 +144,14 @@ static int TestFilter(int src_width,
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
// 0 = exact.
static int TestFilter_16(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
static int I420TestFilter_16(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
@@ -256,6 +259,241 @@ static int TestFilter_16(int src_width,
return max_diff;
}
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
static int I444TestFilter(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
int i, j;
int src_width_uv = Abs(src_width);
int src_height_uv = Abs(src_height);
int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
int src_stride_y = Abs(src_width);
int src_stride_uv = src_width_uv;
align_buffer_page_end(src_y, src_y_plane_size);
align_buffer_page_end(src_u, src_uv_plane_size);
align_buffer_page_end(src_v, src_uv_plane_size);
if (!src_y || !src_u || !src_v) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
MemRandomize(src_y, src_y_plane_size);
MemRandomize(src_u, src_uv_plane_size);
MemRandomize(src_v, src_uv_plane_size);
int dst_width_uv = dst_width;
int dst_height_uv = dst_height;
int64_t dst_y_plane_size = (dst_width) * (dst_height);
int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
int dst_stride_y = dst_width;
int dst_stride_uv = dst_width_uv;
align_buffer_page_end(dst_y_c, dst_y_plane_size);
align_buffer_page_end(dst_u_c, dst_uv_plane_size);
align_buffer_page_end(dst_v_c, dst_uv_plane_size);
align_buffer_page_end(dst_y_opt, dst_y_plane_size);
align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
!dst_v_opt) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
double c_time = get_time();
I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
c_time = (get_time() - c_time);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
double opt_time = get_time();
for (i = 0; i < benchmark_iterations; ++i) {
I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
f);
}
opt_time = (get_time() - opt_time) / benchmark_iterations;
// Report performance of C vs OPT.
printf("filter %d - %8d us C - %8d us OPT\n", f,
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
// The C version may be a little off from the optimized path; the order of
// operations may introduce rounding somewhere. So diff the buffers and
// check that the max difference is not over 3.
int max_diff = 0;
for (i = 0; i < (dst_height); ++i) {
for (j = 0; j < (dst_width); ++j) {
int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
dst_y_opt[(i * dst_stride_y) + j]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
}
for (i = 0; i < (dst_height_uv); ++i) {
for (j = 0; j < (dst_width_uv); ++j) {
int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
dst_u_opt[(i * dst_stride_uv) + j]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
dst_v_opt[(i * dst_stride_uv) + j]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
}
free_aligned_buffer_page_end(dst_y_c);
free_aligned_buffer_page_end(dst_u_c);
free_aligned_buffer_page_end(dst_v_c);
free_aligned_buffer_page_end(dst_y_opt);
free_aligned_buffer_page_end(dst_u_opt);
free_aligned_buffer_page_end(dst_v_opt);
free_aligned_buffer_page_end(src_y);
free_aligned_buffer_page_end(src_u);
free_aligned_buffer_page_end(src_v);
return max_diff;
}
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
// 0 = exact.
static int I444TestFilter_16(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
int i;
int src_width_uv = Abs(src_width);
int src_height_uv = Abs(src_height);
int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
int src_stride_y = Abs(src_width);
int src_stride_uv = src_width_uv;
align_buffer_page_end(src_y, src_y_plane_size);
align_buffer_page_end(src_u, src_uv_plane_size);
align_buffer_page_end(src_v, src_uv_plane_size);
align_buffer_page_end(src_y_16, src_y_plane_size * 2);
align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return 0;
}
uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
MemRandomize(src_y, src_y_plane_size);
MemRandomize(src_u, src_uv_plane_size);
MemRandomize(src_v, src_uv_plane_size);
for (i = 0; i < src_y_plane_size; ++i) {
p_src_y_16[i] = src_y[i];
}
for (i = 0; i < src_uv_plane_size; ++i) {
p_src_u_16[i] = src_u[i];
p_src_v_16[i] = src_v[i];
}
int dst_width_uv = dst_width;
int dst_height_uv = dst_height;
int dst_y_plane_size = (dst_width) * (dst_height);
int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
int dst_stride_y = dst_width;
int dst_stride_uv = dst_width_uv;
align_buffer_page_end(dst_y_8, dst_y_plane_size);
align_buffer_page_end(dst_u_8, dst_uv_plane_size);
align_buffer_page_end(dst_v_8, dst_uv_plane_size);
align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (i = 0; i < benchmark_iterations; ++i) {
I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
dst_stride_uv, dst_width, dst_height, f);
}
// Expect an exact match.
int max_diff = 0;
for (i = 0; i < dst_y_plane_size; ++i) {
int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
for (i = 0; i < dst_uv_plane_size; ++i) {
int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
free_aligned_buffer_page_end(dst_y_8);
free_aligned_buffer_page_end(dst_u_8);
free_aligned_buffer_page_end(dst_v_8);
free_aligned_buffer_page_end(dst_y_16);
free_aligned_buffer_page_end(dst_u_16);
free_aligned_buffer_page_end(dst_v_16);
free_aligned_buffer_page_end(src_y);
free_aligned_buffer_page_end(src_u);
free_aligned_buffer_page_end(src_v);
free_aligned_buffer_page_end(src_y_16);
free_aligned_buffer_page_end(src_u_16);
free_aligned_buffer_page_end(src_v_16);
return max_diff;
}
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is the chroma subsample factor.
@@ -263,16 +501,32 @@ static int TestFilter_16(int src_width,
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
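// Worked example: SX(1280, 1, 2) = ((1280 / 1 + 1) / 2) * 2 * 2 = 640 * 4
// = 2560, an even source size from which a factor-of-2 downscale is exact.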
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \
int diff = TestFilter( \
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
int diff = I420TestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) { \
int diff = TestFilter_16( \
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
int diff = I444TestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
int diff = I420TestFilter_16( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
int diff = I444TestFilter_16( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
@@ -290,7 +544,7 @@ static int TestFilter_16(int src_width,
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
TEST_FACTOR(8, 1, 8, 0)
// TEST_FACTOR(8, 1, 8, 0) Disabled for benchmark performance. Takes 90 seconds.
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
@@ -300,30 +554,58 @@ TEST_FACTOR(3, 1, 3, 0)
#undef DX
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
int diff = TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) { \
int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
int diff = I420TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) { \
int diff = TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
int diff = I444TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
I420##name##From##width##x##height##_##filter##_16) { \
int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
I444##name##From##width##x##height##_##filter##_16) { \
int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
@@ -343,6 +625,7 @@ TEST_SCALETO(Scale, 1920, 1080)
#undef TEST_SCALETO1
#undef TEST_SCALETO
#ifdef ENABLE_ROW_TESTS
#ifdef HAS_SCALEROWDOWN2_SSSE3
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
@@ -524,6 +807,7 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
EXPECT_EQ(dst_pixels_c[1279], 3839);
}
#endif // ENABLE_ROW_TESTS
// Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
// difference.
@@ -614,7 +898,7 @@ static int TestPlaneFilter_16(int src_width,
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
TEST_FACTOR(8, 1, 8, 0)
// TEST_FACTOR(8, 1, 8, 0) Disabled for benchmark performance. Takes 90 seconds.
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
View File
@@ -17,6 +17,9 @@
#ifdef LIBYUV_USE_GFLAGS
#include "gflags/gflags.h"
#endif
#ifdef LIBYUV_USE_BASE_FLAGS
#include "base/commandlineflags.h"
#endif
#include "libyuv/cpu_id.h"
unsigned int fastrand_seed = 0xfb;
View File
@@ -189,7 +189,7 @@ static uint32_t SumSquareError_SSE2(const uint8_t* src_a,
,
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
); // NOLINT
); // NOLINT
return sse;
}
#endif // LIBYUV_DISABLE_X86 etc