[libyuv] Update to hash ea23edfb from https://chromium.googlesource.com/libyuv/libyuv/

parent 6175c55b2f
commit 1b1c66aae4
@@ -69,6 +69,7 @@ cc_library {
// with libyuv (b/37646797)
cc_library_static {
    name: "libyuv_static",
    vendor_available: true,
    whole_static_libs: ["libyuv"],
}
libs/libyuv/DEPS: 841 lines changed (file diff suppressed because it is too large)
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1724
+Version: 1735
License: BSD
License File: LICENSE
@@ -100,4 +100,8 @@ Inverting can be achieved with almost any libyuv function by passing a negative height.

I420Mirror and ARGBMirror can also be used to rotate by 180 degrees by passing a negative height, as in the sketch below.
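A minimal sketch of that 180-degree rotation, assuming an ARGB frame and caller-owned buffers (ARGBMirror is declared in planar_functions.h):

```cpp
#include "libyuv/planar_functions.h"  // ARGBMirror

// Mirror horizontally; the negative height makes rows be read bottom-up,
// so the combined effect is a 180 degree rotation.
libyuv::ARGBMirror(src_argb, src_stride_argb,
                   dst_argb, dst_stride_argb,
                   width, -height);
```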
# Cropping - Vertical Flip

When cropping from a subsampled format like NV21, the method of setting the start pointers won't work for an odd crop start y on the UV plane.
If the height after cropping is odd, invert the source: point to the last row, negate the strides, and pass a negative height, which
will re-invert the image as the conversion outputs.
@@ -226,6 +226,28 @@ int UYVYToI420(const uint8_t* src_uyvy,
    int width,
    int height);

// Convert AYUV to NV12.
LIBYUV_API
int AYUVToNV12(const uint8_t* src_ayuv,
    int src_stride_ayuv,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_uv,
    int dst_stride_uv,
    int width,
    int height);

// Convert AYUV to NV21.
LIBYUV_API
int AYUVToNV21(const uint8_t* src_ayuv,
    int src_stride_ayuv,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_vu,
    int dst_stride_vu,
    int width,
    int height);
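A hedged usage sketch for the new AYUV entry points; the 4-bytes-per-pixel source stride and the NV12 plane sizes are assumptions based on the packed AYUV layout:

```cpp
#include "libyuv/convert.h"  // AYUVToNV12

// Full-resolution AYUV in, NV12 out: dst_y holds width x height luma
// bytes, dst_uv holds one interleaved UV row per two source rows.
libyuv::AYUVToNV12(src_ayuv, width * 4,
                   dst_y, width,
                   dst_uv, width,
                   width, height);
```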
// Convert M420 to I420.
LIBYUV_API
int M420ToI420(const uint8_t* src_m420,
@@ -322,6 +344,19 @@ int RGB24ToI420(const uint8_t* src_rgb24,
    int width,
    int height);

// RGB little endian (bgr in memory) to J420.
LIBYUV_API
int RGB24ToJ420(const uint8_t* src_rgb24,
    int src_stride_rgb24,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_u,
    int dst_stride_u,
    uint8_t* dst_v,
    int dst_stride_v,
    int width,
    int height);

// RGB big endian (rgb in memory) to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw,
@@ -374,14 +409,21 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
    int width,
    int height);

// RGB little endian (bgr in memory) to J400.
LIBYUV_API
int RGB24ToJ400(const uint8_t* src_rgb24,
    int src_stride_rgb24,
    uint8_t* dst_yj,
    int dst_stride_yj,
    int width,
    int height);

#ifdef HAVE_JPEG
// src_mjpg is pointer to raw jpeg bytes in memory
// src_size_mjpg is size of jpeg in bytes
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
LIBYUV_API
-int MJPGToI420(const uint8_t* src_mjpg,
-    size_t src_size_mjpg,
+int MJPGToI420(const uint8_t* sample,
+    size_t sample_size,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_u,
@@ -395,8 +437,8 @@ int MJPGToI420(const uint8_t* src_mjpg,

// JPEG to NV21
LIBYUV_API
-int MJPGToNV21(const uint8_t* src_mjpg,
-    size_t src_size_mjpg,
+int MJPGToNV21(const uint8_t* sample,
+    size_t sample_size,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_vu,
@@ -408,8 +450,8 @@ int MJPGToNV21(const uint8_t* src_mjpg,

// Query size of MJPG in pixels.
LIBYUV_API
-int MJPGSize(const uint8_t* src_mjpg,
-    size_t src_size_mjpg,
+int MJPGSize(const uint8_t* sample,
+    size_t sample_size,
    int* width,
    int* height);
#endif
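Putting the renamed MJPG entry points together, a hedged decode sketch (`jpeg`/`jpeg_size` come from capture, the destination planes are assumed preallocated, and the trailing size parameters not shown in this hunk are assumed from the existing API; requires a build with HAVE_JPEG):

```cpp
#include "libyuv/convert.h"  // MJPGSize, MJPGToI420

int w = 0, h = 0;
if (libyuv::MJPGSize(jpeg, jpeg_size, &w, &h) == 0) {
  // Decode at native size: src and dst dimensions match, so no clipping.
  libyuv::MJPGToI420(jpeg, jpeg_size,
                     dst_y, w,
                     dst_u, (w + 1) / 2,
                     dst_v, (w + 1) / 2,
                     w, h,   // src_width/height reported by the capture
                     w, h);  // dst_width/height
}
```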
@@ -256,6 +256,7 @@ int NV21ToARGB(const uint8_t* src_y,
    int height);

// Convert NV12 to ABGR.
LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
    int src_stride_y,
    const uint8_t* src_uv,
@@ -298,6 +299,17 @@ int NV21ToRGB24(const uint8_t* src_y,
    int width,
    int height);

// Convert NV21 to YUV24.
LIBYUV_API
int NV21ToYUV24(const uint8_t* src_y,
    int src_stride_y,
    const uint8_t* src_vu,
    int src_stride_vu,
    uint8_t* dst_yuv24,
    int dst_stride_yuv24,
    int width,
    int height);

// Convert NV12 to RAW.
LIBYUV_API
int NV12ToRAW(const uint8_t* src_y,
@@ -627,8 +639,8 @@ int AR30ToAB30(const uint8_t* src_ar30,
// src_width/height provided by capture
// dst_width/height for clipping determine final size.
LIBYUV_API
-int MJPGToARGB(const uint8_t* src_mjpg,
-    size_t src_size_mjpg,
+int MJPGToARGB(const uint8_t* sample,
+    size_t sample_size,
    uint8_t* dst_argb,
    int dst_stride_argb,
    int src_width,
@@ -250,6 +250,28 @@ int ARGBToNV21(const uint8_t* src_argb,
    int width,
    int height);

// Convert ABGR To NV12.
LIBYUV_API
int ABGRToNV12(const uint8_t* src_abgr,
    int src_stride_abgr,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_uv,
    int dst_stride_uv,
    int width,
    int height);

// Convert ABGR To NV21.
LIBYUV_API
int ABGRToNV21(const uint8_t* src_abgr,
    int src_stride_abgr,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_vu,
    int dst_stride_vu,
    int width,
    int height);

// Convert ARGB To NV21.
LIBYUV_API
int ARGBToNV21(const uint8_t* src_argb,
@@ -26,7 +26,7 @@ namespace libyuv {
extern "C" {
#endif

-LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg_size);
+LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size);

#ifdef __cplusplus
}  // extern "C"
@@ -105,6 +105,15 @@ void MergeUVPlane(const uint8_t* src_u,
    int width,
    int height);

// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
    int src_stride_uv,
    uint8_t* dst_vu,
    int dst_stride_vu,
    int width,
    int height);
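A hedged usage sketch; treating width and height here as the dimensions of the half-resolution interleaved chroma plane is an assumption worth checking against the implementation further down:

```cpp
#include "libyuv/planar_functions.h"  // SwapUVPlane

// Turn an NV12 chroma plane into an NV21 chroma plane: every UV byte
// pair becomes VU. The luma plane is unaffected and can be copied as-is.
libyuv::SwapUVPlane(src_uv, src_stride_uv,
                    dst_vu, dst_stride_vu,
                    (width + 1) / 2, (height + 1) / 2);
```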
// Split interleaved RGB plane into separate R, G and B planes.
LIBYUV_API
void SplitRGBPlane(const uint8_t* src_rgb,
@@ -224,6 +233,19 @@ int UYVYToNV12(const uint8_t* src_uyvy,
    int width,
    int height);

// Convert NV21 to NV12.
LIBYUV_API
int NV21ToNV12(const uint8_t* src_y,
    int src_stride_y,
    const uint8_t* src_vu,
    int src_stride_vu,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_uv,
    int dst_stride_uv,
    int width,
    int height);

LIBYUV_API
int YUY2ToY(const uint8_t* src_yuy2,
    int src_stride_yuy2,
@@ -49,6 +49,24 @@ int I420Rotate(const uint8_t* src_y,
    int height,
    enum RotationMode mode);

// Rotate I444 frame.
LIBYUV_API
int I444Rotate(const uint8_t* src_y,
    int src_stride_y,
    const uint8_t* src_u,
    int src_stride_u,
    const uint8_t* src_v,
    int src_stride_v,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_u,
    int dst_stride_u,
    uint8_t* dst_v,
    int dst_stride_v,
    int width,
    int height,
    enum RotationMode mode);
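A hedged sketch of the new 4:4:4 rotation; since all three planes are full resolution, one geometry serves Y, U and V, and the destination strides follow the swapped dimensions after a 90-degree turn:

```cpp
#include "libyuv/rotate.h"  // I444Rotate, kRotate90

// Rotate a width x height I444 frame 90 degrees clockwise into a
// height x width destination.
libyuv::I444Rotate(src_y, width, src_u, width, src_v, width,
                   dst_y, height, dst_u, height, dst_v, height,
                   width, height, libyuv::kRotate90);
```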
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
@@ -275,6 +275,7 @@ extern "C" {
#define HAS_I422TOAR30ROW_SSSE3
#define HAS_MERGERGBROW_SSSE3
#define HAS_SPLITRGBROW_SSSE3
#define HAS_SWAPUVROW_SSSE3
#endif

// The following are available for AVX2 gcc/clang x86 platforms:
@@ -283,6 +284,8 @@ extern "C" {
    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
    (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ABGRTOAR30ROW_AVX2
#define HAS_ABGRTOUVROW_AVX2
#define HAS_ABGRTOYROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2
@@ -295,6 +298,9 @@ extern "C" {
#define HAS_I422TOYUY2ROW_AVX2
#define HAS_MERGEUVROW_16_AVX2
#define HAS_MULTIPLYROW_16_AVX2
#define HAS_SWAPUVROW_AVX2
// TODO(fbarchard): Fix AVX2 version of YUV24
// #define HAS_NV21TOYUV24ROW_AVX2
#endif

// The following are available for AVX512 clang x86 platforms:
@@ -330,6 +336,9 @@ extern "C" {
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_AYUVTOUVROW_NEON
#define HAS_AYUVTOVUROW_NEON
#define HAS_AYUVTOYROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_BYTETOFLOATROW_NEON
@@ -355,6 +364,7 @@ extern "C" {
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV21TORGB24ROW_NEON
#define HAS_NV21TOYUV24ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON
@@ -370,6 +380,7 @@ extern "C" {
#define HAS_SETROW_NEON
#define HAS_SPLITRGBROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_SWAPUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
@@ -815,6 +826,10 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y,
    uint8_t* dst_rgb24,
    const struct YuvConstants* yuvconstants,
    int width);
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
    const uint8_t* src_vu,
    uint8_t* dst_yuv24,
    int width);
void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
    uint8_t* dst_argb,
    const struct YuvConstants* yuvconstants,
@@ -899,6 +914,8 @@ void UYVYToARGBRow_MSA(const uint8_t* src_uyvy,

void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ABGRToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -927,7 +944,7 @@ void ARGBToUV444Row_MSA(const uint8_t* src_argb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVRow_MSA(const uint8_t* src_argb0,
+void ARGBToUVRow_MSA(const uint8_t* src_argb,
    int src_stride_argb,
    uint8_t* dst_u,
    uint8_t* dst_v,
@@ -936,7 +953,7 @@ void ARGBToUV444Row_MMI(const uint8_t* src_argb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVRow_MMI(const uint8_t* src_argb0,
+void ARGBToUVRow_MMI(const uint8_t* src_argb,
    int src_stride_argb,
    uint8_t* dst_u,
    uint8_t* dst_v,
@@ -986,32 +1003,32 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,
+void ARGBToUVJRow_MSA(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
+void BGRAToUVRow_MSA(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
+void ABGRToUVRow_MSA(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
+void RGBAToUVRow_MSA(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,
+void RGB24ToUVRow_MSA(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RAWToUVRow_MSA(const uint8_t* src_rgb0,
+void RAWToUVRow_MSA(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
@@ -1026,32 +1043,32 @@ void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
+void ARGBToUVJRow_MMI(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
+void BGRAToUVRow_MMI(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
+void ABGRToUVRow_MMI(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
+void RGBAToUVRow_MMI(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
+void RGB24ToUVRow_MMI(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RAWToUVRow_MMI(const uint8_t* src_rgb0,
+void RAWToUVRow_MMI(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
@@ -1083,29 +1100,29 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
    uint8_t* dst_y,
    int width);
-void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
-void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void BGRAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ABGRToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RGBAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RGB24ToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RAWToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, uint8_t* dst_y, int width);

-void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RGB24ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
-void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ARGBToYJRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void BGRAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void ABGRToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RGBAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RGB24ToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
+void RAWToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
@@ -1156,37 +1173,42 @@ void ARGB4444ToYRow_Any_MMI(const uint8_t* src_ptr,
    uint8_t* dst_ptr,
    int width);

-void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
+void ARGBToUVRow_AVX2(const uint8_t* src_argb,
    int src_stride_argb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
+void ABGRToUVRow_AVX2(const uint8_t* src_abgr,
+    int src_stride_abgr,
+    uint8_t* dst_u,
+    uint8_t* dst_v,
+    int width);
+void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
    int src_stride_argb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
+void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
    int src_stride_argb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
+void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
    int src_stride_argb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
+void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
    int src_stride_bgra,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
+void ABGRToUVRow_SSSE3(const uint8_t* src_abgr,
    int src_stride_abgr,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
+void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
    int src_stride_rgba,
    uint8_t* dst_u,
    uint8_t* dst_v,
@@ -1196,6 +1218,11 @@ void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
void ABGRToUVRow_Any_AVX2(const uint8_t* src_ptr,
    int src_stride_ptr,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
    int src_stride_ptr,
    uint8_t* dst_u,
@@ -1383,47 +1410,47 @@ void ARGB4444ToUVRow_Any_MMI(const uint8_t* src_ptr,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVRow_C(const uint8_t* src_rgb0,
+void ARGBToUVRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVJRow_C(const uint8_t* src_rgb0,
+void ARGBToUVJRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVRow_C(const uint8_t* src_rgb0,
+void ARGBToUVRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ARGBToUVJRow_C(const uint8_t* src_rgb0,
+void ARGBToUVJRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void BGRAToUVRow_C(const uint8_t* src_rgb0,
+void BGRAToUVRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void ABGRToUVRow_C(const uint8_t* src_rgb0,
+void ABGRToUVRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RGBAToUVRow_C(const uint8_t* src_rgb0,
+void RGBAToUVRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RGB24ToUVRow_C(const uint8_t* src_rgb0,
+void RGB24ToUVRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
-void RAWToUVRow_C(const uint8_t* src_rgb0,
+void RAWToUVRow_C(const uint8_t* src_rgb,
    int src_stride_rgb,
    uint8_t* dst_u,
    uint8_t* dst_v,
@@ -2183,6 +2210,10 @@ void NV21ToRGB24Row_C(const uint8_t* src_y,
    uint8_t* rgb_buf,
    const struct YuvConstants* yuvconstants,
    int width);
void NV21ToYUV24Row_C(const uint8_t* src_y,
    const uint8_t* src_vu,
    uint8_t* dst_yuv24,
    int width);
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
    uint8_t* rgb_buf,
    const struct YuvConstants* yuvconstants,
@@ -2349,6 +2380,10 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
    uint8_t* dst_rgb24,
    const struct YuvConstants* yuvconstants,
    int width);
void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
    const uint8_t* src_vu,
    uint8_t* dst_yuv24,
    int width);
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
    const uint8_t* src_uv,
    uint8_t* dst_rgb565,
@@ -2554,6 +2589,10 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
    uint8_t* dst_ptr,
    const struct YuvConstants* yuvconstants,
    int width);
void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y,
    const uint8_t* src_vu,
    uint8_t* dst_yuv24,
    int width);
void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
    const uint8_t* uv_buf,
    uint8_t* dst_ptr,
@@ -3027,6 +3066,10 @@ void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
    uint8_t* dst_ptr,
    const struct YuvConstants* yuvconstants,
    int width);
void NV21ToYUV24Row_Any_NEON(const uint8_t* src_y,
    const uint8_t* src_vu,
    uint8_t* dst_yuv24,
    int width);
void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
    const uint8_t* uv_buf,
    uint8_t* dst_ptr,
@@ -3344,6 +3387,40 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
    uint8_t* dst_u,
    uint8_t* dst_v,
    int width);
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width);
void SwapUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_C(const uint8_t* src_ayuv,
    int stride_ayuv,
    uint8_t* dst_uv,
    int width);
void AYUVToVURow_C(const uint8_t* src_ayuv,
    int stride_ayuv,
    uint8_t* dst_vu,
    int width);
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
    int stride_ayuv,
    uint8_t* dst_uv,
    int width);
void AYUVToVURow_NEON(const uint8_t* src_ayuv,
    int stride_ayuv,
    uint8_t* dst_vu,
    int width);
void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
void AYUVToUVRow_Any_NEON(const uint8_t* src_ayuv,
    int stride_ayuv,
    uint8_t* dst_uv,
    int width);
void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv,
    int stride_ayuv,
    uint8_t* dst_vu,
    int width);

void I422ToYUY2Row_C(const uint8_t* src_y,
    const uint8_t* src_u,
@@ -97,6 +97,54 @@ int I420Scale_16(const uint16_t* src_y,
    int dst_height,
    enum FilterMode filtering);

// Scales a YUV 4:4:4 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce ever better
// quality image, at further expense of speed.
// Returns 0 if successful.

LIBYUV_API
int I444Scale(const uint8_t* src_y,
    int src_stride_y,
    const uint8_t* src_u,
    int src_stride_u,
    const uint8_t* src_v,
    int src_stride_v,
    int src_width,
    int src_height,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_u,
    int dst_stride_u,
    uint8_t* dst_v,
    int dst_stride_v,
    int dst_width,
    int dst_height,
    enum FilterMode filtering);
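A hedged sketch of the 8-bit entry point under the trade-off described above (buffers assumed allocated; kFilterBox trades speed for the best quality when downscaling):

```cpp
#include "libyuv/scale.h"  // I444Scale, kFilterBox

// Scale a 4:4:4 frame; all three planes share the full-resolution
// geometry, so the same strides serve Y, U and V.
libyuv::I444Scale(src_y, src_w, src_u, src_w, src_v, src_w,
                  src_w, src_h,
                  dst_y, dst_w, dst_u, dst_w, dst_v, dst_w,
                  dst_w, dst_h,
                  libyuv::kFilterBox);
```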
LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
    int src_stride_y,
    const uint16_t* src_u,
    int src_stride_u,
    const uint16_t* src_v,
    int src_stride_v,
    int src_width,
    int src_height,
    uint16_t* dst_y,
    int dst_stride_y,
    uint16_t* dst_u,
    int dst_stride_u,
    uint16_t* dst_v,
    int dst_stride_v,
    int dst_width,
    int dst_height,
    enum FilterMode filtering);

#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1724
+#define LIBYUV_VERSION 1735

#endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -1,3 +0,0 @@
-set noparent
-agable@chromium.org
-phoglund@chromium.org
@@ -1,15 +0,0 @@
-# Copyright 2018 The LibYuv Project Authors. All rights reserved.
-#
-# Use of this source code is governed by a BSD-style license
-# that can be found in the LICENSE file in the root of the source
-# tree. An additional intellectual property rights grant can be found
-# in the file PATENTS. All contributing project authors may
-# be found in the AUTHORS file in the root of the source tree.
-
-
-def CheckChangeOnUpload(input_api, output_api):
-  return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)
-
-
-def CheckChangeOnCommit(input_api, output_api):
-  return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)
@@ -1 +0,0 @@
-This directory contains configuration files for infra services.
@@ -1,50 +0,0 @@
-# Commit Queue configuration file. The documentation of the format can be found
-# at http://luci-config.appspot.com/schemas/projects/refs:cq.cfg.
-
-version: 1
-cq_status_url: "https://chromium-cq-status.appspot.com"
-git_repo_url: "https://chromium.googlesource.com/libyuv/libyuv.git"
-
-gerrit {}
-
-verifiers {
-  gerrit_cq_ability {
-    committer_list: "project-libyuv-committers"
-    dry_run_access_list: "project-libyuv-tryjob-access"
-  }
-
-  try_job {
-    buckets {
-      name: "luci.libyuv.try"
-      builders { name: "win" }
-      builders { name: "win_rel" }
-      builders { name: "win_x64_rel" }
-      builders { name: "win_clang" }
-      builders { name: "win_clang_rel" }
-      builders { name: "win_x64_clang_rel" }
-      builders { name: "mac" }
-      builders { name: "mac_rel" }
-      builders { name: "mac_asan" }
-      builders { name: "ios" }
-      builders { name: "ios_rel" }
-      builders { name: "ios_arm64" }
-      builders { name: "ios_arm64_rel" }
-      builders { name: "linux" }
-      builders { name: "linux_rel" }
-      builders {
-        name: "linux_gcc"
-        experiment_percentage: 100
-      }
-      builders { name: "linux_tsan2" }
-      builders { name: "linux_asan" }
-      builders { name: "linux_msan" }
-      builders { name: "linux_ubsan" }
-      builders { name: "linux_ubsan_vptr" }
-      builders { name: "android" }
-      builders { name: "android_rel" }
-      builders { name: "android_arm64" }
-      builders { name: "android_x86" }
-      builders { name: "android_x64" }
-    }
-  }
-}
@@ -69,13 +69,13 @@ static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
    if (argb[0] != 255) {  // First byte is not Alpha of 255, so not ARGB.
      return FOURCC_BGRA;
    }
-    if (argb[3] != 255) {  // 4th byte is not Alpha of 255, so not BGRA.
+    if (argb[3] != 255) {  // Fourth byte is not Alpha of 255, so not BGRA.
      return FOURCC_ARGB;
    }
    if (argb[4] != 255) {  // Second pixel first byte is not Alpha of 255.
      return FOURCC_BGRA;
    }
-    if (argb[7] != 255) {  // Second pixel 4th byte is not Alpha of 255.
+    if (argb[7] != 255) {  // Second pixel fourth byte is not Alpha of 255.
      return FOURCC_ARGB;
    }
    argb += 8;
@@ -880,6 +880,144 @@ int UYVYToI420(const uint8_t* src_uyvy,
  return 0;
}

// Convert AYUV to NV12.
LIBYUV_API
int AYUVToNV12(const uint8_t* src_ayuv,
    int src_stride_ayuv,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_uv,
    int dst_stride_uv,
    int width,
    int height) {
  int y;
  void (*AYUVToUVRow)(const uint8_t* src_ayuv, int src_stride_ayuv,
      uint8_t* dst_uv, int width) = AYUVToUVRow_C;
  void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
      AYUVToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv;
    src_stride_ayuv = -src_stride_ayuv;
  }
// Placeholders for future Intel code.
#if defined(HAS_AYUVTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    AYUVToUVRow = AYUVToUVRow_Any_SSE2;
    AYUVToYRow = AYUVToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      AYUVToUVRow = AYUVToUVRow_SSE2;
      AYUVToYRow = AYUVToYRow_SSE2;
    }
  }
#endif
#if defined(HAS_AYUVTOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    AYUVToUVRow = AYUVToUVRow_Any_AVX2;
    AYUVToYRow = AYUVToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      AYUVToUVRow = AYUVToUVRow_AVX2;
      AYUVToYRow = AYUVToYRow_AVX2;
    }
  }
#endif

#if defined(HAS_AYUVTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    AYUVToYRow = AYUVToYRow_Any_NEON;
    AYUVToUVRow = AYUVToUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      AYUVToYRow = AYUVToYRow_NEON;
      AYUVToUVRow = AYUVToUVRow_NEON;
    }
  }
#endif

  for (y = 0; y < height - 1; y += 2) {
    AYUVToUVRow(src_ayuv, src_stride_ayuv, dst_uv, width);
    AYUVToYRow(src_ayuv, dst_y, width);
    AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
    src_ayuv += src_stride_ayuv * 2;
    dst_y += dst_stride_y * 2;
    dst_uv += dst_stride_uv;
  }
  if (height & 1) {
    AYUVToUVRow(src_ayuv, 0, dst_uv, width);
    AYUVToYRow(src_ayuv, dst_y, width);
  }
  return 0;
}

// Convert AYUV to NV21.
LIBYUV_API
int AYUVToNV21(const uint8_t* src_ayuv,
    int src_stride_ayuv,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_vu,
    int dst_stride_vu,
    int width,
    int height) {
  int y;
  void (*AYUVToVURow)(const uint8_t* src_ayuv, int src_stride_ayuv,
      uint8_t* dst_vu, int width) = AYUVToVURow_C;
  void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
      AYUVToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv;
    src_stride_ayuv = -src_stride_ayuv;
  }
// Placeholders for future Intel code.
#if defined(HAS_AYUVTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    AYUVToVURow = AYUVToVURow_Any_SSE2;
    AYUVToYRow = AYUVToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      AYUVToVURow = AYUVToVURow_SSE2;
      AYUVToYRow = AYUVToYRow_SSE2;
    }
  }
#endif
#if defined(HAS_AYUVTOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    AYUVToVURow = AYUVToVURow_Any_AVX2;
    AYUVToYRow = AYUVToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      AYUVToVURow = AYUVToVURow_AVX2;
      AYUVToYRow = AYUVToYRow_AVX2;
    }
  }
#endif

#if defined(HAS_AYUVTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    AYUVToYRow = AYUVToYRow_Any_NEON;
    AYUVToVURow = AYUVToVURow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      AYUVToYRow = AYUVToYRow_NEON;
      AYUVToVURow = AYUVToVURow_NEON;
    }
  }
#endif

  for (y = 0; y < height - 1; y += 2) {
    AYUVToVURow(src_ayuv, src_stride_ayuv, dst_vu, width);
    AYUVToYRow(src_ayuv, dst_y, width);
    AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
    src_ayuv += src_stride_ayuv * 2;
    dst_y += dst_stride_y * 2;
    dst_vu += dst_stride_vu;
  }
  if (height & 1) {
    AYUVToVURow(src_ayuv, 0, dst_vu, width);
    AYUVToYRow(src_ayuv, dst_y, width);
  }
  return 0;
}

// Convert ARGB to I420.
LIBYUV_API
int ARGBToI420(const uint8_t* src_argb,
@@ -1446,6 +1584,155 @@ int RGB24ToI420(const uint8_t* src_rgb24,
  return 0;
}

// TODO(fbarchard): Use Matrix version to implement I420 and J420.
// Convert RGB24 to J420.
LIBYUV_API
int RGB24ToJ420(const uint8_t* src_rgb24,
    int src_stride_rgb24,
    uint8_t* dst_y,
    int dst_stride_y,
    uint8_t* dst_u,
    int dst_stride_u,
    uint8_t* dst_v,
    int dst_stride_v,
    int width,
    int height) {
  int y;
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
     defined(HAS_RGB24TOYJROW_MMI))
  void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
      uint8_t* dst_u, uint8_t* dst_v, int width) = RGB24ToUVJRow_C;
  void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
      RGB24ToYJRow_C;
#else
  void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
      RGB24ToARGBRow_C;
  void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
      uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C;
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
      ARGBToYJRow_C;
#endif
  if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
    src_stride_rgb24 = -src_stride_rgb24;
  }

// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON;
    RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGB24ToYJRow = RGB24ToYJRow_NEON;
      if (IS_ALIGNED(width, 16)) {
        RGB24ToUVJRow = RGB24ToUVJRow_NEON;
      }
    }
  }
#elif defined(HAS_RGB24TOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RGB24ToUVJRow = RGB24ToUVJRow_Any_MSA;
    RGB24ToYJRow = RGB24ToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RGB24ToYJRow = RGB24ToYJRow_MSA;
      RGB24ToUVJRow = RGB24ToUVJRow_MSA;
    }
  }
#elif defined(HAS_RGB24TOYJROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    RGB24ToUVJRow = RGB24ToUVJRow_Any_MMI;
    RGB24ToYJRow = RGB24ToYJRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      RGB24ToYJRow = RGB24ToYJRow_MMI;
      if (IS_ALIGNED(width, 16)) {
        RGB24ToUVJRow = RGB24ToUVJRow_MMI;
      }
    }
  }
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else
#if defined(HAS_RGB24TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVJRow = ARGBToUVJRow_SSSE3;
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToUVJRow = ARGBToUVJRow_AVX2;
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#endif

  {
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
      defined(HAS_RGB24TOYJROW_MMI))
    // Allocate 2 rows of ARGB.
    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
#endif

    for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
     defined(HAS_RGB24TOYJROW_MMI))
      RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
      RGB24ToYJRow(src_rgb24, dst_y, width);
      RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else
      RGB24ToARGBRow(src_rgb24, row, width);
      RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
      ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
      ARGBToYJRow(row, dst_y, width);
      ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif
      src_rgb24 += src_stride_rgb24 * 2;
      dst_y += dst_stride_y * 2;
      dst_u += dst_stride_u;
      dst_v += dst_stride_v;
    }
    if (height & 1) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
     defined(HAS_RGB24TOYJROW_MMI))
      RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width);
      RGB24ToYJRow(src_rgb24, dst_y, width);
#else
      RGB24ToARGBRow(src_rgb24, row, width);
      ARGBToUVJRow(row, 0, dst_u, dst_v, width);
      ARGBToYJRow(row, dst_y, width);
#endif
    }
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
      defined(HAS_RGB24TOYJROW_MMI))
    free_aligned_buffer_64(row);
#endif
  }
  return 0;
}

// Convert RAW to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw,
@@ -2082,6 +2369,124 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
  return 0;
}

// Convert RGB24 to J400.
LIBYUV_API
int RGB24ToJ400(const uint8_t* src_rgb24,
    int src_stride_rgb24,
    uint8_t* dst_yj,
    int dst_stride_yj,
    int width,
    int height) {
  int y;
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
     defined(HAS_RGB24TOYJROW_MMI))
  void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) =
      RGB24ToYJRow_C;
#else
  void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
      RGB24ToARGBRow_C;
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
      ARGBToYJRow_C;
#endif
  if (!src_rgb24 || !dst_yj || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
    src_stride_rgb24 = -src_stride_rgb24;
  }

// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGB24ToYJRow = RGB24ToYJRow_NEON;
    }
  }
#elif defined(HAS_RGB24TOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RGB24ToYJRow = RGB24ToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RGB24ToYJRow = RGB24ToYJRow_MSA;
    }
  }
#elif defined(HAS_RGB24TOYJROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    RGB24ToYJRow = RGB24ToYJRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      RGB24ToYJRow = RGB24ToYJRow_MMI;
    }
  }
// Other platforms do intermediate conversion from RGB24 to ARGB.
#else
#if defined(HAS_RGB24TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#endif

  {
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
      defined(HAS_RGB24TOYJROW_MMI))
    // Allocate 2 rows of ARGB.
    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
#endif

    for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
     defined(HAS_RGB24TOYJROW_MMI))
      RGB24ToYJRow(src_rgb24, dst_yj, width);
      RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_yj + dst_stride_yj, width);
#else
      RGB24ToARGBRow(src_rgb24, row, width);
      RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
      ARGBToYJRow(row, dst_yj, width);
      ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width);
#endif
      src_rgb24 += src_stride_rgb24 * 2;
      dst_yj += dst_stride_yj * 2;
    }
    if (height & 1) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
     defined(HAS_RGB24TOYJROW_MMI))
      RGB24ToYJRow(src_rgb24, dst_yj, width);
#else
      RGB24ToARGBRow(src_rgb24, row, width);
      ARGBToYJRow(row, dst_yj, width);
#endif
    }
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
      defined(HAS_RGB24TOYJROW_MMI))
    free_aligned_buffer_64(row);
#endif
  }
  return 0;
}

static void SplitPixels(const uint8_t* src_u,
    int src_pixel_stride_uv,
    uint8_t* dst_u,
@@ -1793,8 +1793,9 @@ int NV21ToARGB(const uint8_t* src_y,
}

// Convert NV12 to ABGR.
-// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix.
+// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix.
+// To swap the UV use NV12 instead of NV21.
LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
    int src_stride_y,
    const uint8_t* src_uv,
@@ -1998,6 +1999,54 @@ int NV21ToRAW(const uint8_t* src_y,
      dst_stride_raw, &kYvuI601Constants, width, height);
}

// Convert NV21 to YUV24
int NV21ToYUV24(const uint8_t* src_y,
    int src_stride_y,
    const uint8_t* src_vu,
    int src_stride_vu,
    uint8_t* dst_yuv24,
    int dst_stride_yuv24,
    int width,
    int height) {
  int y;
  void (*NV21ToYUV24Row)(const uint8_t* src_y, const uint8_t* src_vu,
      uint8_t* dst_yuv24, int width) = NV21ToYUV24Row_C;
  if (!src_y || !src_vu || !dst_yuv24 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_yuv24 = dst_yuv24 + (height - 1) * dst_stride_yuv24;
    dst_stride_yuv24 = -dst_stride_yuv24;
  }
#if defined(HAS_NV21TOYUV24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    NV21ToYUV24Row = NV21ToYUV24Row_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      NV21ToYUV24Row = NV21ToYUV24Row_NEON;
    }
  }
#endif
#if defined(HAS_NV21TOYUV24ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    NV21ToYUV24Row = NV21ToYUV24Row_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      NV21ToYUV24Row = NV21ToYUV24Row_AVX2;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    NV21ToYUV24Row(src_y, src_vu, dst_yuv24, width);
    dst_yuv24 += dst_stride_yuv24;
    src_y += src_stride_y;
    if (y & 1) {
      src_vu += src_stride_vu;
    }
  }
  return 0;
}

// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8_t* src_m420,
@ -572,6 +572,326 @@ int ARGBToNV21(const uint8_t* src_argb,
|
|||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ABGRToNV12(const uint8_t* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||
ABGRToUVRow_C;
|
||||
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
|
||||
ABGRToYRow_C;
|
||||
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
|
||||
uint8_t* dst_uv, int width) = MergeUVRow_C;
|
||||
if (!src_abgr || !dst_y || !dst_uv || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
|
||||
src_stride_abgr = -src_stride_abgr;
|
||||
}
|
||||
#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
|
||||
ABGRToYRow = ABGRToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToUVRow = ABGRToUVRow_SSSE3;
|
||||
ABGRToYRow = ABGRToYRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ABGRToUVRow = ABGRToUVRow_Any_AVX2;
|
||||
ABGRToYRow = ABGRToYRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToUVRow = ABGRToUVRow_AVX2;
|
||||
ABGRToYRow = ABGRToYRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ABGRToYRow = ABGRToYRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ABGRToUVRow = ABGRToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToUVRow = ABGRToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToYRow = ABGRToYRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOUVROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ABGRToUVRow = ABGRToUVRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ABGRToUVRow = ABGRToUVRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOYROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
ABGRToYRow = ABGRToYRow_Any_MMI;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ABGRToYRow = ABGRToYRow_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ABGRTOUVROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
ABGRToUVRow = ABGRToUVRow_Any_MMI;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ABGRToUVRow = ABGRToUVRow_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_SSE2;
|
||||
if (IS_ALIGNED(halfwidth, 16)) {
|
||||
MergeUVRow_ = MergeUVRow_SSE2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_AVX2;
|
||||
if (IS_ALIGNED(halfwidth, 32)) {
|
||||
MergeUVRow_ = MergeUVRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(halfwidth, 16)) {
|
||||
MergeUVRow_ = MergeUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_MSA;
|
||||
if (IS_ALIGNED(halfwidth, 16)) {
|
||||
MergeUVRow_ = MergeUVRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
MergeUVRow_ = MergeUVRow_Any_MMI;
|
||||
if (IS_ALIGNED(halfwidth, 8)) {
|
||||
MergeUVRow_ = MergeUVRow_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
{
|
||||
// Allocate a rows of uv.
|
||||
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
|
||||
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
|
||||
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
|
||||
ABGRToYRow(src_abgr, dst_y, width);
|
||||
ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
|
||||
src_abgr += src_stride_abgr * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_uv += dst_stride_uv;
|
||||
}
|
||||
if (height & 1) {
|
||||
ABGRToUVRow(src_abgr, 0, row_u, row_v, width);
|
||||
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
|
||||
ABGRToYRow(src_abgr, dst_y, width);
|
||||
}
|
||||
free_aligned_buffer_64(row_u);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Same as NV12 but U and V swapped.
|
||||
LIBYUV_API
|
||||
int ABGRToNV21(const uint8_t* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||
ABGRToUVRow_C;
|
||||
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
|
||||
ABGRToYRow_C;
|
||||
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
|
||||
uint8_t* dst_vu, int width) = MergeUVRow_C;
|
||||
if (!src_abgr || !dst_y || !dst_vu || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
|
||||
src_stride_abgr = -src_stride_abgr;
|
||||
}
|
||||
#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
    ABGRToYRow = ABGRToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ABGRToUVRow = ABGRToUVRow_SSSE3;
      ABGRToYRow = ABGRToYRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ABGRTOYROW_AVX2) && defined(HAS_ABGRTOUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ABGRToUVRow = ABGRToUVRow_Any_AVX2;
    ABGRToYRow = ABGRToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ABGRToUVRow = ABGRToUVRow_AVX2;
      ABGRToYRow = ABGRToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_ABGRTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ABGRToYRow = ABGRToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ABGRToYRow = ABGRToYRow_NEON;
    }
  }
#endif
#if defined(HAS_ABGRTOUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ABGRToUVRow = ABGRToUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      ABGRToUVRow = ABGRToUVRow_NEON;
    }
  }
#endif
#if defined(HAS_ABGRTOYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ABGRToYRow = ABGRToYRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ABGRToYRow = ABGRToYRow_MSA;
    }
  }
#endif
#if defined(HAS_ABGRTOUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ABGRToUVRow = ABGRToUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      ABGRToUVRow = ABGRToUVRow_MSA;
    }
  }
#endif
#if defined(HAS_ABGRTOYROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ABGRToYRow = ABGRToYRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      ABGRToYRow = ABGRToYRow_MMI;
    }
  }
#endif
#if defined(HAS_ABGRTOUVROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ABGRToUVRow = ABGRToUVRow_Any_MMI;
    if (IS_ALIGNED(width, 16)) {
      ABGRToUVRow = ABGRToUVRow_MMI;
    }
  }
#endif

#if defined(HAS_MERGEUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeUVRow_ = MergeUVRow_Any_SSE2;
    if (IS_ALIGNED(halfwidth, 16)) {
      MergeUVRow_ = MergeUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeUVRow_ = MergeUVRow_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      MergeUVRow_ = MergeUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeUVRow_ = MergeUVRow_Any_NEON;
    if (IS_ALIGNED(halfwidth, 16)) {
      MergeUVRow_ = MergeUVRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    MergeUVRow_ = MergeUVRow_Any_MSA;
    if (IS_ALIGNED(halfwidth, 16)) {
      MergeUVRow_ = MergeUVRow_MSA;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    MergeUVRow_ = MergeUVRow_Any_MMI;
    if (IS_ALIGNED(halfwidth, 8)) {
      MergeUVRow_ = MergeUVRow_MMI;
    }
  }
#endif
  {
    // Allocate a row of U and a row of V.
    align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
    uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);

    for (y = 0; y < height - 1; y += 2) {
      ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width);
      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
      ABGRToYRow(src_abgr, dst_y, width);
      ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
      src_abgr += src_stride_abgr * 2;
      dst_y += dst_stride_y * 2;
      dst_vu += dst_stride_vu;
    }
    if (height & 1) {
      ABGRToUVRow(src_abgr, 0, row_u, row_v, width);
      MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
      ABGRToYRow(src_abgr, dst_y, width);
    }
    free_aligned_buffer_64(row_u);
  }
  return 0;
}

// Convert ARGB to YUY2.
LIBYUV_API
int ARGBToYUY2(const uint8_t* src_argb,
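
The blocks above follow libyuv's standard row-function dispatch idiom: start from the C fallback, upgrade to the "Any"-width SIMD wrapper when the CPU flag is set, and to the full SIMD kernel when the width is a multiple of the vector size. A minimal sketch of that idiom with hypothetical row functions (the real ones are the ABGRToYRow_* variants above):

#include <stdint.h>

// Hypothetical stand-ins for e.g. ABGRToYRow_C / _Any_SSSE3 / _SSSE3.
static void MyRow_C(const uint8_t* src, uint8_t* dst, int width) {
  for (int i = 0; i < width; ++i) dst[i] = src[i];  // portable scalar body
}
static void MyRow_Any_SIMD(const uint8_t* src, uint8_t* dst, int width) {
  MyRow_C(src, dst, width);  // pretend: SIMD body plus scalar tail
}
static void MyRow_SIMD(const uint8_t* src, uint8_t* dst, int width) {
  MyRow_C(src, dst, width);  // pretend: whole row in SIMD, width % 16 == 0
}

void ProcessRow(const uint8_t* src, uint8_t* dst, int width, bool has_simd) {
  void (*MyRow)(const uint8_t*, uint8_t*, int) = MyRow_C;  // safe default
  if (has_simd) {                  // like TestCpuFlag(kCpuHasSSSE3)
    MyRow = MyRow_Any_SIMD;        // handles any width
    if ((width & 15) == 0) {       // like IS_ALIGNED(width, 16)
      MyRow = MyRow_SIMD;          // fastest path
    }
  }
  MyRow(src, dst, width);
}
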
@@ -25,7 +25,8 @@
#endif

#endif
struct FILE;  // For jpeglib.h.

#include <stdio.h>  // For jpeglib.h.

// C++ build requires extern C for jpeg internals.
#ifdef __cplusplus
@@ -427,7 +428,15 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
}

void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {  // NOLINT
  cinfo->src->next_input_byte += num_bytes;
  jpeg_source_mgr* src = cinfo->src;
  size_t bytes = static_cast<size_t>(num_bytes);
  if (bytes > src->bytes_in_buffer) {
    src->next_input_byte = nullptr;
    src->bytes_in_buffer = 0;
  } else {
    src->next_input_byte += bytes;
    src->bytes_in_buffer -= bytes;
  }
}

void term_source(j_decompress_ptr cinfo) {
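
The replacement skip_input_data body clamps the skip to the bytes actually left in the buffer, so a malformed JPEG can no longer push the read pointer past the end. A standalone sketch of the same clamping on a hypothetical buffer cursor (names here are illustrative, not libjpeg's):

#include <stddef.h>
#include <stdint.h>

struct Cursor {
  const uint8_t* next;  // current read position
  size_t remaining;     // bytes left in the buffer
};

// Skip 'n' bytes but never run past the end of the buffer.
static void SkipClamped(Cursor* c, long n) {
  size_t bytes = static_cast<size_t>(n);
  if (bytes > c->remaining) {  // would overrun: pin to the end
    c->next = nullptr;
    c->remaining = 0;
  } else {
    c->next += bytes;
    c->remaining -= bytes;
  }
}
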
@@ -47,7 +47,8 @@ LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg) {
    // ERROR: Invalid jpeg size: src_size_mjpg
    return LIBYUV_FALSE;
  }
  if (src_mjpg[0] != 0xff || src_mjpg[1] != 0xd8) {  // SOI marker
  // SOI marker
  if (src_mjpg[0] != 0xff || src_mjpg[1] != 0xd8 || src_mjpg[2] != 0xff) {
    // ERROR: Invalid jpeg initial start code
    return LIBYUV_FALSE;
  }
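
The stricter check demands the two-byte SOI marker (0xff 0xd8) plus the leading 0xff of the following marker segment, which every well-formed JPEG stream has. The same test as a small helper:

#include <stddef.h>
#include <stdint.h>

// Returns true if the buffer plausibly begins a JPEG stream: SOI marker
// 0xff 0xd8 immediately followed by the 0xff that opens the next marker.
static bool LooksLikeJpeg(const uint8_t* buf, size_t size) {
  return size >= 3 && buf[0] == 0xff && buf[1] == 0xd8 && buf[2] == 0xff;
}
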
@@ -440,7 +440,6 @@ void MergeUVPlane(const uint8_t* src_u,
  int y;
  void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
                     uint8_t* dst_uv, int width) = MergeUVRow_C;
  // Coalesce rows.
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
@@ -504,6 +503,87 @@ void MergeUVPlane(const uint8_t* src_u,
  }
}

// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
                 int src_stride_uv,
                 uint8_t* dst_vu,
                 int dst_stride_vu,
                 int width,
                 int height) {
  int y;
  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uv = src_uv + (height - 1) * src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }
  // Coalesce rows.
  if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_vu = 0;
  }

#if defined(HAS_SWAPUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SwapUVRow = SwapUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      SwapUVRow = SwapUVRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SWAPUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SwapUVRow = SwapUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SwapUVRow = SwapUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SwapUVRow = SwapUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SwapUVRow = SwapUVRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    SwapUVRow(src_uv, dst_vu, width);
    src_uv += src_stride_uv;
    dst_vu += dst_stride_vu;
  }
}

// Convert NV21 to NV12.
LIBYUV_API
int NV21ToNV12(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_vu,
               int src_stride_vu,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_vu || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
              halfheight);
  return 0;
}

// Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
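
NV21ToNV12's full signature is visible above, so a usage sketch is straightforward. The buffer layout below (Y plane followed immediately by the half-height interleaved chroma plane, strides equal to width) is an assumption of the example, not a requirement of the API:

#include <stddef.h>
#include <stdint.h>
#include "libyuv.h"  // umbrella header; NV21ToNV12 is the function added above

// Convert one packed NV21 frame to NV12. For 4:2:0 biplanar formats the
// chroma plane is width x height/2 bytes and shares the luma stride.
int ConvertFrame(const uint8_t* nv21, uint8_t* nv12, int width, int height) {
  const uint8_t* src_y = nv21;
  const uint8_t* src_vu = nv21 + static_cast<size_t>(width) * height;
  uint8_t* dst_y = nv12;
  uint8_t* dst_uv = nv12 + static_cast<size_t>(width) * height;
  return libyuv::NV21ToNV12(src_y, width, src_vu, width,
                            dst_y, width, dst_uv, width, width, height);
}
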
@@ -481,6 +481,66 @@ int I420Rotate(const uint8_t* src_y,
  return -1;
}

LIBYUV_API
int I444Rotate(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height,
               enum libyuv::RotationMode mode) {
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
      !dst_u || !dst_v) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  switch (mode) {
    case libyuv::kRotate0:
      // copy frame
      CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
      CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
      return 0;
    case libyuv::kRotate90:
      RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
      RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
      return 0;
    case libyuv::kRotate270:
      RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
      RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
      return 0;
    case libyuv::kRotate180:
      RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
      RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
      return 0;
    default:
      break;
  }
  return -1;
}

LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
                     int src_stride_y,
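
Because I444 carries full-resolution chroma, all three planes rotate with identical geometry (unlike I420Rotate, which rotates half-size chroma planes). A usage sketch; the pad-free strides are an assumption of the example:

#include <stdint.h>
#include "libyuv.h"  // umbrella header; I444Rotate is the function added above

// Rotate a w x h I444 frame 90 degrees clockwise into an h x w destination.
int RotateI444By90(const uint8_t* sy, const uint8_t* su, const uint8_t* sv,
                   uint8_t* dy, uint8_t* du, uint8_t* dv, int w, int h) {
  return libyuv::I444Rotate(sy, w, su, w, sv, w,
                            dy, h, du, h, dv, h,  // rotated plane strides
                            w, h, libyuv::kRotate90);
}
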
@@ -286,7 +286,12 @@ ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#ifdef HAS_MERGEUVROW_MMI
ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7)
#endif

#ifdef HAS_NV21TOYUV24ROW_NEON
ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV21TOYUV24ROW_AVX2
ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
#endif
// Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
@@ -575,6 +580,9 @@ ANY11(RAWToRGB24Row_Any_MMI, RAWToRGB24Row_MMI, 0, 3, 3, 3)
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_AVX2
ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
@@ -702,6 +710,18 @@ ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#ifdef HAS_UYVYTOYROW_MMI
ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
#endif
#ifdef HAS_AYUVTOYROW_NEON
ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_SWAPUVROW_SSSE3
ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15)
#endif
#ifdef HAS_SWAPUVROW_AVX2
ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31)
#endif
#ifdef HAS_SWAPUVROW_NEON
ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
@@ -1256,6 +1276,9 @@ ANY13(SplitRGBRow_Any_MMI, SplitRGBRow_MMI, 3, 3)
#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVROW_AVX2
ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
@@ -1381,6 +1404,37 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
#endif
#undef ANY12S

// Any 1 to 1 with source stride (2 rows of source).  Outputs UV plane.
// 128 byte row allows for 32 avx ARGB pixels.
#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                         \
  void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu,   \
               int width) {                                                   \
    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                      \
    memset(temp, 0, 128 * 2); /* for msan */                                  \
    int r = width & MASK;                                                     \
    int n = width & ~MASK;                                                    \
    if (n > 0) {                                                              \
      ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, n);                           \
    }                                                                         \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);       \
    memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP,       \
           SS(r, UVSHIFT) * BPP);                                             \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */  \
      memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP,  \
             BPP);                                                            \
      memcpy(temp + 128 + SS(r, UVSHIFT) * BPP,                               \
             temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP);                   \
    }                                                                         \
    ANY_SIMD(temp, 128, temp + 256, MASK + 1);                                \
    memcpy(dst_vu + (n >> 1) * 2, temp + 256, SS(r, 1) * 2);                  \
  }

#ifdef HAS_AYUVTOVUROW_NEON
ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif
#undef ANY11S

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
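
ANY11S generates an any-width wrapper: run the SIMD kernel over the aligned prefix, copy the ragged tail of both source rows into a zeroed scratch buffer, run the kernel once on the padded tail, and copy back only the valid output bytes. A scalar sketch of the same pattern, with a hypothetical kernel that only accepts multiples of 16 pixels:

#include <stdint.h>
#include <string.h>

// Hypothetical kernel: processes width in multiples of 16 only.
static void Kernel16(const uint8_t* src, uint8_t* dst, int width) {
  memcpy(dst, src, static_cast<size_t>(width));  // stand-in for the SIMD body
}

// Any-width wrapper in the style of the ANY* macros above.
static void Kernel_Any(const uint8_t* src, uint8_t* dst, int width) {
  uint8_t temp[16 * 2];             // scratch: padded input + padded output
  int r = width & 15;               // ragged tail length
  int n = width & ~15;              // aligned prefix length
  if (n > 0) {
    Kernel16(src, dst, n);          // fast path over the aligned part
  }
  if (r > 0) {
    memset(temp, 0, sizeof(temp));  // avoid reading uninitialized bytes
    memcpy(temp, src + n, static_cast<size_t>(r));
    Kernel16(temp, temp + 16, 16);  // one padded SIMD-width pass
    memcpy(dst + n, temp + 16, static_cast<size_t>(r));  // keep valid output
  }
}
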
@@ -3231,6 +3231,107 @@ void GaussCol_C(const uint16_t* src0,
  }
}

// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
    dst_yuv24[3] = src_vu[0];  // V
    dst_yuv24[4] = src_vu[1];  // U
    dst_yuv24[5] = src_y[1];   // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
  }
}

// Filter 2 rows of AYUV UV's (444) into UV (420).
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Output a row of UV values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    src_ayuv += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    // Odd width: repeat the last column.  U is byte 1 and V is byte 0 of
    // AYUV, matching the loop above.
    dst_uv[0] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 1] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 0] + 2) >>
                2;
  }
}

// Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Output a row of VU values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    src_ayuv += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 0] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 1] + 2) >>
                2;
  }
}

// Copy row of AYUV Y's into Y
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[2];  // v, u, y, a
    src_ayuv += 4;
  }
}

// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[0];
    uint8_t v = src_uv[1];
    dst_vu[0] = v;
    dst_vu[1] = u;
    src_uv += 2;
    dst_vu += 2;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
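
AYUVToUVRow_C and AYUVToVURow_C reduce 4:4:4 chroma to 4:2:0 with a rounded 2x2 average, (a + b + c + d + 2) >> 2. A tiny worked check of that expression:

#include <assert.h>
#include <stdint.h>

// Rounded average of four 4:4:4 chroma samples -> one 4:2:0 sample,
// mirroring the (a + b + c + d + 2) >> 2 expression used above.
static uint8_t Avg2x2(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
  return static_cast<uint8_t>((a + b + c + d + 2) >> 2);
}

int main() {
  assert(Avg2x2(10, 10, 10, 10) == 10);  // flat block is unchanged
  assert(Avg2x2(10, 10, 10, 11) == 10);  // 41/4 rounds down to 10
  assert(Avg2x2(10, 10, 11, 11) == 11);  // 42/4 = 10.5 rounds up to 11
  return 0;
}
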
@@ -1154,6 +1154,48 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
}
#endif  // HAS_ARGBTOYROW_AVX2

#ifdef HAS_ABGRTOYROW_AVX2
// Convert 32 ABGR pixels (128 bytes) to 32 Y values.
void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
  asm volatile(
      "vbroadcastf128 %3,%%ymm4 \n"
      "vbroadcastf128 %4,%%ymm5 \n"
      "vmovdqu %5,%%ymm6 \n"

      LABELALIGN
      "1: \n"
      "vmovdqu (%0),%%ymm0 \n"
      "vmovdqu 0x20(%0),%%ymm1 \n"
      "vmovdqu 0x40(%0),%%ymm2 \n"
      "vmovdqu 0x60(%0),%%ymm3 \n"
      "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
      "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
      "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
      "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
      "lea 0x80(%0),%0 \n"
      "vphaddw %%ymm1,%%ymm0,%%ymm0 \n"  // mutates.
      "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
      "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
      "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
      "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n"  // mutates.
      "vpermd %%ymm0,%%ymm6,%%ymm0 \n"  // unmutate.
      "vpaddb %%ymm5,%%ymm0,%%ymm0 \n"  // add 16 for Y
      "vmovdqu %%ymm0,(%1) \n"
      "lea 0x20(%1),%1 \n"
      "sub $0x20,%2 \n"
      "jg 1b \n"
      "vzeroupper \n"
      : "+r"(src_abgr),  // %0
        "+r"(dst_y),     // %1
        "+r"(width)      // %2
      : "m"(kABGRToY),   // %3
        "m"(kAddY16),    // %4
        "m"(kPermdARGBToY_AVX)  // %5
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif  // HAS_ABGRTOYROW_AVX2

#ifdef HAS_ARGBTOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
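
ABGRToYRow_AVX2 computes BT.601 studio-range luma: vpmaddubsw against kABGRToY, a 7-bit shift, then kAddY16 adds the +16 bias. A scalar reference of the same computation, assuming libyuv's customary fixed-point constants (the AVX2 path uses halved constants with the 7-bit vpsrlw shift seen above):

#include <stdint.h>

// One BT.601 studio-range luma sample from an averaged RGB value, as in
// libyuv's C path: 0x1080 folds in the +16 bias and the rounding term.
static uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return static_cast<uint8_t>((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
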
@@ -1328,6 +1370,69 @@ void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
}
#endif  // HAS_ARGBTOUVROW_AVX2

#ifdef HAS_ABGRTOUVROW_AVX2
void ABGRToUVRow_AVX2(const uint8_t* src_abgr0,
                      int src_stride_abgr,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  asm volatile(
      "vbroadcastf128 %5,%%ymm5 \n"
      "vbroadcastf128 %6,%%ymm6 \n"
      "vbroadcastf128 %7,%%ymm7 \n"
      "sub %1,%2 \n"

      LABELALIGN
      "1: \n"
      "vmovdqu (%0),%%ymm0 \n"
      "vmovdqu 0x20(%0),%%ymm1 \n"
      "vmovdqu 0x40(%0),%%ymm2 \n"
      "vmovdqu 0x60(%0),%%ymm3 \n"
      "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
      "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
      "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
      "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
      "lea 0x80(%0),%0 \n"
      "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
      "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
      "vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
      "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
      "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
      "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"

      "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
      "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
      "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
      "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
      "vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
      "vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
      "vpsraw $0x8,%%ymm1,%%ymm1 \n"
      "vpsraw $0x8,%%ymm0,%%ymm0 \n"
      "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
      "vpermq $0xd8,%%ymm0,%%ymm0 \n"
      "vpshufb %8,%%ymm0,%%ymm0 \n"
      "vpaddb %%ymm5,%%ymm0,%%ymm0 \n"

      "vextractf128 $0x0,%%ymm0,(%1) \n"
      "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
      "lea 0x10(%1),%1 \n"
      "sub $0x20,%3 \n"
      "jg 1b \n"
      "vzeroupper \n"
      : "+r"(src_abgr0),  // %0
        "+r"(dst_u),      // %1
        "+r"(dst_v),      // %2
        "+rm"(width)      // %3
      : "r"((intptr_t)(src_stride_abgr)),  // %4
        "m"(kAddUV128),   // %5
        "m"(kABGRToV),    // %6
        "m"(kABGRToU),    // %7
        "m"(kShufARGBToUV_AVX)  // %8
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7");
}
#endif  // HAS_ABGRTOUVROW_AVX2

#ifdef HAS_ARGBTOUVJROW_AVX2
void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
                       int src_stride_argb,
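
ABGRToUVRow_AVX2 mirrors the ARGB version with byte-reordered coefficient tables (kABGRToU/kABGRToV), and the final vpaddb adds the 128 chroma bias from kAddUV128. For orientation, a scalar sketch of the BT.601 chroma transform these tables encode, using the constants from libyuv's C helpers (stated here as an assumption from the C path, not read off the tables above):

#include <stdint.h>

// BT.601 chroma from a 2x2-averaged RGB value; 0x8080 folds in the
// 128 bias and the rounding term, matching the vpaddb of kAddUV128 above.
static uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return static_cast<uint8_t>((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return static_cast<uint8_t>((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}
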
@@ -5238,7 +5343,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
      ,
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
      );
  );
}
#endif  // HAS_ARGBMULTIPLYROW_AVX2
@@ -6120,24 +6225,24 @@ void I422ToYUY2Row_SSE2(const uint8_t* src_y,
                        int width) {
  asm volatile(

      "sub %1,%2 \n"
      "sub %1,%2 \n"

      LABELALIGN
      "1: \n"
      "movq (%1),%%xmm2 \n"
      "movq 0x00(%1,%2,1),%%xmm1 \n"
      "add $0x8,%1 \n"
      "punpcklbw %%xmm1,%%xmm2 \n"
      "movdqu (%0),%%xmm0 \n"
      "add $0x10,%0 \n"
      "movdqa %%xmm0,%%xmm1 \n"
      "punpcklbw %%xmm2,%%xmm0 \n"
      "punpckhbw %%xmm2,%%xmm1 \n"
      "movdqu %%xmm0,(%3) \n"
      "movdqu %%xmm1,0x10(%3) \n"
      "lea 0x20(%3),%3 \n"
      "sub $0x10,%4 \n"
      "jg 1b \n"
      "1: \n"
      "movq (%1),%%xmm2 \n"
      "movq 0x00(%1,%2,1),%%xmm1 \n"
      "add $0x8,%1 \n"
      "punpcklbw %%xmm1,%%xmm2 \n"
      "movdqu (%0),%%xmm0 \n"
      "add $0x10,%0 \n"
      "movdqa %%xmm0,%%xmm1 \n"
      "punpcklbw %%xmm2,%%xmm0 \n"
      "punpckhbw %%xmm2,%%xmm1 \n"
      "movdqu %%xmm0,(%3) \n"
      "movdqu %%xmm1,0x10(%3) \n"
      "lea 0x20(%3),%3 \n"
      "sub $0x10,%4 \n"
      "jg 1b \n"
      : "+r"(src_y),  // %0
        "+r"(src_u),  // %1
        "+r"(src_v),  // %2
@@ -6156,24 +6261,24 @@ void I422ToUYVYRow_SSE2(const uint8_t* src_y,
                        int width) {
  asm volatile(

      "sub %1,%2 \n"
      "sub %1,%2 \n"

      LABELALIGN
      "1: \n"
      "movq (%1),%%xmm2 \n"
      "movq 0x00(%1,%2,1),%%xmm1 \n"
      "add $0x8,%1 \n"
      "punpcklbw %%xmm1,%%xmm2 \n"
      "movdqu (%0),%%xmm0 \n"
      "movdqa %%xmm2,%%xmm1 \n"
      "add $0x10,%0 \n"
      "punpcklbw %%xmm0,%%xmm1 \n"
      "punpckhbw %%xmm0,%%xmm2 \n"
      "movdqu %%xmm1,(%3) \n"
      "movdqu %%xmm2,0x10(%3) \n"
      "lea 0x20(%3),%3 \n"
      "sub $0x10,%4 \n"
      "jg 1b \n"
      "1: \n"
      "movq (%1),%%xmm2 \n"
      "movq 0x00(%1,%2,1),%%xmm1 \n"
      "add $0x8,%1 \n"
      "punpcklbw %%xmm1,%%xmm2 \n"
      "movdqu (%0),%%xmm0 \n"
      "movdqa %%xmm2,%%xmm1 \n"
      "add $0x10,%0 \n"
      "punpcklbw %%xmm0,%%xmm1 \n"
      "punpckhbw %%xmm0,%%xmm2 \n"
      "movdqu %%xmm1,(%3) \n"
      "movdqu %%xmm2,0x10(%3) \n"
      "lea 0x20(%3),%3 \n"
      "sub $0x10,%4 \n"
      "jg 1b \n"
      : "+r"(src_y),  // %0
        "+r"(src_u),  // %1
        "+r"(src_v),  // %2
@@ -6192,27 +6297,27 @@ void I422ToYUY2Row_AVX2(const uint8_t* src_y,
                        int width) {
  asm volatile(

      "sub %1,%2 \n"
      "sub %1,%2 \n"

      LABELALIGN
      "1: \n"
      "vpmovzxbw (%1),%%ymm1 \n"
      "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
      "add $0x10,%1 \n"
      "vpsllw $0x8,%%ymm2,%%ymm2 \n"
      "vpor %%ymm1,%%ymm2,%%ymm2 \n"
      "vmovdqu (%0),%%ymm0 \n"
      "add $0x20,%0 \n"
      "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n"
      "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n"
      "vextractf128 $0x0,%%ymm1,(%3) \n"
      "vextractf128 $0x0,%%ymm2,0x10(%3) \n"
      "vextractf128 $0x1,%%ymm1,0x20(%3) \n"
      "vextractf128 $0x1,%%ymm2,0x30(%3) \n"
      "lea 0x40(%3),%3 \n"
      "sub $0x20,%4 \n"
      "jg 1b \n"
      "vzeroupper \n"
      "1: \n"
      "vpmovzxbw (%1),%%ymm1 \n"
      "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
      "add $0x10,%1 \n"
      "vpsllw $0x8,%%ymm2,%%ymm2 \n"
      "vpor %%ymm1,%%ymm2,%%ymm2 \n"
      "vmovdqu (%0),%%ymm0 \n"
      "add $0x20,%0 \n"
      "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n"
      "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n"
      "vextractf128 $0x0,%%ymm1,(%3) \n"
      "vextractf128 $0x0,%%ymm2,0x10(%3) \n"
      "vextractf128 $0x1,%%ymm1,0x20(%3) \n"
      "vextractf128 $0x1,%%ymm2,0x30(%3) \n"
      "lea 0x40(%3),%3 \n"
      "sub $0x20,%4 \n"
      "jg 1b \n"
      "vzeroupper \n"
      : "+r"(src_y),  // %0
        "+r"(src_u),  // %1
        "+r"(src_v),  // %2
@@ -6231,27 +6336,27 @@ void I422ToUYVYRow_AVX2(const uint8_t* src_y,
                        int width) {
  asm volatile(

      "sub %1,%2 \n"
      "sub %1,%2 \n"

      LABELALIGN
      "1: \n"
      "vpmovzxbw (%1),%%ymm1 \n"
      "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
      "add $0x10,%1 \n"
      "vpsllw $0x8,%%ymm2,%%ymm2 \n"
      "vpor %%ymm1,%%ymm2,%%ymm2 \n"
      "vmovdqu (%0),%%ymm0 \n"
      "add $0x20,%0 \n"
      "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n"
      "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n"
      "vextractf128 $0x0,%%ymm1,(%3) \n"
      "vextractf128 $0x0,%%ymm2,0x10(%3) \n"
      "vextractf128 $0x1,%%ymm1,0x20(%3) \n"
      "vextractf128 $0x1,%%ymm2,0x30(%3) \n"
      "lea 0x40(%3),%3 \n"
      "sub $0x20,%4 \n"
      "jg 1b \n"
      "vzeroupper \n"
      "1: \n"
      "vpmovzxbw (%1),%%ymm1 \n"
      "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n"
      "add $0x10,%1 \n"
      "vpsllw $0x8,%%ymm2,%%ymm2 \n"
      "vpor %%ymm1,%%ymm2,%%ymm2 \n"
      "vmovdqu (%0),%%ymm0 \n"
      "add $0x20,%0 \n"
      "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n"
      "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n"
      "vextractf128 $0x0,%%ymm1,(%3) \n"
      "vextractf128 $0x0,%%ymm2,0x10(%3) \n"
      "vextractf128 $0x1,%%ymm1,0x20(%3) \n"
      "vextractf128 $0x1,%%ymm2,0x30(%3) \n"
      "lea 0x40(%3),%3 \n"
      "sub $0x20,%4 \n"
      "jg 1b \n"
      "vzeroupper \n"
      : "+r"(src_y),  // %0
        "+r"(src_u),  // %1
        "+r"(src_v),  // %2
@@ -6669,6 +6774,186 @@ void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
}
#endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3

#ifdef HAS_NV21TOYUV24ROW_AVX2

// begin NV21ToYUV24Row_C avx2 constants
static const ulvec8 kBLEND0 = {0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80, 0x00,
                               0x80, 0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80,
                               0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80,
                               0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00};

static const ulvec8 kBLEND1 = {0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00,
                               0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
                               0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
                               0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80};

static const ulvec8 kBLEND2 = {0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
                               0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80,
                               0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00,
                               0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00};

static const ulvec8 kSHUF0 = {0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d,
                              0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05,
                              0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d,
                              0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05};

static const ulvec8 kSHUF1 = {0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02,
                              0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80,
                              0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02,
                              0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80};

static const ulvec8 kSHUF2 = {0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80,
                              0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f,
                              0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80,
                              0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f};

static const ulvec8 kSHUF3 = {0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80,
                              0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80,
                              0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80,
                              0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80};

static const ulvec8 kSHUF4 = {0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80,
                              0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a,
                              0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80,
                              0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a};

static const ulvec8 kSHUF5 = {0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07,
                              0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80,
                              0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07,
                              0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80};

// NV21ToYUV24Row_AVX2
void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_yuv24,
                         int width) {
  uint8_t* src_y_ptr;
  uint64_t src_offset = 0;
  uint64_t width64;

  width64 = width;
  src_y_ptr = (uint8_t*)src_y;

  asm volatile(
      "vmovdqu %5, %%ymm0 \n"  // init blend value
      "vmovdqu %6, %%ymm1 \n"  // init blend value
      "vmovdqu %7, %%ymm2 \n"  // init blend value
      // "sub $0x20, %3 \n"  // sub 32 from width for final loop

      LABELALIGN
      "1: \n"  // label 1
      "vmovdqu (%0,%4), %%ymm3 \n"  // src_y
      "vmovdqu 1(%1,%4), %%ymm4 \n"  // src_uv+1
      "vmovdqu (%1), %%ymm5 \n"  // src_uv
      "vpshufb %8, %%ymm3, %%ymm13 \n"   // y, kSHUF0 for shuf
      "vpshufb %9, %%ymm4, %%ymm14 \n"   // uv+1, kSHUF1 for shuf
      "vpshufb %10, %%ymm5, %%ymm15 \n"  // uv, kSHUF2 for shuf
      "vpshufb %11, %%ymm3, %%ymm3 \n"   // y, kSHUF3 for shuf
      "vpshufb %12, %%ymm4, %%ymm4 \n"   // uv+1, kSHUF4 for shuf
      "vpblendvb %%ymm0, %%ymm14, %%ymm13, %%ymm12 \n"  // blend 0
      "vpblendvb %%ymm0, %%ymm13, %%ymm14, %%ymm14 \n"  // blend 0
      "vpblendvb %%ymm2, %%ymm15, %%ymm12, %%ymm12 \n"  // blend 2
      "vpblendvb %%ymm1, %%ymm15, %%ymm14, %%ymm13 \n"  // blend 1
      "vpshufb %13, %%ymm5, %%ymm15 \n"  // shuffle const
      "vpor %%ymm4, %%ymm3, %%ymm5 \n"  // get results
      "vmovdqu %%ymm12, 0x20(%2) \n"  // store dst_yuv+20h
      "vpor %%ymm15, %%ymm5, %%ymm3 \n"  // get results
      "add $0x20, %4 \n"  // add to src buffer ptr
      "vinserti128 $0x1, %%xmm3, %%ymm13, %%ymm4 \n"  // insert
      "vperm2i128 $0x31, %%ymm13, %%ymm3, %%ymm5 \n"  // insert
      "vmovdqu %%ymm4, (%2) \n"  // store dst_yuv
      "vmovdqu %%ymm5, 0x40(%2) \n"  // store dst_yuv+40h
      "add $0x60,%2 \n"  // add to dst buffer ptr
      // "cmp %3, %4 \n"  // (width64 - 32 bytes) and src_offset
      "sub $0x20,%3 \n"  // 32 pixels per loop
      "jg 1b \n"
      "vzeroupper \n"  // sse-avx2 transitions

      : "+r"(src_y),       // %0
        "+r"(src_vu),      // %1
        "+r"(dst_yuv24),   // %2
        "+r"(width64),     // %3
        "+r"(src_offset)   // %4
      : "m"(kBLEND0),      // %5
        "m"(kBLEND1),      // %6
        "m"(kBLEND2),      // %7
        "m"(kSHUF0),       // %8
        "m"(kSHUF1),       // %9
        "m"(kSHUF2),       // %10
        "m"(kSHUF3),       // %11
        "m"(kSHUF4),       // %12
        "m"(kSHUF5)        // %13
      : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm12",
        "xmm13", "xmm14", "xmm15");
}
#endif  // HAS_NV21TOYUV24ROW_AVX2

#ifdef HAS_SWAPUVROW_SSSE3

// Shuffle table for swapping adjacent byte pairs (UV -> VU).
static const uvec8 kShuffleUVToVU = {1u, 0u, 3u, 2u, 5u, 4u, 7u, 6u,
                                     9u, 8u, 11u, 10u, 13u, 12u, 15u, 14u};

// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  asm volatile(

      "movdqu %3,%%xmm5 \n"

      LABELALIGN
      "1: \n"
      "movdqu (%0),%%xmm0 \n"
      "movdqu 0x10(%0),%%xmm1 \n"
      "lea 0x20(%0),%0 \n"
      "pshufb %%xmm5,%%xmm0 \n"
      "pshufb %%xmm5,%%xmm1 \n"
      "movdqu %%xmm0,(%1) \n"
      "movdqu %%xmm1,0x10(%1) \n"
      "lea 0x20(%1),%1 \n"
      "sub $0x10,%2 \n"
      "jg 1b \n"
      : "+r"(src_uv),  // %0
        "+r"(dst_vu),  // %1
        "+r"(width)    // %2
      : "m"(kShuffleUVToVU)  // %3
      : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif  // HAS_SWAPUVROW_SSSE3

#ifdef HAS_SWAPUVROW_AVX2
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  asm volatile(

      "vbroadcastf128 %3,%%ymm5 \n"

      LABELALIGN
      "1: \n"
      "vmovdqu (%0),%%ymm0 \n"
      "vmovdqu 0x20(%0),%%ymm1 \n"
      "lea 0x40(%0),%0 \n"
      "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
      "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
      "vmovdqu %%ymm0,(%1) \n"
      "vmovdqu %%ymm1,0x20(%1) \n"
      "lea 0x40(%1),%1 \n"
      "sub $0x20,%2 \n"
      "jg 1b \n"
      "vzeroupper \n"
      : "+r"(src_uv),  // %0
        "+r"(dst_vu),  // %1
        "+r"(width)    // %2
      : "m"(kShuffleUVToVU)  // %3
      : "memory", "cc", "xmm0", "xmm1", "xmm5");
}
#endif  // HAS_SWAPUVROW_AVX2

#endif  // defined(__x86_64__) || defined(__i386__)

#ifdef __cplusplus
@@ -561,7 +561,7 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
        "+r"(width)  // %3  // Output registers
      :  // Input registers
      : "cc", "memory", "q0", "q1"  // Clobber List
      );
  );
}

// Reads 16 U's and V's and writes out 16 pairs of UV.
@@ -582,7 +582,7 @@ void MergeUVRow_NEON(const uint8_t* src_u,
        "+r"(width)  // %3  // Output registers
      :  // Input registers
      : "cc", "memory", "q0", "q1"  // Clobber List
      );
  );
}

// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b.
@@ -607,7 +607,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
        "+r"(width)  // %4
      :  // Input registers
      : "cc", "memory", "d0", "d1", "d2"  // Clobber List
      );
  );
}

// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
@@ -632,7 +632,7 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
        "+r"(width)  // %4
      :  // Input registers
      : "cc", "memory", "q0", "q1", "q2"  // Clobber List
      );
  );
}

// Copy multiple of 32. vld4.8 allows unaligned and is fastest on a15.
@@ -648,7 +648,7 @@ void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
        "+r"(width)  // %2  // Output registers
      :  // Input registers
      : "cc", "memory", "q0", "q1"  // Clobber List
      );
  );
}

// SetRow writes 'width' bytes using an 8 bit value repeated.
@@ -761,7 +761,7 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
        "+r"(width)  // %2
      :
      : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
      );
  );
}

void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
@@ -778,7 +778,7 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
      );
  );
}

void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
@@ -795,7 +795,7 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "d1", "d2", "d3"  // Clobber List
      );
  );
}

#define RGB565TOARGB \
@@ -826,7 +826,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
        "+r"(width)  // %2
      :
      : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
      );
  );
}

#define ARGB1555TOARGB \
@@ -872,7 +872,7 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
        "+r"(width)  // %2
      :
      : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
      );
  );
}

#define ARGB4444TOARGB \
@@ -901,7 +901,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
        "+r"(width)  // %2
      :
      : "cc", "memory", "q0", "q1", "q2"  // Clobber List
      );
  );
}

void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
@@ -919,7 +919,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
        "+r"(width)  // %2
      :
      : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
      );
  );
}

void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
@@ -935,7 +935,7 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
      );
  );
}

void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
@@ -950,7 +950,7 @@ void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "q0", "q1"  // Clobber List
      );
  );
}

void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
@@ -965,7 +965,7 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "q0", "q1"  // Clobber List
      );
  );
}

void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
@@ -985,7 +985,7 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
        "+r"(width)  // %3
      :
      : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
      );
  );
}

void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
@@ -1005,7 +1005,7 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
        "+r"(width)  // %3
      :
      : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
      );
  );
}

void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
@@ -1032,7 +1032,7 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
      :
      : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
        "d7"  // Clobber List
      );
  );
}

void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
@@ -1059,7 +1059,7 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
      :
      : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
        "d7"  // Clobber List
      );
  );
}

// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
@@ -1081,7 +1081,7 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb,
        "+r"(width)  // %2
      : "r"(shuffler)  // %3
      : "cc", "memory", "q0", "q1", "q2"  // Clobber List
      );
  );
}

void I422ToYUY2Row_NEON(const uint8_t* src_y,
@@ -1241,7 +1241,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
        "+r"(width)  // %2
      :
      : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
      );
  );
}

void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@@ -2564,7 +2564,7 @@ void SobelXRow_NEON(const uint8_t* src_y0,
      : "r"(2),  // %5
        "r"(6)   // %6
      : "cc", "memory", "q0", "q1"  // Clobber List
      );
  );
}

// SobelY as a matrix is
@@ -2601,7 +2601,7 @@ void SobelYRow_NEON(const uint8_t* src_y0,
      : "r"(1),  // %4
        "r"(6)   // %5
      : "cc", "memory", "q0", "q1"  // Clobber List
      );
  );
}

// %y passes a float as a scalar vector for vector * scalar multiply.
@@ -2685,6 +2685,205 @@ void ByteToFloatRow_NEON(const uint8_t* src,
      : "cc", "memory", "q1", "q2", "q3");
}

// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_NEON(const uint16_t* src0,
                   const uint16_t* src1,
                   const uint16_t* src2,
                   const uint16_t* src3,
                   const uint16_t* src4,
                   uint32_t* dst,
                   int width) {
  asm volatile(
      "vmov.u16 d6, #4 \n"  // constant 4
      "vmov.u16 d7, #6 \n"  // constant 6

      "1: \n"
      "vld1.16 {q1}, [%0]! \n"  // load 8 samples, 5 rows
      "vld1.16 {q2}, [%4]! \n"
      "vaddl.u16 q0, d2, d4 \n"  // * 1
      "vaddl.u16 q1, d3, d5 \n"  // * 1
      "vld1.16 {q2}, [%1]! \n"
      "vmlal.u16 q0, d4, d6 \n"  // * 4
      "vmlal.u16 q1, d5, d6 \n"  // * 4
      "vld1.16 {q2}, [%2]! \n"
      "vmlal.u16 q0, d4, d7 \n"  // * 6
      "vmlal.u16 q1, d5, d7 \n"  // * 6
      "vld1.16 {q2}, [%3]! \n"
      "vmlal.u16 q0, d4, d6 \n"  // * 4
      "vmlal.u16 q1, d5, d6 \n"  // * 4
      "subs %6, %6, #8 \n"  // 8 processed per loop
      "vst1.32 {q0, q1}, [%5]! \n"  // store 8 samples
      "bgt 1b \n"
      : "+r"(src0),  // %0
        "+r"(src1),  // %1
        "+r"(src2),  // %2
        "+r"(src3),  // %3
        "+r"(src4),  // %4
        "+r"(dst),   // %5
        "+r"(width)  // %6
      :
      : "cc", "memory", "q0", "q1", "q2", "q3");
}

// filter 5 adjacent samples with 1, 4, 6, 4, 1 coefficients to produce 1.
void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
  const uint32_t* src1 = src + 1;
  const uint32_t* src2 = src + 2;
  const uint32_t* src3 = src + 3;
  asm volatile(
      "vmov.u32 q10, #4 \n"  // constant 4
      "vmov.u32 q11, #6 \n"  // constant 6

      "1: \n"
      "vld1.32 {q0, q1}, [%0]! \n"  // load 12 source samples
      "vld1.32 {q2}, [%0] \n"
      "vadd.u32 q0, q0, q1 \n"  // * 1
      "vadd.u32 q1, q1, q2 \n"  // * 1
      "vld1.32 {q2, q3}, [%2]! \n"
      "vmla.u32 q0, q2, q11 \n"  // * 6
      "vmla.u32 q1, q3, q11 \n"  // * 6
      "vld1.32 {q2, q3}, [%1]! \n"
      "vld1.32 {q8, q9}, [%3]! \n"
      "vadd.u32 q2, q2, q8 \n"  // add rows for * 4
      "vadd.u32 q3, q3, q9 \n"
      "vmla.u32 q0, q2, q10 \n"  // * 4
      "vmla.u32 q1, q3, q10 \n"  // * 4
      "subs %5, %5, #8 \n"  // 8 processed per loop
      "vqshrn.u32 d0, q0, #8 \n"  // round and pack
      "vqshrn.u32 d1, q1, #8 \n"
      "vst1.u16 {q0}, [%4]! \n"  // store 8 samples
      "bgt 1b \n"
      : "+r"(src),   // %0
        "+r"(src1),  // %1
        "+r"(src2),  // %2
        "+r"(src3),  // %3
        "+r"(dst),   // %4
        "+r"(width)  // %5
      :
      : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
}

// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_yuv24,
                         int width) {
  asm volatile(
      "1: \n"
      "vld1.8 {q2}, [%0]! \n"  // load 16 Y values
      "vld2.8 {d0, d2}, [%1]! \n"  // load 8 VU values
      "vmov d1, d0 \n"
      "vzip.u8 d0, d1 \n"  // VV
      "vmov d3, d2 \n"
      "vzip.u8 d2, d3 \n"  // UU
      "subs %3, %3, #16 \n"  // 16 pixels per loop
      "vst3.8 {d0, d2, d4}, [%2]! \n"  // store 16 YUV pixels
      "vst3.8 {d1, d3, d5}, [%2]! \n"
      "bgt 1b \n"
      : "+r"(src_y),      // %0
        "+r"(src_vu),     // %1
        "+r"(dst_yuv24),  // %2
        "+r"(width)       // %3
      :
      : "cc", "memory", "q0", "q1", "q2");
}

void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
                      int src_stride_ayuv,
                      uint8_t* dst_uv,
                      int width) {
  asm volatile(
      "add %1, %0, %1 \n"  // src_stride + src_AYUV
      "1: \n"
      "vld4.8 {d0, d2, d4, d6}, [%0]! \n"  // load 8 AYUV pixels.
      "vld4.8 {d1, d3, d5, d7}, [%0]! \n"  // load next 8 AYUV pixels.
      "vpaddl.u8 q0, q0 \n"  // V 16 bytes -> 8 shorts.
      "vpaddl.u8 q1, q1 \n"  // U 16 bytes -> 8 shorts.
      "vld4.8 {d8, d10, d12, d14}, [%1]! \n"  // load 8 more AYUV pixels.
      "vld4.8 {d9, d11, d13, d15}, [%1]! \n"  // load last 8 AYUV pixels.
      "vpadal.u8 q0, q4 \n"  // V 16 bytes -> 8 shorts.
      "vpadal.u8 q1, q5 \n"  // U 16 bytes -> 8 shorts.
      "vqrshrun.s16 d1, q0, #2 \n"  // 2x2 average
      "vqrshrun.s16 d0, q1, #2 \n"
      "subs %3, %3, #16 \n"  // 16 processed per loop.
      "vst2.8 {d0, d1}, [%2]! \n"  // store 8 pixels UV.
      "bgt 1b \n"
      : "+r"(src_ayuv),         // %0
        "+r"(src_stride_ayuv),  // %1
        "+r"(dst_uv),           // %2
        "+r"(width)             // %3
      :
      : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
}

void AYUVToVURow_NEON(const uint8_t* src_ayuv,
                      int src_stride_ayuv,
                      uint8_t* dst_vu,
                      int width) {
  asm volatile(
      "add %1, %0, %1 \n"  // src_stride + src_AYUV
      "1: \n"
      "vld4.8 {d0, d2, d4, d6}, [%0]! \n"  // load 8 AYUV pixels.
      "vld4.8 {d1, d3, d5, d7}, [%0]! \n"  // load next 8 AYUV pixels.
      "vpaddl.u8 q0, q0 \n"  // V 16 bytes -> 8 shorts.
      "vpaddl.u8 q1, q1 \n"  // U 16 bytes -> 8 shorts.
      "vld4.8 {d8, d10, d12, d14}, [%1]! \n"  // load 8 more AYUV pixels.
      "vld4.8 {d9, d11, d13, d15}, [%1]! \n"  // load last 8 AYUV pixels.
      "vpadal.u8 q0, q4 \n"  // V 16 bytes -> 8 shorts.
      "vpadal.u8 q1, q5 \n"  // U 16 bytes -> 8 shorts.
      "vqrshrun.s16 d0, q0, #2 \n"  // 2x2 average
      "vqrshrun.s16 d1, q1, #2 \n"
      "subs %3, %3, #16 \n"  // 16 processed per loop.
      "vst2.8 {d0, d1}, [%2]! \n"  // store 8 pixels VU.
      "bgt 1b \n"
      : "+r"(src_ayuv),         // %0
        "+r"(src_stride_ayuv),  // %1
        "+r"(dst_vu),           // %2
        "+r"(width)             // %3
      :
      : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
}

// Copy row of AYUV Y's into Y.
// Similar to ARGBExtractAlphaRow_NEON
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  asm volatile(
      "1: \n"
      "vld4.8 {d0, d2, d4, d6}, [%0]! \n"  // load 8 AYUV pixels
      "vld4.8 {d1, d3, d5, d7}, [%0]! \n"  // load next 8 AYUV pixels
      "subs %2, %2, #16 \n"  // 16 processed per loop
      "vst1.8 {q2}, [%1]! \n"  // store 16 Y's.
      "bgt 1b \n"
      : "+r"(src_ayuv),  // %0
        "+r"(dst_y),     // %1
        "+r"(width)      // %2
      :
      : "cc", "memory", "q0", "q1", "q2", "q3");
}

// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  asm volatile(
      "1: \n"
      "vld2.8 {d0, d2}, [%0]! \n"  // load 16 UV values
      "vld2.8 {d1, d3}, [%0]! \n"
      "vorr.u8 q2, q0, q0 \n"  // move U after V
      "subs %2, %2, #16 \n"  // 16 pixels per loop
      "vst2.8 {q1, q2}, [%1]! \n"  // store 16 VU pixels
      "bgt 1b \n"
      : "+r"(src_uv),  // %0
        "+r"(dst_vu),  // %1
        "+r"(width)    // %2
      :
      : "cc", "memory", "q0", "q1", "q2");
}

#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)

#ifdef __cplusplus
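
GaussCol_NEON and GaussRow_NEON implement a separable 5-tap binomial (Gaussian) filter with weights 1-4-6-4-1; each axis sums to 16, so the full 2-D kernel sums to 256 and GaussRow's final narrowing shift of 8 renormalizes. One vertical tap as scalar reference code:

#include <stdint.h>

// One output sample of the 5-tap 1-4-6-4-1 vertical Gaussian, as in
// GaussCol_*: five source rows in, 32-bit accumulator out (the horizontal
// pass later divides by 256 to normalize the separable 2-D kernel).
static uint32_t GaussCol1(uint16_t r0, uint16_t r1, uint16_t r2,
                          uint16_t r3, uint16_t r4) {
  return static_cast<uint32_t>(r0) + 4u * r1 + 6u * r2 + 4u * r3 +
         static_cast<uint32_t>(r4);
}
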
@@ -608,7 +608,7 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
        "+r"(width)  // %3  // Output registers
      :  // Input registers
      : "cc", "memory", "v0", "v1"  // Clobber List
      );
  );
}

// Reads 16 U's and V's and writes out 16 pairs of UV.
@@ -629,7 +629,7 @@ void MergeUVRow_NEON(const uint8_t* src_u,
        "+r"(width)  // %3  // Output registers
      :  // Input registers
      : "cc", "memory", "v0", "v1"  // Clobber List
      );
  );
}

// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b.
@@ -653,7 +653,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
        "+r"(width)  // %4
      :  // Input registers
      : "cc", "memory", "v0", "v1", "v2"  // Clobber List
      );
  );
}

// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
@@ -677,7 +677,7 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
        "+r"(width)  // %4
      :  // Input registers
      : "cc", "memory", "v0", "v1", "v2"  // Clobber List
      );
  );
}

// Copy multiple of 32.
@@ -693,7 +693,7 @@ void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
        "+r"(width)  // %2  // Output registers
      :  // Input registers
      : "cc", "memory", "v0", "v1"  // Clobber List
      );
  );
}

// SetRow writes 'width' bytes using an 8 bit value repeated.
@@ -800,7 +800,7 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
        "+r"(width)  // %2
      :
      : "cc", "memory", "v1", "v2", "v3", "v4"  // Clobber List
      );
  );
}

void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
@@ -818,7 +818,7 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5"  // Clobber List
      );
  );
}

void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
@@ -835,7 +835,7 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4"  // Clobber List
      );
  );
}

#define RGB565TOARGB \
@@ -867,7 +867,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
        "+r"(width)  // %2
      :
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6"  // Clobber List
      );
  );
}

#define ARGB1555TOARGB \
@@ -924,7 +924,7 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
        "+r"(width)  // %2
      :
      : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
      );
  );
}

#define ARGB4444TOARGB \
@@ -955,7 +955,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
        "+r"(width)  // %2
      :
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4"  // Clobber List
      );
  );
}

void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
@@ -973,7 +973,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
        "+r"(width)  // %2
      :
      : "cc", "memory", "v1", "v2", "v3", "v4"  // Clobber List
      );
  );
}

void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
@@ -990,7 +990,7 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "v1", "v2", "v3", "v4", "v5"  // Clobber List
      );
  );
}

void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
@@ -1005,7 +1005,7 @@ void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "v0", "v1"  // Clobber List
      );
  );
}

void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
@@ -1020,7 +1020,7 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
        "+r"(width)  // %2
      :
      : "cc", "memory", "v0", "v1"  // Clobber List
      );
  );
}

void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
@@ -1040,7 +1040,7 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
        "+r"(width)  // %3
      :
      : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
      );
  );
}

void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
@@ -1060,7 +1060,7 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
        "+r"(width)  // %3
      :
      : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
      );
  );
}

void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
@@ -1087,7 +1087,7 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
      :
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
        "v7"  // Clobber List
      );
  );
}

void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
@@ -1114,7 +1114,7 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
      :
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
        "v7"  // Clobber List
      );
  );
}

// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
@@ -1135,7 +1135,7 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb,
        "+r"(width)  // %2
      : "r"(shuffler)  // %3
      : "cc", "memory", "v0", "v1", "v2"  // Clobber List
      );
  );
}

void I422ToYUY2Row_NEON(const uint8_t* src_y,
@@ -1298,7 +1298,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
        "+r"(width)  // %2
      :
      : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
      );
  );
}

void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@@ -1863,7 +1863,7 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
        "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
        "v28"

      );
  );
}

void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
@@ -2611,7 +2611,7 @@ void SobelXRow_NEON(const uint8_t* src_y0,
      : "r"(2LL),  // %5
        "r"(6LL)   // %6
      : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
      );
  );
}

// SobelY as a matrix is
@@ -2648,7 +2648,7 @@ void SobelYRow_NEON(const uint8_t* src_y0,
      : "r"(1LL),  // %4
        "r"(6LL)   // %5
      : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
      );
  );
}

// Caveat - rounds float to half float whereas scaling version truncates.
@@ -2876,6 +2876,115 @@ void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}

// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_yuv24,
                         int width) {
  asm volatile(
      "1: \n"
      "ld1 {v2.16b}, [%0], #16 \n"  // load 16 Y values
      "ld2 {v0.8b, v1.8b}, [%1], #16 \n"  // load 8 VU values
      "zip1 v0.16b, v0.16b, v0.16b \n"  // replicate V values
      "zip1 v1.16b, v1.16b, v1.16b \n"  // replicate U values
      "subs %w3, %w3, #16 \n"  // 16 pixels per loop
      "st3 {v0.16b,v1.16b,v2.16b}, [%2], #48 \n"  // store 16 YUV pixels
      "b.gt 1b \n"
      : "+r"(src_y),      // %0
        "+r"(src_vu),     // %1
        "+r"(dst_yuv24),  // %2
        "+r"(width)       // %3
      :
      : "cc", "memory", "v0", "v1", "v2");
}

void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
                      int src_stride_ayuv,
                      uint8_t* dst_uv,
                      int width) {
  const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv;
  asm volatile(

      "1: \n"
      "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16 ayuv
      "uaddlp v0.8h, v0.16b \n"  // V 16 bytes -> 8 shorts.
      "uaddlp v1.8h, v1.16b \n"  // U 16 bytes -> 8 shorts.
      "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n"  // load next 16
      "uadalp v0.8h, v4.16b \n"  // V 16 bytes -> 8 shorts.
      "uadalp v1.8h, v5.16b \n"  // U 16 bytes -> 8 shorts.
      "uqrshrn v3.8b, v0.8h, #2 \n"  // 2x2 average
      "uqrshrn v2.8b, v1.8h, #2 \n"
      "subs %w3, %w3, #16 \n"  // 16 processed per loop.
      "st2 {v2.8b,v3.8b}, [%2], #16 \n"  // store 8 pixels UV.
      "b.gt 1b \n"
      : "+r"(src_ayuv),    // %0
        "+r"(src_ayuv_1),  // %1
        "+r"(dst_uv),      // %2
        "+r"(width)        // %3
      :
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}

void AYUVToVURow_NEON(const uint8_t* src_ayuv,
                      int src_stride_ayuv,
                      uint8_t* dst_vu,
                      int width) {
  const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv;
  asm volatile(

      "1: \n"
      "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16 pixels.
      "uaddlp v0.8h, v0.16b \n"  // V 16 bytes -> 8 shorts.
      "uaddlp v1.8h, v1.16b \n"  // U 16 bytes -> 8 shorts.
      "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n"  // load next 16
      "uadalp v0.8h, v4.16b \n"  // V 16 bytes -> 8 shorts.
      "uadalp v1.8h, v5.16b \n"  // U 16 bytes -> 8 shorts.
      "uqrshrn v0.8b, v0.8h, #2 \n"  // 2x2 average
      "uqrshrn v1.8b, v1.8h, #2 \n"
      "subs %w3, %w3, #16 \n"  // 16 processed per loop.
      "st2 {v0.8b,v1.8b}, [%2], #16 \n"  // store 8 pixels VU.
      "b.gt 1b \n"
      : "+r"(src_ayuv),    // %0
        "+r"(src_ayuv_1),  // %1
        "+r"(dst_vu),      // %2
        "+r"(width)        // %3
      :
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}

// Copy row of AYUV Y's into Y
void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  asm volatile(
      "1: \n"
      "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16 pixels
      "subs %w2, %w2, #16 \n"  // 16 pixels per loop
      "st1 {v2.16b}, [%1], #16 \n"  // store 16 Y pixels
      "b.gt 1b \n"
      : "+r"(src_ayuv),  // %0
        "+r"(dst_y),     // %1
        "+r"(width)      // %2
      :
      : "cc", "memory", "v0", "v1", "v2", "v3");
}

// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  asm volatile(
      "1: \n"
      "ld2 {v0.16b, v1.16b}, [%0], #32 \n"  // load 16 UV values
      "orr v2.16b, v0.16b, v0.16b \n"  // move U after V
      "subs %w2, %w2, #16 \n"  // 16 pixels per loop
      "st2 {v1.16b, v2.16b}, [%1], #32 \n"  // store 16 VU pixels
      "b.gt 1b \n"
      : "+r"(src_uv),  // %0
        "+r"(dst_vu),  // %1
        "+r"(width)    // %2
      :
      : "cc", "memory", "v0", "v1", "v2");
}

#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

#ifdef __cplusplus
@@ -1594,9 +1594,9 @@ __declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
    mov edx, [esp + 8 + 12]  // dst_u
    mov edi, [esp + 8 + 16]  // dst_v
    mov ecx, [esp + 8 + 20]  // width
    vbroadcastf128 ymm5, xmmword ptr kAddUV128
    vbroadcastf128 ymm6, xmmword ptr kARGBToV
    vbroadcastf128 ymm7, xmmword ptr kARGBToU
    vbroadcastf128 ymm5, xmmword ptr kAddUVJ128
    vbroadcastf128 ymm6, xmmword ptr kARGBToVJ
    vbroadcastf128 ymm7, xmmword ptr kARGBToUJ
    sub edi, edx  // stride from u to v

  convertloop:
@@ -4222,7 +4222,7 @@ __declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
    add ecx, 4 - 1
    jl convertloop1b

      // 1 pixel loop.
    // 1 pixel loop.
  convertloop1:
    movd xmm3, [eax]  // src argb
    lea eax, [eax + 4]
@@ -5360,7 +5360,7 @@ void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
    add ecx, 4 - 1
    jl l1b

      // 1 pixel loop
    // 1 pixel loop
  l1:
    movdqu xmm0, [eax]
    psubd xmm0, [eax + edx * 4]
@@ -5448,9 +5448,9 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
    add ecx, 4 - 1
    jl l1b

      // 1 pixel loop
    // 1 pixel loop
  l1:
    movd xmm2, dword ptr [eax]  // 1 argb pixel 4 bytes.
    movd xmm2, dword ptr [eax]  // 1 argb pixel
    lea eax, [eax + 4]
    punpcklbw xmm2, xmm1
    punpcklwd xmm2, xmm1
@@ -5534,7 +5534,7 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb,
    add ecx, 4 - 1
    jl l1b

      // 1 pixel loop
    // 1 pixel loop
  l1:
    cvttps2dq xmm0, xmm2  // x, y float to int
    packssdw xmm0, xmm0  // x, y as shorts
@ -1788,6 +1788,75 @@ int I420Scale_16(const uint16_t* src_y,
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Scale an I444 image.
|
||||
// This function in turn calls a scaling function for each plane.
|
||||
|
||||
LIBYUV_API
|
||||
int I444Scale(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering) {
|
||||
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
|
||||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
|
||||
dst_width <= 0 || dst_height <= 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
|
||||
dst_width, dst_height, filtering);
|
||||
ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
|
||||
dst_width, dst_height, filtering);
|
||||
ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
|
||||
dst_width, dst_height, filtering);
|
||||
return 0;
|
||||
}
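A minimal caller for the new I444Scale entry point might look like the following sketch; the frame dimensions are made up for illustration, and since I444 keeps all three planes at full resolution each stride is simply the plane width:

// Hypothetical usage: downscale a tightly packed 640x480 I444 frame to
// 320x240 with bilinear filtering.
#include "libyuv/scale.h"

int HalveI444(const uint8_t* src_y, const uint8_t* src_u,
              const uint8_t* src_v, uint8_t* dst_y, uint8_t* dst_u,
              uint8_t* dst_v) {
  return libyuv::I444Scale(src_y, 640, src_u, 640, src_v, 640, 640, 480,
                           dst_y, 320, dst_u, 320, dst_v, 320, 320, 240,
                           libyuv::kFilterBilinear);
}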

LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering) {
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}

ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
dst_width, dst_height, filtering);
ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
dst_width, dst_height, filtering);
ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
dst_width, dst_height, filtering);
return 0;
}

// Deprecated api
LIBYUV_API
int Scale(const uint8_t* src_y,

@ -483,7 +483,7 @@ void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf0), // %0
"m"(kShuf1), // %1
"m"(kShuf2) // %2
);
);
asm volatile(

LABELALIGN

@ -521,7 +521,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf01), // %0
"m"(kShuf11), // %1
"m"(kShuf21) // %2
);
);
asm volatile(
"movdqa %0,%%xmm5 \n" // kMadd01
"movdqa %1,%%xmm0 \n" // kMadd11

@ -530,7 +530,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kMadd01), // %0
"m"(kMadd11), // %1
"m"(kRound34) // %2
);
);
asm volatile(

LABELALIGN

@ -587,7 +587,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf01), // %0
"m"(kShuf11), // %1
"m"(kShuf21) // %2
);
);
asm volatile(
"movdqa %0,%%xmm5 \n" // kMadd01
"movdqa %1,%%xmm0 \n" // kMadd11

@ -596,7 +596,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kMadd01), // %0
"m"(kMadd11), // %1
"m"(kRound34) // %2
);
);

asm volatile(

@ -690,7 +690,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
"m"(kShufAb1), // %1
"m"(kShufAb2), // %2
"m"(kScaleAb2) // %3
);
);
asm volatile(

LABELALIGN

@ -734,7 +734,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShufAc), // %0
"m"(kShufAc3), // %1
"m"(kScaleAc33) // %2
);
);
asm volatile(

LABELALIGN

@ -1272,7 +1272,7 @@ void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
:
: "m"(kShuffleColARGB), // %0
"m"(kShuffleFractions) // %1
);
);

asm volatile(
"movd %5,%%xmm2 \n"
@ -40,7 +40,7 @@ void ScaleRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "q0", "q1" // Clobber List
);
);
}

// Read 32x1 average down and write 16x1.

@ -61,7 +61,7 @@ void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "q0", "q1" // Clobber List
);
);
}

// Read 32x2 average down and write 16x1.

@ -92,7 +92,7 @@ void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %3
:
: "q0", "q1", "q2", "q3" // Clobber List
);
);
}

void ScaleRowDown4_NEON(const uint8_t* src_ptr,

@ -523,7 +523,7 @@ void ScaleAddRow_NEON(const uint8_t* src_ptr,
"+r"(src_width) // %2
:
: "memory", "cc", "q0", "q1", "q2" // Clobber List
);
);
}

// TODO(Yang Zhang): Investigate less load instructions for

@ -705,7 +705,7 @@ void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
);
}

// 46: f964 018d vld4.32 {d16,d18,d20,d22}, [r4]!

@ -734,7 +734,7 @@ void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
);
}

void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,

@ -38,7 +38,7 @@ void ScaleRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "v0", "v1" // Clobber List
);
);
}

// Read 32x1 average down and write 16x1.

@ -60,7 +60,7 @@ void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "v0", "v1" // Clobber List
);
);
}

// Read 32x2 average down and write 16x1.

@ -89,7 +89,7 @@ void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %3
:
: "v0", "v1", "v2", "v3" // Clobber List
);
);
}

void ScaleRowDown4_NEON(const uint8_t* src_ptr,

@ -534,7 +534,7 @@ void ScaleAddRow_NEON(const uint8_t* src_ptr,
"+r"(src_width) // %2
:
: "memory", "cc", "v0", "v1", "v2" // Clobber List
);
);
}

// TODO(Yang Zhang): Investigate less load instructions for

@ -719,7 +719,7 @@ void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
);
}

void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,

@ -742,7 +742,7 @@ void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
"+r"(dst_width) // %2
:
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
);
);
}

void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,

@ -991,7 +991,7 @@ void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
"+r"(dst_width) // %3
:
: "v0", "v1", "v2", "v3" // Clobber List
);
);
}

// Read 8x2 upsample with filtering and write 16x1.

@ -1041,7 +1041,7 @@ void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
"r"(14LL) // %5
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
"v19" // Clobber List
);
);
}

#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
@ -37,7 +37,7 @@ CHROMIUM_LOG_TEMPLATE = CHROMIUM_SRC_URL + '/+log/%s'
CHROMIUM_FILE_TEMPLATE = CHROMIUM_SRC_URL + '/+/%s/%s'

COMMIT_POSITION_RE = re.compile('^Cr-Commit-Position: .*#([0-9]+).*$')
CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'(\d+)\'$')
CLANG_REVISION_RE = re.compile(r'^CLANG_REVISION = \'([0-9a-z]+)\'$')
ROLL_BRANCH_NAME = 'roll_chromium_revision'

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

@ -15,10 +15,13 @@
#include "../unit_test/unit_test.h"
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/compare_row.h" /* For HammingDistance_C */
#include "libyuv/cpu_id.h"
#include "libyuv/video_common.h"

#ifdef ENABLE_ROW_TESTS
#include "libyuv/compare_row.h" /* For HammingDistance_C */
#endif

namespace libyuv {

// hash seed of 5381 recommended.

@ -206,6 +209,7 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) {
free_aligned_buffer_page_end(src_a);
}

#ifdef ENABLE_ROW_TESTS
TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
const int kMaxWidth = 4096 * 3;
align_buffer_page_end(src_a, kMaxWidth);

@ -403,6 +407,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
}
#endif // ENABLE_ROW_TESTS

TEST_F(LibYUVCompareTest, TestHammingDistance) {
align_buffer_page_end(src_a, benchmark_width_ * benchmark_height_);
@ -12,8 +12,6 @@
#include <stdlib.h>
#include <time.h>

#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */

#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"

@ -29,6 +27,10 @@
#include "libyuv/rotate.h"
#include "libyuv/video_common.h"

#ifdef ENABLE_ROW_TESTS
#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
#endif

#if defined(__arm__) || defined(__aarch64__)
// arm version subsamples by summing 4 pixels then multiplying by matrix with
// 4x smaller coefficients which are rounded to nearest integer.

@ -37,6 +39,11 @@
#define ARM_YUV_ERROR 0
#endif

// Some functions fail on big endian. Enable these tests on all cpus except PowerPC
#if !defined(__powerpc__)
#define LITTLE_ENDIAN_TEST 1
#endif

namespace libyuv {

// Alias to copy pixels as is

@ -311,10 +318,10 @@ int I400ToNV21(const uint8_t* src_y,
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \

@ -329,21 +336,21 @@ int I400ToNV21(const uint8_t* src_y,
} \
memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_uv_c, 2, \
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_uv_opt, 102, \
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
} \
int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \

@ -357,12 +364,12 @@ int I400ToNV21(const uint8_t* src_y,
} \
EXPECT_LE(max_diff, 1); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
int abs_diff = \
abs(static_cast<int>( \
dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \
dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
static_cast<int>( \
dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \
dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \

@ -395,6 +402,100 @@ TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2)
TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2)
TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)

#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, \
OFF, DOY) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_uv_c, 2 * SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_uv_opt, 2 * SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
(fastrand() & 0xff); \
} \
} \
memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_uv_c, 2, \
2 * SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_uv_opt, 102, \
2 * SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_uv + OFF, \
2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_c : NULL, kWidth, \
dst_uv_c, 2 * SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_uv + OFF, \
2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_opt : NULL, \
kWidth, dst_uv_opt, 2 * SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, \
NEG kHeight); \
} \
int max_diff = 0; \
if (DOY) { \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
static_cast<int>(dst_y_opt[i * kWidth + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \
EXPECT_LE(max_diff, 1); \
} \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
int abs_diff = \
abs(static_cast<int>( \
dst_uv_c[i * 2 * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
static_cast<int>( \
dst_uv_opt[i * 2 * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \
EXPECT_LE(max_diff, 1); \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
free_aligned_buffer_page_end(dst_uv_opt); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
}

#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0, 1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1, \
1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0)

TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)

#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \
DOY) \

@ -585,13 +686,15 @@ TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1)
TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1)
TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1)
TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
#endif
TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(J422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(J422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(H422, 2, 1, ARGB, 4, 4, 1)

@ -608,8 +711,10 @@ TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
#endif

#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, DIFF, N, NEG, OFF, ATTEN) \

@ -680,8 +785,8 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)
TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)

#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
W1280, DIFF, N, NEG, OFF) \
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
BPP_B, W1280, DIFF, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \

@ -716,9 +821,9 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \
memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \
memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \
FMT_B##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
kHeight); \
FMT_B##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
kHeight); \
int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \

@ -740,25 +845,29 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
free_aligned_buffer_page_end(dst_argb32_opt); \
}

#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_ - 4, DIFF, _Any, +, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_, DIFF, _Unaligned, +, 1) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
benchmark_width_, DIFF, _Invert, -, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
DIFF) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_ - 4, DIFF, _Any, +, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, DIFF, _Unaligned, +, 1) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, DIFF, _Invert, -, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, DIFF, _Opt, +, 0)

TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, RGB24, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RGB24, 3, 2)
TESTBIPLANARTOB(NV12, 2, 2, RAW, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RAW, 3, 2)
TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
#ifdef LITTLE_ENDIAN_TEST
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
#endif
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)

#ifdef DO_THREE_PLANES
// Do 3 allocations for yuv. conventional but slower.

@ -885,26 +994,30 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, DIFF, _Opt, +, 0)

TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9.
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
#ifdef LITTLE_ENDIAN_TEST
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
#endif
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, ARM_YUV_ERROR)
#ifdef LITTLE_ENDIAN_TEST
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
#endif
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)

#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \

@ -976,8 +1089,12 @@ TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)

TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)

#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, DIFF, N, NEG, OFF) \

@ -1069,45 +1186,58 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
HEIGHT_B, DIFF)

// TODO(fbarchard): make ARM version of C code that matches NEON.
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
#endif
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
#endif
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
#endif
TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
#endif
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
#endif
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0)
TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
#ifdef LITTLE_ENDIAN_TEST
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
#endif
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0)
TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0)
TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)

#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, DIFF, N, NEG, OFF) \

@ -1204,7 +1334,9 @@ TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, DIFF)

#ifdef LITTLE_ENDIAN_TEST
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
#endif

#define TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_ATOB##_Symetric##N) { \

@ -1291,6 +1423,7 @@ TEST_F(LibYUVConvertTest, ValidateJpeg) {
// EOI, SOI. Expect pass.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
orig_pixels[2] = 0xff;
orig_pixels[kSize - kOff + 0] = 0xff;
orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
for (int times = 0; times < benchmark_iterations_; ++times) {

@ -1317,6 +1450,7 @@ TEST_F(LibYUVConvertTest, ValidateJpegLarge) {
// EOI, SOI. Expect pass.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
orig_pixels[2] = 0xff;
orig_pixels[kSize - kOff + 0] = 0xff;
orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
for (int times = 0; times < benchmark_iterations_; ++times) {

@ -1350,6 +1484,7 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) {
// SOI but no EOI. Expect fail.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
orig_pixels[2] = 0xff;
for (int times = 0; times < benchmark_iterations_; ++times) {
EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
}

@ -1367,22 +1502,24 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) {
TEST_F(LibYUVConvertTest, FuzzJpeg) {
// SOI but no EOI. Expect fail.
for (int times = 0; times < benchmark_iterations_; ++times) {
const int kSize = fastrand() % 5000 + 2;
const int kSize = fastrand() % 5000 + 3;
align_buffer_page_end(orig_pixels, kSize);
MemRandomize(orig_pixels, kSize);

// Add SOI so frame will be scanned.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
orig_pixels[2] = 0xff;
orig_pixels[kSize - 1] = 0xff;
ValidateJpeg(orig_pixels, kSize); // Failure normally expected.
ValidateJpeg(orig_pixels,
kSize); // Failure normally expected.
free_aligned_buffer_page_end(orig_pixels);
}
}
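As these tests exercise, ValidateJpeg only inspects the buffer for the SOI marker bytes (0xff 0xd8 0xff) at the start and an EOI marker (0xff 0xd9) near the end. A sketch of the typical guard before handing a capture buffer to a JPEG convert call; the wrapper name is hypothetical:

#include "libyuv/mjpeg_decoder.h"

// Reject buffers that do not hold a complete JPEG before decoding.
bool LooksLikeJpeg(const uint8_t* sample, size_t sample_size) {
  return libyuv::ValidateJpeg(sample, sample_size) != 0;
}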

// Test data created in GIMP. In export jpeg, disable thumbnails etc,
// choose a subsampling, and use low quality (50) to keep size small.
// Generated with xxd -i test.jpg
// Test data created in GIMP. In export jpeg, disable
// thumbnails etc, choose a subsampling, and use low quality
// (50) to keep size small. Generated with xxd -i test.jpg
// test 0 is J400
static const uint8_t kTest0Jpg[] = {
0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,

@ -1984,8 +2121,8 @@ TEST_F(LibYUVConvertTest, TestMJPGInfo) {
EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen));
EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen));
EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen));
EXPECT_EQ(1,
ShowJPegInfo(kTest4Jpg, kTest4JpgLen)); // Valid but unsupported.
EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg,
kTest4JpgLen)); // Valid but unsupported.
}
#endif // HAVE_JPEG

@ -2296,8 +2433,9 @@ TEST_F(LibYUVConvertTest, TestDither) {
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)

#ifdef LITTLE_ENDIAN_TEST
TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)

#endif
#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
TEST_F(LibYUVConvertTest, NAME) { \
const int kWidth = benchmark_width_; \

@ -2437,10 +2575,12 @@ TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
TESTPLANARTOE(I422, 2, 1, ARGB, 1, 4, RGB565, 2)
#endif
TESTPLANARTOE(J422, 2, 1, ARGB, 1, 4, ARGB, 4)
TESTPLANARTOE(J422, 2, 1, ABGR, 1, 4, ARGB, 4)
TESTPLANARTOE(H422, 2, 1, ARGB, 1, 4, ARGB, 4)

@ -2574,6 +2714,7 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
_Opt, +, 0, FMT_C, BPP_C)

// Caveat: Destination needs to be 4 bytes
#ifdef LITTLE_ENDIAN_TEST
TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)

@ -2582,6 +2723,7 @@ TESTPLANETOE(ARGB, 1, 4, AB30, 1, 4, ARGB, 4)
TESTPLANETOE(ABGR, 1, 4, AB30, 1, 4, ABGR, 4)
TESTPLANETOE(AB30, 1, 4, ARGB, 1, 4, ABGR, 4)
TESTPLANETOE(AB30, 1, 4, ABGR, 1, 4, ARGB, 4)
#endif

TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
// 2x2 frames

@ -2753,12 +2895,16 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {

TESTPLANAR16TOB(I010, 2, 2, ARGB, 4, 4, 1, 2)
TESTPLANAR16TOB(I010, 2, 2, ABGR, 4, 4, 1, 2)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2)
#endif
TESTPLANAR16TOB(H010, 2, 2, ARGB, 4, 4, 1, 2)
TESTPLANAR16TOB(H010, 2, 2, ABGR, 4, 4, 1, 2)
#ifdef LITTLE_ENDIAN_TEST
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
TESTPLANAR16TOB(H010, 2, 2, AB30, 4, 4, 1, 2)
#endif

static int Clamp(int y) {
if (y < 0) {

@ -2903,7 +3049,8 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) {
}

// Test 10 bit YUV to 10 bit RGB
// Caveat: Result is near due to float rounding in expected result.
// Caveat: Result is near due to float rounding in expected
// result.
TEST_F(LibYUVConvertTest, TestH010ToAR30) {
const int kSize = 1024;
int histogram_b[1024];

@ -2966,7 +3113,8 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) {
}

// Test 10 bit YUV to 10 bit RGB
// Caveat: Result is near due to float rounding in expected result.
// Caveat: Result is near due to float rounding in expected
// result.
TEST_F(LibYUVConvertTest, TestH010ToAB30) {
const int kSize = 1024;
int histogram_b[1024];

@ -16,10 +16,14 @@
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/scale.h"

#ifdef ENABLE_ROW_TESTS
#include "libyuv/scale_row.h"
#endif

namespace libyuv {

#ifdef ENABLE_ROW_TESTS
TEST_F(LibYUVBaseTest, TestFixedDiv) {
int num[1280];
int div[1280];

@ -151,5 +155,6 @@ TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) {
EXPECT_NEAR(result_c[j], result_opt[j], 1);
}
}
#endif // ENABLE_ROW_TESTS

} // namespace libyuv

@ -12,8 +12,6 @@
#include <stdlib.h>
#include <time.h>

// row.h defines SIMD_ALIGNED, overriding unit_test.h
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */

#include "../unit_test/unit_test.h"
#include "libyuv/compare.h"

@ -25,6 +23,12 @@
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"

#ifdef ENABLE_ROW_TESTS
// row.h defines SIMD_ALIGNED, overriding unit_test.h
// TODO(fbarchard): Remove row.h from unittests. Test public functions.
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
#endif

namespace libyuv {

TEST_F(LibYUVPlanarTest, TestAttenuate) {

@ -2321,7 +2325,8 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
}

TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 4);
align_buffer_page_end(dst_pixels_opt, kPixels);
align_buffer_page_end(dst_pixels_c, kPixels);

@ -2349,7 +2354,8 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
}

TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(orig_pixels, kPixels);
align_buffer_page_end(dst_pixels_opt, kPixels * 4);
align_buffer_page_end(dst_pixels_c, kPixels * 4);

@ -2482,7 +2488,8 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
}

TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(tmp_pixels_u, kPixels);
align_buffer_page_end(tmp_pixels_v, kPixels);

@ -2526,7 +2533,8 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
}

TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(tmp_pixels_u, kPixels);
align_buffer_page_end(tmp_pixels_v, kPixels);

@ -2568,8 +2576,39 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
free_aligned_buffer_page_end(dst_pixels_c);
}

TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);

MemRandomize(src_pixels, kPixels * 2);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);

MaskCpuFlags(disable_cpu_flags_);
SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
benchmark_width_ * 2, benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);

for (int i = 0; i < benchmark_iterations_; ++i) {
SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
benchmark_width_ * 2, benchmark_width_, benchmark_height_);
}

for (int i = 0; i < kPixels * 2; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}

free_aligned_buffer_page_end(src_pixels);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
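SwapUVPlane, exercised above, converts between the NV12 and NV21 chroma orderings without touching Y. A sketch of plane-level usage, assuming the plane comes from a 4:2:0 frame so the chroma plane is half width and half height in pairs; the wrapper name is hypothetical:

#include "libyuv/planar_functions.h"

// Rewrite an NV12 chroma plane as NV21 (or back). Strides are bytes per
// row of interleaved pairs, i.e. 2 * chroma width.
void Nv12ToNv21Chroma(const uint8_t* src_uv, uint8_t* dst_vu,
                      int luma_width, int luma_height) {
  int chroma_width = (luma_width + 1) / 2;
  int chroma_height = (luma_height + 1) / 2;
  libyuv::SwapUVPlane(src_uv, chroma_width * 2, dst_vu, chroma_width * 2,
                      chroma_width, chroma_height);
}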

TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);

@ -2617,7 +2656,8 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
}

TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels);

@ -2666,7 +2706,8 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
// TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2);
align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);

@ -2710,7 +2751,8 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);

@ -2746,7 +2788,8 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
#endif // HAS_MULTIPLYROW_16_AVX2

TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels);
align_buffer_page_end(dst_pixels_y_c, kPixels);

@ -2776,6 +2819,7 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}

#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT16TO8ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {

@ -2821,9 +2865,11 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}
#endif // HAS_CONVERT16TO8ROW_AVX2
#endif // ENABLE_ROW_TESTS

TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
const int kPixels = benchmark_width_ * benchmark_height_;
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2);

@ -2855,6 +2901,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
free_aligned_buffer_page_end(dst_pixels_y_c);
}

#ifdef ENABLE_ROW_TESTS
// TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT8TO16ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {

@ -3186,7 +3233,8 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
}
GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);

@ -3239,7 +3287,8 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
&orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],

@ -3267,4 +3316,36 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
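The Gauss tests above exercise a separable 5-tap [1 4 6 4 1] kernel, applied once across the row and once down the columns. A sketch of the row pass, assuming (as the test's types suggest) 32-bit inputs and 16-bit outputs; this mirrors the shape of GaussRow_C but is not a copy of it:

// 5-tap Gaussian across a row:
// out[i] = in[i] + 4*in[i+1] + 6*in[i+2] + 4*in[i+3] + in[i+4].
// No normalization here; a later pass divides by the kernel sum.
static void GaussRow_sketch(const uint32_t* src, uint16_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] =
        (uint16_t)(src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]);
    ++src;
  }
}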

TEST_F(LibYUVPlanarTest, SwapUVRow) {
const int kPixels = benchmark_width_ * benchmark_height_;
void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
SwapUVRow_C;

align_buffer_page_end(src_pixels_vu, kPixels * 2);
align_buffer_page_end(dst_pixels_uv, kPixels * 2);
MemRandomize(src_pixels_vu, kPixels * 2);
memset(dst_pixels_uv, 1, kPixels * 2);

#if defined(HAS_SWAPUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SwapUVRow = SwapUVRow_Any_NEON;
if (IS_ALIGNED(kPixels, 16)) {
SwapUVRow = SwapUVRow_NEON;
}
}
#endif

for (int j = 0; j < benchmark_iterations_; j++) {
SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
}
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
}

free_aligned_buffer_page_end(src_pixels_vu);
free_aligned_buffer_page_end(dst_pixels_uv);
}
#endif

} // namespace libyuv

@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
benchmark_cpu_info_);
}

static void I444TestRotate(int src_width,
int src_height,
int dst_width,
int dst_height,
libyuv::RotationMode mode,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (src_width < 1) {
src_width = 1;
}
if (src_height == 0) {
src_height = 1;
}
if (dst_width < 1) {
dst_width = 1;
}
if (dst_height < 1) {
dst_height = 1;
}
int src_i444_y_size = src_width * Abs(src_height);
int src_i444_uv_size = src_width * Abs(src_height);
int src_i444_size = src_i444_y_size + src_i444_uv_size * 2;
align_buffer_page_end(src_i444, src_i444_size);
for (int i = 0; i < src_i444_size; ++i) {
src_i444[i] = fastrand() & 0xff;
}

int dst_i444_y_size = dst_width * dst_height;
int dst_i444_uv_size = dst_width * dst_height;
int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2;
align_buffer_page_end(dst_i444_c, dst_i444_size);
align_buffer_page_end(dst_i444_opt, dst_i444_size);
memset(dst_i444_c, 2, dst_i444_size);
memset(dst_i444_opt, 3, dst_i444_size);

MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width,
dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width,
src_width, src_height, mode);

MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations; ++i) {
I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size,
dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size,
dst_width, src_width, src_height, mode);
}

// Rotation should be exact.
for (int i = 0; i < dst_i444_size; ++i) {
EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]);
}

free_aligned_buffer_page_end(dst_i444_c);
free_aligned_buffer_page_end(dst_i444_opt);
free_aligned_buffer_page_end(src_i444);
}

TEST_F(LibYUVRotateTest, I444Rotate0_Opt) {
I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
benchmark_height_, kRotate0, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I444Rotate90_Opt) {
I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
benchmark_width_, kRotate90, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I444Rotate180_Opt) {
I444TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
benchmark_height_, kRotate180, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I444Rotate270_Opt) {
I444TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
benchmark_width_, kRotate270, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_);
}
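The helper above fixes every stride at the plane width because I444 has no chroma subsampling; a direct caller looks like this sketch, assuming contiguous Y, U, V planes and a hypothetical wrapper name:

#include "libyuv/rotate.h"

// Rotate a contiguous I444 frame 90 degrees clockwise. After rotation the
// image is height x width, so all destination strides equal the source
// height.
int RotateI444By90(const uint8_t* src, uint8_t* dst, int width, int height) {
  const int plane = width * height;
  return libyuv::I444Rotate(src, width, src + plane, width, src + 2 * plane,
                            width, dst, height, dst + plane, height,
                            dst + 2 * plane, height, width, height,
                            libyuv::kRotate90);
}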
|
||||
|
||||
// TODO(fbarchard): Remove odd width tests.
|
||||
// Odd width tests work but disabled because they use C code and can be
|
||||
// tested by passing an odd width command line or environment variable.
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) {
|
||||
I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) {
|
||||
I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) {
|
||||
I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) {
|
||||
I444TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
static void NV12TestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
|
|
|
@ -259,7 +259,7 @@ static int ARGBClipTestFilter(int src_width,
|
|||
|
||||
TEST_FACTOR(2, 1, 2)
|
||||
TEST_FACTOR(4, 1, 4)
|
||||
TEST_FACTOR(8, 1, 8)
|
||||
// TEST_FACTOR(8, 1, 8) Disable for benchmark performance.
|
||||
TEST_FACTOR(3by4, 3, 4)
|
||||
TEST_FACTOR(3by8, 3, 8)
|
||||
TEST_FACTOR(3, 1, 3)
|
||||
|
|
|
@ -14,7 +14,10 @@
|
|||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/scale.h"
|
||||
|
||||
#ifdef ENABLE_ROW_TESTS
|
||||
#include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
|
||||
#endif
|
||||
|
||||
#define STRINGIZE(line) #line
|
||||
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
|
||||
|
@ -22,14 +25,14 @@
|
|||
namespace libyuv {
|
||||
|
||||
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
||||
static int TestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
static int I420TestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -141,14 +144,14 @@ static int TestFilter(int src_width,
|
|||
|
||||
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
|
||||
// 0 = exact.
|
||||
static int TestFilter_16(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
static int I420TestFilter_16(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
|
@@ -256,6 +259,241 @@ static int TestFilter_16(int src_width,
   return max_diff;
 }

+// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
+static int I444TestFilter(int src_width,
+                          int src_height,
+                          int dst_width,
+                          int dst_height,
+                          FilterMode f,
+                          int benchmark_iterations,
+                          int disable_cpu_flags,
+                          int benchmark_cpu_info) {
+  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+    return 0;
+  }
+
+  int i, j;
+  int src_width_uv = Abs(src_width);
+  int src_height_uv = Abs(src_height);
+
+  int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+  int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
+
+  int src_stride_y = Abs(src_width);
+  int src_stride_uv = src_width_uv;
+
+  align_buffer_page_end(src_y, src_y_plane_size);
+  align_buffer_page_end(src_u, src_uv_plane_size);
+  align_buffer_page_end(src_v, src_uv_plane_size);
+  if (!src_y || !src_u || !src_v) {
+    printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+    return 0;
+  }
+  MemRandomize(src_y, src_y_plane_size);
+  MemRandomize(src_u, src_uv_plane_size);
+  MemRandomize(src_v, src_uv_plane_size);
+
+  int dst_width_uv = dst_width;
+  int dst_height_uv = dst_height;
+
+  int64_t dst_y_plane_size = (dst_width) * (dst_height);
+  int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+  int dst_stride_y = dst_width;
+  int dst_stride_uv = dst_width_uv;
+
+  align_buffer_page_end(dst_y_c, dst_y_plane_size);
+  align_buffer_page_end(dst_u_c, dst_uv_plane_size);
+  align_buffer_page_end(dst_v_c, dst_uv_plane_size);
+  align_buffer_page_end(dst_y_opt, dst_y_plane_size);
+  align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
+  align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
+  if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
+      !dst_v_opt) {
+    printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+    return 0;
+  }
+
+  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
+  double c_time = get_time();
+  I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+            src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
+            dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
+  c_time = (get_time() - c_time);
+
+  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
+  double opt_time = get_time();
+  for (i = 0; i < benchmark_iterations; ++i) {
+    I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+              src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
+              dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
+              f);
+  }
+  opt_time = (get_time() - opt_time) / benchmark_iterations;
+  // Report performance of C vs OPT.
+  printf("filter %d - %8d us C - %8d us OPT\n", f,
+         static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
+
+  // C version may be a little off from the optimized. Order of
+  // operations may introduce rounding somewhere. So do a difference
+  // of the buffers and look to see that the max difference is not
+  // over 3.
+  int max_diff = 0;
+  for (i = 0; i < (dst_height); ++i) {
+    for (j = 0; j < (dst_width); ++j) {
+      int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
+                         dst_y_opt[(i * dst_stride_y) + j]);
+      if (abs_diff > max_diff) {
+        max_diff = abs_diff;
+      }
+    }
+  }
+
+  for (i = 0; i < (dst_height_uv); ++i) {
+    for (j = 0; j < (dst_width_uv); ++j) {
+      int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
+                         dst_u_opt[(i * dst_stride_uv) + j]);
+      if (abs_diff > max_diff) {
+        max_diff = abs_diff;
+      }
+      abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
+                     dst_v_opt[(i * dst_stride_uv) + j]);
+      if (abs_diff > max_diff) {
+        max_diff = abs_diff;
+      }
+    }
+  }
+
+  free_aligned_buffer_page_end(dst_y_c);
+  free_aligned_buffer_page_end(dst_u_c);
+  free_aligned_buffer_page_end(dst_v_c);
+  free_aligned_buffer_page_end(dst_y_opt);
+  free_aligned_buffer_page_end(dst_u_opt);
+  free_aligned_buffer_page_end(dst_v_opt);
+  free_aligned_buffer_page_end(src_y);
+  free_aligned_buffer_page_end(src_u);
+  free_aligned_buffer_page_end(src_v);
+
+  return max_diff;
+}
+
+// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
+// 0 = exact.
+static int I444TestFilter_16(int src_width,
+                             int src_height,
+                             int dst_width,
+                             int dst_height,
+                             FilterMode f,
+                             int benchmark_iterations,
+                             int disable_cpu_flags,
+                             int benchmark_cpu_info) {
+  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+    return 0;
+  }
+
+  int i;
+  int src_width_uv = Abs(src_width);
+  int src_height_uv = Abs(src_height);
+
+  int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+  int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
+
+  int src_stride_y = Abs(src_width);
+  int src_stride_uv = src_width_uv;
+
+  align_buffer_page_end(src_y, src_y_plane_size);
+  align_buffer_page_end(src_u, src_uv_plane_size);
+  align_buffer_page_end(src_v, src_uv_plane_size);
+  align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+  align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
+  align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
+  if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
+    printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+    return 0;
+  }
+  uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+  uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
+  uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
+
+  MemRandomize(src_y, src_y_plane_size);
+  MemRandomize(src_u, src_uv_plane_size);
+  MemRandomize(src_v, src_uv_plane_size);
+
+  for (i = 0; i < src_y_plane_size; ++i) {
+    p_src_y_16[i] = src_y[i];
+  }
+  for (i = 0; i < src_uv_plane_size; ++i) {
+    p_src_u_16[i] = src_u[i];
+    p_src_v_16[i] = src_v[i];
+  }
+
+  int dst_width_uv = dst_width;
+  int dst_height_uv = dst_height;
+
+  int dst_y_plane_size = (dst_width) * (dst_height);
+  int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+  int dst_stride_y = dst_width;
+  int dst_stride_uv = dst_width_uv;
+
+  align_buffer_page_end(dst_y_8, dst_y_plane_size);
+  align_buffer_page_end(dst_u_8, dst_uv_plane_size);
+  align_buffer_page_end(dst_v_8, dst_uv_plane_size);
+  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+  align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
+  align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
+
+  uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
+  uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
+  uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
+
+  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
+  I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+            src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
+            dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
+  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
+  for (i = 0; i < benchmark_iterations; ++i) {
+    I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
+                 p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
+                 dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
+                 dst_stride_uv, dst_width, dst_height, f);
+  }
+
+  // Expect an exact match.
+  int max_diff = 0;
+  for (i = 0; i < dst_y_plane_size; ++i) {
+    int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+  for (i = 0; i < dst_uv_plane_size; ++i) {
+    int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+    abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+
+  free_aligned_buffer_page_end(dst_y_8);
+  free_aligned_buffer_page_end(dst_u_8);
+  free_aligned_buffer_page_end(dst_v_8);
+  free_aligned_buffer_page_end(dst_y_16);
+  free_aligned_buffer_page_end(dst_u_16);
+  free_aligned_buffer_page_end(dst_v_16);
+  free_aligned_buffer_page_end(src_y);
+  free_aligned_buffer_page_end(src_u);
+  free_aligned_buffer_page_end(src_v);
+  free_aligned_buffer_page_end(src_y_16);
+  free_aligned_buffer_page_end(src_u_16);
+  free_aligned_buffer_page_end(src_v_16);
+
+  return max_diff;
+}
+
 // The following adjustments in dimensions ensure the scale factor will be
 // exactly achieved.
 // 2 is chroma subsample.
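Outside the diff, a minimal sketch of driving the I444 path that the new helpers above exercise. The call shape mirrors the I444Scale invocations inside I444TestFilter; the dimensions, tightly packed strides, and choice of kFilterBilinear are illustrative assumptions:

#include <cstdint>
#include <vector>

#include "libyuv/scale.h"

// Halve a 4:4:4 frame. In I444 all three planes are full resolution, so the
// U and V buffers and strides match the Y plane, unlike the I420 tests.
void ScaleI444HalfSize() {
  const int src_width = 640, src_height = 360;
  const int dst_width = 320, dst_height = 180;
  std::vector<uint8_t> src_y(src_width * src_height), src_u(src_y.size()),
      src_v(src_y.size());
  std::vector<uint8_t> dst_y(dst_width * dst_height), dst_u(dst_y.size()),
      dst_v(dst_y.size());
  // Strides equal plane widths for tightly packed buffers.
  libyuv::I444Scale(src_y.data(), src_width, src_u.data(), src_width,
                    src_v.data(), src_width, src_width, src_height,
                    dst_y.data(), dst_width, dst_u.data(), dst_width,
                    dst_v.data(), dst_width, dst_width, dst_height,
                    libyuv::kFilterBilinear);
}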
@@ -263,16 +501,32 @@ static int TestFilter_16(int src_width,
 #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)

 #define TEST_FACTOR1(name, filter, nom, denom, max_diff)                      \
-  TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) {                     \
-    int diff = TestFilter(                                                    \
+  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) {                 \
+    int diff = I420TestFilter(                                                \
         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
         benchmark_cpu_info_);                                                 \
     EXPECT_LE(diff, max_diff);                                                \
   }                                                                           \
-  TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) {                \
-    int diff = TestFilter_16(                                                 \
+  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) {                 \
+    int diff = I444TestFilter(                                                \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
+        benchmark_cpu_info_);                                                 \
+    EXPECT_LE(diff, max_diff);                                                \
+  }                                                                           \
+  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) {            \
+    int diff = I420TestFilter_16(                                             \
+        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
+        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
+        kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
+        benchmark_cpu_info_);                                                 \
+    EXPECT_LE(diff, max_diff);                                                \
+  }                                                                           \
+  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) {            \
+    int diff = I444TestFilter_16(                                             \
         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
@@ -290,7 +544,7 @@ static int TestFilter_16(int src_width,

 TEST_FACTOR(2, 1, 2, 0)
 TEST_FACTOR(4, 1, 4, 0)
-TEST_FACTOR(8, 1, 8, 0)
+// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
 TEST_FACTOR(3by4, 3, 4, 1)
 TEST_FACTOR(3by8, 3, 8, 1)
 TEST_FACTOR(3, 1, 3, 0)
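As a readability aid (hand expansion, not part of the diff): each TEST_FACTOR line fans out through the TEST_FACTOR1 macro above, so a single instantiation such as TEST_FACTOR1(2, Bilinear, 1, 2, 3) now defines four gtest cases. The first expands to roughly the following; the max_diff of 3 is chosen here only for illustration:

// First of the four tests generated by TEST_FACTOR1(2, Bilinear, 1, 2, 3);
// SX/DX (defined above) round the benchmark size so 1/2 is hit exactly.
TEST_F(LibYUVScaleTest, I420ScaleDownBy2_Bilinear) {
  int diff = I420TestFilter(
      SX(benchmark_width_, 1, 2), SX(benchmark_height_, 1, 2),
      DX(benchmark_width_, 1, 2), DX(benchmark_height_, 1, 2),
      kFilterBilinear, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_);
  EXPECT_LE(diff, 3);
}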
@@ -300,30 +554,58 @@ TEST_FACTOR(3, 1, 3, 0)
 #undef DX

 #define TEST_SCALETO1(name, width, height, filter, max_diff)                  \
-  TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) {            \
-    int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \
-                          kFilter##filter, benchmark_iterations_,             \
-                          disable_cpu_flags_, benchmark_cpu_info_);           \
+  TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) {      \
+    int diff = I420TestFilter(benchmark_width_, benchmark_height_, width,     \
+                              height, kFilter##filter, benchmark_iterations_, \
+                              disable_cpu_flags_, benchmark_cpu_info_);       \
     EXPECT_LE(diff, max_diff);                                                \
   }                                                                           \
-  TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) {          \
-    int diff = TestFilter(width, height, Abs(benchmark_width_),               \
-                          Abs(benchmark_height_), kFilter##filter,            \
-                          benchmark_iterations_, disable_cpu_flags_,          \
-                          benchmark_cpu_info_);                               \
+  TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) {      \
+    int diff = I444TestFilter(benchmark_width_, benchmark_height_, width,     \
+                              height, kFilter##filter, benchmark_iterations_, \
+                              disable_cpu_flags_, benchmark_cpu_info_);       \
     EXPECT_LE(diff, max_diff);                                                \
   }                                                                           \
-  TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) {       \
-    int diff = TestFilter_16(benchmark_width_, benchmark_height_, width,      \
-                             height, kFilter##filter, benchmark_iterations_,  \
-                             disable_cpu_flags_, benchmark_cpu_info_);        \
+  TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
+    int diff = I420TestFilter_16(                                             \
+        benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
+        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
     EXPECT_LE(diff, max_diff);                                                \
   }                                                                           \
-  TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) {     \
-    int diff = TestFilter_16(width, height, Abs(benchmark_width_),            \
-                             Abs(benchmark_height_), kFilter##filter,         \
-                             benchmark_iterations_, disable_cpu_flags_,       \
-                             benchmark_cpu_info_);                            \
+  TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
+    int diff = I444TestFilter_16(                                             \
+        benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
+        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
+    EXPECT_LE(diff, max_diff);                                                \
+  }                                                                           \
+  TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) {    \
+    int diff = I420TestFilter(width, height, Abs(benchmark_width_),           \
+                              Abs(benchmark_height_), kFilter##filter,        \
+                              benchmark_iterations_, disable_cpu_flags_,      \
+                              benchmark_cpu_info_);                           \
+    EXPECT_LE(diff, max_diff);                                                \
+  }                                                                           \
+  TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) {    \
+    int diff = I444TestFilter(width, height, Abs(benchmark_width_),           \
+                              Abs(benchmark_height_), kFilter##filter,        \
+                              benchmark_iterations_, disable_cpu_flags_,      \
+                              benchmark_cpu_info_);                           \
+    EXPECT_LE(diff, max_diff);                                                \
+  }                                                                           \
+  TEST_F(LibYUVScaleTest,                                                     \
+         I420##name##From##width##x##height##_##filter##_16) {                \
+    int diff = I420TestFilter_16(width, height, Abs(benchmark_width_),        \
+                                 Abs(benchmark_height_), kFilter##filter,     \
+                                 benchmark_iterations_, disable_cpu_flags_,   \
+                                 benchmark_cpu_info_);                        \
+    EXPECT_LE(diff, max_diff);                                                \
+  }                                                                           \
+  TEST_F(LibYUVScaleTest,                                                     \
+         I444##name##From##width##x##height##_##filter##_16) {                \
+    int diff = I444TestFilter_16(width, height, Abs(benchmark_width_),        \
+                                 Abs(benchmark_height_), kFilter##filter,     \
+                                 benchmark_iterations_, disable_cpu_flags_,   \
+                                 benchmark_cpu_info_);                        \
     EXPECT_LE(diff, max_diff);                                                \
   }

@@ -343,6 +625,7 @@ TEST_SCALETO(Scale, 1920, 1080)
 #undef TEST_SCALETO1
 #undef TEST_SCALETO

+#ifdef ENABLE_ROW_TESTS
 #ifdef HAS_SCALEROWDOWN2_SSSE3
 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
   SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
@@ -524,6 +807,7 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
   EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
   EXPECT_EQ(dst_pixels_c[1279], 3839);
 }
+#endif  // ENABLE_ROW_TESTS

 // Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
 // difference.
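The first expectation in the hunk above encodes the 2x2 box average with rounding: the top-left output pixel averages source values 0, 1, 2560, and 2561, and the +2 term rounds before the integer divide. A quick compile-time check of that arithmetic (illustrative, not from the diff):

// (0 + 1 + 2560 + 2561 + 2) / 4 == 5124 / 4 == 1281, so the rounded 2x2 box
// average of the top-left source quad is exactly 1281.
static_assert((0 + 1 + 2560 + 2561 + 2) / 4 == 1281,
              "rounded 2x2 box average of the top-left quad");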
@@ -614,7 +898,7 @@ static int TestPlaneFilter_16(int src_width,

 TEST_FACTOR(2, 1, 2, 0)
 TEST_FACTOR(4, 1, 4, 0)
-TEST_FACTOR(8, 1, 8, 0)
+// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
 TEST_FACTOR(3by4, 3, 4, 1)
 TEST_FACTOR(3by8, 3, 8, 1)
 TEST_FACTOR(3, 1, 3, 0)
@@ -17,6 +17,9 @@
 #ifdef LIBYUV_USE_GFLAGS
 #include "gflags/gflags.h"
 #endif
+#ifdef LIBYUV_USE_BASE_FLAGS
+#include "base/commandlineflags.h"
+#endif
 #include "libyuv/cpu_id.h"

 unsigned int fastrand_seed = 0xfb;
@@ -189,7 +189,7 @@ static uint32_t SumSquareError_SSE2(const uint8_t* src_a,
         ,
         "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
 #endif
-      );  // NOLINT
+  );  // NOLINT
   return sse;
 }
 #endif  // LIBYUV_DISABLE_X86 etc
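For context on the hunk above (explanatory, not from the diff): the quoted register names form the clobber list of a GCC extended-asm statement; declaring the xmm registers clobbered stops the compiler from keeping live values in them across the assembly. A minimal sketch of the same idiom, assuming an x86-64 GCC/Clang toolchain:

// Zero xmm0 via extended asm; the ("xmm0") clobber mirrors the style of the
// SumSquareError_SSE2 clobber list in the hunk above.
static inline void ZeroXmm0() {
  asm volatile("pxor %%xmm0,%%xmm0" : : : "xmm0");
}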