This commit is contained in:
Michael Jerris 2016-02-23 14:32:34 -05:00
parent 1ffc801957
commit d57c6ec48a
154 changed files with 65122 additions and 0 deletions

4
libs/libyuv/AUTHORS Normal file
View File

@ -0,0 +1,4 @@
# Names should be added to this file like so:
# Name or Organization <email address>
Google Inc.

63
libs/libyuv/Android.mk Normal file
View File

@ -0,0 +1,63 @@
# This is the Android makefile for libyuv for both platform and NDK.
# It defines one static library module, libyuv_static, built from the
# sources listed below.
LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
# libyuv C++ sources use the .cc extension.
LOCAL_CPP_EXTENSION := .cc
# Sources added for every target ABI (this list is unconditional).
LOCAL_SRC_FILES := \
source/compare.cc \
source/compare_common.cc \
source/compare_neon64.cc \
source/compare_gcc.cc \
source/convert.cc \
source/convert_argb.cc \
source/convert_from.cc \
source/convert_from_argb.cc \
source/convert_to_argb.cc \
source/convert_to_i420.cc \
source/cpu_id.cc \
source/planar_functions.cc \
source/rotate.cc \
source/rotate_any.cc \
source/rotate_argb.cc \
source/rotate_common.cc \
source/rotate_mips.cc \
source/rotate_neon64.cc \
source/rotate_gcc.cc \
source/row_any.cc \
source/row_common.cc \
source/row_mips.cc \
source/row_neon64.cc \
source/row_gcc.cc \
source/scale.cc \
source/scale_any.cc \
source/scale_argb.cc \
source/scale_common.cc \
source/scale_mips.cc \
source/scale_neon64.cc \
source/scale_gcc.cc \
source/video_common.cc
# TODO(fbarchard): Enable mjpeg decoder. The sources below are currently left
# out of the build.
# source/mjpeg_decoder.cc
# source/convert_jpeg.cc
# source/mjpeg_validate.cc
# armeabi-v7a only: define LIBYUV_NEON and add the 32-bit NEON sources.
# The ".neon" suffix is the NDK convention for compiling a single file with
# NEON enabled.
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
LOCAL_CFLAGS += -DLIBYUV_NEON
LOCAL_SRC_FILES += \
source/compare_neon.cc.neon \
source/rotate_neon.cc.neon \
source/row_neon.cc.neon \
source/scale_neon.cc.neon
endif
# The include/ directory is used when building this module and is also
# exported to modules that depend on it.
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
LOCAL_MODULE := libyuv_static
LOCAL_MODULE_TAGS := optional
include $(BUILD_STATIC_LIBRARY)

133
libs/libyuv/BUILD.gn Normal file
View File

@ -0,0 +1,133 @@
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("//build/config/arm.gni")
import("//build/config/sanitizers/sanitizers.gni")
# Include directories for libyuv: the package root and its include/ folder.
# The targets in this file list this config in their public_configs.
config("libyuv_config") {
include_dirs = [
".",
"include",
]
}
# True on arm64, and on 32-bit arm when either arm_use_neon or
# arm_optionally_use_neon is set. Gates the libyuv_neon target below.
use_neon = current_cpu == "arm64" || (current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))
# Main libyuv target: the public headers plus every non-NEON source file.
# NEON sources are built by the separate libyuv_neon target and pulled in
# through deps when use_neon is true.
source_set("libyuv") {
sources = [
# headers.
"include/libyuv.h",
"include/libyuv/basic_types.h",
"include/libyuv/compare.h",
"include/libyuv/convert.h",
"include/libyuv/convert_argb.h",
"include/libyuv/convert_from.h",
"include/libyuv/convert_from_argb.h",
"include/libyuv/cpu_id.h",
"include/libyuv/mjpeg_decoder.h",
"include/libyuv/planar_functions.h",
"include/libyuv/rotate.h",
"include/libyuv/rotate_argb.h",
"include/libyuv/rotate_row.h",
"include/libyuv/row.h",
"include/libyuv/scale.h",
"include/libyuv/scale_argb.h",
"include/libyuv/scale_row.h",
"include/libyuv/version.h",
"include/libyuv/video_common.h",
# sources.
"source/compare.cc",
"source/compare_common.cc",
"source/compare_gcc.cc",
"source/compare_win.cc",
"source/convert.cc",
"source/convert_argb.cc",
"source/convert_from.cc",
"source/convert_from_argb.cc",
"source/convert_jpeg.cc",
"source/convert_to_argb.cc",
"source/convert_to_i420.cc",
"source/cpu_id.cc",
"source/mjpeg_decoder.cc",
"source/mjpeg_validate.cc",
"source/planar_functions.cc",
"source/rotate.cc",
"source/rotate_any.cc",
"source/rotate_argb.cc",
"source/rotate_common.cc",
"source/rotate_mips.cc",
"source/rotate_gcc.cc",
"source/rotate_win.cc",
"source/row_any.cc",
"source/row_common.cc",
"source/row_mips.cc",
"source/row_gcc.cc",
"source/row_win.cc",
"source/scale.cc",
"source/scale_any.cc",
"source/scale_argb.cc",
"source/scale_common.cc",
"source/scale_mips.cc",
"source/scale_gcc.cc",
"source/scale_win.cc",
"source/video_common.cc",
]
# Replace the chromium_code compiler config with no_chromium_code.
configs -= [ "//build/config/compiler:chromium_code" ]
configs += [ "//build/config/compiler:no_chromium_code" ]
public_configs = [ ":libyuv_config" ]
defines = []
# HAVE_JPEG is defined on every platform except iOS.
if (!is_ios) {
defines += [ "HAVE_JPEG" ]
}
if (is_msan) {
# MemorySanitizer does not support assembly code yet.
# http://crbug.com/344505
defines += [ "LIBYUV_DISABLE_X86" ]
}
# NOTE(review): the jpeg dependency is added unconditionally, including on
# iOS where HAVE_JPEG is not defined above -- confirm this is intended.
deps = [
"//third_party:jpeg",
]
if (use_neon) {
deps += [ ":libyuv_neon" ]
}
if (is_nacl) {
# Always enable optimization under NaCl to workaround crbug.com/538243 .
configs -= [ "//build/config/compiler:default_optimization" ]
configs += [ "//build/config/compiler:optimize_max" ]
}
}
# NEON sources live in their own target so that, on 32-bit arm, they can be
# compiled with -mfpu=neon independently of the flags used for the main
# libyuv target. Both the 32-bit (*_neon.cc) and 64-bit (*_neon64.cc) files
# are listed regardless of current_cpu.
if (use_neon) {
static_library("libyuv_neon") {
sources = [
"source/compare_neon.cc",
"source/compare_neon64.cc",
"source/rotate_neon.cc",
"source/rotate_neon64.cc",
"source/row_neon.cc",
"source/row_neon64.cc",
"source/scale_neon.cc",
"source/scale_neon64.cc",
]
public_configs = [ ":libyuv_config" ]
# On anything other than arm64, swap the default FPU config for an explicit
# -mfpu=neon flag.
if (current_cpu != "arm64") {
configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
cflags = [ "-mfpu=neon" ]
}
}
}

141
libs/libyuv/CMakeLists.txt Normal file
View File

@ -0,0 +1,141 @@
cmake_minimum_required(VERSION 2.8)

# CMakeLists for libyuv
# Originally created for "roxlu build system" to compile libyuv on windows
# Run with -DTEST=ON to build unit tests

# Declare the project explicitly. Without a project() call CMake warns and
# pretends there is an implicit project(Project); C and CXX match the
# languages that implicit call would enable.
project(YUV C CXX)

# TEST: when ON, the libyuv_unittest executable is built as well.
option(TEST "Build unit tests" OFF)

# Directory layout, relative to this file.
set(ly_base_dir ${CMAKE_CURRENT_LIST_DIR})
# No trailing slash: every use appends "/<file>", so a trailing slash here
# would produce "source//<file>" paths.
set(ly_src_dir ${ly_base_dir}/source)
set(ly_inc_dir ${ly_base_dir}/include)

# Name of the library target (produces libyuv.a / yuv.lib).
set(ly_lib_name "yuv")
# All library sources. The platform-specific variants (*_neon, *_neon64,
# *_mips, *_gcc, *_win) are listed unconditionally for every target platform.
set(ly_source_files
${ly_src_dir}/compare.cc
${ly_src_dir}/compare_common.cc
${ly_src_dir}/compare_neon.cc
${ly_src_dir}/compare_neon64.cc
${ly_src_dir}/compare_gcc.cc
${ly_src_dir}/compare_win.cc
${ly_src_dir}/convert.cc
${ly_src_dir}/convert_argb.cc
${ly_src_dir}/convert_from.cc
${ly_src_dir}/convert_from_argb.cc
${ly_src_dir}/convert_jpeg.cc
${ly_src_dir}/convert_to_argb.cc
${ly_src_dir}/convert_to_i420.cc
${ly_src_dir}/cpu_id.cc
${ly_src_dir}/mjpeg_decoder.cc
${ly_src_dir}/mjpeg_validate.cc
${ly_src_dir}/planar_functions.cc
${ly_src_dir}/rotate.cc
${ly_src_dir}/rotate_any.cc
${ly_src_dir}/rotate_argb.cc
${ly_src_dir}/rotate_common.cc
${ly_src_dir}/rotate_mips.cc
${ly_src_dir}/rotate_neon.cc
${ly_src_dir}/rotate_neon64.cc
${ly_src_dir}/rotate_gcc.cc
${ly_src_dir}/rotate_win.cc
${ly_src_dir}/row_any.cc
${ly_src_dir}/row_common.cc
${ly_src_dir}/row_mips.cc
${ly_src_dir}/row_neon.cc
${ly_src_dir}/row_neon64.cc
${ly_src_dir}/row_gcc.cc
${ly_src_dir}/row_win.cc
${ly_src_dir}/scale.cc
${ly_src_dir}/scale_any.cc
${ly_src_dir}/scale_argb.cc
${ly_src_dir}/scale_common.cc
${ly_src_dir}/scale_mips.cc
${ly_src_dir}/scale_neon.cc
${ly_src_dir}/scale_neon64.cc
${ly_src_dir}/scale_gcc.cc
${ly_src_dir}/scale_win.cc
${ly_src_dir}/video_common.cc
)
# Sources of the libyuv_unittest executable; only used when TEST is ON.
set(ly_unittest_sources
${ly_base_dir}/unit_test/basictypes_test.cc
${ly_base_dir}/unit_test/color_test.cc
${ly_base_dir}/unit_test/compare_test.cc
${ly_base_dir}/unit_test/convert_test.cc
${ly_base_dir}/unit_test/cpu_test.cc
${ly_base_dir}/unit_test/math_test.cc
${ly_base_dir}/unit_test/planar_test.cc
${ly_base_dir}/unit_test/rotate_argb_test.cc
${ly_base_dir}/unit_test/rotate_test.cc
${ly_base_dir}/unit_test/scale_argb_test.cc
${ly_base_dir}/unit_test/scale_test.cc
${ly_base_dir}/unit_test/unit_test.cc
${ly_base_dir}/unit_test/video_common_test.cc
)
# Public headers installed into include/libyuv. The umbrella header libyuv.h
# is not part of this list; it is installed separately into include/.
set(ly_header_files
${ly_inc_dir}/libyuv/basic_types.h
${ly_inc_dir}/libyuv/compare.h
${ly_inc_dir}/libyuv/convert.h
${ly_inc_dir}/libyuv/convert_argb.h
${ly_inc_dir}/libyuv/convert_from.h
${ly_inc_dir}/libyuv/convert_from_argb.h
${ly_inc_dir}/libyuv/cpu_id.h
${ly_inc_dir}/libyuv/planar_functions.h
${ly_inc_dir}/libyuv/rotate.h
${ly_inc_dir}/libyuv/rotate_argb.h
${ly_inc_dir}/libyuv/rotate_row.h
${ly_inc_dir}/libyuv/row.h
${ly_inc_dir}/libyuv/scale.h
${ly_inc_dir}/libyuv/scale_argb.h
${ly_inc_dir}/libyuv/scale_row.h
${ly_inc_dir}/libyuv/version.h
${ly_inc_dir}/libyuv/video_common.h
${ly_inc_dir}/libyuv/mjpeg_decoder.h
)
# Make the public headers visible to every target defined in this file.
include_directories(${ly_inc_dir})

# The static library itself.
add_library(${ly_lib_name} STATIC ${ly_source_files})

# Command-line conversion tool built from util/convert.cc.
add_executable(convert ${ly_base_dir}/util/convert.cc)
target_link_libraries(convert ${ly_lib_name})

# Optional JPEG support. find_package() is the documented way to run a find
# module (it honours CMAKE_DISABLE_FIND_PACKAGE_JPEG and friends); including
# FindJPEG.cmake directly bypasses that machinery. The result variables used
# below are the same ones the module has always set.
find_package(JPEG)
if(JPEG_FOUND)
  include_directories(${JPEG_INCLUDE_DIR})
  target_link_libraries(convert ${JPEG_LIBRARY})
  # Directory-scoped definition: enables the JPEG code paths in all targets
  # of this directory.
  add_definitions(-DHAVE_JPEG)
endif()
# Unit tests, enabled with -DTEST=ON.
#
# Every command that refers to the libyuv_unittest target must stay inside
# this if(TEST) block: the target only exists when TEST is ON, and calling
# target_link_libraries() on a target that was never created is a hard
# configure error.
if(TEST)
  # Prefer an installed gtest library; otherwise build gtest from the sources
  # in GTEST_SRC_DIR (defaults to /usr/src/gtest).
  find_library(GTEST_LIBRARY gtest)
  if(GTEST_LIBRARY STREQUAL "GTEST_LIBRARY-NOTFOUND")
    set(GTEST_SRC_DIR /usr/src/gtest CACHE STRING "Location of gtest sources")
    if(EXISTS ${GTEST_SRC_DIR}/src/gtest-all.cc)
      message(STATUS "building gtest from sources in ${GTEST_SRC_DIR}")
      set(gtest_sources ${GTEST_SRC_DIR}/src/gtest-all.cc)
      add_library(gtest STATIC ${gtest_sources})
      include_directories(${GTEST_SRC_DIR})
      include_directories(${GTEST_SRC_DIR}/include)
      # Link against the gtest target that was just defined.
      set(GTEST_LIBRARY gtest)
    else()
      message(FATAL_ERROR "TEST is set but unable to find gtest library")
    endif()
  endif()

  add_executable(libyuv_unittest ${ly_unittest_sources})
  target_link_libraries(libyuv_unittest ${ly_lib_name} ${GTEST_LIBRARY} pthread)
  if(JPEG_FOUND)
    target_link_libraries(libyuv_unittest ${JPEG_LIBRARY})
  endif()

  # NaCl with newlib needs the glibc compatibility shim.
  if(NACL AND NACL_LIBC STREQUAL "newlib")
    target_link_libraries(libyuv_unittest glibc-compat)
  endif()

  target_link_libraries(libyuv_unittest gflags)
endif()
# Install the static library into lib/, the per-module headers into
# include/libyuv/, and the umbrella header libyuv.h into include/.
install(TARGETS ${ly_lib_name} DESTINATION lib)
install(FILES ${ly_header_files} DESTINATION include/libyuv)
install(FILES ${ly_inc_dir}/libyuv.h DESTINATION include/)

42
libs/libyuv/DEPS Normal file
View File

@ -0,0 +1,42 @@
# Variables that the rest of this file reads through Var('<name>').
vars = {
# Override root_dir in your .gclient's custom_vars to specify a custom root
# folder name.
'root_dir': 'libyuv',
'extra_gyp_flag': '-Dextra_gyp_flag=0',
# Base URL prepended to the repository paths in deps.
'chromium_git': 'https://chromium.googlesource.com',
# Roll the Chromium Git hash to pick up newer versions of all the
# dependencies and tools linked to in setup_links.py.
'chromium_revision': '3c455872750c9d0f74266b04f97701a516ac9075',
}
# NOTE: Prefer revision numbers to tags for svn deps. Use http rather than
# https; the latter can cause problems for users behind proxies.
# Maps a checkout path (under root_dir) to the repository URL to fetch,
# pinned to a revision with the '@<hash>' suffix.
deps = {
Var('root_dir') + '/third_party/gflags/src':
Var('chromium_git') + '/external/gflags/src@e7390f9185c75f8d902c05ed7d20bb94eb914d0c', # from svn revision 82
}
# Define rules for which include paths are allowed in our source.
include_rules = [ '+gflags' ]
# Hook actions, listed in this order: sync the pinned Chromium checkout,
# link shared dependencies from it, then run the GYP generator. All three use
# the pattern '.'.
hooks = [
{
# Clone chromium and its deps.
'name': 'sync chromium',
'pattern': '.',
'action': ['python', '-u', Var('root_dir') + '/sync_chromium.py',
'--target-revision', Var('chromium_revision')],
},
{
# Create links to shared dependencies in Chromium.
'name': 'setup_links',
'pattern': '.',
'action': ['python', Var('root_dir') + '/setup_links.py'],
},
{
# A change to a .gyp, .gypi, or to GYP itself should run the generator.
# NOTE(review): unlike the two hooks above, this one has no 'name' entry.
'pattern': '.',
'action': ['python', Var('root_dir') + '/gyp_libyuv'],
},
]

29
libs/libyuv/LICENSE Normal file
View File

@ -0,0 +1,29 @@
Copyright 2011 The LibYuv Project Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Google nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,8 @@
This source tree contains third party source code which is governed by third
party licenses. This file contains references to files which are under other
licenses than the one provided in the LICENSE file in the root of the source
tree.
Files governed by third party licenses:
source/x86inc.asm

13
libs/libyuv/OWNERS Normal file
View File

@ -0,0 +1,13 @@
fbarchard@chromium.org
magjed@chromium.org
torbjorng@chromium.org
per-file *.gyp=kjellander@chromium.org
per-file *.gn=kjellander@chromium.org
per-file .gitignore=*
per-file AUTHORS=*
per-file DEPS=*
per-file PRESUBMIT.py=kjellander@chromium.org
per-file gyp_libyuv.py=kjellander@chromium.org
per-file setup_links.py=*
per-file sync_chromium.py=kjellander@chromium.org

24
libs/libyuv/PATENTS Normal file
View File

@ -0,0 +1,24 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the LibYuv code package.
Google hereby grants to you a perpetual, worldwide, non-exclusive,
no-charge, irrevocable (except as stated in this section) patent
license to make, have made, use, offer to sell, sell, import,
transfer, and otherwise run, modify and propagate the contents of this
implementation of the LibYuv code package, where such license applies
only to those patent claims, both currently owned by Google and
acquired in the future, licensable by Google that are necessarily
infringed by this implementation of the LibYuv code package. This
grant does not include claims that would be infringed only as a
consequence of further modification of this implementation. If you or
your agent or exclusive licensee institute or order or agree to the
institution of patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that this
implementation of the LibYuv code package or any code incorporated
within this implementation of the LibYuv code package constitutes
direct or contributory patent infringement, or inducement of patent
infringement, then any patent rights granted to you under this License
for this implementation of the LibYuv code package shall terminate as
of the date such litigation is filed.

51
libs/libyuv/PRESUBMIT.py Executable file
View File

@ -0,0 +1,51 @@
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import re
import sys
def GetDefaultTryConfigs(bots=None):
  """Returns the default try-job configuration for the libyuv try server.

  The result maps the 'tryserver.libyuv' master to a dict of
  {bot_name: tests}. For WebRTC purposes, the list of tests is always empty,
  since we want to run all tests by default on all our trybots.

  Args:
    bots: Iterable of bot names to include. None (the default) is treated as
      "no bots" and yields an empty bot dict.

  Returns:
    A dict of the form {'tryserver.libyuv': {bot_name: [], ...}}.
  """
  # Iterating directly over the None default would raise a TypeError, so fall
  # back to an empty list.
  return {'tryserver.libyuv': dict((bot, []) for bot in (bots or []))}
# pylint: disable=W0613
def GetPreferredTryMasters(project, change):
  """Picks the try bots that should run for a change.

  Args:
    project: Unused.
    change: Object whose LocalPaths() returns the paths touched by the change.

  Returns:
    An empty dict when the change touches no files or only OWNERS files
    (paths ending in '/OWNERS' or '\\OWNERS'); otherwise the result of
    GetDefaultTryConfigs() for the full bot list.
  """
  changed_paths = change.LocalPaths()
  if not changed_paths:
    return {}

  owners_file = re.compile(r'[\\/]OWNERS$')
  if all(owners_file.search(path) for path in changed_paths):
    # Nothing to build or test for OWNERS-only changes.
    return {}

  windows_bots = ['win', 'win_rel', 'win_x64_rel']
  apple_bots = [
      'mac', 'mac_rel',
      'ios', 'ios_rel', 'ios_arm64', 'ios_arm64_rel',
      'mac_asan',
  ]
  linux_bots = [
      'linux', 'linux_rel', 'linux_memcheck', 'linux_tsan2',
      'linux_asan', 'linux_msan', 'linux_ubsan', 'linux_ubsan_vptr',
  ]
  android_bots = ['android', 'android_rel', 'android_clang', 'android_arm64']

  return GetDefaultTryConfigs(
      windows_bots + apple_bots + linux_bots + android_bots)

View File

@ -0,0 +1,8 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1577
License: BSD
License File: LICENSE
Description:
libyuv is an open source project that includes YUV conversion and scaling functionality.

18
libs/libyuv/README.md Normal file
View File

@ -0,0 +1,18 @@
**libyuv** is an open source project that includes YUV scaling and conversion functionality.
* Scale YUV to prepare content for compression, with point, bilinear or box filter.
* Convert to YUV from webcam formats.
* Convert from YUV to formats for rendering/effects.
* Rotate by 90/180/270 degrees to adjust for mobile devices in portrait mode.
* Optimized for SSE2/SSSE3/AVX2 on x86/x64.
* Optimized for Neon on Arm.
* Optimized for DSP R2 on Mips.
### Development
See [Getting started] [1] for instructions on how to get started developing.
You can also browse the [docs directory] [2] for more documentation.
[1]: docs/getting_started.md
[2]: docs/

21
libs/libyuv/all.gyp Normal file
View File

@ -0,0 +1,21 @@
# Copyright 2013 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# all.gyp and All target are for benefit of android gyp build.
# Defines a single aggregate target, 'All'. Its type is 'none' and its only
# content is a dependency on every target ('*') in libyuv.gyp and
# libyuv_test.gyp.
{
'targets': [
{
'target_name': 'All',
'type': 'none',
'dependencies': [
'libyuv.gyp:*',
'libyuv_test.gyp:*',
],
},
],
}

View File

@ -0,0 +1,24 @@
# A single gclient solution named 'src' that points at Chromium's src.git
# and reads its dependencies from .DEPS.git.
solutions = [{
'name': 'src',
'url': 'https://chromium.googlesource.com/chromium/src.git',
'deps_file': '.DEPS.git',
'managed': False,
# Dependencies mapped to None are excluded from the sync.
'custom_deps': {
# Skip syncing some large dependencies Libyuv will never need.
'src/chrome/tools/test/reference_build/chrome_linux': None,
'src/chrome/tools/test/reference_build/chrome_mac': None,
'src/chrome/tools/test/reference_build/chrome_win': None,
'src/native_client': None,
'src/third_party/cld_2/src': None,
'src/third_party/ffmpeg': None,
'src/third_party/hunspell_dictionaries': None,
'src/third_party/liblouis/src': None,
'src/third_party/pdfium': None,
'src/third_party/skia': None,
'src/third_party/trace-viewer': None,
'src/third_party/webrtc': None,
},
'safesync_url': ''
}]
cache_dir = None

View File

@ -0,0 +1,5 @@
This .gclient file is used to download a copy of Chromium.
Libyuv uses the Chromium build toolchain and a number of shared
dependencies by creating symlinks to folders in this checkout,
using the ../setup_links.py script.

View File

@ -0,0 +1,12 @@
# This file is used by gcl to get repository specific information.
CODE_REVIEW_SERVER: codereview.chromium.org
#CC_LIST:
VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/
#STATUS:
FORCE_HTTPS_COMMIT_URL: True
PROJECT: libyuv
TRY_ON_UPLOAD: False
TRYSERVER_ROOT: src
TRYSERVER_SVN_URL: svn://svn.chromium.org/chrome-try/try-libyuv
#GITCL_PREUPLOAD:
#GITCL_PREDCOMMIT:

View File

@ -0,0 +1,32 @@
# Introduction
For test purposes, environment variables can be set to control libyuv behavior. These should only be used for testing, to narrow down bugs or to test performance.
# CPU
By default the cpu is detected and the most advanced form of SIMD is used. But you can disable instruction sets selectively, or completely, falling back on C code. Set the variable to 1 to disable the specified instruction set.
LIBYUV_DISABLE_ASM
LIBYUV_DISABLE_X86
LIBYUV_DISABLE_SSE2
LIBYUV_DISABLE_SSSE3
LIBYUV_DISABLE_SSE41
LIBYUV_DISABLE_SSE42
LIBYUV_DISABLE_AVX
LIBYUV_DISABLE_AVX2
LIBYUV_DISABLE_AVX3
LIBYUV_DISABLE_ERMS
LIBYUV_DISABLE_FMA3
LIBYUV_DISABLE_DSPR2
LIBYUV_DISABLE_NEON
# Test Width/Height/Repeat
The unittests default to a small image (32x18) to run fast. This can be set by environment variable to test a specific resolution.
You can also repeat the test a specified number of iterations, allowing benchmarking and profiling.
set LIBYUV_WIDTH=1280
set LIBYUV_HEIGHT=720
set LIBYUV_REPEAT=999
set LIBYUV_FLAGS=-1
set LIBYUV_CPU_INFO=-1

View File

@ -0,0 +1,196 @@
# Introduction
This document discusses the current state of filtering in libyuv. The emphasis is on maximum performance while avoiding memory exceptions, with a minimal amount of code/complexity. See future work at end.
# LibYuv Filter Subsampling
There are 2 challenges with subsampling
1. centering of samples, which involves clamping on edges
2. clipping a source region
Centering depends on scale factor and filter mode.
# Down Sampling
If scaling down, the stepping rate is always src_width / dst_width.
dx = src_width / dst_width;
e.g. If scaling from 1280x720 to 640x360, the step thru the source will be 2.0, stepping over 2 pixels of source for each pixel of destination.
Centering, depends on filter mode.
**Point** downsampling takes the middle pixel.
x = dx >> 1;
For odd scale factors (e.g. 3x down) this is exactly the middle. For even scale factors, this rounds up and takes the pixel to the right of center. e.g. scale of 4x down will take pixel 2.
**Bilinear** filter, uses the 2x2 pixels in the middle.
x = dx / 2 - 0.5;
For odd scale factors (e.g. 3x down) this is exactly the middle, and point sampling is used.
For even scale factors, this evenly filters the middle 2x2 pixels. e.g. 4x down will filter pixels 1,2 at 50% in both directions.
**Box** filter averages the entire box so sampling starts at 0.
x = 0;
For a scale factor of 2x down, this is equivalent to bilinear.
# Up Sampling
**Point** upsampling uses a stepping rate of src_width / dst_width and a starting coordinate of 0.
x = 0;
dx = src_width / dst_width;
e.g. If scaling from 640x360 to 1280x720 the step thru the source will be 0.5, stepping half a pixel of source for each pixel of destination. Each pixel is replicated by the scale factor.
**Bilinear** filter stretches such that the first pixel of source maps to the first pixel of destination, and the last pixel of source maps to the last pixel of destination.
x = 0;
dx = (src_width - 1) / (dst_width - 1);
This method is not technically correct, and will likely change in the future.
* It is inconsistent with the bilinear down sampler. The same method could be used for down sampling, and then it would be more reversible, but that would prevent specialized 2x down sampling.
* Although centered, the image is slightly magnified.
* The filtering was changed in early 2013 - previously it used:
x = 0;
dx = src_width / dst_width;
Which is the correct scale factor, but shifted the image left, and extruded the last pixel. The reason for the change was to remove the extruding code from the low level row functions, allowing 3 functions to share the same row functions - ARGBScale, I420Scale, and ARGBInterpolate. Then the one function was ported to many cpu variations: SSE2, SSSE3, AVX2, Neon and 'Any' version for any number of pixels and alignment. The function is also specialized for 0,25,50,75%.
The above still has the potential to read the last pixel 100% and last pixel + 1 0%, which may cause a memory exception. So the left pixel goes to a fraction less than the last pixel, but filters in the minimum amount of it, and the maximum of the last pixel.
dx = FixedDiv((src_width << 16) - 0x00010001, (dst << 16) - 0x00010000);
**Box** filter for upsampling switches over to Bilinear.
# Scale snippet:
// CENTERSTART(dx, s): half of step dx plus offset s; for a negative dx the
// same magnitude is computed and then negated.
// FIXEDDIV1(src, dst): FixedDiv of (src << 16) - 0x00010001 by
// (dst << 16) - 0x00010000.
// NOTE(review): the FIXEDDIV1 expansion ends with ';' and neither macro
// parenthesizes its arguments or its full expansion -- use only as a
// complete statement's right-hand side, as ScaleSlope does.
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
#define FIXEDDIV1(src, dst) FixedDiv((src << 16) - 0x00010001, \
(dst << 16) - 0x00010000);
// Compute slope values for stepping.
// Writes the starting source coordinate (*x, *y) and the per-destination-pixel
// step (*dx, *dy) for the given filter mode. A negative src_width requests a
// horizontal mirror. All four output pointers must be non-NULL, the source
// dimensions non-zero and the destination dimensions positive (asserted).
// The constants below (-32768 for -0.5, shifts by 16) suggest 16.16 fixed
// point values -- NOTE(review): confirm against FixedDiv.
void ScaleSlope(int src_width, int src_height,
int dst_width, int dst_height,
FilterMode filtering,
int* x, int* y, int* dx, int* dy) {
assert(x != NULL);
assert(y != NULL);
assert(dx != NULL);
assert(dy != NULL);
assert(src_width != 0);
assert(src_height != 0);
assert(dst_width > 0);
assert(dst_height > 0);
if (filtering == kFilterBox) {
// Scale step for box sampling: plain src / dst ratio, starting at 0.
*dx = FixedDiv(Abs(src_width), dst_width);
*dy = FixedDiv(src_height, dst_height);
*x = 0;
*y = 0;
} else if (filtering == kFilterBilinear) {
// Scale step for bilinear sampling renders last pixel once for upsample.
// Horizontal: centered (offset -0.5) when not upsampling, otherwise
// FIXEDDIV1 stepping starting at 0.
if (dst_width <= Abs(src_width)) {
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768);
} else if (dst_width > 1) {
*dx = FIXEDDIV1(Abs(src_width), dst_width);
*x = 0;
}
// Vertical: same scheme as horizontal.
if (dst_height <= src_height) {
*dy = FixedDiv(src_height, dst_height);
*y = CENTERSTART(*dy, -32768); // -32768 = -0.5 to center bilinear.
} else if (dst_height > 1) {
*dy = FIXEDDIV1(src_height, dst_height);
*y = 0;
}
} else if (filtering == kFilterLinear) {
// Scale step for linear sampling: horizontal is handled exactly like the
// bilinear case above; vertical uses the plain ratio and starts at dy / 2.
if (dst_width <= Abs(src_width)) {
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768);
} else if (dst_width > 1) {
*dx = FIXEDDIV1(Abs(src_width), dst_width);
*x = 0;
}
*dy = FixedDiv(src_height, dst_height);
*y = *dy >> 1;
} else {
// Scale step for point sampling duplicates all pixels equally.
*dx = FixedDiv(Abs(src_width), dst_width);
*dy = FixedDiv(src_height, dst_height);
*x = CENTERSTART(*dx, 0);
*y = CENTERSTART(*dy, 0);
}
// Negative src_width means horizontally mirror.
// Start from the position of the last destination pixel and step backwards.
if (src_width < 0) {
*x += (dst_width - 1) * *dx;
*dx = -*dx;
// NOTE(review): src_width is a by-value parameter and is not read again in
// this snippet, so this assignment has no visible effect here.
src_width = -src_width;
}
}
# Future Work
Point sampling should ideally be the same as bilinear, but pixel by pixel, round to nearest neighbor. But as is, it is reversible and exactly matches ffmpeg at all scale factors, both up and down. The scale factor is
dx = src_width / dst_width;
The step value is centered for down sample:
x = dx / 2;
Or starts at 0 for upsample.
x = 0;
Bilinear filtering is currently correct for down sampling, but not for upsampling.
Upsampling is stretching the first and last pixel of source, to the first and last pixel of destination.
dx = (src_width - 1) / (dst_width - 1);<br>
x = 0;
It should be stretching such that the first pixel is centered in the middle of the scale factor, to match the pixel that would be sampled for down sampling by the same amount. And same on last pixel.
dx = src_width / dst_width;<br>
x = dx / 2 - 0.5;
This would start at -0.5 and go to last pixel + 0.5, sampling 50% from last pixel + 1.
Then clamping would be needed. On GPUs there are numerous ways to clamp.
1. Clamp the coordinate to the edge of the texture, duplicating the first and last pixel.
2. Blend with a constant color, such as transparent black. Typically best for fonts.
3. Mirror the UV coordinate, which is similar to clamping. Good for continuous tone images.
4. Wrap the coordinate, for texture tiling.
5. Allow the coordinate to index beyond the image, which may be the correct data if sampling a subimage.
6. Extrapolate the edge based on the previous pixel. pixel -0.5 is computed from slope of pixel 0 and 1.
Some of these are computational, even for a GPU, which is one reason textures are sometimes limited to power of 2 sizes.
We do care about the clipping case, where allowing coordinates to become negative and index pixels before the image is the correct data. But normally for simple scaling, we want to clamp to the edge pixel. For example, if bilinear scaling from 3x3 to 30x30, we'd essentially want 10 pixels of each of the original 3 pixels. But we want the original pixels to land in the middle of each 10 pixels, at offsets 5, 15 and 25. There would be filtering between 5 and 15 between the original pixels 0 and 1. And filtering between 15 and 25 from original pixels 1 and 2. The first 5 pixels are clamped to pixel 0 and the last 5 pixels are clamped to pixel 2.
The easiest way to implement this is copy the original 3 pixels to a buffer, and duplicate the first and last pixels. 0,1,2 becomes 0, 0,1,2, 2. Then implement a filtering without clamping. We call this source extruding. It's only necessary on up sampling, since down sampler will always have valid surrounding pixels.
Extruding is practical when the image is already copied to a temporary buffer. It could be done to the original image, as long as the original memory is restored, but valgrind and/or memory protection would disallow this, so it requires a memcpy to a temporary buffer, which may hurt performance. The memcpy has a performance advantage, from a cache point of view, that can actually make this technique faster, depending on hardware characteristics.
Vertical extrusion can be done with a memcpy of the first/last row, or clamping a pointer.
The other way to implement clamping is handle the edges with a memset. e.g. Read first source pixel and memset the first 5 pixels. Filter pixels 0,1,2 to 5 to 25. Read last pixel and memset the last 5 pixels. Blur is implemented with this method like this, which has 3 loops per row - left, middle and right.
Box filter is only used for 2x down sample or more. It's based on integer sized boxes. Technically it should be filtered edges, but that's substantially slower (roughly 100x), and at that point you may as well do a cubic filter which is more correct.
Box filter currently sums rows into a row buffer. It does this with
Mirroring will use the same slope as normal, but with a negative.
The starting coordinate needs to consider the scale factor and filter. e.g. box filter of 30x30 to 3x3 with mirroring would use -10 for step, but x = 20. width (30) - dx.
Step needs to be accurate, so it uses an integer divide. This is as much as 5% of the profile. An approximated divide is substantially faster, but the inaccuracy causes stepping beyond the original image boundaries. 3 general solutions:
1. copy image to buffer with padding. allows for small errors in stepping.
2. hash the divide, so common values are quickly found.
3. change api so caller provides the slope.

133
libs/libyuv/docs/formats.md Normal file
View File

@ -0,0 +1,133 @@
# Introduction
Formats (FOURCC) supported by libyuv are detailed here.
# Core Formats
There are 2 core formats supported by libyuv - I420 and ARGB. All YUV formats can be converted to/from I420. All RGB formats can be converted to/from ARGB.
Filtering functions such as scaling and planar functions work on I420 and/or ARGB.
# OSX Core Media Pixel Formats
This is how OSX formats map to libyuv
enum {
kCMPixelFormat_32ARGB = 32, FOURCC_BGRA
kCMPixelFormat_32BGRA = 'BGRA', FOURCC_ARGB
kCMPixelFormat_24RGB = 24, FOURCC_RAW
kCMPixelFormat_16BE555 = 16, Not supported.
kCMPixelFormat_16BE565 = 'B565', Not supported.
kCMPixelFormat_16LE555 = 'L555', FOURCC_RGBO
kCMPixelFormat_16LE565 = 'L565', FOURCC_RGBP
kCMPixelFormat_16LE5551 = '5551', FOURCC_RGBO
kCMPixelFormat_422YpCbCr8 = '2vuy', FOURCC_UYVY
kCMPixelFormat_422YpCbCr8_yuvs = 'yuvs', FOURCC_YUY2
kCMPixelFormat_444YpCbCr8 = 'v308', FOURCC_I444 ?
kCMPixelFormat_4444YpCbCrA8 = 'v408', Not supported.
kCMPixelFormat_422YpCbCr16 = 'v216', Not supported.
kCMPixelFormat_422YpCbCr10 = 'v210', FOURCC_V210 previously. Removed now.
kCMPixelFormat_444YpCbCr10 = 'v410', Not supported.
kCMPixelFormat_8IndexedGray_WhiteIsZero = 0x00000028, Not supported.
};
# FOURCC (Four Character Code) List
The following is extracted from video_common.h as a complete list of formats supported by libyuv.
enum FourCC {
// 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
// 4 Secondary RGB formats: 4 Bayer Patterns.
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
// 1 Primary Compressed YUV format.
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
// 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
// 1 Auxiliary compressed YUV format set aside for capturer.
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
# The ARGB FOURCC
There are 4 ARGB layouts - ARGB, BGRA, ABGR and RGBA. ARGB is most common by far, used for screen formats, and windows webcam drivers.
The fourcc describes the order of channels in a ***register***.
A fourcc provided by a capturer can be thought of as a string, e.g. "ARGB".
On little endian machines, as an int, this would have 'A' in the lowest byte. The FOURCC macro reverses the order:
#define FOURCC(a, b, c, d) (((uint32)(a)) | ((uint32)(b) << 8) | ((uint32)(c) << 16) | ((uint32)(d) << 24))
So the "ARGB" string, read as a uint32, is
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B')
If you were to read ARGB pixels as uint32's, the alpha would be in the high byte, and the blue in the lowest byte. In memory, these are stored little endian, so 'B' is first, then 'G', 'R' and 'A' last.
When calling conversion functions, the names match the FOURCC, so in this case it would be I420ToARGB().
All formats can be converted to/from ARGB.
Most 'planar_functions' work on ARGB (e.g. ARGBBlend).
Some are channel order agnostic (e.g. ARGBScale).
Some functions are symmetric (e.g. ARGBToBGRA is the same as BGRAToARGB, so it's a macro).
ARGBBlend expects preattenuated ARGB. The R,G,B are premultiplied by alpha. Other functions don't care.

View File

@ -0,0 +1,420 @@
# Getting Started
How to get and build the libyuv code.
## Pre-requisites
You'll need to have depot tools installed: https://www.chromium.org/developers/how-tos/install-depot-tools
Refer to chromium instructions for each platform for other prerequisites.
## Getting the Code
Create a working directory, enter it, and run:
gclient config https://chromium.googlesource.com/libyuv/libyuv
gclient sync
Then you'll get a .gclient file like:
solutions = [
{ "name" : "libyuv",
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
"deps_file" : "DEPS",
"managed" : True,
"custom_deps" : {
},
"safesync_url": "",
},
];
For iOS add `;target_os=['ios'];` to your OSX .gclient and run `GYP_DEFINES="OS=ios" gclient sync.`
Browse the Git repository: https://chromium.googlesource.com/libyuv/libyuv/+/master
### Android
For Android add `;target_os=['android'];` to your Linux .gclient
solutions = [
{ "name" : "libyuv",
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
"deps_file" : "DEPS",
"managed" : True,
"custom_deps" : {
},
"safesync_url": "",
},
];
target_os = ["android", "unix"];
Then run:
export GYP_DEFINES="OS=android"
gclient sync
Caveat: There's an error with Google Play services updates. If you get the error "Your version of the Google Play services library is not up to date", run the following:
cd chromium/src
./build/android/play_services/update.py download
cd ../..
For Windows the gclient sync must be done from an Administrator command prompt.
The sync will generate native build files for your environment using gyp (Windows: Visual Studio, OSX: XCode, Linux: make). This generation can also be forced manually: `gclient runhooks`
To get just the source (not buildable):
git clone https://chromium.googlesource.com/libyuv/libyuv
## Building the Library and Unittests
### Windows
set GYP_DEFINES=target_arch=ia32
call python gyp_libyuv -fninja -G msvs_version=2013
ninja -j7 -C out\Release
ninja -j7 -C out\Debug
set GYP_DEFINES=target_arch=x64
call python gyp_libyuv -fninja -G msvs_version=2013
ninja -C out\Debug_x64
ninja -C out\Release_x64
#### Building with clangcl
set GYP_DEFINES=clang=1 target_arch=ia32 libyuv_enable_svn=1
set LLVM_REPO_URL=svn://svn.chromium.org/llvm-project
call python tools\clang\scripts\update.py
call python gyp_libyuv -fninja libyuv_test.gyp
ninja -C out\Debug
ninja -C out\Release
### OSX
Clang 64 bit shown. Remove `clang=1` for GCC and change x64 to ia32 for 32 bit.
GYP_DEFINES="clang=1 target_arch=x64" ./gyp_libyuv
ninja -j7 -C out/Debug
ninja -j7 -C out/Release
GYP_DEFINES="clang=1 target_arch=ia32" ./gyp_libyuv
ninja -j7 -C out/Debug
ninja -j7 -C out/Release
### iOS
http://www.chromium.org/developers/how-tos/build-instructions-ios
Add to .gclient last line: `target_os=['ios'];`
armv7
GYP_DEFINES="OS=ios target_arch=armv7 target_subarch=arm32" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
arm64
GYP_DEFINES="OS=ios target_arch=arm64 target_subarch=arm64" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
both armv7 and arm64 (fat)
GYP_DEFINES="OS=ios target_arch=armv7 target_subarch=both" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
simulator
GYP_DEFINES="OS=ios target_arch=ia32 target_subarch=arm32" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_sim" ./gyp_libyuv
ninja -j7 -C out_sim/Debug-iphonesimulator libyuv_unittest
ninja -j7 -C out_sim/Release-iphonesimulator libyuv_unittest
### Android
https://code.google.com/p/chromium/wiki/AndroidBuildInstructions
Add to .gclient last line: `target_os=['android'];`
armv7
GYP_DEFINES="OS=android" GYP_CROSSCOMPILE=1 ./gyp_libyuv
ninja -j7 -C out/Debug libyuv_unittest_apk
ninja -j7 -C out/Release libyuv_unittest_apk
arm64
GYP_DEFINES="OS=android target_arch=arm64 target_subarch=arm64" GYP_CROSSCOMPILE=1 ./gyp_libyuv
ninja -j7 -C out/Debug libyuv_unittest_apk
ninja -j7 -C out/Release libyuv_unittest_apk
ia32
GYP_DEFINES="OS=android target_arch=ia32" GYP_CROSSCOMPILE=1 ./gyp_libyuv
ninja -j7 -C out/Debug libyuv_unittest_apk
ninja -j7 -C out/Release libyuv_unittest_apk
GYP_DEFINES="OS=android target_arch=ia32 android_full_debug=1" GYP_CROSSCOMPILE=1 ./gyp_libyuv
ninja -j7 -C out/Debug libyuv_unittest_apk
mipsel
GYP_DEFINES="OS=android target_arch=mipsel" GYP_CROSSCOMPILE=1 ./gyp_libyuv
ninja -j7 -C out/Debug libyuv_unittest_apk
ninja -j7 -C out/Release libyuv_unittest_apk
arm64 disassembly:
third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
Running tests:
util/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=*
Running test as benchmark:
util/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* -a "--libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=-1"
Running test with C code:
util/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* -a "--libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=0 --libyuv_cpu_info=0"
#### Building with GN
call gn gen out/Release "--args=is_debug=false target_cpu=\"x86\""
call gn gen out/Debug "--args=is_debug=true target_cpu=\"x86\""
ninja -C out/Release
ninja -C out/Debug
### Linux
GYP_DEFINES="target_arch=x64" ./gyp_libyuv
ninja -j7 -C out/Debug
ninja -j7 -C out/Release
GYP_DEFINES="target_arch=ia32" ./gyp_libyuv
ninja -j7 -C out/Debug
ninja -j7 -C out/Release
#### CentOS
On CentOS 32 bit the following work around allows a sync:
export GYP_DEFINES="host_arch=ia32"
gclient sync
### Windows Shared Library
Modify libyuv.gyp from 'static_library' to 'shared_library', and add 'LIBYUV_BUILDING_SHARED_LIBRARY' to 'defines'.
gclient runhooks
After this command follow the building the library instructions above.
If you get a compile error for atlthunk.lib on Windows, read http://www.chromium.org/developers/how-tos/build-instructions-windows
### Build targets
ninja -C out/Debug libyuv
ninja -C out/Debug libyuv_unittest
ninja -C out/Debug compare
ninja -C out/Debug convert
ninja -C out/Debug psnr
ninja -C out/Debug cpuid
## Building the Library with make
### Linux
make -j7 V=1 -f linux.mk
make -j7 V=1 -f linux.mk clean
make -j7 V=1 -f linux.mk CXX=clang++
## Building the Library with cmake
Install cmake: http://www.cmake.org/
Default debug build:
mkdir out
cd out
cmake ..
cmake --build .
Release build/install
mkdir out
cd out
cmake -DCMAKE_INSTALL_PREFIX="/usr/lib" -DCMAKE_BUILD_TYPE="Release" ..
cmake --build . --config Release
sudo cmake --build . --target install --config Release
### Windows 8 Phone
Pre-requisite:
* Install Visual Studio 2012 and Arm to your environment.<br>
Then:
call "c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat"
or with Visual Studio 2013:
call "c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_arm\vcvarsx86_arm.bat"
nmake /f winarm.mk clean
nmake /f winarm.mk
### Windows Shared Library
Modify libyuv.gyp from 'static_library' to 'shared_library', and add 'LIBYUV_BUILDING_SHARED_LIBRARY' to 'defines'. Then run this.
gclient runhooks
After this command follow the building the library instructions above.
If you get a compile error for atlthunk.lib on Windows, read http://www.chromium.org/developers/how-tos/build-instructions-windows
### 64 bit Windows
set GYP_DEFINES=target_arch=x64
gclient runhooks V=1
### ARM Linux
export GYP_DEFINES="target_arch=arm"
export CROSSTOOL=`<path>`/arm-none-linux-gnueabi
export CXX=$CROSSTOOL-g++
export CC=$CROSSTOOL-gcc
export AR=$CROSSTOOL-ar
export AS=$CROSSTOOL-as
export RANLIB=$CROSSTOOL-ranlib
gclient runhooks
## Running Unittests
### Windows
out\Release\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter="*"
### OSX
out/Release/libyuv_unittest --gtest_filter="*"
### Linux
out/Release/libyuv_unittest --gtest_filter="*"
Replace --gtest_filter="*" with specific unittest to run. May include wildcards. e.g.
out/Release/libyuv_unittest --gtest_filter=libyuvTest.I420ToARGB_Opt
## CPU Emulator tools
### Intel SDE (Software Development Emulator)
Pre-requisite: Install IntelSDE for Windows: http://software.intel.com/en-us/articles/intel-software-development-emulator
Then run:
c:\intelsde\sde -hsw -- out\release\libyuv_unittest.exe --gtest_filter=*
## Memory tools
### Running Dr Memory memcheck for Windows
Pre-requisite: Install Dr Memory for Windows and add it to your path: http://www.drmemory.org/docs/page_install_windows.html
set GYP_DEFINES=build_for_tool=drmemory target_arch=ia32
call python gyp_libyuv -fninja -G msvs_version=2013
ninja -C out\Debug
drmemory out\Debug\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter=*
### Running UBSan
See Chromium instructions for sanitizers: https://www.chromium.org/developers/testing/undefinedbehaviorsanitizer
Sanitizers available: TSan, MSan, ASan, UBSan, LSan
GYP_DEFINES='ubsan=1' gclient runhooks
ninja -C out/Release
### Running Valgrind memcheck
Memory errors and race conditions can be found by running tests under special memory tools. [Valgrind] [1] is an instrumentation framework for building dynamic analysis tools. Various tests and profilers are built upon it to find memory handling errors and memory leaks, for instance.
[1]: http://valgrind.org
solutions = [
{ "name" : "libyuv",
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
"deps_file" : "DEPS",
"managed" : True,
"custom_deps" : {
"libyuv/chromium/src/third_party/valgrind": "https://chromium.googlesource.com/chromium/deps/valgrind/binaries",
},
"safesync_url": "",
},
]
Then run:
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=memcheck" python gyp_libyuv
ninja -C out/Debug
valgrind out/Debug/libyuv_unittest
For more information, see http://www.chromium.org/developers/how-tos/using-valgrind
### Running Thread Sanitizer (TSan)
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=tsan" python gyp_libyuv
ninja -C out/Debug
valgrind out/Debug/libyuv_unittest
For more info, see http://www.chromium.org/developers/how-tos/using-valgrind/threadsanitizer
### Running Address Sanitizer (ASan)
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=asan" python gyp_libyuv
ninja -C out/Debug
valgrind out/Debug/libyuv_unittest
For more info, see http://dev.chromium.org/developers/testing/addresssanitizer
## Benchmarking
The unittests can be used to benchmark.
### Windows
set LIBYUV_WIDTH=1280
set LIBYUV_HEIGHT=720
set LIBYUV_REPEAT=999
set LIBYUV_FLAGS=-1
out\Release\libyuv_unittest.exe --gtest_filter=*I420ToARGB_Opt
### Linux and Mac
LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=1000 out/Release/libyuv_unittest --gtest_filter=*I420ToARGB_Opt
libyuvTest.I420ToARGB_Opt (547 ms)
Indicates 0.547 ms/frame for 1280 x 720.
## Making a change
gclient sync
git checkout -b mycl -t origin/master
git pull
<edit files>
git add -u
git commit -m "my change"
git cl lint
git cl try
git cl upload -r a-reviewer@chromium.org -s
<once approved..>
git cl land

View File

@ -0,0 +1,103 @@
# Introduction
Rotation by multiples of 90 degrees allows mobile devices to rotate webcams from landscape to portrait. The higher level functions ConvertToI420 and ConvertToARGB allow rotation of any format. Optimized functionality is supported for I420, ARGB, NV12 and NV21.
# ConvertToI420
int ConvertToI420(const uint8* src_frame, size_t src_size,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 format);
This function crops, converts, and rotates. You should think of it in that order.
* Crops the original image, which is src_width x src_height, to crop_width x crop_height. At this point the image is still not rotated.
* Converts the cropped region to I420. Supports inverted source for src_height negative.
* Rotates by 90, 180 or 270 degrees.
The buffer the caller provides should account for rotation. It is especially important to get the stride of the destination correct.
e.g.
640 x 480 NV12 captured<br>
Crop to 640 x 360<br>
Rotate by 90 degrees to 360 x 640.<br>
Caller passes stride of 360 for Y and 360 / 2 for U and V.<br>
Caller passes crop_width of 640, crop_height of 360.<br>
# ConvertToARGB
int ConvertToARGB(const uint8* src_frame, size_t src_size,
uint8* dst_argb, int dst_stride_argb,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 format);
Same as I420, but implementation is less optimized - reads columns and writes rows, 16 bytes at a time.
# I420Rotate
int I420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height, enum RotationMode mode);
Destination is rotated, so pass dst_stride_y etc that consider rotation.<br>
Rotate by 180 can be done in place, but 90 and 270 can not.
Implementation (Neon/SSE2) uses 8 x 8 block transpose, so best efficiency is with sizes and pointers that are aligned to 8.
Cropping can be achieved by adjusting the src_y/u/v pointers and src_width, src_height.
Lower level plane functions are provided, allowing other planar formats to be rotated. (e.g. I444)
For other planar YUV formats (I444, I422, I411, I400, NV16, NV24), the planar functions are exposed and can be called directly
// Rotate a plane by 0, 90, 180, or 270.
int RotatePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int src_width, int src_height, enum RotationMode mode);
# ARGBRotate
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int src_width, int src_height, enum RotationMode mode);
Same as I420, but implementation is less optimized - reads columns and writes rows.
Rotate by 90, or any angle, can be achieved using ARGBAffine.
# Mirror - Horizontal Flip
Mirror functions for horizontally flipping an image, which can be useful for 'self view' of a webcam.
int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
Mirror functionality can also be achieved with the I420Scale and ARGBScale functions by passing negative width and/or height.
# Invert - Vertical Flip
Inverting can be achieved with almost any libyuv function by passing a negative source height.
I420Mirror and ARGBMirror can also be used to rotate by 180 degrees by passing a negative height.

View File

@ -0,0 +1,29 @@
#!/usr/bin/env python
#
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# This script is used to run the vs_toolchain.py script to download the
# Visual Studio toolchain. It's just a temporary measure while waiting for the
# Chrome team to move find_depot_tools into src/build to get rid of these
# workarounds (similar one in gyp_libyuv).
import os
import sys
# Root of the checkout: the directory holding this script, with symlinks
# resolved so the result does not depend on how the script was invoked.
checkout_root = os.path.dirname(os.path.realpath(__file__))

# Make <checkout_root>/build and <checkout_root>/tools/find_depot_tools
# importable. Presumably vs_toolchain.py lives in 'build' and needs
# find_depot_tools at import time -- confirm against the checkout layout.
sys.path.insert(0, os.path.join(checkout_root, 'build'))
sys.path.insert(0, os.path.join(checkout_root, 'tools', 'find_depot_tools'))

# Must come after the sys.path changes above, otherwise the module is not
# found.
import vs_toolchain


if __name__ == '__main__':
  # Delegate entirely to vs_toolchain and propagate its return code.
  sys.exit(vs_toolchain.main())

97
libs/libyuv/gyp_libyuv Executable file
View File

@ -0,0 +1,97 @@
#!/usr/bin/env python
#
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# This script is used to run GYP for libyuv. It contains selected parts of the
# main function from the src/build/gyp_chromium file.
import glob
import os
import shlex
import sys
checkout_root = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(checkout_root, 'build'))
import gyp_chromium
import gyp_helper
import vs_toolchain
sys.path.insert(0, os.path.join(checkout_root, 'tools', 'gyp', 'pylib'))
import gyp
def GetSupplementalFiles():
  """Returns a list of the supplemental files that are included in all GYP
  sources.

  The list is every 'supplement.gypi' found exactly one directory level below
  checkout_root; it is empty when none exist.
  """
  # Can't use the one in gyp_chromium since the directory location of the root
  # is different.
  return glob.glob(os.path.join(checkout_root, '*', 'supplement.gypi'))
if __name__ == '__main__':
  # Build the gyp command line from the user's arguments plus the defaults
  # below, run gyp, then optionally copy the Visual Studio runtime DLLs.
  args = sys.argv[1:]

  # This could give false positives since it doesn't actually do real option
  # parsing.  Oh well.
  gyp_file_specified = False
  for arg in args:
    if arg.endswith('.gyp'):
      gyp_file_specified = True
      break

  # If we didn't get a file, assume 'all.gyp' in the root of the checkout.
  if not gyp_file_specified:
    # Because of a bug in gyp, simply adding the abspath to all.gyp doesn't
    # work, but chdir'ing and adding the relative path does. Spooky :/
    os.chdir(checkout_root)
    args.append('all.gyp')

  # There shouldn't be a circular dependency relationship between .gyp files,
  args.append('--no-circular-check')

  # Default to ninja unless GYP_GENERATORS is set.
  if not os.environ.get('GYP_GENERATORS'):
    os.environ['GYP_GENERATORS'] = 'ninja'

  # DEPOT_TOOLS_WIN_TOOLCHAIN defaults to '1' (enabled) when unset; set it to
  # '0' to skip the vs_toolchain environment setup and the DLL copy at the end.
  vs2013_runtime_dll_dirs = None
  if int(os.environ.get('DEPOT_TOOLS_WIN_TOOLCHAIN', '1')):
    vs2013_runtime_dll_dirs = vs_toolchain.SetEnvironmentAndGetRuntimeDllDirs()

  # Enforce gyp syntax checking. This adds about 20% execution time.
  args.append('--check')

  # NOTE(review): this calls gyp_chromium.GetSupplementalFiles(), not the
  # GetSupplementalFiles() defined in this file, which is therefore unused.
  # Confirm which root directory is meant to be searched.
  supplemental_includes = gyp_chromium.GetSupplementalFiles()
  gyp_vars_dict = gyp_chromium.GetGypVars(supplemental_includes)

  # Automatically turn on crosscompile support for platforms that need it.
  # An explicit GYP_CROSSCOMPILE in the environment is never overridden.
  if all(('ninja' in os.environ.get('GYP_GENERATORS', ''),
          gyp_vars_dict.get('OS') in ['android', 'ios'],
          'GYP_CROSSCOMPILE' not in os.environ)):
    os.environ['GYP_CROSSCOMPILE'] = '1'

  args.extend(['-I' + i for i in
               gyp_chromium.additional_include_files(supplemental_includes,
                                                     args)])

  # Set the gyp depth variable to the root of the checkout.
  args.append('--depth=' + os.path.relpath(checkout_root))

  # Parenthesised single-argument form prints the same text under Python 2
  # and is also valid Python 3 syntax.
  print('Updating projects from gyp files...')
  sys.stdout.flush()

  # Off we go...
  gyp_rc = gyp.main(args)

  if vs2013_runtime_dll_dirs:
    # The pair is unpacked as (x64, x86) but handed to CopyVsRuntimeDlls as
    # (x86, x64); the swap is intentional in the original code.
    x64_runtime, x86_runtime = vs2013_runtime_dll_dirs
    vs_toolchain.CopyVsRuntimeDlls(
        os.path.join(checkout_root, gyp_chromium.GetOutputDirectory()),
        (x86_runtime, x64_runtime))

  # Exit with gyp's own return code.
  sys.exit(gyp_rc)

28
libs/libyuv/gyp_libyuv.py Normal file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env python
#
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# This script is a modified copy of the src/build/gyp_chromium.py file.
# It is needed for parallel processing.
# This file is (possibly, depending on python version) imported by
# gyp_libyuv when GYP_PARALLEL=1 and it creates sub-processes
# through the multiprocessing library.
# Importing in Python 2.6 (fixed in 2.7) on Windows doesn't search for
# imports that don't end in .py (and aren't directories with an
# __init__.py). This wrapper makes "import gyp_libyuv" work with
# those old versions and makes it possible to execute gyp_libyuv.py
# directly on Windows where the extension is useful.
import os
# Absolute directory that contains this wrapper file.
path = os.path.abspath(os.path.dirname(__file__))
# Execute the extension-less 'gyp_libyuv' script located in that directory,
# in this module's namespace (execfile is Python 2 only).
execfile(os.path.join(path, 'gyp_libyuv'))

View File

@ -0,0 +1,32 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_H_ // NOLINT
#define INCLUDE_LIBYUV_H_
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/mjpeg_decoder.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/scale.h"
#include "libyuv/scale_argb.h"
#include "libyuv/scale_row.h"
#include "libyuv/version.h"
#include "libyuv/video_common.h"
#endif // INCLUDE_LIBYUV_H_ NOLINT

View File

@ -0,0 +1,118 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ // NOLINT
#define INCLUDE_LIBYUV_BASIC_TYPES_H_
#include <stddef.h> // for NULL, size_t
#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
#include <sys/types.h> // for uintptr_t on x86
#else
#include <stdint.h> // for uintptr_t
#endif
// Integer typedefs (int8..int64, uint8..uint64), the INT64_C/UINT64_C literal
// suffix macros and the INT64_F printf length modifier.
// The whole section is skipped when GG_LONGLONG or INT_TYPES_DEFINED is
// already defined; presumably this lets an embedding project that supplies
// the same names avoid redefinition -- confirm with the includers.
#ifndef GG_LONGLONG
#ifndef INT_TYPES_DEFINED
#define INT_TYPES_DEFINED
// NOTE(review): COMPILER_MSVC is not defined anywhere in this header;
// presumably the build system sets it. Without it, MSVC takes the
// 'long long' branch below.
#ifdef COMPILER_MSVC
typedef unsigned __int64 uint64;
typedef __int64 int64;
#ifndef INT64_C
#define INT64_C(x) x ## I64
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UI64
#endif
#define INT64_F "I64"
#else // COMPILER_MSVC
// LP64 targets other than OpenBSD and Apple: 64-bit type is 'long'.
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long uint64; // NOLINT
typedef long int64; // NOLINT
#ifndef INT64_C
#define INT64_C(x) x ## L
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UL
#endif
#define INT64_F "l"
#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
// Everything else: 64-bit type is 'long long'.
typedef unsigned long long uint64; // NOLINT
typedef long long int64; // NOLINT
#ifndef INT64_C
#define INT64_C(x) x ## LL
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## ULL
#endif
#define INT64_F "ll"
#endif // __LP64__
#endif // COMPILER_MSVC
// The narrower types map directly onto int/short/char; their widths are
// assumed (32/16/8 bits), not checked here.
typedef unsigned int uint32;
typedef int int32;
typedef unsigned short uint16; // NOLINT
typedef short int16; // NOLINT
typedef unsigned char uint8;
typedef signed char int8;
#endif // INT_TYPES_DEFINED
#endif // GG_LONGLONG
// Detect compiler is for x86 or x64.
// Defines CPU_X86 for both 32-bit and 64-bit x86 (GCC/clang and MSVC macros).
#if defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86)
#define CPU_X86 1
#endif
// Detect compiler is for ARM.
// Only the 32-bit ARM macros are tested here; __aarch64__ does not set
// CPU_ARM.
#if defined(__arm__) || defined(_M_ARM)
#define CPU_ARM 1
#endif
// ALIGNP(p, t): round pointer p up to the next multiple of t and yield it as
// uint8*. The mask arithmetic is only correct when t is a power of two.
// t is expanded twice, so it should not have side effects.
// An existing ALIGNP definition takes precedence over this one.
#ifndef ALIGNP
#ifdef __cplusplus
#define ALIGNP(p, t) \
(reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
((t) - 1)) & ~((t) - 1))))
#else
#define ALIGNP(p, t) \
((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */
#endif
#endif
// LIBYUV_API: export/import decoration placed on public declarations.
// A definition supplied before this header is included is left untouched.
//  - Windows/Cygwin: dllexport when LIBYUV_BUILDING_SHARED_LIBRARY is defined,
//    dllimport when LIBYUV_USING_SHARED_LIBRARY is defined, otherwise empty.
//  - GCC >= 4 (non-Apple) with either of those macros defined: default
//    symbol visibility.
//  - Anything else: empty.
#if !defined(LIBYUV_API)
#if defined(_WIN32) || defined(__CYGWIN__)
#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
#define LIBYUV_API __declspec(dllexport)
#elif defined(LIBYUV_USING_SHARED_LIBRARY)
#define LIBYUV_API __declspec(dllimport)
#else
#define LIBYUV_API
#endif // LIBYUV_BUILDING_SHARED_LIBRARY
#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
(defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
defined(LIBYUV_USING_SHARED_LIBRARY))
#define LIBYUV_API __attribute__ ((visibility ("default")))
#else
#define LIBYUV_API
#endif // __GNUC__
#endif // LIBYUV_API
// Boolean type and values usable from both C and C++.
#define LIBYUV_BOOL int
#define LIBYUV_FALSE 0
#define LIBYUV_TRUE 1
// Visual C x86 or GCC little endian.
// LIBYUV_LITTLE_ENDIAN is defined for x86/x64, 32-bit ARM, or whenever the
// compiler reports little-endian via __BYTE_ORDER__; it is left undefined
// otherwise.
#if defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86) || \
defined(__arm__) || defined(_M_ARM) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define LIBYUV_LITTLE_ENDIAN
#endif
#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ NOLINT

View File

@ -0,0 +1,78 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT
#define INCLUDE_LIBYUV_COMPARE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Compute a hash for specified memory. Seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
LIBYUV_API
uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
// Sum Square Error - used to compute Mean Square Error or PSNR.
// Compares 'count' bytes of src_a against src_b.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a,
const uint8* src_b, int count);
// Plane variant: takes a width x height region with a separate stride for
// each source (presumably bytes per row -- confirm in compare.cc).
LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
// NOTE(review): presumably the PSNR value reported when there is no error
// (identical inputs) -- confirm in compare.cc. Being 'static const' in a
// header, every translation unit gets its own copy.
static const int kMaxPsnr = 128;
// Converts a sum of squared errors over 'count' samples to a PSNR value.
LIBYUV_API
double SumSquareErrorToPsnr(uint64 sse, uint64 count);
// PSNR between two single planes.
LIBYUV_API
double CalcFramePsnr(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
// PSNR between two I420 frames (Y, U and V planes of frame a vs. frame b).
LIBYUV_API
double I420Psnr(const uint8* src_y_a, int stride_y_a,
const uint8* src_u_a, int stride_u_a,
const uint8* src_v_a, int stride_v_a,
const uint8* src_y_b, int stride_y_b,
const uint8* src_u_b, int stride_u_b,
const uint8* src_v_b, int stride_v_b,
int width, int height);
// SSIM between two single planes.
LIBYUV_API
double CalcFrameSsim(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
// SSIM between two I420 frames (Y, U and V planes of frame a vs. frame b).
LIBYUV_API
double I420Ssim(const uint8* src_y_a, int stride_y_a,
const uint8* src_u_a, int stride_u_a,
const uint8* src_v_a, int stride_v_a,
const uint8* src_y_b, int stride_y_b,
const uint8* src_u_b, int stride_u_b,
const uint8* src_v_b, int stride_v_b,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_COMPARE_H_ NOLINT

View File

@ -0,0 +1,78 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_COMPARE_ROW_H_ // NOLINT
#define INCLUDE_LIBYUV_COMPARE_ROW_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Feature-detection macros for the compare row functions. Each HAS_* macro
// advertises that the matching SIMD implementation may be used on this
// compiler/target.

// x86 SIMD is disabled for PNaCl, managed C++ (CLR), and 32-bit x86 builds
// compiled without SSE2.
#if defined(__pnacl__) || defined(__CLR_VER) || \
    (defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
    defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif  // VisualStudio >= 2012
// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif  // clang >= 3.4
#endif  // __clang__
// The following are available for Visual C and GCC:
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || (defined(__i386__) || defined(_M_IX86)))
#define HAS_HASHDJB2_SSE41
#define HAS_SUMSQUAREERROR_SSE2
#endif
// The following are available for Visual C and clangcl 32 bit:
// (HAS_HASHDJB2_AVX2 used to be defined a second time by an identical
// condition earlier in this file; both AVX2 macros are now defined only here.)
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
    (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_HASHDJB2_AVX2
#define HAS_SUMSQUAREERROR_AVX2
#endif
// The following are available for Neon:
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
#endif
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_COMPARE_ROW_H_ NOLINT

View File

@ -0,0 +1,245 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert I444 to I420.
LIBYUV_API
int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I422 to I420.
LIBYUV_API
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I411 to I420.
LIBYUV_API
int I411ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy I420 to I420.
#define I420ToI420 I420Copy
LIBYUV_API
int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I400 (grey) to I420.
LIBYUV_API
int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
#define J400ToJ420 I400ToI420
// Convert NV12 to I420.
LIBYUV_API
int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert NV21 to I420.
LIBYUV_API
int NV21ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_vu, int src_stride_vu,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I420.
LIBYUV_API
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert M420 to I420.
LIBYUV_API
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// BGRA little endian (argb in memory) to I420.
LIBYUV_API
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ABGR little endian (rgba in memory) to I420.
LIBYUV_API
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGBA little endian (abgr in memory) to I420.
LIBYUV_API
int RGBAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB little endian (bgr in memory) to I420.
LIBYUV_API
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB big endian (rgb in memory) to I420.
LIBYUV_API
int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB15 (RGBO fourcc) little endian to I420.
LIBYUV_API
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB12 (R444 fourcc) little endian to I420.
LIBYUV_API
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
#ifdef HAVE_JPEG
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToI420(const uint8* sample, size_t sample_size,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height,
int dst_width, int dst_height);
// Query size of MJPG in pixels.
LIBYUV_API
int MJPGSize(const uint8* sample, size_t sample_size,
int* width, int* height);
#endif
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane.
// Normally this would be the same as dst_width, with recommended alignment
// to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the I420 buffer according to rotation.
// "dst_stride_u" number of bytes in a row of the dst_u plane.
// Normally this would be the same as (dst_width + 1) / 2, with
// recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected.
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// "src_width" / "src_height" is size of src_frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "format" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 on success; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
int ConvertToI420(const uint8* src_frame, size_t src_size,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT

View File

@ -0,0 +1,313 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
// TODO(fbarchard): This set of functions should exactly match convert.h
// TODO(fbarchard): Add tests. Create random content of right size and convert
// with C vs Opt and or to I420 and compare.
// TODO(fbarchard): Some of these functions lack parameter setting.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Alias.
#define ARGBToARGB ARGBCopy
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to ARGB.
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ARGB.
LIBYUV_API
int I444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J444 to ARGB.
LIBYUV_API
int J444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ABGR.
LIBYUV_API
int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 with Alpha to preattenuated ARGB.
LIBYUV_API
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int attenuate);
// Convert I420 with Alpha to preattenuated ABGR.
LIBYUV_API
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height, int attenuate);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J400 (jpeg grey) to ARGB.
LIBYUV_API
int J400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alias.
#define YToARGB I400ToARGB
// Convert NV12 to ARGB.
LIBYUV_API
int NV12ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert NV21 to ARGB.
LIBYUV_API
int NV21ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_vu, int src_stride_vu,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert YUY2 to ARGB.
LIBYUV_API
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert UYVY to ARGB.
LIBYUV_API
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J420 to ARGB.
LIBYUV_API
int J420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J422 to ARGB.
LIBYUV_API
int J422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert J420 to ABGR.
LIBYUV_API
int J420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert J422 to ABGR.
LIBYUV_API
int J422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H420 to ARGB.
LIBYUV_API
int H420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H422 to ARGB.
LIBYUV_API
int H422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert H420 to ABGR.
LIBYUV_API
int H420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert H422 to ABGR.
LIBYUV_API
int H422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// ABGR little endian (rgba in memory) to ARGB.
LIBYUV_API
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGBA little endian (abgr in memory) to ARGB.
LIBYUV_API
int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Deprecated function name.
#define BG24ToARGB RGB24ToARGB
// RGB little endian (bgr in memory) to ARGB.
LIBYUV_API
int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB big endian (rgb in memory) to ARGB.
LIBYUV_API
int RAWToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB16 (RGBP fourcc) little endian to ARGB.
LIBYUV_API
int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB15 (RGBO fourcc) little endian to ARGB.
LIBYUV_API
int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB12 (R444 fourcc) little endian to ARGB.
LIBYUV_API
int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
#ifdef HAVE_JPEG
// src_width/height provided by capture
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* dst_argb, int dst_stride_argb,
int src_width, int src_height,
int dst_width, int dst_height);
#endif
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
// Normally this would be the same as dst_width * 4 (ARGB is 4 bytes per
// pixel), with recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the ARGB buffer according to rotation.
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// "src_width" / "src_height" is size of src_frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "format" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 on success; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
int ConvertToARGB(const uint8* src_frame, size_t src_size,
uint8* dst_argb, int dst_stride_argb,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT

View File

@ -0,0 +1,181 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_FROM_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// See Also convert.h for conversions from formats to I420.
// I420ToI420 is provided by convert.h as an alias for I420Copy.
LIBYUV_API
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int I420ToI444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int I420ToI411(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
LIBYUV_API
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// TODO(fbarchard): I420ToM420
LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
LIBYUV_API
int I420ToNV21(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_vu, int dst_stride_vu,
int width, int height);
LIBYUV_API
int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
LIBYUV_API
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The order of the dither matrix is first byte is upper left.
LIBYUV_API
int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
const uint8* dither4x4, int width, int height);
LIBYUV_API
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
LIBYUV_API
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ NOLINT

View File

@ -0,0 +1,190 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Copy ARGB to ARGB.
#define ARGBToARGB ARGBCopy
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To BGRA.
LIBYUV_API
int ARGBToBGRA(const uint8* src_argb, int src_stride_argb,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height);
// Convert ARGB To ABGR.
LIBYUV_API
int ARGBToABGR(const uint8* src_argb, int src_stride_argb,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert ARGB To RGBA.
LIBYUV_API
int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
// Convert ARGB To RGB24.
LIBYUV_API
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);
// Convert ARGB To RAW.
LIBYUV_API
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb, int dst_stride_rgb,
int width, int height);
// Convert ARGB To RGB565.
LIBYUV_API
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The order of the dither matrix is first byte is upper left.
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
// const uint8(*dither)[4][4];
LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither4x4, int width, int height);
// Convert ARGB To ARGB1555.
LIBYUV_API
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height);
// Convert ARGB To ARGB4444.
LIBYUV_API
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height);
// Convert ARGB To I444.
LIBYUV_API
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I422.
LIBYUV_API
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I420. (also in convert.h)
LIBYUV_API
int ARGBToI420(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to J422.
LIBYUV_API
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I411.
LIBYUV_API
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to J400. (JPeg full range).
LIBYUV_API
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
int width, int height);
// Convert ARGB to I400.
LIBYUV_API
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Convert ARGB to G. (Reverse of J400ToARGB, which replicates G back to ARGB)
LIBYUV_API
int ARGBToG(const uint8* src_argb, int src_stride_argb,
uint8* dst_g, int dst_stride_g,
int width, int height);
// Convert ARGB To NV12.
LIBYUV_API
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
// Convert ARGB To NV21.
// Takes one ARGB source plane and writes two destination planes: dst_y and
// dst_vu, each with its own stride argument.
// NOTE(review): dst_vu is presumably the interleaved V/U chroma plane implied
// by the NV21 name -- confirm against the implementation.
LIBYUV_API
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_vu, int dst_stride_vu,
               int width, int height);
// Convert ARGB To YUY2.
LIBYUV_API
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
uint8* dst_yuy2, int dst_stride_yuy2,
int width, int height);
// Convert ARGB To UYVY.
LIBYUV_API
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
uint8* dst_uyvy, int dst_stride_uyvy,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT

View File

@ -0,0 +1,80 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT
#define INCLUDE_LIBYUV_CPU_ID_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Internal flag to indicate cpuid requires initialization.
static const int kCpuInitialized = 0x1;
// These flags are only valid on ARM processors.
static const int kCpuHasARM = 0x2;
static const int kCpuHasNEON = 0x4;
// 0x8 reserved for future ARM flag.
// These flags are only valid on x86 processors.
static const int kCpuHasX86 = 0x10;
static const int kCpuHasSSE2 = 0x20;
static const int kCpuHasSSSE3 = 0x40;
static const int kCpuHasSSE41 = 0x80;
static const int kCpuHasSSE42 = 0x100;
static const int kCpuHasAVX = 0x200;
static const int kCpuHasAVX2 = 0x400;
static const int kCpuHasERMS = 0x800;
static const int kCpuHasFMA3 = 0x1000;
static const int kCpuHasAVX3 = 0x2000;
// 0x4000, 0x8000 reserved for future X86 flags.
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasDSPR2 = 0x20000;
// Internal function used to auto-init.
LIBYUV_API
int InitCpuFlags(void);
// Internal function for parsing /proc/cpuinfo.
LIBYUV_API
int ArmCpuCaps(const char* cpuinfo_name);
// Detect CPU has SSE2 etc.
// Test_flag parameter should be one of kCpuHas constants above.
// returns non-zero if instruction set is detected
static __inline int TestCpuFlag(int test_flag) {
LIBYUV_API extern int cpu_info_;
return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
}
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
// MaskCpuFlags(0) to disable all cpu specific optimizations.
LIBYUV_API
void MaskCpuFlags(int enable_flags);
// Low level cpuid for X86. Returns zeros on other CPUs.
// eax is the info type that you want.
// ecx is typically the cpu number, and should normally be zero.
LIBYUV_API
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CPU_ID_H_ NOLINT

View File

@ -0,0 +1,192 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT
#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
// NOTE: For a simplified public API use convert.h MJPGToI420().
struct jpeg_common_struct;
struct jpeg_decompress_struct;
struct jpeg_source_mgr;
namespace libyuv {
#ifdef __cplusplus
extern "C" {
#endif
// Declared with C linkage inside namespace libyuv.
// NOTE(review): presumably reports whether the sample_size bytes at sample
// look like a valid JPEG image - confirm in mjpeg_validate.cc.
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
#ifdef __cplusplus
} // extern "C"
#endif
static const uint32 kUnknownDataSize = 0xFFFFFFFF;
// Identifies the sub-sampling type of a JPEG image. Values of this type are
// returned by MJpegDecoder::JpegSubsamplingTypeHelper().
enum JpegSubsamplingType {
kJpegYuv420,
kJpegYuv422,
kJpegYuv411,
kJpegYuv444,
kJpegYuv400,
// NOTE(review): presumably returned when the sampling factors match none of
// the types above - confirm in mjpeg_decoder.cc.
kJpegUnknown
};
// A pointer to constant bytes paired with a length.
// MJpegDecoder keeps one (buf_) to hold the input data for a frame.
struct Buffer {
const uint8* data;  // Bytes are read-only through this pointer.
int len;  // NOTE(review): presumably the number of bytes at data - confirm.
};
// An array of Buffer entries together with a length and a position.
// NOTE(review): looks like a cursor over one or more input buffers consumed
// while decoding - confirm in mjpeg_decoder.cc.
struct BufferVector {
Buffer* buffers;
int len;  // NOTE(review): presumably the number of entries in buffers.
int pos;  // NOTE(review): presumably the index of the current entry.
};
struct SetJmpErrorMgr;
// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
// simply independent JPEG images with a fixed huffman table (which is omitted).
// It is rarely used in video transmission, but is common as a camera capture
// format, especially in Logitech devices. This class implements a decoder for
// MJPEG frames.
//
// See http://tools.ietf.org/html/rfc2435
class LIBYUV_API MJpegDecoder {
public:
// Callback type accepted by DecodeToCallback().
// NOTE(review): opaque is presumably the pointer given to DecodeToCallback(),
// data/strides presumably hold one entry per color component and rows is
// presumably the number of scanlines delivered in this call - confirm in
// mjpeg_decoder.cc.
typedef void (*CallbackFunction)(void* opaque,
const uint8* const* data,
const int* strides,
int rows);
// Color space identifiers; GetColorSpace() returns one of these.
// Only declared here; the values are defined outside this header.
static const int kColorSpaceUnknown;
static const int kColorSpaceGrayscale;
static const int kColorSpaceRgb;
static const int kColorSpaceYCbCr;
static const int kColorSpaceCMYK;
static const int kColorSpaceYCCK;
MJpegDecoder();
~MJpegDecoder();
// Loads a new frame, reads its headers, and determines the uncompressed
// image format.
// Returns LIBYUV_TRUE if image looks valid and format is supported.
// If return value is LIBYUV_TRUE, then the values for all the following
// getters are populated.
// src_len is the size of the compressed mjpeg frame in bytes.
LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
// Returns width of the last loaded frame in pixels.
int GetWidth();
// Returns height of the last loaded frame in pixels.
int GetHeight();
// Returns format of the last loaded frame. The return value is one of the
// kColorSpace* constants.
int GetColorSpace();
// Number of color components in the color space.
int GetNumComponents();
// Sample factors of the n-th component.
// NOTE(review): the difference between the "Samp" and "SubSamp" getters is
// not visible from this header - confirm in mjpeg_decoder.cc.
int GetHorizSampFactor(int component);
int GetVertSampFactor(int component);
int GetHorizSubSampFactor(int component);
int GetVertSubSampFactor(int component);
// Public for testability.
int GetImageScanlinesPerImcuRow();
// Public for testability.
int GetComponentScanlinesPerImcuRow(int component);
// Width of a component in bytes.
int GetComponentWidth(int component);
// Height of a component.
int GetComponentHeight(int component);
// Width of a component in bytes with padding for DCTSIZE. Public for testing.
int GetComponentStride(int component);
// Size of a component in bytes.
int GetComponentSize(int component);
// Call this after LoadFrame() if you decide you don't want to decode it
// after all.
LIBYUV_BOOL UnloadFrame();
// Decodes the entire image into a one-buffer-per-color-component format.
// dst_width must match exactly. dst_height must be <= to image height; if
// less, the image is cropped. "planes" must have size equal to at least
// GetNumComponents() and they must point to non-overlapping buffers of size
// at least GetComponentSize(i). The pointers in planes are incremented
// to point to after the end of the written data.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
// Decodes the entire image and passes the data via repeated calls to a
// callback function. Each call will get the data for a whole number of
// image scanlines.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
int dst_width, int dst_height);
// The helper function which recognizes the jpeg sub-sampling type.
// Static, so it can be called without a decoder instance.
static JpegSubsamplingType JpegSubsamplingTypeHelper(
int* subsample_x, int* subsample_y, int number_of_components);
private:
// Internal helpers. Their behavior is defined in the implementation file,
// which is not visible from this header.
// NOTE(review): AllocOutputBuffers/DestroyOutputBuffers presumably manage the
// num_outbufs_-sized arrays declared below - confirm.
void AllocOutputBuffers(int num_outbufs);
void DestroyOutputBuffers();
LIBYUV_BOOL StartDecode();
LIBYUV_BOOL FinishDecode();
void SetScanlinePointers(uint8** data);
LIBYUV_BOOL DecodeImcuRow();
int GetComponentScanlinePadding(int component);
// A buffer holding the input data for a frame.
Buffer buf_;
BufferVector buf_vec_;
// JPEG decoder state held by pointer; the jpeg_* struct types are only
// forward-declared at the top of this header, and SetJmpErrorMgr just above
// this class.
jpeg_decompress_struct* decompress_struct_;
jpeg_source_mgr* source_mgr_;
SetJmpErrorMgr* error_mgr_;
// LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
// GetComponentScanlinePadding() != 0.)
LIBYUV_BOOL has_scanline_padding_;
// Temporaries used to point to scanline outputs.
int num_outbufs_; // Outermost size of all arrays below.
uint8*** scanlines_;
int* scanlines_sizes_;
// Temporary buffer used for decoding when we can't decode directly to the
// output buffers. Large enough for just one iMCU row.
uint8** databuf_;
int* databuf_strides_;
};
} // namespace libyuv
#endif // __cplusplus
#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT

View File

@ -0,0 +1,495 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT
#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Copy a plane of data.
LIBYUV_API
void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// 16 bit variant of CopyPlane: same parameter list, with uint16 samples.
// NOTE(review): strides are presumably counted in uint16 elements rather than
// bytes - confirm in planar_functions.cc.
LIBYUV_API
void CopyPlane_16(const uint16* src_y, int src_stride_y,
uint16* dst_y, int dst_stride_y,
int width, int height);
// Set a plane of data to a 32 bit value.
LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value);
// Copy I400. Supports inverting.
LIBYUV_API
int I400ToI400(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
#define J400ToJ400 I400ToI400
// Copy I422 to I422.
#define I422ToI422 I422Copy
LIBYUV_API
int I422Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy I444 to I444.
#define I444ToI444 I444Copy
LIBYUV_API
int I444Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I422.
LIBYUV_API
int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I422.
LIBYUV_API
int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to NV12 (a Y plane plus a single UV plane).
LIBYUV_API
int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
// Convert UYVY to NV12 (a Y plane plus a single UV plane).
LIBYUV_API
int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v).
LIBYUV_API
int I420ToI400(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alias
#define J420ToJ400 I420ToI400
#define I420ToI420Mirror I420Mirror
// I420 mirror.
LIBYUV_API
int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Alias
#define I400ToI400Mirror I400Mirror
// I400 mirror. A single plane is mirrored horizontally.
// Pass negative height to achieve 180 degree rotation.
LIBYUV_API
int I400Mirror(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alias
#define ARGBToARGBMirror ARGBMirror
// ARGB mirror.
LIBYUV_API
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert NV12 to RGB565.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// I422ToARGB is in convert_argb.h
// Convert I422 to BGRA.
LIBYUV_API
int I422ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height);
// Convert I422 to ABGR.
LIBYUV_API
int I422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert I422 to RGBA.
LIBYUV_API
int I422ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
// Alias
#define RGB24ToRAW RAWToRGB24
// Convert RAW to RGB24. Through the alias above the same function is also
// used as RGB24ToRAW.
// NOTE(review): the alias implies the conversion is its own inverse
// (presumably a channel order swap) - confirm in planar_functions.cc.
LIBYUV_API
int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);
// Draw a rectangle into I420.
LIBYUV_API
int I420Rect(uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int x, int y, int width, int height,
int value_y, int value_u, int value_v);
// Draw a rectangle into ARGB.
LIBYUV_API
int ARGBRect(uint8* dst_argb, int dst_stride_argb,
int x, int y, int width, int height, uint32 value);
// Convert ARGB to gray scale ARGB.
LIBYUV_API
int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Make a rectangle of ARGB gray scale.
LIBYUV_API
int ARGBGray(uint8* dst_argb, int dst_stride_argb,
int x, int y, int width, int height);
// Make a rectangle of ARGB Sepia tone.
LIBYUV_API
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
int x, int y, int width, int height);
// Apply a matrix rotation to each ARGB pixel.
// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
// The next 4 coefficients apply to B, G, R, A and produce R of the output.
// The last 4 coefficients apply to B, G, R, A and produce A of the output.
LIBYUV_API
int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
const int8* matrix_argb,
int width, int height);
// Deprecated. Use ARGBColorMatrix instead.
// Apply a matrix rotation to each ARGB pixel.
// matrix_rgb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
// The last 4 coefficients apply to B, G, R, A and produce R of the output.
LIBYUV_API
int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
const int8* matrix_rgb,
int x, int y, int width, int height);
// Apply a color table to each ARGB pixel.
// Table contains 256 ARGB values.
LIBYUV_API
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb,
int x, int y, int width, int height);
// Apply a color table to each ARGB pixel but preserve destination alpha.
// Table contains 256 ARGB values.
LIBYUV_API
int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb,
int x, int y, int width, int height);
// Apply a luma/color table to each ARGB pixel but preserve destination alpha.
// Table contains 32768 values indexed by [Y][C] where Y is 7 bit luma from
// RGB (YJ style) and C is an 8 bit color component (R, G or B).
LIBYUV_API
int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
const uint8* luma_rgb_table,
int width, int height);
// Apply a 3 term polynomial to ARGB values.
// poly points to a 4x4 matrix. The first row is constants. The 2nd row is
// coefficients for b, g, r and a. The 3rd row is coefficients for b squared,
// g squared, r squared and a squared. The 4th row is coefficients for b to
// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and
// result clamped to 0 to 255.
// A polynomial approximation can be derived using software such as 'R'.
LIBYUV_API
int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
const float* poly,
int width, int height);
// Quantize a rectangle of ARGB. Alpha unaffected.
// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
// interval_size should be a value between 1 and 255.
// interval_offset should be a value between 0 and 255.
LIBYUV_API
int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
int scale, int interval_size, int interval_offset,
int x, int y, int width, int height);
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy Alpha channel of ARGB to alpha of ARGB.
LIBYUV_API
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy Y channel to Alpha of ARGB.
LIBYUV_API
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width);
// Get function to Alpha Blend ARGB pixels and store to destination.
LIBYUV_API
ARGBBlendRow GetARGBBlend();
// Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alpha Blend plane and store to destination.
// Source is not pre-multiplied by alpha.
LIBYUV_API
int BlendPlane(const uint8* src_y0, int src_stride_y0,
const uint8* src_y1, int src_stride_y1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Alpha Blend YUV images and store to destination.
// Source is not pre-multiplied by alpha.
// Alpha is full width x height and subsampled to half size to apply to UV.
LIBYUV_API
int I420Blend(const uint8* src_y0, int src_stride_y0,
const uint8* src_u0, int src_stride_u0,
const uint8* src_v0, int src_stride_v0,
const uint8* src_y1, int src_stride_y1,
const uint8* src_u1, int src_stride_u1,
const uint8* src_v1, int src_stride_v1,
const uint8* alpha, int alpha_stride,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Add ARGB image with ARGB image. Saturates to 255.
LIBYUV_API
int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
LIBYUV_API
int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to YUY2.
LIBYUV_API
int I422ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I422 to UYVY.
LIBYUV_API
int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert unattenuated ARGB to preattenuated ARGB.
LIBYUV_API
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert preattenuated ARGB to unattenuated ARGB.
LIBYUV_API
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Internal function - do not call directly.
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height);
// Blur ARGB image.
// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
// 16 byte boundary.
// dst_stride32_cumsum is number of ints in a row (width * 4).
// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5.
// Blur is optimized for radius of 5 (11x11) or less.
LIBYUV_API
int ARGBBlur(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height, int radius);
// Multiply ARGB image by ARGB value.
LIBYUV_API
int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
// Interpolate between two images using specified amount of interpolation
// (0 to 255) and store to destination.
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
// and 255 means 1% src0 and 99% src1.
LIBYUV_API
int InterpolatePlane(const uint8* src0, int src_stride0,
const uint8* src1, int src_stride1,
uint8* dst, int dst_stride,
int width, int height, int interpolation);
// Interpolate between two ARGB images using specified amount of interpolation
// Internally calls InterpolatePlane with width * 4 (bpp).
LIBYUV_API
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);
// Interpolate between two YUV images using specified amount of interpolation
// Internally calls InterpolatePlane on each plane where the U and V planes
// are half width and half height.
LIBYUV_API
int I420Interpolate(const uint8* src0_y, int src0_stride_y,
const uint8* src0_u, int src0_stride_u,
const uint8* src0_v, int src0_stride_v,
const uint8* src1_y, int src1_stride_y,
const uint8* src1_u, int src1_stride_u,
const uint8* src1_v, int src1_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height, int interpolation);
// Define LIBYUV_DISABLE_X86 when building for PNaCl, when __CLR_VER is
// defined, or for 32 bit x86 without SSE2. The x86 feature macros below are
// only defined when LIBYUV_DISABLE_X86 is not.
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ARGBAFFINEROW_SSE2
#endif
// Row function for copying pixels from a source with a slope to a row
// of destination. Useful for scaling, rotation, mirror, texture mapping.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes and must be aligned.
LIBYUV_API
int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
uint8* dst_argb, int dst_stride_argb,
const uint8* shuffler, int width, int height);
// Sobel ARGB effect with planar output.
LIBYUV_API
int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Sobel ARGB effect.
LIBYUV_API
int ARGBSobel(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
LIBYUV_API
int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ NOLINT

View File

@ -0,0 +1,117 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT
#define INCLUDE_LIBYUV_ROTATE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Supported rotation.
// Each enumerator's value is the rotation angle in degrees.
typedef enum RotationMode {
  kRotate0 = 0,      // No rotation.
  kRotate90 = 90,    // Rotate 90 degrees clockwise.
  kRotate180 = 180,  // Rotate 180 degrees.
  kRotate270 = 270,  // Rotate 270 degrees clockwise.
  // Deprecated names with the same values as the enumerators above.
  kRotateNone = 0,
  kRotateClockwise = 90,
  // No comma after the last enumerator: strict C89 and C++03 reject a
  // trailing comma, and this matches the FilterMode enum in scale.h.
  kRotateCounterClockwise = 270
} RotationModeEnum;
// Rotate I420 frame.
LIBYUV_API
int I420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height, enum RotationMode mode);
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height, enum RotationMode mode);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int src_width, int src_height, enum RotationMode mode);
// Rotate planes by 90, 180, 270. Deprecated.
LIBYUV_API
void RotatePlane90(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void RotatePlane180(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void RotatePlane270(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
// NOTE(review): presumably the 90 degree member of the RotateUV180 /
// RotateUV270 family declared after it (one interleaved UV input split into
// dst_a and dst_b while rotating) - confirm, including whether it is
// deprecated as well.
LIBYUV_API
void RotateUV90(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
// Rotations for when U and V are interleaved.
// These functions take one input pointer and
// split the data into two buffers while
// rotating them. Deprecated.
LIBYUV_API
void RotateUV180(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
LIBYUV_API
void RotateUV270(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
// The 90 and 270 functions are based on transposes.
// Doing a transpose with reversing the read/write
// order will result in a rotation by +- 90 degrees.
// Deprecated.
LIBYUV_API
void TransposePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_H_ NOLINT

View File

@ -0,0 +1,33 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For RotationMode.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Rotate ARGB frame
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int src_width, int src_height, enum RotationMode mode);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT

View File

@ -0,0 +1,116 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_ // NOLINT
#define INCLUDE_LIBYUV_ROTATE_ROW_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Define LIBYUV_DISABLE_X86 when building for PNaCl, when __CLR_VER is
// defined, or for 32 bit x86 without SSE2. The x86 HAS_TRANSPOSE* macros
// below are only defined when LIBYUV_DISABLE_X86 is not.
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#define HAS_TRANSPOSEWX8_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSEWX8_SSSE3
#endif
// The following are available for 64 bit GCC but not NaCL:
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
defined(__x86_64__)
#define HAS_TRANSPOSEWX8_FAST_SSSE3
#define HAS_TRANSPOSEUVWX8_SSE2
#endif
// The following are available when __ARM_NEON__, LIBYUV_NEON or __aarch64__
// is defined, but not for NaCL and not when LIBYUV_DISABLE_NEON is defined:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_TRANSPOSEWX8_NEON
#define HAS_TRANSPOSEUVWX8_NEON
#endif
// The following are available for MIPS builds with __mips_dsp defined and
// __mips_dsp_rev >= 2, but not for NaCL and not when LIBYUV_DISABLE_MIPS is
// defined:
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_TRANSPOSEWX8_DSPR2
#define HAS_TRANSPOSEUVWX8_DSPR2
#endif // defined(__mips__)
// Single plane transpose functions. Unlike the public API these are declared
// without LIBYUV_API.
// The name suffix identifies the implementation; _NEON, _SSSE3, _Fast_SSSE3
// and _DSPR2 have matching HAS_TRANSPOSEWX8_* macros defined above.
// NOTE(review): _C is presumably the portable C version, and the _Any_
// variants presumably accept widths the SIMD versions cannot handle
// directly - confirm in rotate_common.cc and rotate_any.cc.
// NOTE(review): TransposeWx8_Fast_DSPR2 has no matching HAS_ macro and no
// _Any_ declaration in this header - confirm it is still needed.
void TransposeWxH_C(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width, int height);
void TransposeWx8_C(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
// UV transpose functions: one source pointer, two destinations (dst_a and
// dst_b), each with its own stride. Unlike the public API these are declared
// without LIBYUV_API.
// The _SSE2, _NEON and _DSPR2 suffixes have matching HAS_TRANSPOSEUVWX8_*
// macros defined above.
// NOTE(review): _C is presumably the portable C version, and the _Any_
// variants presumably accept widths the SIMD versions cannot handle
// directly - confirm in rotate_common.cc and rotate_any.cc.
void TransposeUVWxH_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
void TransposeUVWx8_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_ROW_H_ NOLINT

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,103 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Supported filtering.
typedef enum FilterMode {
kFilterNone = 0, // Point sample; Fastest.
kFilterLinear = 1, // Filter horizontally only.
kFilterBilinear = 2, // Faster than box, but lower quality scaling down.
kFilterBox = 3 // Highest quality.
} FilterModeEnum;
// Scale a YUV plane.
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
int src_width, int src_height,
uint8* dst, int dst_stride,
int dst_width, int dst_height,
enum FilterMode filtering);
// 16 bit variant of ScalePlane: same parameter list, with uint16 samples.
// NOTE(review): strides are presumably counted in uint16 elements rather than
// bytes - confirm in scale.cc.
LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride,
int src_width, int src_height,
uint16* dst, int dst_stride,
int dst_width, int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
// used. This produces basic (blocky) quality at the fastest speed.
// If filtering is kFilterBilinear, interpolation is used to produce a better
// quality image, at the expense of speed.
// If filtering is kFilterBox, averaging is used to produce even better
// quality image, at further expense of speed.
// Returns 0 if successful.
LIBYUV_API
int I420Scale(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
int src_width, int src_height,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int dst_width, int dst_height,
enum FilterMode filtering);
// 16 bit variant of I420Scale: same parameter list, with uint16 samples.
// NOTE(review): presumably returns 0 if successful, like I420Scale - confirm
// in scale.cc.
LIBYUV_API
int I420Scale_16(const uint16* src_y, int src_stride_y,
const uint16* src_u, int src_stride_u,
const uint16* src_v, int src_stride_v,
int src_width, int src_height,
uint16* dst_y, int dst_stride_y,
uint16* dst_u, int dst_stride_u,
uint16* dst_v, int dst_stride_v,
int dst_width, int dst_height,
enum FilterMode filtering);
// The declarations below are only visible to C++ translation units.
#ifdef __cplusplus
// Legacy API. Deprecated.
// Unlike I420Scale, the three plane pointers come first, followed by the three
// strides, and the filter is a boolean |interpolate| instead of a FilterMode.
LIBYUV_API
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int src_stride_y, int src_stride_u, int src_stride_v,
int src_width, int src_height,
uint8* dst_y, uint8* dst_u, uint8* dst_v,
int dst_stride_y, int dst_stride_u, int dst_stride_v,
int dst_width, int dst_height,
LIBYUV_BOOL interpolate);
// Legacy API. Deprecated.
// Takes one pointer per image rather than one per plane.
// NOTE(review): presumably the I420 planes are packed contiguously behind
// src_i420/dst_i420 and dst_yoffset is a vertical offset in rows - confirm.
LIBYUV_API
int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
LIBYUV_BOOL interpolate);
// For testing, allow disabling of specialized scalers.
LIBYUV_API
void SetUseReferenceImpl(LIBYUV_BOOL use);
#endif // __cplusplus
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_H_ NOLINT

View File

@ -0,0 +1,56 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h" // For FilterMode
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Scale entry point for a single packed ARGB buffer: one pointer and one stride
// per image, plus source and destination dimensions and a FilterMode.
// NOTE(review): the int return presumably follows the "0 if successful"
// convention documented for I420Scale in scale.h - confirm.
LIBYUV_API
int ARGBScale(const uint8* src_argb, int src_stride_argb,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
int dst_width, int dst_height,
enum FilterMode filtering);
// Clipped scale takes destination rectangle coordinates for clip values.
// Same parameters as ARGBScale plus clip_x, clip_y, clip_width, clip_height.
LIBYUV_API
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
// Scale with YUV conversion to ARGB and clipping.
// The source is three separate Y/U/V planes tagged with src_fourcc; the
// destination is a single buffer tagged with dst_fourcc.
// NOTE(review): the set of supported fourcc values is not visible here.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint32 src_fourcc,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
uint32 dst_fourcc,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT

View File

@ -0,0 +1,497 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_ROW_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Feature detection. This section only defines macros; it declares nothing.
// LIBYUV_DISABLE_X86 is forced on when __pnacl__ or __CLR_VER is defined, or
// when targeting 32-bit x86 (__i386__) without __SSE2__.
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
#define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7
#endif // __GNUC__
// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
// Visual C 2012 required for AVX2.
// Only set for 32-bit MSVC (_M_IX86) that is not clang.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// The following are available on all x86 platforms:
// (i.e. when LIBYUV_DISABLE_X86 is not set and one of _M_IX86, __x86_64__ or
// __i386__ is defined).
// NOTE(review): each HAS_* macro presumably tells the scale sources that the
// matching specialised function is available - confirm in the .cc files.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_FIXEDDIV1_X86
#define HAS_FIXEDDIV_X86
#define HAS_SCALEARGBCOLS_SSE2
#define HAS_SCALEARGBCOLSUP2_SSE2
#define HAS_SCALEARGBFILTERCOLS_SSSE3
#define HAS_SCALEARGBROWDOWN2_SSE2
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
#define HAS_SCALECOLSUP2_SSE2
#define HAS_SCALEFILTERCOLS_SSSE3
#define HAS_SCALEROWDOWN2_SSSE3
#define HAS_SCALEROWDOWN34_SSSE3
#define HAS_SCALEROWDOWN38_SSSE3
#define HAS_SCALEROWDOWN4_SSSE3
#define HAS_SCALEADDROW_SSE2
#endif
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
// Gated on any one of the three *_HAS_AVX2 macros defined above.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif
// The following are available on Neon platforms:
// Enabled when __ARM_NEON__, LIBYUV_NEON or __aarch64__ is defined, unless
// LIBYUV_DISABLE_NEON or __native_client__ is defined.
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBCOLS_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEFILTERCOLS_NEON
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEARGBFILTERCOLS_NEON
#endif
// The following are available on Mips platforms:
// Requires __mips__, __mips_dsp and __mips_dsp_rev >= 2, and is switched off by
// LIBYUV_DISABLE_MIPS or __native_client__.
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_SCALEROWDOWN2_DSPR2
#define HAS_SCALEROWDOWN4_DSPR2
#define HAS_SCALEROWDOWN34_DSPR2
#define HAS_SCALEROWDOWN38_DSPR2
#endif
// Scale ARGB vertically with bilinear interpolation.
// NOTE(review): despite the wording above, the function is named
// ScalePlaneVertical and takes |bpp| and a FilterMode, so it is presumably
// shared by planar and packed formats, with bpp being bytes per pixel -
// confirm against the implementation.
void ScalePlaneVertical(int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int y, int dy,
int bpp, enum FilterMode filtering);
// uint16 counterpart of ScalePlaneVertical; the size argument is named |wpp|
// here instead of |bpp|.
// NOTE(review): wpp presumably means 16-bit words per pixel - confirm.
void ScalePlaneVertical_16(int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_argb, uint16* dst_argb,
int x, int y, int dy,
int wpp, enum FilterMode filtering);
// Simplify the filtering based on scale factors.
// Takes the requested FilterMode plus the source/destination dimensions and
// returns the FilterMode to use.
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
int dst_width, int dst_height,
enum FilterMode filtering);
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div);
int FixedDiv_X86(int num, int div);
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div);
int FixedDiv1_X86(int num, int div);
// Bind the generic names FixedDiv / FixedDiv1 to an implementation.
// Each name is selected by its own feature macro (HAS_FIXEDDIV_X86 and
// HAS_FIXEDDIV1_X86 respectively), so that enabling or disabling one x86
// routine can never make the other name resolve to a function that was not
// built. When both macros are defined together, as this header does, the
// result is the same as a single combined check.
#ifdef HAS_FIXEDDIV_X86
#define FixedDiv FixedDiv_X86
#else
#define FixedDiv FixedDiv_C
#endif
#ifdef HAS_FIXEDDIV1_X86
#define FixedDiv1 FixedDiv1_X86
#else
#define FixedDiv1 FixedDiv1_C
#endif
// Compute slope values for stepping.
// Inputs are the source/destination dimensions and the filter mode; results
// are written through the four pointers x, y, dx and dy.
// NOTE(review): the outputs are presumably 16.16 fixed point like the FixedDiv
// results - confirm against the implementation.
void ScaleSlope(int src_width, int src_height,
int dst_width, int dst_height,
enum FilterMode filtering,
int* x, int* y, int* dx, int* dy);
// Row down-scalers with a _C suffix, each declared in a uint8 and a uint16
// (_16) flavour. All share the signature (src_ptr, src_stride, dst, dst_width).
// NOTE(review): presumably these are the portable C versions that the
// SSSE3/AVX2/NEON/DSPR2 functions declared later in this header stand in for,
// and Down2 / Down4 / Down34 mean 1/2, 1/4 and 3/4 horizontal scaling -
// confirm in the implementation.
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
// Only the uint8 flavour of the _Odd variant is declared.
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* d, int dst_width);
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* d, int dst_width);
// Column scalers with a _C suffix. Note the argument order differs from the
// row scalers: destination first, then source, then (dst_width, x, dx).
// NOTE(review): x and dx are presumably a 16.16 fixed-point start position and
// step as produced by ScaleSlope - confirm.
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx);
// The Up2 variants leave their last two int parameters unnamed.
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int, int);
void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int, int);
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx);
// NOTE(review): the "64" variants take the same int x/dx arguments; presumably
// they use 64-bit arithmetic internally - confirm.
void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx);
// 3/8 row down-scalers with a _C suffix, in uint8 and uint16 (_16) flavours.
// NOTE(review): the _3_Box and _2_Box names presumably correspond to the
// "32x3 -> 12x1" and "32x2 -> 12x1" annotations on the NEON declarations
// further down this header - confirm.
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
// ScaleAddRow takes a source row and a destination row whose element type is
// twice as wide as the source (uint8 -> uint16, uint16 -> uint32).
// NOTE(review): presumably adds the source row into the destination
// accumulator - confirm.
void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
// ARGB row and column scalers with a _C suffix. Only uint8 versions are
// declared for ARGB.
// NOTE(review): dst_width is presumably counted in ARGB pixels rather than
// bytes - confirm.
void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
// The DownEven variants take an extra src_stepx argument.
void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
// ARGB column scalers: destination first, then source, then (dst_width, x, dx).
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int, int);
void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
// Specialized scalers for x86.
// These declarations are not wrapped in the HAS_* feature macros, so they are
// visible on every platform.
// NOTE(review): presumably the definitions only exist where the matching HAS_*
// macro is set - confirm.
void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// _Any_ and _Odd_ variants of the functions above, with identical signatures.
// NOTE(review): presumably these wrap the SIMD kernels to handle widths the
// kernels cannot process directly - confirm in the implementation.
void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// x86 versions of ScaleAddRow (uint8 source row, uint16 destination row).
void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
// x86 column scalers for planar data.
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
// ARGB Column functions
// SSE2/SSSE3 and NEON versions; all take (dst_argb, src_argb, dst_width, x, dx).
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
// Only NEON _Any_ column variants are declared; no SSE2/SSSE3 _Any_ column
// functions appear in this header.
void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx);
// ARGB Row functions
// SSE2 and NEON versions of the 1/2 down-scalers, followed by their _Any_
// variants.
void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
ptrdiff_t src_stride,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
// DownEven row scalers: same as above plus a src_stepx argument.
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx, uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
ptrdiff_t src_stride,
int src_stepx,
uint8* dst_argb, int dst_width);
// ScaleRowDown2Box also used by planar functions
// NEON downscalers with interpolation.
// Note - not static due to reuse in convert for 444 to 420.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// Down scale from 4 to 3 pixels. Use the NEON multilane read/write
// to load every 4th pixel into 4 different registers.
// Point samples 32 pixels to 24 pixels.
void ScaleRowDown34_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32 -> 12
void ScaleRowDown38_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x3 -> 12x1
void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// _Any_ and _Odd_ NEON variants, with the same signatures as the functions
// above.
// NOTE(review): presumably these handle widths the plain NEON kernels cannot
// process directly - confirm in the implementation.
void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32 -> 12
void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x3 -> 12x1
void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// 32x2 -> 12x1
void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
// NEON versions of ScaleAddRow (uint8 source row, uint16 destination row).
void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
// NEON planar column scalers.
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
// MIPS DSPR2 row down-scalers. They use the same (src_ptr, src_stride, dst,
// dst_width) signature as the other row scalers, and no _Any_ variants are
// declared for this family.
void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_ROW_H_ NOLINT

View File

@ -0,0 +1,16 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
// Library version as a single plain integer, usable in #if comparisons.
#define LIBYUV_VERSION 1577
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -0,0 +1,184 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Common definitions for video, including fourcc and VideoFormat.
#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
//////////////////////////////////////////////////////////////////////////////
// Definition of FourCC codes
//////////////////////////////////////////////////////////////////////////////
// Convert four characters to a FourCC code.
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
// constants are used in a switch.
// The first argument lands in the least-significant byte and the fourth in the
// most-significant byte of the resulting uint32 (a | b << 8 | c << 16 |
// d << 24). The C++ and C definitions differ only in cast syntax.
#ifdef __cplusplus
#define FOURCC(a, b, c, d) ( \
(static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
(static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
#else
#define FOURCC(a, b, c, d) ( \
((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
#endif
// Some pages discussing FourCC codes:
// http://www.fourcc.org/yuv.php
// http://v4l2spec.bytesex.org/spec/book1.htm
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
// FourCC codes grouped according to implementation efficiency.
// Primary formats should convert in 1 efficient step.
// Secondary formats are converted in 2 steps.
// Auxiliary formats call primary converters.
// FourCC codes known to the library. Every enumerator except FOURCC_ANY is
// built with the FOURCC() macro from four characters (or, for the CM32/CM24
// entries, from small integers).
enum FourCC {
// 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
// 4 Secondary RGB formats: 4 Bayer Patterns. deprecated.
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
// 1 Primary Compressed YUV format.
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
// 7 Auxiliary YUV variations: 3 with U and V planes swapped, 1 alias (YU12),
// and 3 further variants (J420, J400, H420).
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
// 17 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
// 1 Auxiliary compressed YUV format set aside for capturer.
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
// Match any fourcc.
FOURCC_ANY = -1,
};
// Bits-per-pixel value for each fourcc. Enumerator names mirror enum FourCC
// with a BPP_ infix; 0 means unknown.
// NOTE(review): FOURCC_BPP_JPEG and FOURCC_BPP_DMB1 are 1, while
// FOURCC_BPP_MJPG (the format enum FourCC documents them as aliases of) is 0 -
// confirm this is intentional.
// NOTE(review): there are no BPP entries for the L555, L565 and 5551 aliases
// declared in enum FourCC.
enum FourCCBpp {
// Canonical fourcc codes used in our code.
FOURCC_BPP_I420 = 12,
FOURCC_BPP_I422 = 16,
FOURCC_BPP_I444 = 24,
FOURCC_BPP_I411 = 12,
FOURCC_BPP_I400 = 8,
FOURCC_BPP_NV21 = 12,
FOURCC_BPP_NV12 = 12,
FOURCC_BPP_YUY2 = 16,
FOURCC_BPP_UYVY = 16,
FOURCC_BPP_M420 = 12,
FOURCC_BPP_Q420 = 12,
FOURCC_BPP_ARGB = 32,
FOURCC_BPP_BGRA = 32,
FOURCC_BPP_ABGR = 32,
FOURCC_BPP_RGBA = 32,
FOURCC_BPP_24BG = 24,
FOURCC_BPP_RAW = 24,
FOURCC_BPP_RGBP = 16,
FOURCC_BPP_RGBO = 16,
FOURCC_BPP_R444 = 16,
FOURCC_BPP_RGGB = 8,
FOURCC_BPP_BGGR = 8,
FOURCC_BPP_GRBG = 8,
FOURCC_BPP_GBRG = 8,
FOURCC_BPP_YV12 = 12,
FOURCC_BPP_YV16 = 16,
FOURCC_BPP_YV24 = 24,
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_H420 = 12,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
// Aliases.
FOURCC_BPP_IYUV = 12,
FOURCC_BPP_YU16 = 16,
FOURCC_BPP_YU24 = 24,
FOURCC_BPP_YUYV = 16,
FOURCC_BPP_YUVS = 16,
FOURCC_BPP_HDYC = 16,
FOURCC_BPP_2VUY = 16,
FOURCC_BPP_JPEG = 1,
FOURCC_BPP_DMB1 = 1,
FOURCC_BPP_BA81 = 8,
FOURCC_BPP_RGB3 = 24,
FOURCC_BPP_BGR3 = 24,
FOURCC_BPP_CM32 = 32,
FOURCC_BPP_CM24 = 24,
// Match any fourcc.
FOURCC_BPP_ANY = 0, // 0 means unknown.
};
// Converts fourcc aliases into canonical ones.
// Takes and returns a fourcc as a plain uint32 rather than as enum FourCC.
// NOTE(review): presumably a fourcc that is not an alias is returned
// unchanged - confirm in video_common.cc.
LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ NOLINT

149
libs/libyuv/libyuv.gyp Normal file
View File

@ -0,0 +1,149 @@
# Copyright 2011 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'includes': [
'libyuv.gypi',
],
# Make sure that if we are being compiled to an xcodeproj, nothing tries to
# include a .pch.
'xcode_settings': {
'GCC_PREFIX_HEADER': '',
'GCC_PRECOMPILE_PREFIX_HEADER': 'NO',
},
# Build variables. Names ending in '%' are defaults that an including build may
# override; names without '%' are set unconditionally here.
'variables': {
'use_system_libjpeg%': 0,
'libyuv_disable_jpeg%': 0,
# 'chromium_code' treats libyuv as internal and increases warning level.
'chromium_code': 1,
# clang compiler default variable usable by other apps that include libyuv.
'clang%': 0,
# Link-Time Optimizations.
'use_lto%': 0,
# build_neon starts at 0 and is switched to 1 by the condition below.
'build_neon': 0,
'conditions': [
# NEON is built for armv7, armv7s, arm with arm_version >= 7, or arm64
# targets, and only when arm_neon or arm_neon_optional is 1.
['(target_arch == "armv7" or target_arch == "armv7s" or \
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
and (arm_neon == 1 or arm_neon_optional == 1)',
{
'build_neon': 1,
}],
],
},
'targets': [
{
'target_name': 'libyuv',
# Change type to 'shared_library' to build .so or .dll files.
'type': 'static_library',
'variables': {
'optimize': 'max', # enable O2 and ltcg.
},
# Allows libyuv.a redistributable library without external dependencies.
'standalone_static_library': 1,
# Per-configuration settings for the libyuv target: NEON flags, JPEG support
# and the MemorySanitizer workaround.
'conditions': [
['build_neon != 0', {
'defines': [
'LIBYUV_NEON',
],
# 'cflags!' removes the listed flags from the inherited cflags.
'cflags!': [
'-mfpu=vfp',
'-mfpu=vfpv3',
'-mfpu=vfpv3-d16',
],
'conditions': [
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
['clang == 0 and use_lto == 1', {
'cflags!': [
'-flto',
'-ffat-lto-objects',
],
}],
# arm64 does not need -mfpu=neon option as neon is not optional
['target_arch != "arm64"', {
'cflags': [
'-mfpu=neon',
],
}],
],
}],
# JPEG support is enabled everywhere except iOS, unless libyuv_disable_jpeg
# is set to 1.
['OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG'
],
'conditions': [
# Caveat system jpeg support may not support motion jpeg
# The first branch is used when use_system_libjpeg == 1, the second
# (libjpeg_turbo) otherwise.
[ 'use_system_libjpeg == 1', {
'dependencies': [
'<(DEPTH)/third_party/libjpeg/libjpeg.gyp:libjpeg',
],
}, {
'dependencies': [
'<(DEPTH)/third_party/libjpeg_turbo/libjpeg.gyp:libjpeg',
],
}],
# Same test as above; additionally links against -ljpeg when the system
# libjpeg is requested.
[ 'use_system_libjpeg == 1', {
'link_settings': {
'libraries': [
'-ljpeg',
],
}
}],
],
}],
# MemorySanitizer does not support assembly code yet.
# http://crbug.com/344505
[ 'msan == 1', {
'defines': [
'LIBYUV_DISABLE_X86',
],
}],
], #conditions
'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU.
# 'LIBYUV_DISABLE_X86',
# 'LIBYUV_DISABLE_NEON',
# 'LIBYUV_DISABLE_MIPS',
# Enable the following macro to build libyuv as a shared library (dll).
# 'LIBYUV_USING_SHARED_LIBRARY',
# TODO(fbarchard): Make these into gyp defines.
],
'include_dirs': [
'include',
'.',
],
# Settings exported to targets that depend directly on libyuv: the same include
# directories as the target itself, plus an explicit dynamic-linker path on
# Android.
'direct_dependent_settings': {
'include_dirs': [
'include',
'.',
],
'conditions': [
# arm64 Android uses linker64; every other Android architecture uses linker.
['OS == "android" and target_arch == "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker64',
],
}],
['OS == "android" and target_arch != "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker',
],
}],
], #conditions
},
'sources': [
'<@(libyuv_sources)',
],
},
], # targets.
}
# Local Variables:
# tab-width:2
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=2 shiftwidth=2:

79
libs/libyuv/libyuv.gypi Normal file
View File

@ -0,0 +1,79 @@
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'variables': {
'libyuv_sources': [
# includes.
'include/libyuv.h',
'include/libyuv/basic_types.h',
'include/libyuv/compare.h',
'include/libyuv/convert.h',
'include/libyuv/convert_argb.h',
'include/libyuv/convert_from.h',
'include/libyuv/convert_from_argb.h',
'include/libyuv/cpu_id.h',
'include/libyuv/mjpeg_decoder.h',
'include/libyuv/planar_functions.h',
'include/libyuv/rotate.h',
'include/libyuv/rotate_argb.h',
'include/libyuv/rotate_row.h',
'include/libyuv/row.h',
'include/libyuv/scale.h',
'include/libyuv/scale_argb.h',
'include/libyuv/scale_row.h',
'include/libyuv/version.h',
'include/libyuv/video_common.h',
# sources.
'source/compare.cc',
'source/compare_common.cc',
'source/compare_gcc.cc',
'source/compare_neon.cc',
'source/compare_neon64.cc',
'source/compare_win.cc',
'source/convert.cc',
'source/convert_argb.cc',
'source/convert_from.cc',
'source/convert_from_argb.cc',
'source/convert_jpeg.cc',
'source/convert_to_argb.cc',
'source/convert_to_i420.cc',
'source/cpu_id.cc',
'source/mjpeg_decoder.cc',
'source/mjpeg_validate.cc',
'source/planar_functions.cc',
'source/rotate.cc',
'source/rotate_any.cc',
'source/rotate_argb.cc',
'source/rotate_common.cc',
'source/rotate_gcc.cc',
'source/rotate_mips.cc',
'source/rotate_neon.cc',
'source/rotate_neon64.cc',
'source/rotate_win.cc',
'source/row_any.cc',
'source/row_common.cc',
'source/row_gcc.cc',
'source/row_mips.cc',
'source/row_neon.cc',
'source/row_neon64.cc',
'source/row_win.cc',
'source/scale.cc',
'source/scale_any.cc',
'source/scale_argb.cc',
'source/scale_common.cc',
'source/scale_gcc.cc',
'source/scale_mips.cc',
'source/scale_neon.cc',
'source/scale_neon64.cc',
'source/scale_win.cc',
'source/video_common.cc',
],
}
}

View File

@ -0,0 +1,37 @@
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
# Pull in the shared 'libyuv_sources' list (libyuv.gypi) and the Native
# Client build rules.
'includes': [
'libyuv.gypi',
'../../native_client/build/untrusted.gypi',
],
'targets': [
{
# Target type is 'none'; the variables below presumably drive the rules
# from untrusted.gypi that produce the library -- TODO confirm.
'target_name': 'libyuv_nacl',
'type': 'none',
'variables': {
# Output library name; of the three toolchain switches only the
# pnacl/newlib one is enabled.
'nlib_target': 'libyuv_nacl.a',
'build_glibc': 0,
'build_newlib': 0,
'build_pnacl_newlib': 1,
},
# Header search path used when building this target.
'include_dirs': [
'include',
],
# Same header search path, exported to targets that depend on this one.
'direct_dependent_settings': {
'include_dirs': [
'include',
],
},
# Expands to every entry of the 'libyuv_sources' variable.
'sources': [
'<@(libyuv_sources)',
],
}, # target libyuv_nacl
]
}

227
libs/libyuv/libyuv_test.gyp Normal file
View File

@ -0,0 +1,227 @@
# Copyright 2011 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'variables': {
'libyuv_disable_jpeg%': 0,
},
'targets': [
{
'target_name': 'libyuv_unittest',
'type': '<(gtest_target_type)',
'dependencies': [
'libyuv.gyp:libyuv',
'testing/gtest.gyp:gtest',
'third_party/gflags/gflags.gyp:gflags',
],
'direct_dependent_settings': {
'defines': [
'GTEST_RELATIVE_PATH',
],
},
'export_dependent_settings': [
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
# headers
'unit_test/unit_test.h',
# sources
'unit_test/basictypes_test.cc',
'unit_test/compare_test.cc',
'unit_test/color_test.cc',
'unit_test/convert_test.cc',
'unit_test/cpu_test.cc',
'unit_test/math_test.cc',
'unit_test/planar_test.cc',
'unit_test/rotate_argb_test.cc',
'unit_test/rotate_test.cc',
'unit_test/scale_argb_test.cc',
'unit_test/scale_test.cc',
'unit_test/unit_test.cc',
'unit_test/video_common_test.cc',
],
'conditions': [
['OS=="linux"', {
'cflags': [
'-fexceptions',
],
}],
[ 'OS == "ios" and target_subarch == 64', {
'defines': [
'LIBYUV_DISABLE_NEON'
],
}],
[ 'OS == "ios"', {
'xcode_settings': {
'DEBUGGING_SYMBOLS': 'YES',
'DEBUG_INFORMATION_FORMAT' : 'dwarf-with-dsym',
# Work around compile issue with isosim.mm, see
# https://code.google.com/p/libyuv/issues/detail?id=548 for details.
'WARNING_CFLAGS': [
'-Wno-sometimes-uninitialized',
],
},
'cflags': [
'-Wno-sometimes-uninitialized',
],
}],
[ 'OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG',
],
}],
['OS=="android"', {
'dependencies': [
'<(DEPTH)/testing/android/native_test.gyp:native_test_native_code',
],
}],
# TODO(YangZhang): These lines can be removed when high accuracy
# YUV to RGB to Neon is ported.
[ '(target_arch == "armv7" or target_arch == "armv7s" \
or (target_arch == "arm" and arm_version >= 7) \
or target_arch == "arm64") \
and (arm_neon == 1 or arm_neon_optional == 1)', {
'defines': [
'LIBYUV_NEON'
],
}],
# MemorySanitizer does not support assembly code yet.
# http://crbug.com/344505
[ 'msan == 1', {
'defines': [
'LIBYUV_DISABLE_X86',
],
}],
], # conditions
'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU.
# 'LIBYUV_DISABLE_X86',
# 'LIBYUV_DISABLE_NEON',
# 'LIBYUV_DISABLE_MIPS',
# Enable the following macro to build libyuv as a shared library (dll).
# 'LIBYUV_USING_SHARED_LIBRARY',
],
},
{
'target_name': 'compare',
'type': 'executable',
'dependencies': [
'libyuv.gyp:libyuv',
],
'sources': [
# sources
'util/compare.cc',
],
'conditions': [
['OS=="linux"', {
'cflags': [
'-fexceptions',
],
}],
], # conditions
},
{
'target_name': 'convert',
'type': 'executable',
'dependencies': [
'libyuv.gyp:libyuv',
],
'sources': [
# sources
'util/convert.cc',
],
'conditions': [
['OS=="linux"', {
'cflags': [
'-fexceptions',
],
}],
], # conditions
},
# TODO(fbarchard): Enable SSE2 and OpenMP for better performance.
{
'target_name': 'psnr',
'type': 'executable',
'sources': [
# sources
'util/psnr_main.cc',
'util/psnr.cc',
'util/ssim.cc',
],
'dependencies': [
'libyuv.gyp:libyuv',
],
'conditions': [
[ 'OS == "ios" and target_subarch == 64', {
'defines': [
'LIBYUV_DISABLE_NEON'
],
}],
[ 'OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG',
],
}],
], # conditions
},
{
'target_name': 'cpuid',
'type': 'executable',
'sources': [
# sources
'util/cpuid.c',
],
'dependencies': [
'libyuv.gyp:libyuv',
],
},
], # targets
'conditions': [
['OS=="android"', {
'targets': [
{
# TODO(kjellander): Figure out what to change in build/apk_test.gypi
# so it can be used instead of the copied code below. Using it in its
# current version was not possible, since the target name starts with
# 'lib', which somewhere confuses the variables.
'target_name': 'libyuv_unittest_apk',
'type': 'none',
'variables': {
# These are used to configure java_apk.gypi included below.
'test_type': 'gtest',
'apk_name': 'libyuv_unittest',
'intermediate_dir': '<(PRODUCT_DIR)/libyuv_unittest_apk',
'final_apk_path': '<(intermediate_dir)/libyuv_unittest-debug.apk',
'java_in_dir': '<(DEPTH)/testing/android/native_test/java',
'native_lib_target': 'libyuv_unittest',
'gyp_managed_install': 0,
},
'includes': [ 'build/java_apk.gypi' ],
'dependencies': [
'<(DEPTH)/base/base.gyp:base_java',
'<(DEPTH)/build/android/pylib/device/commands/commands.gyp:chromium_commands',
'<(DEPTH)/build/android/pylib/remote/device/dummy/dummy.gyp:remote_device_dummy_apk',
'<(DEPTH)/testing/android/appurify_support.gyp:appurify_support_java',
'<(DEPTH)/testing/android/on_device_instrumentation.gyp:reporter_java',
'<(DEPTH)/tools/android/android_tools.gyp:android_tools',
'libyuv_unittest',
],
},
],
}],
],
}
# Local Variables:
# tab-width:2
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=2 shiftwidth=2:

52
libs/libyuv/linux.mk Normal file
View File

@ -0,0 +1,52 @@
# This is a generic makefile for libyuv for gcc.
# make -f linux.mk CXX=clang++
#
# Builds the static library libyuv.a from the portable, x86 (gcc) and MIPS
# sources, plus the "convert" test utility.

CXX?=g++
CXXFLAGS?=-O2 -fomit-frame-pointer
CXXFLAGS+=-Iinclude/

# Object files that make up libyuv.a.
LOCAL_OBJ_FILES := \
	source/compare.o \
	source/compare_common.o \
	source/compare_gcc.o \
	source/convert.o \
	source/convert_argb.o \
	source/convert_from.o \
	source/convert_from_argb.o \
	source/convert_to_argb.o \
	source/convert_to_i420.o \
	source/cpu_id.o \
	source/planar_functions.o \
	source/rotate.o \
	source/rotate_any.o \
	source/rotate_argb.o \
	source/rotate_common.o \
	source/rotate_gcc.o \
	source/rotate_mips.o \
	source/row_any.o \
	source/row_common.o \
	source/row_mips.o \
	source/row_gcc.o \
	source/scale.o \
	source/scale_any.o \
	source/scale_argb.o \
	source/scale_common.o \
	source/scale_gcc.o \
	source/scale_mips.o \
	source/video_common.o

# Suffix rule: compile each .cc file into the .o next to it.
.cc.o:
	$(CXX) -c $(CXXFLAGS) $*.cc -o $*.o

# "all" and "clean" do not name real files. Declaring them phony keeps a
# stray file called "all" or "clean" from masking the rules.
.PHONY: all clean

all: libyuv.a convert

libyuv.a: $(LOCAL_OBJ_FILES)
	$(AR) $(ARFLAGS) $@ $(LOCAL_OBJ_FILES)

# A test utility that uses libyuv conversion.
convert: util/convert.cc libyuv.a
	$(CXX) $(CXXFLAGS) -Iutil/ -o $@ util/convert.cc libyuv.a

clean:
	/bin/rm -f source/*.o *.ii *.s libyuv.a convert

13
libs/libyuv/public.mk Normal file
View File

@ -0,0 +1,13 @@
# This file contains all the common make variables which are useful for
# anyone depending on this library.
# Note that dependencies on NDK are not directly listed since NDK auto adds
# them.
# Header search path. LIBYUV_PATH is not assigned in this file, so the
# including makefile is presumably expected to define it first -- confirm.
LIBYUV_INCLUDES := $(LIBYUV_PATH)/include
# Extra C flags, C++ flags, linker libraries and dependent modules for
# users of the library; all four are empty in this file.
LIBYUV_C_FLAGS :=
LIBYUV_CPP_FLAGS :=
LIBYUV_LDLIBS :=
LIBYUV_DEP_MODULES :=

519
libs/libyuv/setup_links.py Executable file
View File

@ -0,0 +1,519 @@
#!/usr/bin/env python
# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
"""Setup links to a Chromium checkout for WebRTC.
WebRTC standalone shares a lot of dependencies and build tools with Chromium.
To do this, many of the paths of a Chromium checkout are emulated by creating
symlinks to files and directories. This script handles the setup of the
symlinks needed to achieve this.
It also handles cleanup of the legacy Subversion-based approach that was used
before Chrome switched over their master repo from Subversion to Git.
"""
import ctypes
import errno
import logging
import optparse
import os
import shelve
import shutil
import subprocess
import sys
import textwrap
DIRECTORIES = [
'build',
'buildtools',
'google_apis', # Needed by build/common.gypi.
'net',
'testing',
'third_party/binutils',
'third_party/boringssl',
'third_party/colorama',
'third_party/drmemory',
'third_party/expat',
'third_party/icu',
'third_party/instrumented_libraries',
'third_party/jsoncpp',
'third_party/libjpeg',
'third_party/libjpeg_turbo',
'third_party/libsrtp',
'third_party/libudev',
'third_party/libvpx_new',
'third_party/libyuv',
'third_party/llvm-build',
'third_party/lss',
'third_party/nss',
'third_party/ocmock',
'third_party/openmax_dl',
'third_party/opus',
'third_party/proguard',
'third_party/protobuf',
'third_party/sqlite',
'third_party/syzygy',
'third_party/usrsctp',
'third_party/yasm',
'third_party/zlib',
'tools/clang',
'tools/generate_library_loader',
'tools/gn',
'tools/gyp',
'tools/memory',
'tools/protoc_wrapper',
'tools/python',
'tools/swarming_client',
'tools/valgrind',
'tools/vim',
'tools/win',
]
from sync_chromium import get_target_os_list
target_os = get_target_os_list()
if 'android' in target_os:
DIRECTORIES += [
'base',
'third_party/android_platform',
'third_party/android_testrunner',
'third_party/android_tools',
'third_party/appurify-python',
'third_party/ashmem',
'third_party/catapult',
'third_party/ijar',
'third_party/jsr-305',
'third_party/junit',
'third_party/libevent',
'third_party/libxml',
'third_party/mockito',
'third_party/modp_b64',
'third_party/requests',
'third_party/robolectric',
'tools/android',
'tools/grit',
'tools/relocation_packer',
'tools/telemetry',
]
if 'ios' in target_os:
DIRECTORIES.append('third_party/class-dump')
FILES = {
'tools/find_depot_tools.py': None,
'tools/isolate_driver.py': None,
'third_party/BUILD.gn': None,
}
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
CHROMIUM_CHECKOUT = os.path.join('chromium', 'src')
LINKS_DB = 'links'
# Version management to make future upgrades/downgrades easier to support.
SCHEMA_VERSION = 1
def query_yes_no(question, default=False):
  """Ask a yes/no question via raw_input() and return the answer.

  Modified from http://stackoverflow.com/a/3041990.

  Args:
    question: Text written to stdout in front of the "[y/n]" hint.
    default: True or False is returned when the user just presses Enter (and
        is shown capitalized in the hint). None means there is no default,
        so an explicit answer is required.

  Returns:
    True for an answer that is a prefix of 'yes', False for a prefix of 'no'.
  """
  prompt = " [%s/%%s]: "
  prompt = prompt % ('Y' if default is True else 'y')
  prompt = prompt % ('N' if default is False else 'n')

  if default is None:
    default = 'INVALID'

  while True:
    sys.stdout.write(question + prompt)
    choice = raw_input().lower()
    if choice == '':
      # The empty string is a prefix of every string, so it must never reach
      # the startswith() checks below: without a default it would be taken
      # as "yes". Re-prompt instead.
      if default != 'INVALID':
        return default
    elif 'yes'.startswith(choice):
      return True
    elif 'no'.startswith(choice):
      return False

    sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
# Actions
class Action(object):
  """Base class for one planned filesystem step.

  Subclasses override announce() and doit(); the versions here do nothing.
  """

  def __init__(self, dangerous):
    # Stored as given; CreateLinks lists actions whose flag is truthy under
    # "Dangerous actions" before running anything.
    self.dangerous = dangerous

  def announce(self, planning):
    """Log a description of this action.

    Args:
      planning - True iff we're in the planning stage, False if we're in the
                 doit stage.
    """

  def doit(self, links_db):
    """Execute the action, recording what we did to links_db, if necessary."""
class Remove(Action):
  """Action that deletes a single path with os.remove()."""

  def __init__(self, path, dangerous):
    super(Remove, self).__init__(dangerous)
    self._priority = 0
    self._path = path

  def announce(self, planning):
    # Dangerous removals are reported as files at warning level, the others
    # as links at info level.
    if self.dangerous:
      log, filesystem_type = logging.warn, 'file'
    else:
      log, filesystem_type = logging.info, 'link'

    if planning:
      log('Planning to remove %s: %s', filesystem_type, self._path)
    else:
      log('Removing %s: %s', filesystem_type, self._path)

  def doit(self, _):
    os.remove(self._path)
class Rmtree(Action):
  """Action that deletes a whole directory tree; always flagged dangerous."""

  def __init__(self, path):
    super(Rmtree, self).__init__(dangerous=True)
    self._priority = 0
    self._path = path

  def announce(self, planning):
    if planning:
      message = 'Planning to remove directory: %s'
    else:
      message = 'Removing directory: %s'
    logging.warn(message, self._path)

  def doit(self, _):
    if not sys.platform.startswith('win'):
      shutil.rmtree(self._path)
      return
    # shutil.rmtree() doesn't work on Windows if any of the directories are
    # read-only, which svn repositories are.
    subprocess.check_call(['rd', '/q', '/s', self._path], shell=True)
class Makedirs(Action):
  """Action that creates a directory and any missing parents."""

  def __init__(self, path):
    super(Makedirs, self).__init__(dangerous=False)
    self._priority = 1
    self._path = path

  def doit(self, _):
    try:
      os.makedirs(self._path)
    except OSError as e:
      # An already existing directory is fine; anything else propagates.
      if e.errno == errno.EEXIST:
        return
      raise
class Symlink(Action):
  """Action that creates a link at link_path pointing to source_path."""

  def __init__(self, source_path, link_path):
    super(Symlink, self).__init__(dangerous=False)
    self._priority = 2
    self._source_path = source_path
    self._link_path = link_path

  def announce(self, planning):
    if planning:
      logging.info(
          'Planning to create link from %s to %s', self._link_path,
          self._source_path)
    else:
      logging.debug(
          'Linking from %s to %s', self._link_path, self._source_path)

  def doit(self, links_db):
    """Create the link and record it as links_db[source_path] = link_path."""
    # Files not in the root directory need relative path calculation.
    # On Windows, use absolute paths instead since NTFS doesn't seem to support
    # relative paths for symlinks.
    link_dir = os.path.dirname(self._link_path)
    if sys.platform.startswith('win') or link_dir == self._link_path:
      # The second condition only holds when the link path is its own parent
      # (e.g. a filesystem root). The original left source_path unassigned
      # there; an absolute target keeps the call well defined.
      source_path = os.path.abspath(self._source_path)
    else:
      source_path = os.path.relpath(self._source_path, link_dir)

    os.symlink(source_path, os.path.abspath(self._link_path))
    links_db[self._source_path] = self._link_path
class LinkError(IOError):
  """Raised when a link cannot be planned or created."""
# On Windows, install an os.symlink replacement built on the Win32
# CreateSymbolicLinkW call; other platforms keep the os.symlink they have.
if sys.platform.startswith('win'):
  def symlink(source_path, link_path):
    # Flag 1 is passed for a directory source, 0 otherwise.
    if os.path.isdir(source_path):
      flag = 1
    else:
      flag = 0
    created = ctypes.windll.kernel32.CreateSymbolicLinkW(
        unicode(link_path), unicode(source_path), flag)
    if not created:
      raise OSError('Failed to create symlink to %s. Notice that only NTFS '
                    'version 5.0 and up has all the needed APIs for '
                    'creating symlinks.' % source_path)
  os.symlink = symlink
class WebRTCLinkSetup(object):
  """Plans and applies the links from this checkout into CHROMIUM_CHECKOUT.

  Created links are recorded in links_db (source path -> link path) so that a
  later run can remove them again in CleanupLinks().
  """

  def __init__(self, links_db, force=False, dry_run=False, prompt=False):
    self._force = force
    self._dry_run = dry_run
    self._prompt = prompt
    self._links_db = links_db

  def CreateLinks(self, on_bot):
    """Plan and execute the actions needed for every FILES/DIRECTORIES entry.

    Exits the process with status 0 after announcing the plan on a dry run,
    and with status 1 when dangerous actions are planned but --force was not
    given or the user declines the prompt.

    Args:
      on_bot: True when running on a bot; suppresses the .gclient_entries
          removal and the closing hint.
    """
    logging.debug('CreateLinks')
    # First, make a plan of action
    actions = []

    for source_path, link_path in FILES.items():
      actions += self._ActionForPath(
          source_path, link_path, check_fn=os.path.isfile, check_msg='files')
    for source_dir in DIRECTORIES:
      actions += self._ActionForPath(
          source_dir, None, check_fn=os.path.isdir,
          check_msg='directories')

    if not on_bot and self._force:
      # When making the manual switch from legacy SVN checkouts to the new
      # Git-based Chromium DEPS, the .gclient_entries file that contains cached
      # URLs for all DEPS entries must be removed to avoid future sync problems.
      entries_file = os.path.join(os.path.dirname(ROOT_DIR), '.gclient_entries')
      if os.path.exists(entries_file):
        actions.append(Remove(entries_file, dangerous=True))

    # Run removals (priority 0) first, then directory creation (1), then
    # linking (2). A bare sort() ignores _priority because the action classes
    # define no ordering. The sort is stable, so planning order is kept
    # within each priority.
    actions.sort(key=lambda action: action._priority)

    if self._dry_run:
      for action in actions:
        action.announce(planning=True)
      logging.info('Not doing anything because dry-run was specified.')
      sys.exit(0)

    if any(a.dangerous for a in actions):
      logging.warn('Dangerous actions:')
      for action in (a for a in actions if a.dangerous):
        action.announce(planning=True)
      sys.stdout.write('\n')

      if not self._force:
        logging.error(textwrap.dedent("""\
        @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
        A C T I O N R E Q I R E D
        @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
        Because chromium/src is transitioning to Git (from SVN), we needed to
        change the way that the WebRTC standalone checkout works. Instead of
        individually syncing subdirectories of Chromium in SVN, we're now
        syncing Chromium (and all of its DEPS, as defined by its own DEPS file),
        into the `chromium/src` directory.
        As such, all Chromium directories which are currently pulled by DEPS are
        now replaced with a symlink into the full Chromium checkout.
        To avoid disrupting developers, we've chosen to not delete your
        directories forcibly, in case you have some work in progress in one of
        them :).
        ACTION REQUIRED:
        Before running `gclient sync|runhooks` again, you must run:
        %s%s --force
        Which will replace all directories which now must be symlinks, after
        prompting with a summary of the work-to-be-done.
        """), 'python ' if sys.platform.startswith('win') else '', sys.argv[0])
        sys.exit(1)
      elif self._prompt:
        if not query_yes_no('Would you like to perform the above plan?'):
          sys.exit(1)

    for action in actions:
      action.announce(planning=False)
      action.doit(self._links_db)

    if not on_bot and self._force:
      logging.info('Completed!\n\nNow run `gclient sync|runhooks` again to '
                   'let the remaining hooks (that probably were interrupted) '
                   'execute.')

  def CleanupLinks(self):
    """Remove every link recorded in the links database.

    On a dry run nothing is deleted and the database is left untouched.
    """
    logging.debug('CleanupLinks')
    # Iterate over a snapshot: entries are deleted from the database inside
    # the loop.
    for source, link_path in list(self._links_db.items()):
      if source == 'SCHEMA_VERSION':
        continue
      if os.path.islink(link_path) or sys.platform.startswith('win'):
        # os.path.islink() always returns false on Windows
        # See http://bugs.python.org/issue13143.
        logging.debug('Removing link to %s at %s', source, link_path)
        if not self._dry_run:
          if os.path.exists(link_path):
            if sys.platform.startswith('win') and os.path.isdir(link_path):
              subprocess.check_call(['rmdir', '/q', '/s', link_path],
                                    shell=True)
            else:
              os.remove(link_path)
          del self._links_db[source]

  @staticmethod
  def _ActionForPath(source_path, link_path=None, check_fn=None,
                     check_msg=None):
    """Create zero or more Actions to link to a file or directory.

    This will be a symlink on POSIX platforms. On Windows this requires
    that NTFS is version 5.0 or higher (Vista or newer).

    Args:
      source_path: Path relative to the Chromium checkout root.
        For readability, the path may contain slashes, which will
        automatically be converted to the right path delimiter on Windows.
      link_path: The location for the link to create. If omitted it will be the
        same path as source_path.
      check_fn: A function returning true if the type of filesystem object is
        correct for the attempted call. Otherwise a LinkError mentioning
        check_msg is raised.
      check_msg: String used to inform the user of an invalid attempt to create
        a file.
    Returns:
      A list of Action objects.
    Raises:
      LinkError: the source exists but fails check_fn, or something of an
        unsupported type already occupies link_path.
    """
    def fix_separators(path):
      if sys.platform.startswith('win'):
        return path.replace(os.altsep, os.sep)
      else:
        return path

    assert check_fn
    assert check_msg
    link_path = link_path or source_path
    link_path = fix_separators(link_path)

    source_path = fix_separators(source_path)
    source_path = os.path.join(CHROMIUM_CHECKOUT, source_path)
    # check_fn has to be called on the path. Testing the function object
    # itself is always truthy (see the assert above), which made this check
    # unreachable.
    if os.path.exists(source_path) and not check_fn(source_path):
      raise LinkError('_LinkChromiumPath can only be used to link to %s: '
                      'Tried to link to: %s' % (check_msg, source_path))

    if not os.path.exists(source_path):
      logging.debug('Silently ignoring missing source: %s. This is to avoid '
                    'errors on platform-specific dependencies.', source_path)
      return []

    actions = []

    if os.path.exists(link_path) or os.path.islink(link_path):
      if os.path.islink(link_path):
        actions.append(Remove(link_path, dangerous=False))
      elif os.path.isfile(link_path):
        actions.append(Remove(link_path, dangerous=True))
      elif os.path.isdir(link_path):
        actions.append(Rmtree(link_path))
      else:
        raise LinkError('Don\'t know how to plan: %s' % link_path)

    # Create parent directories to the target link if needed.
    target_parent_dirs = os.path.dirname(link_path)
    if (target_parent_dirs and
        target_parent_dirs != link_path and
        not os.path.exists(target_parent_dirs)):
      actions.append(Makedirs(target_parent_dirs))

    actions.append(Symlink(source_path, link_path))

    return actions
def _initialize_database(filename):
  """Open the links shelve at filename and stamp it with SCHEMA_VERSION.

  If the stored schema version is set and differs from SCHEMA_VERSION, the
  previous contents are wiped first.

  Returns:
    The open shelve object; the caller is responsible for closing it.
  """
  db = shelve.open(filename)

  stored_version = db.get('SCHEMA_VERSION')
  if stored_version:
    if stored_version != SCHEMA_VERSION:
      logging.info('Found database with schema version %s while this script only '
                   'supports %s. Wiping previous database contents.', stored_version,
                   SCHEMA_VERSION)
      db.clear()

  db['SCHEMA_VERSION'] = SCHEMA_VERSION
  return db
def main():
  """Parse the command line, then clean up and recreate the links.

  Returns:
    The process exit status: 0 on success, 1 when not running as
    administrator on Windows, 2 when the Chromium checkout is missing and
    3 when a LinkError was raised.
  """
  def is_admin():
    # Only consulted on Windows, where os.getuid is normally absent.
    try:
      return os.getuid() == 0
    except AttributeError:
      return ctypes.windll.shell32.IsUserAnAdmin() != 0

  on_bot = os.environ.get('CHROME_HEADLESS') == '1'

  parser = optparse.OptionParser()
  parser.add_option('-d', '--dry-run', action='store_true', default=False,
                    help='Print what would be done, but don\'t perform any '
                         'operations. This will automatically set logging to '
                         'verbose.')
  parser.add_option('-c', '--clean-only', action='store_true', default=False,
                    help='Only clean previously created links, don\'t create '
                         'new ones. This will automatically set logging to '
                         'verbose.')
  parser.add_option('-f', '--force', action='store_true', default=on_bot,
                    help='Force link creation. CAUTION: This deletes existing '
                         'folders and files in the locations where links are '
                         'about to be created.')
  parser.add_option('-n', '--no-prompt', action='store_false', dest='prompt',
                    default=(not on_bot),
                    help='Prompt if we\'re planning to do a dangerous action')
  parser.add_option('-v', '--verbose', action='store_const',
                    const=logging.DEBUG, default=logging.INFO,
                    help='Print verbose output for debugging.')
  options, _ = parser.parse_args()

  # Any of these three modes switches logging to debug level.
  wants_debug = options.dry_run or options.force or options.clean_only
  if wants_debug:
    options.verbose = logging.DEBUG
  logging.basicConfig(format='%(message)s', level=options.verbose)

  # Work from the root directory of the checkout.
  os.chdir(os.path.dirname(os.path.abspath(__file__)))

  if sys.platform.startswith('win') and not is_admin():
    logging.error('On Windows, you now need to have administrator '
                  'privileges for the shell running %s (or '
                  '`gclient sync|runhooks`).\nPlease start another command '
                  'prompt as Administrator and try again.', sys.argv[0])
    return 1

  if not os.path.exists(CHROMIUM_CHECKOUT):
    logging.error('Cannot find a Chromium checkout at %s. Did you run "gclient '
                  'sync" before running this script?', CHROMIUM_CHECKOUT)
    return 2

  links_database = _initialize_database(LINKS_DB)
  try:
    symlink_creator = WebRTCLinkSetup(links_database, options.force,
                                      options.dry_run, options.prompt)
    symlink_creator.CleanupLinks()
    if not options.clean_only:
      symlink_creator.CreateLinks(on_bot)
  except LinkError as e:
    sys.stderr.write('%s\n' % e.message)
    return 3
  finally:
    # Runs on every path, including the early return above.
    links_database.close()
  return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,340 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/compare.h"
#include <float.h>
#include <math.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Hashes |count| bytes starting at |src|, continuing from |seed|, and returns
// the updated hash. A seed of 5381 is recommended.
// The input is fed to the row hasher in blocks of at most 32768 bytes; the
// final 0..15 bytes always go through HashDjb2_C.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
  const int kBlockSize = 1 << 15;  // 32768;
  int tail;
  // Row hasher: the C version unless a CPU-specific one is compiled in and
  // the CPU reports support for it.
  uint32 (*hash_row)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
  if (TestCpuFlag(kCpuHasSSE41)) {
    hash_row = HashDjb2_SSE41;
  }
#endif
#if defined(HAS_HASHDJB2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    hash_row = HashDjb2_AVX2;
  }
#endif

  // Whole blocks.
  for (; count >= (uint64)(kBlockSize); count -= kBlockSize) {
    seed = hash_row(src, kBlockSize, seed);
    src += kBlockSize;
  }
  // Rest of the data, rounded down to a multiple of 16 bytes.
  tail = (int)(count) & ~15;
  if (tail) {
    seed = hash_row(src, tail, seed);
    src += tail;
    count -= tail;
  }
  // Last 0..15 bytes.
  tail = (int)(count) & 15;
  if (tail) {
    seed = HashDjb2_C(src, tail, seed);
  }
  return seed;
}
// Scans one row of |width| 4-byte pixels and returns a fourcc as soon as a
// pixel has a byte 0 or byte 3 that is not 255:
//   byte 0 != 255 -> FOURCC_BGRA, otherwise byte 3 != 255 -> FOURCC_ARGB.
// Returns 0 when bytes 0 and 3 are 255 in every pixel of the row.
static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
  const uint8* pixel = argb;
  int x;
  // Two pixels (8 bytes) per iteration.
  for (x = 0; x < width - 1; x += 2, pixel += 8) {
    if (pixel[0] != 255) {  // First pixel, first byte.
      return FOURCC_BGRA;
    }
    if (pixel[3] != 255) {  // First pixel, 4th byte.
      return FOURCC_ARGB;
    }
    if (pixel[4] != 255) {  // Second pixel, first byte.
      return FOURCC_BGRA;
    }
    if (pixel[7] != 255) {  // Second pixel, 4th byte.
      return FOURCC_ARGB;
    }
  }
  // Odd width: one pixel is left over.
  if (width & 1) {
    if (pixel[0] != 255) {
      return FOURCC_BGRA;
    }
    if (pixel[3] != 255) {
      return FOURCC_ARGB;
    }
  }
  return 0;
}
// Scan an opaque argb image and return fourcc based on alpha offset.
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
// Scanning stops at the first row that yields a non-zero fourcc.
LIBYUV_API
uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
  uint32 fourcc = 0;
  int row = 0;
  // Rows are contiguous when the stride equals the row size in bytes, so the
  // whole image can be scanned as one long row.
  if (stride_argb == width * 4) {
    width *= height;
    height = 1;
    stride_argb = 0;
  }
  while (row < height && fourcc == 0) {
    fourcc = ARGBDetectRow_C(argb, width);
    argb += stride_argb;
    ++row;
  }
  return fourcc;
}
// TODO(fbarchard): Refactor into row function.
// Returns the sum of squared byte differences between the first |count|
// bytes of |src_a| and |src_b|.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
int count) {
// SumSquareError returns values 0 to 65535 for each squared difference.
// Up to 65536 of those can be summed and remain within a uint32.
// After each block of 65536 pixels, accumulate into a uint64.
const int kBlockSize = 65536;
// Bytes left after the whole blocks, rounded down to a multiple of 32.
int remainder = count & (kBlockSize - 1) & ~31;
uint64 sse = 0;
int i;
// Row function: the C version unless a CPU-specific one is compiled in and
// the CPU reports support for it. A later match overrides an earlier one.
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SumSquareError = SumSquareError_NEON;
}
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
// Note only used for multiples of 16 so count is not checked.
SumSquareError = SumSquareError_SSE2;
}
#endif
#if defined(HAS_SUMSQUAREERROR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
// Note only used for multiples of 32 so count is not checked.
SumSquareError = SumSquareError_AVX2;
}
#endif
// Whole blocks. Each iteration indexes from the original pointers, and with
// OpenMP the per-block sums are combined through the reduction on sse.
#ifdef _OPENMP
#pragma omp parallel for reduction(+: sse)
#endif
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
}
// Skip past the whole blocks handled by the loop above.
src_a += count & ~(kBlockSize - 1);
src_b += count & ~(kBlockSize - 1);
if (remainder) {
sse += SumSquareError(src_a, src_b, remainder);
src_a += remainder;
src_b += remainder;
}
// The last 0..31 bytes always go through the C version.
remainder = count & 31;
if (remainder) {
sse += SumSquareError_C(src_a, src_b, remainder);
}
return sse;
}
// Returns the sum of squared byte differences between two planes of
// |width| x |height| bytes, read with the given strides.
LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
                                  const uint8* src_b, int stride_b,
                                  int width, int height) {
  uint64 total = 0;
  int row;
  // Both planes are contiguous when each stride equals the width, so the
  // whole plane can be processed as a single row.
  if (stride_a == width && stride_b == width) {
    width *= height;
    height = 1;
    stride_a = stride_b = 0;
  }
  for (row = 0; row < height; ++row, src_a += stride_a, src_b += stride_b) {
    total += ComputeSumSquareError(src_a, src_b, width);
  }
  return total;
}
// Converts a sum of squared errors over |count| samples into
// 10 * log10(255^2 * count / sse), capped at kMaxPsnr.
// Returns kMaxPsnr when |sse| is 0.
LIBYUV_API
double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
  double psnr = kMaxPsnr;  // Kept when sse == 0, which avoids dividing by 0.
  if (sse > 0) {
    // count / sse is the reciprocal of the mean squared error.
    const double inv_mse = (double)(count) / (double)(sse);
    psnr = 10.0 * log10(255.0 * 255.0 * inv_mse);
  }
  return (psnr > kMaxPsnr) ? kMaxPsnr : psnr;
}
// Returns the PSNR between two planes of |width| x |height| bytes, computed
// from their sum of squared errors via SumSquareErrorToPsnr().
LIBYUV_API
double CalcFramePsnr(const uint8* src_a, int stride_a,
                     const uint8* src_b, int stride_b,
                     int width, int height) {
  // Widen before multiplying: width * height evaluated in int overflows for
  // frames with more than INT_MAX samples.
  const uint64 samples = (uint64)(width) * (uint64)(height);
  const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
                                                src_b, stride_b,
                                                width, height);
  return SumSquareErrorToPsnr(sse, samples);
}
// Returns the PSNR between two I420 frames, computed from the combined sum
// of squared errors of the Y, U and V planes. The U and V planes are taken
// to be (width + 1) / 2 by (height + 1) / 2.
LIBYUV_API
double I420Psnr(const uint8* src_y_a, int stride_y_a,
                const uint8* src_u_a, int stride_u_a,
                const uint8* src_v_a, int stride_v_a,
                const uint8* src_y_b, int stride_y_b,
                const uint8* src_u_b, int stride_u_b,
                const uint8* src_v_b, int stride_v_b,
                int width, int height) {
  const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
                                                  src_y_b, stride_y_b,
                                                  width, height);
  const int width_uv = (width + 1) >> 1;
  const int height_uv = (height + 1) >> 1;
  const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
                                                  src_u_b, stride_u_b,
                                                  width_uv, height_uv);
  const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
                                                  src_v_b, stride_v_b,
                                                  width_uv, height_uv);
  // Widen before multiplying: the products evaluated in int overflow for
  // large frames.
  const uint64 samples = (uint64)(width) * (uint64)(height) +
                         2 * ((uint64)(width_uv) * (uint64)(height_uv));
  const uint64 sse = sse_y + sse_u + sse_v;
  return SumSquareErrorToPsnr(sse, samples);
}
static const int64 cc1 = 26634; // (64^2*(.01*255)^2
static const int64 cc2 = 239708; // (64^2*(.03*255)^2
static double Ssim8x8_C(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b) {
int64 sum_a = 0;
int64 sum_b = 0;
int64 sum_sq_a = 0;
int64 sum_sq_b = 0;
int64 sum_axb = 0;
int i;
for (i = 0; i < 8; ++i) {
int j;
for (j = 0; j < 8; ++j) {
sum_a += src_a[j];
sum_b += src_b[j];
sum_sq_a += src_a[j] * src_a[j];
sum_sq_b += src_b[j] * src_b[j];
sum_axb += src_a[j] * src_b[j];
}
src_a += stride_a;
src_b += stride_b;
}
{
const int64 count = 64;
// scale the constants by number of pixels
const int64 c1 = (cc1 * count * count) >> 12;
const int64 c2 = (cc2 * count * count) >> 12;
const int64 sum_a_x_sum_b = sum_a * sum_b;
const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
(2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
const int64 sum_a_sq = sum_a*sum_a;
const int64 sum_b_sq = sum_b*sum_b;
const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq +
count * sum_sq_b - sum_b_sq + c2);
if (ssim_d == 0.0) {
return DBL_MAX;
}
return ssim_n * 1.0 / ssim_d;
}
}
// We are using a 8x8 moving window with starting location of each 8x8 window
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
// block boundaries to penalize blocking artifacts.
// Returns the mean of the per-window results. When no window is visited
// (width or height of 8 or less) the mean is a division by zero samples.
LIBYUV_API
double CalcFrameSsim(const uint8* src_a, int stride_a,
                     const uint8* src_b, int stride_b,
                     int width, int height) {
  int samples = 0;
  double ssim_total = 0;
  double (*Ssim8x8)(const uint8* src_a, int stride_a,
                    const uint8* src_b, int stride_b) = Ssim8x8_C;

  // sample point start with each 4x4 location
  int y;
  for (y = 0; y < height - 8; y += 4) {
    int x;
    for (x = 0; x < width - 8; x += 4) {
      ssim_total += Ssim8x8(src_a + x, stride_a, src_b + x, stride_b);
      ++samples;
    }
    // Step both planes down four rows to the next window row.
    src_a += stride_a * 4;
    src_b += stride_b * 4;
  }

  return ssim_total / samples;
}
// SSIM of two I420 frames.
// Each plane is scored independently with CalcFrameSsim and the scores are
// blended as 0.8 * Y + 0.1 * U + 0.1 * V. Chroma planes use half the luma
// dimensions, rounded up.
LIBYUV_API
double I420Ssim(const uint8* src_y_a, int stride_y_a,
                const uint8* src_u_a, int stride_u_a,
                const uint8* src_v_a, int stride_v_a,
                const uint8* src_y_b, int stride_y_b,
                const uint8* src_u_b, int stride_u_b,
                const uint8* src_v_b, int stride_v_b,
                int width, int height) {
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;

  const double score_y =
      CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
  const double score_u =
      CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b,
                    chroma_width, chroma_height);
  const double score_v =
      CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b,
                    chroma_width, chroma_height);

  return score_y * 0.8 + 0.1 * (score_u + score_v);
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,44 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Portable C version: returns the sum of squared differences between the
// first |count| bytes of src_a and src_b. The total is accumulated in a
// uint32. A count of zero or less yields 0.
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
  uint32 total = 0u;
  int remaining = count;
  while (remaining > 0) {
    const int delta = *src_a++ - *src_b++;
    total += (uint32)(delta * delta);
    --remaining;
  }
  return total;
}
// Internal C version of HashDjb2 with an int sized count for efficiency.
// Folds |count| bytes of src into the running hash as hash = hash * 33 + byte
// (in uint32 arithmetic), starting from |seed|. A seed of 5381 is
// recommended.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
  uint32 h = seed;
  const uint8* p = src;
  int left;
  for (left = count; left > 0; --left) {
    h = h * 33u + *p++;
  }
  return h;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,151 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
// SSE2 (GCC inline asm) sum of squared byte differences between src_a and
// src_b. Each loop iteration loads 16 bytes from each source with unaligned
// loads and subtracts 16 from count, looping while count stays above zero.
// NOTE(review): data is always consumed in whole 16-byte blocks, so callers
// presumably pass a count that is a multiple of 16 - confirm at call sites.
// The result is the low 32 bits of the accumulated sum.
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
asm volatile (
// xmm0 = running sum (four 32-bit lanes), xmm5 = zero used for unpacking.
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"movdqu " MEMACCESS(1) ",%%xmm2 \n"
"lea " MEMLEA(0x10, 1) ",%1 \n"
// |a - b| per byte: saturating subtract in both directions, then OR.
"movdqa %%xmm1,%%xmm3 \n"
"psubusb %%xmm2,%%xmm1 \n"
"psubusb %%xmm3,%%xmm2 \n"
"por %%xmm2,%%xmm1 \n"
// Widen the 16 byte differences to two vectors of eight 16-bit values.
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"punpckhbw %%xmm5,%%xmm2 \n"
// Square each value and add adjacent pairs into 32-bit lanes.
"pmaddwd %%xmm1,%%xmm1 \n"
"pmaddwd %%xmm2,%%xmm2 \n"
"paddd %%xmm1,%%xmm0 \n"
"paddd %%xmm2,%%xmm0 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
// Horizontal add of the four 32-bit lanes into lane 0.
"pshufd $0xee,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"pshufd $0x1,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"movd %%xmm0,%3 \n"
: "+r"(src_a), // %0
"+r"(src_b), // %1
"+r"(count), // %2
"=g"(sse) // %3
:: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
return sse;
}
// Multiplier tables used by HashDjb2_SSE41 below to hash 16 bytes per
// iteration. The running hash is multiplied by 33^16 (kHash16x33, only the
// first element is non-zero) and source byte i of the 16-byte block is
// multiplied by 33^(15 - i) (kHashMul0..kHashMul3, four bytes per table).
// All powers are truncated to 32 bits.
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
static uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
static uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
static uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
// SSE4.1 (GCC inline asm) djb2 hash. Each iteration consumes 16 source bytes:
//   hash = hash * 33^16 + sum(src[i] * 33^(15 - i)), i = 0..15
// and subtracts 16 from count, looping while count stays above zero.
// NOTE(review): bytes are always read in whole 16-byte blocks, so callers
// presumably pass a count that is a multiple of 16 - confirm at call sites.
// The tables are read with movdqa, which requires 16-byte aligned operands.
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
asm volatile (
// xmm0 lane 0 = running hash, xmm7 = zero for unpacking, xmm6 = 33^16.
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
// hash *= 33^16
"pmulld %%xmm6,%%xmm0 \n"
"movdqa %5,%%xmm5 \n"
// src[0-7] widened to 16 bits.
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm7,%%xmm2 \n"
// src[0-3] widened to 32 bits, times 33^15..33^12.
"movdqa %%xmm2,%%xmm3 \n"
"punpcklwd %%xmm7,%%xmm3 \n"
"pmulld %%xmm5,%%xmm3 \n"
"movdqa %6,%%xmm5 \n"
// src[4-7] times 33^11..33^8.
"movdqa %%xmm2,%%xmm4 \n"
"punpckhwd %%xmm7,%%xmm4 \n"
"pmulld %%xmm5,%%xmm4 \n"
"movdqa %7,%%xmm5 \n"
// src[8-15] widened to 16 bits; src[8-11] times 33^7..33^4.
"punpckhbw %%xmm7,%%xmm1 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklwd %%xmm7,%%xmm2 \n"
"pmulld %%xmm5,%%xmm2 \n"
"movdqa %8,%%xmm5 \n"
// src[12-15] times 33^3..33^0.
"punpckhwd %%xmm7,%%xmm1 \n"
"pmulld %%xmm5,%%xmm1 \n"
// Add the 16 products together and fold them into lane 0.
"paddd %%xmm4,%%xmm3 \n"
"paddd %%xmm2,%%xmm1 \n"
"paddd %%xmm3,%%xmm1 \n"
"pshufd $0xe,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"pshufd $0x1,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"sub $0x10,%1 \n"
"jg 1b \n"
"movd %%xmm0,%3 \n"
: "+r"(src), // %0
"+r"(count), // %1
"+rm"(seed), // %2
"=g"(hash) // %3
: "m"(kHash16x33), // %4
"m"(kHashMul0), // %5
"m"(kHashMul1), // %6
"m"(kHashMul2), // %7
"m"(kHashMul3) // %8
: "memory", "cc"
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
return hash;
}
#endif // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,66 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
// 32-bit ARM NEON sum of squared byte differences between src_a and src_b.
// Each iteration loads 16 bytes from each source and subtracts 16 from
// count, looping while count stays above zero.
// NOTE(review): data is always consumed in whole 16-byte blocks, so callers
// presumably pass a count that is a multiple of 16 - confirm at call sites.
// The result is the low 32 bits of the accumulated sum.
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse;
asm volatile (
// q8..q11 are four independent 4 x 32-bit accumulators.
"vmov.u8 q8, #0 \n"
"vmov.u8 q10, #0 \n"
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n"
MEMACCESS(1)
"vld1.8 {q1}, [%1]! \n"
"subs %2, %2, #16 \n"
// Widening subtract: q2/q3 hold the 16 differences as 16-bit values.
"vsubl.u8 q2, d0, d2 \n"
"vsubl.u8 q3, d1, d3 \n"
// Multiply-accumulate each difference with itself (signed 16 -> 32 bit).
"vmlal.s16 q8, d4, d4 \n"
"vmlal.s16 q9, d6, d6 \n"
"vmlal.s16 q10, d5, d5 \n"
"vmlal.s16 q11, d7, d7 \n"
"bgt 1b \n"
// Combine the four accumulators, then reduce across lanes.
"vadd.u32 q8, q8, q9 \n"
"vadd.u32 q10, q10, q11 \n"
"vadd.u32 q11, q8, q10 \n"
"vpaddl.u32 q1, q11 \n"
"vadd.u64 d0, d2, d3 \n"
"vmov.32 %3, d0[0] \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
"=r"(sse)
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
return sse;
}
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,64 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
// AArch64 NEON sum of squared byte differences between src_a and src_b.
// Each iteration loads 16 bytes from each source and subtracts 16 from
// count, looping while count stays above zero.
// NOTE(review): data is always consumed in whole 16-byte blocks, so callers
// presumably pass a count that is a multiple of 16 - confirm at call sites.
// The result is the 32-bit sum across all accumulator lanes.
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
  volatile uint32 sse;
  asm volatile (
    // v16..v19 are four independent 4 x 32-bit accumulators.
    "eor v16.16b, v16.16b, v16.16b \n"
    "eor v18.16b, v18.16b, v18.16b \n"
    "eor v17.16b, v17.16b, v17.16b \n"
    "eor v19.16b, v19.16b, v19.16b \n"
    "1: \n"
    MEMACCESS(0)
    "ld1 {v0.16b}, [%0], #16 \n"
    MEMACCESS(1)
    "ld1 {v1.16b}, [%1], #16 \n"
    "subs %w2, %w2, #16 \n"
    // Widening subtract: v2/v3 hold the 16 differences as 16-bit values.
    "usubl v2.8h, v0.8b, v1.8b \n"
    "usubl2 v3.8h, v0.16b, v1.16b \n"
    // Multiply-accumulate each difference with itself (signed 16 -> 32 bit).
    "smlal v16.4s, v2.4h, v2.4h \n"
    "smlal v17.4s, v3.4h, v3.4h \n"
    "smlal2 v18.4s, v2.8h, v2.8h \n"
    "smlal2 v19.4s, v3.8h, v3.8h \n"
    "b.gt 1b \n"
    // Combine the four accumulators, then add across lanes.
    "add v16.4s, v16.4s, v17.4s \n"
    "add v18.4s, v18.4s, v19.4s \n"
    "add v19.4s, v16.4s, v18.4s \n"
    "addv s0, v19.4s \n"
    "fmov %w3, s0 \n"
    : "+r"(src_a),
      "+r"(src_b),
      "+r"(count),
      "=r"(sse)
    :
    // "memory" is required: the asm reads the source buffers through %0 and
    // %1, which are passed as plain register operands, so without it the
    // compiler may assume the asm does not depend on pending stores to them.
    : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
  return sse;
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,222 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
// SSE2 (32-bit Visual C inline asm) sum of squared byte differences between
// src_a and src_b. The function is naked: arguments are read directly from
// the stack and the result is returned in eax. Each iteration loads 16 bytes
// from each source and subtracts 16 from count, looping while count stays
// above zero.
// NOTE(review): data is always consumed in whole 16-byte blocks, so callers
// presumably pass a count that is a multiple of 16 - confirm at call sites.
__declspec(naked)
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
// xmm0 = running sum (four 32-bit lanes), xmm5 = zero used for unpacking.
pxor xmm0, xmm0
pxor xmm5, xmm5
wloop:
movdqu xmm1, [eax]
lea eax, [eax + 16]
movdqu xmm2, [edx]
lea edx, [edx + 16]
movdqa xmm3, xmm1 // abs trick
psubusb xmm1, xmm2
psubusb xmm2, xmm3
por xmm1, xmm2
// Widen the 16 byte differences to two vectors of eight 16-bit values.
movdqa xmm2, xmm1
punpcklbw xmm1, xmm5
punpckhbw xmm2, xmm5
// Square each value and add adjacent pairs into 32-bit lanes.
pmaddwd xmm1, xmm1
pmaddwd xmm2, xmm2
paddd xmm0, xmm1
paddd xmm0, xmm2
sub ecx, 16
jg wloop
// Horizontal add of the four 32-bit lanes into lane 0.
pshufd xmm1, xmm0, 0xee
paddd xmm0, xmm1
pshufd xmm1, xmm0, 0x01
paddd xmm0, xmm1
movd eax, xmm0
ret
}
}
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable: 4752)
// AVX2 (32-bit Visual C inline asm) sum of squared byte differences between
// src_a and src_b. The function is naked: arguments are read directly from
// the stack and the result is returned in eax. Each iteration loads 32 bytes
// from each source and subtracts 32 from count, looping while count stays
// above zero.
// NOTE(review): data is always consumed in whole 32-byte blocks, so callers
// presumably pass a count that is a multiple of 32 - confirm at call sites.
__declspec(naked)
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
vpxor ymm0, ymm0, ymm0 // sum
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
// edx becomes the offset src_b - src_a so a single pointer increment
// (eax) advances both sources.
sub edx, eax
wloop:
vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + edx]
lea eax, [eax + 32]
vpsubusb ymm3, ymm1, ymm2 // abs difference trick
vpsubusb ymm2, ymm2, ymm1
vpor ymm1, ymm2, ymm3
vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order.
vpunpckhbw ymm1, ymm1, ymm5
vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32.
vpmaddwd ymm1, ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpaddd ymm0, ymm0, ymm2
sub ecx, 32
jg wloop
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
vpaddd ymm0, ymm0, ymm1
vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes.
vpaddd ymm0, ymm0, ymm1
vpermq ymm1, ymm0, 0x02 // high + low lane.
vpaddd ymm0, ymm0, ymm1
vmovd eax, xmm0
// Clear the upper halves of the ymm registers before returning.
vzeroupper
ret
}
}
#endif // _MSC_VER >= 1700
// Multiplier tables used by HashDjb2_SSE41 and HashDjb2_AVX2 below to hash
// 16 bytes per iteration. The running hash is multiplied by 33^16
// (kHash16x33, only the first element is non-zero) and source byte i of the
// 16-byte block is multiplied by 33^(15 - i) (kHashMul0..kHashMul3, four
// bytes per table). All powers are truncated to 32 bits.
uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
// SSE4.1 (32-bit Visual C inline asm) djb2 hash. The function is naked:
// arguments are read directly from the stack and the hash is returned in eax.
// Each iteration consumes 16 source bytes:
//   hash = hash * 33^16 + sum(src[i] * 33^(15 - i)), i = 0..15
// and subtracts 16 from count, looping while count stays above zero.
// NOTE(review): bytes are always read in whole 16-byte blocks, so callers
// presumably pass a count that is a multiple of 16 - confirm at call sites.
__declspec(naked)
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, xmmword ptr kHash16x33
wloop:
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
pmulld xmm0, xmm6 // hash *= 33 ^ 16
movdqa xmm5, xmmword ptr kHashMul0
movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3]
pmulld xmm3, xmm5
movdqa xmm5, xmmword ptr kHashMul1
movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7]
pmulld xmm4, xmm5
movdqa xmm5, xmmword ptr kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11]
pmulld xmm2, xmm5
movdqa xmm5, xmmword ptr kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
sub ecx, 16
jg wloop
movd eax, xmm0 // return hash
ret
}
}
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// VEX-encoded (32-bit Visual C inline asm) djb2 hash. The function is naked:
// arguments are read directly from the stack and the hash is returned in eax.
// Each iteration consumes 16 source bytes:
//   hash = hash * 33^16 + sum(src[i] * 33^(15 - i)), i = 0..15
// and subtracts 16 from count, looping while count stays above zero. Source
// bytes are zero-extended to dwords four at a time with vpmovzxbd, and the
// multiplier tables are used directly as memory operands.
// NOTE(review): bytes are always read in whole 16-byte blocks, so callers
// presumably pass a count that is a multiple of 16 - confirm at call sites.
__declspec(naked)
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
vmovd xmm0, [esp + 12] // seed
wloop:
vpmovzxbd xmm3, [eax] // src[0-3]
vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16
vpmovzxbd xmm4, [eax + 4] // src[4-7]
vpmulld xmm3, xmm3, xmmword ptr kHashMul0
vpmovzxbd xmm2, [eax + 8] // src[8-11]
vpmulld xmm4, xmm4, xmmword ptr kHashMul1
vpmovzxbd xmm1, [eax + 12] // src[12-15]
vpmulld xmm2, xmm2, xmmword ptr kHashMul2
lea eax, [eax + 16]
vpmulld xmm1, xmm1, xmmword ptr kHashMul3
vpaddd xmm3, xmm3, xmm4 // add 16 results
vpaddd xmm1, xmm1, xmm2
vpaddd xmm1, xmm1, xmm3
vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
vpaddd xmm1, xmm1,xmm2
vpshufd xmm2, xmm1, 0x01
vpaddd xmm1, xmm1, xmm2
vpaddd xmm0, xmm0, xmm1
sub ecx, 16
jg wloop
vmovd eax, xmm0 // return hash
vzeroupper
ret
}
}
#endif // _MSC_VER >= 1700
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,392 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#ifdef HAVE_JPEG
// Destination state handed (as the opaque pointer) to the Jpeg*ToI420 row
// callbacks. Each callback writes a band of rows at the current plane
// pointers and then advances the pointers past the rows it wrote.
struct I420Buffers {
uint8* y;  // Current write position in the Y plane.
int y_stride;  // Byte distance between Y rows.
uint8* u;  // Current write position in the U plane.
int u_stride;  // Byte distance between U rows.
uint8* v;  // Current write position in the V plane.
int v_stride;  // Byte distance between V rows.
int w;  // Width passed to the conversion functions.
int h;  // Rows still to be written; decremented by each callback.
};
static void JpegCopyI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I420Copy(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI422ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I422ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI444ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I444ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI411ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I411ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI400ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I400ToI420(data[0], strides[0],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
// Query the size of an MJPG frame in pixels.
// Loads the frame header from |sample|; on success stores the dimensions in
// |*width| and |*height| and returns 0. Returns -1 (runtime failure) when the
// frame cannot be loaded, leaving the outputs untouched.
LIBYUV_API
int MJPGSize(const uint8* sample, size_t sample_size,
             int* width, int* height) {
  MJpegDecoder mjpeg_decoder;
  const LIBYUV_BOOL loaded = mjpeg_decoder.LoadFrame(sample, sample_size);
  if (loaded) {
    *width = mjpeg_decoder.GetWidth();
    *height = mjpeg_decoder.GetHeight();
  }
  mjpeg_decoder.UnloadFrame();
  if (!loaded) {
    return -1;  // -1 for runtime failure.
  }
  return 0;
}
// MJPG (Motion JPeg) to I420.
// Decodes the frame in |sample| and converts it into the destination planes
// y/u/v. |w| and |h| must match the dimensions stored in the frame; |dw| and
// |dh| are handed to the decoder and to the row callbacks.
// Returns 0 on success, -1 when sample_size is kUnknownDataSize and 1 for
// any runtime failure (load error, dimension mismatch, unsupported
// subsampling, decode error).
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
LIBYUV_API
int MJPGToI420(const uint8* sample,
               size_t sample_size,
               uint8* y, int y_stride,
               uint8* u, int u_stride,
               uint8* v, int v_stride,
               int w, int h,
               int dw, int dh) {
  if (sample_size == kUnknownDataSize) {
    // ERROR: MJPEG frame size unknown
    return -1;
  }

  // TODO(fbarchard): Port MJpeg to C.
  MJpegDecoder mjpeg_decoder;
  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
  if (!ret) {
    return 1;
  }
  if (mjpeg_decoder.GetWidth() != w ||
      mjpeg_decoder.GetHeight() != h) {
    // ERROR: MJPEG frame has unexpected dimensions
    mjpeg_decoder.UnloadFrame();
    return 1;  // runtime failure
  }

  I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };

  // Pick the row callback that matches the frame's colorspace and sampling.
  void (*convert_rows)(void* opaque,
                       const uint8* const* data,
                       const int* strides,
                       int rows);
  if (mjpeg_decoder.GetColorSpace() ==
          MJpegDecoder::kColorSpaceYCbCr &&
      mjpeg_decoder.GetNumComponents() == 3 &&
      mjpeg_decoder.GetVertSampFactor(0) == 2 &&
      mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
      mjpeg_decoder.GetVertSampFactor(1) == 1 &&
      mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
      mjpeg_decoder.GetVertSampFactor(2) == 1 &&
      mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV420
    convert_rows = &JpegCopyI420;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV422
    convert_rows = &JpegI422ToI420;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV444
    convert_rows = &JpegI444ToI420;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV411
    convert_rows = &JpegI411ToI420;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceGrayscale &&
             mjpeg_decoder.GetNumComponents() == 1 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 1) {
    // YUV400
    convert_rows = &JpegI400ToI420;
  } else {
    // TODO(fbarchard): Implement conversion for any other colorspace/sample
    // factors that occur in practice. 411 is supported by libjpeg
    // ERROR: Unable to convert MJPEG frame because format is not supported
    mjpeg_decoder.UnloadFrame();
    return 1;
  }

  ret = mjpeg_decoder.DecodeToCallback(convert_rows, &bufs, dw, dh);
  return ret ? 0 : 1;
}
#ifdef HAVE_JPEG
// Destination state handed (as the opaque pointer) to the Jpeg*ToARGB row
// callbacks. Each callback writes a band of rows at the current pointer and
// then advances the pointer past the rows it wrote.
struct ARGBBuffers {
uint8* argb;  // Current write position in the ARGB image.
int argb_stride;  // Byte distance between ARGB rows.
int w;  // Width passed to the conversion functions.
int h;  // Rows still to be written; decremented by each callback.
};
static void JpegI420ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I420ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI422ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I422ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI444ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I444ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI411ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I411ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI400ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I400ToARGB(data[0], strides[0],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
// MJPG (Motion JPeg) to ARGB.
// Decodes the frame in |sample| and converts it into the destination image
// |argb|. |w| and |h| must match the dimensions stored in the frame; |dw|
// and |dh| are handed to the decoder and to the row callbacks.
// Returns 0 on success, -1 when sample_size is kUnknownDataSize and 1 for
// any runtime failure (load error, dimension mismatch, unsupported
// subsampling, decode error).
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
LIBYUV_API
int MJPGToARGB(const uint8* sample,
               size_t sample_size,
               uint8* argb, int argb_stride,
               int w, int h,
               int dw, int dh) {
  if (sample_size == kUnknownDataSize) {
    // ERROR: MJPEG frame size unknown
    return -1;
  }

  // TODO(fbarchard): Port MJpeg to C.
  MJpegDecoder mjpeg_decoder;
  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
  if (!ret) {
    return 1;
  }
  if (mjpeg_decoder.GetWidth() != w ||
      mjpeg_decoder.GetHeight() != h) {
    // ERROR: MJPEG frame has unexpected dimensions
    mjpeg_decoder.UnloadFrame();
    return 1;  // runtime failure
  }

  ARGBBuffers bufs = { argb, argb_stride, dw, dh };

  // Pick the row callback that matches the frame's colorspace and sampling.
  void (*convert_rows)(void* opaque,
                       const uint8* const* data,
                       const int* strides,
                       int rows);
  if (mjpeg_decoder.GetColorSpace() ==
          MJpegDecoder::kColorSpaceYCbCr &&
      mjpeg_decoder.GetNumComponents() == 3 &&
      mjpeg_decoder.GetVertSampFactor(0) == 2 &&
      mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
      mjpeg_decoder.GetVertSampFactor(1) == 1 &&
      mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
      mjpeg_decoder.GetVertSampFactor(2) == 1 &&
      mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV420
    convert_rows = &JpegI420ToARGB;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV422
    convert_rows = &JpegI422ToARGB;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV444
    convert_rows = &JpegI444ToARGB;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceYCbCr &&
             mjpeg_decoder.GetNumComponents() == 3 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
             mjpeg_decoder.GetVertSampFactor(1) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
             mjpeg_decoder.GetVertSampFactor(2) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(2) == 1) {
    // YUV411
    convert_rows = &JpegI411ToARGB;
  } else if (mjpeg_decoder.GetColorSpace() ==
                 MJpegDecoder::kColorSpaceGrayscale &&
             mjpeg_decoder.GetNumComponents() == 1 &&
             mjpeg_decoder.GetVertSampFactor(0) == 1 &&
             mjpeg_decoder.GetHorizSampFactor(0) == 1) {
    // YUV400
    convert_rows = &JpegI400ToARGB;
  } else {
    // TODO(fbarchard): Implement conversion for any other colorspace/sample
    // factors that occur in practice. 411 is supported by libjpeg
    // ERROR: Unable to convert MJPEG frame because format is not supported
    mjpeg_decoder.UnloadFrame();
    return 1;
  }

  ret = mjpeg_decoder.DecodeToCallback(convert_rows, &bufs, dw, dh);
  return ret ? 0 : 1;
}
#endif
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,306 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// src_width is used for source stride computation
// src_height is used to compute location of planes, and indicate inversion
// sample_size is measured in bytes and is the size of the frame.
// With MJPEG it is the compressed size of the frame.
LIBYUV_API
int ConvertToARGB(const uint8* sample, size_t sample_size,
uint8* crop_argb, int argb_stride,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 fourcc) {
uint32 format = CanonicalFourCC(fourcc);
int aligned_src_width = (src_width + 1) & ~1;
const uint8* src;
const uint8* src_uv;
int abs_src_height = (src_height < 0) ? -src_height : src_height;
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int r = 0;
// One pass rotation is available for some formats. For the rest, convert
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
// and then rotate the I420 to the final destination buffer.
// For in-place conversion, if destination crop_argb is same as source sample,
// also enable temporary buffer.
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
crop_argb == sample;
uint8* tmp_argb = crop_argb;
int tmp_argb_stride = argb_stride;
uint8* rotate_buffer = NULL;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
if (crop_argb == NULL || sample == NULL ||
src_width <= 0 || crop_width <= 0 ||
src_height == 0 || crop_height == 0) {
return -1;
}
if (src_height < 0) {
inv_crop_height = -inv_crop_height;
}
if (need_buf) {
int argb_size = crop_width * abs_crop_height * 4;
rotate_buffer = (uint8*)malloc(argb_size);
if (!rotate_buffer) {
return 1; // Out of memory runtime error.
}
crop_argb = rotate_buffer;
argb_stride = crop_width;
}
switch (format) {
// Single plane formats
case FOURCC_YUY2:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = YUY2ToARGB(src, aligned_src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_UYVY:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = UYVYToARGB(src, aligned_src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_24BG:
src = sample + (src_width * crop_y + crop_x) * 3;
r = RGB24ToARGB(src, src_width * 3,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RAW:
src = sample + (src_width * crop_y + crop_x) * 3;
r = RAWToARGB(src, src_width * 3,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_ARGB:
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBToARGB(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_BGRA:
src = sample + (src_width * crop_y + crop_x) * 4;
r = BGRAToARGB(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_ABGR:
src = sample + (src_width * crop_y + crop_x) * 4;
r = ABGRToARGB(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBA:
src = sample + (src_width * crop_y + crop_x) * 4;
r = RGBAToARGB(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBP:
src = sample + (src_width * crop_y + crop_x) * 2;
r = RGB565ToARGB(src, src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBO:
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB1555ToARGB(src, src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_R444:
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB4444ToARGB(src, src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
r = I400ToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
// Biplanar formats
case FOURCC_NV12:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
r = NV12ToARGB(src, src_width,
src_uv, aligned_src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_NV21:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
// Call NV12 but with u and v parameters swapped.
r = NV21ToARGB(src, src_width,
src_uv, aligned_src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_M420:
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
r = M420ToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:
case FOURCC_YV12: {
const uint8* src_y = sample + (src_width * crop_y + crop_x);
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
if (format == FOURCC_YV12) {
src_v = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_u = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
} else {
src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
}
r = I420ToARGB(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_J420: {
const uint8* src_y = sample + (src_width * crop_y + crop_x);
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
r = J420ToARGB(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I422:
case FOURCC_YV16: {
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
if (format == FOURCC_YV16) {
src_v = sample + src_width * abs_src_height +
halfwidth * crop_y + crop_x / 2;
src_u = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
} else {
src_u = sample + src_width * abs_src_height +
halfwidth * crop_y + crop_x / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
}
r = I422ToARGB(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I444:
case FOURCC_YV24: {
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u;
const uint8* src_v;
if (format == FOURCC_YV24) {
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
} else {
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
}
r = I444ToARGB(src_y, src_width,
src_u, src_width,
src_v, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I411: {
int quarterwidth = (src_width + 3) / 4;
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u = sample + src_width * abs_src_height +
quarterwidth * crop_y + crop_x / 4;
const uint8* src_v = sample + src_width * abs_src_height +
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
r = I411ToARGB(src_y, src_width,
src_u, quarterwidth,
src_v, quarterwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
#ifdef HAVE_JPEG
case FOURCC_MJPG:
r = MJPGToARGB(sample, sample_size,
crop_argb, argb_stride,
src_width, abs_src_height, crop_width, inv_crop_height);
break;
#endif
default:
r = -1; // unknown fourcc - return failure code.
}
if (need_buf) {
if (!r) {
r = ARGBRotate(crop_argb, argb_stride,
tmp_argb, tmp_argb_stride,
crop_width, abs_crop_height, rotation);
}
free(rotate_buffer);
}
return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,339 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "libyuv/convert.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert camera sample to I420 with cropping, rotation and vertical flip.
//
// sample       Source frame.
// sample_size  Size of the frame in bytes. With MJPEG it is the compressed
//              size of the frame.
// y, u, v      Destination I420 planes and their strides.
// crop_x/y     Top-left corner of the crop window inside the source.
// src_width    Used for source stride computation.
// src_height   Used to compute the location of planes. A negative value
//              requests vertical inversion.
// crop_width,  Size of the crop window. Only the magnitude of crop_height is
// crop_height  used; the inversion direction comes from src_height.
// rotation     Rotation to apply.
// fourcc       Source pixel format (canonicalized internally).
//
// Returns 0 on success, -1 for invalid arguments or an unknown fourcc, 1 if
// the temporary buffer cannot be allocated, otherwise the result of the
// underlying converter.
LIBYUV_API
int ConvertToI420(const uint8* sample,
                  size_t sample_size,
                  uint8* y, int y_stride,
                  uint8* u, int u_stride,
                  uint8* v, int v_stride,
                  int crop_x, int crop_y,
                  int src_width, int src_height,
                  int crop_width, int crop_height,
                  enum RotationMode rotation,
                  uint32 fourcc) {
  uint32 format = CanonicalFourCC(fourcc);
  int aligned_src_width = (src_width + 1) & ~1;
  const uint8* src;
  const uint8* src_uv;
  int abs_src_height = (src_height < 0) ? -src_height : src_height;
  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
  int r = 0;
  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
      format != FOURCC_NV12 && format != FOURCC_NV21 &&
      format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
  // Remember the caller's destination; y/u/v may be redirected to a temporary
  // buffer below.
  uint8* tmp_y = y;
  uint8* tmp_u = u;
  uint8* tmp_v = v;
  int tmp_y_stride = y_stride;
  int tmp_u_stride = u_stride;
  int tmp_v_stride = v_stride;
  uint8* rotate_buffer = NULL;
  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;

  if (!y || !u || !v || !sample ||
      src_width <= 0 || crop_width <= 0 ||
      src_height == 0 || crop_height == 0) {
    return -1;
  }
  // A negative height passed to the converters requests vertical inversion.
  if (src_height < 0) {
    inv_crop_height = -inv_crop_height;
  }

  // One pass rotation is available for some formats. For the rest, convert
  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
  // and then rotate the I420 to the final destination buffer.
  // For in-place conversion, if destination y is same as source sample,
  // also enable temporary buffer.
  if (need_buf) {
    int y_size = crop_width * abs_crop_height;
    int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
    rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
    if (!rotate_buffer) {
      return 1;  // Out of memory runtime error.
    }
    y = rotate_buffer;
    u = y + y_size;
    v = u + uv_size;
    y_stride = crop_width;
    u_stride = v_stride = ((crop_width + 1) / 2);
  }

  switch (format) {
    // Single plane formats
    case FOURCC_YUY2:
      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
      r = YUY2ToI420(src, aligned_src_width * 2,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_UYVY:
      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
      r = UYVYToI420(src, aligned_src_width * 2,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_RGBP:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = RGB565ToI420(src, src_width * 2,
                       y, y_stride,
                       u, u_stride,
                       v, v_stride,
                       crop_width, inv_crop_height);
      break;
    case FOURCC_RGBO:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = ARGB1555ToI420(src, src_width * 2,
                         y, y_stride,
                         u, u_stride,
                         v, v_stride,
                         crop_width, inv_crop_height);
      break;
    case FOURCC_R444:
      src = sample + (src_width * crop_y + crop_x) * 2;
      r = ARGB4444ToI420(src, src_width * 2,
                         y, y_stride,
                         u, u_stride,
                         v, v_stride,
                         crop_width, inv_crop_height);
      break;
    case FOURCC_24BG:
      src = sample + (src_width * crop_y + crop_x) * 3;
      r = RGB24ToI420(src, src_width * 3,
                      y, y_stride,
                      u, u_stride,
                      v, v_stride,
                      crop_width, inv_crop_height);
      break;
    case FOURCC_RAW:
      src = sample + (src_width * crop_y + crop_x) * 3;
      r = RAWToI420(src, src_width * 3,
                    y, y_stride,
                    u, u_stride,
                    v, v_stride,
                    crop_width, inv_crop_height);
      break;
    case FOURCC_ARGB:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = ARGBToI420(src, src_width * 4,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_BGRA:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = BGRAToI420(src, src_width * 4,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_ABGR:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = ABGRToI420(src, src_width * 4,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_RGBA:
      src = sample + (src_width * crop_y + crop_x) * 4;
      r = RGBAToI420(src, src_width * 4,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    case FOURCC_I400:
      src = sample + src_width * crop_y + crop_x;
      r = I400ToI420(src, src_width,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    // Biplanar formats
    case FOURCC_NV12:
      src = sample + (src_width * crop_y + crop_x);
      // The UV plane follows the Y plane in memory, so its offset must use
      // the magnitude of the height: src_height is negative when inversion
      // is requested and would otherwise point before the sample buffer.
      src_uv = sample + (src_width * abs_src_height) +
          ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
      r = NV12ToI420Rotate(src, src_width,
                           src_uv, aligned_src_width,
                           y, y_stride,
                           u, u_stride,
                           v, v_stride,
                           crop_width, inv_crop_height, rotation);
      break;
    case FOURCC_NV21:
      src = sample + (src_width * crop_y + crop_x);
      // Same plane layout as NV12: offset by the magnitude of the height.
      src_uv = sample + (src_width * abs_src_height) +
          ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
      // Call NV12 but with u and v parameters swapped.
      r = NV12ToI420Rotate(src, src_width,
                           src_uv, aligned_src_width,
                           y, y_stride,
                           v, v_stride,
                           u, u_stride,
                           crop_width, inv_crop_height, rotation);
      break;
    case FOURCC_M420:
      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
      r = M420ToI420(src, src_width,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    // Triplanar formats
    case FOURCC_I420:
    case FOURCC_YU12:
    case FOURCC_YV12: {
      const uint8* src_y = sample + (src_width * crop_y + crop_x);
      const uint8* src_u;
      const uint8* src_v;
      int halfwidth = (src_width + 1) / 2;
      int halfheight = (abs_src_height + 1) / 2;
      // YV12 stores the V plane before the U plane.
      if (format == FOURCC_YV12) {
        src_v = sample + src_width * abs_src_height +
            (halfwidth * crop_y + crop_x) / 2;
        src_u = sample + src_width * abs_src_height +
            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
      } else {
        src_u = sample + src_width * abs_src_height +
            (halfwidth * crop_y + crop_x) / 2;
        src_v = sample + src_width * abs_src_height +
            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
      }
      r = I420Rotate(src_y, src_width,
                     src_u, halfwidth,
                     src_v, halfwidth,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height, rotation);
      break;
    }
    case FOURCC_I422:
    case FOURCC_YV16: {
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u;
      const uint8* src_v;
      int halfwidth = (src_width + 1) / 2;
      // YV16 stores the V plane before the U plane.
      if (format == FOURCC_YV16) {
        src_v = sample + src_width * abs_src_height +
            halfwidth * crop_y + crop_x / 2;
        src_u = sample + src_width * abs_src_height +
            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
      } else {
        src_u = sample + src_width * abs_src_height +
            halfwidth * crop_y + crop_x / 2;
        src_v = sample + src_width * abs_src_height +
            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
      }
      r = I422ToI420(src_y, src_width,
                     src_u, halfwidth,
                     src_v, halfwidth,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    }
    case FOURCC_I444:
    case FOURCC_YV24: {
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u;
      const uint8* src_v;
      // YV24 stores the V plane before the U plane.
      if (format == FOURCC_YV24) {
        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      } else {
        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      }
      r = I444ToI420(src_y, src_width,
                     src_u, src_width,
                     src_v, src_width,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    }
    case FOURCC_I411: {
      int quarterwidth = (src_width + 3) / 4;
      const uint8* src_y = sample + src_width * crop_y + crop_x;
      const uint8* src_u = sample + src_width * abs_src_height +
          quarterwidth * crop_y + crop_x / 4;
      const uint8* src_v = sample + src_width * abs_src_height +
          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
      r = I411ToI420(src_y, src_width,
                     src_u, quarterwidth,
                     src_v, quarterwidth,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     crop_width, inv_crop_height);
      break;
    }
#ifdef HAVE_JPEG
    case FOURCC_MJPG:
      r = MJPGToI420(sample, sample_size,
                     y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     src_width, abs_src_height, crop_width, inv_crop_height);
      break;
#endif
    default:
      r = -1;  // unknown fourcc - return failure code.
  }

  if (need_buf) {
    // Second pass: rotate from the temporary buffer into the caller's planes.
    if (!r) {
      r = I420Rotate(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     tmp_y, tmp_y_stride,
                     tmp_u, tmp_u_stride,
                     tmp_v, tmp_v_stride,
                     crop_width, abs_crop_height, rotation);
    }
    free(rotate_buffer);
  }

  return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,299 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/cpu_id.h"
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h> // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
!defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h> // For _xgetbv()
#endif
#if !defined(__native_client__)
#include <stdlib.h> // For getenv()
#endif
// For ArmCpuCaps() but unittested on all platforms
#include <stdio.h>
#include <string.h>
#include "libyuv/basic_types.h" // For CPU_X86
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
#endif
// Low level cpuid for X86.
// Executes CPUID with EAX = info_eax and ECX = info_ecx and stores the
// resulting EAX, EBX, ECX and EDX values in cpu_info[0], [1], [2] and [3].
// A second definition below, used when this condition is false, reports all
// zeros instead.
#if (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
#if defined(_MSC_VER) && !defined(__clang__)
// Visual C version uses intrinsic or inline x86 assembly.
#if (_MSC_FULL_VER >= 160040219)
__cpuidex((int*)(cpu_info), info_eax, info_ecx);
#elif defined(_M_IX86)
__asm {
mov eax, info_eax
mov ecx, info_ecx
mov edi, cpu_info
cpuid
mov [edi], eax
mov [edi + 4], ebx
mov [edi + 8], ecx
mov [edi + 12], edx
}
#else // Visual C but not x86
// __cpuid takes no ECX argument, so only info_ecx == 0 is queried here; any
// other info_ecx reports all zeros.
if (info_ecx == 0) {
__cpuid((int*)(cpu_info), info_eax);
} else {
cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
}
#endif
// GCC version uses inline x86 assembly.
#else // defined(_MSC_VER) && !defined(__clang__)
uint32 info_ebx, info_edx;
asm volatile (
#if defined( __i386__) && defined(__PIC__)
// Preserve ebx for fpic 32 bit.
"mov %%ebx, %%edi \n"
"cpuid \n"
"xchg %%edi, %%ebx \n"
: "=D" (info_ebx),
#else
"cpuid \n"
: "=b" (info_ebx),
#endif // defined( __i386__) && defined(__PIC__)
"+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
cpu_info[0] = info_eax;
cpu_info[1] = info_ebx;
cpu_info[2] = info_ecx;
cpu_info[3] = info_edx;
#endif // defined(_MSC_VER) && !defined(__clang__)
}
#else // (defined(_M_IX86) || defined(_M_X64) ...
// Fallback when the x86 condition above is false: no CPUID is executed and
// all four outputs are zero.
LIBYUV_API
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif
// For VS2010 and earlier emit can be used:
// _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
// __asm {
// xor ecx, ecx // xcr 0
// xgetbv
// mov xcr0, eax
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
// Disable global optimizations around GetXCR0() for 32 bit MSVC builds older
// than _MSC_VER 1900 (see the optimizer note above).
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || \
defined(__i386__) || defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
#define HAS_XGETBV
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
// Returns the low 32 bits of XCR0 (extended control register 0), or 0 when
// neither implementation below is compiled in.
int GetXCR0() {
uint32 xcr0 = 0u;
#if (_MSC_FULL_VER >= 160040219)
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
#elif defined(__i386__) || defined(__x86_64__)
// Raw xgetbv opcode bytes: ECX = 0 selects XCR0, the result is read from EAX
// and EDX is declared clobbered.
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(__i386__) || defined(__x86_64__)
return xcr0;
}
#endif // defined(_M_IX86) || defined(_M_X64) ..
// Return optimization to previous setting.
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", on)
#endif
// Based on libvpx arm_cpudetect.c.
// For Arm, but public to allow testing on any CPU.
// Scans the text file cpuinfo_name for a line starting with "Features" that
// lists " neon" or " asimd" followed by a space or newline.
// Returns kCpuHasNEON when such a line is found or when the file cannot be
// opened, otherwise 0.
LIBYUV_API SAFEBUFFERS
int ArmCpuCaps(const char* cpuinfo_name) {
  char cpuinfo_line[512];
  int caps = 0;
  FILE* f = fopen(cpuinfo_name, "r");
  if (f == NULL) {
    // Assume Neon if /proc/cpuinfo is unavailable.
    // This will occur for Chrome sandbox for Pepper or Render process.
    return kCpuHasNEON;
  }
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
    if (memcmp(cpuinfo_line, "Features", 8) != 0) {
      continue;  // Only the "Features" lines are of interest.
    }
    const char* neon = strstr(cpuinfo_line, " neon");
    if (neon != NULL && (neon[5] == ' ' || neon[5] == '\n')) {
      caps = kCpuHasNEON;
      break;
    }
    // aarch64 uses asimd for Neon.
    const char* asimd = strstr(cpuinfo_line, " asimd");
    if (asimd != NULL && (asimd[6] == ' ' || asimd[6] == '\n')) {
      caps = kCpuHasNEON;
      break;
    }
  }
  fclose(f);
  return caps;
}
// CPU feature flags for SIMD instruction sets. Zero until InitCpuFlags() or
// MaskCpuFlags() stores a value here.
LIBYUV_API
int cpu_info_ = 0; // cpu_info is not initialized yet.
// Test environment variable for disabling CPU features. Returns LIBYUV_TRUE
// when the variable is set and its first character is not '0', so a value of
// zero is ignored to make it easy to set the variable on/off.
#if !defined(__native_client__) && !defined(_M_ARM)
static LIBYUV_BOOL TestEnv(const char* name) {
  const char* value = getenv(name);
  if (value == NULL || value[0] == '0') {
    return LIBYUV_FALSE;
  }
  return LIBYUV_TRUE;
}
#else  // nacl does not support getenv().
static LIBYUV_BOOL TestEnv(const char*) {
  return LIBYUV_FALSE;
}
#endif
// Detects the CPU features available to this build, applies the
// LIBYUV_DISABLE_* environment variable overrides, stores the result in
// cpu_info_ and returns it. kCpuInitialized is always set in the result.
LIBYUV_API SAFEBUFFERS
int InitCpuFlags(void) {
  // TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized.
  int cpu_info = 0;
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
  uint32 cpu_info0[4] = { 0, 0, 0, 0 };
  uint32 cpu_info1[4] = { 0, 0, 0, 0 };
  uint32 cpu_info7[4] = { 0, 0, 0, 0 };
  CpuId(0, 0, cpu_info0);
  CpuId(1, 0, cpu_info1);
  // Leaf 7 is only queried when leaf 0 reports it as supported.
  if (cpu_info0[0] >= 7) {
    CpuId(7, 0, cpu_info7);
  }
  cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
             ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
             ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
             ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
             ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
             ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
             kCpuHasX86;

#ifdef HAS_XGETBV
  // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
  if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) &&  // AVX and OSXSave
      ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
    cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;

    // Detect AVX512bw
    if ((GetXCR0() & 0xe0) == 0xe0) {
      cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
    }
  }
#endif

  // Environment variable overrides for testing.
  if (TestEnv("LIBYUV_DISABLE_X86")) {
    cpu_info &= ~kCpuHasX86;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE2")) {
    cpu_info &= ~kCpuHasSSE2;
  }
  if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
    cpu_info &= ~kCpuHasSSSE3;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE41")) {
    cpu_info &= ~kCpuHasSSE41;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE42")) {
    cpu_info &= ~kCpuHasSSE42;
  }
  if (TestEnv("LIBYUV_DISABLE_AVX")) {
    cpu_info &= ~kCpuHasAVX;
  }
  if (TestEnv("LIBYUV_DISABLE_AVX2")) {
    cpu_info &= ~kCpuHasAVX2;
  }
  if (TestEnv("LIBYUV_DISABLE_ERMS")) {
    cpu_info &= ~kCpuHasERMS;
  }
  if (TestEnv("LIBYUV_DISABLE_FMA3")) {
    cpu_info &= ~kCpuHasFMA3;
  }
  if (TestEnv("LIBYUV_DISABLE_AVX3")) {
    cpu_info &= ~kCpuHasAVX3;
  }
#endif
#if defined(__mips__) && defined(__linux__)
#if defined(__mips_dspr2)
  cpu_info |= kCpuHasDSPR2;
#endif
  cpu_info |= kCpuHasMIPS;
  // Use TestEnv() like every other override so that a value of "0" leaves
  // the feature enabled instead of disabling it.
  if (TestEnv("LIBYUV_DISABLE_DSPR2")) {
    cpu_info &= ~kCpuHasDSPR2;
  }
#endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
// __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
  // NOTE(review): this value is overwritten by the unconditional assignment
  // in the #if/#else that follows.
  cpu_info = kCpuHasNEON;
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
// flag in it.
// So for aarch64, neon enabling is hard coded here.
#endif
#if defined(__aarch64__)
  cpu_info = kCpuHasNEON;
#else
  // Linux arm parse text file for neon detect.
  cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
  cpu_info |= kCpuHasARM;
  if (TestEnv("LIBYUV_DISABLE_NEON")) {
    cpu_info &= ~kCpuHasNEON;
  }
#endif  // __arm__
  // Clears every detected feature flag.
  if (TestEnv("LIBYUV_DISABLE_ASM")) {
    cpu_info = 0;
  }
  cpu_info |= kCpuInitialized;
  cpu_info_ = cpu_info;
  return cpu_info;
}
// Re-runs CPU detection and keeps only the flags that are also set in
// enable_flags, storing the result in cpu_info_.
// Note that use of this function is not thread safe.
LIBYUV_API
void MaskCpuFlags(int enable_flags) {
  const int detected_flags = InitCpuFlags();
  cpu_info_ = detected_flags & enable_flags;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,570 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/mjpeg_decoder.h"
#ifdef HAVE_JPEG
#include <assert.h>
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP
#if defined(_MSC_VER)
// disable warning 4324: structure was padded due to __declspec(align())
#pragma warning(disable:4324)
#endif
#endif
struct FILE; // For jpeglib.h.
// C++ build requires extern C for jpeg internals.
#ifdef __cplusplus
extern "C" {
#endif
#include <jpeglib.h>
#ifdef __cplusplus
} // extern "C"
#endif
#include "libyuv/planar_functions.h" // For CopyPlane().
namespace libyuv {
#ifdef HAVE_SETJMP
// jpeglib error manager extended with the jmp_buf that ErrorHandler() jumps
// to. ErrorHandler() casts the jpeg_error_mgr pointer held by jpeglib back to
// this struct, which is why `base` has to be the first member.
struct SetJmpErrorMgr {
jpeg_error_mgr base; // Must be at the top
jmp_buf setjmp_buffer;
};
#endif
// Color space constants exposed by MJpegDecoder, defined as the corresponding
// jpeglib JCS_* values.
const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
// Methods that are passed to jpeglib.
// The first four are installed on the source manager by the constructor;
// ErrorHandler replaces the error manager's error_exit when HAVE_SETJMP is
// defined.
boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
void init_source(jpeg_decompress_struct* cinfo);
void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes); // NOLINT
void term_source(jpeg_decompress_struct* cinfo);
void ErrorHandler(jpeg_common_struct* cinfo);
// Allocates the jpeglib decompress, source manager and (with HAVE_SETJMP)
// error manager structures, installs this file's callbacks on them and
// creates the jpeglib decompressor. Output buffers start out empty.
MJpegDecoder::MJpegDecoder()
: has_scanline_padding_(LIBYUV_FALSE),
num_outbufs_(0),
scanlines_(NULL),
scanlines_sizes_(NULL),
databuf_(NULL),
databuf_strides_(NULL) {
decompress_struct_ = new jpeg_decompress_struct;
source_mgr_ = new jpeg_source_mgr;
#ifdef HAVE_SETJMP
error_mgr_ = new SetJmpErrorMgr;
// The error manager is attached before jpeg_create_decompress() is called.
decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
// Override standard exit()-based error handler.
error_mgr_->base.error_exit = &ErrorHandler;
#endif
decompress_struct_->client_data = NULL;
source_mgr_->init_source = &init_source;
source_mgr_->fill_input_buffer = &fill_input_buffer;
source_mgr_->skip_input_data = &skip_input_data;
source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
source_mgr_->term_source = &term_source;
jpeg_create_decompress(decompress_struct_);
decompress_struct_->src = source_mgr_;
// The buffer vector handed to jpeglib always holds exactly one buffer, buf_.
buf_vec_.buffers = &buf_;
buf_vec_.len = 1;
}
// Destroys the jpeglib decompressor, then frees the structures allocated by
// the constructor and the output buffers.
MJpegDecoder::~MJpegDecoder() {
jpeg_destroy_decompress(decompress_struct_);
delete decompress_struct_;
delete source_mgr_;
#ifdef HAVE_SETJMP
delete error_mgr_;
#endif
DestroyOutputBuffers();
}
// Loads a new frame: validates the JPEG data, reads its header and sizes the
// per-component scanline pointer arrays and data buffers for one iMCU row.
// src/src_len describe the compressed frame; the data is referenced, not
// copied. Returns LIBYUV_FALSE if validation fails, the header cannot be read
// or jpeglib reports an error, otherwise LIBYUV_TRUE.
LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
  if (!ValidateJpeg(src, src_len)) {
    return LIBYUV_FALSE;
  }

  buf_.data = src;
  buf_.len = static_cast<int>(src_len);
  buf_vec_.pos = 0;
  decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
  if (setjmp(error_mgr_->setjmp_buffer)) {
    // We called jpeg_read_header, it experienced an error, and we called
    // longjmp() and rewound the stack to here. Return error.
    return LIBYUV_FALSE;
  }
#endif
  if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
    // ERROR: Bad MJPEG header
    return LIBYUV_FALSE;
  }
  AllocOutputBuffers(GetNumComponents());
  for (int i = 0; i < num_outbufs_; ++i) {
    int scanlines_size = GetComponentScanlinesPerImcuRow(i);
    const bool scanlines_changed = (scanlines_sizes_[i] != scanlines_size);
    if (scanlines_changed) {
      // Allocated with new[], so it must be released with delete[].
      // delete[] on a null pointer is a no-op.
      delete[] scanlines_[i];
      scanlines_[i] = new uint8* [scanlines_size];
      scanlines_sizes_[i] = scanlines_size;
    }

    // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
    // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
    // the preceding scanlines, the padding is not needed/wanted because the
    // following addresses will already be valid (they are the initial bytes of
    // the next scanline) and will be overwritten when jpeglib writes out that
    // next scanline.
    int databuf_stride = GetComponentStride(i);
    int databuf_size = scanlines_size * databuf_stride;
    // The buffer holds scanlines_size rows of databuf_stride bytes, so it has
    // to be reallocated when either of them changes, not only the stride.
    if (scanlines_changed || databuf_strides_[i] != databuf_stride) {
      delete[] databuf_[i];
      databuf_[i] = new uint8[databuf_size];
      databuf_strides_[i] = databuf_stride;
    }

    if (GetComponentStride(i) != GetComponentWidth(i)) {
      has_scanline_padding_ = LIBYUV_TRUE;
    }
  }
  return LIBYUV_TRUE;
}
// Divides numerator by denominator after adding (denominator - 1), which
// rounds the quotient up for non-negative numerators and positive
// denominators.
static int DivideAndRoundUp(int numerator, int denominator) {
  const int bias = denominator - 1;
  return (numerator + bias) / denominator;
}
// Plain integer division of numerator by denominator.
static int DivideAndRoundDown(int numerator, int denominator) {
  const int quotient = numerator / denominator;
  return quotient;
}
// Width of the last loaded frame, as stored by jpeglib in image_width.
int MJpegDecoder::GetWidth() {
  const int width = static_cast<int>(decompress_struct_->image_width);
  return width;
}
// Height of the last loaded frame, as stored by jpeglib in image_height.
int MJpegDecoder::GetHeight() {
  const int height = static_cast<int>(decompress_struct_->image_height);
  return height;
}
// Color space of the last loaded frame. The value is one of the
// kColorSpace* constants.
int MJpegDecoder::GetColorSpace() {
  const int color_space =
      static_cast<int>(decompress_struct_->jpeg_color_space);
  return color_space;
}
// Number of color components reported by jpeglib for the loaded frame.
int MJpegDecoder::GetNumComponents() {
  const int component_count = decompress_struct_->num_components;
  return component_count;
}
// Sample factors of the n-th component.
int MJpegDecoder::GetHorizSampFactor(int component) {
return decompress_struct_->comp_info[component].h_samp_factor;
}
int MJpegDecoder::GetVertSampFactor(int component) {
return decompress_struct_->comp_info[component].v_samp_factor;
}
// Ratio of the largest horizontal sample factor in the frame to this
// component's horizontal sample factor.
int MJpegDecoder::GetHorizSubSampFactor(int component) {
  const int max_factor = decompress_struct_->max_h_samp_factor;
  return max_factor / GetHorizSampFactor(component);
}
// Ratio of the largest vertical sample factor in the frame to this
// component's vertical sample factor.
int MJpegDecoder::GetVertSubSampFactor(int component) {
  const int max_factor = decompress_struct_->max_v_samp_factor;
  return max_factor / GetVertSampFactor(component);
}
// Number of image scanlines in one iMCU row: DCTSIZE times the largest
// vertical sample factor.
int MJpegDecoder::GetImageScanlinesPerImcuRow() {
  const int max_factor = decompress_struct_->max_v_samp_factor;
  return max_factor * DCTSIZE;
}
// Number of scanlines of the given component in one iMCU row: the image
// scanline count divided by the component's vertical subsampling, rounded up.
int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
  const int subsample = GetVertSubSampFactor(component);
  const int image_scanlines = GetImageScanlinesPerImcuRow();
  return DivideAndRoundUp(image_scanlines, subsample);
}
// Width of the given component: the frame width divided by the component's
// horizontal subsampling, rounded up.
int MJpegDecoder::GetComponentWidth(int component) {
  const int subsample = GetHorizSubSampFactor(component);
  const int frame_width = GetWidth();
  return DivideAndRoundUp(frame_width, subsample);
}
// Height of the given component: the frame height divided by the component's
// vertical subsampling, rounded up.
int MJpegDecoder::GetComponentHeight(int component) {
  const int subsample = GetVertSubSampFactor(component);
  const int frame_height = GetHeight();
  return DivideAndRoundUp(frame_height, subsample);
}
// Component width in bytes, padded out to a multiple of DCTSIZE.
int MJpegDecoder::GetComponentStride(int component) {
  const int align_mask = DCTSIZE - 1;
  const int width = GetComponentWidth(component);
  return (width + align_mask) & ~align_mask;
}
// Unpadded size of the given component: its width times its height.
int MJpegDecoder::GetComponentSize(int component) {
  const int width = GetComponentWidth(component);
  const int height = GetComponentHeight(component);
  return width * height;
}
// Aborts the current decompress operation via jpeg_abort_decompress().
// Returns LIBYUV_FALSE if jpeglib reports an error through ErrorHandler(),
// otherwise LIBYUV_TRUE.
LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called jpeg_abort_decompress, it experienced an error, and we called
// longjmp() and rewound the stack to here. Return error.
return LIBYUV_FALSE;
}
#endif
jpeg_abort_decompress(decompress_struct_);
return LIBYUV_TRUE;
}
// Decodes the loaded frame into the caller's planes, one plane per component.
// dst_width must equal the frame width; dst_height may be smaller than the
// frame height, in which case (height - dst_height) / 2 lines are skipped at
// the top (vertical crop). Each planes[i] pointer is advanced past the rows
// written to it. Rows are written tightly packed, using the component width
// as the destination stride.
// Returns LIBYUV_FALSE on bad dimensions or any decode error, otherwise the
// result of FinishDecode().
// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
uint8** planes, int dst_width, int dst_height) {
if (dst_width != GetWidth() ||
dst_height > GetHeight()) {
// ERROR: Bad dimensions
return LIBYUV_FALSE;
}
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called into jpeglib, it experienced an error sometime during this
// function call, and we called longjmp() and rewound the stack to here.
// Return error.
return LIBYUV_FALSE;
}
#endif
if (!StartDecode()) {
return LIBYUV_FALSE;
}
SetScanlinePointers(databuf_);
int lines_left = dst_height;
// Compute amount of lines to skip to implement vertical crop.
// TODO(fbarchard): Ensure skip is a multiple of maximum component
// subsample. ie 2
int skip = (GetHeight() - dst_height) / 2;
if (skip > 0) {
// There is no API to skip lines in the output data, so we read them
// into the temp buffer.
while (skip >= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
skip -= GetImageScanlinesPerImcuRow();
}
if (skip > 0) {
// Have a partial iMCU row left over to skip. Must read it and then
// copy the parts we want into the destination.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
// TODO(fbarchard): Compute skip to avoid this
assert(skip % GetVertSubSampFactor(i) == 0);
// Convert the image-line skip into rows of this (possibly subsampled)
// component.
int rows_to_skip =
DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
rows_to_skip;
int data_to_skip = rows_to_skip * GetComponentStride(i);
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
lines_left -= (GetImageScanlinesPerImcuRow() - skip);
}
}
// Read full MCUs but cropped horizontally
// The loop condition is strict, so a final exactly-full iMCU row is handled
// by the partial-row branch below.
for (; lines_left > GetImageScanlinesPerImcuRow();
lines_left -= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
CopyPlane(databuf_[i], GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
}
if (lines_left > 0) {
// Have a partial iMCU row left over to decode.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
int scanlines_to_copy =
DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
CopyPlane(databuf_[i], GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
}
return FinishDecode();
}
// Decodes the loaded frame one iMCU row at a time and hands each decoded row
// to fn as (opaque, databuf_, databuf_strides_, number of image scanlines).
// dst_width must equal the frame width; dst_height may be smaller than the
// frame height, in which case (height - dst_height) / 2 lines are skipped at
// the top before the first callback.
// Returns LIBYUV_FALSE on bad dimensions or any decode error, otherwise the
// result of FinishDecode().
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
int dst_width, int dst_height) {
if (dst_width != GetWidth() ||
dst_height > GetHeight()) {
// ERROR: Bad dimensions
return LIBYUV_FALSE;
}
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called into jpeglib, it experienced an error sometime during this
// function call, and we called longjmp() and rewound the stack to here.
// Return error.
return LIBYUV_FALSE;
}
#endif
if (!StartDecode()) {
return LIBYUV_FALSE;
}
SetScanlinePointers(databuf_);
int lines_left = dst_height;
// TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
int skip = (GetHeight() - dst_height) / 2;
if (skip > 0) {
// Decode and discard whole iMCU rows that lie entirely in the skipped area.
while (skip >= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
skip -= GetImageScanlinesPerImcuRow();
}
if (skip > 0) {
// Have a partial iMCU row left over to skip.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
// TODO(fbarchard): Compute skip to avoid this
assert(skip % GetVertSubSampFactor(i) == 0);
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int data_to_skip = rows_to_skip * GetComponentStride(i);
// Change our own data buffer pointers so we can pass them to the
// callback.
databuf_[i] += data_to_skip;
}
int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
(*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
// Now change them back.
for (int i = 0; i < num_outbufs_; ++i) {
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int data_to_skip = rows_to_skip * GetComponentStride(i);
databuf_[i] -= data_to_skip;
}
lines_left -= scanlines_to_copy;
}
}
// Read full MCUs until we get to the crop point.
for (; lines_left >= GetImageScanlinesPerImcuRow();
lines_left -= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
(*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
}
if (lines_left > 0) {
// Have a partial iMCU row left over to decode.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
// Only the first lines_left scanlines of this row are reported.
(*fn)(opaque, databuf_, databuf_strides_, lines_left);
}
return FinishDecode();
}
// Source-initialisation hook: loads the first input buffer by delegating to
// fill_input_buffer(). The TRUE/FALSE result of that call is not used here.
void init_source(j_decompress_ptr cinfo) {
  (void)fill_input_buffer(cinfo);
}
// Hands the next buffer of the BufferVector stored in cinfo->client_data to
// the decompressor's source manager and advances the vector position.
// Returns TRUE when a buffer was supplied, FALSE (after asserting in debug
// builds) when every buffer has already been consumed.
boolean fill_input_buffer(j_decompress_ptr cinfo) {
  BufferVector* vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
  if (vec->pos < vec->len) {
    // Publish the current buffer, then step to the following one.
    cinfo->src->next_input_byte = vec->buffers[vec->pos].data;
    cinfo->src->bytes_in_buffer = vec->buffers[vec->pos].len;
    ++vec->pos;
    return TRUE;
  }
  assert(0 && "No more data");
  // ERROR: No more data
  return FALSE;
}
// Skips |num_bytes| of input on behalf of the decompressor.
//
// The read pointer and the remaining-byte count are kept in step, and the
// skip is bounded by what the current buffer actually holds. Previously only
// next_input_byte was advanced, so bytes_in_buffer still claimed the skipped
// bytes were available and an oversized skip (e.g. from a corrupt marker
// length) moved the pointer past the end of the buffer.
//
//   cinfo     - decompressor whose source manager is updated.
//   num_bytes - number of bytes to skip; values <= 0 are ignored.
//
// When the request exceeds the buffered data the buffer is marked empty
// (NULL pointer, zero bytes) so the next read goes through
// fill_input_buffer() instead of touching memory outside the buffer.
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {  // NOLINT
  if (num_bytes <= 0) {
    return;
  }
  const size_t bytes_to_skip = static_cast<size_t>(num_bytes);
  if (bytes_to_skip > cinfo->src->bytes_in_buffer) {
    // Not enough data left: drop the rest of this buffer rather than overrun.
    cinfo->src->next_input_byte = NULL;
    cinfo->src->bytes_in_buffer = 0;
  } else {
    cinfo->src->next_input_byte += bytes_to_skip;
    cinfo->src->bytes_in_buffer -= bytes_to_skip;
  }
}
// Source-termination hook. Intentionally a no-op: this function releases
// and flushes nothing.
void term_source(j_decompress_ptr cinfo) {
  (void)cinfo;  // Unused.
}
#ifdef HAVE_SETJMP
// Error hook invoked when a jpeglib call fails. jpeglib expects this hook not
// to return, so instead of terminating the program we longjmp() back to the
// setjmp() point recorded in the SetJmpErrorMgr that cinfo->err points at;
// that setjmp() then returns a second time with the value 1.
void ErrorHandler(j_common_ptr cinfo) {
  // A formatted message can be produced, but is unsafe for release builds.
#ifdef DEBUG
  char message[JMSG_LENGTH_MAX];
  (*cinfo->err->format_message)(cinfo, message);
  // ERROR: Error in jpeglib: message
#endif
  // Unwind to the caller that armed the jump buffer.
  longjmp(reinterpret_cast<SetJmpErrorMgr*>(cinfo->err)->setjmp_buffer, 1);
}
#endif
// Ensures the per-plane bookkeeping arrays are sized for |num_outbufs|
// planes. When the count is unchanged nothing happens; otherwise the old
// arrays are destroyed and fresh ones are created with every entry cleared
// (NULL pointers, zero sizes/strides).
void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
  if (num_outbufs == num_outbufs_) {
    return;
  }
  // Resizing in place is possible but not worth the complexity; simply
  // tear down and rebuild.
  DestroyOutputBuffers();

  scanlines_ = new uint8** [num_outbufs];
  scanlines_sizes_ = new int[num_outbufs];
  databuf_ = new uint8* [num_outbufs];
  databuf_strides_ = new int[num_outbufs];

  for (int plane = 0; plane < num_outbufs; ++plane) {
    scanlines_[plane] = NULL;
    scanlines_sizes_[plane] = 0;
    databuf_[plane] = NULL;
    databuf_strides_[plane] = 0;
  }

  num_outbufs_ = num_outbufs;
}
// Frees every per-plane scanline table and data buffer, then the four
// bookkeeping arrays themselves, and resets the members to the empty state
// (NULL pointers, zero planes).
void MJpegDecoder::DestroyOutputBuffers() {
  int plane = 0;
  while (plane < num_outbufs_) {
    delete [] scanlines_[plane];
    delete [] databuf_[plane];
    ++plane;
  }

  delete [] scanlines_;
  delete [] databuf_;
  delete [] scanlines_sizes_;
  delete [] databuf_strides_;

  num_outbufs_ = 0;
  databuf_strides_ = NULL;
  scanlines_sizes_ = NULL;
  databuf_ = NULL;
  scanlines_ = NULL;
}
// Configures the decompressor for raw (planar) output and starts it.
// JDCT_IFAST is selected and block smoothing is switched off; the original
// author notes these choices improve performance substantially.
// Returns LIBYUV_TRUE when jpeg_start_decompress() succeeds, else
// LIBYUV_FALSE.
LIBYUV_BOOL MJpegDecoder::StartDecode() {
  const boolean kDisabled = static_cast<boolean>(LIBYUV_FALSE);

  decompress_struct_->raw_data_out = TRUE;
  decompress_struct_->dct_method = JDCT_IFAST;  // JDCT_ISLOW is default
  decompress_struct_->dither_mode = JDITHER_NONE;
  decompress_struct_->do_fancy_upsampling = kDisabled;  // N/A for 'raw'.
  decompress_struct_->enable_2pass_quant = kDisabled;   // Buffered mode only.
  decompress_struct_->do_block_smoothing = kDisabled;   // Blocky but fast.

  if (jpeg_start_decompress(decompress_struct_)) {
    return LIBYUV_TRUE;
  }
  // ERROR: Couldn't start JPEG decompressor
  return LIBYUV_FALSE;
}
// Ends the current decode by aborting the decompressor. Always returns
// LIBYUV_TRUE.
LIBYUV_BOOL MJpegDecoder::FinishDecode() {
// jpeglib considers it an error if we finish without decoding the whole
// image, so we call "abort" rather than "finish".
jpeg_abort_decompress(decompress_struct_);
return LIBYUV_TRUE;
}
// Fills the scanline table of every plane so that entry j of plane i points
// at data[i] + j * GetComponentStride(i), for scanlines_sizes_[i] entries.
void MJpegDecoder::SetScanlinePointers(uint8** data) {
  for (int plane = 0; plane < num_outbufs_; ++plane) {
    uint8* row_start = data[plane];
    int row = 0;
    while (row < scanlines_sizes_[plane]) {
      scanlines_[plane][row] = row_start;
      row_start += GetComponentStride(plane);
      ++row;
    }
  }
}
// Decodes one iMCU row of raw data into the scanline tables. The result is
// true only when jpeg_read_raw_data() reports exactly
// GetImageScanlinesPerImcuRow() lines.
inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
  const unsigned int lines_wanted =
      (unsigned int)(GetImageScanlinesPerImcuRow());
  const unsigned int lines_read =
      jpeg_read_raw_data(decompress_struct_,
                         scanlines_,
                         GetImageScanlinesPerImcuRow());
  return lines_wanted == lines_read;
}
// Classifies the JPEG sub-sampling layout from the per-component factors.
//   subsample_x / subsample_y - horizontal / vertical factor per component.
//   number_of_components      - 3 for colour, 1 for grey-scale.
// Returns kJpegYuv420, kJpegYuv422, kJpegYuv444 or kJpegYuv400 for the
// recognised layouts and kJpegUnknown for everything else.
JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
    int* subsample_x, int* subsample_y, int number_of_components) {
  if (number_of_components != 3 && number_of_components != 1) {
    return kJpegUnknown;
  }
  // Every recognised layout has a full-resolution first component.
  if (subsample_x[0] != 1 || subsample_y[0] != 1) {
    return kJpegUnknown;
  }
  if (number_of_components == 1) {  // Grey-scale images.
    return kJpegYuv400;
  }

  // Color images: both chroma components must share the same factors.
  const int chroma_x = subsample_x[1];
  const int chroma_y = subsample_y[1];
  if (subsample_x[2] != chroma_x || subsample_y[2] != chroma_y) {
    return kJpegUnknown;
  }
  if (chroma_x == 2 && chroma_y == 2) {
    return kJpegYuv420;
  }
  if (chroma_x == 2 && chroma_y == 1) {
    return kJpegYuv422;
  }
  if (chroma_x == 1 && chroma_y == 1) {
    return kJpegYuv444;
  }
  return kJpegUnknown;
}
} // namespace libyuv
#endif // HAVE_JPEG

View File

@ -0,0 +1,71 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/mjpeg_decoder.h"
#include <string.h> // For memchr.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Searches |sample| (|sample_size| bytes) for the two-byte EOI marker
// 0xff 0xd9. Returns LIBYUV_TRUE when the marker is present, LIBYUV_FALSE
// otherwise (including when the buffer is shorter than two bytes).
static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
  if (sample_size < 2) {
    // ERROR: Invalid jpeg end code not found. Size sample_size
    return LIBYUV_FALSE;
  }
  // The last byte can never start a marker, so stop one byte early; this
  // also keeps the cursor[1] read inside the buffer.
  const uint8* const last = sample + sample_size - 1;
  const uint8* cursor = sample;
  while (cursor < last) {
    // TODO(fbarchard): scan for 0xd9 instead.
    cursor = static_cast<const uint8 *>(memchr(cursor, 0xff, last - cursor));
    if (cursor == NULL) {
      // ERROR: Invalid jpeg end code not found. Size sample_size
      return LIBYUV_FALSE;
    }
    if (cursor[1] == 0xd9) {
      return LIBYUV_TRUE;  // Success: Valid jpeg.
    }
    ++cursor;  // Skip over current 0xff.
  }
  // ERROR: Invalid jpeg end code not found. Size sample_size
  return LIBYUV_FALSE;
}
// Cheap sanity check that |sample| looks like an intact JPEG.
// The buffer must be non-NULL, between 64 bytes and 0x7fffffff bytes long,
// start with the SOI marker (0xff 0xd8) and contain an EOI marker. The last
// 1024 bytes are searched first; only if that fails is the remainder of the
// buffer scanned from the front.
// Returns LIBYUV_TRUE when all checks pass, LIBYUV_FALSE otherwise.
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
  // Maximum size that ValidateJpeg will consider valid.
  const size_t kMaxJpegSize = 0x7fffffffull;
  const size_t kBackSearchSize = 1024;

  if (!sample || sample_size < 64 || sample_size > kMaxJpegSize) {
    // ERROR: Invalid jpeg size: sample_size
    return LIBYUV_FALSE;
  }
  if (!(sample[0] == 0xff && sample[1] == 0xd8)) {  // SOI marker
    // ERROR: Invalid jpeg initial start code
    return LIBYUV_FALSE;
  }

  size_t forward_size = sample_size;
  if (sample_size > kBackSearchSize) {
    // Look for the End Of Image (EOI) marker near the end of the buffer.
    const uint8* tail = sample + sample_size - kBackSearchSize;
    if (ScanEOI(tail, kBackSearchSize)) {
      return LIBYUV_TRUE;  // Success: Valid jpeg.
    }
    // The tail is already covered; keep one byte of overlap so a marker
    // straddling the boundary is still found by the forward search.
    forward_size = sample_size - kBackSearchSize + 1;
  }
  // Step over SOI marker and scan for EOI.
  return ScanEOI(sample + 2, forward_size - 2);
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,491 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate.h"
#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate_row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Transposes a width x height plane of bytes from |src| into |dst|.
// The source is consumed in strips of 8 rows using the best available
// TransposeWx8 variant; a final strip of fewer than 8 rows is handled by
// TransposeWxH_C.
LIBYUV_API
void TransposePlane(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  int rows_left;
  void (*TransposeWx8)(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride, int width) = TransposeWx8_C;
#if defined(HAS_TRANSPOSEWX8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    TransposeWx8 = TransposeWx8_NEON;
  }
#endif
#if defined(HAS_TRANSPOSEWX8_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    TransposeWx8 = IS_ALIGNED(width, 8) ? TransposeWx8_SSSE3
                                        : TransposeWx8_Any_SSSE3;
  }
#endif
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    TransposeWx8 = IS_ALIGNED(width, 16) ? TransposeWx8_Fast_SSSE3
                                         : TransposeWx8_Fast_Any_SSSE3;
  }
#endif
#if defined(HAS_TRANSPOSEWX8_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    TransposeWx8 = (IS_ALIGNED(width, 4) &&
                    IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4))
                       ? TransposeWx8_Fast_DSPR2
                       : TransposeWx8_DSPR2;
  }
#endif

  // Work across the source in 8x8 tiles.
  for (rows_left = height; rows_left >= 8; rows_left -= 8) {
    TransposeWx8(src, src_stride, dst, dst_stride, width);
    src += 8 * src_stride;  // Go down 8 rows.
    dst += 8;               // Move over 8 columns.
  }

  // Fewer than 8 rows remain: finish with the generic C version.
  if (rows_left > 0) {
    TransposeWxH_C(src, src_stride, dst, dst_stride, width, rows_left);
  }
}
// Rotates a plane by 90 degrees. This is a transpose whose source is read
// from the bottom row upwards: start at the last source row and walk with a
// negated stride.
LIBYUV_API
void RotatePlane90(const uint8* src, int src_stride,
                   uint8* dst, int dst_stride,
                   int width, int height) {
  const uint8* last_row = src + src_stride * (height - 1);
  TransposePlane(last_row, -src_stride, dst, dst_stride, width, height);
}
// Rotates a plane by 270 degrees. This is a transpose whose destination is
// written from the bottom row upwards: start at destination row (width - 1)
// and walk with a negated stride.
LIBYUV_API
void RotatePlane270(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  uint8* last_row = dst + dst_stride * (width - 1);
  TransposePlane(src, src_stride, last_row, -dst_stride, width, height);
}
// Rotates a plane by 180 degrees by mirroring each row and swapping rows
// top-for-bottom. One temporary row buffer is used so that each top row is
// mirrored before the matching bottom row is written over its destination.
LIBYUV_API
void RotatePlane180(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  // Temporary row holding the mirrored top row of each pair.
  align_buffer_64(row, width);
  const uint8* src_tail = src + src_stride * (height - 1);
  uint8* dst_tail = dst + dst_stride * (height - 1);
  int pairs_left;
  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MirrorRow = IS_ALIGNED(width, 16) ? MirrorRow_NEON : MirrorRow_Any_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    MirrorRow = IS_ALIGNED(width, 16) ? MirrorRow_SSSE3 : MirrorRow_Any_SSSE3;
  }
#endif
#if defined(HAS_MIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MirrorRow = IS_ALIGNED(width, 32) ? MirrorRow_AVX2 : MirrorRow_Any_AVX2;
  }
#endif
// TODO(fbarchard): Mirror on mips handle unaligned memory.
#if defined(HAS_MIRRORROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
    MirrorRow = MirrorRow_DSPR2;
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
  }
#endif
#if defined(HAS_COPYROW_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
  }
#endif
#if defined(HAS_COPYROW_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_MIPS;
  }
#endif

  // Process rows in top/bottom pairs, moving towards the middle. With an
  // odd height the middle row is harmlessly mirrored twice.
  for (pairs_left = (height + 1) >> 1; pairs_left > 0; --pairs_left) {
    MirrorRow(src, row, width);       // Top row, mirrored, into the buffer.
    MirrorRow(src_tail, dst, width);  // Bottom row, mirrored, into the top.
    CopyRow(row, dst_tail, width);    // Buffered row into the bottom.
    src += src_stride;
    dst += dst_stride;
    src_tail -= src_stride;
    dst_tail -= dst_stride;
  }
  free_aligned_buffer_64(row);
}
// Transposes an interleaved two-channel plane, writing the first byte of
// each pair to |dst_a| and the second to |dst_b|. |width| counts pairs.
// The source is consumed in strips of 8 rows with the best available
// TransposeUVWx8 variant; fewer than 8 leftover rows go to TransposeUVWxH_C.
LIBYUV_API
void TransposeUV(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  int rows_left;
  void (*TransposeUVWx8)(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width) = TransposeUVWx8_C;
#if defined(HAS_TRANSPOSEUVWX8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    TransposeUVWx8 = TransposeUVWx8_NEON;
  }
#endif
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    TransposeUVWx8 = IS_ALIGNED(width, 8) ? TransposeUVWx8_SSE2
                                          : TransposeUVWx8_Any_SSE2;
  }
#endif
#if defined(HAS_TRANSPOSEUVWX8_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
    TransposeUVWx8 = TransposeUVWx8_DSPR2;
  }
#endif

  // Work through the source in 8x8 tiles.
  for (rows_left = height; rows_left >= 8; rows_left -= 8) {
    TransposeUVWx8(src, src_stride,
                   dst_a, dst_stride_a,
                   dst_b, dst_stride_b,
                   width);
    src += 8 * src_stride;  // Go down 8 rows.
    dst_a += 8;             // Move over 8 columns.
    dst_b += 8;             // Move over 8 columns.
  }

  // Fewer than 8 rows remain: finish with the generic C version.
  if (rows_left > 0) {
    TransposeUVWxH_C(src, src_stride,
                     dst_a, dst_stride_a,
                     dst_b, dst_stride_b,
                     width, rows_left);
  }
}
// Rotates an interleaved two-channel plane by 90 degrees while splitting it
// into |dst_a| and |dst_b|: a TransposeUV whose source is read from the last
// row upwards (negated source stride).
LIBYUV_API
void RotateUV90(const uint8* src, int src_stride,
                uint8* dst_a, int dst_stride_a,
                uint8* dst_b, int dst_stride_b,
                int width, int height) {
  const uint8* last_row = src + src_stride * (height - 1);
  TransposeUV(last_row, -src_stride,
              dst_a, dst_stride_a,
              dst_b, dst_stride_b,
              width, height);
}
// Rotates an interleaved two-channel plane by 270 degrees while splitting it
// into |dst_a| and |dst_b|: a TransposeUV whose destinations are written
// from row (width - 1) upwards (negated destination strides).
LIBYUV_API
void RotateUV270(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  uint8* last_row_a = dst_a + dst_stride_a * (width - 1);
  uint8* last_row_b = dst_b + dst_stride_b * (width - 1);
  TransposeUV(src, src_stride,
              last_row_a, -dst_stride_a,
              last_row_b, -dst_stride_b,
              width, height);
}
// Rotates an interleaved two-channel plane by 180 degrees while splitting it
// into |dst_a| and |dst_b|. Each source row is mirrored horizontally by
// MirrorUVRow and stored into the destinations from the last row upwards,
// which provides the vertical flip.
LIBYUV_API
void RotateUV180(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  int rows_left;
  void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
      MirrorUVRow_C;
#if defined(HAS_MIRRORUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    MirrorUVRow = MirrorUVRow_NEON;
  }
#endif
#if defined(HAS_MIRRORUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
    MirrorUVRow = MirrorUVRow_SSSE3;
  }
#endif
#if defined(HAS_MIRRORUVROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
    MirrorUVRow = MirrorUVRow_DSPR2;
  }
#endif

  // Start writing at the bottom row of both destinations.
  dst_a += dst_stride_a * (height - 1);
  dst_b += dst_stride_b * (height - 1);

  for (rows_left = height; rows_left > 0; --rows_left) {
    MirrorUVRow(src, dst_a, dst_b, width);
    src += src_stride;
    dst_a -= dst_stride_a;
    dst_b -= dst_stride_b;
  }
}
// Rotates (or, for kRotate0, copies) a single plane according to |mode|.
// A negative |height| means the source image is inverted: it is read from
// its last row upwards.
// Returns 0 on success, -1 for NULL pointers, non-positive width, zero
// height or an unrecognised mode.
LIBYUV_API
int RotatePlane(const uint8* src, int src_stride,
                uint8* dst, int dst_stride,
                int width, int height,
                enum RotationMode mode) {
  if (!src || !dst || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src += (height - 1) * src_stride;
    src_stride = -src_stride;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      CopyPlane(src, src_stride, dst, dst_stride, width, height);
      break;
    case kRotate90:
      RotatePlane90(src, src_stride, dst, dst_stride, width, height);
      break;
    case kRotate270:
      RotatePlane270(src, src_stride, dst, dst_stride, width, height);
      break;
    case kRotate180:
      RotatePlane180(src, src_stride, dst, dst_stride, width, height);
      break;
    default:
      return -1;
  }
  return 0;
}
// Rotates a three-plane I420 image according to |mode|. The Y plane is
// processed at width x height; the U and V planes at the rounded-up half
// dimensions. A negative |height| means the source image is inverted.
// Returns -1 for NULL pointers, non-positive width, zero height or an
// unrecognised mode; the I420Copy() result for kRotate0; otherwise 0.
LIBYUV_API
int I420Rotate(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_u || !src_v ||
      !dst_y || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y += (height - 1) * src_stride_y;
    src_u += (halfheight - 1) * src_stride_u;
    src_v += (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return I420Copy(src_y, src_stride_y,
                      src_u, src_stride_u,
                      src_v, src_stride_v,
                      dst_y, dst_stride_y,
                      dst_u, dst_stride_u,
                      dst_v, dst_stride_v,
                      width, height);
    case kRotate90:
      RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u,
                    halfwidth, halfheight);
      RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v,
                    halfwidth, halfheight);
      break;
    case kRotate270:
      RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v,
                     halfwidth, halfheight);
      break;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v,
                     halfwidth, halfheight);
      break;
    default:
      return -1;
  }
  return 0;
}
// Rotates an NV12 image (Y plane plus interleaved UV plane) according to
// |mode| and writes it out as three separate Y, U and V planes. The UV plane
// is processed at the rounded-up half dimensions. A negative |height| means
// the source image is inverted.
// Returns -1 for NULL pointers, non-positive width, zero height or an
// unrecognised mode; the NV12ToI420() result for kRotate0; otherwise 0.
LIBYUV_API
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
                     const uint8* src_uv, int src_stride_uv,
                     uint8* dst_y, int dst_stride_y,
                     uint8* dst_u, int dst_stride_u,
                     uint8* dst_v, int dst_stride_v,
                     int width, int height,
                     enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_uv ||
      !dst_y || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y += (height - 1) * src_stride_y;
    src_uv += (halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return NV12ToI420(src_y, src_stride_y,
                        src_uv, src_stride_uv,
                        dst_y, dst_stride_y,
                        dst_u, dst_stride_u,
                        dst_v, dst_stride_v,
                        width, height);
    case kRotate90:
      RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotateUV90(src_uv, src_stride_uv,
                 dst_u, dst_stride_u,
                 dst_v, dst_stride_v,
                 halfwidth, halfheight);
      break;
    case kRotate270:
      RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotateUV270(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      break;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
      RotateUV180(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      break;
    default:
      return -1;
  }
  return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,80 @@
/*
* Copyright 2015 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// TANY generates an "any width" wrapper named NAMEANY around the SIMD
// transpose TPOS_SIMD.
//   MASK - low-bit mask of the SIMD step (7 for 8 columns, 15 for 16).
// The wrapper splits width into n (the part that is a multiple of MASK + 1)
// and r (the remainder). TPOS_SIMD handles the first n columns when n > 0;
// TransposeWx8_C then handles the remaining r columns, starting n bytes into
// the source and n rows into the destination. TransposeWx8_C is called even
// when r is 0.
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
void NAMEANY(const uint8* src, int src_stride, \
uint8* dst, int dst_stride, int width) { \
int r = width & MASK; \
int n = width - r; \
if (n > 0) { \
TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
} \
TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);\
}
// One wrapper per SIMD variant that the build enables.
#ifdef HAS_TRANSPOSEWX8_NEON
TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_SSSE3
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
#ifdef HAS_TRANSPOSEWX8_DSPR2
TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7)
#endif
#undef TANY
// TUVANY generates an "any width" wrapper named NAMEANY around the SIMD
// two-channel transpose TPOS_SIMD.
//   MASK - low-bit mask of the SIMD step (7 for 8 columns).
// The wrapper splits width into n (the part that is a multiple of MASK + 1)
// and r (the remainder). TPOS_SIMD handles the first n columns when n > 0;
// TransposeUVWx8_C then handles the remaining r columns. The source is
// advanced by n * 2 bytes because each column is a two-byte pair, and both
// destinations are advanced by n rows.
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
void NAMEANY(const uint8* src, int src_stride, \
uint8* dst_a, int dst_stride_a, \
uint8* dst_b, int dst_stride_b, int width) { \
int r = width & MASK; \
int n = width - r; \
if (n > 0) { \
TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, \
n); \
} \
TransposeUVWx8_C(src + n * 2, src_stride, \
dst_a + n * dst_stride_a, dst_stride_a, \
dst_b + n * dst_stride_b, dst_stride_b, r); \
}
// One wrapper per SIMD variant that the build enables.
#ifdef HAS_TRANSPOSEUVWX8_NEON
TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX8_DSPR2
TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7)
#endif
#undef TUVANY
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,205 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate.h"
#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// ARGBScale has a function to copy pixels to a row, striding each source
// pixel by a constant.
// The row functions are declared locally here; ARGBTranspose below selects
// one of them at run time.
// SSE2 variant: x86 builds only, unless LIBYUV_DISABLE_X86 is defined
// (x86-64 Native Client is excluded).
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx, uint8* dst_ptr, int dst_width);
#endif
// NEON variant: ARM NEON / AArch64 builds, unless LIBYUV_DISABLE_NEON is
// defined or the target is Native Client.
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEARGBROWDOWNEVEN_NEON
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
int src_stepx, uint8* dst_ptr, int dst_width);
#endif
// Portable C variant, always declared.
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
int src_stepx, uint8* dst_ptr, int dst_width);
// Transposes a width x height ARGB image (4 bytes per pixel). Each source
// column becomes one destination row: the row function is asked to collect
// |height| pixels, stepping src_stride / 4 pixels between samples.
static void ARGBTranspose(const uint8* src, int src_stride,
                          uint8* dst, int dst_stride, int width, int height) {
  int columns_left;
  int src_pixel_step = src_stride >> 2;  // Source stride measured in pixels.
  void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
      int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) {  // Width of dest.
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) {  // Width of dest.
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
  }
#endif

  // column of source to row of dest.
  for (columns_left = width; columns_left > 0; --columns_left) {
    ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
    src += 4;           // Next source pixel column.
    dst += dst_stride;  // Next destination row.
  }
}
void ARGBRotate90(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width, int height) {
// Rotate by 90 is a ARGBTranspose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1);
src_stride = -src_stride;
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
}
void ARGBRotate270(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width, int height) {
// Rotate by 270 is a ARGBTranspose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
}
void ARGBRotate180(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width, int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
align_buffer_64(row, width * 4);
const uint8* src_bot = src + src_stride * (height - 1);
uint8* dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
int y;
void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
ARGBMirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_SSE2;
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBMirrorRow = ARGBMirrorRow_AVX2;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
}
#endif
#if defined(HAS_COPYROW_AVX)
if (TestCpuFlag(kCpuHasAVX)) {
CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
}
#endif
#if defined(HAS_COPYROW_ERMS)
if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_ERMS;
}
#endif
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
}
#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row
CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
src += src_stride;
dst += dst_stride;
src_bot -= src_stride;
dst_bot -= dst_stride;
}
free_aligned_buffer_64(row);
}
// Rotates (or, for kRotate0, copies) an ARGB image according to |mode|.
// A negative |height| means the source image is inverted: it is read from
// its last row upwards.
// Returns -1 for NULL pointers, non-positive width, zero height or an
// unrecognised mode; the ARGBCopy() result for kRotate0; otherwise 0.
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb, int width, int height,
               enum RotationMode mode) {
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return ARGBCopy(src_argb, src_stride_argb,
                      dst_argb, dst_stride_argb,
                      width, height);
    case kRotate90:
      ARGBRotate90(src_argb, src_stride_argb,
                   dst_argb, dst_stride_argb,
                   width, height);
      break;
    case kRotate270:
      ARGBRotate270(src_argb, src_stride_argb,
                    dst_argb, dst_stride_argb,
                    width, height);
      break;
    case kRotate180:
      ARGBRotate180(src_argb, src_stride_argb,
                    dst_argb, dst_stride_argb,
                    width, height);
      break;
    default:
      return -1;
  }
  return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,92 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Portable transpose of a strip that is |width| columns wide and 8 rows
// tall. For each source column, the 8 bytes found src_stride apart are
// written contiguously as one destination row.
void TransposeWx8_C(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride, int width) {
  int column;
  for (column = 0; column < width; ++column) {
    int row;
    for (row = 0; row < 8; ++row) {
      dst[row] = src[row * src_stride];
    }
    ++src;              // Next source column.
    dst += dst_stride;  // Next destination row.
  }
}
void TransposeUVWx8_C(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width) {
int i;
for (i = 0; i < width; ++i) {
dst_a[0] = src[0 * src_stride + 0];
dst_b[0] = src[0 * src_stride + 1];
dst_a[1] = src[1 * src_stride + 0];
dst_b[1] = src[1 * src_stride + 1];
dst_a[2] = src[2 * src_stride + 0];
dst_b[2] = src[2 * src_stride + 1];
dst_a[3] = src[3 * src_stride + 0];
dst_b[3] = src[3 * src_stride + 1];
dst_a[4] = src[4 * src_stride + 0];
dst_b[4] = src[4 * src_stride + 1];
dst_a[5] = src[5 * src_stride + 0];
dst_b[5] = src[5 * src_stride + 1];
dst_a[6] = src[6 * src_stride + 0];
dst_b[6] = src[6 * src_stride + 1];
dst_a[7] = src[7 * src_stride + 0];
dst_b[7] = src[7 * src_stride + 1];
src += 2;
dst_a += dst_stride_a;
dst_b += dst_stride_b;
}
}
void TransposeWxH_C(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
int i;
for (i = 0; i < width; ++i) {
int j;
for (j = 0; j < height; ++j) {
dst[i * dst_stride + j] = src[j * src_stride + i];
}
}
}
// Portable transpose of an arbitrary width x height block of interleaved
// two-byte pairs. For the pair at source (row j, column i) the first byte is
// stored at dst_a (row i, column j) and the second at dst_b (row i,
// column j). |width| counts pairs, not bytes.
void TransposeUVWxH_C(const uint8* src, int src_stride,
                      uint8* dst_a, int dst_stride_a,
                      uint8* dst_b, int dst_stride_b,
                      int width, int height) {
  int column;
  for (column = 0; column < width; ++column) {
    const int byte_offset = column * 2;  // Offset of this pair in a row.
    int row;
    for (row = 0; row < height; ++row) {
      dst_a[row + (column * dst_stride_a)] =
          src[byte_offset + (row * src_stride)];
      dst_b[row + (column * dst_stride_b)] =
          src[byte_offset + (row * src_stride) + 1];
    }
  }
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,368 @@
/*
* Copyright 2015 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
#if defined(HAS_TRANSPOSEWX8_SSSE3)
// SSSE3 transpose of a strip that is |width| columns wide and 8 rows tall.
// Each loop iteration loads 8 bytes from each of 8 source rows (src_stride
// apart), interleaves them in three rounds (bytes, words, dwords) and stores
// 8 destination rows of 8 bytes (dst_stride apart). The source then moves
// 8 bytes to the right and the destination 8 rows down. |width| is reduced
// by 8 per iteration and the loop repeats while the result is positive.
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
asm volatile (
// Read in the data from the source pointer.
// First round of bit swap.
LABELALIGN
"1: \n"
"movq (%0),%%xmm0 \n"
"movq (%0,%3),%%xmm1 \n"
"lea (%0,%3,2),%0 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"movq (%0),%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"palignr $0x8,%%xmm1,%%xmm1 \n"
"movq (%0,%3),%%xmm3 \n"
"lea (%0,%3,2),%0 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"movdqa %%xmm2,%%xmm3 \n"
"movq (%0),%%xmm4 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"movq (%0,%3),%%xmm5 \n"
"lea (%0,%3,2),%0 \n"
"punpcklbw %%xmm5,%%xmm4 \n"
"movdqa %%xmm4,%%xmm5 \n"
"movq (%0),%%xmm6 \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"movq (%0,%3),%%xmm7 \n"
"lea (%0,%3,2),%0 \n"
"punpcklbw %%xmm7,%%xmm6 \n"
// Negate the stride temporarily so the lea below can step the source
// pointer back up 8 rows and 8 bytes to the right in one instruction.
"neg %3 \n"
"movdqa %%xmm6,%%xmm7 \n"
"lea 0x8(%0,%3,8),%0 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"neg %3 \n"
// Second round of bit swap.
"punpcklwd %%xmm2,%%xmm0 \n"
"punpcklwd %%xmm3,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"palignr $0x8,%%xmm2,%%xmm2 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"punpcklwd %%xmm6,%%xmm4 \n"
"punpcklwd %%xmm7,%%xmm5 \n"
"movdqa %%xmm4,%%xmm6 \n"
"movdqa %%xmm5,%%xmm7 \n"
"palignr $0x8,%%xmm6,%%xmm6 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
// Third round of bit swap.
// Write to the destination pointer.
"punpckldq %%xmm4,%%xmm0 \n"
"movq %%xmm0,(%1) \n"
"movdqa %%xmm0,%%xmm4 \n"
"palignr $0x8,%%xmm4,%%xmm4 \n"
"movq %%xmm4,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm6,%%xmm2 \n"
"movdqa %%xmm2,%%xmm6 \n"
"movq %%xmm2,(%1) \n"
"palignr $0x8,%%xmm6,%%xmm6 \n"
"punpckldq %%xmm5,%%xmm1 \n"
"movq %%xmm6,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"movdqa %%xmm1,%%xmm5 \n"
"movq %%xmm1,(%1) \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"movq %%xmm5,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm7,%%xmm3 \n"
"movq %%xmm3,(%1) \n"
"movdqa %%xmm3,%%xmm7 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
// Count down the width; the movq and lea that follow do not modify the
// flags, so jg still tests the result of this sub.
"sub $0x8,%2 \n"
"movq %%xmm7,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(dst_stride)) // %4
: "memory", "cc",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#endif // defined(HAS_TRANSPOSEWX8_SSSE3)
// Transpose 16x8. 64 bit
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
// Transposes a band of 8 source rows, 16 columns per loop iteration.
//
// Each iteration loads 16 bytes (unaligned) from each of the 8 rows starting
// at src. The low 8 columns are processed in xmm0-xmm7 (punpcklbw) and the
// high 8 columns in xmm8-xmm15 (punpckhbw), so the routine needs the sixteen
// XMM registers listed in the clobbers. 16 rows of 8 bytes are stored starting
// at dst, dst_stride bytes apart. src then advances by 16 bytes and dst by
// 16 * dst_stride.
//
// width is decremented by 16 per iteration and the loop repeats while the
// result is > 0; the body always runs at least once.
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
asm volatile (
// Read in the data from the source pointer.
// First round of bit swap.
// "palignr $0x8" of a register with itself swaps its two 64-bit halves,
// which makes the upper half available to the later punpckl* steps.
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu (%0,%3),%%xmm1 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm0,%%xmm8 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm8 \n"
"movdqu (%0),%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm8,%%xmm9 \n"
"palignr $0x8,%%xmm1,%%xmm1 \n"
"palignr $0x8,%%xmm9,%%xmm9 \n"
"movdqu (%0,%3),%%xmm3 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm2,%%xmm10 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"punpckhbw %%xmm3,%%xmm10 \n"
"movdqa %%xmm2,%%xmm3 \n"
"movdqa %%xmm10,%%xmm11 \n"
"movdqu (%0),%%xmm4 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"palignr $0x8,%%xmm11,%%xmm11 \n"
"movdqu (%0,%3),%%xmm5 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm4,%%xmm12 \n"
"punpcklbw %%xmm5,%%xmm4 \n"
"punpckhbw %%xmm5,%%xmm12 \n"
"movdqa %%xmm4,%%xmm5 \n"
"movdqa %%xmm12,%%xmm13 \n"
"movdqu (%0),%%xmm6 \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"palignr $0x8,%%xmm13,%%xmm13 \n"
"movdqu (%0,%3),%%xmm7 \n"
"lea (%0,%3,2),%0 \n"
"movdqa %%xmm6,%%xmm14 \n"
"punpcklbw %%xmm7,%%xmm6 \n"
"punpckhbw %%xmm7,%%xmm14 \n"
// src has been advanced by 8 rows. Negate the stride, use it scaled by 8 in
// the lea to step back 8 rows while adding 16 bytes, then negate it back.
"neg %3 \n"
"movdqa %%xmm6,%%xmm7 \n"
"movdqa %%xmm14,%%xmm15 \n"
"lea 0x10(%0,%3,8),%0 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"palignr $0x8,%%xmm15,%%xmm15 \n"
"neg %3 \n"
// Second round of bit swap.
"punpcklwd %%xmm2,%%xmm0 \n"
"punpcklwd %%xmm3,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"palignr $0x8,%%xmm2,%%xmm2 \n"
"palignr $0x8,%%xmm3,%%xmm3 \n"
"punpcklwd %%xmm6,%%xmm4 \n"
"punpcklwd %%xmm7,%%xmm5 \n"
"movdqa %%xmm4,%%xmm6 \n"
"movdqa %%xmm5,%%xmm7 \n"
"palignr $0x8,%%xmm6,%%xmm6 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"punpcklwd %%xmm10,%%xmm8 \n"
"punpcklwd %%xmm11,%%xmm9 \n"
"movdqa %%xmm8,%%xmm10 \n"
"movdqa %%xmm9,%%xmm11 \n"
"palignr $0x8,%%xmm10,%%xmm10 \n"
"palignr $0x8,%%xmm11,%%xmm11 \n"
"punpcklwd %%xmm14,%%xmm12 \n"
"punpcklwd %%xmm15,%%xmm13 \n"
"movdqa %%xmm12,%%xmm14 \n"
"movdqa %%xmm13,%%xmm15 \n"
"palignr $0x8,%%xmm14,%%xmm14 \n"
"palignr $0x8,%%xmm15,%%xmm15 \n"
// Third round of bit swap.
// Write to the destination pointer.
// The first 8 destination rows come from xmm0-xmm7 (low source columns).
"punpckldq %%xmm4,%%xmm0 \n"
"movq %%xmm0,(%1) \n"
"movdqa %%xmm0,%%xmm4 \n"
"palignr $0x8,%%xmm4,%%xmm4 \n"
"movq %%xmm4,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm6,%%xmm2 \n"
"movdqa %%xmm2,%%xmm6 \n"
"movq %%xmm2,(%1) \n"
"palignr $0x8,%%xmm6,%%xmm6 \n"
"punpckldq %%xmm5,%%xmm1 \n"
"movq %%xmm6,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"movdqa %%xmm1,%%xmm5 \n"
"movq %%xmm1,(%1) \n"
"palignr $0x8,%%xmm5,%%xmm5 \n"
"movq %%xmm5,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm7,%%xmm3 \n"
"movq %%xmm3,(%1) \n"
"movdqa %%xmm3,%%xmm7 \n"
"palignr $0x8,%%xmm7,%%xmm7 \n"
"movq %%xmm7,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
// The next 8 destination rows come from xmm8-xmm15 (high source columns).
"punpckldq %%xmm12,%%xmm8 \n"
"movq %%xmm8,(%1) \n"
"movdqa %%xmm8,%%xmm12 \n"
"palignr $0x8,%%xmm12,%%xmm12 \n"
"movq %%xmm12,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm14,%%xmm10 \n"
"movdqa %%xmm10,%%xmm14 \n"
"movq %%xmm10,(%1) \n"
"palignr $0x8,%%xmm14,%%xmm14 \n"
"punpckldq %%xmm13,%%xmm9 \n"
"movq %%xmm14,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"movdqa %%xmm9,%%xmm13 \n"
"movq %%xmm9,(%1) \n"
"palignr $0x8,%%xmm13,%%xmm13 \n"
"movq %%xmm13,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"punpckldq %%xmm15,%%xmm11 \n"
"movq %%xmm11,(%1) \n"
"movdqa %%xmm11,%%xmm15 \n"
"palignr $0x8,%%xmm15,%%xmm15 \n"
// width -= 16. The movq and lea that follow do not modify EFLAGS, so the jg
// at the bottom of the loop tests the result of this subtraction.
"sub $0x10,%2 \n"
"movq %%xmm15,(%1,%4) \n"
"lea (%1,%4,2),%1 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "r"((intptr_t)(src_stride)), // %3
"r"((intptr_t)(dst_stride)) // %4
: "memory", "cc",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
);
}
#endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
// Transpose UV 8x8. 64 bit.
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
// Transposes a band of 8 rows of two-channel data and splits the channels
// into two destination planes, 8 byte pairs per loop iteration.
//
// Each iteration loads 16 bytes (unaligned) from each of the 8 rows starting
// at src. After three interleave rounds every result register holds one
// output row for dst_a in its low 8 bytes (stored with movlpd) and one output
// row for dst_b in its high 8 bytes (stored with movhpd). 8 rows of 8 bytes
// are written to each destination, dst_stride_a / dst_stride_b bytes apart.
// src then advances by 16 bytes.
//
// width is decremented by 8 per iteration and the loop repeats while the
// result is > 0; the body always runs at least once. xmm8 and xmm9 are used
// as scratch registers.
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, int width) {
asm volatile (
// Read in the data from the source pointer.
// First round of bit swap.
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu (%0,%4),%%xmm1 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm0,%%xmm8 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm8 \n"
"movdqa %%xmm8,%%xmm1 \n"
"movdqu (%0),%%xmm2 \n"
"movdqu (%0,%4),%%xmm3 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm2,%%xmm8 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"punpckhbw %%xmm3,%%xmm8 \n"
"movdqa %%xmm8,%%xmm3 \n"
"movdqu (%0),%%xmm4 \n"
"movdqu (%0,%4),%%xmm5 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm4,%%xmm8 \n"
"punpcklbw %%xmm5,%%xmm4 \n"
"punpckhbw %%xmm5,%%xmm8 \n"
"movdqa %%xmm8,%%xmm5 \n"
"movdqu (%0),%%xmm6 \n"
"movdqu (%0,%4),%%xmm7 \n"
"lea (%0,%4,2),%0 \n"
"movdqa %%xmm6,%%xmm8 \n"
"punpcklbw %%xmm7,%%xmm6 \n"
// src has been advanced by 8 rows. Negate the stride, use it scaled by 8 in
// the lea to step back 8 rows while adding 16 bytes, then negate it back.
"neg %4 \n"
"lea 0x10(%0,%4,8),%0 \n"
"punpckhbw %%xmm7,%%xmm8 \n"
"movdqa %%xmm8,%%xmm7 \n"
"neg %4 \n"
// Second round of bit swap.
"movdqa %%xmm0,%%xmm8 \n"
"movdqa %%xmm1,%%xmm9 \n"
"punpckhwd %%xmm2,%%xmm8 \n"
"punpckhwd %%xmm3,%%xmm9 \n"
"punpcklwd %%xmm2,%%xmm0 \n"
"punpcklwd %%xmm3,%%xmm1 \n"
"movdqa %%xmm8,%%xmm2 \n"
"movdqa %%xmm9,%%xmm3 \n"
"movdqa %%xmm4,%%xmm8 \n"
"movdqa %%xmm5,%%xmm9 \n"
"punpckhwd %%xmm6,%%xmm8 \n"
"punpckhwd %%xmm7,%%xmm9 \n"
"punpcklwd %%xmm6,%%xmm4 \n"
"punpcklwd %%xmm7,%%xmm5 \n"
"movdqa %%xmm8,%%xmm6 \n"
"movdqa %%xmm9,%%xmm7 \n"
// Third round of bit swap.
// Write to the destination pointer.
"movdqa %%xmm0,%%xmm8 \n"
"punpckldq %%xmm4,%%xmm0 \n"
"movlpd %%xmm0,(%1) \n" // Write back U channel
"movhpd %%xmm0,(%2) \n" // Write back V channel
"punpckhdq %%xmm4,%%xmm8 \n"
"movlpd %%xmm8,(%1,%5) \n"
"lea (%1,%5,2),%1 \n"
"movhpd %%xmm8,(%2,%6) \n"
"lea (%2,%6,2),%2 \n"
"movdqa %%xmm2,%%xmm8 \n"
"punpckldq %%xmm6,%%xmm2 \n"
"movlpd %%xmm2,(%1) \n"
"movhpd %%xmm2,(%2) \n"
"punpckhdq %%xmm6,%%xmm8 \n"
"movlpd %%xmm8,(%1,%5) \n"
"lea (%1,%5,2),%1 \n"
"movhpd %%xmm8,(%2,%6) \n"
"lea (%2,%6,2),%2 \n"
"movdqa %%xmm1,%%xmm8 \n"
"punpckldq %%xmm5,%%xmm1 \n"
"movlpd %%xmm1,(%1) \n"
"movhpd %%xmm1,(%2) \n"
"punpckhdq %%xmm5,%%xmm8 \n"
"movlpd %%xmm8,(%1,%5) \n"
"lea (%1,%5,2),%1 \n"
"movhpd %%xmm8,(%2,%6) \n"
"lea (%2,%6,2),%2 \n"
"movdqa %%xmm3,%%xmm8 \n"
"punpckldq %%xmm7,%%xmm3 \n"
"movlpd %%xmm3,(%1) \n"
"movhpd %%xmm3,(%2) \n"
"punpckhdq %%xmm7,%%xmm8 \n"
// width -= 8. The stores and lea that follow do not modify EFLAGS, so the jg
// at the bottom of the loop tests the result of this subtraction.
"sub $0x8,%3 \n"
"movlpd %%xmm8,(%1,%5) \n"
"lea (%1,%5,2),%1 \n"
"movhpd %%xmm8,(%2,%6) \n"
"lea (%2,%6,2),%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst_a), // %1
"+r"(dst_b), // %2
"+r"(width) // %3
: "r"((intptr_t)(src_stride)), // %4
"r"((intptr_t)(dst_stride_a)), // %5
"r"((intptr_t)(dst_stride_b)) // %6
: "memory", "cc",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
"xmm8", "xmm9"
);
}
#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,484 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MIPS) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)
// Transposes a band of 8 source rows, one source column per loop iteration.
//
// Each iteration loads one byte from each of the 8 rows at src + k * src_stride
// (k = 0..7), packs them into two 32-bit words and stores the 8 bytes at dst.
// src then advances by 1 byte, dst by dst_stride, and width is decremented;
// the loop ends when width reaches 0.
//
// Two copies of the loop exist: label 1 uses word stores (sw) and is taken
// when both dst and dst_stride are multiples of 4; label 11 uses swr/swl
// pairs for unaligned destinations.
//
// The code is assembled under ".set noreorder", so the instruction after each
// branch is its delay slot and always executes.
//
// NOTE(review): there is no width == 0 guard here; presumably callers always
// pass width > 0 - confirm.
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
// Precompute row offsets: t2..t7 = src_stride * 2..7 (t9 is a temporary x8).
"sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
// Take the unaligned path if dst or dst_stride is not a multiple of 4.
"andi $t0, %[dst], 0x3 \n"
"andi $t1, %[dst_stride], 0x3 \n"
"or $t0, $t0, $t1 \n"
"bnez $t0, 11f \n"
" subu $t7, $t9, %[src_stride] \n" // delay slot: src_stride x 7
//dst + dst_stride word aligned
"1: \n"
"lbu $t0, 0(%[src]) \n"
"lbux $t1, %[src_stride](%[src]) \n"
"lbux $t8, $t2(%[src]) \n"
"lbux $t9, $t3(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s0, $t8, $t0 \n"
"lbux $t0, $t4(%[src]) \n"
"lbux $t1, $t5(%[src]) \n"
"lbux $t8, $t6(%[src]) \n"
"lbux $t9, $t7(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s1, $t8, $t0 \n"
"sw $s0, 0(%[dst]) \n"
"addiu %[width], -1 \n"
"addiu %[src], 1 \n"
"sw $s1, 4(%[dst]) \n"
"bnez %[width], 1b \n"
" addu %[dst], %[dst], %[dst_stride] \n"
"b 2f \n"
// Explicit delay slot. Without it the first load of the unaligned loop
// below would execute here and read past the columns just processed.
" nop \n"
//dst + dst_stride unaligned
"11: \n"
"lbu $t0, 0(%[src]) \n"
"lbux $t1, %[src_stride](%[src]) \n"
"lbux $t8, $t2(%[src]) \n"
"lbux $t9, $t3(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s0, $t8, $t0 \n"
"lbux $t0, $t4(%[src]) \n"
"lbux $t1, $t5(%[src]) \n"
"lbux $t8, $t6(%[src]) \n"
"lbux $t9, $t7(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s1, $t8, $t0 \n"
"swr $s0, 0(%[dst]) \n"
"swl $s0, 3(%[dst]) \n"
"addiu %[width], -1 \n"
"addiu %[src], 1 \n"
"swr $s1, 4(%[dst]) \n"
"swl $s1, 7(%[dst]) \n"
"bnez %[width], 11b \n"
" addu %[dst], %[dst], %[dst_stride] \n"
"2: \n"
".set pop \n"
:[src] "+r" (src),
[dst] "+r" (dst),
[width] "+r" (width)
:[src_stride] "r" (src_stride),
[dst_stride] "r" (dst_stride)
// "memory": the asm reads through src and writes through dst, which the
// operand list alone does not tell the compiler.
: "memory",
"t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1"
);
}
// Transposes a band of 8 source rows, four source columns per loop iteration.
//
// Each iteration loads one 32-bit word from each of the 8 rows at
// src + k * src_stride (k = 0..7), regroups the bytes with precr/precrq, and
// stores four destination rows of 8 bytes each (dst, dst + dst_stride, ...).
// src then advances by 4 bytes and dst by 4 * dst_stride. The iteration count
// is width >> 2, kept in $AT (hence ".set noat").
//
// Two copies of the loop exist: label 1 uses word stores (sw) and is taken
// when both dst and dst_stride are multiples of 4; label 11 uses swr/swl
// pairs for unaligned destinations.
//
// The code is assembled under ".set noreorder", so the instruction after each
// branch is its delay slot and always executes.
//
// NOTE(review): source rows are read with lw/lwx and only dst alignment is
// checked here; presumably the caller guarantees word-aligned src and
// src_stride, and width >= 4 - confirm.
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
__asm__ __volatile__ (
".set noat \n"
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
// Precompute row offsets: t2..t7 = src_stride * 2..7 (t9 is a temporary x8).
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
"srl $AT, %[width], 0x2 \n" // loop count = width / 4
// Take the unaligned path if dst or dst_stride is not a multiple of 4.
"andi $t0, %[dst], 0x3 \n"
"andi $t1, %[dst_stride], 0x3 \n"
"or $t0, $t0, $t1 \n"
"bnez $t0, 11f \n"
" subu $t7, $t9, %[src_stride] \n" // delay slot: src_stride x 7
//dst + dst_stride word aligned
"1: \n"
"lw $t0, 0(%[src]) \n"
"lwx $t1, %[src_stride](%[src]) \n"
"lwx $t8, $t2(%[src]) \n"
"lwx $t9, $t3(%[src]) \n"
// t0 = | 30 | 20 | 10 | 00 |
// t1 = | 31 | 21 | 11 | 01 |
// t8 = | 32 | 22 | 12 | 02 |
// t9 = | 33 | 23 | 13 | 03 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 21 | 01 | 20 | 00 |
// s1 = | 23 | 03 | 22 | 02 |
// s2 = | 31 | 11 | 30 | 10 |
// s3 = | 33 | 13 | 32 | 12 |
"precr.qb.ph $s4, $s1, $s0 \n"
"precrq.qb.ph $s5, $s1, $s0 \n"
"precr.qb.ph $s6, $s3, $s2 \n"
"precrq.qb.ph $s7, $s3, $s2 \n"
// s4 = | 03 | 02 | 01 | 00 |
// s5 = | 23 | 22 | 21 | 20 |
// s6 = | 13 | 12 | 11 | 10 |
// s7 = | 33 | 32 | 31 | 30 |
"lwx $t0, $t4(%[src]) \n"
"lwx $t1, $t5(%[src]) \n"
"lwx $t8, $t6(%[src]) \n"
"lwx $t9, $t7(%[src]) \n"
// t0 = | 34 | 24 | 14 | 04 |
// t1 = | 35 | 25 | 15 | 05 |
// t8 = | 36 | 26 | 16 | 06 |
// t9 = | 37 | 27 | 17 | 07 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 25 | 05 | 24 | 04 |
// s1 = | 27 | 07 | 26 | 06 |
// s2 = | 35 | 15 | 34 | 14 |
// s3 = | 37 | 17 | 36 | 16 |
"precr.qb.ph $t0, $s1, $s0 \n"
"precrq.qb.ph $t1, $s1, $s0 \n"
"precr.qb.ph $t8, $s3, $s2 \n"
"precrq.qb.ph $t9, $s3, $s2 \n"
// t0 = | 07 | 06 | 05 | 04 |
// t1 = | 27 | 26 | 25 | 24 |
// t8 = | 17 | 16 | 15 | 14 |
// t9 = | 37 | 36 | 35 | 34 |
// s0, s1, s2 = addresses of destination rows 1, 2 and 3.
"addu $s0, %[dst], %[dst_stride] \n"
"addu $s1, $s0, %[dst_stride] \n"
"addu $s2, $s1, %[dst_stride] \n"
"sw $s4, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $s6, 0($s0) \n"
"sw $t8, 4($s0) \n"
"sw $s5, 0($s1) \n"
"sw $t1, 4($s1) \n"
"sw $s7, 0($s2) \n"
"sw $t9, 4($s2) \n"
"addiu $AT, -1 \n"
"addiu %[src], 4 \n"
"bnez $AT, 1b \n"
" addu %[dst], $s2, %[dst_stride] \n"
"b 2f \n"
// Explicit delay slot. Without it the first lw of the unaligned loop below
// would execute here and read 4 bytes past the columns just processed.
" nop \n"
//dst + dst_stride unaligned
"11: \n"
"lw $t0, 0(%[src]) \n"
"lwx $t1, %[src_stride](%[src]) \n"
"lwx $t8, $t2(%[src]) \n"
"lwx $t9, $t3(%[src]) \n"
// t0 = | 30 | 20 | 10 | 00 |
// t1 = | 31 | 21 | 11 | 01 |
// t8 = | 32 | 22 | 12 | 02 |
// t9 = | 33 | 23 | 13 | 03 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 21 | 01 | 20 | 00 |
// s1 = | 23 | 03 | 22 | 02 |
// s2 = | 31 | 11 | 30 | 10 |
// s3 = | 33 | 13 | 32 | 12 |
"precr.qb.ph $s4, $s1, $s0 \n"
"precrq.qb.ph $s5, $s1, $s0 \n"
"precr.qb.ph $s6, $s3, $s2 \n"
"precrq.qb.ph $s7, $s3, $s2 \n"
// s4 = | 03 | 02 | 01 | 00 |
// s5 = | 23 | 22 | 21 | 20 |
// s6 = | 13 | 12 | 11 | 10 |
// s7 = | 33 | 32 | 31 | 30 |
"lwx $t0, $t4(%[src]) \n"
"lwx $t1, $t5(%[src]) \n"
"lwx $t8, $t6(%[src]) \n"
"lwx $t9, $t7(%[src]) \n"
// t0 = | 34 | 24 | 14 | 04 |
// t1 = | 35 | 25 | 15 | 05 |
// t8 = | 36 | 26 | 16 | 06 |
// t9 = | 37 | 27 | 17 | 07 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 25 | 05 | 24 | 04 |
// s1 = | 27 | 07 | 26 | 06 |
// s2 = | 35 | 15 | 34 | 14 |
// s3 = | 37 | 17 | 36 | 16 |
"precr.qb.ph $t0, $s1, $s0 \n"
"precrq.qb.ph $t1, $s1, $s0 \n"
"precr.qb.ph $t8, $s3, $s2 \n"
"precrq.qb.ph $t9, $s3, $s2 \n"
// t0 = | 07 | 06 | 05 | 04 |
// t1 = | 27 | 26 | 25 | 24 |
// t8 = | 17 | 16 | 15 | 14 |
// t9 = | 37 | 36 | 35 | 34 |
// s0, s1, s2 = addresses of destination rows 1, 2 and 3.
"addu $s0, %[dst], %[dst_stride] \n"
"addu $s1, $s0, %[dst_stride] \n"
"addu $s2, $s1, %[dst_stride] \n"
"swr $s4, 0(%[dst]) \n"
"swl $s4, 3(%[dst]) \n"
"swr $t0, 4(%[dst]) \n"
"swl $t0, 7(%[dst]) \n"
"swr $s6, 0($s0) \n"
"swl $s6, 3($s0) \n"
"swr $t8, 4($s0) \n"
"swl $t8, 7($s0) \n"
"swr $s5, 0($s1) \n"
"swl $s5, 3($s1) \n"
"swr $t1, 4($s1) \n"
"swl $t1, 7($s1) \n"
"swr $s7, 0($s2) \n"
"swl $s7, 3($s2) \n"
"swr $t9, 4($s2) \n"
"swl $t9, 7($s2) \n"
"addiu $AT, -1 \n"
"addiu %[src], 4 \n"
"bnez $AT, 11b \n"
" addu %[dst], $s2, %[dst_stride] \n"
"2: \n"
".set pop \n"
".set at \n"
:[src] "+r" (src),
[dst] "+r" (dst),
[width] "+r" (width)
:[src_stride] "r" (src_stride),
[dst_stride] "r" (dst_stride)
// "memory": the asm reads through src and writes through dst, which the
// operand list alone does not tell the compiler.
: "memory",
"t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
);
}
// Transposes a band of 8 rows of two-channel data and splits the channels
// into two destination planes, two byte pairs per loop iteration.
//
// Each iteration loads one 32-bit word (two a/b byte pairs) from each of the
// 8 rows at src + k * src_stride (k = 0..7). The "a" bytes of the first pair
// form an 8-byte row stored at dst_a and those of the second pair a row at
// dst_a + dst_stride_a; the "b" bytes go to dst_b in the same way. src then
// advances by 4 bytes and each destination by two of its strides. The
// iteration count is width >> 1, kept in $t1.
//
// Two copies of the loop exist: label 1 uses word stores (sw) and is taken
// when dst_a, dst_b and both strides are multiples of 4; label 11 uses
// swr/swl pairs otherwise.
//
// The code is assembled under ".set noreorder", so the instruction after each
// branch is its delay slot and always executes.
//
// NOTE(review): source rows are read with lw/lwx; presumably the caller
// guarantees word-aligned src and src_stride, and width >= 2 - confirm.
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
// Precompute row offsets: t2..t7 = src_stride * 2..7 (t9 is a temporary x8).
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
"subu $t7, $t9, %[src_stride] \n"
"srl $t1, %[width], 1 \n" // loop count = width / 2
// check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
"andi $t0, %[dst_a], 0x3 \n"
"andi $t8, %[dst_b], 0x3 \n"
"or $t0, $t0, $t8 \n"
"andi $t8, %[dst_stride_a], 0x3 \n"
"andi $s5, %[dst_stride_b], 0x3 \n"
"or $t8, $t8, $s5 \n"
"or $t0, $t0, $t8 \n"
"bnez $t0, 11f \n"
" nop \n"
// dst + dst_stride word aligned (both, a & b dst addresses)
"1: \n"
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
"addu $s5, %[dst_a], %[dst_stride_a] \n"
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
"addu $s6, %[dst_b], %[dst_stride_b] \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
"sw $s3, 0($s5) \n"
"sw $s4, 0($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
"sw $s3, 0(%[dst_a]) \n"
"sw $s4, 0(%[dst_b]) \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
"precrq.ph.w $s2, $s0, $t9 \n" // |B7|A7|B6|A6|
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
"sw $s3, 4($s5) \n"
"sw $s4, 4($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
"addiu %[src], 4 \n"
"addiu $t1, -1 \n"
"sll $t0, %[dst_stride_a], 1 \n"
"sll $t8, %[dst_stride_b], 1 \n"
"sw $s3, 4(%[dst_a]) \n"
"sw $s4, 4(%[dst_b]) \n"
"addu %[dst_a], %[dst_a], $t0 \n"
"bnez $t1, 1b \n"
" addu %[dst_b], %[dst_b], $t8 \n"
"b 2f \n"
" nop \n"
// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
"11: \n"
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
"addu $s5, %[dst_a], %[dst_stride_a] \n"
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
"addu $s6, %[dst_b], %[dst_stride_b] \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
"swr $s3, 0($s5) \n"
"swl $s3, 3($s5) \n"
"swr $s4, 0($s6) \n"
"swl $s4, 3($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
"swr $s3, 0(%[dst_a]) \n"
"swl $s3, 3(%[dst_a]) \n"
"swr $s4, 0(%[dst_b]) \n"
"swl $s4, 3(%[dst_b]) \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
"precrq.ph.w $s2, $s0, $t9 \n" // |B7|A7|B6|A6|
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
"swr $s3, 4($s5) \n"
"swl $s3, 7($s5) \n"
"swr $s4, 4($s6) \n"
"swl $s4, 7($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
"addiu %[src], 4 \n"
"addiu $t1, -1 \n"
"sll $t0, %[dst_stride_a], 1 \n"
"sll $t8, %[dst_stride_b], 1 \n"
"swr $s3, 4(%[dst_a]) \n"
"swl $s3, 7(%[dst_a]) \n"
"swr $s4, 4(%[dst_b]) \n"
"swl $s4, 7(%[dst_b]) \n"
"addu %[dst_a], %[dst_a], $t0 \n"
"bnez $t1, 11b \n"
" addu %[dst_b], %[dst_b], $t8 \n"
"2: \n"
".set pop \n"
: [src] "+r" (src),
[dst_a] "+r" (dst_a),
[dst_b] "+r" (dst_b),
[width] "+r" (width),
[src_stride] "+r" (src_stride)
: [dst_stride_a] "r" (dst_stride_a),
[dst_stride_b] "r" (dst_stride_b)
// "memory": the asm reads through src and writes through dst_a/dst_b, which
// the operand list alone does not tell the compiler.
: "memory",
"t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,533 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
// Byte-index table for vtbl: output byte i * 4 + j selects input byte
// j * 4 + i, i.e. it transposes a 4x4 byte matrix held in 16 bytes.
// Loaded by the 4x8 residual path of TransposeWx8_NEON.
static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
// Transposes a band of 8 source rows that is `width` columns wide.
//
// The main loop handles 8 columns at a time: it loads 8 bytes from each of
// the 8 rows, transposes the 8x8 tile with vtrn/vrev16, and stores 8
// destination rows of 8 bytes. Leftover columns (1..7) are handled by the
// 4-, 2- and 1-column blocks that follow. Every source column becomes one
// 8-byte destination row, so dst advances by dst_stride per column.
//
// src_temp (%0) is a scratch pointer that walks the rows of the current tile
// so that src (%1) and dst (%3) keep pointing at the tile origin.
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) {
const uint8* src_temp = NULL;
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %5, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
"1: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d1}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d2}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d3}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d4}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d5}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d6}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d7}, [%0] \n"
// Transpose at 8-, 16- and 32-bit granularity, then swap the bytes within
// each 16-bit pair.
"vtrn.8 d1, d0 \n"
"vtrn.8 d3, d2 \n"
"vtrn.8 d5, d4 \n"
"vtrn.8 d7, d6 \n"
"vtrn.16 d1, d3 \n"
"vtrn.16 d0, d2 \n"
"vtrn.16 d5, d7 \n"
"vtrn.16 d4, d6 \n"
"vtrn.32 d1, d5 \n"
"vtrn.32 d0, d4 \n"
"vtrn.32 d3, d7 \n"
"vtrn.32 d2, d6 \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.8 {d1}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d0}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d3}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d2}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d5}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d4}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d7}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d6}, [%0] \n"
"add %1, #8 \n" // src += 8
"add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
"subs %5, #8 \n" // w -= 8
"bge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %5, #8 \n"
"beq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %5, #2 \n"
"blt 3f \n"
"cmp %5, #4 \n"
"blt 2f \n"
// 4x8 block
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d0[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d1[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d1[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d2[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d2[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d3[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d3[1]}, [%0] \n"
"mov %0, %3 \n"
// Load the kVTbl4x4Transpose index table into q3 (d6/d7) and use it to
// transpose the two 4x4 halves held in q0 and q1.
MEMACCESS(6)
"vld1.8 {q3}, [%6] \n"
"vtbl.8 d4, {d0, d1}, d6 \n"
"vtbl.8 d5, {d0, d1}, d7 \n"
"vtbl.8 d0, {d2, d3}, d6 \n"
"vtbl.8 d1, {d2, d3}, d7 \n"
// TODO(frkoenig): Rework shuffle above to
// write out with 4 instead of 8 writes.
MEMACCESS(0)
"vst1.32 {d4[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d4[1]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d5[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d5[1]}, [%0] \n"
// Second half of each destination row starts 4 bytes in.
"add %0, %3, #4 \n"
MEMACCESS(0)
"vst1.32 {d0[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d0[1]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d1[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d1[1]}, [%0] \n"
"add %1, #4 \n" // src += 4
"add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
"subs %5, #4 \n" // w -= 4
"beq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %5, #2 \n"
"blt 3f \n"
// 2x8 block
"2: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.16 {d0[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d1[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d0[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d1[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d0[2]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d1[2]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d0[3]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d1[3]}, [%0] \n"
"vtrn.8 d0, d1 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.64 {d0}, [%0], %4 \n"
MEMACCESS(0)
"vst1.64 {d1}, [%0] \n"
"add %1, #2 \n" // src += 2
"add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
"subs %5, #2 \n" // w -= 2
"beq 4f \n"
// 1x8 block
// One byte from each of the 8 rows is gathered into d0 and stored as a
// single 8-byte destination row.
"3: \n"
MEMACCESS(1)
"vld1.8 {d0[0]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[1]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[2]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[3]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[4]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[5]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[6]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[7]}, [%1] \n"
MEMACCESS(3)
"vst1.64 {d0}, [%3] \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(src_stride), // %2
"+r"(dst), // %3
"+r"(dst_stride), // %4
"+r"(width) // %5
: "r"(&kVTbl4x4Transpose) // %6
: "memory", "cc", "q0", "q1", "q2", "q3"
);
}
// Byte-index table for vtbl: interleaves the low 8 input bytes (0..7) with
// the high 8 input bytes (8..15) as 0, 8, 1, 9, ...
// Loaded by the 4x8 residual path of TransposeUVWx8_NEON.
static uvec8 kVTbl4x4TransposeDi =
{ 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
// Transposes a band of 8 rows of two-channel (byte-interleaved) data that is
// `width` pairs wide, writing one channel to dst_a and the other to dst_b.
//
// The main loop handles 8 pairs at a time: vld2.8 de-interleaves 16 bytes of
// each row into an even-byte and an odd-byte register, the tiles are
// transposed with vtrn/vrev16, and 8 rows of 8 bytes are stored to each of
// dst_a (even bytes) and dst_b (odd bytes). Leftover pairs (1..7) are handled
// by the 4-, 2- and 1-pair blocks that follow. Every source pair becomes one
// 8-byte row in each destination.
//
// src_temp (%0) is a scratch pointer that walks the rows of the current tile
// so that src (%1), dst_a (%3) and dst_b (%5) keep pointing at the tile
// origin.
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) {
const uint8* src_temp = NULL;
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %7, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
"1: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld2.8 {d0, d1}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d2, d3}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d4, d5}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d6, d7}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d16, d17}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d18, d19}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d20, d21}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d22, d23}, [%0] \n"
// Transpose at 8-, 16- and 32-bit granularity, then swap the bytes within
// each 16-bit pair. Operating on q registers transposes both channels at
// once (even d register = first channel, odd d register = second).
"vtrn.8 q1, q0 \n"
"vtrn.8 q3, q2 \n"
"vtrn.8 q9, q8 \n"
"vtrn.8 q11, q10 \n"
"vtrn.16 q1, q3 \n"
"vtrn.16 q0, q2 \n"
"vtrn.16 q9, q11 \n"
"vtrn.16 q8, q10 \n"
"vtrn.32 q1, q9 \n"
"vtrn.32 q0, q8 \n"
"vtrn.32 q3, q11 \n"
"vtrn.32 q2, q10 \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"vrev16.8 q8, q8 \n"
"vrev16.8 q9, q9 \n"
"vrev16.8 q10, q10 \n"
"vrev16.8 q11, q11 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.8 {d2}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d0}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d6}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d4}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d18}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d16}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d22}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d20}, [%0] \n"
"mov %0, %5 \n"
MEMACCESS(0)
"vst1.8 {d3}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d1}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d7}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d5}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d19}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d17}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d23}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d21}, [%0] \n"
"add %1, #8*2 \n" // src += 8*2
"add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
"add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
"subs %7, #8 \n" // w -= 8
"bge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %7, #8 \n"
"beq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %7, #2 \n"
"blt 3f \n"
"cmp %7, #4 \n"
"blt 2f \n"
// TODO(frkoenig): Clean this up
// 4x8 block
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.64 {d0}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d1}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d2}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d3}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d4}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d5}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d6}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d7}, [%0] \n"
// Load the kVTbl4x4TransposeDi index table into q15 (d30/d31); q15 is
// therefore listed in the clobbers below.
MEMACCESS(8)
"vld1.8 {q15}, [%8] \n"
"vtrn.8 q0, q1 \n"
"vtrn.8 q2, q3 \n"
"vtbl.8 d16, {d0, d1}, d30 \n"
"vtbl.8 d17, {d0, d1}, d31 \n"
"vtbl.8 d18, {d2, d3}, d30 \n"
"vtbl.8 d19, {d2, d3}, d31 \n"
"vtbl.8 d20, {d4, d5}, d30 \n"
"vtbl.8 d21, {d4, d5}, d31 \n"
"vtbl.8 d22, {d6, d7}, d30 \n"
"vtbl.8 d23, {d6, d7}, d31 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.32 {d16[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d16[1]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d17[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d17[1]}, [%0], %4 \n"
// Second half of each dst_a row starts 4 bytes in.
"add %0, %3, #4 \n"
MEMACCESS(0)
"vst1.32 {d20[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d20[1]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d21[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d21[1]}, [%0] \n"
"mov %0, %5 \n"
MEMACCESS(0)
"vst1.32 {d18[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d18[1]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d19[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d19[1]}, [%0], %6 \n"
// Second half of each dst_b row starts 4 bytes in.
"add %0, %5, #4 \n"
MEMACCESS(0)
"vst1.32 {d22[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d22[1]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d23[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d23[1]}, [%0] \n"
"add %1, #4*2 \n" // src += 4 * 2
"add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
"add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
"subs %7, #4 \n" // w -= 4
"beq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %7, #2 \n"
"blt 3f \n"
// 2x8 block
"2: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d1[3], d3[3]}, [%0] \n"
"vtrn.8 d0, d1 \n"
"vtrn.8 d2, d3 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.64 {d0}, [%0], %4 \n"
MEMACCESS(0)
"vst1.64 {d2}, [%0] \n"
"mov %0, %5 \n"
MEMACCESS(0)
"vst1.64 {d1}, [%0], %6 \n"
MEMACCESS(0)
"vst1.64 {d3}, [%0] \n"
"add %1, #2*2 \n" // src += 2 * 2
"add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
"add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
"subs %7, #2 \n" // w -= 2
"beq 4f \n"
// 1x8 block
// One byte pair from each of the 8 rows is gathered into d0/d1 and stored
// as a single 8-byte row in each destination.
"3: \n"
MEMACCESS(1)
"vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[7], d1[7]}, [%1] \n"
MEMACCESS(3)
"vst1.64 {d0}, [%3] \n"
MEMACCESS(5)
"vst1.64 {d1}, [%5] \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(src_stride), // %2
"+r"(dst_a), // %3
"+r"(dst_stride_a), // %4
"+r"(dst_b), // %5
"+r"(dst_stride_b), // %6
"+r"(width) // %7
: "r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc",
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q15"
);
}
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,543 @@
/*
* Copyright 2014 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
// Byte-index table: output byte i * 4 + j selects input byte j * 4 + i, i.e.
// it transposes a 4x4 byte matrix held in 16 bytes.
static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
// Transposes a strip of 8 source rows by `width` columns (1 byte per pixel).
// Each source column (8 bytes, one from each row) becomes one destination row
// of 8 bytes. src_stride / dst_stride are the byte distances between
// consecutive source / destination rows.
// The strip is consumed in 8-column blocks, then a 4-, 2- and 1-column
// residual as needed.
// NOTE(review): the 8x8 loop is entered before the counter is tested, so one
// full 8x8 block is always processed — this appears to assume width >= 8;
// confirm against callers.
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
// Scratch pointer (%0): walks the source rows while loading and the
// destination rows while storing, so %1 / %2 keep pointing at the start of
// the current block.
const uint8* src_temp = NULL;
int64 width64 = (int64) width; // Work around clang 3.4 warning.
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %3, %3, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
"1: \n"
"mov %0, %1 \n"
// Load 8 bytes from each of the 8 source rows into v0..v7.
MEMACCESS(0)
"ld1 {v0.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v2.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v3.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v4.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v5.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v6.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v7.8b}, [%0] \n"
// Pass 1: trn1/trn2 on byte lanes of adjacent row pairs.
"trn2 v16.8b, v0.8b, v1.8b \n"
"trn1 v17.8b, v0.8b, v1.8b \n"
"trn2 v18.8b, v2.8b, v3.8b \n"
"trn1 v19.8b, v2.8b, v3.8b \n"
"trn2 v20.8b, v4.8b, v5.8b \n"
"trn1 v21.8b, v4.8b, v5.8b \n"
"trn2 v22.8b, v6.8b, v7.8b \n"
"trn1 v23.8b, v6.8b, v7.8b \n"
// Pass 2: trn1/trn2 on 16-bit lanes.
"trn2 v3.4h, v17.4h, v19.4h \n"
"trn1 v1.4h, v17.4h, v19.4h \n"
"trn2 v2.4h, v16.4h, v18.4h \n"
"trn1 v0.4h, v16.4h, v18.4h \n"
"trn2 v7.4h, v21.4h, v23.4h \n"
"trn1 v5.4h, v21.4h, v23.4h \n"
"trn2 v6.4h, v20.4h, v22.4h \n"
"trn1 v4.4h, v20.4h, v22.4h \n"
// Pass 3: trn1/trn2 on 32-bit lanes; results end up in v16..v23.
"trn2 v21.2s, v1.2s, v5.2s \n"
"trn1 v17.2s, v1.2s, v5.2s \n"
"trn2 v20.2s, v0.2s, v4.2s \n"
"trn1 v16.2s, v0.2s, v4.2s \n"
"trn2 v23.2s, v3.2s, v7.2s \n"
"trn1 v19.2s, v3.2s, v7.2s \n"
"trn2 v22.2s, v2.2s, v6.2s \n"
"trn1 v18.2s, v2.2s, v6.2s \n"
// Store 8 destination rows of 8 bytes each.
"mov %0, %2 \n"
MEMACCESS(0)
"st1 {v17.8b}, [%0], %6 \n"
MEMACCESS(0)
"st1 {v16.8b}, [%0], %6 \n"
MEMACCESS(0)
"st1 {v19.8b}, [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.8b}, [%0], %6 \n"
MEMACCESS(0)
"st1 {v21.8b}, [%0], %6 \n"
MEMACCESS(0)
"st1 {v20.8b}, [%0], %6 \n"
MEMACCESS(0)
"st1 {v23.8b}, [%0], %6 \n"
MEMACCESS(0)
"st1 {v22.8b}, [%0] \n"
"add %1, %1, #8 \n" // src += 8
"add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride
"subs %3, %3, #8 \n" // w -= 8
"b.ge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %3, %3, #8 \n"
"b.eq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %3, #2 \n"
"b.lt 3f \n"
"cmp %3, #4 \n"
"b.lt 2f \n"
// 4x8 block
// Load 4 bytes from each row: rows 0-3 fill v0, rows 4-7 fill v1.
"mov %0, %1 \n"
MEMACCESS(0)
"ld1 {v0.s}[0], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v0.s}[1], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v0.s}[2], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v0.s}[3], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.s}[0], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.s}[1], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.s}[2], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.s}[3], [%0] \n"
"mov %0, %2 \n"
// Shuffle each 4x4 byte tile through the kVTbl4x4Transpose index table.
MEMACCESS(4)
"ld1 {v2.16b}, [%4] \n"
"tbl v3.16b, {v0.16b}, v2.16b \n"
"tbl v0.16b, {v1.16b}, v2.16b \n"
// TODO(frkoenig): Rework shuffle above to
// write out with 4 instead of 8 writes.
// First tile (v3) goes to bytes 0..3 of the 4 destination rows.
MEMACCESS(0)
"st1 {v3.s}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v3.s}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v3.s}[2], [%0], %6 \n"
MEMACCESS(0)
"st1 {v3.s}[3], [%0] \n"
// Second tile (v0) goes to bytes 4..7 of the same 4 destination rows.
"add %0, %2, #4 \n"
MEMACCESS(0)
"st1 {v0.s}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v0.s}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v0.s}[2], [%0], %6 \n"
MEMACCESS(0)
"st1 {v0.s}[3], [%0] \n"
"add %1, %1, #4 \n" // src += 4
"add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride
"subs %3, %3, #4 \n" // w -= 4
"b.eq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %3, #2 \n"
"b.lt 3f \n"
// 2x8 block
"2: \n"
// Load 2 bytes per row; even rows go to v0 lanes, odd rows to v1 lanes.
"mov %0, %1 \n"
MEMACCESS(0)
"ld1 {v0.h}[0], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.h}[0], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v0.h}[1], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.h}[1], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v0.h}[2], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.h}[2], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v0.h}[3], [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.h}[3], [%0] \n"
"trn2 v2.8b, v0.8b, v1.8b \n"
"trn1 v3.8b, v0.8b, v1.8b \n"
// Store the 2 destination rows of 8 bytes.
"mov %0, %2 \n"
MEMACCESS(0)
"st1 {v3.8b}, [%0], %6 \n"
MEMACCESS(0)
"st1 {v2.8b}, [%0] \n"
"add %1, %1, #2 \n" // src += 2
"add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride
"subs %3, %3, #2 \n" // w -= 2
"b.eq 4f \n"
// 1x8 block
"3: \n"
// Last column: gather one byte per row into v0 and store it as a single
// 8-byte destination row. %1 is advanced directly since nothing follows.
MEMACCESS(1)
"ld1 {v0.b}[0], [%1], %5 \n"
MEMACCESS(1)
"ld1 {v0.b}[1], [%1], %5 \n"
MEMACCESS(1)
"ld1 {v0.b}[2], [%1], %5 \n"
MEMACCESS(1)
"ld1 {v0.b}[3], [%1], %5 \n"
MEMACCESS(1)
"ld1 {v0.b}[4], [%1], %5 \n"
MEMACCESS(1)
"ld1 {v0.b}[5], [%1], %5 \n"
MEMACCESS(1)
"ld1 {v0.b}[6], [%1], %5 \n"
MEMACCESS(1)
"ld1 {v0.b}[7], [%1] \n"
MEMACCESS(2)
"st1 {v0.8b}, [%2] \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst), // %2
"+r"(width64) // %3
: "r"(&kVTbl4x4Transpose), // %4
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23"
);
}
// Two 16-byte TBL index vectors used by the 4x8 residual of
// TransposeUVWx8_NEON. The indices address a 4-register (64-byte) table:
// the first vector gathers the even bytes (0, 2, 4, 6) of each 16-byte table
// register, the second vector gathers the odd bytes (1, 3, 5, 7).
// Read-only lookup data, so it is declared const.
static const uint8 kVTbl4x4TransposeDi[32] =
{ 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
// Transposes a strip of 8 source rows by `width` pixels where every pixel is
// a pair of interleaved bytes (2 bytes per pixel), splitting the two channels
// while transposing: the first byte of every pair goes to dst_a, the second
// byte to dst_b. Each source column produces one 8-byte row in dst_a and one
// 8-byte row in dst_b. src_stride, dst_stride_a and dst_stride_b are the byte
// distances between consecutive rows of the respective planes.
// The strip is consumed in 8-pixel blocks, then a 4-, 2- and 1-pixel residual
// as needed.
// NOTE(review): the 8x8 loop is entered before the counter is tested, so one
// full block is always processed — this appears to assume width >= 8;
// confirm against callers.
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) {
// Scratch pointer (%0): walks source rows while loading and destination rows
// while storing, so %1 / %2 / %3 keep pointing at the current block.
const uint8* src_temp = NULL;
int64 width64 = (int64) width; // Work around clang 3.4 warning.
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %4, %4, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
"1: \n"
"mov %0, %1 \n"
// Load 16 bytes (8 interleaved pairs) from each of the 8 source rows.
MEMACCESS(0)
"ld1 {v0.16b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.16b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v2.16b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v3.16b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v4.16b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v5.16b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v6.16b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v7.16b}, [%0] \n"
// Pass 1: trn1/trn2 on byte lanes of adjacent row pairs.
"trn1 v16.16b, v0.16b, v1.16b \n"
"trn2 v17.16b, v0.16b, v1.16b \n"
"trn1 v18.16b, v2.16b, v3.16b \n"
"trn2 v19.16b, v2.16b, v3.16b \n"
"trn1 v20.16b, v4.16b, v5.16b \n"
"trn2 v21.16b, v4.16b, v5.16b \n"
"trn1 v22.16b, v6.16b, v7.16b \n"
"trn2 v23.16b, v6.16b, v7.16b \n"
// Pass 2: trn1/trn2 on 16-bit lanes.
"trn1 v0.8h, v16.8h, v18.8h \n"
"trn2 v1.8h, v16.8h, v18.8h \n"
"trn1 v2.8h, v20.8h, v22.8h \n"
"trn2 v3.8h, v20.8h, v22.8h \n"
"trn1 v4.8h, v17.8h, v19.8h \n"
"trn2 v5.8h, v17.8h, v19.8h \n"
"trn1 v6.8h, v21.8h, v23.8h \n"
"trn2 v7.8h, v21.8h, v23.8h \n"
// Pass 3: trn1/trn2 on 32-bit lanes; v16..v19 hold the dst_a rows and
// v20..v23 the dst_b rows (one row per 64-bit half).
"trn1 v16.4s, v0.4s, v2.4s \n"
"trn2 v17.4s, v0.4s, v2.4s \n"
"trn1 v18.4s, v1.4s, v3.4s \n"
"trn2 v19.4s, v1.4s, v3.4s \n"
"trn1 v20.4s, v4.4s, v6.4s \n"
"trn2 v21.4s, v4.4s, v6.4s \n"
"trn1 v22.4s, v5.4s, v7.4s \n"
"trn2 v23.4s, v5.4s, v7.4s \n"
// Store 8 rows of 8 bytes to dst_a.
"mov %0, %2 \n"
MEMACCESS(0)
"st1 {v16.d}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.d}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v17.d}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v19.d}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v16.d}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.d}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v17.d}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v19.d}[1], [%0] \n"
// Store 8 rows of 8 bytes to dst_b.
"mov %0, %3 \n"
MEMACCESS(0)
"st1 {v20.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v22.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v21.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v23.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v20.d}[1], [%0], %7 \n"
MEMACCESS(0)
"st1 {v22.d}[1], [%0], %7 \n"
MEMACCESS(0)
"st1 {v21.d}[1], [%0], %7 \n"
MEMACCESS(0)
"st1 {v23.d}[1], [%0] \n"
"add %1, %1, #16 \n" // src += 8*2
"add %2, %2, %6, lsl #3 \n" // dst_a += 8 * dst_stride_a
"add %3, %3, %7, lsl #3 \n" // dst_b += 8 * dst_stride_b
"subs %4, %4, #8 \n" // w -= 8
"b.ge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %4, %4, #8 \n"
"b.eq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %4, #2 \n"
"b.lt 3f \n"
"cmp %4, #4 \n"
"b.lt 2f \n"
// TODO(frkoenig): Clean this up
// 4x8 block
// Load 8 bytes (4 interleaved pairs) from each of the 8 source rows.
"mov %0, %1 \n"
MEMACCESS(0)
"ld1 {v0.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v1.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v2.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v3.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v4.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v5.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v6.8b}, [%0], %5 \n"
MEMACCESS(0)
"ld1 {v7.8b}, [%0] \n"
// Load both 16-byte halves of kVTbl4x4TransposeDi with a single
// two-register ld1. %8 is an input-only operand, so it must not be
// written back (no post-increment addressing on it).
MEMACCESS(8)
"ld1 {v30.16b, v31.16b}, [%8] \n"
// v30 selects the first byte of every pair, v31 the second byte.
"tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
"tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
"tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
"tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"
// dst_a: rows 0-3 of the source fill bytes 0..3 of each destination row,
// rows 4-7 fill bytes 4..7.
"mov %0, %2 \n"
MEMACCESS(0)
"st1 {v16.s}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v16.s}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v16.s}[2], [%0], %6 \n"
MEMACCESS(0)
"st1 {v16.s}[3], [%0], %6 \n"
"add %0, %2, #4 \n"
MEMACCESS(0)
"st1 {v18.s}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.s}[1], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.s}[2], [%0], %6 \n"
MEMACCESS(0)
"st1 {v18.s}[3], [%0] \n"
// dst_b: same layout as dst_a.
"mov %0, %3 \n"
MEMACCESS(0)
"st1 {v17.s}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v17.s}[1], [%0], %7 \n"
MEMACCESS(0)
"st1 {v17.s}[2], [%0], %7 \n"
MEMACCESS(0)
"st1 {v17.s}[3], [%0], %7 \n"
"add %0, %3, #4 \n"
MEMACCESS(0)
"st1 {v19.s}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v19.s}[1], [%0], %7 \n"
MEMACCESS(0)
"st1 {v19.s}[2], [%0], %7 \n"
MEMACCESS(0)
"st1 {v19.s}[3], [%0] \n"
"add %1, %1, #8 \n" // src += 4 * 2
"add %2, %2, %6, lsl #2 \n" // dst_a += 4 * dst_stride_a
"add %3, %3, %7, lsl #2 \n" // dst_b += 4 * dst_stride_b
"subs %4, %4, #4 \n" // w -= 4
"b.eq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %4, #2 \n"
"b.lt 3f \n"
// 2x8 block
"2: \n"
// ld2 on 16-bit lanes: the first halfword (pixel 0's pair) and the second
// halfword (pixel 1's pair) of each row go to separate registers; even rows
// use v0/v1, odd rows use v2/v3.
"mov %0, %1 \n"
MEMACCESS(0)
"ld2 {v0.h, v1.h}[0], [%0], %5 \n"
MEMACCESS(0)
"ld2 {v2.h, v3.h}[0], [%0], %5 \n"
MEMACCESS(0)
"ld2 {v0.h, v1.h}[1], [%0], %5 \n"
MEMACCESS(0)
"ld2 {v2.h, v3.h}[1], [%0], %5 \n"
MEMACCESS(0)
"ld2 {v0.h, v1.h}[2], [%0], %5 \n"
MEMACCESS(0)
"ld2 {v2.h, v3.h}[2], [%0], %5 \n"
MEMACCESS(0)
"ld2 {v0.h, v1.h}[3], [%0], %5 \n"
MEMACCESS(0)
"ld2 {v2.h, v3.h}[3], [%0] \n"
"trn1 v4.8b, v0.8b, v2.8b \n"
"trn2 v5.8b, v0.8b, v2.8b \n"
"trn1 v6.8b, v1.8b, v3.8b \n"
"trn2 v7.8b, v1.8b, v3.8b \n"
// Two 8-byte rows to dst_a, then two 8-byte rows to dst_b.
"mov %0, %2 \n"
MEMACCESS(0)
"st1 {v4.d}[0], [%0], %6 \n"
MEMACCESS(0)
"st1 {v6.d}[0], [%0] \n"
"mov %0, %3 \n"
MEMACCESS(0)
"st1 {v5.d}[0], [%0], %7 \n"
MEMACCESS(0)
"st1 {v7.d}[0], [%0] \n"
"add %1, %1, #4 \n" // src += 2 * 2
"add %2, %2, %6, lsl #1 \n" // dst_a += 2 * dst_stride_a
"add %3, %3, %7, lsl #1 \n" // dst_b += 2 * dst_stride_b
"subs %4, %4, #2 \n" // w -= 2
"b.eq 4f \n"
// 1x8 block
"3: \n"
// Last pixel column: ld2 splits each row's byte pair into v0 (first byte)
// and v1 (second byte); each is stored as one 8-byte destination row.
MEMACCESS(1)
"ld2 {v0.b, v1.b}[0], [%1], %5 \n"
MEMACCESS(1)
"ld2 {v0.b, v1.b}[1], [%1], %5 \n"
MEMACCESS(1)
"ld2 {v0.b, v1.b}[2], [%1], %5 \n"
MEMACCESS(1)
"ld2 {v0.b, v1.b}[3], [%1], %5 \n"
MEMACCESS(1)
"ld2 {v0.b, v1.b}[4], [%1], %5 \n"
MEMACCESS(1)
"ld2 {v0.b, v1.b}[5], [%1], %5 \n"
MEMACCESS(1)
"ld2 {v0.b, v1.b}[6], [%1], %5 \n"
MEMACCESS(1)
"ld2 {v0.b, v1.b}[7], [%1] \n"
MEMACCESS(2)
"st1 {v0.d}[0], [%2] \n"
MEMACCESS(3)
"st1 {v1.d}[0], [%3] \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst_a), // %2
"+r"(dst_b), // %3
"+r"(width64) // %4
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
"r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc",
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v30", "v31"
);
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,247 @@
/*
* Copyright 2013 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/rotate_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
// Transposes a strip of 8 source rows by `width` columns (1 byte per pixel),
// 8 columns per loop iteration: every iteration reads 8 bytes from each of
// 8 source rows and writes 8 destination rows of 8 bytes.
// Naked function: arguments are read straight off the stack and the
// callee-saved registers used here (edi, esi, ebp) are pushed/popped by hand.
// NOTE(review): the loop body runs before the counter is tested and always
// handles a full 8x8 block — presumably width is expected to be a positive
// multiple of 8; confirm against callers.
__declspec(naked)
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width) {
__asm {
push edi
push esi
push ebp
// Arguments sit above the 3 pushed registers (12 bytes) and the return
// address (4 bytes).
mov eax, [esp + 12 + 4] // src
mov edi, [esp + 12 + 8] // src_stride
mov edx, [esp + 12 + 12] // dst
mov esi, [esp + 12 + 16] // dst_stride
mov ecx, [esp + 12 + 20] // width
// Read in the data from the source pointer.
// First round of bit swap.
// (punpcklbw interleaves the bytes of two adjacent rows; palignr by 8
// copies the upper 8 bytes of the result into a second register.)
align 4
convertloop:
movq xmm0, qword ptr [eax]
lea ebp, [eax + 8] // ebp = source position for the next iteration
movq xmm1, qword ptr [eax + edi]
lea eax, [eax + 2 * edi]
punpcklbw xmm0, xmm1
movq xmm2, qword ptr [eax]
movdqa xmm1, xmm0
palignr xmm1, xmm1, 8
movq xmm3, qword ptr [eax + edi]
lea eax, [eax + 2 * edi]
punpcklbw xmm2, xmm3
movdqa xmm3, xmm2
movq xmm4, qword ptr [eax]
palignr xmm3, xmm3, 8
movq xmm5, qword ptr [eax + edi]
punpcklbw xmm4, xmm5
lea eax, [eax + 2 * edi]
movdqa xmm5, xmm4
movq xmm6, qword ptr [eax]
palignr xmm5, xmm5, 8
movq xmm7, qword ptr [eax + edi]
punpcklbw xmm6, xmm7
mov eax, ebp // back to the first row, 8 bytes further along
movdqa xmm7, xmm6
palignr xmm7, xmm7, 8
// Second round of bit swap.
// (punpcklwd interleaves 16-bit units.)
punpcklwd xmm0, xmm2
punpcklwd xmm1, xmm3
movdqa xmm2, xmm0
movdqa xmm3, xmm1
palignr xmm2, xmm2, 8
palignr xmm3, xmm3, 8
punpcklwd xmm4, xmm6
punpcklwd xmm5, xmm7
movdqa xmm6, xmm4
movdqa xmm7, xmm5
palignr xmm6, xmm6, 8
palignr xmm7, xmm7, 8
// Third round of bit swap.
// Write to the destination pointer.
// (punpckldq interleaves 32-bit units; each result register yields two
// 8-byte destination rows: its low half, and its high half via palignr.)
punpckldq xmm0, xmm4
movq qword ptr [edx], xmm0
movdqa xmm4, xmm0
palignr xmm4, xmm4, 8
movq qword ptr [edx + esi], xmm4
lea edx, [edx + 2 * esi]
punpckldq xmm2, xmm6
movdqa xmm6, xmm2
palignr xmm6, xmm6, 8
movq qword ptr [edx], xmm2
punpckldq xmm1, xmm5
movq qword ptr [edx + esi], xmm6
lea edx, [edx + 2 * esi]
movdqa xmm5, xmm1
movq qword ptr [edx], xmm1
palignr xmm5, xmm5, 8
punpckldq xmm3, xmm7
movq qword ptr [edx + esi], xmm5
lea edx, [edx + 2 * esi]
movq qword ptr [edx], xmm3
movdqa xmm7, xmm3
palignr xmm7, xmm7, 8
// Counter update is issued early; the movq/lea that follow do not modify
// the flags, so jg still tests the result of this sub.
sub ecx, 8
movq qword ptr [edx + esi], xmm7
lea edx, [edx + 2 * esi]
jg convertloop
pop ebp
pop esi
pop edi
ret
}
}
// Transposes a strip of 8 source rows of interleaved byte pairs (2 bytes per
// pixel), 8 pixels per loop iteration, splitting the two channels: the low
// 8 bytes of every transposed register are written to dst_a and the high
// 8 bytes to dst_b.
// Naked function: arguments are read straight off the stack, callee-saved
// registers (ebx, esi, edi, ebp) are pushed/popped by hand, and a 16-byte
// aligned scratch slot is carved out of the stack for register spills.
// NOTE(review): the loop body runs before the counter is tested and always
// handles a full 8-pixel block — presumably w is expected to be a positive
// multiple of 8; confirm against callers.
__declspec(naked)
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int w) {
__asm {
push ebx
push esi
push edi
push ebp
// Arguments sit above the 4 pushed registers (16 bytes) and the return
// address (4 bytes).
mov eax, [esp + 16 + 4] // src
mov edi, [esp + 16 + 8] // src_stride
mov edx, [esp + 16 + 12] // dst_a
mov esi, [esp + 16 + 16] // dst_stride_a
mov ebx, [esp + 16 + 20] // dst_b
mov ebp, [esp + 16 + 24] // dst_stride_b
// Reserve a 16-byte aligned scratch slot at [esp] and keep the original
// stack pointer at [esp + 16] so it can be restored before returning.
mov ecx, esp
sub esp, 4 + 16
and esp, ~15
mov [esp + 16], ecx
mov ecx, [ecx + 16 + 28] // w
align 4
convertloop:
// Read in the data from the source pointer.
// First round of bit swap.
// (punpcklbw / punpckhbw interleave the bytes of two adjacent rows.)
movdqu xmm0, [eax]
movdqu xmm1, [eax + edi]
lea eax, [eax + 2 * edi]
movdqa xmm7, xmm0 // use xmm7 as temp register.
punpcklbw xmm0, xmm1
punpckhbw xmm7, xmm1
movdqa xmm1, xmm7
movdqu xmm2, [eax]
movdqu xmm3, [eax + edi]
lea eax, [eax + 2 * edi]
movdqa xmm7, xmm2
punpcklbw xmm2, xmm3
punpckhbw xmm7, xmm3
movdqa xmm3, xmm7
movdqu xmm4, [eax]
movdqu xmm5, [eax + edi]
lea eax, [eax + 2 * edi]
movdqa xmm7, xmm4
punpcklbw xmm4, xmm5
punpckhbw xmm7, xmm5
movdqa xmm5, xmm7
movdqu xmm6, [eax]
movdqu xmm7, [eax + edi]
lea eax, [eax + 2 * edi]
movdqu [esp], xmm5 // backup xmm5
// eax has advanced 8 rows; negate the stride so the lea below can step
// back 8 rows and forward 16 bytes in one instruction, then restore it.
neg edi
movdqa xmm5, xmm6 // use xmm5 as temp register.
punpcklbw xmm6, xmm7
punpckhbw xmm5, xmm7
movdqa xmm7, xmm5
lea eax, [eax + 8 * edi + 16]
neg edi
// Second round of bit swap.
// (punpcklwd / punpckhwd interleave 16-bit units.)
movdqa xmm5, xmm0
punpcklwd xmm0, xmm2
punpckhwd xmm5, xmm2
movdqa xmm2, xmm5
movdqa xmm5, xmm1
punpcklwd xmm1, xmm3
punpckhwd xmm5, xmm3
movdqa xmm3, xmm5
movdqa xmm5, xmm4
punpcklwd xmm4, xmm6
punpckhwd xmm5, xmm6
movdqa xmm6, xmm5
movdqu xmm5, [esp] // restore xmm5
movdqu [esp], xmm6 // backup xmm6
movdqa xmm6, xmm5 // use xmm6 as temp register.
punpcklwd xmm5, xmm7
punpckhwd xmm6, xmm7
movdqa xmm7, xmm6
// Third round of bit swap.
// Write to the destination pointer.
// (punpckldq / punpckhdq interleave 32-bit units; movlpd stores the low
// 8 bytes to dst_a and movhpd the high 8 bytes to dst_b.)
movdqa xmm6, xmm0
punpckldq xmm0, xmm4
punpckhdq xmm6, xmm4
movdqa xmm4, xmm6
movdqu xmm6, [esp] // restore xmm6
movlpd qword ptr [edx], xmm0
movhpd qword ptr [ebx], xmm0
movlpd qword ptr [edx + esi], xmm4
lea edx, [edx + 2 * esi]
movhpd qword ptr [ebx + ebp], xmm4
lea ebx, [ebx + 2 * ebp]
movdqa xmm0, xmm2 // use xmm0 as the temp register.
punpckldq xmm2, xmm6
movlpd qword ptr [edx], xmm2
movhpd qword ptr [ebx], xmm2
punpckhdq xmm0, xmm6
movlpd qword ptr [edx + esi], xmm0
lea edx, [edx + 2 * esi]
movhpd qword ptr [ebx + ebp], xmm0
lea ebx, [ebx + 2 * ebp]
movdqa xmm0, xmm1 // use xmm0 as the temp register.
punpckldq xmm1, xmm5
movlpd qword ptr [edx], xmm1
movhpd qword ptr [ebx], xmm1
punpckhdq xmm0, xmm5
movlpd qword ptr [edx + esi], xmm0
lea edx, [edx + 2 * esi]
movhpd qword ptr [ebx + ebp], xmm0
lea ebx, [ebx + 2 * ebp]
movdqa xmm0, xmm3 // use xmm0 as the temp register.
punpckldq xmm3, xmm7
movlpd qword ptr [edx], xmm3
movhpd qword ptr [ebx], xmm3
punpckhdq xmm0, xmm7
// Counter update is issued early; the stores and lea that follow do not
// modify the flags, so jg still tests the result of this sub.
sub ecx, 8
movlpd qword ptr [edx + esi], xmm0
lea edx, [edx + 2 * esi]
movhpd qword ptr [ebx + ebp], xmm0
lea ebx, [ebx + 2 * ebp]
jg convertloop
// Restore the stack pointer saved before alignment.
mov esp, [esp + 16]
pop ebp
pop edi
pop esi
pop ebx
ret
}
}
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,818 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include <string.h> // For memset.
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Subsampled size: width divided by 2^shift, rounded up, so a width that is
// not a multiple of 2^shift still counts its final partial sample.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
// Any 4 planes to 1 with yuvconstants.
// Generates NAMEANY, an any-width wrapper around the row kernel ANY_SIMD.
// The part of the row that is a multiple of MASK + 1 pixels is converted
// straight from the caller's buffers; the leftover pixels are copied into a
// zeroed scratch buffer, converted as one full MASK + 1 step, and only the
// valid output bytes are copied back.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
             const uint8* a_buf, uint8* dst_ptr, \
             const struct YuvConstants* yuvconstants, int width) { \
  /* Five 64-byte slots: Y, U, V and A inputs, then the output. */ \
  SIMD_ALIGNED(uint8 temp[64 * 5]); \
  int tail = width & MASK; /* leftover pixels */ \
  int body = width & ~MASK; /* pixels handled directly */ \
  memset(temp, 0, 64 * 4); /* for msan */ \
  if (body > 0) { \
    ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, body); \
  } \
  /* Stage the leftover samples of every input plane. */ \
  memcpy(temp, y_buf + body, tail); \
  memcpy(temp + 64, u_buf + (body >> UVSHIFT), SS(tail, UVSHIFT)); \
  memcpy(temp + 128, v_buf + (body >> UVSHIFT), SS(tail, UVSHIFT)); \
  memcpy(temp + 192, a_buf + body, tail); \
  /* Convert one full step inside the scratch buffer. */ \
  ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
           yuvconstants, MASK + 1); \
  /* Copy back only the bytes that belong to the leftover pixels. */ \
  memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, temp + 256, \
         SS(tail, DUVSHIFT) * BPP); \
}
// ANY41C instantiations. Arguments: wrapper name, SIMD kernel, UVSHIFT
// (right shift applied to pixel counts for the U/V planes), DUVSHIFT (right
// shift applied to pixel counts for the destination), BPP (bytes per
// destination unit), MASK (kernel step in pixels, minus 1).
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_AVX2
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_NEON
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#undef ANY41C
// Any 3 planes to 1.
// Generates NAMEANY, an any-width wrapper around the row kernel ANY_SIMD.
// The part of the row that is a multiple of MASK + 1 pixels is processed
// straight from the caller's buffers; the leftover pixels go through a
// zeroed scratch buffer that is processed as one full MASK + 1 step.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
             uint8* dst_ptr, int width) { \
  /* Four 64-byte slots: three inputs, then the output. */ \
  SIMD_ALIGNED(uint8 temp[64 * 4]); \
  int tail = width & MASK; /* leftover pixels */ \
  int body = width & ~MASK; /* pixels handled directly */ \
  memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
  if (body > 0) { \
    ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, body); \
  } \
  /* Stage the leftover samples of every input plane. */ \
  memcpy(temp, y_buf + body, tail); \
  memcpy(temp + 64, u_buf + (body >> UVSHIFT), SS(tail, UVSHIFT)); \
  memcpy(temp + 128, v_buf + (body >> UVSHIFT), SS(tail, UVSHIFT)); \
  /* Process one full step inside the scratch buffer. */ \
  ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
  /* Copy back only the bytes that belong to the leftover pixels. */ \
  memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, temp + 192, \
         SS(tail, DUVSHIFT) * BPP); \
}
// ANY31 instantiations. Arguments: wrapper name, SIMD kernel, UVSHIFT
// (right shift applied to pixel counts for the second and third planes),
// DUVSHIFT (right shift applied to pixel counts for the destination), BPP
// (bytes per destination unit), MASK (kernel step in pixels, minus 1).
// NOTE(review): the UYVY SSE2 wrapper is gated on HAS_I422TOYUY2ROW_SSE2
// rather than a UYVY-specific macro — confirm that is intended.
#ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_NEON
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_BLENDPLANEROW_AVX2
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif
#ifdef HAS_BLENDPLANEROW_SSSE3
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif
#undef ANY31
// Any 3 planes to 1 with yuvconstants.
// Generates NAMEANY, an any-width wrapper around the row kernel ANY_SIMD.
// The part of the row that is a multiple of MASK + 1 pixels is converted
// straight from the caller's buffers; the leftover pixels go through a
// zeroed scratch buffer that is converted as one full MASK + 1 step.
// For odd widths the last U and V samples are replicated once in the scratch
// buffer. Note that this replication includes 444, due to the arm
// implementation that subsamples 444 to 422 internally.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
             uint8* dst_ptr, const struct YuvConstants* yuvconstants, \
             int width) { \
  /* Four 64-byte slots: Y, U and V inputs, then the output. */ \
  SIMD_ALIGNED(uint8 temp[64 * 4]); \
  int tail = width & MASK; /* leftover pixels */ \
  int body = width & ~MASK; /* pixels handled directly */ \
  memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
  if (body > 0) { \
    ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, body); \
  } \
  /* Stage the leftover samples of every input plane. */ \
  memcpy(temp, y_buf + body, tail); \
  memcpy(temp + 64, u_buf + (body >> UVSHIFT), SS(tail, UVSHIFT)); \
  memcpy(temp + 128, v_buf + (body >> UVSHIFT), SS(tail, UVSHIFT)); \
  if (width & 1) { \
    /* Duplicate the final chroma sample of each plane. */ \
    temp[64 + SS(tail, UVSHIFT)] = temp[64 + SS(tail, UVSHIFT) - 1]; \
    temp[128 + SS(tail, UVSHIFT)] = temp[128 + SS(tail, UVSHIFT) - 1]; \
  } \
  /* Convert one full step inside the scratch buffer. */ \
  ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \
           yuvconstants, MASK + 1); \
  /* Copy back only the bytes that belong to the leftover pixels. */ \
  memcpy(dst_ptr + (body >> DUVSHIFT) * BPP, temp + 192, \
         SS(tail, DUVSHIFT) * BPP); \
}
// ANY31C instantiations. Arguments: wrapper name, SIMD kernel, UVSHIFT
// (right shift applied to pixel counts for the U/V planes), DUVSHIFT (right
// shift applied to pixel counts for the destination), BPP (bytes per
// destination pixel), MASK (kernel step in pixels, minus 1).
#ifdef HAS_I422TOARGBROW_SSSE3
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I411TOARGBROW_SSSE3
ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
#endif
// NOTE(review): HAS_I444TOARGBROW_SSSE3 also gates the I422 -> RGBA /
// ARGB4444 / ARGB1555 / RGB565 / RGB24 wrappers below — confirm intended.
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
#endif // HAS_I444TOARGBROW_SSSE3
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
#endif
#ifdef HAS_I422TOARGBROW_AVX2
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TORGBAROW_AVX2
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_AVX2
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I411TOARGBROW_AVX2
ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
#endif
// NOTE(review): the three 16-bit AVX2 wrappers below use MASK 7 while the
// other AVX2 wrappers above use 15 — confirm against the kernels' step size.
#ifdef HAS_I422TOARGB4444ROW_AVX2
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGB1555ROW_AVX2
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB565ROW_AVX2
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
#endif
// NOTE(review): HAS_I422TOARGBROW_NEON gates every NEON wrapper below,
// including the I444 and I411 variants — confirm intended.
#ifdef HAS_I422TOARGBROW_NEON
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#undef ANY31C
// Any 2 planes to 1.
// Generates NAMEANY, an any-width wrapper around the row kernel ANY_SIMD.
// The part of the row that is a multiple of MASK + 1 pixels is processed
// straight from the caller's buffers; the leftover pixels go through a
// zeroed scratch buffer that is processed as one full MASK + 1 step.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
             uint8* dst_ptr, int width) { \
  /* Three 64-byte slots: two inputs, then the output. */ \
  SIMD_ALIGNED(uint8 temp[64 * 3]); \
  int tail = width & MASK; /* leftover pixels */ \
  int body = width & ~MASK; /* pixels handled directly */ \
  memset(temp, 0, 64 * 2); /* for msan */ \
  if (body > 0) { \
    ANY_SIMD(y_buf, uv_buf, dst_ptr, body); \
  } \
  /* Stage the leftover samples of both inputs. */ \
  memcpy(temp, y_buf + body * SBPP, tail * SBPP); \
  memcpy(temp + 64, uv_buf + (body >> UVSHIFT) * SBPP2, \
         SS(tail, UVSHIFT) * SBPP2); \
  /* Process one full step inside the scratch buffer. */ \
  ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
  /* Copy back only the bytes that belong to the leftover pixels. */ \
  memcpy(dst_ptr + body * BPP, temp + 128, tail * BPP); \
}
// ANY21 instantiations. Arguments: wrapper name, SIMD kernel, UVSHIFT (right
// shift applied to pixel counts for the second source), SBPP (bytes per pixel
// of the first source), SBPP2 (bytes per sample of the second source), BPP
// (bytes per destination pixel), MASK (kernel step in pixels, minus 1).
// Merge functions.
#ifdef HAS_MERGEUVROW_SSE2
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_AVX2
ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
#endif
#ifdef HAS_MERGEUVROW_NEON
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#endif
// Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBADDROW_SSE2
ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBSUBTRACTROW_SSE2
ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_AVX2
ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_AVX2
ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_AVX2
ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_NEON
ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_NEON
ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_NEON
ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#endif
// Sobel functions.
#ifdef HAS_SOBELROW_SSE2
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_NEON
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELTOPLANEROW_SSE2
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_NEON
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELXYROW_SSE2
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_NEON
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif
#undef ANY21
// Any 2 planes to 1 with yuvconstants.
// Generates NAMEANY, an any-width wrapper around the row kernel ANY_SIMD.
// The part of the row that is a multiple of MASK + 1 pixels is converted
// straight from the caller's buffers; the leftover pixels go through a
// zeroed scratch buffer that is converted as one full MASK + 1 step.
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
             uint8* dst_ptr, const struct YuvConstants* yuvconstants, \
             int width) { \
  /* Three 64-byte slots: two inputs, then the output. */ \
  SIMD_ALIGNED(uint8 temp[64 * 3]); \
  int tail = width & MASK; /* leftover pixels */ \
  int body = width & ~MASK; /* pixels handled directly */ \
  memset(temp, 0, 64 * 2); /* for msan */ \
  if (body > 0) { \
    ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, body); \
  } \
  /* Stage the leftover samples of both inputs. */ \
  memcpy(temp, y_buf + body * SBPP, tail * SBPP); \
  memcpy(temp + 64, uv_buf + (body >> UVSHIFT) * SBPP2, \
         SS(tail, UVSHIFT) * SBPP2); \
  /* Convert one full step inside the scratch buffer. */ \
  ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
  /* Copy back only the bytes that belong to the leftover pixels. */ \
  memcpy(dst_ptr + body * BPP, temp + 128, tail * BPP); \
}
// ANY21C instantiations. Arguments: wrapper name, SIMD kernel, UVSHIFT (right
// shift applied to pixel counts for the second source), SBPP (bytes per pixel
// of the first source), SBPP2 (bytes per sample of the second source), BPP
// (bytes per destination pixel), MASK (kernel step in pixels, minus 1).
// Biplanar to RGB.
#ifdef HAS_NV12TOARGBROW_SSSE3
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_AVX2
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_AVX2
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV21TOARGBROW_NEON
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_AVX2
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#endif
#ifdef HAS_NV12TORGB565ROW_NEON
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
// Any 1 to 1.
// Generates NAMEANY, an any-width wrapper around the row kernel ANY_SIMD.
// The part of the row that is a multiple of MASK + 1 pixels is processed
// straight from the caller's buffers; the leftover pixels go through a
// zeroed scratch buffer that is processed as one full MASK + 1 step.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
  /* Two 128-byte slots: the input, then the output. */ \
  SIMD_ALIGNED(uint8 temp[128 * 2]); \
  int tail = width & MASK; /* leftover pixels */ \
  int body = width & ~MASK; /* pixels handled directly */ \
  memset(temp, 0, 128); /* for YUY2 and msan */ \
  if (body > 0) { \
    ANY_SIMD(src_ptr, dst_ptr, body); \
  } \
  /* Stage the leftover source samples. */ \
  memcpy(temp, src_ptr + (body >> UVSHIFT) * SBPP, \
         SS(tail, UVSHIFT) * SBPP); \
  /* Process one full step inside the scratch buffer. */ \
  ANY_SIMD(temp, temp + 128, MASK + 1); \
  /* Copy back only the bytes that belong to the leftover pixels. */ \
  memcpy(dst_ptr + body * BPP, temp + 128, tail * BPP); \
}
// ANY11 instantiations. Arguments: wrapper name, SIMD kernel, UVSHIFT (right
// shift applied to pixel counts when addressing the source), SBPP (bytes per
// source unit), BPP (bytes per destination pixel), MASK (kernel step in
// pixels, minus 1).
#ifdef HAS_COPYROW_AVX
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
#endif
#ifdef HAS_COPYROW_SSE2
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
#endif
#ifdef HAS_COPYROW_NEON
ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
#endif
// NOTE(review): HAS_ARGBTORGB24ROW_SSSE3 also gates the SSE2 RGB565 /
// ARGB1555 / ARGB4444 wrappers below — confirm intended.
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB565ROW_AVX2)
ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
#endif
// NOTE(review): the ARGB1555 AVX2 wrapper is gated on the ARGB4444 feature
// macro — confirm intended.
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_J400TOARGBROW_SSE2)
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_AVX2)
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_I400TOARGBROW_SSE2)
ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
// NOTE(review): HAS_RGB24TOARGBROW_SSSE3 also gates the RAW and the SSE2
// RGB565 / ARGB1555 / ARGB4444 wrappers below — confirm intended.
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
#endif
#if defined(HAS_RAWTORGB24ROW_SSSE3)
ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
#endif
#if defined(HAS_RGB565TOARGBROW_AVX2)
ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
// NOTE(review): HAS_ARGBTORGB24ROW_NEON gates every NEON wrapper in the
// group below, including J400 / I400 — confirm intended.
#if defined(HAS_ARGBTORGB24ROW_NEON)
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
#endif
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
// NOTE(review): the UYVY and YUY2 wrappers are instantiated with two
// different argument styles, (0, 2) and (1, 4). Both address the source at
// 2 bytes per pixel for the aligned part; they differ only in how many bytes
// of an odd-sized tail are copied. Confirm the mix is intentional.
#ifdef HAS_UYVYTOYROW_AVX2
ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
#endif
#ifdef HAS_YUY2TOYROW_AVX2
ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
#endif
// NOTE(review): HAS_BGRATOYROW_SSSE3 also gates the ABGR / RGBA and the SSE2
// YUY2 / UYVY wrappers below — confirm intended.
#ifdef HAS_BGRATOYROW_SSSE3
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_SSSE3
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_NEON
ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_NEON
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB4444TOYROW_NEON
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_YUY2TOYROW_NEON
ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_UYVYTOYROW_NEON
ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 0, 2, 1, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_NEON
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_NEON
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_NEON
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_SSSE3
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_AVX2
ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_NEON
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif
#undef ANY11
// Any 1 to 1 with yuvconstants.
// Generates NAMEANY(src_ptr, dst_ptr, yuvconstants, width): a wrapper that
// lets ANY_SIMD, which is only ever called here with widths that are a
// multiple of MASK + 1, handle an arbitrary width.
//   UVSHIFT - shift applied to pixel counts when computing source offsets.
//   SBPP    - source bytes per (shifted) unit.
//   BPP     - destination bytes per pixel.
//   MASK    - SIMD step minus one.
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(uint8 temp[128 * 2]); \
    uint8* const tail_out = temp + 128; /* second half holds the output */ \
    const int tail = width & MASK;      /* leftover pixels */ \
    const int body = width - tail;      /* multiple of MASK + 1 */ \
    memset(temp, 0, 128); /* for YUY2 and msan */ \
    if (body > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, yuvconstants, body); \
    } \
    /* Run one full SIMD step on a zero-padded copy of the leftover. */ \
    memcpy(temp, src_ptr + (body >> UVSHIFT) * SBPP, \
           SS(tail, UVSHIFT) * SBPP); \
    ANY_SIMD(temp, tail_out, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + body * BPP, tail_out, tail * BPP); \
  }

#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#endif
#if defined(HAS_YUY2TOARGBROW_AVX2)
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif
#if defined(HAS_YUY2TOARGBROW_NEON)
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#undef ANY11C
// Any 1 to 1 blended.
// Generates NAMEANY(src_ptr, dst_ptr, width): a wrapper that lets ANY_SIMD
// handle an arbitrary width. The first (width & ~MASK) pixels are processed
// in place; the leftover is processed through a scratch buffer.
// Unlike the plain 1-to-1 wrappers, the current destination pixels of the
// leftover are copied into the scratch output before ANY_SIMD runs, so the
// kernel sees the existing destination contents (presumably because these
// kernels overwrite only part of each destination pixel).
//   UVSHIFT - shift applied to pixel counts when computing source offsets.
//   SBPP    - source bytes per (shifted) unit.
//   BPP     - destination bytes per pixel.
//   MASK    - SIMD step minus one.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8 temp[128 * 2]); \
    memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, n); \
    } \
    memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
    /* Seed the scratch output with the existing destination pixels. */ \
    memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \
    ANY_SIMD(temp, temp + 128, MASK + 1); \
    memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
  }

#ifdef HAS_ARGBCOPYALPHAROW_AVX2
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
#endif
// Guarded by the macro of the kernel it actually wraps (ARGBCopyAlphaRow_SSE2),
// not by the Y-to-alpha kernel's macro.
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#endif
#undef ANY11B
// Any 1 to 1 with parameter.
// Generates NAMEANY(src_ptr, dst_ptr, shuffler, width): a wrapper that lets
// ANY_SIMD handle an arbitrary width while forwarding one extra argument of
// type T unchanged to every ANY_SIMD call.
//   SBPP - source bytes per pixel.
//   BPP  - destination bytes per pixel.
//   MASK - SIMD step minus one.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
               T shuffler, int width) { \
    SIMD_ALIGNED(uint8 temp[64 * 2]); \
    uint8* const tail_out = temp + 64; /* second half holds the output */ \
    const int tail = width & MASK;     /* leftover pixels */ \
    const int body = width - tail;     /* multiple of MASK + 1 */ \
    memset(temp, 0, 64); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, shuffler, body); \
    } \
    /* Run one full SIMD step on a zero-padded copy of the leftover. */ \
    memcpy(temp, src_ptr + body * SBPP, tail * SBPP); \
    ANY_SIMD(temp, tail_out, shuffler, MASK + 1); \
    memcpy(dst_ptr + body * BPP, tail_out, tail * BPP); \
  }

#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
       const uint32, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
       const uint32, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
       const uint32, 4, 2, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_SSE2
ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3)
#endif
#undef ANY11P
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
// Generates NAMEANY(dst_ptr, src_ptr, src_stride_ptr, width,
// source_y_fraction): a wrapper that lets ANY_SIMD handle an arbitrary
// width. The leftover pixels of both source rows are staged in a scratch
// buffer laid out as two rows 64 bytes apart, followed by the output row.
//   SBPP - source bytes per pixel.
//   BPP  - destination bytes per pixel.
//   MASK - SIMD step minus one.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
  void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
               ptrdiff_t src_stride_ptr, int width, \
               int source_y_fraction) { \
    SIMD_ALIGNED(uint8 temp[64 * 3]); \
    uint8* const row0 = temp;           /* leftover of first source row */ \
    uint8* const row1 = temp + 64;      /* leftover of second source row */ \
    uint8* const tail_out = temp + 128; /* interpolated leftover */ \
    const int tail = width & MASK; \
    const int body = width - tail; \
    memset(temp, 0, 64 * 2); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, body, source_y_fraction); \
    } \
    memcpy(row0, src_ptr + body * SBPP, tail * SBPP); \
    memcpy(row1, src_ptr + src_stride_ptr + body * SBPP, tail * SBPP); \
    /* Scratch rows are 64 bytes apart, hence the stride of 64. */ \
    ANY_SIMD(tail_out, row0, 64, MASK + 1, source_y_fraction); \
    memcpy(dst_ptr + body * BPP, tail_out, tail * BPP); \
  }

#ifdef HAS_INTERPOLATEROW_AVX2
ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_DSPR2
ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3)
#endif
#undef ANY11T
// Any 1 to 1 mirror.
// Generates NAMEANY(src_ptr, dst_ptr, width): a wrapper that lets ANY_SIMD
// handle an arbitrary width. The SIMD-sized part is taken from the END of
// the source (skipping the first `width & MASK` pixels) and written to the
// start of the destination; the skipped leading pixels are mirrored through
// a scratch buffer and written after it.
//   BPP  - bytes per pixel (same for source and destination).
//   MASK - SIMD step minus one.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
  void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8 temp[64 * 2]); \
    uint8* const tail_out = temp + 64; /* second half holds the output */ \
    const int tail = width & MASK;     /* leading source pixels */ \
    const int body = width - tail;     /* multiple of MASK + 1 */ \
    memset(temp, 0, 64); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(src_ptr + tail * BPP, dst_ptr, body); \
    } \
    memcpy(temp, src_ptr, tail * BPP); \
    ANY_SIMD(temp, tail_out, MASK + 1); \
    /* The copied pixels sat at the front of the scratch input, so their */ \
    /* mirrored form occupies the last `tail` pixels of the scratch output. */ \
    memcpy(dst_ptr + body * BPP, tail_out + (MASK + 1 - tail) * BPP, \
           tail * BPP); \
  }

#ifdef HAS_MIRRORROW_AVX2
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
#ifdef HAS_MIRRORROW_SSSE3
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#endif
#ifdef HAS_MIRRORROW_NEON
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
#ifdef HAS_ARGBMIRRORROW_SSE2
ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_NEON
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
#endif
#undef ANY11M
// Any 1 plane. (memset)
// Generates NAMEANY(dst_ptr, v32, width): a wrapper that lets a fill kernel
// ANY_SIMD handle an arbitrary width. The leftover is produced by filling
// one full SIMD step into a scratch buffer and copying only the needed bytes.
//   T    - type of the fill value.
//   BPP  - destination bytes per pixel.
//   MASK - SIMD step minus one.
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
  void NAMEANY(uint8* dst_ptr, T v32, int width) { \
    SIMD_ALIGNED(uint8 temp[64]); \
    const int tail = width & MASK; /* leftover pixels */ \
    const int body = width - tail; /* multiple of MASK + 1 */ \
    if (body > 0) { \
      ANY_SIMD(dst_ptr, v32, body); \
    } \
    ANY_SIMD(temp, v32, MASK + 1); \
    memcpy(dst_ptr + body * BPP, temp, tail * BPP); \
  }

#ifdef HAS_SETROW_X86
ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3)
#endif
#ifdef HAS_SETROW_NEON
ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15)
#endif
#ifdef HAS_ARGBSETROW_NEON
ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
#endif
#undef ANY1
// Any 1 to 2. Outputs UV planes.
// Generates NAMEANY(src_ptr, dst_u, dst_v, width): a wrapper that lets
// ANY_SIMD handle an arbitrary width. The leftover source pixels are staged
// in a scratch buffer; for 4-byte sources that are subsampled horizontally
// the last pixel(s) are replicated so the subsampler has complete groups.
//   UVSHIFT  - shift applied to pixel counts when computing source offsets.
//   BPP      - source bytes per (shifted) unit.
//   DUVSHIFT - shift applied to pixel counts when computing U/V offsets.
//   MASK     - SIMD step minus one.
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
  void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) { \
    SIMD_ALIGNED(uint8 temp[128 * 3]); \
    uint8* const out_u = temp + 128; \
    uint8* const out_v = temp + 256; \
    const int tail = width & MASK; /* leftover pixels */ \
    const int body = width - tail; /* multiple of MASK + 1 */ \
    /* First scratch byte after the copied leftover source data. */ \
    uint8* const pad = temp + SS(tail, UVSHIFT) * BPP; \
    memset(temp, 0, 128); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(src_ptr, dst_u, dst_v, body); \
    } \
    memcpy(temp, src_ptr + (body >> UVSHIFT) * BPP, \
           SS(tail, UVSHIFT) * BPP); \
    /* repeat last 4 bytes for 422 subsampler */ \
    if ((width & 1) && BPP == 4 && DUVSHIFT == 1) { \
      memcpy(pad, pad - BPP, BPP); \
    } \
    /* repeat last 4 - 12 bytes for 411 subsampler */ \
    if (BPP == 4 && DUVSHIFT == 2) { \
      switch (width & 3) { \
        case 1: \
          memcpy(pad, pad - BPP, BPP); \
          memcpy(pad + BPP, pad - BPP, BPP * 2); \
          break; \
        case 2: \
          memcpy(pad, pad - BPP * 2, BPP * 2); \
          break; \
        case 3: \
          memcpy(pad, pad - BPP, BPP); \
          break; \
        default: \
          break; \
      } \
    } \
    ANY_SIMD(temp, out_u, out_v, MASK + 1); \
    memcpy(dst_u + (body >> DUVSHIFT), out_u, SS(tail, DUVSHIFT)); \
    memcpy(dst_v + (body >> DUVSHIFT), out_v, SS(tail, DUVSHIFT)); \
  }

#ifdef HAS_SPLITUVROW_SSE2
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_AVX2
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#endif
#ifdef HAS_SPLITUVROW_NEON
ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_DSPR2
ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15)
#endif
#ifdef HAS_ARGBTOUV444ROW_SSSE3
ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_AVX2
ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOUV422ROW_SSE2
ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOUV422ROW_NEON
ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31)
ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
#endif
#undef ANY12
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
// Generates NAMEANY(src_ptr, src_stride_ptr, dst_u, dst_v, width): a wrapper
// that lets ANY_SIMD handle an arbitrary width. The leftover of both source
// rows is staged in a scratch buffer as two rows 128 bytes apart; U and V
// output is written at half the pixel offset (n >> 1).
//   UVSHIFT - shift applied to pixel counts when computing source offsets.
//   BPP     - source bytes per (shifted) unit.
//   MASK    - SIMD step minus one.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8* src_ptr, int src_stride_ptr, \
               uint8* dst_u, uint8* dst_v, int width) { \
    SIMD_ALIGNED(uint8 temp[128 * 4]); \
    uint8* const row0 = temp; \
    uint8* const row1 = temp + 128; \
    uint8* const out_u = temp + 256; \
    uint8* const out_v = temp + 384; \
    const int tail = width & MASK; /* leftover pixels */ \
    const int body = width - tail; /* multiple of MASK + 1 */ \
    const int src_off = (body >> UVSHIFT) * BPP; \
    const int tail_bytes = SS(tail, UVSHIFT) * BPP; \
    memset(temp, 0, 128 * 2); /* for msan */ \
    if (body > 0) { \
      ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, body); \
    } \
    memcpy(row0, src_ptr + src_off, tail_bytes); \
    memcpy(row1, src_ptr + src_stride_ptr + src_off, tail_bytes); \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
      memcpy(row0 + tail_bytes, row0 + tail_bytes - BPP, BPP); \
      memcpy(row1 + tail_bytes, row1 + tail_bytes - BPP, BPP); \
    } \
    /* Scratch rows are 128 bytes apart, hence the stride of 128. */ \
    ANY_SIMD(row0, 128, out_u, out_v, MASK + 1); \
    memcpy(dst_u + (body >> 1), out_u, SS(tail, 1)); \
    memcpy(dst_v + (body >> 1), out_v, SS(tail, 1)); \
  }

#ifdef HAS_ARGBTOUVROW_AVX2
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_AVX2
ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
#endif
#ifdef HAS_YUY2TOUVROW_SSE2
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_NEON
ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_NEON
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_NEON
ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_NEON
ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_RGB24TOUVROW_NEON
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_NEON
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RGB565TOUVROW_NEON
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_NEON
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVROW_NEON
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_YUY2TOUVROW_NEON
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#undef ANY12S
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,782 @@
/*
* Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)
#ifdef HAS_COPYROW_MIPS
// Copies `count` bytes from `src` to `dst` using hand-written MIPS assembly.
//
// Strategy visible in the code below:
//  - count < 8: plain byte loop ($last8).
//  - src and dst share the same alignment modulo 4: align both to a word,
//    then copy 64-byte chunks, at most one 32-byte chunk, single words and
//    finally the trailing bytes.
//  - otherwise ("unaligned"): align dst only and read the source with
//    lwr/lwl pairs, using the same 64/32/4/1 byte staging.
//
// The asm changes src, dst and count, so all three are read-write operands,
// and it stores through dst, so "memory" is listed as a clobber.
//
// NOTE(review): the code returns with "j $ra" from inside the asm statement,
// which bypasses any epilogue the compiler generates for this function -
// confirm it is always compiled as a frameless leaf.
// NOTE(review): several labels (chk8w, chk1w, leave, unaligned, ua_chk8w,
// ua_skip_pref30_128, ua_smallCopy) are plain symbol names rather than
// numeric local labels - confirm this body is never emitted twice.
void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
  __asm__ __volatile__ (
    ".set noreorder \n"
    ".set noat \n"
    "slti $at, %[count], 8 \n"
    "bne $at ,$zero, $last8 \n"
    "xor $t8, %[src], %[dst] \n"
    "andi $t8, $t8, 0x3 \n"
    "bne $t8, $zero, unaligned \n"
    "negu $a3, %[dst] \n"
    // make dst/src aligned
    "andi $a3, $a3, 0x3 \n"
    "beq $a3, $zero, $chk16w \n"
    // word-aligned now; count is the remaining byte count
    "subu %[count], %[count], $a3 \n"
    "lwr $t8, 0(%[src]) \n"
    "addu %[src], %[src], $a3 \n"
    "swr $t8, 0(%[dst]) \n"
    "addu %[dst], %[dst], $a3 \n"
    // Now the dst/src are mutually word-aligned with word-aligned addresses
  "$chk16w: \n"
    "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
    // t8 is the byte count after 64-byte chunks
    "beq %[count], $t8, chk8w \n"
    // There will be at most 1 32-byte chunk after it
    "subu $a3, %[count], $t8 \n" // the remainder
    // Here a3 counts bytes in 16w chunks
    "addu $a3, %[dst], $a3 \n"
    // Now a3 is the final dst after 64-byte chunks
    "addu $t0, %[dst], %[count] \n"
    // t0 is the "past the end" address
    // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past
    // the "t0-32" address
    // This means: for x=128 the last "safe" a1 address is "t0-160"
    // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
    // we will use "pref 30,128(a1)", so "t0-160" is the limit
    "subu $t9, $t0, 160 \n"
    // t9 is the "last safe pref 30,128(a1)" address
    "pref 0, 0(%[src]) \n" // first line of src
    "pref 0, 32(%[src]) \n" // second line of src
    "pref 0, 64(%[src]) \n"
    "pref 30, 32(%[dst]) \n"
    // In case the a1 > t9 don't use "pref 30" at all
    "sgtu $v1, %[dst], $t9 \n"
    "bgtz $v1, $loop16w \n"
    "nop \n"
    // otherwise, start with using pref30
    "pref 30, 64(%[dst]) \n"
  "$loop16w: \n"
    "pref 0, 96(%[src]) \n"
    "lw $t0, 0(%[src]) \n"
    "bgtz $v1, $skip_pref30_96 \n" // skip
    "lw $t1, 4(%[src]) \n"
    "pref 30, 96(%[dst]) \n" // continue
  "$skip_pref30_96: \n"
    "lw $t2, 8(%[src]) \n"
    "lw $t3, 12(%[src]) \n"
    "lw $t4, 16(%[src]) \n"
    "lw $t5, 20(%[src]) \n"
    "lw $t6, 24(%[src]) \n"
    "lw $t7, 28(%[src]) \n"
    "pref 0, 128(%[src]) \n"
    // bring the next lines of src, addr 128
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "lw $t0, 32(%[src]) \n"
    "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1)
    "lw $t1, 36(%[src]) \n"
    "pref 30, 128(%[dst]) \n" // set dest, addr 128
  "$skip_pref30_128: \n"
    "lw $t2, 40(%[src]) \n"
    "lw $t3, 44(%[src]) \n"
    "lw $t4, 48(%[src]) \n"
    "lw $t5, 52(%[src]) \n"
    "lw $t6, 56(%[src]) \n"
    "lw $t7, 60(%[src]) \n"
    "pref 0, 160(%[src]) \n"
    // bring the next lines of src, addr 160
    "sw $t0, 32(%[dst]) \n"
    "sw $t1, 36(%[dst]) \n"
    "sw $t2, 40(%[dst]) \n"
    "sw $t3, 44(%[dst]) \n"
    "sw $t4, 48(%[dst]) \n"
    "sw $t5, 52(%[dst]) \n"
    "sw $t6, 56(%[dst]) \n"
    "sw $t7, 60(%[dst]) \n"
    "addiu %[dst], %[dst], 64 \n" // adding 64 to dest
    "sgtu $v1, %[dst], $t9 \n"
    "bne %[dst], $a3, $loop16w \n"
    " addiu %[src], %[src], 64 \n" // adding 64 to src
    "move %[count], $t8 \n"
    // Here we have src and dest word-aligned but less than 64-bytes to go
  "chk8w: \n"
    "pref 0, 0x0(%[src]) \n"
    "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
    // t8 is the remainder count past 32-bytes
    "beq %[count], $t8, chk1w \n"
    // count=t8,no 32-byte chunk
    " nop \n"
    "lw $t0, 0(%[src]) \n"
    "lw $t1, 4(%[src]) \n"
    "lw $t2, 8(%[src]) \n"
    "lw $t3, 12(%[src]) \n"
    "lw $t4, 16(%[src]) \n"
    "lw $t5, 20(%[src]) \n"
    "lw $t6, 24(%[src]) \n"
    "lw $t7, 28(%[src]) \n"
    "addiu %[src], %[src], 32 \n"
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "addiu %[dst], %[dst], 32 \n"
  "chk1w: \n"
    "andi %[count], $t8, 0x3 \n"
    // now count is the remainder past 1w chunks
    "beq %[count], $t8, $last8 \n"
    " subu $a3, $t8, %[count] \n"
    // a3 is count of bytes in 1w chunks
    "addu $a3, %[dst], $a3 \n"
    // now a3 is the dst address past the 1w chunks
    // copying in words (4-byte chunks)
  "$wordCopy_loop: \n"
    "lw $t3, 0(%[src]) \n"
    // the first t3 may be equal t0 ... optimize?
    "addiu %[src], %[src],4 \n"
    "addiu %[dst], %[dst],4 \n"
    "bne %[dst], $a3,$wordCopy_loop \n"
    " sw $t3, -4(%[dst]) \n"
    // For the last (<8) bytes
  "$last8: \n"
    "blez %[count], leave \n"
    " addu $a3, %[dst], %[count] \n" // a3 -last dst address
  "$last8loop: \n"
    "lb $v1, 0(%[src]) \n"
    "addiu %[src], %[src], 1 \n"
    "addiu %[dst], %[dst], 1 \n"
    "bne %[dst], $a3, $last8loop \n"
    " sb $v1, -1(%[dst]) \n"
  "leave: \n"
    " j $ra \n"
    " nop \n"
    //
    // UNALIGNED case
    //
  "unaligned: \n"
    // got here with a3="negu a1"
    "andi $a3, $a3, 0x3 \n" // a1 is word aligned?
    "beqz $a3, $ua_chk16w \n"
    " subu %[count], %[count], $a3 \n"
    // bytes left after initial a3 bytes
    "lwr $v1, 0(%[src]) \n"
    "lwl $v1, 3(%[src]) \n"
    "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3
    "swr $v1, 0(%[dst]) \n"
    "addu %[dst], %[dst], $a3 \n"
    // below the dst will be word aligned (NOTE1)
  "$ua_chk16w: \n"
    "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
    // t8 is the byte count after 64-byte chunks
    "beq %[count], $t8, ua_chk8w \n"
    // if a2==t8, no 64-byte chunks
    // There will be at most 1 32-byte chunk after it
    "subu $a3, %[count], $t8 \n" // the remainder
    // Here a3 counts bytes in 16w chunks
    "addu $a3, %[dst], $a3 \n"
    // Now a3 is the final dst after 64-byte chunks
    "addu $t0, %[dst], %[count] \n" // t0 "past the end"
    "subu $t9, $t0, 160 \n"
    // t9 is the "last safe pref 30,128(a1)" address
    "pref 0, 0(%[src]) \n" // first line of src
    "pref 0, 32(%[src]) \n" // second line addr 32
    "pref 0, 64(%[src]) \n"
    "pref 30, 32(%[dst]) \n"
    // safe, as we have at least 64 bytes ahead
    // In case the a1 > t9 don't use "pref 30" at all
    "sgtu $v1, %[dst], $t9 \n"
    "bgtz $v1, $ua_loop16w \n"
    // skip "pref 30,64(a1)" for too short arrays
    " nop \n"
    // otherwise, start with using pref30
    "pref 30, 64(%[dst]) \n"
  "$ua_loop16w: \n"
    "pref 0, 96(%[src]) \n"
    "lwr $t0, 0(%[src]) \n"
    "lwl $t0, 3(%[src]) \n"
    "lwr $t1, 4(%[src]) \n"
    "bgtz $v1, $ua_skip_pref30_96 \n"
    " lwl $t1, 7(%[src]) \n"
    "pref 30, 96(%[dst]) \n"
    // continue setting up the dest, addr 96
  "$ua_skip_pref30_96: \n"
    "lwr $t2, 8(%[src]) \n"
    "lwl $t2, 11(%[src]) \n"
    "lwr $t3, 12(%[src]) \n"
    "lwl $t3, 15(%[src]) \n"
    "lwr $t4, 16(%[src]) \n"
    "lwl $t4, 19(%[src]) \n"
    "lwr $t5, 20(%[src]) \n"
    "lwl $t5, 23(%[src]) \n"
    "lwr $t6, 24(%[src]) \n"
    "lwl $t6, 27(%[src]) \n"
    "lwr $t7, 28(%[src]) \n"
    "lwl $t7, 31(%[src]) \n"
    "pref 0, 128(%[src]) \n"
    // bring the next lines of src, addr 128
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "lwr $t0, 32(%[src]) \n"
    "lwl $t0, 35(%[src]) \n"
    "lwr $t1, 36(%[src]) \n"
    "bgtz $v1, ua_skip_pref30_128 \n"
    " lwl $t1, 39(%[src]) \n"
    "pref 30, 128(%[dst]) \n"
    // continue setting up the dest, addr 128
  "ua_skip_pref30_128: \n"
    "lwr $t2, 40(%[src]) \n"
    "lwl $t2, 43(%[src]) \n"
    "lwr $t3, 44(%[src]) \n"
    "lwl $t3, 47(%[src]) \n"
    "lwr $t4, 48(%[src]) \n"
    "lwl $t4, 51(%[src]) \n"
    "lwr $t5, 52(%[src]) \n"
    "lwl $t5, 55(%[src]) \n"
    "lwr $t6, 56(%[src]) \n"
    "lwl $t6, 59(%[src]) \n"
    "lwr $t7, 60(%[src]) \n"
    "lwl $t7, 63(%[src]) \n"
    "pref 0, 160(%[src]) \n"
    // bring the next lines of src, addr 160
    "sw $t0, 32(%[dst]) \n"
    "sw $t1, 36(%[dst]) \n"
    "sw $t2, 40(%[dst]) \n"
    "sw $t3, 44(%[dst]) \n"
    "sw $t4, 48(%[dst]) \n"
    "sw $t5, 52(%[dst]) \n"
    "sw $t6, 56(%[dst]) \n"
    "sw $t7, 60(%[dst]) \n"
    "addiu %[dst],%[dst],64 \n" // adding 64 to dest
    "sgtu $v1,%[dst],$t9 \n"
    "bne %[dst],$a3,$ua_loop16w \n"
    " addiu %[src],%[src],64 \n" // adding 64 to src
    "move %[count],$t8 \n"
    // Here dst is word-aligned and there are less than 64 bytes to go
  "ua_chk8w: \n"
    "pref 0, 0x0(%[src]) \n"
    "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
    // t8 is the remainder count
    "beq %[count], $t8, $ua_chk1w \n"
    // when count==t8, no 32-byte chunk
    "lwr $t0, 0(%[src]) \n"
    "lwl $t0, 3(%[src]) \n"
    "lwr $t1, 4(%[src]) \n"
    "lwl $t1, 7(%[src]) \n"
    "lwr $t2, 8(%[src]) \n"
    "lwl $t2, 11(%[src]) \n"
    "lwr $t3, 12(%[src]) \n"
    "lwl $t3, 15(%[src]) \n"
    "lwr $t4, 16(%[src]) \n"
    "lwl $t4, 19(%[src]) \n"
    "lwr $t5, 20(%[src]) \n"
    "lwl $t5, 23(%[src]) \n"
    "lwr $t6, 24(%[src]) \n"
    "lwl $t6, 27(%[src]) \n"
    "lwr $t7, 28(%[src]) \n"
    "lwl $t7, 31(%[src]) \n"
    "addiu %[src], %[src], 32 \n"
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "addiu %[dst], %[dst], 32 \n"
  "$ua_chk1w: \n"
    "andi %[count], $t8, 0x3 \n"
    // now count is the remainder past 1w chunks
    "beq %[count], $t8, ua_smallCopy \n"
    "subu $a3, $t8, %[count] \n"
    // a3 is count of bytes in 1w chunks
    "addu $a3, %[dst], $a3 \n"
    // now a3 is the dst address past the 1w chunks
    // copying in words (4-byte chunks)
  "$ua_wordCopy_loop: \n"
    "lwr $v1, 0(%[src]) \n"
    "lwl $v1, 3(%[src]) \n"
    "addiu %[src], %[src], 4 \n"
    "addiu %[dst], %[dst], 4 \n"
    // note: dst=a1 is word aligned here, see NOTE1
    "bne %[dst], $a3, $ua_wordCopy_loop \n"
    " sw $v1,-4(%[dst]) \n"
    // Now less than 4 bytes (value in count) left to copy
  "ua_smallCopy: \n"
    "beqz %[count], leave \n"
    " addu $a3, %[dst], %[count] \n" // a3 = last dst address
  "$ua_smallCopy_loop: \n"
    "lb $v1, 0(%[src]) \n"
    "addiu %[src], %[src], 1 \n"
    "addiu %[dst], %[dst], 1 \n"
    "bne %[dst],$a3,$ua_smallCopy_loop \n"
    " sb $v1, -1(%[dst]) \n"
    "j $ra \n"
    " nop \n"
    ".set at \n"
    ".set reorder \n"
    // count is overwritten by the asm (subu/move/andi above), so it must be
    // a read-write operand rather than an input-only one.
    : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
      "t8", "t9", "a3", "v1", "at", "memory"
  );
}
#endif // HAS_COPYROW_MIPS
// DSPR2 functions
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
(__mips_dsp_rev >= 2) && \
(_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
// De-interleaves a row of byte pairs from src_uv: the first byte of each pair
// is stored to dst_u and the second to dst_v. `width` is the number of pairs.
//
// Groups of 16 pairs are processed with word loads and precr/precrq packing,
// then stored with "sw"; the remaining (width & 0xf) pairs are copied one
// byte at a time.
//
// The asm stores through dst_u/dst_v, so "memory" is listed as a clobber.
//
// NOTE(review): when width < 16 the code branches straight into the byte
// loop, which executes its body once before testing the counter, so
// width == 0 still copies one pair - confirm callers never pass 0.
void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                      int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t4, %[width], 4 \n" // multiplies of 16
    "blez $t4, 2f \n"
    " andi %[width], %[width], 0xf \n" // residual
  "1: \n"
    "addiu $t4, $t4, -1 \n"
    "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
    "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
    "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
    "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
    "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
    "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10
    "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12
    "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14
    "addiu %[src_uv], %[src_uv], 32 \n"
    "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
    "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
    "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
    "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
    "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
    "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
    "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
    "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
    "sw $t9, 0(%[dst_v]) \n"
    "sw $t0, 0(%[dst_u]) \n"
    "sw $t1, 4(%[dst_v]) \n"
    "sw $t2, 4(%[dst_u]) \n"
    "sw $t3, 8(%[dst_v]) \n"
    "sw $t5, 8(%[dst_u]) \n"
    "sw $t6, 12(%[dst_v]) \n"
    "sw $t7, 12(%[dst_u]) \n"
    "addiu %[dst_v], %[dst_v], 16 \n"
    "bgtz $t4, 1b \n"
    " addiu %[dst_u], %[dst_u], 16 \n"
    // No residual pairs: done.
    "beqz %[width], 3f \n"
    " nop \n"
    // Residual: one pair per iteration.
  "2: \n"
    "lbu $t0, 0(%[src_uv]) \n"
    "lbu $t1, 1(%[src_uv]) \n"
    "addiu %[src_uv], %[src_uv], 2 \n"
    "addiu %[width], %[width], -1 \n"
    "sb $t0, 0(%[dst_u]) \n"
    "sb $t1, 0(%[dst_v]) \n"
    "addiu %[dst_u], %[dst_u], 1 \n"
    "bgtz %[width], 2b \n"
    " addiu %[dst_v], %[dst_v], 1 \n"
  "3: \n"
    ".set pop \n"
    : [src_uv] "+r" (src_uv),
      [width] "+r" (width),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v)
    :
    : "t0", "t1", "t2", "t3",
      "t4", "t5", "t6", "t7", "t8", "t9",
      "memory"
  );
}
// Writes the `width` bytes of src to dst in reverse order.
//
// src is first advanced to one past its last byte. Groups of 16 bytes are
// then read backwards as words, byte-reversed with wsbh + rotr 16 and stored
// forwards; the remaining (width & 0xf) bytes are copied one at a time.
//
// The residual loop decrements its counter before testing it, so it must
// branch with "bgtz" (as the sibling DSPR2 residual loops do) to run exactly
// `residual` times; "bgez" would run once more, reading one byte before src
// and writing one byte past the end of dst.
//
// The asm stores through dst, so "memory" is listed as a clobber.
//
// NOTE(review): when width < 16 the code branches straight into the byte
// loop, which executes its body once before testing the counter, so
// width == 0 still copies one byte - confirm callers never pass 0.
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t4, %[width], 4 \n" // multiplies of 16
    "andi $t5, %[width], 0xf \n"
    "blez $t4, 2f \n"
    " addu %[src], %[src], %[width] \n" // src += width
  "1: \n"
    "lw $t0, -16(%[src]) \n" // |3|2|1|0|
    "lw $t1, -12(%[src]) \n" // |7|6|5|4|
    "lw $t2, -8(%[src]) \n" // |11|10|9|8|
    "lw $t3, -4(%[src]) \n" // |15|14|13|12|
    "wsbh $t0, $t0 \n" // |2|3|0|1|
    "wsbh $t1, $t1 \n" // |6|7|4|5|
    "wsbh $t2, $t2 \n" // |10|11|8|9|
    "wsbh $t3, $t3 \n" // |14|15|12|13|
    "rotr $t0, $t0, 16 \n" // |0|1|2|3|
    "rotr $t1, $t1, 16 \n" // |4|5|6|7|
    "rotr $t2, $t2, 16 \n" // |8|9|10|11|
    "rotr $t3, $t3, 16 \n" // |12|13|14|15|
    "addiu %[src], %[src], -16 \n"
    "addiu $t4, $t4, -1 \n"
    "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
    "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
    "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
    "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
    "bgtz $t4, 1b \n"
    " addiu %[dst], %[dst], 16 \n"
    // No residual bytes: done.
    "beqz $t5, 3f \n"
    " nop \n"
    // Residual: one byte per iteration.
  "2: \n"
    "lbu $t0, -1(%[src]) \n"
    "addiu $t5, $t5, -1 \n"
    "addiu %[src], %[src], -1 \n"
    "sb $t0, 0(%[dst]) \n"
    "bgtz $t5, 2b \n" // loop while residual bytes remain
    " addiu %[dst], %[dst], 1 \n"
  "3: \n"
    ".set pop \n"
    : [src] "+r" (src), [dst] "+r" (dst)
    : [width] "r" (width)
    : "t0", "t1", "t2", "t3", "t4", "t5", "memory"
  );
}
// Reads `width` interleaved byte pairs from src_uv in reverse order and
// de-interleaves them: the first byte of each pair goes to dst_u, the second
// to dst_v.
//
// src_uv is first advanced by 2 * width. Groups of 16 pairs are read
// backwards as words, reordered with rotr/precr/precrq and written with
// swr/swl pairs (which tolerate unaligned destinations); the remaining
// (width & 0xf) pairs are copied one pair at a time.
//
// Every temporary the asm writes ($t0-$t9, including $t6) is listed as a
// clobber, and "memory" is listed because the asm stores through
// dst_u/dst_v.
//
// NOTE(review): when width < 16 the code branches straight into the byte
// loop, which executes its body once before testing the counter, so
// width == 0 still copies one pair - confirm callers never pass 0.
void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                       int width) {
  int x = 0;  // number of 16-pair groups (set by the asm)
  int y = 0;  // residual pair count (set by the asm)
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "addu $t4, %[width], %[width] \n"
    "srl %[x], %[width], 4 \n"
    "andi %[y], %[width], 0xf \n"
    "blez %[x], 2f \n"
    " addu %[src_uv], %[src_uv], $t4 \n"
  "1: \n"
    "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
    "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
    "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
    "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
    "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
    "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
    "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
    "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|
    "rotr $t0, $t0, 16 \n" // |1|0|3|2|
    "rotr $t1, $t1, 16 \n" // |5|4|7|6|
    "rotr $t2, $t2, 16 \n" // |9|8|11|10|
    "rotr $t3, $t3, 16 \n" // |13|12|15|14|
    "rotr $t4, $t4, 16 \n" // |17|16|19|18|
    "rotr $t6, $t6, 16 \n" // |21|20|23|22|
    "rotr $t7, $t7, 16 \n" // |25|24|27|26|
    "rotr $t8, $t8, 16 \n" // |29|28|31|30|
    "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
    "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
    "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
    "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
    "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
    "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
    "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
    "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
    "addiu %[src_uv], %[src_uv], -32 \n"
    "addiu %[x], %[x], -1 \n"
    "swr $t4, 0(%[dst_u]) \n"
    "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
    "swr $t6, 0(%[dst_v]) \n"
    "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
    "swr $t2, 4(%[dst_u]) \n"
    "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
    "swr $t3, 4(%[dst_v]) \n"
    "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
    "swr $t0, 8(%[dst_u]) \n"
    "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
    "swr $t1, 8(%[dst_v]) \n"
    "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
    "swr $t9, 12(%[dst_u]) \n"
    "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
    "swr $t5, 12(%[dst_v]) \n"
    "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
    "addiu %[dst_v], %[dst_v], 16 \n"
    "bgtz %[x], 1b \n"
    " addiu %[dst_u], %[dst_u], 16 \n"
    // No residual pairs: done.
    "beqz %[y], 3f \n"
    " nop \n"
    "b 2f \n"
    " nop \n"
    // Residual: one pair per iteration.
  "2: \n"
    "lbu $t0, -2(%[src_uv]) \n"
    "lbu $t1, -1(%[src_uv]) \n"
    "addiu %[src_uv], %[src_uv], -2 \n"
    "addiu %[y], %[y], -1 \n"
    "sb $t0, 0(%[dst_u]) \n"
    "sb $t1, 0(%[dst_v]) \n"
    "addiu %[dst_u], %[dst_u], 1 \n"
    "bgtz %[y], 2b \n"
    " addiu %[dst_v], %[dst_v], 1 \n"
  "3: \n"
    ".set pop \n"
    : [src_uv] "+r" (src_uv),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v),
      [x] "=&r" (x),
      [y] "+r" (y)
    : [width] "r" (width)
    : "t0", "t1", "t2", "t3", "t4",
      "t5", "t6", "t7", "t8", "t9",
      "memory"
  );
}
// Assembly fragment (a sequence of string literals) meant to be pasted into
// an inline-asm statement that has operands named y_buf, u_buf and v_buf.
//
// Convert (4 Y and 2 VU) I422 and arrange RGB values into
// t5 = | 0 | B0 | 0 | b0 |
// t4 = | 0 | B1 | 0 | b1 |
// t9 = | 0 | G0 | 0 | g0 |
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
//
// Inputs read per expansion: one word (4 bytes) from y_buf and one halfword
// (2 bytes) each from u_buf and v_buf. u_buf and v_buf are advanced by 2;
// y_buf is NOT advanced here and must be advanced by the user of the macro.
//
// The fragment reads registers $s0..$s5, which it does not set itself:
// $s0 multiplies the Y samples, $s1/$s2 multiply U/V for the green sum,
// $s3 multiplies V for the red sum, $s4 is subtracted from Y, and $s5 is
// subtracted from U and V and also used as the bias in the final
// subtract / saturating-shift / add sequence.
// It overwrites $t0-$t6, $t8 and $t9.
#define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
"preceu.ph.qbr $t1, $t1 \n" \
"preceu.ph.qbr $t2, $t2 \n" \
"preceu.ph.qbra $t3, $t0 \n" \
"preceu.ph.qbla $t0, $t0 \n" \
"subu.ph $t1, $t1, $s5 \n" \
"subu.ph $t2, $t2, $s5 \n" \
"subu.ph $t3, $t3, $s4 \n" \
"subu.ph $t0, $t0, $s4 \n" \
"mul.ph $t3, $t3, $s0 \n" \
"mul.ph $t0, $t0, $s0 \n" \
"shll.ph $t4, $t1, 0x7 \n" \
"subu.ph $t4, $t4, $t1 \n" \
"mul.ph $t6, $t1, $s1 \n" \
"mul.ph $t1, $t2, $s2 \n" \
"addq_s.ph $t5, $t4, $t3 \n" \
"addq_s.ph $t4, $t4, $t0 \n" \
"shra.ph $t5, $t5, 6 \n" \
"shra.ph $t4, $t4, 6 \n" \
"addiu %[u_buf], 2 \n" \
"addiu %[v_buf], 2 \n" \
"addu.ph $t6, $t6, $t1 \n" \
"mul.ph $t1, $t2, $s3 \n" \
"addu.ph $t9, $t6, $t3 \n" \
"addu.ph $t8, $t6, $t0 \n" \
"shra.ph $t9, $t9, 6 \n" \
"shra.ph $t8, $t8, 6 \n" \
"addu.ph $t2, $t1, $t3 \n" \
"addu.ph $t1, $t1, $t0 \n" \
"shra.ph $t2, $t2, 6 \n" \
"shra.ph $t1, $t1, 6 \n" \
"subu.ph $t5, $t5, $s5 \n" \
"subu.ph $t4, $t4, $s5 \n" \
"subu.ph $t9, $t9, $s5 \n" \
"subu.ph $t8, $t8, $s5 \n" \
"subu.ph $t2, $t2, $s5 \n" \
"subu.ph $t1, $t1, $s5 \n" \
"shll_s.ph $t5, $t5, 8 \n" \
"shll_s.ph $t4, $t4, 8 \n" \
"shll_s.ph $t9, $t9, 8 \n" \
"shll_s.ph $t8, $t8, 8 \n" \
"shll_s.ph $t2, $t2, 8 \n" \
"shll_s.ph $t1, $t1, 8 \n" \
"shra.ph $t5, $t5, 8 \n" \
"shra.ph $t4, $t4, 8 \n" \
"shra.ph $t9, $t9, 8 \n" \
"shra.ph $t8, $t8, 8 \n" \
"shra.ph $t2, $t2, 8 \n" \
"shra.ph $t1, $t1, 8 \n" \
"addu.ph $t5, $t5, $s5 \n" \
"addu.ph $t4, $t4, $s5 \n" \
"addu.ph $t9, $t9, $s5 \n" \
"addu.ph $t8, $t8, $s5 \n" \
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"
// Converts a row of I422 (planar Y, U, V) pixels to 4-byte output pixels,
// 4 pixels per loop iteration, using the YUVTORGB fragment.
//
// Each iteration consumes 4 bytes of y_buf and 2 bytes each of u_buf/v_buf,
// and stores 4 words (16 bytes) to rgb_buf with 0xff in the top byte of
// every word.
//
// The conversion coefficients are hard-coded via repl.ph below; the
// yuvconstants argument is not read by this implementation.
// TODO(fbarchard): accept yuv conversion constants.
//
// The loop subtracts 4 from width and repeats while width != 0, so width
// must be a positive multiple of 4 (width == 0 returns immediately).
//
// The asm stores through rgb_buf, so "memory" is listed as a clobber.
void I422ToARGBRow_DSPR2(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "beqz %[width], 2f \n"
    " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
    "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
    "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
    "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
    "repl.ph $s4, 16 \n" // |0|16|0|16|
    "repl.ph $s5, 128 \n" // |128|128| // clipping
    "lui $s6, 0xff00 \n"
    "ori $s6, 0xff00 \n" // |ff|00|ff|00|
  "1: \n"
    YUVTORGB
    // Arranging into argb format
    "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
    "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
    "addiu %[width], -4 \n"
    "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
    "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
    "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
    "addiu %[y_buf], 4 \n"
    "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
    "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
    "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
    "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
    "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
    "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
    "sll $t9, $t9, 16 \n"
    "sll $t8, $t8, 16 \n"
    "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
    "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
    // Store results.
    "sw $t2, 0(%[rgb_buf]) \n"
    "sw $t0, 4(%[rgb_buf]) \n"
    "sw $t1, 8(%[rgb_buf]) \n"
    "sw $t3, 12(%[rgb_buf]) \n"
    "bnez %[width], 1b \n"
    " addiu %[rgb_buf], 16 \n"
  "2: \n"
    ".set pop \n"
    : [y_buf] "+r" (y_buf),
      [u_buf] "+r" (u_buf),
      [v_buf] "+r" (v_buf),
      [width] "+r" (width),
      [rgb_buf] "+r" (rgb_buf)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3",
      "s4", "s5", "s6",
      "memory"
  );
}
// Bilinear filter 8x2 -> 8x1
//
// Blends two source rows into one destination row with MIPS DSPR2 inline
// assembly. Each source byte of the first row is weighted by
// (256 - source_y_fraction), the byte below it (src_ptr + src_stride) by
// source_y_fraction; the sum is shifted right by 8 and packed back to bytes.
//
//   dst_ptr           - destination row; two 32-bit words are stored per
//                       iteration.
//   src_ptr           - first source row; the second row is src_ptr +
//                       src_stride. Both are read with 32-bit loads.
//   src_stride        - byte distance between the two source rows.
//   dst_width         - number of bytes to produce. The loop has no entry
//                       check and handles 8 bytes per pass, so at least 8
//                       bytes are always written and the count is
//                       effectively rounded up to a multiple of 8.
//   source_y_fraction - weight of the second row (the first row receives
//                       256 minus this value).
void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
                          ptrdiff_t src_stride, int dst_width,
                          int source_y_fraction) {
  int y0_fraction = 256 - source_y_fraction;
  const uint8* src_ptr1 = src_ptr + src_stride;
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    // Replicate both weights into the two halfword lanes.
    "replv.ph $t0, %[y0_fraction] \n"
    "replv.ph $t1, %[source_y_fraction] \n"
    "1: \n"
    "lw $t2, 0(%[src_ptr]) \n"
    "lw $t3, 0(%[src_ptr1]) \n"
    "lw $t4, 4(%[src_ptr]) \n"
    "lw $t5, 4(%[src_ptr1]) \n"
    // Weight each pair of bytes (left / right halves of every word).
    "muleu_s.ph.qbl $t6, $t2, $t0 \n"
    "muleu_s.ph.qbr $t7, $t2, $t0 \n"
    "muleu_s.ph.qbl $t8, $t3, $t1 \n"
    "muleu_s.ph.qbr $t9, $t3, $t1 \n"
    "muleu_s.ph.qbl $t2, $t4, $t0 \n"
    "muleu_s.ph.qbr $t3, $t4, $t0 \n"
    "muleu_s.ph.qbl $t4, $t5, $t1 \n"
    "muleu_s.ph.qbr $t5, $t5, $t1 \n"
    // Sum the two weighted rows, drop the 8 fraction bits, repack to bytes.
    "addq.ph $t6, $t6, $t8 \n"
    "addq.ph $t7, $t7, $t9 \n"
    "addq.ph $t2, $t2, $t4 \n"
    "addq.ph $t3, $t3, $t5 \n"
    "shra.ph $t6, $t6, 8 \n"
    "shra.ph $t7, $t7, 8 \n"
    "shra.ph $t2, $t2, 8 \n"
    "shra.ph $t3, $t3, 8 \n"
    "precr.qb.ph $t6, $t6, $t7 \n"
    "precr.qb.ph $t2, $t2, $t3 \n"
    "addiu %[src_ptr], %[src_ptr], 8 \n"
    "addiu %[src_ptr1], %[src_ptr1], 8 \n"
    "addiu %[dst_width], %[dst_width], -8 \n"
    "sw $t6, 0(%[dst_ptr]) \n"
    "sw $t2, 4(%[dst_ptr]) \n"
    "bgtz %[dst_width], 1b \n"
    " addiu %[dst_ptr], %[dst_ptr], 8 \n"  // Delay slot: advance destination.
    ".set pop \n"
    : [dst_ptr] "+r" (dst_ptr),
      [src_ptr1] "+r" (src_ptr1),
      [src_ptr] "+r" (src_ptr),
      [dst_width] "+r" (dst_width)
    // src_stride is listed as an input but is not referenced by the
    // template; it is only used above to derive src_ptr1.
    : [source_y_fraction] "r" (source_y_fraction),
      [y0_fraction] "r" (y0_fraction),
      [src_stride] "r" (src_stride)
    // "memory": the asm loads through src_ptr/src_ptr1 and stores through
    // dst_ptr without memory operands, so the compiler must not cache or
    // reorder memory accesses across it.
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "memory"
  );
}
#endif // __mips_dsp_rev >= 2
#endif // defined(__mips__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1672
libs/libyuv/source/scale.cc Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,221 @@
/*
* Copyright 2015 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/scale.h"
#include "libyuv/scale_row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// "Any width" wrappers for the column scalers (ScaleFilterCols,
// ScaleARGBCols and ScaleARGBFilterCols).
//
// The generated function hands the largest prefix whose pixel count has all
// MASK bits clear to the SIMD routine and lets the C routine finish the
// remaining (dst_width & MASK) pixels. BPP is the destination bytes per
// pixel, used to skip past the SIMD output; the source position of the tail
// is resumed by advancing x by simd_pixels * dx.
#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
  void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
               int dst_width, int x, int dx) { \
    const int simd_pixels = dst_width & ~MASK; \
    const int tail_pixels = dst_width & MASK; \
    if (simd_pixels > 0) { \
      TERP_SIMD(dst_ptr, src_ptr, simd_pixels, x, dx); \
    } \
    TERP_C(dst_ptr + simd_pixels * BPP, src_ptr, \
           tail_pixels, x + simd_pixels * dx, dx); \
  }

#ifdef HAS_SCALEFILTERCOLS_NEON
CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
#endif
#ifdef HAS_SCALEARGBCOLS_NEON
CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
#endif
#ifdef HAS_SCALEARGBFILTERCOLS_NEON
CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
     ScaleARGBFilterCols_C, 4, 3)
#endif
#undef CANY
// Fixed-ratio scale down for arbitrary widths.
//
// Generates NAMEANY, which feeds the SIMD row function the leading pixels
// that form whole groups of (MASK + 1) and the C row function whatever is
// left over. A modulo is used rather than a bit mask, so MASK + 1 does not
// have to be a power of two.
//   FACTOR - source pixels consumed per destination pixel. It is pasted
//            unparenthesized into (body * FACTOR), so an argument such as
//            4 / 3 evaluates as body * 4 / 3.
//   BPP    - bytes per pixel, applied to both source and destination offsets.
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
               uint8* dst_ptr, int dst_width) { \
    const int tail = (int)((unsigned int)dst_width % (MASK + 1)); \
    const int body = dst_width - tail; \
    if (body > 0) { \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, body); \
    } \
    SCALEROWDOWN_C(src_ptr + (body * FACTOR) * BPP, src_stride, \
                   dst_ptr + body * BPP, tail); \
  }
// Fixed scale down for odd source width.  Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
//
// The final destination pixel is always left to the C "odd" row function, so
// only the first (dst_width - 1) pixels are candidates for the SIMD path:
//   r = (dst_width - 1) % (MASK + 1)   leftover full pixels for C
//   n = (dst_width - 1) - r            whole SIMD groups, a multiple of
//                                      MASK + 1
// The C function then receives r + 1 pixels (the leftovers plus the odd last
// pixel). Deriving n from dst_width instead of dst_width - 1 would make n one
// more than a multiple of MASK + 1, handing the SIMD kernel a misaligned
// count and leaving the C function with as few as 0 pixels.
// dst_width is expected to be at least 1.
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
  void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
               uint8* dst_ptr, int dst_width) { \
    int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \
    int n = (dst_width - 1) - r; \
    if (n > 0) { \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
    } \
    SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
                   dst_ptr + n * BPP, r + 1); \
  }
// Instantiations of the fixed-ratio "any width" wrappers.
// Argument order: wrapper name, SIMD row function, C row function,
// FACTOR (source pixels per destination pixel), BPP (bytes per pixel),
// MASK (SIMD group size minus one).
// Each group is compiled only when the matching HAS_* feature macro is
// defined.

// 1/2 scale, 1 byte per pixel, 16 pixels per SSSE3 group.
#ifdef HAS_SCALEROWDOWN2_SSSE3
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3,
ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C,
2, 1, 15)
SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3,
ScaleRowDown2Box_Odd_C, 2, 1, 15)
#endif
// 1/2 scale, 32 pixels per AVX2 group.
#ifdef HAS_SCALEROWDOWN2_AVX2
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
ScaleRowDown2Linear_C, 2, 1, 31)
SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
2, 1, 31)
SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C,
2, 1, 31)
#endif
// 1/2 scale, 16 pixels per NEON group.
#ifdef HAS_SCALEROWDOWN2_NEON
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_C, 2, 1, 15)
SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_Odd_C, 2, 1, 15)
#endif
// 1/4 scale.
#ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C,
4, 1, 7)
#endif
#ifdef HAS_SCALEROWDOWN4_AVX2
SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
SDANY(ScaleRowDown4Box_Any_AVX2, ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C,
4, 1, 15)
#endif
#ifdef HAS_SCALEROWDOWN4_NEON
SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
4, 1, 7)
#endif
// 3/4 scale. FACTOR is written as 4 / 3 and is pasted unparenthesized into
// (n * FACTOR), so the source offset is computed as n * 4 / 3. With groups
// of 24 destination pixels (MASK 23) n is always divisible by 3.
#ifdef HAS_SCALEROWDOWN34_SSSE3
SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3,
ScaleRowDown34_C, 4 / 3, 1, 23)
SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3,
ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3,
ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
#endif
#ifdef HAS_SCALEROWDOWN34_NEON
SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON,
ScaleRowDown34_C, 4 / 3, 1, 23)
SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON,
ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON,
ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
#endif
// 3/8 scale. FACTOR 8 / 3 expands to n * 8 / 3 in the same way. Note the
// SSSE3 box variants use groups of 6 (MASK 5) while the point-sampled
// variant and all NEON variants use groups of 12 (MASK 11).
#ifdef HAS_SCALEROWDOWN38_SSSE3
SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3,
ScaleRowDown38_C, 8 / 3, 1, 11)
SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3,
ScaleRowDown38_3_Box_C, 8 / 3, 1, 5)
SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3,
ScaleRowDown38_2_Box_C, 8 / 3, 1, 5)
#endif
#ifdef HAS_SCALEROWDOWN38_NEON
SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON,
ScaleRowDown38_C, 8 / 3, 1, 11)
SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON,
ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON,
ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
#endif
// ARGB 1/2 scale: 4 bytes per pixel.
#ifdef HAS_SCALEARGBROWDOWN2_SSE2
SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2,
ScaleARGBRowDown2_C, 2, 4, 3)
SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2,
ScaleARGBRowDown2Linear_C, 2, 4, 3)
SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2,
ScaleARGBRowDown2Box_C, 2, 4, 3)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_NEON
SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON,
ScaleARGBRowDown2_C, 2, 4, 7)
SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
ScaleARGBRowDown2Linear_C, 2, 4, 7)
SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
ScaleARGBRowDown2Box_C, 2, 4, 7)
#endif
#undef SDANY
// Scale down by an even factor supplied at run time.
//
// Same body/tail split as the fixed-ratio wrappers, except that the source
// step (src_stepx, in pixels) is a parameter of the generated function and is
// forwarded to both row functions. The C routine resumes at source pixel
// body * src_stepx and destination pixel body; both offsets are scaled by
// BPP bytes per pixel.
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
  void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \
               uint8* dst_ptr, int dst_width) { \
    const int tail = (int)((unsigned int)dst_width % (MASK + 1)); \
    const int body = dst_width - tail; \
    if (body > 0) { \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, body); \
    } \
    SCALEROWDOWN_C(src_ptr + (body * src_stepx) * BPP, src_stride, \
                   src_stepx, dst_ptr + body * BPP, tail); \
  }

#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
       ScaleARGBRowDownEven_C, 4, 3)
SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
       ScaleARGBRowDownEvenBox_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
       ScaleARGBRowDownEven_C, 4, 3)
SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
       ScaleARGBRowDownEvenBox_C, 4, 3)
#endif
// Row accumulation for the box filter, arbitrary widths.
//
// Generates NAMEANY, which passes the leading pixels whose count has all
// MASK bits clear to the SIMD routine and the final (src_width & MASK)
// pixels to the C routine. Source elements are bytes and destination
// elements are uint16, so both pointers advance by the same element count.
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
  void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \
    const int simd_pixels = src_width & ~MASK; \
    const int tail_pixels = src_width & MASK; \
    if (simd_pixels > 0) { \
      SCALEADDROW_SIMD(src_ptr, dst_ptr, simd_pixels); \
    } \
    SCALEADDROW_C(src_ptr + simd_pixels, dst_ptr + simd_pixels, \
                  tail_pixels); \
  }

#ifdef HAS_SCALEADDROW_SSE2
SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
#endif
#ifdef HAS_SCALEADDROW_NEON
SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#endif
#undef SAANY
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

View File

@ -0,0 +1,859 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/scale.h"
#include <assert.h>
#include <string.h>
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Returns the magnitude of v.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
// ScaleARGB ARGB, 1/2
// This is an optimized version for scaling down a ARGB to 1/2 of
// its original size.
static void ScaleARGBDown2(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) =
filtering == kFilterNone ? ScaleARGBRowDown2_C :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
ScaleARGBRowDown2Box_C);
assert(dx == 65536 * 2); // Test scale factor of 2.
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
} else {
src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
ScaleARGBRowDown2Box_Any_SSE2);
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
ScaleARGBRowDown2Box_SSE2);
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
ScaleARGBRowDown2Box_Any_NEON);
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
ScaleARGBRowDown2Box_NEON);
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
}
// ScaleARGB ARGB, 1/4
// This is an optimized version for scaling down a ARGB to 1/4 of
// its original size.
static void ScaleARGBDown4Box(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy) {
int j;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
row + kRowSize, dst_width * 2);
ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
free_aligned_buffer_64(row);
}
// ScaleARGB ARGB Even
// This is an optimized version for scaling down a ARGB to even
// multiple of its original size.
static void ScaleARGBDownEven(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * src_stride;
void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
int src_step, uint8* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
ScaleARGBRowDownEven_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
ScaleARGBRowDownEven_SSE2;
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
ScaleARGBRowDownEven_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
ScaleARGBRowDownEven_NEON;
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
}
// Scale ARGB down with bilinear interpolation.
static void ScaleARGBBilinearDown(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
int64 xlast = x + (int64)(dst_width - 1) * dx;
int64 xl = (dx >= 0) ? x : xlast;
int64 xr = (dx >= 0) ? xlast : x;
int clip_src_width;
xl = (xl >> 16) & ~3; // Left edge aligned.
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
if (xr > src_width) {
xr = src_width;
}
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4;
x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(clip_src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB.
{
align_buffer_64(row, clip_src_width * 4);
const int max_y = (src_height - 1) << 16;
if (y > max_y) {
y = max_y;
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8* src = src_argb + yi * src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
int yf = (y >> 8) & 255;
InterpolateRow(row, src, src_stride, clip_src_width, yf);
ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
}
dst_argb += dst_stride;
y += dy;
if (y > max_y) {
y = max_y;
}
}
free_aligned_buffer_64(row);
}
}
// Scale ARGB up with bilinear interpolation.
// Horizontal scaling is done first: source rows are resampled to dst_width
// by ScaleARGBFilterCols and kept in a two-row cache. Each output row is
// then produced by InterpolateRow from that cache. A new source row is only
// resampled when the integer part of y moves to a different source row.
// When `filtering` is kFilterNone the column function is a point sampler
// (ScaleARGBCols*) rather than a filter.
static void ScaleARGBBilinearUp(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
// Largest y (16.16 fixed point) that still addresses a valid source row.
const int max_y = (src_height - 1) << 16;
// Pick the fastest available vertical interpolator. Later blocks override
// earlier ones.
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
// NOTE(review): unlike the blocks above, this one selects the non-Any
// DSPR2 routine without testing dst_width - confirm that is intended.
#if defined(HAS_INTERPOLATEROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_DSPR2;
}
#endif
// Very wide sources use the 64-bit C column functions. The x86 SIMD
// overrides below are skipped for such widths.
if (src_width >= 32768) {
ScaleARGBFilterCols = filtering ?
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
}
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
if (filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
if (!filtering && TestCpuFlag(kCpuHasNEON)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBCols_NEON;
}
}
#endif
// Unfiltered exact 2x horizontal upscale has a dedicated column function,
// which replaces whatever was selected above.
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
}
if (y > max_y) {
y = max_y;
}
{
int yi = y >> 16;
const uint8* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
// rowptr is the cached row for source row `lasty`; rowptr + rowstride is
// the cached row below it. rowstride alternates between +kRowSize and
// -kRowSize so the two halves of the buffer swap roles without copying.
uint8* rowptr = row;
int rowstride = kRowSize;
int lasty = yi;
// Prime the cache with the first two source rows.
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
// A one-row source reuses the same row for the second cache slot.
if (src_height > 1) {
src += src_stride;
}
ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
src += src_stride;
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
if (yi != lasty) {
// Clamp to the last source row and re-derive the source pointer.
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_argb + yi * src_stride;
}
// Still a different row after clamping: overwrite the older cached row
// with the next source row, then flip rowptr/rowstride so the row that
// was "below" becomes the current one.
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src += src_stride;
}
}
if (filtering == kFilterLinear) {
// No vertical blend: zero stride and zero fraction are passed, so only
// the current cached row is used.
InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
} else {
// Top 8 bits of the fractional part of y select the vertical blend.
int yf = (y >> 8) & 255;
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
}
dst_argb += dst_stride;
y += dy;
}
free_aligned_buffer_64(row);
}
}
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
// Only compiled when YUVSCALEUP is defined.
// Intended flow: convert a source row from YUV to ARGB (I422ToARGBRow into
// argb_row), resample it horizontally into a two-row cache
// (ScaleARGBFilterCols), then blend the cached pair vertically into each
// output row (InterpolateRow). Chroma rows advance every second luma row
// (kYShift == 1).
// NOTE(review): the two priming calls to ScaleARGBFilterCols below are given
// raw Y-plane rows rather than converted ARGB (see the TODO there), so this
// path looks unfinished - confirm before enabling YUVSCALEUP.
// NOTE(review): the I422ToARGBRow pointer type has no YuvConstants
// parameter; row converters that take one will not match it - verify.
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                     int dst_width, int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8* src_y,
                                     const uint8* src_u,
                                     const uint8* src_v,
                                     uint8* dst_argb,
                                     int x, int dx, int y, int dy,
                                     enum FilterMode filtering) {
  int j;
  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    I422ToARGBRow = I422ToARGBRow_DSPR2;
  }
#endif

  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    InterpolateRow = InterpolateRow_DSPR2;
  }
#endif

  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  // Very wide sources use the 64-bit C column functions.
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBCols_NEON;
    }
  }
#endif
  // Unfiltered exact 2x horizontal upscale has a dedicated column function.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  // Largest y (16.16 fixed point) that still addresses a valid source row.
  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8* src_row_y = src_y + yi * src_stride_y;
  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8* src_row_v = src_v + uv_yi * src_stride_v;

  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 4 + 31) & ~31;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  // rowptr is the cached row for source row `lasty`; rowptr + rowstride is
  // the other cached row. rowstride flips sign each time a row is replaced.
  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    // Chroma advances only when leaving an odd luma row.
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      // Clamp to the last source row and re-derive all three row pointers.
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        // Swap the roles of the two cached rows.
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      // No vertical blend: zero stride and zero fraction are passed.
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      // Top 8 bits of the fractional part of y select the vertical blend.
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  // Must name the same buffer that align_buffer_64 declared above
  // (argb_row); freeing an undeclared name does not compile.
  free_aligned_buffer_64(argb_row);
}
#endif
// Scale ARGB to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
static void ScaleARGBSimple(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy) {
int j;
void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBCols = ScaleARGBCols_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleARGBCols = ScaleARGBCols_Any_NEON;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBCols_NEON;
}
}
#endif
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
}
#endif
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
}
}
// ScaleARGB a ARGB.
// Top-level dispatcher: works out the fixed-point start position and step
// for the requested scale, applies the clip offset, and then hands off to
// the most specific scaling routine that fits.
//   src / dst                - image pointers with their strides and sizes.
//   clip_x, clip_y           - offset of the clip rectangle inside the
//                              destination.
//   clip_width, clip_height  - size of the region actually written.
//   filtering                - requested filter; may be reduced to a simpler
//                              mode.
// A negative src_height means the source is read bottom-up.
static void ScaleARGB(const uint8* src, int src_stride,
                      int src_width, int src_height,
                      uint8* dst, int dst_stride,
                      int dst_width, int dst_height,
                      int clip_x, int clip_y, int clip_width, int clip_height,
                      enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;

  // ARGB does not support box filter yet, but allow the user to pass it.
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative src_height means invert the image: start at the last row and
  // walk upwards.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Apply the clip offset: the integer part of (clip * step) moves the
  // source pointer, the fractional part is folded into x / y.
  if (clip_x) {
    int64 clipf = (int64)(clip_x) * dx;
    x += (clipf & 0xffff);
    src += (clipf >> 16) * 4;
    dst += clip_x * 4;
  }
  if (clip_y) {
    int64 clipf = (int64)(clip_y) * dy;
    y += (clipf & 0xffff);
    src += (clipf >> 16) * src_stride;
    dst += clip_y * dst_stride;
  }

  // Special case for integer step values.
  if (((dx | dy) & 0xffff) == 0) {
    if (dx == 0 || dy == 0) {
      // 1 pixel wide and/or tall.
      filtering = kFilterNone;
    } else {
      const int dx_is_odd = (dx & 0x10000) != 0;
      const int dy_is_odd = (dy & 0x10000) != 0;
      if (!dx_is_odd && !dy_is_odd) {
        // Optimized even scale down. ie 2, 4, 6, 8, 10x.
        if (dx == 0x20000) {
          // Optimized 1/2 downsample.
          ScaleARGBDown2(src_width, src_height,
                         clip_width, clip_height,
                         src_stride, dst_stride, src, dst,
                         x, dx, y, dy, filtering);
        } else if (dx == 0x40000 && filtering == kFilterBox) {
          // Optimized 1/4 box downsample.
          ScaleARGBDown4Box(src_width, src_height,
                            clip_width, clip_height,
                            src_stride, dst_stride, src, dst,
                            x, dx, y, dy);
        } else {
          ScaleARGBDownEven(src_width, src_height,
                            clip_width, clip_height,
                            src_stride, dst_stride, src, dst,
                            x, dx, y, dy, filtering);
        }
        return;
      }
      if (dx_is_odd && dy_is_odd) {
        // Optimized odd scale down. ie 3, 5, 7, 9x.
        filtering = kFilterNone;
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
                   dst, dst_stride, clip_width, clip_height);
          return;
        }
      }
    }
  }

  if (dx == 0x10000 && (x & 0xffff) == 0) {
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height,
                       clip_width, clip_height,
                       src_stride, dst_stride, src, dst,
                       x, y, dy, 4, filtering);
    return;
  }

  if (filtering) {
    // A vertical step below 1.0 means the image grows vertically.
    if (dy < 65536) {
      ScaleARGBBilinearUp(src_width, src_height,
                          clip_width, clip_height,
                          src_stride, dst_stride, src, dst,
                          x, dx, y, dy, filtering);
    } else {
      ScaleARGBBilinearDown(src_width, src_height,
                            clip_width, clip_height,
                            src_stride, dst_stride, src, dst,
                            x, dx, y, dy, filtering);
    }
    return;
  }

  // No filtering: plain point sampling.
  ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
                  src_stride, dst_stride, src, dst,
                  x, dx, y, dy);
}
// Scale an ARGB image and write only the clip rectangle of the destination.
// Returns 0 on success, or -1 when an argument is rejected:
//   - a NULL source or destination pointer,
//   - a zero source width or height (negative values are passed through),
//   - a non-positive destination width or height,
//   - a negative clip origin, a clip width or height above 32768, or a clip
//     rectangle that extends past the destination.
LIBYUV_API
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
                  int src_width, int src_height,
                  uint8* dst_argb, int dst_stride_argb,
                  int dst_width, int dst_height,
                  int clip_x, int clip_y, int clip_width, int clip_height,
                  enum FilterMode filtering) {
  // Source image checks.
  if (!src_argb || src_width == 0 || src_height == 0) {
    return -1;
  }
  // Destination image checks.
  if (!dst_argb || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  // Clip origin and size checks.
  if (clip_x < 0 || clip_y < 0) {
    return -1;
  }
  if (clip_width > 32768 || clip_height > 32768) {
    return -1;
  }
  // The clip rectangle must lie inside the destination.
  if ((clip_x + clip_width) > dst_width) {
    return -1;
  }
  if ((clip_y + clip_height) > dst_height) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
            dst_argb, dst_stride_argb, dst_width, dst_height,
            clip_x, clip_y, clip_width, clip_height, filtering);
  return 0;
}
// Scale a whole ARGB image; the clip rectangle is the full destination.
// Returns 0 on success, or -1 when a pointer is NULL, a source dimension is
// zero or larger than 32768, or a destination dimension is not positive.
LIBYUV_API
int ARGBScale(const uint8* src_argb, int src_stride_argb,
              int src_width, int src_height,
              uint8* dst_argb, int dst_stride_argb,
              int dst_width, int dst_height,
              enum FilterMode filtering) {
  // Source image checks.
  if (!src_argb || src_width == 0 || src_height == 0) {
    return -1;
  }
  if (src_width > 32768 || src_height > 32768) {
    return -1;
  }
  // Destination image checks.
  if (!dst_argb || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
            dst_argb, dst_stride_argb, dst_width, dst_height,
            0, 0, dst_width, dst_height, filtering);
  return 0;
}
// Scale with YUV conversion to ARGB and clipping.
// Converts the I420 source to a temporary ARGB image of the source size with
// I420ToARGB, then scales that image into the clip rectangle of dst_argb with
// ARGBScaleClip.
//   src_fourcc, dst_fourcc: accepted but not used by this implementation.
//   src_height: may be negative (inverted image); it is forwarded unchanged
//     to both helpers, only the buffer size uses its magnitude.
// Returns the ARGBScaleClip result, or -1 when the source dimensions cannot
// describe a temporary buffer or that buffer cannot be allocated.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
                       const uint8* src_u, int src_stride_u,
                       const uint8* src_v, int src_stride_v,
                       uint32 src_fourcc,
                       int src_width, int src_height,
                       uint8* dst_argb, int dst_stride_argb,
                       uint32 dst_fourcc,
                       int dst_width, int dst_height,
                       int clip_x, int clip_y, int clip_width, int clip_height,
                       enum FilterMode filtering) {
  uint8* argb_buffer;
  int64 abs_src_height;
  int64 buffer_size;
  int r;
  (void)src_fourcc;
  (void)dst_fourcc;
  // The temporary image has a stride of src_width * 4 bytes, so the width
  // must be positive and the stride must fit in an int (32 bit assumed).
  if (src_width <= 0 || src_height == 0 || src_width > 0x7fffffff / 4) {
    return -1;
  }
  // Size the buffer from the magnitude of the height. Using the signed height
  // made the size negative for inverted sources. The math is done in 64 bits
  // so the product cannot wrap.
  abs_src_height = (src_height < 0) ? -(int64)src_height : (int64)src_height;
  buffer_size = (int64)src_width * 4 * abs_src_height;
  if ((int64)(size_t)buffer_size != buffer_size) {
    return -1;  // Does not fit in size_t on this platform.
  }
  argb_buffer = (uint8*)malloc((size_t)buffer_size);
  if (!argb_buffer) {
    return -1;  // Out of memory.
  }
  I420ToARGB(src_y, src_stride_y,
             src_u, src_stride_u,
             src_v, src_stride_v,
             argb_buffer, src_width * 4,
             src_width, src_height);
  r = ARGBScaleClip(argb_buffer, src_width * 4,
                    src_width, src_height,
                    dst_argb, dst_stride_argb,
                    dst_width, dst_height,
                    clip_x, clip_y, clip_width, clip_height,
                    filtering);
  free(argb_buffer);
  return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,644 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for GCC MIPS DSPR2
#if !defined(LIBYUV_DISABLE_MIPS) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)
// Point-samples one row to half width: each output byte is the even-indexed
// byte of a source pair (see the lane comments and the residue loop).
// The main loop turns 32 source bytes into 16 output bytes per pass using
// word loads and stores; the residue loop then copies the remaining
// (dst_width & 15) bytes one at a time.
// src_stride is not referenced by this function.
void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 4 \n" // iterations -> by 16
"beqz $t9, 2f \n"
" nop \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
// TODO(fbarchard): Use odd pixels instead of even.
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t8, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $t1, 8(%[dst]) \n"
"sw $t2, 12(%[dst]) \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 16 \n" // branch delay slot
"2: \n"
"andi $t9, %[dst_width], 0xf \n" // residue
"beqz $t9, 3f \n"
" nop \n"
"21: \n"
"lbu $t0, 0(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 2 \n"
"addiu $t9, $t9, -1 \n"
"sb $t0, 0(%[dst]) \n"
"bgtz $t9, 21b \n"
" addiu %[dst], %[dst], 1 \n" // branch delay slot
"3: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst)
: [dst_width] "r" (dst_width)
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9"
);
}
// Box-filters two rows to half width: each output byte is the rounded average
// ((sum + 2) >> 2) of a 2x2 block taken from the row at src_ptr and the row
// at src_ptr + src_stride.
// The main loop produces 8 output bytes per pass from 16 bytes of each row.
// The residue loop handles the remaining (dst_width & 7) pixels two at a
// time using unaligned loads (lwr/lwl).
// NOTE(review): because the residue loop always stores two bytes per pass, an
// odd residue writes one byte past dst_width - confirm callers allow that.
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst, int dst_width) {
  const uint8* t = src_ptr + src_stride;
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t9, %[dst_width], 3 \n" // iterations -> step 8
    // srl never yields a negative value, so the iteration count has to be
    // tested against zero; otherwise the 8 pixel loop runs once even when
    // dst_width < 8.
    "beqz $t9, 2f \n"
    " nop \n"
    "1: \n"
    "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
    "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
    "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
    "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
    "lw $t4, 0(%[t]) \n" // |19|18|17|16|
    "lw $t5, 4(%[t]) \n" // |23|22|21|20|
    "lw $t6, 8(%[t]) \n" // |27|26|25|24|
    "lw $t7, 12(%[t]) \n" // |31|30|29|28|
    "addiu $t9, $t9, -1 \n"
    "srl $t8, $t0, 16 \n" // |X|X|3|2|
    "ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
    "ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
    "raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
    "raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
    "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
    "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
    "srl $t8, $t1, 16 \n" // |X|X|7|6|
    "ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
    "ins $t5, $t8, 0, 16 \n" // |23|22|7|6|
    "raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
    "raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
    "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
    "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
    "srl $t8, $t2, 16 \n" // |X|X|11|10|
    "ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
    "ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
    "raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
    "raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
    "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
    "shra_r.w $t6, $t6, 2 \n" // |t6+2|>>2
    "srl $t8, $t3, 16 \n" // |X|X|15|14|
    "ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
    "ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
    "raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
    "raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
    "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
    "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
    "addiu %[src_ptr], %[src_ptr], 16 \n"
    "addiu %[t], %[t], 16 \n"
    "sb $t0, 0(%[dst]) \n"
    "sb $t4, 1(%[dst]) \n"
    "sb $t1, 2(%[dst]) \n"
    "sb $t5, 3(%[dst]) \n"
    "sb $t2, 4(%[dst]) \n"
    "sb $t6, 5(%[dst]) \n"
    "sb $t3, 6(%[dst]) \n"
    "sb $t7, 7(%[dst]) \n"
    "bgtz $t9, 1b \n"
    " addiu %[dst], %[dst], 8 \n" // branch delay slot
    "2: \n"
    "andi $t9, %[dst_width], 0x7 \n" // x = residue
    "beqz $t9, 3f \n"
    " nop \n"
    "21: \n"
    "lwr $t1, 0(%[src_ptr]) \n"
    "lwl $t1, 3(%[src_ptr]) \n"
    "lwr $t2, 0(%[t]) \n"
    "lwl $t2, 3(%[t]) \n"
    "srl $t8, $t1, 16 \n"
    "ins $t1, $t2, 16, 16 \n"
    "ins $t2, $t8, 0, 16 \n"
    "raddu.w.qb $t1, $t1 \n"
    "raddu.w.qb $t2, $t2 \n"
    "shra_r.w $t1, $t1, 2 \n"
    "shra_r.w $t2, $t2, 2 \n"
    "sb $t1, 0(%[dst]) \n"
    "sb $t2, 1(%[dst]) \n"
    "addiu %[src_ptr], %[src_ptr], 4 \n"
    "addiu $t9, $t9, -2 \n"
    "addiu %[t], %[t], 4 \n"
    "bgtz $t9, 21b \n"
    " addiu %[dst], %[dst], 2 \n" // branch delay slot
    "3: \n"
    ".set pop \n"
    : [src_ptr] "+r" (src_ptr),
      [dst] "+r" (dst), [t] "+r" (t)
    : [dst_width] "r" (dst_width)
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9"
  );
}
// Point-samples one row to quarter width: each output byte is byte 0 of a
// group of four source bytes (see the lane comments and the residue loop).
// The main loop turns 32 source bytes into 8 output bytes per pass; the
// residue loop then copies the remaining (dst_width & 7) bytes one at a time.
// src_stride is not referenced by this function.
void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 3 \n"
"beqz $t9, 2f \n"
" nop \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
"precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
"precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
"precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
"precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
"precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
"precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t1, 0(%[dst]) \n"
"sw $t5, 4(%[dst]) \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 8 \n" // branch delay slot
"2: \n"
"andi $t9, %[dst_width], 7 \n" // residue
"beqz $t9, 3f \n"
" nop \n"
"21: \n"
"lbu $t1, 0(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"addiu $t9, $t9, -1 \n"
"sb $t1, 0(%[dst]) \n"
"bgtz $t9, 21b \n"
" addiu %[dst], %[dst], 1 \n" // branch delay slot
"3: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst)
: [dst_width] "r" (dst_width)
: "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9"
);
}
// Box-filters four rows to quarter width: each output byte is the rounded
// average ((sum + 8) >> 4) of a 4x4 block taken from the row at src_ptr and
// the three rows that follow it at src_stride intervals.
// The loop produces two output bytes per pass from 8 bytes of each row; when
// dst_width is odd, one more output byte is produced after the loop.
// The |n| lane labels are running labels rather than byte offsets: every load
// reads offset 0 (or 4) of its own row.
// NOTE(review): the loop counter is only tested at the bottom of the loop, so
// the body runs once even when dst_width < 2 - confirm callers never pass
// such a width.
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
intptr_t stride = src_stride;
const uint8* s1 = src_ptr + stride;
const uint8* s2 = s1 + stride;
const uint8* s3 = s2 + stride;
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"srl $t9, %[dst_width], 1 \n" // loop count: two pixels per pass
"andi $t8, %[dst_width], 1 \n" // non-zero when one pixel is left over
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
"lw $t3, 0(%[s3]) \n" // |15|14|13|12|
"lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 4(%[s1]) \n" // |23|22|21|20|
"lw $t6, 4(%[s2]) \n" // |27|26|25|24|
"lw $t7, 4(%[s3]) \n" // |31|30|29|28|
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
"raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
"raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
"raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
"raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16|
"raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20|
"raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24|
"raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28|
"add $t0, $t0, $t1 \n"
"add $t1, $t2, $t3 \n"
"add $t0, $t0, $t1 \n" // sum of the first 4x4 block
"add $t4, $t4, $t5 \n"
"add $t6, $t6, $t7 \n"
"add $t4, $t4, $t6 \n" // sum of the second 4x4 block
"shra_r.w $t0, $t0, 4 \n" // (sum + 8) >> 4
"shra_r.w $t4, $t4, 4 \n" // (sum + 8) >> 4
"sb $t0, 0(%[dst]) \n"
"sb $t4, 1(%[dst]) \n"
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[s1], %[s1], 8 \n"
"addiu %[s2], %[s2], 8 \n"
"addiu %[s3], %[s3], 8 \n"
"addiu $t9, $t9, -1 \n"
"bgtz $t9, 1b \n"
" addiu %[dst], %[dst], 2 \n" // branch delay slot
"beqz $t8, 2f \n"
" nop \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
"lw $t3, 0(%[s3]) \n" // |15|14|13|12|
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
"raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
"raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
"raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
"add $t0, $t0, $t1 \n"
"add $t1, $t2, $t3 \n"
"add $t0, $t0, $t1 \n"
"shra_r.w $t0, $t0, 4 \n"
"sb $t0, 0(%[dst]) \n"
"2: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst),
[s1] "+r" (s1),
[s2] "+r" (s2),
[s3] "+r" (s3)
: [dst_width] "r" (dst_width)
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6","t7", "t8", "t9"
);
}
// Point-samples one row to 3/4 width: per the lane comments, bytes 0, 1 and 3
// of every group of four source bytes are kept.
// Each pass turns 32 source bytes into 24 output bytes.
// src_stride is not referenced by this function.
// NOTE(review): the loop only exits when dst_width reaches exactly zero
// (bnez after subtracting 24), so dst_width presumably must be a positive
// multiple of 24 - confirm against the callers.
void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"1: \n"
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
"precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13|
"precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|29|
"addiu %[dst_width], %[dst_width], -24 \n"
"ins $t1, $t1, 8, 16 \n" // |3|1|0|X|
"ins $t4, $t0, 8, 16 \n" // |X|15|13|12|
"ins $t5, $t5, 8, 16 \n" // |19|17|16|X|
"ins $t8, $t9, 8, 16 \n" // |X|31|29|28|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5|
"packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21|
"prepend $t1, $t2, 8 \n" // |4|3|1|0|
"prepend $t3, $t4, 24 \n" // |15|13|12|11|
"prepend $t5, $t6, 8 \n" // |20|19|17|16|
"prepend $t7, $t8, 24 \n" // |31|29|28|27|
"sw $t1, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $t3, 8(%[dst]) \n"
"sw $t5, 12(%[dst]) \n"
"sw $t9, 16(%[dst]) \n"
"sw $t7, 20(%[dst]) \n"
"bnez %[dst_width], 1b \n"
" addiu %[dst], %[dst], 24 \n" // branch delay slot
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst),
[dst_width] "+r" (dst_width)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6","t7", "t8", "t9"
);
}
// Filtered 3/4 downscale of two rows: the row at src_ptr (S) and the row at
// src_ptr + src_stride (T, loaded with lwx). Each pass reads 4 bytes from
// both rows and writes 3 output bytes.
// Horizontally, the outer two outputs weight the edge byte 3:1 against its
// neighbour ((3 * edge + inner + 2) >> 2) and the middle output is the
// rounded average of the two inner bytes.
// Vertically, the S row is weighted 3:1 against the T row
// ((3 * S + T + 2) >> 2).
// The loop tests dst_width only after a pass, so it runs at least once.
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"repl.ph $t3, 3 \n" // 0x00030003
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
"rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
"rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
"muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3|
"muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3|
"andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1|
"andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
"raddu.w.qb $t0, $t0 \n" // S2+S1
"raddu.w.qb $t1, $t1 \n" // T2+T1
"shra_r.w $t0, $t0, 1 \n" // (S2+S1+1)>>1
"shra_r.w $t1, $t1, 1 \n" // (T2+T1+1)>>1
"preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1|
"preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
"rotr $t2, $t2, 16 \n" // |0|S1|0|S2|
"rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
"addu.ph $t2, $t2, $t4 \n" // |S1+S0*3|S2+S3*3|
"addu.ph $t6, $t6, $t5 \n" // |T1+T0*3|T2+T3*3|
"sll $t5, $t0, 1 \n"
"add $t0, $t5, $t0 \n" // middle S value * 3
"shra_r.ph $t2, $t2, 2 \n"
"shra_r.ph $t6, $t6, 2 \n"
"shll.ph $t4, $t2, 1 \n"
"addq.ph $t4, $t4, $t2 \n" // outer S values * 3
"addu $t0, $t0, $t1 \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"shra_r.w $t0, $t0, 2 \n" // middle: (3*S + T + 2)>>2
"addu.ph $t6, $t6, $t4 \n"
"shra_r.ph $t6, $t6, 2 \n" // outer: (3*S + T + 2)>>2
"srl $t1, $t6, 16 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"sb $t1, 0(%[d]) \n"
"sb $t0, 1(%[d]) \n"
"sb $t6, 2(%[d]) \n"
"bgtz %[dst_width], 1b \n"
" addiu %[d], %[d], 3 \n" // branch delay slot
"3: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[src_stride] "+r" (src_stride),
[d] "+r" (d),
[dst_width] "+r" (dst_width)
:
: "t0", "t1", "t2", "t3",
"t4", "t5", "t6"
);
}
// Filtered 3/4 downscale of two rows: the row at src_ptr (S) and the row at
// src_ptr + src_stride (T, loaded with lwx). Each pass reads 4 bytes from
// both rows and writes 3 output bytes.
// Horizontally, the outer two outputs weight the edge byte 3:1 against its
// neighbour ((3 * edge + inner + 2) >> 2) and the middle output is the
// rounded average of the two inner bytes.
// Vertically, the two rows are weighted equally ((S + T + 1) >> 1).
// The loop tests dst_width only after a pass, so it runs at least once.
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"repl.ph $t2, 3 \n" // 0x00030003
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
"rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
"rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
"muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3|
"muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3|
"andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1|
"andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
"raddu.w.qb $t0, $t0 \n" // S2+S1
"raddu.w.qb $t1, $t1 \n" // T2+T1
"shra_r.w $t0, $t0, 1 \n" // (S2+S1+1)>>1
"shra_r.w $t1, $t1, 1 \n" // (T2+T1+1)>>1
"preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1|
"preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
"rotr $t4, $t4, 16 \n" // |0|S1|0|S2|
"rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
"addu.ph $t4, $t4, $t3 \n" // |S1+S0*3|S2+S3*3|
"addu.ph $t6, $t6, $t5 \n" // |T1+T0*3|T2+T3*3|
"shra_r.ph $t6, $t6, 2 \n"
"shra_r.ph $t4, $t4, 2 \n"
"addu.ph $t6, $t6, $t4 \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"shra_r.ph $t6, $t6, 1 \n" // outer: (S + T + 1)>>1
"addu $t0, $t0, $t1 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"shra_r.w $t0, $t0, 1 \n" // middle: (S + T + 1)>>1
"srl $t1, $t6, 16 \n"
"sb $t1, 0(%[d]) \n"
"sb $t0, 1(%[d]) \n"
"sb $t6, 2(%[d]) \n"
"bgtz %[dst_width], 1b \n"
" addiu %[d], %[d], 3 \n" // branch delay slot
"3: \n"
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[src_stride] "+r" (src_stride),
[d] "+r" (d),
[dst_width] "+r" (dst_width)
:
: "t0", "t1", "t2", "t3",
"t4", "t5", "t6"
);
}
// Point-samples one row to 3/8 width. Each pass turns 32 source bytes into
// 12 output bytes; per the lane comments the bytes kept are
// 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27 and 30.
// The loop runs at least once and repeats while at least 12 more output
// pixels remain (bgez on dst_width - 12, evaluated after the decrement).
// src_stride is not referenced by this function.
void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
"wsbh $t0, $t0 \n" // |2|3|0|1|
"wsbh $t6, $t6 \n" // |26|27|24|25|
"srl $t0, $t0, 8 \n" // |X|2|3|0|
"srl $t3, $t3, 16 \n" // |X|X|15|14|
"srl $t5, $t5, 16 \n" // |X|X|23|22|
"srl $t7, $t7, 16 \n" // |X|X|31|30|
"ins $t1, $t2, 24, 8 \n" // |8|6|5|4|
"ins $t6, $t5, 0, 8 \n" // |26|27|24|22|
"ins $t1, $t0, 0, 16 \n" // |8|6|3|0|
"ins $t6, $t7, 24, 8 \n" // |30|27|24|22|
"prepend $t2, $t3, 24 \n" // |X|15|14|11|
"ins $t4, $t4, 16, 8 \n" // |19|16|17|X|
"ins $t4, $t2, 0, 16 \n" // |19|16|14|11|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu %[dst_width], %[dst_width], -12 \n"
"addiu $t8,%[dst_width], -12 \n" // >= 0 when another full pass fits
"sw $t1, 0(%[dst]) \n"
"sw $t4, 4(%[dst]) \n"
"sw $t6, 8(%[dst]) \n"
"bgez $t8, 1b \n"
" addiu %[dst], %[dst], 12 \n" // branch delay slot
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst] "+r" (dst),
[dst_width] "+r" (dst_width)
:
: "t0", "t1", "t2", "t3", "t4",
"t5", "t6", "t7", "t8"
);
}
// Box-filtered 3/8 downscale of two rows: the row at src_ptr (S) and the row
// at src_ptr + src_stride (T). Each pass reads 8 bytes from both rows and
// writes 3 output bytes:
//   output 0: sum of S0..S2 and T0..T2 (6 bytes), times 0x2AAA, >> 16
//   output 1: sum of S3..S5 and T3..T5 (6 bytes), times 0x2AAA, >> 16
//   output 2: sum of S6, S7, T6, T7 (4 bytes), >> 2 without rounding
// 0x2AAA is 65536 / 6 rounded down, so the multiply and shift approximate a
// division by 6.
// The loop tests dst_width only after a pass, so it runs at least once.
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
const uint8* t = src_ptr + stride;
const int c = 0x2AAA;
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
"lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
"lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4|
"rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
"packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6|
"packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4|
"raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6
"raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4
"precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1|
"precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3|
"srl $t4, $t4, 2 \n" // t4 / 4
"srl $t6, $t6, 16 \n" // |0|0|S3|T3|
"raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3
"addu $t6, $t5, $t6 \n"
"mul $t6, $t6, %[c] \n" // t6 * 0x2AAA
"sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
"sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
"raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0
"raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0
"addu $t0, $t0, $t2 \n"
"mul $t0, $t0, %[c] \n" // t0 * 0x2AAA
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[t], %[t], 8 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"addiu %[dst_ptr], %[dst_ptr], 3 \n"
"srl $t6, $t6, 16 \n"
"srl $t0, $t0, 16 \n"
// dst_ptr has already been advanced by 3, hence the negative offsets.
"sb $t4, -1(%[dst_ptr]) \n"
"sb $t6, -2(%[dst_ptr]) \n"
"bgtz %[dst_width], 1b \n"
" sb $t0, -3(%[dst_ptr]) \n" // branch delay slot
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst_ptr] "+r" (dst_ptr),
[t] "+r" (t),
[dst_width] "+r" (dst_width)
: [c] "r" (c)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6"
);
}
// Box-filtered 3/8 downscale of three rows: the row at src_ptr (S), the row
// at src_ptr + src_stride (T) and the row at src_ptr + 2 * src_stride (R).
// Each pass reads 8 bytes from every row and writes 3 output bytes:
//   output 0: sum of bytes 0..2 of S, T and R (9 bytes), times 0x1C71, >> 16
//   output 1: sum of bytes 3..5 of S, T and R (9 bytes), times 0x1C71, >> 16
//   output 2: sum of bytes 6..7 of S, T and R (6 bytes), times 0x2AAA, >> 16
// 0x1C71 is 65536 / 9 and 0x2AAA is 65536 / 6, both rounded down, so the
// multiply and shift approximate divisions by 9 and 6.
// The loop tests dst_width only after a pass, so it runs at least once.
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
const uint8* s1 = src_ptr + stride;
stride += stride;
const uint8* s2 = src_ptr + stride;
const int c1 = 0x1C71;
const int c2 = 0x2AAA;
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"1: \n"
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
"lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
"lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4|
"lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0|
"lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4|
"rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
"packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6|
"raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6
"packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4|
"raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4
"sll $t8, $t5, 16 \n" // |R5|R4|0|0|
"raddu.w.qb $t8, $t8 \n" // R5+R4
"addu $t7, $t7, $t8 \n"
"srl $t8, $t5, 16 \n" // |0|0|R7|R6|
"raddu.w.qb $t8, $t8 \n" // R7 + R6
"addu $t6, $t6, $t8 \n"
"mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA
"precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1|
"precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1|
"srl $t8, $t8, 8 \n" // |0|S3|T3|R3|
"raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3
"addu $t7, $t7, $t8 \n"
"mul $t7, $t7, %[c1] \n" // t7 * 0x1C71
"sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
"sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
"sll $t4, $t4, 8 \n" // |R2|R1|R0|0|
"raddu.w.qb $t0, $t0 \n" // S2+S1+S0
"raddu.w.qb $t2, $t2 \n" // T2+T1+T0
"raddu.w.qb $t4, $t4 \n" // R2+R1+R0
"addu $t0, $t0, $t2 \n"
"addu $t0, $t0, $t4 \n"
"mul $t0, $t0, %[c1] \n" // t0 * 0x1C71
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[s1], %[s1], 8 \n"
"addiu %[s2], %[s2], 8 \n"
"addiu %[dst_width], %[dst_width], -3 \n"
"addiu %[dst_ptr], %[dst_ptr], 3 \n"
"srl $t6, $t6, 16 \n"
"srl $t7, $t7, 16 \n"
"srl $t0, $t0, 16 \n"
// dst_ptr has already been advanced by 3, hence the negative offsets.
"sb $t6, -1(%[dst_ptr]) \n"
"sb $t7, -2(%[dst_ptr]) \n"
"bgtz %[dst_width], 1b \n"
" sb $t0, -3(%[dst_ptr]) \n" // branch delay slot
".set pop \n"
: [src_ptr] "+r" (src_ptr),
[dst_ptr] "+r" (dst_ptr),
[s1] "+r" (s1),
[s2] "+r" (s2),
[dst_width] "+r" (dst_width)
: [c1] "r" (c1), [c2] "r" (c2)
: "t0", "t1", "t2", "t3", "t4",
"t5", "t6", "t7", "t8"
);
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,64 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Number of elements in a statically sized array, as an int.
#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
// One row of the alias table: maps an alternative FOURCC code to the
// canonical code that CanonicalFourCC returns for it.
struct FourCCAliasEntry {
uint32 alias;  // Alternative code to look up.
uint32 canonical;  // Code returned in its place.
};
// Alias table searched linearly by CanonicalFourCC.
// Each entry is {alias, canonical}.
static const struct FourCCAliasEntry kFourCCAliases[] = {
{FOURCC_IYUV, FOURCC_I420},
{FOURCC_YU16, FOURCC_I422},
{FOURCC_YU24, FOURCC_I444},
{FOURCC_YUYV, FOURCC_YUY2},
{FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
{FOURCC_HDYC, FOURCC_UYVY},
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
{FOURCC_DMB1, FOURCC_MJPG},
{FOURCC_BA81, FOURCC_BGGR}, // deprecated.
{FOURCC_RGB3, FOURCC_RAW },
{FOURCC_BGR3, FOURCC_24BG},
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
{FOURCC_CM24, FOURCC_RAW }, // kCMPixelFormat_24RGB
{FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
{FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
{FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
};
// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA
// Map a FOURCC code to its canonical form.
// Returns the canonical code of the first kFourCCAliases entry whose alias
// equals fourcc, or fourcc itself when no entry matches.
LIBYUV_API
uint32 CanonicalFourCC(uint32 fourcc) {
  const struct FourCCAliasEntry* entry = kFourCCAliases;
  const struct FourCCAliasEntry* const table_end =
      kFourCCAliases + ARRAY_SIZE(kFourCCAliases);
  while (entry != table_end) {
    if (entry->alias == fourcc) {
      return entry->canonical;
    }
    ++entry;
  }
  // No alias matched; the code is returned unchanged.
  return fourcc;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

156
libs/libyuv/sync_chromium.py Executable file
View File

@ -0,0 +1,156 @@
#!/usr/bin/env python
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
"""Script to download a Chromium checkout into the workspace.
The script downloads a full Chromium Git clone and its DEPS.
The following environment variable can be used to alter the behavior:
* CHROMIUM_NO_HISTORY - If set to 1, a Git checkout with no history will be
downloaded. This consumes less bandwidth and disk space but is known to be
slower in general if you have a high-speed connection.
After a successful sync has completed, a .last_sync_chromium file is written to
the chromium directory. While it exists, no more gclient sync operations will be
performed until the --target-revision changes or the SCRIPT_VERSION constant is
incremented. The file can be removed manually to force a new sync.
"""
import argparse
import os
import subprocess
import sys
# Bump this whenever the algorithm changes and you need bots/devs to re-sync,
# ignoring the .last_sync_chromium file
SCRIPT_VERSION = 4
# Directory that contains this script.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# Name of the environment variable that enables the no-history checkout.
CHROMIUM_NO_HISTORY = 'CHROMIUM_NO_HISTORY'
def _parse_gclient_dict():
  """Returns the names defined by the .gclient file above this checkout.

  The file is looked up in the parent directory of ROOT_DIR and executed as
  Python source; the names it assigns become keys of the returned dict. If
  the file cannot be read or executed, the error is reported on stderr and
  whatever was collected so far (normally an empty dict) is returned.
  """
  gclient_dict = {}
  try:
    main_gclient = os.path.join(os.path.dirname(ROOT_DIR), '.gclient')
    with open(main_gclient, 'r') as deps_content:
      # Hand exec the source text rather than the file object: only the
      # Python 2 exec statement accepts an open file, so passing the text
      # keeps this working on both Python 2 and Python 3.
      # NOTE(review): this runs whatever code .gclient contains; the file is
      # assumed to be a trusted local configuration file.
      exec(deps_content.read(), gclient_dict)
  except Exception as e:
    # Best effort: callers fall back to defaults when the file is unusable.
    sys.stderr.write('error while parsing .gclient: %s\n' % (e,))
  return gclient_dict
def get_cache_dir():
  """Returns the cache_dir value from .gclient, or None when it is not set."""
  gclient_settings = _parse_gclient_dict()
  return gclient_settings.get('cache_dir')
def get_target_os_list():
  """Returns the target_os entries from .gclient joined with commas.

  The result is an empty string when .gclient defines no target_os.
  """
  gclient_settings = _parse_gclient_dict()
  target_os_names = gclient_settings.get('target_os', [])
  return ','.join(target_os_names)
def main():
  """Syncs the Chromium checkout to --target-revision with gclient.

  Skips the sync when the .last_sync_chromium flag file in the chromium
  directory already records the same script version, target revision and
  target OS list. Otherwise runs "gclient sync" in the chromium directory and
  rewrites the flag file when gclient succeeds.

  Returns:
    0 when the checkout is already up to date or the sync succeeded, 1 when
    no-history mode is requested while a cache directory is configured, and
    otherwise the exit status of gclient.
  """
  CR_DIR = os.path.join(ROOT_DIR, 'chromium')

  p = argparse.ArgumentParser()
  p.add_argument('--target-revision', required=True,
                 help='The target chromium git revision [REQUIRED]')
  p.add_argument('--chromium-dir', default=CR_DIR,
                 help=('The path to the chromium directory to sync '
                       '(default: %(default)r)'))
  opts = p.parse_args()
  opts.chromium_dir = os.path.abspath(opts.chromium_dir)

  target_os_list = get_target_os_list()

  # Do a quick check to see if we were successful last time to make runhooks
  # sooper fast.
  flag_file = os.path.join(opts.chromium_dir, '.last_sync_chromium')
  flag_file_content = '\n'.join([
    str(SCRIPT_VERSION),
    opts.target_revision,
    repr(target_os_list),
  ])
  if (os.path.exists(os.path.join(opts.chromium_dir, 'src')) and
      os.path.exists(flag_file)):
    with open(flag_file, 'r') as f:
      if f.read() == flag_file_content:
        # Two spaces after the colon reproduce the output of the former
        # "print 'text: ', value" statement.
        print('Chromium already up to date:  %s' % opts.target_revision)
        return 0
    # The recorded state is stale; remove it so a failed sync is not mistaken
    # for a successful one.
    os.unlink(flag_file)

  env = os.environ.copy()

  # Avoid downloading NaCl toolchain as part of the Chromium hooks.
  env.setdefault('GYP_DEFINES', '')
  env['GYP_DEFINES'] += ' disable_nacl=1'
  env['GYP_CHROMIUM_NO_ACTION'] = '1'
  gclient_cmd = 'gclient.bat' if sys.platform.startswith('win') else 'gclient'
  args = [
    gclient_cmd, 'sync', '--force', '--revision', 'src@'+opts.target_revision
  ]

  if os.environ.get('CHROME_HEADLESS') == '1':
    # Running on a buildbot.
    args.append('-vvv')

    if sys.platform.startswith('win'):
      cache_path = os.path.join(os.path.splitdrive(ROOT_DIR)[0] + os.path.sep,
                                'b', 'git-cache')
    else:
      cache_path = '/b/git-cache'
  else:
    # Support developers setting the cache_dir in .gclient.
    cache_path = get_cache_dir()

  # Allow for users with poor internet connections to download a Git clone
  # without history (saves several gigs but is generally slower and doesn't work
  # with the Git cache).
  if os.environ.get(CHROMIUM_NO_HISTORY) == '1':
    if cache_path:
      sys.stderr.write(
          'You cannot use "no-history" mode for syncing Chrome (i.e. set the '
          '%s environment variable to 1) when you have cache_dir configured in '
          'your .gclient.' % CHROMIUM_NO_HISTORY + '\n')
      return 1
    args.append('--no-history')
    gclient_entries_file = os.path.join(opts.chromium_dir, '.gclient_entries')
  else:
    # Write a temporary .gclient file that has the cache_dir variable added.
    # Text mode keeps the lines as str on both Python 2 and Python 3, so they
    # can be joined with the str line that replaces the last one.
    gclientfile = os.path.join(opts.chromium_dir, '.gclient')
    with open(gclientfile, 'r') as spec_file:
      spec = spec_file.read().splitlines()
    # The last line of the existing file is replaced, not appended to.
    spec[-1] = 'cache_dir = %r' % (cache_path,)
    with open(gclientfile + '.tmp', 'w') as f:
      f.write('\n'.join(spec))

    args += [
      '--gclientfile', '.gclient.tmp',
      '--delete_unversioned_trees', '--reset', '--upstream'
    ]
    gclient_entries_file = os.path.join(opts.chromium_dir,
                                        '.gclient.tmp_entries')

  # To avoid gclient sync problems when DEPS entries have been removed we must
  # wipe the gclient's entries file that contains cached URLs for all DEPS.
  if os.path.exists(gclient_entries_file):
    os.unlink(gclient_entries_file)

  if target_os_list:
    args += ['--deps=' + target_os_list]

  print('Running "%s" in %s' % (' '.join(args), opts.chromium_dir))
  ret = subprocess.call(args, cwd=opts.chromium_dir, env=env)
  if ret == 0:
    # Record the synced state so the next run can return early.
    with open(flag_file, 'w') as f:
      f.write(flag_file_content)

  return ret
# Run the sync when executed as a script and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
  sys.exit(main())

68
libs/libyuv/third_party/gflags/BUILD.gn vendored Normal file
View File

@ -0,0 +1,68 @@
#
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# This is a copy of WebRTC's BUILD.gn.
# Select the directory that holds the pre-generated gflags configuration
# headers for the target platform.
if (is_win) {
gflags_gen_arch_root = "gen/win"
} else {
gflags_gen_arch_root = "gen/posix"
}
# Settings applied to the gflags target through its public_configs.
config("gflags_config") {
include_dirs = [
"$gflags_gen_arch_root/include", # For configured files.
"src", # For everything else.
]
defines = [
# These macros exist so flags and symbols are properly exported when
# building DLLs. Since we don't build DLLs, we need to disable them.
"GFLAGS_DLL_DECL=",
"GFLAGS_DLL_DECLARE_FLAG=",
"GFLAGS_DLL_DEFINE_FLAG=",
]
# GN orders flags on a target before flags from configs. The default config
# adds -Wall, and this flag has to come after -Wall, so it needs to come
# from a config and can't be set on the target directly.
if (is_clang) {
cflags = [ "-Wno-unused-local-typedef" ]
}
}
# The gflags library sources, compiled with the no_chromium_code compiler
# config in place of chromium_code.
source_set("gflags") {
sources = [
"src/gflags.cc",
"src/gflags_completions.cc",
"src/gflags_reporting.cc",
]
if (is_win) {
sources += [ "src/windows/port.cc" ]
# MSVC warnings disabled for these sources.
cflags = [
"/wd4005", # WIN32_LEAN_AND_MEAN.
"/wd4267", # Conversion from size_t to "type".
]
}
include_dirs = [ "$gflags_gen_arch_root/include/private" ] # For config.h
public_configs = [ ":gflags_config" ]
# Swap the chromium_code config for no_chromium_code.
configs -= [ "//build/config/compiler:chromium_code" ]
configs += [ "//build/config/compiler:no_chromium_code" ]
if (is_clang) {
# TODO(andrew): Look into fixing this warning upstream:
# http://code.google.com/p/webrtc/issues/detail?id=760
configs -= [ "//build/config/clang:extra_warnings" ]
}
}

28
libs/libyuv/third_party/gflags/LICENSE vendored Normal file
View File

@ -0,0 +1,28 @@
Copyright (c) 2006, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,28 @@
URL: http://code.google.com/p/gflags/
Version: 2.0
License: New BSD
License File: LICENSE
Description:
The gflags package contains a library that implements commandline
flags processing. As such it's a replacement for getopt(). It has
increased flexibility, including built-in support for C++ types like
string, and the ability to define flags in the source file in which
they're used.
Local Modifications: None
How to update platform configuration files:
The gen/ directory contains pre-generated configuration header files.
Historically, all operating systems and architectures have generated
similar configurations except for Windows. This is why there's only
posix and win directories below gen/.
When rolling gflags to a newer version, it's a good idea to check if
new configuration files need to be generated as well.
Do this by running ./configure in the newly checked out version of
gflags. Then diff the generated files with the ones below gen/.
If you notice a diff, update the files with the updated ones.
If you suspect platform-dependent changes other than Windows, you'll
have to checkout gflags on the other platforms as well and run
./configure there too.

View File

@ -0,0 +1,592 @@
// Copyright (c) 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
// Author: Ray Sidney
// Revamped and reorganized by Craig Silverstein
//
// This is the file that should be included by any file which declares
// or defines a command line flag or wants to parse command line flags
// or print a program usage message (which will include information about
// flags). Executive summary, in the form of an example foo.cc file:
//
// #include "foo.h" // foo.h has a line "DECLARE_int32(start);"
// #include "validators.h" // hypothetical file defining ValidateIsFile()
//
// DEFINE_int32(end, 1000, "The last record to read");
//
// DEFINE_string(filename, "my_file.txt", "The file to read");
// // Crash if the specified file does not exist.
// static bool dummy = RegisterFlagValidator(&FLAGS_filename,
// &ValidateIsFile);
//
// DECLARE_bool(verbose); // some other file has a DEFINE_bool(verbose, ...)
//
// void MyFunc() {
// if (FLAGS_verbose) printf("Records %d-%d\n", FLAGS_start, FLAGS_end);
// }
//
// Then, at the command-line:
// ./foo --noverbose --start=5 --end=100
//
// For more details, see
// doc/gflags.html
//
// --- A note about thread-safety:
//
// We describe many functions in this routine as being thread-hostile,
// thread-compatible, or thread-safe. Here are the meanings we use:
//
// thread-safe: it is safe for multiple threads to call this routine
// (or, when referring to a class, methods of this class)
// concurrently.
// thread-hostile: it is not safe for multiple threads to call this
// routine (or methods of this class) concurrently. In gflags,
// most thread-hostile routines are intended to be called early in,
// or even before, main() -- that is, before threads are spawned.
// thread-compatible: it is safe for multiple threads to read from
// this variable (when applied to variables), or to call const
// methods of this class (when applied to classes), as long as no
// other thread is writing to the variable or calling non-const
// methods of this class.
#ifndef GOOGLE_GFLAGS_H_
#define GOOGLE_GFLAGS_H_
#include <string>
#include <vector>
// We care a lot about number of bits things take up. Unfortunately,
// systems define their bit-specific ints in a lot of different ways.
// We use our own way, and have a typedef to get there.
// Note: these commands below may look like "#if 1" or "#if 0", but
// that's because they were constructed that way at ./configure time.
// Look at gflags.h.in to see how they're calculated (based on your config).
#if 1
#include <stdint.h> // the normal place uint16_t is defined
#endif
#if 1
#include <sys/types.h> // the normal place u_int16_t is defined
#endif
#if 1
#include <inttypes.h> // a third place for uint16_t or u_int16_t
#endif
namespace google {
#if 1 // the C99 format
typedef int32_t int32;
typedef uint32_t uint32;
typedef int64_t int64;
typedef uint64_t uint64;
#elif 1 // the BSD format
typedef int32_t int32;
typedef u_int32_t uint32;
typedef int64_t int64;
typedef u_int64_t uint64;
#elif 0 // the windows (vc7) format
typedef __int32 int32;
typedef unsigned __int32 uint32;
typedef __int64 int64;
typedef unsigned __int64 uint64;
#else
#error Do not know how to define a 32-bit integer quantity on your system
#endif
// TODO(kjellander): update generated .h's for new gflags.
// https://code.google.com/p/webrtc/issues/detail?id=2251
extern const char* VersionString();
extern void SetVersionString(const std::string& version);
// --------------------------------------------------------------------
// To actually define a flag in a file, use DEFINE_bool,
// DEFINE_string, etc. at the bottom of this file. You may also find
// it useful to register a validator with the flag. This ensures that
// when the flag is parsed from the commandline, or is later set via
// SetCommandLineOption, we call the validation function. It is _not_
// called when you assign the value to the flag directly using the = operator.
//
// The validation function should return true if the flag value is valid, and
// false otherwise. If the function returns false for the new setting of the
// flag, the flag will retain its current value. If it returns false for the
// default value, ParseCommandLineFlags() will die.
//
// This function is safe to call at global construct time (as in the
// example below).
//
// Example use:
// static bool ValidatePort(const char* flagname, int32 value) {
// if (value > 0 && value < 32768) // value is ok
// return true;
// printf("Invalid value for --%s: %d\n", flagname, (int)value);
// return false;
// }
// DEFINE_int32(port, 0, "What port to listen on");
// static bool dummy = RegisterFlagValidator(&FLAGS_port, &ValidatePort);
// Returns true if successfully registered, false if not (because the
// first argument doesn't point to a command-line flag, or because a
// validator is already registered for this flag).
bool RegisterFlagValidator(const bool* flag,
bool (*validate_fn)(const char*, bool));
bool RegisterFlagValidator(const int32* flag,
bool (*validate_fn)(const char*, int32));
bool RegisterFlagValidator(const int64* flag,
bool (*validate_fn)(const char*, int64));
bool RegisterFlagValidator(const uint64* flag,
bool (*validate_fn)(const char*, uint64));
bool RegisterFlagValidator(const double* flag,
bool (*validate_fn)(const char*, double));
bool RegisterFlagValidator(const std::string* flag,
bool (*validate_fn)(const char*, const std::string&));
// --------------------------------------------------------------------
// These methods are the best way to get access to info about the
// list of commandline flags. Note that these routines are pretty slow.
// GetAllFlags: mostly-complete info about the list, sorted by file.
// ShowUsageWithFlags: pretty-prints the list to stdout (what --help does)
// ShowUsageWithFlagsRestrict: limit to filenames with restrict as a substr
//
// In addition to accessing flags, you can also access argv[0] (the program
// name) and argv (the entire commandline), which we sock away a copy of.
// These variables are static, so you should only set them once.
// Description of a single flag, as handed back through the OUTPUT
// parameters of GetAllFlags() and GetCommandLineFlagInfo() and returned by
// GetCommandLineFlagInfoOrDie(). All values are reported as strings.
struct CommandLineFlagInfo {
std::string name; // the name of the flag
std::string type; // the type of the flag: int32, etc
std::string description; // the "help text" associated with the flag
std::string current_value; // the current value, as a string
std::string default_value; // the default value, as a string
std::string filename; // 'cleaned' version of filename holding the flag
bool has_validator_fn; // true if RegisterFlagValidator called on flag
bool is_default; // true if the flag has the default value and
// has not been set explicitly from the cmdline
// or via SetCommandLineOption
// NOTE(review): not documented in this header; presumably points at the
// flag's own storage (i.e. &FLAGS_<name>) -- confirm against the
// implementation before relying on it.
const void* flag_ptr;
};
// Using this inside of a validator is a recipe for a deadlock.
// TODO(wojtekm) Fix locking when validators are running, to make it safe to
// call validators during ParseAllFlags.
// Also make sure then to uncomment the corresponding unit test in
// commandlineflags_unittest.sh
extern void GetAllFlags(std::vector<CommandLineFlagInfo>* OUTPUT);
// These two are actually defined in commandlineflags_reporting.cc.
extern void ShowUsageWithFlags(const char *argv0); // what --help does
extern void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict);
// Create a descriptive string for a flag.
// Goes to some trouble to make pretty line breaks.
extern std::string DescribeOneFlag(const CommandLineFlagInfo& flag);
// Thread-hostile; meant to be called before any threads are spawned.
extern void SetArgv(int argc, const char** argv);
// The following functions are thread-safe as long as SetArgv() is
// only called before any threads start.
extern const std::vector<std::string>& GetArgvs(); // all of argv as a vector
extern const char* GetArgv(); // all of argv as a string
extern const char* GetArgv0(); // only argv0
extern uint32 GetArgvSum(); // simple checksum of argv
extern const char* ProgramInvocationName(); // argv0, or "UNKNOWN" if not set
extern const char* ProgramInvocationShortName(); // basename(argv0)
// ProgramUsage() is thread-safe as long as SetUsageMessage() is only
// called before any threads start.
extern const char* ProgramUsage(); // string set by SetUsageMessage()
// --------------------------------------------------------------------
// Normally you access commandline flags by just saying "if (FLAGS_foo)"
// or whatever, and set them by calling "FLAGS_foo = bar" (or, more
// commonly, via the DEFINE_foo macro). But if you need a bit more
// control, we have programmatic ways to get/set the flags as well.
// These programmatic ways to access flags are thread-safe, but direct
// access is only thread-compatible.
// Return true iff the flagname was found.
// OUTPUT is set to the flag's value, or unchanged if we return false.
extern bool GetCommandLineOption(const char* name, std::string* OUTPUT);
// Return true iff the flagname was found. OUTPUT is set to the flag's
// CommandLineFlagInfo or unchanged if we return false.
extern bool GetCommandLineFlagInfo(const char* name,
CommandLineFlagInfo* OUTPUT);
// Return the CommandLineFlagInfo of the flagname. exit() if name not found.
// Example usage, to check if a flag's value is currently the default value:
// if (GetCommandLineFlagInfoOrDie("foo").is_default) ...
extern CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name);
// Selects how SetCommandLineOptionWithMode() applies the supplied value
// to the named flag.
enum FlagSettingMode {
// update the flag's value (can call this multiple times).
SET_FLAGS_VALUE,
// update the flag's value, but *only if* it has not yet been updated
// with SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef".
SET_FLAG_IF_DEFAULT,
// set the flag's default value to this. If the flag has not been updated
// yet (via SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef")
// change the flag's current value to the new default value as well.
SET_FLAGS_DEFAULT
};
// Set a particular flag ("command line option"). Returns a string
// describing the new value that the option has been set to. The
// return value API is not well-specified, so basically just depend on
// it to be empty if the setting failed for some reason -- the name is
// not a valid flag name, or the value is not a valid value -- and
// non-empty else.
// SetCommandLineOption uses set_mode == SET_FLAGS_VALUE (the common case)
extern std::string SetCommandLineOption(const char* name, const char* value);
extern std::string SetCommandLineOptionWithMode(const char* name, const char* value,
FlagSettingMode set_mode);
// --------------------------------------------------------------------
// Saves the states (value, default value, whether the user has set
// the flag, registered validators, etc) of all flags, and restores
// them when the FlagSaver is destroyed. This is very useful in
// tests, say, when you want to let your tests change the flags, but
// make sure that they get reverted to the original states when your
// test is complete.
//
// Example usage:
// void TestFoo() {
// FlagSaver s1;
// FLAGS_foo = false;
// FLAGS_bar = "some value";
//
// // test happens here. You can return at any time
// // without worrying about restoring the FLAG values.
// }
//
// Note: This class is marked with __attribute__((unused)) because all the
// work is done in the constructor and destructor, so in the standard
// usage example above, the compiler would complain that it's an
// unused variable.
//
// This class is thread-safe.
// Scope-bound snapshot of flag state: the state is saved when the object is
// constructed and put back when it is destroyed.
class FlagSaver {
public:
FlagSaver();
~FlagSaver();
private:
class FlagSaverImpl* impl_; // we use pimpl here to keep API steady
// Copy constructor and assignment are declared private (and not defined in
// this header) so a FlagSaver cannot be copied.
FlagSaver(const FlagSaver&); // no copying!
void operator=(const FlagSaver&);
// The attribute keeps compilers from warning about "FlagSaver s;" locals
// that are never referenced after construction.
} __attribute__ ((unused));
// --------------------------------------------------------------------
// Some deprecated or hopefully-soon-to-be-deprecated functions.
// This is often used for logging. TODO(csilvers): figure out a better way
extern std::string CommandlineFlagsIntoString();
// Usually where this is used, a FlagSaver should be used instead.
extern bool ReadFlagsFromString(const std::string& flagfilecontents,
const char* prog_name,
bool errors_are_fatal); // uses SET_FLAGS_VALUE
// These let you manually implement --flagfile functionality.
// DEPRECATED.
extern bool AppendFlagsIntoFile(const std::string& filename, const char* prog_name);
extern bool SaveCommandFlags(); // actually defined in google.cc !
extern bool ReadFromFlagsFile(const std::string& filename, const char* prog_name,
bool errors_are_fatal); // uses SET_FLAGS_VALUE
// --------------------------------------------------------------------
// Useful routines for initializing flags from the environment.
// In each case, if 'varname' does not exist in the environment
// return defval. If 'varname' does exist but is not valid
// (e.g., not a number for an int32 flag), abort with an error.
// Otherwise, return the value. NOTE: for booleans, for true use
// 't' or 'T' or 'true' or '1', for false 'f' or 'F' or 'false' or '0'.
extern bool BoolFromEnv(const char *varname, bool defval);
extern int32 Int32FromEnv(const char *varname, int32 defval);
extern int64 Int64FromEnv(const char *varname, int64 defval);
extern uint64 Uint64FromEnv(const char *varname, uint64 defval);
extern double DoubleFromEnv(const char *varname, double defval);
extern const char *StringFromEnv(const char *varname, const char *defval);
// --------------------------------------------------------------------
// The next two functions parse commandlineflags from main():
// Set the "usage" message for this program. For example:
// string usage("This program does nothing. Sample usage:\n");
// usage += argv[0] + " <uselessarg1> <uselessarg2>";
// SetUsageMessage(usage);
// Do not include commandline flags in the usage: we do that for you!
// Thread-hostile; meant to be called before any threads are spawned.
extern void SetUsageMessage(const std::string& usage);
// Looks for flags in argv and parses them. Rearranges argv to put
// flags first, or removes them entirely if remove_flags is true.
// If a flag is defined more than once in the command line or flag
// file, the last definition is used. Returns the index (into argv)
// of the first non-flag argument.
// See top-of-file for more details on this function.
#ifndef SWIG // In swig, use ParseCommandLineFlagsScript() instead.
extern uint32 ParseCommandLineFlags(int *argc, char*** argv,
bool remove_flags);
#endif
// Calls to ParseCommandLineNonHelpFlags and then to
// HandleCommandLineHelpFlags can be used instead of a call to
// ParseCommandLineFlags during initialization, in order to allow for
// changing default values for some FLAGS (via
// e.g. SetCommandLineOptionWithMode calls) between the time of
// command line parsing and the time of dumping help information for
// the flags as a result of command line parsing. If a flag is
// defined more than once in the command line or flag file, the last
// definition is used. Returns the index (into argv) of the first
// non-flag argument. (If remove_flags is true, will always return 1.)
extern uint32 ParseCommandLineNonHelpFlags(int *argc, char*** argv,
bool remove_flags);
// This is actually defined in commandlineflags_reporting.cc.
// This function is misnamed (it also handles --version, etc.), but
// it's too late to change that now. :-(
extern void HandleCommandLineHelpFlags(); // in commandlineflags_reporting.cc
// Allow command line reparsing. Disables the error normally
// generated when an unknown flag is found, since it may be found in a
// later parse. Thread-hostile; meant to be called before any threads
// are spawned.
extern void AllowCommandLineReparsing();
// Reparse the flags that have not yet been recognized. Only flags
// registered since the last parse will be recognized. Any flag value
// must be provided as part of the argument using "=", not as a
// separate command line argument that follows the flag argument.
// Intended for handling flags from dynamically loaded libraries,
// since their flags are not registered until they are loaded.
// Unlike ParseCommandLineNonHelpFlags(), this function takes no arguments
// and returns void, so no argv index is reported back to the caller.
extern void ReparseCommandLineNonHelpFlags();
// Clean up memory allocated by flags. This is only needed to reduce
// the quantity of "potentially leaked" reports emitted by memory
// debugging tools such as valgrind. It is not required for normal
// operation, or for the perftools heap-checker. It must only be called
// when the process is about to exit, and all threads that might
// access flags are quiescent. Referencing flags after this is called
// will have unexpected consequences. This is not safe to run when
// multiple threads might be running: the function is thread-hostile.
extern void ShutDownCommandLineFlags();
// --------------------------------------------------------------------
// Now come the command line flag declaration/definition macros that
// will actually be used. They're kind of hairy. A major reason
// for this is initialization: we want people to be able to access
// variables in global constructors and have that not crash, even if
// their global constructor runs before the global constructor here.
// (Obviously, we can't guarantee the flags will have the correct
// default value in that case, but at least accessing them is safe.)
// The only way to do that is have flags point to a static buffer.
// So we make one, using a union to ensure proper alignment, and
// then use placement-new to actually set up the flag with the
// correct default value. In the same vein, we have to worry about
// flag access in global destructors, so FlagRegisterer has to be
// careful never to destroy the flag-values it constructs.
//
// Note that when we define a flag variable FLAGS_<name>, we also
// preemptively define a junk variable, FLAGS_no<name>. This is to
// cause a link-time error if someone tries to define 2 flags with
// names like "logging" and "nologging". We do this because a bool
// flag FLAG can be set from the command line to true with a "-FLAG"
// argument, and to false with a "-noFLAG" argument, and so this can
// potentially avert confusion.
//
// We also put flags into their own namespace. It is purposefully
// named in an opaque way that people should have trouble typing
// directly. The idea is that DEFINE puts the flag in the weird
// namespace, and DECLARE imports the flag from there into the current
// namespace. The net result is to force people to use DECLARE to get
// access to a flag, rather than saying "extern bool FLAGS_whatever;"
// or some such instead. We want this so we can put extra
// functionality (like sanity-checking) in DECLARE if we want, and
// make sure it is picked up everywhere.
//
// We also put the type of the variable in the namespace, so that
// people can't DECLARE_int32 something that they DEFINE_bool'd
// elsewhere.
// Helper instantiated once per flag, as a static object, by the DEFINE_*
// macros in this header.
// NOTE(review): the constructor is defined outside this header; presumably
// it adds the flag to the global registry -- confirm in the implementation.
class FlagRegisterer {
public:
// name, type: the flag's name and type as strings. The DEFINE macros
// pass the stringized macro arguments ("string" for DEFINE_string).
// help: the help text, after MAYBE_STRIPPED_HELP has been applied.
// filename: the DEFINE macros pass __FILE__.
// current_storage: address of the flag's current-value storage
// (&FLAGS_<name> in DEFINE_VARIABLE).
// defvalue_storage: address of the flag's default-value storage
// (&FLAGS_no<name> in DEFINE_VARIABLE).
FlagRegisterer(const char* name, const char* type,
const char* help, const char* filename,
void* current_storage, void* defvalue_storage);
};
extern bool FlagsTypeWarn(const char *name);
// If your application #defines STRIP_FLAG_HELP to a non-zero value
// before #including this file, we remove the help message from the
// binary file. This can reduce the size of the resulting binary
// somewhat, and may also be useful for security reasons.
extern const char kStrippedFlagHelp[];
}
#ifndef SWIG // In swig, ignore the main flag declarations
#if defined(STRIP_FLAG_HELP) && STRIP_FLAG_HELP > 0
// Need this construct to avoid the 'defined but not used' warning.
#define MAYBE_STRIPPED_HELP(txt) (false ? (txt) : ::google::kStrippedFlagHelp)
#else
#define MAYBE_STRIPPED_HELP(txt) txt
#endif
// Each command-line flag has two variables associated with it: one
// with the current value, and one with the default value. However,
// we have a third variable, which is where value is assigned; it's a
// constant. This guarantees that FLAGS_##name is initialized at
// static initialization time (e.g. before program-start) rather than
// global construction time (which is after program-start but
// before main), at least when 'value' is a compile-time constant. We
// use a small trick for the "default value" variable, and call it
// FLAGS_no<name>. This serves the second purpose of assuring a
// compile error if someone tries to define a flag named no<name>
// which is illegal (--foo and --nofoo both affect the "foo" flag).
#define DEFINE_VARIABLE(type, shorttype, name, value, help) \
namespace fL##shorttype { \
static const type FLAGS_nono##name = value; \
type FLAGS_##name = FLAGS_nono##name; \
type FLAGS_no##name = FLAGS_nono##name; \
static ::google::FlagRegisterer o_##name( \
#name, #type, MAYBE_STRIPPED_HELP(help), __FILE__, \
&FLAGS_##name, &FLAGS_no##name); \
} \
using fL##shorttype::FLAGS_##name
#define DECLARE_VARIABLE(type, shorttype, name) \
namespace fL##shorttype { \
extern type FLAGS_##name; \
} \
using fL##shorttype::FLAGS_##name
// For DEFINE_bool, we want to do the extra check that the passed-in
// value is actually a bool, and not a string or something that can be
// coerced to a bool. These declarations (no definition needed!) will
// help us do that, and never evaluate From, which is important.
// We'll use 'sizeof(IsBoolFlag(val))' to distinguish. This code requires
// that the compiler have different sizes for bool & double. Since
// this is not guaranteed by the standard, we check it with a
// compile-time assert (msg[-1] will give a compile-time error).
namespace fLB {
// Element type of the array typedefs used as compile-time assertions.
struct CompileAssert {};
// Array bound is 1 when sizeof(double) != sizeof(bool) and -1 otherwise;
// a negative bound is ill-formed, so the typedef fails to compile if the
// two sizes are ever equal.
typedef CompileAssert expected_sizeof_double_neq_sizeof_bool[
(sizeof(double) != sizeof(bool)) ? 1 : -1];
// Overload pair that is only declared here. DEFINE_bool applies sizeof()
// to a call expression, so only the selected overload's return type
// matters: the non-template overload yields bool for a bool argument,
// while the template yields double for arguments of other types.
template<typename From> double IsBoolFlag(const From& from);
bool IsBoolFlag(bool from);
} // namespace fLB
#define DECLARE_bool(name) DECLARE_VARIABLE(bool, B, name)
#define DEFINE_bool(name, val, txt) \
namespace fLB { \
typedef ::fLB::CompileAssert FLAG_##name##_value_is_not_a_bool[ \
(sizeof(::fLB::IsBoolFlag(val)) != sizeof(double)) ? 1 : -1]; \
} \
DEFINE_VARIABLE(bool, B, name, val, txt)
#define DECLARE_int32(name) DECLARE_VARIABLE(::google::int32, I, name)
#define DEFINE_int32(name,val,txt) DEFINE_VARIABLE(::google::int32, I, name, val, txt)
#define DECLARE_int64(name) DECLARE_VARIABLE(::google::int64, I64, name)
#define DEFINE_int64(name,val,txt) DEFINE_VARIABLE(::google::int64, I64, name, val, txt)
#define DECLARE_uint64(name) DECLARE_VARIABLE(::google::uint64, U64, name)
#define DEFINE_uint64(name,val,txt) DEFINE_VARIABLE(::google::uint64, U64, name, val, txt)
#define DECLARE_double(name) DECLARE_VARIABLE(double, D, name)
#define DEFINE_double(name, val, txt) DEFINE_VARIABLE(double, D, name, val, txt)
// Strings are trickier, because they're not a POD, so we can't
// construct them at static-initialization time (instead they get
// constructed at global-constructor time, which is much later). To
// try to avoid crashes in that case, we use a char buffer to store
// the string, which we can static-initialize, and then placement-new
// into it later. It's not perfect, but the best we can do.
namespace fLS {

// "string" may come to mean something else between this point and the place
// where the macros below are expanded (for example when another string
// implementation is defined after this header has been included). Pin down
// today's meaning under a private name and have the macros use that.
typedef std::string clstring;

// Placement-constructs a clstring from the C string |value| inside the
// caller-provided buffer |stringspot| and returns a pointer to the new object.
inline clstring* dont_pass0toDEFINE_string(char *stringspot,
                                           const char *value) {
  clstring* const constructed = new(stringspot) clstring(value);
  return constructed;
}

// Same as above, but copy-constructs from an existing clstring.
inline clstring* dont_pass0toDEFINE_string(char *stringspot,
                                           const clstring &value) {
  clstring* const constructed = new(stringspot) clstring(value);
  return constructed;
}

// Declared only; no definition appears in this header.
// NOTE(review): presumably exists so that DEFINE_string(name, 0, ...) binds
// to this overload instead of silently treating 0 as a null const char* --
// confirm.
inline clstring* dont_pass0toDEFINE_string(char *stringspot,
                                           int value);

}  // namespace fLS
#define DECLARE_string(name) namespace fLS { extern ::fLS::clstring& FLAGS_##name; } \
using fLS::FLAGS_##name
// We need to define a var named FLAGS_no##name so people don't define
// --string and --nostring. And we need a temporary place to put val
// so we don't have to evaluate it twice. Two great needs that go
// great together!
// The weird 'using' + 'extern' inside the fLS namespace is to work around
// an unknown compiler bug/issue with the gcc 4.2.1 on SUSE 10. See
// http://code.google.com/p/google-gflags/issues/detail?id=20
#define DEFINE_string(name, val, txt) \
namespace fLS { \
using ::fLS::clstring; \
static union { void* align; char s[sizeof(clstring)]; } s_##name[2]; \
clstring* const FLAGS_no##name = ::fLS:: \
dont_pass0toDEFINE_string(s_##name[0].s, \
val); \
static ::google::FlagRegisterer o_##name( \
#name, "string", MAYBE_STRIPPED_HELP(txt), __FILE__, \
s_##name[0].s, new (s_##name[1].s) clstring(*FLAGS_no##name)); \
extern clstring& FLAGS_##name; \
using fLS::FLAGS_##name; \
clstring& FLAGS_##name = *FLAGS_no##name; \
} \
using fLS::FLAGS_##name
#endif // SWIG
#endif // GOOGLE_GFLAGS_H_

View File

@ -0,0 +1,121 @@
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ---
// Author: Dave Nicponski
//
// Implement helpful bash-style command line flag completions
//
// ** Functional API:
// HandleCommandLineCompletions() should be called early during
// program startup, but after command line flag code has been
// initialized, such as the beginning of HandleCommandLineHelpFlags().
// It checks the value of the flag --tab_completion_word. If this
// flag is empty, nothing happens here. If it contains a string,
// however, then HandleCommandLineCompletions() will hijack the
// process, attempting to identify the intention behind this
// completion. Regardless of the outcome of this deduction, the
// process will be terminated, similar to --helpshort flag
// handling.
//
// ** Overview of Bash completions:
// Bash can be told to programmatically determine completions for the
// current 'cursor word'. It does this by (in this case) invoking a
// command with some additional arguments identifying the command
// being executed, the word being completed, and the previous word
// (if any). Bash then expects a sequence of output lines to be
// printed to stdout. If these lines all contain a common prefix
// longer than the cursor word, bash will replace the cursor word
// with that common prefix, and display nothing. If there isn't such
// a common prefix, bash will display the lines in pages using 'more'.
//
// ** Strategy taken for command line completions:
// If we can deduce either the exact flag intended, or a common flag
// prefix, we'll output exactly that. Otherwise, if information
// must be displayed to the user, we'll take the opportunity to add
// some helpful information beyond just the flag name (specifically,
// we'll include the default flag value and as much of the flag's
// description as can fit on a single terminal line width, as specified
// by the flag --tab_completion_columns). Furthermore, we'll try to
// make bash order the output such that the most useful or relevant
// flags are the most likely to be shown at the top.
//
// ** Additional features:
// To assist in finding that one really useful flag, substring matching
// was implemented. Before pressing a <TAB> to get completion for the
// current word, you can append one or more '?' to the flag to do
// substring matching. Here's the semantics:
// --foo<TAB> Show me all flags with names prefixed by 'foo'
// --foo?<TAB> Show me all flags with 'foo' somewhere in the name
// --foo??<TAB> Same as prior case, but also search in module
// definition path for 'foo'
// --foo???<TAB> Same as prior case, but also search in flag
// descriptions for 'foo'
// Finally, we'll trim the output to a relatively small number of
// flags to keep bash quiet about the verbosity of output. If one
// really wanted to see all possible matches, appending a '+' to the
// search word will force the exhaustive list of matches to be printed.
//
// ** How to have bash accept completions from a binary:
// Bash requires that it be informed about each command that programmatic
// completion should be enabled for. Example addition to a .bashrc
// file would be (your path to gflags_completions.sh file may differ):
/*
$ complete -o bashdefault -o default -o nospace -C \
'/usr/local/bin/gflags_completions.sh --tab_completion_columns $COLUMNS' \
time env binary_name another_binary [...]
*/
// This would allow the following to work:
// $ /path/to/binary_name --vmodule<TAB>
// Or:
// $ ./bin/path/another_binary --gfs_u<TAB>
// (etc)
//
// Sadly, it appears that bash gives no easy way to force this behavior for
// all commands. That's where the "time" in the above example comes in.
// If you haven't specifically added a command to the list of completion
// supported commands, you can still get completions by prefixing the
// entire command with "env".
// $ env /some/brand/new/binary --vmod<TAB>
// Assuming that "binary" is a newly compiled binary, this should still
// produce the expected completion output.
#ifndef GOOGLE_GFLAGS_COMPLETIONS_H_
#define GOOGLE_GFLAGS_COMPLETIONS_H_
namespace google {
// Entry point for the bash-style flag completion described at the top of
// this header: it looks at --tab_completion_word, does nothing when that
// flag is empty, and otherwise handles the completion request and
// terminates the process.
void HandleCommandLineCompletions(void);
}
#endif // GOOGLE_GFLAGS_COMPLETIONS_H_

Some files were not shown because too many files have changed in this diff Show More