Projects
Essentials
kvazaar
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 13
View file
kvazaar.changes
Changed
@@ -1,4 +1,70 @@ ------------------------------------------------------------------- +Fri Nov 17 14:01:40 UTC 2017 - aloisio@gmx.com + +- Update to version 1.2.0 + Features: + * Intra prediction mode encryption with + --crypto=intra_pred_modes (2b8ce5e) + * Adaptive QP for 360° video with --erp-aqp (26adef4) + * New selection algorithm for --owf=auto and --threads=auto + (8c4a347) + * Added an option to set the encryption key using --key (2e13091) + * Added an option to limit SAO to band offset or edge offset + only with --sao=band and --sao=edge (8674c0f) + Optimization: + * Reduced number of intra modes checked when using --rd=2 + (2cad317) + * Reduced inter-frame CTU dependencies caused by SAO (050e90d) + * Changed to a faster calculation for coefficient costs when + using --rd=0 (1ead9c0) + Fixes: + * Fixed long motion vectors not getting clipped (#158, 85e2a40) + * Fixed order of pictures in reconstruction debug output when + --gop=8 is used (#101, aae141f) + * Fixed a use-after-free when encoding very few frames with + --gop=8 (#161, 2991962) + * Fixed a crash when video size is not a multiple of the + smallest CU size (2f2405d) + * Fixed invalid bitstream when QP is too large (382636d) + * Fixed a race condition causing a deadlock (5f8e17d) + * Fixed a memory leak in encryption (8654b48) + * Fixed I-frames not being IRAP frames when using GOP (00c9f52, + 841597e) + * Fixed computing inter and intra costs with different metrics + (afc13f1) + * Fixed reliance on undefined behavior (b41f0fa, 924cf85) + * Fixed --mv-constraint=frametilemargin constraining motion + vectors too much (409d211) + * Fixed using --bipred with --tmvp (#160, 9974380) + User Interface: + * Changed type of kvz_config.roi.dqps from uint8_t* to int8_t. + Delta QP values for --roi may now be negative. (79cb3a2) + * Changed PSNR display format (20d6444) + Building: + * Default to no -Werror. Run configure with --enable-werror to + enable it. (033bc6b) + * make check now runs valgrind tests that used to only run on + Travis. Programs ffmpeg, valgrind and TAppDecoderStatic should + be found from $PATH (6bbe5e1) + Refactoring: + * Removed duplicate code in inter MVP and merge candidate + selection (4fb0783) + * Removed duplicate code in intra reconstruction for luma and + chroma (e944416) + * Changed functions for writing the CU tree bitstream to use + luma pixel coordinates (610c91b, f5eef7f) + * Removed duplicate code in functions for writing intra CU + bitstream with and without encryption (525a518) + * Removed duplicate code in helper functions in search.c + (2c73476) + * Gathered function parameters for inter search functions into a + single struct (2fa3d82) + +- Refreshed kvazaar.memset.patch + +- Bumped library version to 4 + +------------------------------------------------------------------- Wed Feb 22 12:34:40 UTC 2017 - scarabeus@opensuse.org - Bit of spec cleanup
View file
kvazaar.spec
Changed
@@ -18,9 +18,9 @@ %define libname libkvazaar -%define libmver 3 +%define libmver 4 Name: kvazaar -Version: 1.1.0 +Version: 1.2.0 Release: 0 Summary: HEVC encoder License: LGPL-2.1
View file
kvazaar.memset.patch
Changed
@@ -1,10 +1,8 @@ -gcc7-7.1.1+r248152-1.2 -[ 112s] rdo.c: In function 'kvz_rdoq': -[ 112s] rdo.c:563:14: error: 'memset' used with length equal to number of elements without multiplication by element size [-Werror=memset-elt-size] -[ 112s] case 16: memset(sig_coeffgroup_flag, 0, 16 * sizeof(sig_coeffgroup_flag[0])); break; ---- a/src/rdo.c -+++ b/src/rdo.c -@@ -555,6 +555,7 @@ void kvz_rdoq(encoder_state_t * const st +Index: kvazaar-1.2.0/src/rdo.c +=================================================================== +--- kvazaar-1.2.0.orig/src/rdo.c ++++ kvazaar-1.2.0/src/rdo.c +@@ -593,6 +593,7 @@ void kvz_rdoq(encoder_state_t * const st uint32_t cg_num = width * height >> 4; @@ -12,8 +10,8 @@ // Explicitly tell the only possible numbers of elements to be zeroed. // Hope the compiler is able to utilize this information. switch (cg_num) { -@@ -564,6 +565,9 @@ void kvz_rdoq(encoder_state_t * const st - case 64: memset(sig_coeffgroup_flag, 0, 64 * sizeof(sig_coeffgroup_flag[0])); break; +@@ -602,6 +603,9 @@ void kvz_rdoq(encoder_state_t * const st + case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break; default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups"); } +#else
View file
kvazaar-1.1.0.tar.gz/.travis-install.sh
Deleted
@@ -1,12 +0,0 @@ -#!/bin/sh -set -ev - -if [ -n "$VALGRIND_TEST" ]; then - wget http://ultravideo.cs.tut.fi/ffmpeg-release-32bit-static.tar.xz - 7z x ffmpeg-release-32bit-static.tar.xz - 7z x ffmpeg-release-32bit-static.tar - chmod +x ./ffmpeg-2.6.3-32bit-static/ffmpeg - ./ffmpeg-2.6.3-32bit-static/ffmpeg -f lavfi -i "mandelbrot=size=${TEST_DIM}:end_pts=10" -vframes $TEST_FRAMES -pix_fmt yuv420p mandelbrot_${TEST_DIM}.yuv - wget http://ultravideo.cs.tut.fi/ubuntu-12.04-hmdec-16.10.tgz - tar -xzvf ubuntu-12.04-hmdec-16.10.tgz -fi
View file
kvazaar-1.1.0.tar.gz/.travis-script.sh
Deleted
@@ -1,21 +0,0 @@ -#!/bin/sh -set -ev - -./autogen.sh -./configure $KVZ_CONFIGURE_ARGS -make --jobs=2 V=1 - -if [ -n "$VALGRIND_TEST" ]; then - libtool execute valgrind --leak-check=full --error-exitcode=1 -- \ - src/kvazaar -i mandelbrot_${TEST_DIM}.yuv --input-res=${TEST_DIM} \ - -o test.265 $VALGRIND_TEST - ./hmdec-16.10 -b test.265 -elif [ -n "$EXPECTED_STATUS" ]; then - set +e - libtool execute src/kvazaar $PARAMS - EXIT_STATUS=$? - set -e - [ "$EXIT_STATUS" = "$EXPECTED_STATUS" ] -else - make check -fi
View file
kvazaar-1.1.0.tar.gz/.gitignore -> kvazaar-1.2.0.tar.gz/.gitignore
Changed
@@ -41,6 +41,7 @@ *.la *.lo *.o +*.trs *.log .kdev4
View file
kvazaar-1.2.0.tar.gz/.travis-install.bash
Added
@@ -0,0 +1,25 @@ +#!/bin/bash + +# Download FFmpeg and HM decoder and place them in $PATH. + +set -euvo pipefail + +mkdir -p "${HOME}/bin" + +wget http://ultravideo.cs.tut.fi/ffmpeg-release-32bit-static.tar.xz +sha256sum -c - << EOF +4d3302ba0415e08ca10ca578dcd1f0acc48fadc9b803718283c8c670350c903e ffmpeg-release-32bit-static.tar.xz +EOF +tar xf ffmpeg-release-32bit-static.tar.xz +cp ffmpeg-2.6.3-32bit-static/ffmpeg "${HOME}/bin/ffmpeg" +chmod +x "${HOME}/bin/ffmpeg" + +wget http://ultravideo.cs.tut.fi/ubuntu-12.04-hmdec-16.10.tgz +sha256sum -c - << EOF +e00d61dd031a14aab1a03c0b23df315b8f6ec3fab66a0e2ae2162496153ccf92 ubuntu-12.04-hmdec-16.10.tgz +EOF +tar xf ubuntu-12.04-hmdec-16.10.tgz +cp hmdec-16.10 "${HOME}/bin/TAppDecoderStatic" +chmod +x "${HOME}/bin/TAppDecoderStatic" + +export PATH="${HOME}/bin:${PATH}"
View file
kvazaar-1.1.0.tar.gz/.travis.yml -> kvazaar-1.2.0.tar.gz/.travis.yml
Changed
@@ -1,137 +1,43 @@ language: c -env: - global: - - TEST_DIM=264x130 - - TEST_FRAMES=10 - -# Use container based infrastructure +# Use container based infrastructure. sudo: false -# Use this the global requirements list for valgrind tests, because those are the most numerous. addons: apt: sources: - - ubuntu-toolchain-r-test + - ubuntu-toolchain-r-test packages: - - autoconf - - libtool - - p7zip-full # to uncompress our own ffmpeg binary - - valgrind - - yasm + - autoconf + - gcc-4.8 + - libtool + - valgrind + - yasm matrix: fast_finish: true - + include: - compiler: clang - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - autoconf - - libtool - - yasm - - compiler: gcc-4.8 - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - autoconf - - gcc-4.8 - - libtool - - yasm # We have some Mac specific code and Mac sometimes has odd build issues. - os: osx compiler: clang # gcc is actually clang on Travis OS X - - # Check for external symbols without kvz_ prefix. - - compiler: gcc-4.8 + install: true script: - ./autogen.sh - - ./configure && make - - (! nm -go --defined-only src/.libs/libkvazaar.a | grep -v ' kvz_') || (echo 'ERROR Only symbols prefixed with kvz_ should be exported from libkvazaar.'; false) - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - autoconf - - gcc-4.8 - - libtool - - yasm - - # Tests trying to use invalid input dimensions - - env: EXPECTED_STATUS=1 PARAMS="-i src/kvazaar --input-res=1x65 -o /dev/null" - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - autoconf - - libtool - - yasm + - ./configure --enable-werror + - make --jobs=2 V=1 - # These valgrind tests are slow, so they are performed with the minimum - # number of small frames and fast settings. - - # Tests for interlace - - env: VALGRIND_TEST="--source-scan-type=tff -p0 --preset=ultrafast --threads=2 --owf=1 --wpp" - - # Tests for owf, wpp and tiles. There is lots of separate branches of - # code related to owf=0 and owf!=0, which is why all permutations are - # tried. - - env: VALGRIND_TEST="-p4 -r1 --owf=1 --threads=0 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r1 --owf=0 --threads=0 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --tiles-height-split=u2 --no-wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --tiles-height-split=u2 --no-wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - # Tests for rdoq, sao, deblock and signhide and subme. - - env: VALGRIND_TEST="-p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --no-rdoq --no-signhide --subme=0" - - env: VALGRIND_TEST="-p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --no-deblock --no-sao --subme=0" - - # Tests for all-intra. - - env: VALGRIND_TEST="-p1 --threads=2 --owf=1 --rd=1 --no-rdoq --no-deblock --no-sao --no-signhide" - - env: VALGRIND_TEST="-p1 --threads=2 --owf=1 --rd=2 --no-rdoq --no-deblock --no-sao --no-signhide --no-transform-skip" - - # Tests for SMP and AMP blocks. - - env: TEST_FRAMES=4 VALGRIND_TEST="--threads=2 --owf=1 --wpp --smp" - - env: TEST_FRAMES=4 VALGRIND_TEST="--threads=2 --owf=1 --wpp --amp" - - env: TEST_FRAMES=4 VALGRIND_TEST="--threads=2 --owf=1 --wpp --smp --amp" - - # Tests for rate control - - env: VALGRIND_TEST="--bitrate=500000 -p0 -r1 --owf=1 --threads=2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - # Tests for GOP, with and without OWF. - - env: TEST_FRAMES=20 VALGRIND_TEST="--gop=8 -p0 --threads=2 --wpp --owf=1 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: TEST_FRAMES=10 VALGRIND_TEST="--gop=8 -p0 --threads=2 --wpp --owf=4 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: TEST_FRAMES=20 VALGRIND_TEST="--gop=8 -p0 --threads=2 --wpp --owf=0 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - # Tests for --mv-constraint - - env: VALGRIND_TEST="--threads=2 --owf=1 --preset=ultrafast --pu-depth-inter=0-3 --mv-constraint=frametilemargin" - - env: VALGRIND_TEST="--threads=2 --owf=1 --preset=ultrafast --subme=4 --mv-constraint=frametilemargin" - - # Tests for --slices - - env: TEST_DIM=512x256 VALGRIND_TEST="--threads=2 --owf=1 --preset=ultrafast --tiles=2x2 --slices=tiles" - - env: VALGRIND_TEST="--threads=2 --owf=1 --preset=ultrafast --slices=wpp" - - # Test weird shapes. - - env: TEST_DIM=16x16 VALGRIND_TEST="--threads=2 --owf=1 --preset=veryslow" - - env: TEST_DIM=256x16 VALGRIND_TEST="--threads=2 --owf=1 --preset=veryslow" - - env: TEST_DIM=16x256 VALGRIND_TEST="--threads=2 --owf=1 --preset=veryslow" - -install: - - source .travis-install.sh +install: bash .travis-install.bash script: - - source .travis-script.sh - + - ./autogen.sh + - ./configure --enable-werror + - make --jobs=2 V=1 + - make check VERBOSE=1 + after_script: - - set +e # Disable errors to work around Travis not knowing how to fix their stuff. + # Disable errors to work around Travis not knowing how to fix their stuff. + - set +e
View file
kvazaar-1.1.0.tar.gz/README.md -> kvazaar-1.2.0.tar.gz/README.md
Changed
@@ -100,6 +100,8 @@ delta QP values in raster order. The delta QP map can be any size or aspect ratio, and will be mapped to LCU's. + --(no-)erp-aqp : Use adaptive QP for 360 video with + equirectangular projection Compression tools: --deblock [<beta:tc>] : Deblocking @@ -226,26 +228,26 @@ placebo. The effects of the presets are listed in the following table, where the names have been abbreviated to fit the layout in GitHub. - | 0-uf | 1-sf | 2-vf | 3-fr | 4-f | 5-m | 6-s | 7-sr | 8-vs | 9-p --------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- -rd | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 -pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-4 | 1-4 -pu-depth-inter | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 -me | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz -ref | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 4 -deblock | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 -signhide | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 -subme | 0 | 0 | 2 | 2 | 4 | 4 | 4 | 4 | 4 | 4 -sao | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 -rdoq | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 -rdoq-skip | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 -transform-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 -mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 -full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 -smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 -amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 -cu-split-termination | zero | zero | zero | zero | zero | zero | zero | zero | zero | off -me-early-termination | sens. | sens. | sens. | sens. | on | on | on | on | on | off +| | 0-uf | 1-sf | 2-vf | 3-fr | 4-f | 5-m | 6-s | 7-sr | 8-vs | 9-p | +| -------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +| rd | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-4 | 1-4 | +| pu-depth-inter | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 | +| me | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz | +| ref | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 4 | +| deblock | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| signhide | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | +| subme | 0 | 0 | 2 | 2 | 4 | 4 | 4 | 4 | 4 | 4 | +| sao | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| rdoq | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | +| rdoq-skip | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | +| transform-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cu-split-termination | zero | zero | zero | zero | zero | zero | zero | zero | zero | off | +| me-early-termination | sens. | sens. | sens. | sens. | on | on | on | on | on | off | ## Kvazaar library
View file
kvazaar-1.1.0.tar.gz/build/kvazaar_tests/kvazaar_tests.vcxproj -> kvazaar-1.2.0.tar.gz/build/kvazaar_tests/kvazaar_tests.vcxproj
Changed
@@ -97,6 +97,7 @@ </ProjectReference> </ItemGroup> <ItemGroup> + <ClCompile Include="..\..\tests\coeff_sum_tests.c" /> <ClCompile Include="..\..\tests\dct_tests.c" /> <ClCompile Include="..\..\tests\test_strategies.c" /> <ClCompile Include="..\..\tests\intra_sad_tests.c" />
View file
kvazaar-1.1.0.tar.gz/build/kvazaar_tests/kvazaar_tests.vcxproj.filters -> kvazaar-1.2.0.tar.gz/build/kvazaar_tests/kvazaar_tests.vcxproj.filters
Changed
@@ -39,6 +39,9 @@ <ClCompile Include="..\..\tests\dct_tests.c"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="..\..\tests\coeff_sum_tests.c"> + <Filter>Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="..\..\tests\sad_tests.h">
View file
kvazaar-1.1.0.tar.gz/build/yasm/vsyasm.props -> kvazaar-1.2.0.tar.gz/build/yasm/vsyasm.props
Changed
@@ -9,16 +9,23 @@ <YASMDependsOn Condition="'$(ConfigurationType)' != 'Makefile'">_SelectedFiles;$(YASMDependsOn)</YASMDependsOn> </PropertyGroup> + <!-- Object format name for vsyasm must be in lower case. --> + <PropertyGroup Condition="'$(Platform)' == 'Win32'"> + <YASMFormat>win32</YASMFormat> + </PropertyGroup> + <PropertyGroup Condition="'$(Platform)' == 'x64'"> + <YASMFormat>win64</YASMFormat> + </PropertyGroup> <ItemDefinitionGroup> <YASM> <Debug>False</Debug> <ObjectFile>$(IntDir)</ObjectFile> <PreProc>0</PreProc> <Parser>0</Parser> - <CommandLineTemplate>vsyasm.exe -Xvc -f $(Platform) [AllOptions] [AdditionalOptions] [Inputs]</CommandLineTemplate> + <CommandLineTemplate>vsyasm.exe -Xvc -f $(YASMFormat) [AllOptions] [AdditionalOptions] [Inputs]</CommandLineTemplate> <Outputs>%(ObjectFile)</Outputs> <ExecutionDescription>Assembling %(Filename)%(Extension)</ExecutionDescription> <ShowOnlyRuleProperties>false</ShowOnlyRuleProperties> </YASM> </ItemDefinitionGroup> -</Project> \ No newline at end of file +</Project>
View file
kvazaar-1.1.0.tar.gz/configure.ac -> kvazaar-1.2.0.tar.gz/configure.ac
Changed
@@ -22,8 +22,8 @@ # - Increment when making new releases and major or minor was not changed since last release. # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html -ver_major=3 -ver_minor=15 +ver_major=4 +ver_minor=0 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS @@ -32,7 +32,7 @@ AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) -AM_INIT_AUTOMAKE([-Wall -Werror dist-bzip2 dist-xz foreign subdir-objects]) +AM_INIT_AUTOMAKE([-Wall dist-bzip2 dist-xz foreign subdir-objects]) AM_SILENT_RULES([yes]) AC_PROG_CC @@ -56,6 +56,10 @@ KVZ_CFLAGS="-Wall -Wtype-limits -Wvla -I$srcdir/src -I$srcdir/src/extras -ftree-vectorize -fvisibility=hidden" CFLAGS="$KVZ_CFLAGS $CFLAGS" +AC_SEARCH_LIBS([log], [m c], [], [exit 1]) +AC_SEARCH_LIBS([pow], [m c], [], [exit 1]) +AC_SEARCH_LIBS([sqrt], [m c], [], [exit 1]) + AC_ARG_WITH([cryptopp], AS_HELP_STRING([--with-cryptopp], [Build with cryptopp Enables selective encryption.])) @@ -76,21 +80,24 @@ CPPFLAGS="-DKVZ_DLL_EXPORTS $CPPFLAGS" -AC_SEARCH_LIBS([log], [m c], [], [exit 1]) -AC_SEARCH_LIBS([pow], [m c], [], [exit 1]) -AC_SEARCH_LIBS([sqrt], [m c], [], [exit 1]) - +# We need to force AX_PTHREAD to check -pthread -lpthread since otherwise +# it only outputs -pthread for GCC. Without -lpthread GCC does not link the +# shared library against the pthread library (even though it does link the +# executable). +PTHREAD_CFLAGS=-pthread +PTHREAD_LIBS=-lpthread # This does workarounds for pthreads on various compilers. -AX_PTHREAD +AX_PTHREAD([],[AC_MSG_ERROR([POSIX threads not found])]) + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" CC="$PTHREAD_CC" -# --disable-werror -AC_ARG_ENABLE([werror], [AS_HELP_STRING([--disable-werror], [don't treat warnings as errors [no]])], - [], [CFLAGS="-Werror $CFLAGS"] +# --enable-werror +AC_ARG_ENABLE([werror], [AS_HELP_STRING([--enable-werror], [treat warnings as errors [no]])], + [CFLAGS="-Werror $CFLAGS"], [] )
View file
kvazaar-1.1.0.tar.gz/doc/kvazaar.1 -> kvazaar-1.2.0.tar.gz/doc/kvazaar.1
Changed
@@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "February 2017" "kvazaar v1.1.0" "User Commands" +.TH KVAZAAR "1" "November 2017" "kvazaar v1.2.0" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -131,6 +131,10 @@ delta QP values in raster order. The delta QP map can be any size or aspect ratio, and will be mapped to LCU's. +.TP +\fB\-\-(no\-)erp\-aqp +Use adaptive QP for 360 video with +equirectangular projection .SS "Compression tools:" .TP
View file
kvazaar-1.1.0.tar.gz/src/Makefile.am -> kvazaar-1.2.0.tar.gz/src/Makefile.am
Changed
@@ -29,10 +29,21 @@ cli.c \ yuv_io.c \ yuv_io.h + kvazaar_LDADD = libkvazaar.la $(LIBS) kvazaar_CPPFLAGS = -DKVZ_VERSION="`$(srcdir)/../tools/version.sh`" +if USE_CRYPTOPP +kvazaar_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +else +kvazaar_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +endif + libkvazaar_la_SOURCES = \ bitstream.c \ bitstream.h \ @@ -144,15 +155,21 @@ libsse2.la \ libsse41.la +libkvazaar_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-number $(KVZ_API_VERSION) + if USE_CRYPTOPP libkvazaar_la_SOURCES += \ extras/crypto.h \ extras/crypto.cpp +libkvazaar_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(libkvazaar_la_LDFLAGS) $(LDFLAGS) -o $@ +else +libkvazaar_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libkvazaar_la_LDFLAGS) $(LDFLAGS) -o $@ endif -libkvazaar_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-number $(KVZ_API_VERSION) - - libaltivec_la_SOURCES = \ strategies/altivec/picture-altivec.c \ strategies/altivec/picture-altivec.h @@ -170,7 +187,6 @@ strategies/avx2/quant-avx2.h \ strategies/avx2/sao-avx2.c \ strategies/avx2/sao-avx2.h - libsse2_la_SOURCES = \ strategies/sse2/picture-sse2.c \
View file
kvazaar-1.1.0.tar.gz/src/cabac.c -> kvazaar-1.2.0.tar.gz/src/cabac.c
Changed
@@ -297,9 +297,9 @@ //m_pcBinIf->encodeBinsEP(Suffix, r_param); if(r_param==1) { if(!(( base_level ==2 )&& (codeNumber==4 || codeNumber==5) ) ) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 1; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 1, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 1; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 1, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 1); } else { CABAC_BINS_EP(cabac, Suffix, 1, "coeff_abs_level_remaining"); @@ -309,65 +309,65 @@ else if(r_param==2) { if( base_level ==1) { - uint32_t key =ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 3; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 3; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } else if( base_level ==2) { if(codeNumber<=7 || codeNumber>=12) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 3; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 3; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } else if(codeNumber<10) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = (( (Suffix&1) + ( state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = (( (Suffix&1) + ( state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } else CABAC_BINS_EP(cabac, Suffix, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(Suffix, 2); } else { //base_level=3 if(codeNumber<=7 || codeNumber>11) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = (Suffix + ( state->tile->m_prev_pos^key ) ) & 3; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = (Suffix + ( state->crypto_prev_pos^key ) ) & 3; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } else { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = ((Suffix&2))+(( (Suffix&1) + ( state->tile->m_prev_pos^key)) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = ((Suffix&2))+(( (Suffix&1) + ( state->crypto_prev_pos^key)) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } } } else if(r_param==3) { if( base_level ==1) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 7; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 7; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if( base_level ==2) { if(codeNumber<=15 || codeNumber>23) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 7; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 7; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if(codeNumber<=19){ - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = ((Suffix&4))+(( (Suffix&3) + (state->tile->m_prev_pos^key )) & 3); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = ((Suffix&4))+(( (Suffix&3) + (state->crypto_prev_pos^key )) & 3); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if(codeNumber<=21){ - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = 4+(( (Suffix&1) + ( state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = 4+(( (Suffix&1) + ( state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else CABAC_BINS_EP(cabac, Suffix, 3, "coeff_abs_level_remaining"); @@ -376,82 +376,82 @@ CABAC_BINS_EP(cabac, Suffix, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(Suffix, 3); if(codeNumber<=15 || codeNumber>23) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = (Suffix + ( state->tile->m_prev_pos^key ) ) & 7; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = (Suffix + ( state->crypto_prev_pos^key ) ) & 7; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if(codeNumber<=19) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = (( (Suffix&3) + ( state->tile->m_prev_pos^key )) &3); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = (( (Suffix&3) + ( state->crypto_prev_pos^key )) &3); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if(codeNumber<=23) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = (Suffix&6)+(( (Suffix&1) + (state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = (Suffix&6)+(( (Suffix&1) + (state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } } } else if(r_param==4) { if( base_level ==1) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 4); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 15; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 4); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 15; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else if( base_level ==2) { if(codeNumber<=31 || codeNumber>47) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 4); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 15; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, r_param, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 4); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 15; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, r_param, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, r_param); } else if(codeNumber<=39) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = (( (Suffix&7) + ( state->tile->m_prev_pos^key )) & 7); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = (( (Suffix&7) + ( state->crypto_prev_pos^key )) & 7); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else if(codeNumber<=43) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = 8+(( (Suffix&3) + ( state->tile->m_prev_pos^key )) & 3); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = 8+(( (Suffix&3) + ( state->crypto_prev_pos^key )) & 3); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else if(codeNumber<=45){ - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = 12+(( (Suffix&1) + ( state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = 12+(( (Suffix&1) + ( state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else CABAC_BINS_EP(cabac, Suffix, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(Suffix, 4); } else {//base_level=3 if(codeNumber<=31 || codeNumber>47) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 4); - state->tile->m_prev_pos = (Suffix + ( state->tile->m_prev_pos^key ) ) & 15; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, r_param, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 4); + state->crypto_prev_pos = (Suffix + ( state->crypto_prev_pos^key ) ) & 15; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, r_param, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, r_param); } else if(codeNumber<=39) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = (( (Suffix&7) + ( state->tile->m_prev_pos^key )) & 7); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = (( (Suffix&7) + ( state->crypto_prev_pos^key )) & 7); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else if(codeNumber<=43) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = 8+(( (Suffix&3) + ( state->tile->m_prev_pos^key )) & 3); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = 8+(( (Suffix&3) + ( state->crypto_prev_pos^key )) & 3); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else if(codeNumber<=47) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = (Suffix&14)+(( (Suffix&1) + (state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = (Suffix&14)+(( (Suffix&1) + (state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } } @@ -466,10 +466,10 @@ CABAC_BINS_EP(cabac, (1 << (3 + length + 1 - r_param)) - 2, 3 + length + 1 - r_param, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP((1<<(COEF_REMAIN_BIN_REDUCTION+length+1-r_param))-2,COEF_REMAIN_BIN_REDUCTION+length+1-r_param); uint32_t Suffix = codeNumber; - uint32_t key = ff_get_key(&state->tile->dbs_g, length); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, length); uint32_t mask = ( (1<<length ) -1 ); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & mask; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, length, "coeff_abs_level_remaining"); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & mask; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, length, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos,length); } } @@ -532,7 +532,10 @@ /** * \brief */ -void kvz_cabac_write_ep_ex_golomb(struct encoder_state_t * const state, cabac_data_t * const data, uint32_t symbol, uint32_t count) +void kvz_cabac_write_ep_ex_golomb(encoder_state_t * const state, + cabac_data_t * const data, + uint32_t symbol, + uint32_t count) { uint32_t bins = 0; int32_t num_bins = 0; @@ -548,13 +551,13 @@ bins = (bins << count) | symbol; num_bins += count; - if (!state->cabac.only_count) { + if (!data->only_count) { if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_MVs) { uint32_t key, mask; - key = ff_get_key(&state->tile->dbs_g, num_bins>>1); + key = kvz_crypto_get_key(state->crypto_hdl, num_bins>>1); mask = ( (1<<(num_bins >>1) ) -1 ); - state->tile->m_prev_pos = ( bins + ( state->tile->m_prev_pos^key ) ) & mask; - bins = ( (bins >> (num_bins >>1) ) << (num_bins >>1) ) | state->tile->m_prev_pos; + state->crypto_prev_pos = ( bins + ( state->crypto_prev_pos^key ) ) & mask; + bins = ( (bins >> (num_bins >>1) ) << (num_bins >>1) ) | state->crypto_prev_pos; } } kvz_cabac_encode_bins_ep(data, bins, num_bins);
View file
kvazaar-1.1.0.tar.gz/src/cfg.c -> kvazaar-1.2.0.tar.gz/src/cfg.c
Changed
@@ -44,7 +44,7 @@ cfg->deblock_enable = 1; cfg->deblock_beta = 0; cfg->deblock_tc = 0; - cfg->sao_enable = 1; + cfg->sao_type = 3; cfg->rdoq_enable = 1; cfg->rdoq_skip = 1; cfg->signhide_enable = true; @@ -119,8 +119,12 @@ cfg->roi.height = 0; cfg->roi.dqps = NULL; + cfg->erp_aqp = false; + cfg->slices = KVZ_SLICES_NONE; + cfg->optional_key = NULL; + return 1; } @@ -132,6 +136,7 @@ FREE_POINTER(cfg->tiles_height_split); FREE_POINTER(cfg->slice_addresses_in_ts); FREE_POINTER(cfg->roi.dqps); + FREE_POINTER(cfg->optional_key); } free(cfg); @@ -228,6 +233,54 @@ return 1; } +static int parse_uint8(const char *numstr,uint8_t* number,int min, int max) +{ + char *tail; + int d = strtol(numstr, &tail, 10); + if (*tail || d < min || d > max){ + fprintf(stderr, "Expected number between %d and %d\n", min, max); + if(number) + *number = 0; + return 0; + } else{ + if (number) + *number = (uint8_t) d; + return 1; + } +} + +static int parse_array(const char *array, uint8_t *coeff_key, int size, + int min, int max) +{ + char *key = strdup(array); + const char delim[] = ",;:"; + char *token; + int i = 0; + + token = strtok(key, delim); + while(token!=NULL&&i<size){ + if (!parse_uint8(token, &coeff_key[i], min, max)) + { + free(key); + return 0; + } + i++; + token = strtok(NULL, delim); + } + if(i>=size && (token != NULL)){ + fprintf(stderr, "parsing failed : too many members.\n"); + free(key); + return 0; + } + else if (i<size){ + fprintf(stderr, "parsing failed : too few members.\n"); + free(key); + return 0; + } + free(key); + return 1; +} + static int parse_slice_specification(const char* const arg, int32_t * const nslices, int32_t** const array) { const char* current_arg = NULL; int32_t current_value; @@ -309,10 +362,12 @@ static const char * const cu_split_termination_names[] = { "zero", "off", NULL }; static const char * const crypto_toggle_names[] = { "off", "on", NULL }; - static const char * const crypto_feature_names[] = { "mvs", "mv_signs", "trans_coeffs", "trans_coeff_signs", NULL }; + static const char * const crypto_feature_names[] = { "mvs", "mv_signs", "trans_coeffs", "trans_coeff_signs", "intra_pred_modes", NULL }; static const char * const me_early_termination_names[] = { "off", "on", "sensitive", NULL }; + static const char * const sao_names[] = { "off", "edge", "band", "full", NULL }; + static const char * const preset_values[11][20*2] = { { "ultrafast", @@ -324,7 +379,7 @@ "deblock", "0:0", "signhide", "0", "subme", "0", - "sao", "0", + "sao", "off", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -347,7 +402,7 @@ "deblock", "0:0", "signhide", "0", "subme", "0", - "sao", "1", + "sao", "full", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -370,7 +425,7 @@ "deblock", "0:0", "signhide", "0", "subme", "2", - "sao", "1", + "sao", "full", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -393,7 +448,7 @@ "deblock", "0:0", "signhide", "0", "subme", "2", - "sao", "1", + "sao", "full", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -416,7 +471,7 @@ "deblock", "0:0", "signhide", "0", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -439,7 +494,7 @@ "deblock", "0:0", "signhide", "0", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -462,7 +517,7 @@ "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -485,7 +540,7 @@ "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -508,7 +563,7 @@ "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -531,7 +586,7 @@ "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "0", "transform-skip", "1", @@ -599,8 +654,11 @@ cfg->deblock_enable = atobool(value); } } - else if OPT("sao") - cfg->sao_enable = atobool(value); + else if OPT("sao") { + int8_t sao_type = 0; + if (!parse_enum(value, sao_names, &sao_type)) sao_type = atobool(value) ? 3 : 0; + cfg->sao_type = sao_type; + } else if OPT("rdoq") cfg->rdoq_enable = atobool(value); else if OPT("signhide") @@ -945,6 +1003,12 @@ return 1; } + else if OPT("key"){ + int size_key = 16; + FREE_POINTER(cfg->optional_key); + cfg->optional_key = (uint8_t *)malloc(sizeof(uint8_t)*size_key); + return parse_array(value, cfg->optional_key, size_key, 0, 255); + } else if OPT("me-early-termination"){ int8_t mode = 0; int result = parse_enum(value, me_early_termination_names, &mode); @@ -1021,7 +1085,7 @@ } const unsigned size = width * height; - uint8_t *dqp_array = calloc((size_t)size, sizeof(cfg->roi.dqps[0])); + int8_t *dqp_array = calloc((size_t)size, sizeof(cfg->roi.dqps[0])); if (!dqp_array) { fprintf(stderr, "Failed to allocate memory for ROI table.\n"); fclose(f); @@ -1040,11 +1104,13 @@ fclose(f); return 0; } - dqp_array[i] = (uint8_t)number; + dqp_array[i] = CLIP(-51, 51, number); } fclose(f); } + else if OPT("erp-aqp") + cfg->erp_aqp = (bool)atobool(value); else return 0; #undef OPT @@ -1251,6 +1317,11 @@ error = 1; } + if (cfg->qp != CLIP_TO_QP(cfg->qp)) { + fprintf(stderr, "Input error: --qp parameter out of range [0..51]\n"); + error = 1; + } + if (cfg->target_bitrate < 0) { fprintf(stderr, "Input error: --bitrate must be nonnegative\n"); error = 1;
View file
kvazaar-1.1.0.tar.gz/src/cli.c -> kvazaar-1.2.0.tar.gz/src/cli.c
Changed
@@ -47,7 +47,7 @@ { "input-fps", required_argument, NULL, 0 }, { "deblock", required_argument, NULL, 0 }, { "no-deblock", no_argument, NULL, 0 }, - { "sao", no_argument, NULL, 0 }, + { "sao", optional_argument, NULL, 0 }, { "no-sao", no_argument, NULL, 0 }, { "rdoq", no_argument, NULL, 0 }, { "no-rdoq", no_argument, NULL, 0 }, @@ -107,6 +107,7 @@ { "hash", required_argument, NULL, 0 }, {"cu-split-termination",required_argument, NULL, 0 }, { "crypto", required_argument, NULL, 0 }, + { "key", required_argument, NULL, 0 }, { "me-early-termination",required_argument, NULL, 0 }, { "lossless", no_argument, NULL, 0 }, { "no-lossless", no_argument, NULL, 0 }, @@ -119,6 +120,8 @@ { "implicit-rdpcm", no_argument, NULL, 0 }, { "no-implicit-rdpcm", no_argument, NULL, 0 }, { "roi", required_argument, NULL, 0 }, + { "erp-aqp", no_argument, NULL, 0 }, + { "no-erp-aqp", no_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -388,6 +391,8 @@ " delta QP values in raster order.\n" " The delta QP map can be any size or aspect\n" " ratio, and will be mapped to LCU's.\n" + " --(no-)erp-aqp : Use adaptive QP for 360 video with\n" + " equirectangular projection\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") ************/ "Compression tools:\n" @@ -497,19 +502,23 @@ void print_frame_info(const kvz_frame_info *const info, const double frame_psnr[3], - const uint32_t bytes) + const uint32_t bytes, + const bool print_psnr) { - fprintf(stderr, "POC %4d QP %2d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f", + fprintf(stderr, "POC %4d QP %2d (%c-frame) %10d bits", info->poc, info->qp, "BPI"[info->slice_type % 3], - bytes << 3, - frame_psnr[0], frame_psnr[1], frame_psnr[2]); + bytes << 3); + if (print_psnr) { + fprintf(stderr, " PSNR Y %2.4f U %2.4f V %2.4f", + frame_psnr[0], frame_psnr[1], frame_psnr[2]); + } if (info->slice_type != KVZ_SLICE_I) { // Print reference picture lists fprintf(stderr, " [L0 "); - for (int j = info->ref_list_len[0] - 1; j >= 0; j--) { + for (int j = 0; j < info->ref_list_len[0]; j++) { fprintf(stderr, "%d ", info->ref_list[0][j]); } fprintf(stderr, "] [L1 ");
View file
kvazaar-1.1.0.tar.gz/src/cli.h -> kvazaar-1.2.0.tar.gz/src/cli.h
Changed
@@ -57,6 +57,7 @@ void print_help(void); void print_frame_info(const kvz_frame_info *const info, const double frame_psnr[3], - const uint32_t bytes); + const uint32_t bytes, + const bool print_psnr); #endif
View file
kvazaar-1.1.0.tar.gz/src/cu.c -> kvazaar-1.2.0.tar.gz/src/cu.c
Changed
@@ -78,33 +78,6 @@ }; -#define BLIT_COEFF_CASE(n) case n:\ - for (y = 0; y < n; ++y) {\ - memcpy(&dst[y*dst_stride], &orig[y*orig_stride], n * sizeof(coeff_t));\ - }\ - break; - -void kvz_coefficients_blit(const coeff_t * const orig, coeff_t * const dst, - const unsigned width, const unsigned height, - const unsigned orig_stride, const unsigned dst_stride) -{ - unsigned y; - - int nxn_width = (width == height) ? width : 0; - switch (nxn_width) { - BLIT_COEFF_CASE(4) - BLIT_COEFF_CASE(8) - BLIT_COEFF_CASE(16) - BLIT_COEFF_CASE(32) - BLIT_COEFF_CASE(64) - default: - for (y = 0; y < height; ++y) { - memcpy(&dst[y*dst_stride], &orig[y*orig_stride], width * sizeof(coeff_t)); - } - break; - } -} - cu_info_t* kvz_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px) { return (cu_info_t*) kvz_cu_array_at_const(cua, x_px, y_px); @@ -115,7 +88,7 @@ { assert(x_px < cua->width); assert(y_px < cua->height); - return &(cua)->data[(x_px >> 2) + (y_px >> 2) * ((cua)->width >> 2)]; + return &(cua)->data[(x_px >> 2) + (y_px >> 2) * ((cua)->stride >> 2)]; } @@ -125,82 +98,99 @@ * \param width width of the array in luma pixels * \param height height of the array in luma pixels */ -cu_array_t * kvz_cu_array_alloc(const int width, const int height) { +cu_array_t * kvz_cu_array_alloc(const int width, const int height) +{ cu_array_t *cua = MALLOC(cu_array_t, 1); - // Round up to a multiple of cell width and divide by cell width. - const int width_scu = (width + 15) >> 2; - const int height_scu = (height + 15) >> 2; - assert(width_scu * 16 >= width); - assert(height_scu * 16 >= height); + // Round up to a multiple of LCU width and divide by cell width. + const int width_scu = CEILDIV(width, LCU_WIDTH) * LCU_WIDTH / SCU_WIDTH; + const int height_scu = CEILDIV(height, LCU_WIDTH) * LCU_WIDTH / SCU_WIDTH; const unsigned cu_array_size = width_scu * height_scu; - cua->data = calloc(cu_array_size, sizeof(cu_info_t)); - cua->width = width_scu << 2; - cua->height = height_scu << 2; + + cua->base = NULL; + cua->data = calloc(cu_array_size, sizeof(cu_info_t)); + cua->width = width_scu * SCU_WIDTH; + cua->height = height_scu * SCU_WIDTH; + cua->stride = cua->width; cua->refcount = 1; return cua; } -int kvz_cu_array_free(cu_array_t * const cua) +cu_array_t * kvz_cu_subarray(cu_array_t *base, + const unsigned x_offset, + const unsigned y_offset, + const unsigned width, + const unsigned height) +{ + assert(x_offset + width <= base->width); + assert(y_offset + height <= base->height); + + if (x_offset == 0 && + y_offset == 0 && + width == base->width && + height == base->height) + { + return kvz_cu_array_copy_ref(base); + } + + cu_array_t *cua = MALLOC(cu_array_t, 1); + + // Find the real base array. + cu_array_t *real_base = base; + while (real_base->base) { + real_base = real_base->base; + } + cua->base = kvz_cu_array_copy_ref(real_base); + cua->data = kvz_cu_array_at(base, x_offset, y_offset); + cua->width = width; + cua->height = height; + cua->stride = base->stride; + cua->refcount = 1; + + return cua; +} + +void kvz_cu_array_free(cu_array_t **cua_ptr) { - int32_t new_refcount; - if (!cua) return 1; + cu_array_t *cua = *cua_ptr; + if (cua == NULL) return; + *cua_ptr = NULL; + + int new_refcount = KVZ_ATOMIC_DEC(&cua->refcount); + if (new_refcount > 0) { + // Still we have some references, do nothing. + return; + } - new_refcount = KVZ_ATOMIC_DEC(&(cua->refcount)); - //Still we have some references, do nothing - if (new_refcount > 0) return 1; + assert(new_refcount == 0); - FREE_POINTER(cua->data); - free(cua); + if (!cua->base) { + FREE_POINTER(cua->data); + } else { + kvz_cu_array_free(&cua->base); + cua->data = NULL; + } - return 1; + FREE_POINTER(cua); } /** - * \brief Copy part of a cu array to another cu array. - * - * All values are in luma pixels. + * \brief Get a new pointer to a cu array. * - * \param dst destination array - * \param dst_x x-coordinate of the left edge of the copied area in dst - * \param dst_y y-coordinate of the top edge of the copied area in dst - * \param src source array - * \param src_x x-coordinate of the left edge of the copied area in src - * \param src_y y-coordinate of the top edge of the copied area in src - * \param width width of the area to copy - * \param height height of the area to copy + * Increment reference count and return the cu array. */ -void kvz_cu_array_copy(cu_array_t* dst, int dst_x, int dst_y, - const cu_array_t* src, int src_x, int src_y, - int width, int height) +cu_array_t * kvz_cu_array_copy_ref(cu_array_t* cua) { - // Convert values from pixel coordinates to array indices. - int src_stride = src->width >> 2; - int dst_stride = dst->width >> 2; - const cu_info_t* src_ptr = &src->data[(src_x >> 2) + (src_y >> 2) * src_stride]; - cu_info_t* dst_ptr = &dst->data[(dst_x >> 2) + (dst_y >> 2) * dst_stride]; - - // Number of bytes to copy per row. - const size_t row_size = sizeof(cu_info_t) * (width >> 2); - - width = MIN(width, MIN(src->width - src_x, dst->width - dst_x)); - height = MIN(height, MIN(src->height - src_y, dst->height - dst_y)); - - assert(src_x + width <= src->width); - assert(src_y + height <= src->height); - assert(dst_x + width <= dst->width); - assert(dst_y + height <= dst->height); - - for (int i = 0; i < (height >> 2); ++i) { - memcpy(dst_ptr, src_ptr, row_size); - src_ptr += src_stride; - dst_ptr += dst_stride; - } + // The caller should have had another reference. + assert(cua->refcount > 0); + KVZ_ATOMIC_INC(&cua->refcount); + return cua; } + /** * \brief Copy an lcu to a cu array. * @@ -213,7 +203,7 @@ */ void kvz_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src) { - const int dst_stride = dst->width >> 2; + const int dst_stride = dst->stride >> 2; for (int y = 0; y < LCU_WIDTH; y += SCU_WIDTH) { for (int x = 0; x < LCU_WIDTH; x += SCU_WIDTH) { const cu_info_t *from_cu = LCU_GET_CU_AT_PX(src, x, y);
View file
kvazaar-1.1.0.tar.gz/src/cu.h -> kvazaar-1.2.0.tar.gz/src/cu.h
Changed
@@ -138,10 +138,13 @@ int8_t mode; int8_t mode_chroma; int8_t tr_skip; //!< \brief transform skip flag +#if KVZ_SEL_ENCRYPTION + int8_t mode_encry; +#endif } intra; struct { int16_t mv[2][2]; // \brief Motion vectors for L0 and L1 - uint8_t mv_ref[2]; // \brief Index of the encoder_control.ref array. + uint8_t mv_ref[2]; // \brief Index of the L0 and L1 array. uint8_t mv_cand0 : 3; // \brief selected MV candidate uint8_t mv_cand1 : 3; // \brief selected MV candidate uint8_t mv_dir : 2; // \brief Probably describes if mv_ref is L0, L1 or both (bi-pred) @@ -178,20 +181,26 @@ (cu).inter.cost, (cu).inter.bitcost, (cu).inter.mv[0], (cu).inter.mv[1], (cu).inter.mvd[0], (cu).inter.mvd[1], \ (cu).inter.mv_cand, (cu).inter.mv_ref, (cu).inter.mv_dir, (cu).inter.mode) -typedef struct { - cu_info_t *data; //!< \brief cu array +typedef struct cu_array_t { + struct cu_array_t *base; //!< \brief base cu array or NULL + cu_info_t *data; //!< \brief cu array int32_t width; //!< \brief width of the array in pixels int32_t height; //!< \brief height of the array in pixels + int32_t stride; //!< \brief stride of the array in pixels int32_t refcount; //!< \brief number of references to this cu_array } cu_array_t; -cu_array_t * kvz_cu_array_alloc(int width, int height); -int kvz_cu_array_free(cu_array_t *cua); cu_info_t* kvz_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px); const cu_info_t* kvz_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px); -void kvz_cu_array_copy(cu_array_t* dst, int dst_x, int dst_y, - const cu_array_t* src, int src_x, int src_y, - int width, int height); + +cu_array_t * kvz_cu_array_alloc(const int width, const int height); +cu_array_t * kvz_cu_subarray(cu_array_t *base, + const unsigned x_offset, + const unsigned y_offset, + const unsigned width, + const unsigned height); +void kvz_cu_array_free(cu_array_t **cua_ptr); +cu_array_t * kvz_cu_array_copy_ref(cu_array_t* cua); /** @@ -221,7 +230,54 @@ kvz_pixel v[LCU_REF_PX_WIDTH / 2 + 1]; } lcu_ref_px_t; -typedef struct { +/** + * \brief Coefficients of an LCU + * + * Coefficients inside a single TU are stored in row-major order. TUs + * themselves are stored in a zig-zag order, so that the coefficients of + * a TU are contiguous in memory. + * + * Example storage order for a 32x32 pixel TU tree + * + \verbatim + + +------+------+------+------+---------------------------+ + | 0 | 16 | 64 | 80 | | + | - | - | - | - | | + | 15 | 31 | 79 | 95 | | + +------+------+------+------+ | + | 32 | 48 | 96 | 112 | | + | - | - | - | - | | + | 47 | 63 | 111 | 127 | | + +------+------+------+------+ 256 - 511 | + | 128 | 144 | 192 | 208 | | + | - | - | - | - | | + | 143 | 159 | 207 | 223 | | + +------+------+------+------+ | + | 160 | 176 | 224 | 240 | | + | - | - | - | - | | + | 175 | 191 | 239 | 255 | | + +------+------+------+------+-------------+------+------+ + | 512 | 528 | | | 832 | 848 | + | - | - | | | - | - | + | 527 | 543 | | | 847 | 863 | + +------+------+ 576 - 639 | 768 - 831 +------+------+ + | 544 | 560 | | | 864 | 880 | + | - | - | | | - | - | + | 559 | 575 | | | 879 | 895 | + +------+------+-------------+-------------+------+------+ + | | | | | + | | | | | + | | | | | + | 640 - 703 | 704 - 767 | 896 - 959 | 960 - 1023 | + | | | | | + | | | | | + | | | | | + +-------------+-------------+-------------+-------------+ + + \endverbatim + */ +typedef ALIGNED(8) struct { coeff_t y[LCU_LUMA_SIZE]; coeff_t u[LCU_CHROMA_SIZE]; coeff_t v[LCU_CHROMA_SIZE]; @@ -287,6 +343,72 @@ #define LCU_GET_CU_AT_PX(lcu, x_px, y_px) \ (&(lcu)->cu[LCU_CU_OFFSET + ((x_px) >> 2) + ((y_px) >> 2) * LCU_T_CU_WIDTH]) + +/** + * \brief Copy a part of a coeff_t array to another. + * + * \param width Size of the block to be copied in pixels. + * \param src Pointer to the source array. + * \param dest Pointer to the destination array. + */ +static INLINE void copy_coeffs(const coeff_t *__restrict src, + coeff_t *__restrict dest, + size_t width) +{ + memcpy(dest, src, width * width * sizeof(coeff_t)); +} + + +/** + * \brief Convert (x, y) coordinates to z-order index. + * + * Only works for widths and coordinates divisible by four. Width must be + * a power of two in range [4..64]. + * + * \param width size of the containing block + * \param x x-coordinate + * \param y y-coordinate + * \return index in z-order + */ +static INLINE unsigned xy_to_zorder(unsigned width, unsigned x, unsigned y) +{ + assert(width % 4 == 0 && width >= 4 && width <= 64); + assert(x % 4 == 0 && x < width); + assert(y % 4 == 0 && y < width); + + unsigned result = 0; + + switch (width) { + case 64: + result += x / 32 * (32*32); + result += y / 32 * (64*32); + x %= 32; + y %= 32; + // fallthrough + case 32: + result += x / 16 * (16*16); + result += y / 16 * (32*16); + x %= 16; + y %= 16; + // fallthrough + case 16: + result += x / 8 * ( 8*8); + result += y / 8 * (16*8); + x %= 8; + y %= 8; + // fallthrough + case 8: + result += x / 4 * (4*4); + result += y / 4 * (8*4); + // fallthrough + case 4: + break; + } + + return result; +} + + #define CHECKPOINT_LCU(prefix_str, lcu) do { \ CHECKPOINT_CU(prefix_str " cu[0]", (lcu).cu[0]); \ CHECKPOINT_CU(prefix_str " cu[1]", (lcu).cu[1]); \ @@ -373,10 +495,6 @@ } while(0) -void kvz_coefficients_blit(const coeff_t *orig, coeff_t *dst, - unsigned width, unsigned height, - unsigned orig_stride, unsigned dst_stride); - #define NUM_CBF_DEPTHS 5 static const uint16_t cbf_masks[NUM_CBF_DEPTHS] = { 0x1f, 0x0f, 0x07, 0x03, 0x1 };
View file
kvazaar-1.1.0.tar.gz/src/encmain.c -> kvazaar-1.2.0.tar.gz/src/encmain.c
Changed
@@ -83,11 +83,11 @@ } } -#if KVZ_BIT_DEPTH == 8 -#define PSNRMAX (255.0 * 255.0) -#else - #define PSNRMAX ((double)PIXEL_MAX * (double)PIXEL_MAX) -#endif +/** + * \brief Value that is printed instead of PSNR when SSE is zero. + */ +static const double MAX_PSNR = 999.99; +static const double MAX_SQUARED_ERROR = (double)PIXEL_MAX * (double)PIXEL_MAX; /** * \brief Calculates image PSNR value @@ -105,28 +105,31 @@ int32_t pixels = src->width * src->height; int colors = rec->chroma_format == KVZ_CSP_400 ? 1 : 3; + double sse[3] = { 0.0 }; for (int32_t c = 0; c < colors; ++c) { int32_t num_pixels = pixels; if (c != COLOR_Y) { num_pixels >>= 2; } - psnr[c] = 0; for (int32_t i = 0; i < num_pixels; ++i) { const int32_t error = src->data[c][i] - rec->data[c][i]; - psnr[c] += error * error; + sse[c] += error * error; } // Avoid division by zero - if (psnr[c] == 0) psnr[c] = 99.0; - psnr[c] = 10 * log10((num_pixels * PSNRMAX) / ((double)psnr[c]));; + if (sse[c] == 0.0) { + psnr[c] = MAX_PSNR; + } else { + psnr[c] = 10.0 * log10(num_pixels * MAX_SQUARED_ERROR / sse[c]); + } } } typedef struct { - // Mutexes for synchronization. - pthread_mutex_t* input_mutex; - pthread_mutex_t* main_thread_mutex; + // Semaphores for synchronization. + kvz_sem_t* available_input_slots; + kvz_sem_t* filled_input_slots; // Parameters passed from main thread to input thread. FILE* input; @@ -141,9 +144,6 @@ int retval; } input_handler_args; -#define PTHREAD_LOCK(l) if (pthread_mutex_lock((l)) != 0) { fprintf(stderr, "pthread_mutex_lock(%s) failed!\n", #l); assert(0); return 0; } -#define PTHREAD_UNLOCK(l) if (pthread_mutex_unlock((l)) != 0) { fprintf(stderr, "pthread_mutex_unlock(%s) failed!\n", #l); assert(0); return 0; } - #define RETVAL_RUNNING 0 #define RETVAL_FAILURE 1 #define RETVAL_EOF 2 @@ -193,7 +193,7 @@ // Set PTS to make sure we pass it on correctly. frame_in->pts = frames_read; - bool read_success = yuv_io_read(args->input, + bool read_success = yuv_io_read(args->input, args->opts->config->width, args->opts->config->height, args->encoder->cfg.input_bitdepth, @@ -242,30 +242,65 @@ } // Wait until main thread is ready to receive the next frame. - PTHREAD_LOCK(args->input_mutex); + kvz_sem_wait(args->available_input_slots); args->img_in = frame_in; args->retval = retval; // Unlock main_thread_mutex to notify main thread that the new img_in // and retval have been placed to args. - PTHREAD_UNLOCK(args->main_thread_mutex); + kvz_sem_post(args->filled_input_slots); frame_in = NULL; } done: // Wait until main thread is ready to receive the next frame. - PTHREAD_LOCK(args->input_mutex); + kvz_sem_wait(args->available_input_slots); args->img_in = NULL; args->retval = retval; // Unlock main_thread_mutex to notify main thread that the new img_in // and retval have been placed to args. - PTHREAD_UNLOCK(args->main_thread_mutex); + kvz_sem_post(args->filled_input_slots); // Do some cleaning up. args->api->picture_free(frame_in); pthread_exit(NULL); - return 0; + return NULL; +} + + +void output_recon_pictures(const kvz_api *const api, + FILE *recout, + kvz_picture *buffer[KVZ_MAX_GOP_LENGTH], + int *buffer_size, + uint64_t *next_pts, + unsigned width, + unsigned height) +{ + bool picture_written; + do { + picture_written = false; + for (int i = 0; i < *buffer_size; i++) { + + kvz_picture *pic = buffer[i]; + if (pic->pts == *next_pts) { + // Output the picture and remove it. + if (!yuv_io_write(recout, pic, width, height)) { + fprintf(stderr, "Failed to write reconstructed picture!\n"); + } + api->picture_free(pic); + picture_written = true; + (*next_pts)++; + + // Move rest of the pictures one position backward. + for (i++; i < *buffer_size; i++) { + buffer[i - 1] = buffer[i]; + buffer[i] = NULL; + } + (*buffer_size)--; + } + } + } while (picture_written); } @@ -287,15 +322,37 @@ clock_t start_time = clock(); clock_t encoding_start_cpu_time; KVZ_CLOCK_T encoding_start_real_time; - + clock_t encoding_end_cpu_time; KVZ_CLOCK_T encoding_end_real_time; + // PTS of the reconstructed picture that should be output next. + // Only used with --debug. + uint64_t next_recon_pts = 0; + // Buffer for storing reconstructed pictures that are not to be output + // yet (i.e. in wrong order because GOP is used). + // Only used with --debug. + kvz_picture *recon_buffer[KVZ_MAX_GOP_LENGTH] = { NULL }; + int recon_buffer_size = 0; + + // Semaphores for synchronizing the input reader thread and the main + // thread. + // + // available_input_slots tells whether the main thread is currently using + // input_handler_args.img_in. (0 = in use, 1 = not in use) + // + // filled_input_slots tells whether there is a new input picture (or NULL + // if the input has ended) in input_handler_args.img_in placed by the + // input reader thread. (0 = no new image, 1 = one new image) + // + kvz_sem_t *available_input_slots = NULL; + kvz_sem_t *filled_input_slots = NULL; + #ifdef _WIN32 // Stderr needs to be text mode to convert \n to \r\n in Windows. setmode( _fileno( stderr ), _O_TEXT ); #endif - + CHECKPOINTS_INIT(); const kvz_api * const api = kvz_api_get(8); @@ -379,17 +436,15 @@ pthread_t input_thread; - pthread_mutex_t input_mutex = PTHREAD_MUTEX_INITIALIZER; - pthread_mutex_t main_thread_mutex = PTHREAD_MUTEX_INITIALIZER; - - // Lock both mutexes at startup - PTHREAD_LOCK(&main_thread_mutex); - PTHREAD_LOCK(&input_mutex); + available_input_slots = calloc(1, sizeof(kvz_sem_t)); + filled_input_slots = calloc(1, sizeof(kvz_sem_t)); + kvz_sem_init(available_input_slots, 0); + kvz_sem_init(filled_input_slots, 0); // Give arguments via struct to the input thread input_handler_args in_args = { - .input_mutex = NULL, - .main_thread_mutex = NULL, + .available_input_slots = available_input_slots, + .filled_input_slots = filled_input_slots, .input = input, .api = api, @@ -401,8 +456,8 @@ .img_in = NULL, .retval = RETVAL_RUNNING, }; - in_args.input_mutex = &input_mutex; - in_args.main_thread_mutex = &main_thread_mutex; + in_args.available_input_slots = available_input_slots; + in_args.filled_input_slots = filled_input_slots; if (pthread_create(&input_thread, NULL, input_read_thread, (void*)&in_args) != 0) { fprintf(stderr, "pthread_create failed!\n"); @@ -414,11 +469,12 @@ // Skip mutex locking if the input thread does not exist. if (in_args.retval == RETVAL_RUNNING) { - // Unlock input_mutex so that the input thread can write the new - // img_in and retval to in_args. - PTHREAD_UNLOCK(&input_mutex); - // Wait until the input thread has updated in_args. - PTHREAD_LOCK(&main_thread_mutex); + // Increase available_input_slots so that the input thread can + // write the new img_in and retval to in_args. + kvz_sem_post(available_input_slots); + // Wait until the input thread has updated in_args and then + // decrease filled_input_slots. + kvz_sem_wait(filled_input_slots); cur_in_img = in_args.img_in; in_args.img_in = NULL; @@ -484,12 +540,20 @@ if (recout) { // Since chunks_out was not NULL, img_rec should have been set. assert(img_rec); - if (!yuv_io_write(recout, - img_rec, - opts->config->width, - opts->config->height)) { - fprintf(stderr, "Failed to write reconstructed picture!\n"); - } + + // Move img_rec to the recon buffer. + assert(recon_buffer_size < KVZ_MAX_GOP_LENGTH); + recon_buffer[recon_buffer_size++] = img_rec; + img_rec = NULL; + + // Try to output some reconstructed pictures. + output_recon_pictures(api, + recout, + recon_buffer, + &recon_buffer_size, + &next_recon_pts, + opts->config->width, + opts->config->height); } frames_done += 1; @@ -497,7 +561,7 @@ psnr_sum[1] += frame_psnr[1]; psnr_sum[2] += frame_psnr[2]; - print_frame_info(&info_out, frame_psnr, len_out); + print_frame_info(&info_out, frame_psnr, len_out, encoder->cfg.calc_psnr); } api->picture_free(cur_in_img); @@ -510,12 +574,15 @@ encoding_end_cpu_time = clock(); // Coding finished + // All reconstructed pictures should have been output. + assert(recon_buffer_size == 0); + // Print statistics of the coding fprintf(stderr, " Processed %d frames, %10llu bits", frames_done, (long long unsigned int)bitstream_length * 8); - if (frames_done > 0) { - fprintf(stderr, " AVG PSNR: %2.4f %2.4f %2.4f", + if (encoder->cfg.calc_psnr && frames_done > 0) { + fprintf(stderr, " AVG PSNR Y %2.4f U %2.4f V %2.4f", psnr_sum[0] / frames_done, psnr_sum[1] / frames_done, psnr_sum[2] / frames_done); @@ -540,6 +607,12 @@ retval = EXIT_FAILURE; done: + // destroy semaphores + if (available_input_slots) kvz_sem_destroy(available_input_slots); + if (filled_input_slots) kvz_sem_destroy(filled_input_slots); + FREE_POINTER(available_input_slots); + FREE_POINTER(filled_input_slots); + // deallocate structures if (enc) api->encoder_close(enc); if (opts) cmdline_opts_free(api, opts);
View file
kvazaar-1.1.0.tar.gz/src/encode_coding_tree.c -> kvazaar-1.2.0.tar.gz/src/encode_coding_tree.c
Changed
@@ -46,13 +46,11 @@ * This method encodes the X and Y component within a block of the last * significant coefficient. */ -static void encode_last_significant_xy(encoder_state_t * const state, +static void encode_last_significant_xy(cabac_data_t * const cabac, uint8_t lastpos_x, uint8_t lastpos_y, uint8_t width, uint8_t height, uint8_t type, uint8_t scan) { - cabac_data_t * const cabac = &state->cabac; - const int index = kvz_math_floor_log2(width) - 2; uint8_t ctx_offset = type ? 0 : (index * 3 + (index + 1) / 4); uint8_t shift = type ? index : (index + 3) / 4; @@ -103,14 +101,14 @@ } void kvz_encode_coeff_nxn(encoder_state_t * const state, - coeff_t *coeff, + cabac_data_t * const cabac, + const coeff_t *coeff, uint8_t width, uint8_t type, int8_t scan_mode, int8_t tr_skip) { const encoder_control_t * const encoder = state->encoder_control; - cabac_data_t * const cabac = &state->cabac; int c1 = 1; uint8_t last_coeff_x = 0; uint8_t last_coeff_y = 0; @@ -183,8 +181,13 @@ last_coeff_y = (uint8_t)(pos_last >> log2_block_size); // Code last_coeff_x and last_coeff_y - encode_last_significant_xy(state, last_coeff_x, last_coeff_y, width, width, - type, scan_mode); + encode_last_significant_xy(cabac, + last_coeff_x, + last_coeff_y, + width, + width, + type, + scan_mode); scan_pos_sig = scan_pos_last; @@ -300,15 +303,15 @@ } if (be_valid && sign_hidden) { coeff_signs = coeff_signs >> 1; - if(!state->cabac.only_count) - if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) { - coeff_signs = coeff_signs ^ ff_get_key(&state->tile->dbs_g, num_non_zero-1); + if (!cabac->only_count) + if (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) { + coeff_signs = coeff_signs ^ kvz_crypto_get_key(state->crypto_hdl, num_non_zero-1); } CABAC_BINS_EP(cabac, coeff_signs , (num_non_zero - 1), "coeff_sign_flag"); } else { - if(!state->cabac.only_count) - if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) - coeff_signs = coeff_signs ^ ff_get_key(&state->tile->dbs_g, num_non_zero); + if (!cabac->only_count) + if (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) + coeff_signs = coeff_signs ^ kvz_crypto_get_key(state->crypto_hdl, num_non_zero); CABAC_BINS_EP(cabac, coeff_signs, num_non_zero, "coeff_sign_flag"); } @@ -319,9 +322,9 @@ int32_t base_level = (idx < C1FLAG_NUMBER) ? (2 + first_coeff2) : 1; if (abs_coeff[idx] >= base_level) { - if(!state->cabac.only_count) { - if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFFS) - kvz_cabac_write_coeff_remain_encry(state, cabac, abs_coeff[idx] - base_level, go_rice_param, base_level); + if (!cabac->only_count) { + if (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFFS) + kvz_cabac_write_coeff_remain_encry(state, cabac, abs_coeff[idx] - base_level, go_rice_param, base_level); else kvz_cabac_write_coeff_remain(cabac, abs_coeff[idx] - base_level, go_rice_param); } else @@ -342,7 +345,7 @@ } static void encode_transform_unit(encoder_state_t * const state, - int x_pu, int y_pu, int depth) + int x, int y, int depth) { assert(depth >= 1 && depth <= MAX_PU_DEPTH); @@ -350,79 +353,60 @@ const uint8_t width = LCU_WIDTH >> depth; const uint8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); - const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, x_pu << 2, y_pu << 2); - - const int x_cu = x_pu / 2; - const int y_cu = y_pu / 2; - const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x_cu, y_cu); - - coeff_t coeff_y[LCU_WIDTH*LCU_WIDTH+1]; - coeff_t coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; - coeff_t coeff_v[LCU_WIDTH*LCU_WIDTH>>2]; - int32_t coeff_stride = frame->width; + const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, x, y); int8_t scan_idx = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth); int cbf_y = cbf_is_set(cur_pu->cbf, depth, COLOR_Y); if (cbf_y) { - int x = x_pu * (LCU_WIDTH >> MAX_PU_DEPTH); - int y = y_pu * (LCU_WIDTH >> MAX_PU_DEPTH); - coeff_t *orig_pos = &frame->coeff_y[x + y * frame->width]; - for (y = 0; y < width; y++) { - for (x = 0; x < width; x++) { - coeff_y[x+y*width] = orig_pos[x]; - } - orig_pos += coeff_stride; - } - } - - // CoeffNxN - // Residual Coding - if (cbf_y) { - kvz_encode_coeff_nxn(state, coeff_y, width, 0, scan_idx, cur_pu->intra.tr_skip); + int x_local = x % LCU_WIDTH; + int y_local = y % LCU_WIDTH; + const coeff_t *coeff_y = &state->coeff->y[xy_to_zorder(LCU_WIDTH, x_local, y_local)]; + + // CoeffNxN + // Residual Coding + kvz_encode_coeff_nxn(state, + &state->cabac, + coeff_y, + width, + 0, + scan_idx, + cur_pu->intra.tr_skip); } - if (depth == MAX_DEPTH + 1 && !(x_pu % 2 && y_pu % 2)) { + if (depth == MAX_DEPTH + 1) { // For size 4x4 luma transform the corresponding chroma transforms are - // also of size 4x4 covering 8x8 luma pixels. The residual is coded - // in the last transform unit so for the other ones, don't do anything. - return; + // also of size 4x4 covering 8x8 luma pixels. The residual is coded in + // the last transform unit. + if (x % 8 == 0 || y % 8 == 0) { + // Not the last luma transform block so there is nothing more to do. + return; + } else { + // Time to to code the chroma transform blocks. Move to the top-left + // corner of the block. + x -= 4; + y -= 4; + cur_pu = kvz_cu_array_at_const(frame->cu_array, x, y); + } } - bool chroma_cbf_set = cbf_is_set(cur_cu->cbf, depth, COLOR_U) || - cbf_is_set(cur_cu->cbf, depth, COLOR_V); + bool chroma_cbf_set = cbf_is_set(cur_pu->cbf, depth, COLOR_U) || + cbf_is_set(cur_pu->cbf, depth, COLOR_V); if (chroma_cbf_set) { - int x, y; - coeff_t *orig_pos_u, *orig_pos_v; - - if (depth <= MAX_DEPTH) { - x = x_pu * (LCU_WIDTH >> (MAX_PU_DEPTH + 1)); - y = y_pu * (LCU_WIDTH >> (MAX_PU_DEPTH + 1)); - } else { - // for 4x4 select top left pixel of the CU. - x = x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)); - y = y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)); - } - orig_pos_u = &frame->coeff_u[x + y * (frame->width >> 1)]; - orig_pos_v = &frame->coeff_v[x + y * (frame->width >> 1)]; - for (y = 0; y < (width_c); y++) { - for (x = 0; x < (width_c); x++) { - coeff_u[x+y*(width_c)] = orig_pos_u[x]; - coeff_v[x+y*(width_c)] = orig_pos_v[x]; - } - orig_pos_u += coeff_stride>>1; - orig_pos_v += coeff_stride>>1; - } + int x_local = (x >> 1) % LCU_WIDTH_C; + int y_local = (y >> 1) % LCU_WIDTH_C; + scan_idx = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode_chroma, depth); - scan_idx = kvz_get_scan_order(cur_cu->type, cur_cu->intra.mode_chroma, depth); + const coeff_t *coeff_u = &state->coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; + const coeff_t *coeff_v = &state->coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; - if (cbf_is_set(cur_cu->cbf, depth, COLOR_U)) { - kvz_encode_coeff_nxn(state, coeff_u, width_c, 2, scan_idx, 0); + if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) { + kvz_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, 2, scan_idx, 0); } - if (cbf_is_set(cur_cu->cbf, depth, COLOR_V)) { - kvz_encode_coeff_nxn(state, coeff_v, width_c, 2, scan_idx, 0); + if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) { + kvz_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, 2, scan_idx, 0); } } } @@ -437,21 +421,21 @@ * \param parent_coeff_v What was signlaed at previous level for cbf_cr. */ static void encode_transform_coeff(encoder_state_t * const state, - int32_t x_pu, - int32_t y_pu, + int32_t x, + int32_t y, int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v) { cabac_data_t * const cabac = &state->cabac; + const encoder_control_t *const ctrl = state->encoder_control; const videoframe_t * const frame = state->tile->frame; - const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, x_pu << 2, y_pu << 2); - - const int32_t x_cu = x_pu / 2; - const int32_t y_cu = y_pu / 2; - const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x_cu, y_cu); + const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, x, y); + // Round coordinates down to a multiple of 8 to get the location of the + // containing CU. + const cu_info_t *cur_cu = kvz_cu_array_at_const(frame->cu_array, x & ~7, y & ~7); // NxN signifies implicit transform split at the first transform level. // There is a similar implicit split for inter, but it is only used when @@ -459,8 +443,12 @@ int intra_split_flag = (cur_cu->type == CU_INTRA && cur_cu->part_size == SIZE_NxN); // The implicit split by intra NxN is not counted towards max_tr_depth. - int tr_depth_intra = state->encoder_control->cfg.tr_depth_intra; - int max_tr_depth = (cur_cu->type == CU_INTRA ? tr_depth_intra + intra_split_flag : TR_DEPTH_INTER); + int max_tr_depth; + if (cur_cu->type == CU_INTRA) { + max_tr_depth = ctrl->cfg.tr_depth_intra + intra_split_flag; + } else { + max_tr_depth = ctrl->tr_depth_inter; + } int8_t split = (cur_cu->tr_depth > depth); @@ -498,11 +486,13 @@ } if (split) { - uint8_t pu_offset = 1 << (MAX_PU_DEPTH - (depth + 1)); - encode_transform_coeff(state, x_pu, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); - encode_transform_coeff(state, x_pu + pu_offset, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); - encode_transform_coeff(state, x_pu, y_pu + pu_offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); - encode_transform_coeff(state, x_pu + pu_offset, y_pu + pu_offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + uint8_t offset = LCU_WIDTH >> (depth + 1); + int x2 = x + offset; + int y2 = y + offset; + encode_transform_coeff(state, x, y, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(state, x2, y, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(state, x, y2, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(state, x2, y2, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); return; } @@ -511,7 +501,7 @@ // - transform depth > 0 // - we have chroma coefficients at this level // When it is not present, it is inferred to be 1. - if(cur_cu->type == CU_INTRA || tr_depth > 0 || cb_flag_u || cb_flag_v) { + if (cur_cu->type == CU_INTRA || tr_depth > 0 || cb_flag_u || cb_flag_v) { cabac->cur_ctx = &(cabac->ctx.qt_cbf_model_luma[!tr_depth]); CABAC_BIN(cabac, cb_flag_y, "cbf_luma"); } @@ -539,7 +529,7 @@ state->ref_qp = state->qp; } - encode_transform_unit(state, x_pu, y_pu, depth); + encode_transform_unit(state, x, y, depth); } } @@ -570,15 +560,6 @@ } } else { uint32_t ref_list_idx; - uint32_t j; - int ref_list[2] = { 0, 0 }; - for (j = 0; j < state->frame->ref->used_size; j++) { - if (state->frame->ref->pocs[j] < state->frame->poc) { - ref_list[0]++; - } else { - ref_list[1]++; - } - } // Void TEncSbac::codeInterDir( TComDataCU* pcCU, UInt uiAbsPartIdx ) if (state->frame->slicetype == KVZ_SLICE_B) @@ -602,16 +583,20 @@ for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) { if (cur_cu->inter.mv_dir & (1 << ref_list_idx)) { - if (ref_list[ref_list_idx] > 1) { + + // size of the current reference index list (L0/L1) + uint8_t ref_LX_size = state->frame->ref_LX_size[ref_list_idx]; + + if (ref_LX_size > 1) { // parseRefFrmIdx - int32_t ref_frame = state->frame->refmap[cur_cu->inter.mv_ref[ref_list_idx]].idx; + int32_t ref_frame = cur_cu->inter.mv_ref[ref_list_idx]; cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]); CABAC_BIN(cabac, (ref_frame != 0), "ref_idx_lX"); if (ref_frame > 0) { int32_t i; - int32_t ref_num = ref_list[ref_list_idx] - 2; + int32_t ref_num = ref_LX_size - 2; cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]); ref_frame--; @@ -668,7 +653,7 @@ uint32_t mvd_hor_sign = (mvd_hor>0)?0:1; if(!state->cabac.only_count) if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_MV_SIGNS) - mvd_hor_sign = mvd_hor_sign^ff_get_key(&state->tile->dbs_g, 1); + mvd_hor_sign = mvd_hor_sign^kvz_crypto_get_key(state->crypto_hdl, 1); CABAC_BIN_EP(cabac, mvd_hor_sign, "mvd_sign_flag_hor"); } if (ver_abs_gr0) { @@ -678,7 +663,7 @@ uint32_t mvd_ver_sign = (mvd_ver>0)?0:1; if(!state->cabac.only_count) if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_MV_SIGNS) - mvd_ver_sign = mvd_ver_sign^ff_get_key(&state->tile->dbs_g, 1); + mvd_ver_sign = mvd_ver_sign^kvz_crypto_get_key(state->crypto_hdl, 1); CABAC_BIN_EP(cabac, mvd_ver_sign, "mvd_sign_flag_ver"); } } @@ -694,13 +679,72 @@ } // if !merge } + +static INLINE uint8_t intra_mode_encryption(encoder_state_t * const state, + uint8_t intra_pred_mode) +{ + const uint8_t sets[3][17] = + { + { 0, 1, 2, 3, 4, 5, 15, 16, 17, 18, 19, 20, 21, 31, 32, 33, 34}, /* 17 */ + { 22, 23, 24, 25, 27, 28, 29, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 9 */ + { 6, 7, 8, 9, 11, 12, 13, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1} /* 9 */ + }; + + const uint8_t nb_elems[3] = {17, 8, 8}; + + if (intra_pred_mode == 26 || intra_pred_mode == 10) { + // correct chroma intra prediction mode + return intra_pred_mode; + + } else { + uint8_t keybits, scan_dir, elem_idx=0; + + keybits = kvz_crypto_get_key(state->crypto_hdl, 5); + + scan_dir = SCAN_DIAG; + if (intra_pred_mode > 5 && intra_pred_mode < 15) { + scan_dir = SCAN_VER; + } + if (intra_pred_mode > 21 && intra_pred_mode < 31) { + scan_dir = SCAN_HOR; + } + + for (int i = 0; i < nb_elems[scan_dir]; i++) { + if (intra_pred_mode == sets[scan_dir][i]) { + elem_idx = i; + break; + } + } + + keybits = keybits % nb_elems[scan_dir]; + keybits = (elem_idx + keybits) % nb_elems[scan_dir]; + + return sets[scan_dir][keybits]; + } +} + + static void encode_intra_coding_unit(encoder_state_t * const state, cabac_data_t * const cabac, const cu_info_t * const cur_cu, - int x_ctb, int y_ctb, int depth) + int x, int y, int depth) { const videoframe_t * const frame = state->tile->frame; - uint8_t intra_pred_mode[4]; + uint8_t intra_pred_mode_actual[4]; + uint8_t *intra_pred_mode = intra_pred_mode_actual; + +#if KVZ_SEL_ENCRYPTION + const bool do_crypto = + !state->cabac.only_count && + state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_INTRA_MODE; +#else + const bool do_crypto = false; +#endif + + uint8_t intra_pred_mode_encry[4] = {-1, -1, -1, -1}; + if (do_crypto) { + intra_pred_mode = intra_pred_mode_encry; + } uint8_t intra_pred_mode_chroma = cur_cu->intra.mode_chroma; int8_t intra_preds[4][3] = {{-1, -1, -1},{-1, -1, -1},{-1, -1, -1},{-1, -1, -1}}; @@ -720,8 +764,8 @@ const int cu_width = LCU_WIDTH >> depth; for (int j = 0; j < num_pred_units; ++j) { - const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x_ctb << 3, j); - const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y_ctb << 3, j); + const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, j); + const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, j); const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y); const cu_info_t *left_pu = NULL; @@ -737,12 +781,26 @@ above_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y - 1); } - kvz_intra_get_dir_luma_predictor(pu_x, pu_y, - intra_preds[j], - cur_pu, - left_pu, above_pu); + if (do_crypto) { +#if KVZ_SEL_ENCRYPTION + // Need to wrap in preprocessor directives because this function is + // only defined when KVZ_SEL_ENCRYPTION is defined. + kvz_intra_get_dir_luma_predictor_encry(pu_x, pu_y, + intra_preds[j], + cur_pu, + left_pu, above_pu); +#endif + } else { + kvz_intra_get_dir_luma_predictor(pu_x, pu_y, + intra_preds[j], + cur_pu, + left_pu, above_pu); + } - intra_pred_mode[j] = cur_pu->intra.mode; + intra_pred_mode_actual[j] = cur_pu->intra.mode; + if (do_crypto) { + intra_pred_mode_encry[j] = intra_mode_encryption(state, cur_pu->intra.mode); + } for (int i = 0; i < 3; i++) { if (intra_preds[j][i] == intra_pred_mode[j]) { @@ -751,6 +809,26 @@ } } flag[j] = (mpm_preds[j] == -1) ? 0 : 1; + +#if KVZ_SEL_ENCRYPTION + // Need to wrap in preprocessor directives because + // cu_info_t.intra.mode_encry is only defined when KVZ_SEL_ENCRYPTION + // is defined. + if (do_crypto) { + // Set the modified intra_pred_mode of the current pu here to make it + // available from its neighbours for mpm decision. + + // FIXME: there might be a more efficient way to propagate mode_encry + // for future use from left and above PUs + const int pu_width = PU_GET_W(cur_cu->part_size, cu_width, j); + for (int y = pu_y; y < pu_y + pu_width; y += 4 ) { + for (int x = pu_x; x < pu_x + pu_width; x += 4) { + cu_info_t *cu = kvz_cu_array_at(frame->cu_array, x, y); + cu->intra.mode_encry = intra_pred_mode_encry[j]; + } + } + } +#endif } cabac->cur_ctx = &(cabac->ctx.intra_mode_model); @@ -790,14 +868,14 @@ unsigned pred_mode = 5; unsigned chroma_pred_modes[4] = {0, 26, 10, 1}; - if (intra_pred_mode_chroma == intra_pred_mode[0]) { + if (intra_pred_mode_chroma == intra_pred_mode_actual[0]) { pred_mode = 4; } else if (intra_pred_mode_chroma == 34) { // Angular 34 mode is possible only if intra pred mode is one of the // possible chroma pred modes, in which case it is signaled with that // duplicate mode. for (int i = 0; i < 4; ++i) { - if (intra_pred_mode[0] == chroma_pred_modes[i]) pred_mode = i; + if (intra_pred_mode_actual[0] == chroma_pred_modes[i]) pred_mode = i; } } else { for (int i = 0; i < 4; ++i) { @@ -829,7 +907,7 @@ } } - encode_transform_coeff(state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); + encode_transform_coeff(state, x, y, depth, 0, 0, 0); } static void encode_part_mode(encoder_state_t * const state, @@ -916,37 +994,48 @@ } void kvz_encode_coding_tree(encoder_state_t * const state, - uint16_t x_ctb, - uint16_t y_ctb, + uint16_t x, + uint16_t y, uint8_t depth) { cabac_data_t * const cabac = &state->cabac; + const encoder_control_t * const ctrl = state->encoder_control; const videoframe_t * const frame = state->tile->frame; - const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x_ctb, y_ctb); + const cu_info_t *cur_cu = kvz_cu_array_at_const(frame->cu_array, x, y); + + const cu_info_t *left_cu = NULL; + if (x > 0) { + left_cu = kvz_cu_array_at_const(frame->cu_array, x - 1, y); + } + const cu_info_t *above_cu = NULL; + if (y > 0) { + above_cu = kvz_cu_array_at_const(frame->cu_array, x, y - 1); + } + uint8_t split_flag = GET_SPLITDATA(cur_cu, depth); uint8_t split_model = 0; - //Absolute ctb - uint16_t abs_x_ctb = x_ctb + (state->tile->lcu_offset_x * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); - uint16_t abs_y_ctb = y_ctb + (state->tile->lcu_offset_y * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); + // Absolute coordinates + uint16_t abs_x = x + state->tile->offset_x; + uint16_t abs_y = y + state->tile->offset_y; // Check for slice border FIXME - uint8_t border_x = ((state->encoder_control->in.width) < (abs_x_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0; - uint8_t border_y = ((state->encoder_control->in.height) < (abs_y_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0; - uint8_t border_split_x = ((state->encoder_control->in.width) < ((abs_x_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1; - uint8_t border_split_y = ((state->encoder_control->in.height) < ((abs_y_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1; - uint8_t border = border_x | border_y; /*!< are we in any border CU */ + bool border_x = ctrl->in.width < abs_x + (LCU_WIDTH >> depth); + bool border_y = ctrl->in.height < abs_y + (LCU_WIDTH >> depth); + bool border_split_x = ctrl->in.width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)); + bool border_split_y = ctrl->in.height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)); + bool border = border_x || border_y; /*!< are we in any border CU */ // When not in MAX_DEPTH, insert split flag and split the blocks if needed if (depth != MAX_DEPTH) { // Implisit split flag when on border if (!border) { // Get left and top block split_flags and if they are present and true, increase model number - if (x_ctb > 0 && GET_SPLITDATA(kvz_videoframe_get_cu_const(frame, x_ctb - 1, y_ctb), depth) == 1) { + if (left_cu && GET_SPLITDATA(left_cu, depth) == 1) { split_model++; } - if (y_ctb > 0 && GET_SPLITDATA(kvz_videoframe_get_cu_const(frame, x_ctb, y_ctb - 1), depth) == 1) { + if (above_cu && GET_SPLITDATA(above_cu, depth) == 1) { split_model++; } @@ -956,18 +1045,19 @@ if (split_flag || border) { // Split blocks and remember to change x and y block positions - uint8_t change = 1<<(MAX_DEPTH-1-depth); - kvz_encode_coding_tree(state, x_ctb, y_ctb, depth + 1); // x,y + int offset = LCU_WIDTH >> (depth + 1); + + kvz_encode_coding_tree(state, x, y, depth + 1); // TODO: fix when other half of the block would not be completely over the border if (!border_x || border_split_x) { - kvz_encode_coding_tree(state, x_ctb + change, y_ctb, depth + 1); + kvz_encode_coding_tree(state, x + offset, y, depth + 1); } if (!border_y || border_split_y) { - kvz_encode_coding_tree(state, x_ctb, y_ctb + change, depth + 1); + kvz_encode_coding_tree(state, x, y + offset, depth + 1); } if (!border || (border_split_x && border_split_y)) { - kvz_encode_coding_tree(state, x_ctb + change, y_ctb + change, depth + 1); + kvz_encode_coding_tree(state, x + offset, y + offset, depth + 1); } return; } @@ -978,27 +1068,25 @@ CABAC_BIN(cabac, 1, "cu_transquant_bypass_flag"); } - // Encode skip flag + // Encode skip flag if (state->frame->slicetype != KVZ_SLICE_I) { - int8_t ctx_skip = 0; // uiCtxSkip = aboveskipped + leftskipped; - int ui; - int16_t num_cand = MRG_MAX_NUM_CANDS; - // Get left and top skipped flags and if they are present and true, increase context number - if (x_ctb > 0 && (kvz_videoframe_get_cu_const(frame, x_ctb - 1, y_ctb))->skipped) { + // uiCtxSkip = aboveskipped + leftskipped; + int8_t ctx_skip = 0; + + if (left_cu && left_cu->skipped) { ctx_skip++; } - - if (y_ctb > 0 && (kvz_videoframe_get_cu_const(frame, x_ctb, y_ctb - 1))->skipped) { + if (above_cu && above_cu->skipped) { ctx_skip++; } cabac->cur_ctx = &(cabac->ctx.cu_skip_flag_model[ctx_skip]); CABAC_BIN(cabac, cur_cu->skipped, "SkipFlag"); - // IF SKIP if (cur_cu->skipped) { + int16_t num_cand = MRG_MAX_NUM_CANDS; if (num_cand > 1) { - for (ui = 0; ui < num_cand - 1; ui++) { + for (int ui = 0; ui < num_cand - 1; ui++) { int32_t symbol = (ui != cur_cu->merge_idx); if (ui == 0) { cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model); @@ -1015,8 +1103,6 @@ } } - // ENDIF SKIP - // Prediction mode if (state->frame->slicetype != KVZ_SLICE_I) { cabac->cur_ctx = &(cabac->ctx.cu_pred_mode_model); @@ -1031,8 +1117,8 @@ const int cu_width = LCU_WIDTH >> depth; for (int i = 0; i < num_pu; ++i) { - const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x_ctb << 3, i); - const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y_ctb << 3, i); + const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, i); + const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, i); const int pu_w = PU_GET_W(cur_cu->part_size, cu_width, i); const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, i); const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y); @@ -1051,57 +1137,52 @@ // Code (possible) coeffs to bitstream if (cbf) { - encode_transform_coeff(state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); + encode_transform_coeff(state, x, y, depth, 0, 0, 0); } } } else if (cur_cu->type == CU_INTRA) { - encode_intra_coding_unit(state, cabac, cur_cu, x_ctb, y_ctb, depth); + encode_intra_coding_unit(state, cabac, cur_cu, x, y, depth); } - #if ENABLE_PCM == 1 +#if ENABLE_PCM // Code IPCM block - if (cur_cu->type == CU_PCM) { + else if (cur_cu->type == CU_PCM) { kvz_cabac_encode_bin_trm(cabac, 1); // IPCMFlag == 1 - kvz_cabac_finish(cabac); - kvz_bitstream_add_rbsp_trailing_bits(cabac.stream); - // PCM sample - { - unsigned y, x; - - pixel *base_y = &cur_pic->y_data[x_ctb * (LCU_WIDTH >> (MAX_DEPTH)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; - pixel *base_u = &cur_pic->u_data[(x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * encoder->in.width / 2)]; - pixel *base_v = &cur_pic->v_data[(x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * encoder->in.width / 2)]; + kvz_cabac_finish(cabac); + kvz_bitstream_add_rbsp_trailing_bits(cabac.stream); - // Luma - for (y = 0; y < LCU_WIDTH >> depth; y++) { - for (x = 0; x < LCU_WIDTH >> depth; x++) { - kvz_bitstream_put(cabac.stream, base_y[x + y * encoder->in.width], 8); - } - } + // PCM sample + pixel *base_y = &cur_pic->y_data[x + y * encoder->in.width]; + pixel *base_u = &cur_pic->u_data[x / 2 + y / 2 * encoder->in.width / 2]; + pixel *base_v = &cur_pic->v_data[x / 2 + y / 2 * encoder->in.width / 2]; + + // Luma + for (unsigned y_px = 0; y_px < LCU_WIDTH >> depth; y_px++) { + for (unsigned x_px = 0; x_px < LCU_WIDTH >> depth; x_px++) { + kvz_bitstream_put(cabac.stream, base_y[x_px + y_px * encoder->in.width], 8); + } + } - // Chroma - if (encoder->in.video_format != FORMAT_400) { - for (y = 0; y < LCU_WIDTH >> (depth + 1); y++) { - for (x = 0; x < LCU_WIDTH >> (depth + 1); x++) { - kvz_bitstream_put(cabac.stream, base_u[x + y * (encoder->in.width >> 1)], 8); - } + // Chroma + if (encoder->in.video_format != FORMAT_400) { + for (unsigned y_px = 0; y_px < LCU_WIDTH >> (depth + 1); y_px++) { + for (unsigned x_px = 0; x_px < LCU_WIDTH >> (depth + 1); x_px++) { + kvz_bitstream_put(cabac.stream, base_u[x_px + y_px * (encoder->in.width >> 1)], 8); } - for (y = 0; y < LCU_WIDTH >> (depth + 1); y++) { - for (x = 0; x < LCU_WIDTH >> (depth + 1); x++) { - kvz_bitstream_put(cabac.stream, base_v[x + y * (encoder->in.width >> 1)], 8); - } + } + for (unsigned y_px = 0; y_px < LCU_WIDTH >> (depth + 1); y_px++) { + for (unsigned x_px = 0; x_px < LCU_WIDTH >> (depth + 1); x_px++) { + kvz_bitstream_put(cabac.stream, base_v[x_px + y_px * (encoder->in.width >> 1)], 8); } } } - // end PCM sample - kvz_cabac_start(cabac); - } // end Code IPCM block -#endif /* END ENABLE_PCM */ - else { /* Should not happend */ + kvz_cabac_start(cabac); + } +#endif + + else { + // CU type not set. Should not happen. assert(0); exit(1); } - - /* end prediction unit */ - /* end coding_unit */ }
View file
kvazaar-1.1.0.tar.gz/src/encode_coding_tree.h -> kvazaar-1.2.0.tar.gz/src/encode_coding_tree.h
Changed
@@ -34,8 +34,9 @@ uint16_t y_ctb, uint8_t depth); -void kvz_encode_coeff_nxn(encoder_state_t *state, - coeff_t *coeff, +void kvz_encode_coeff_nxn(encoder_state_t * const state, + cabac_data_t * const cabac, + const coeff_t *coeff, uint8_t width, uint8_t type, int8_t scan_mode,
View file
kvazaar-1.1.0.tar.gz/src/encoder.c -> kvazaar-1.2.0.tar.gz/src/encoder.c
Changed
@@ -20,6 +20,9 @@ #include "encoder.h" +// This define is required for M_PI on Windows. +#define _USE_MATH_DEFINES +#include <math.h> #include <stdio.h> #include <stdlib.h> @@ -27,90 +30,170 @@ #include "strategyselector.h" +/** + * \brief Strength of QP adjustments when using adaptive QP for 360 video. + * + * Determined empirically. + */ +static const double ERP_AQP_STRENGTH = 3.0; + + static int encoder_control_init_gop_layer_weights(encoder_control_t * const); -static int size_of_wpp_ends(int threads) +static unsigned cfg_num_threads(void) { - // Based on the shape of the area where all threads can't yet run in parallel. - return 4 * threads * threads - 2 * threads; + if (kvz_g_hardware_flags.logical_cpu_count == 0) { + // Default to 4 if we don't know the number of CPUs. + return 4; + } + + return kvz_g_hardware_flags.logical_cpu_count; } -static int select_owf_auto(const kvz_config *const cfg) + +static int get_max_parallelism(const encoder_control_t *const encoder) { - if (cfg->intra_period == 1) { - if (cfg->wpp) { - // If wpp is on, select owf such that less than 15% of the - // frame is covered by the are threads can not work at the same time. - const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH); - const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH); - - // Find the largest number of threads per frame that satifies the - // the condition: wpp start/stop inefficiency takes up less than 15% - // of frame area. - int threads_per_frame = 1; - const int wpp_treshold = lcu_width * lcu_height * 15 / 100; - while ((threads_per_frame + 1) * 2 < lcu_width && - threads_per_frame + 1 < lcu_height && - size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold) { - ++threads_per_frame; - } + const int width_lcu = CEILDIV(encoder->cfg.width, LCU_WIDTH); + const int height_lcu = CEILDIV(encoder->cfg.height, LCU_WIDTH); + const int wpp_limit = MIN(height_lcu, CEILDIV(width_lcu, 2)); + const int par_frames = encoder->cfg.owf + 1; - const int threads = MAX(cfg->threads, 1); - const int frames = CEILDIV(threads, threads_per_frame); + int parallelism = 0; - // Convert from number of parallel frames to number of additional frames. - return CLIP(0, threads - 1, frames - 1); + if (encoder->cfg.intra_period == 1) { + int threads_per_frame; + if (encoder->cfg.wpp) { + // Usually limited by width because starting to code a CTU requires + // that the next two CTUs in the row above have been completed. + threads_per_frame = wpp_limit; } else { - // If wpp is not on, select owf such that there is enough - // tiles for twice the number of threads. - - int tiles_per_frame = cfg->tiles_width_count * cfg->tiles_height_count; - int threads = (cfg->threads > 1 ? cfg->threads : 1); - int frames = CEILDIV(threads * 4, tiles_per_frame); - - // Limit number of frames to 1.25x the number of threads for the case - // where there is only 1 tile per frame. - frames = CLIP(1, threads * 4 / 3, frames); - return frames - 1; + // One thread for each tile. + threads_per_frame = encoder->cfg.tiles_width_count * + encoder->cfg.tiles_height_count; } + // Divide by two since all frames cannot achieve the maximum + // parallelism all the time. + parallelism = par_frames * threads_per_frame / 2; + } else { - // Try and estimate a good number of parallel frames for inter. - const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH); - const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH); - int threads_per_frame = MIN(lcu_width / 2, lcu_height); - int threads = cfg->threads; - - // If all threads fit into one frame, at least two parallel frames should - // be used to reduce the effect of WPP spin-up and wind-down. - int frames = 1; - - while (threads > 0 && threads_per_frame > 0) { - frames += 1; - threads -= threads_per_frame; - threads_per_frame -= 2; - } + if (encoder->cfg.wpp) { + const int last_diagonal = (width_lcu - 1) + (height_lcu - 1) * 2; + + // Index of a diagonal. The diagonal contains CTUs whose coordinates + // satisfy x + 2*y == diagonal. We start the sum from the longest + // diagonal. + int diagonal = CEILDIV(last_diagonal, 2); + + // Difference between diagonal indices in consecutive frames. + const int frame_delay = 1 + encoder->max_inter_ref_lcu.right + + 2 * encoder->max_inter_ref_lcu.down; + int step = frame_delay; + int direction = -1; + + // Compute number of threads for each parallel frame. + for (int num_frames = 0; num_frames < par_frames; num_frames++) { + if (diagonal < 0 || diagonal > last_diagonal) { + // No room for more threads. + break; + } - if (cfg->gop_len && cfg->gop_lowdelay && cfg->gop_lp_definition.t > 1) { - // Temporal skipping makes every other frame very fast to encode so - // more parallel frames should be used. - frames *= 2; + // Count number of CTUs on the diagonal. + if (diagonal < MIN(2 * height_lcu, width_lcu)) { + parallelism += 1 + diagonal / 2; + } else { + parallelism += MIN( + wpp_limit, + height_lcu + CEILDIV(width_lcu, 2) - 1 - CEILDIV(diagonal, 2) + ); + } + diagonal += direction * step; + step += frame_delay; + direction = -direction; + } + + } else { + parallelism = encoder->cfg.tiles_width_count * + encoder->cfg.tiles_height_count; } - return CLIP(0, cfg->threads * 2 - 1, frames - 1); } + + return parallelism; } -static unsigned cfg_num_threads(void) +/** + * \brief Return weight for 360 degree ERP video + * + * Returns the scaling factor of area from equirectangular projection to + * spherical surface. + * + * \param y y-coordinate of the pixel + * \param h height of the picture + */ +static double ws_weight(int y, int h) +{ + return cos((y - 0.5 * h + 0.5) * (M_PI / h)); +} + + + +/** + * \brief Update ROI QPs for 360 video with equirectangular projection. + * + * Writes updated ROI parameters to encoder->cfg.roi. + * + * \param encoder encoder control + * \param orig_roi original delta QPs or NULL + * \param orig_width width of orig_roi + * \param orig_height height of orig_roi + */ +static void init_erp_aqp_roi(encoder_control_t* encoder, + int8_t *orig_roi, + int32_t orig_width, + int32_t orig_height) { - unsigned cpus = kvz_g_hardware_flags.physical_cpu_count; - unsigned fake_cpus = kvz_g_hardware_flags.logical_cpu_count - cpus; + // Update ROI with WS-PSNR delta QPs. + int height = encoder->in.height_in_lcu; + int width = orig_roi ? orig_width : 1; + + int frame_height = encoder->in.real_height; - // Default to 4 if we don't know the number of CPUs. - if (cpus == 0) return 4; + encoder->cfg.roi.width = width; + encoder->cfg.roi.height = height; + encoder->cfg.roi.dqps = calloc(width * height, sizeof(orig_roi[0])); + + double total_weight = 0.0; + for (int y = 0; y < frame_height; y++) { + total_weight += ws_weight(y, frame_height); + } + + for (int y_lcu = 0; y_lcu < height; y_lcu++) { + int y_orig = LCU_WIDTH * y_lcu; + int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig); + + double lcu_weight = 0.0; + for (int y = y_orig; y < y_orig + lcu_height; y++) { + lcu_weight += ws_weight(y, frame_height); + } + // Normalize. + lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height); + + int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight)); + + if (orig_roi) { + // If a ROI array already exists, we copy the existing values to the + // new array while adding qp_delta to each. + int y_roi = y_lcu * orig_height / height; + for (int x = 0; x < width; x++) { + encoder->cfg.roi.dqps[x + y_lcu * width] = + CLIP(-51, 51, orig_roi[x + y_roi * width] + qp_delta); + } - // 1.5 times the number of physical cores seems to be a good compromise - // when hyperthreading is available on Haswell. - return cpus + fake_cpus / 2; + } else { + // Otherwise, simply write qp_delta to the ROI array. + encoder->cfg.roi.dqps[y_lcu] = qp_delta; + } + } } @@ -148,21 +231,53 @@ encoder->cfg.tiles_height_split = NULL; encoder->cfg.slice_addresses_in_ts = NULL; - if (encoder->cfg.threads == -1) { - encoder->cfg.threads = cfg_num_threads(); - } - if (encoder->cfg.gop_len > 0) { if (encoder->cfg.gop_lowdelay) { kvz_config_process_lp_gop(&encoder->cfg); } } + encoder->max_inter_ref_lcu.right = 1; + encoder->max_inter_ref_lcu.down = 1; + + int max_threads = encoder->cfg.threads; + if (max_threads < 0) { + max_threads = cfg_num_threads(); + } + max_threads = MAX(1, max_threads); + // Need to set owf before initializing threadqueue. if (encoder->cfg.owf < 0) { - encoder->cfg.owf = select_owf_auto(&encoder->cfg); + int best_parallelism = 0; + + for (encoder->cfg.owf = 0; true; encoder->cfg.owf++) { + int parallelism = get_max_parallelism(encoder); + + if (parallelism <= best_parallelism) { + // No improvement over previous OWF. + encoder->cfg.owf--; + break; + } + + best_parallelism = parallelism; + if (parallelism >= max_threads) { + // Cannot have more parallelism than there are threads. + break; + } + } + + // Add two frames so that we have frames ready to be coded when one is + // completed. + encoder->cfg.owf += 2; + fprintf(stderr, "--owf=auto value set to %d.\n", encoder->cfg.owf); } + + if (encoder->cfg.threads < 0) { + encoder->cfg.threads = MIN(max_threads, get_max_parallelism(encoder)); + fprintf(stderr, "--threads=auto value set to %d.\n", encoder->cfg.threads); + } + if (encoder->cfg.source_scan_type != KVZ_INTERLACING_NONE) { // If using interlaced coding with OWF, the OWF has to be an even number // to ensure that the pair of fields will be output for the same picture. @@ -171,11 +286,8 @@ } } - encoder->threadqueue = MALLOC(threadqueue_queue_t, 1); - if (!encoder->threadqueue || - !kvz_threadqueue_init(encoder->threadqueue, - encoder->cfg.threads, - encoder->cfg.owf > 0)) { + encoder->threadqueue = kvz_threadqueue_init(encoder->cfg.threads); + if (!encoder->threadqueue) { fprintf(stderr, "Could not initialize threadqueue.\n"); goto init_failed; } @@ -219,15 +331,30 @@ goto init_failed; } - // Copy delta QP array for ROI coding. - if (cfg->roi.dqps) { + if (cfg->erp_aqp) { + init_erp_aqp_roi(encoder, + cfg->roi.dqps, + cfg->roi.width, + cfg->roi.height); + + } else if (cfg->roi.dqps) { + // Copy delta QP array for ROI coding. const size_t roi_size = encoder->cfg.roi.width * encoder->cfg.roi.height; encoder->cfg.roi.dqps = calloc(roi_size, sizeof(cfg->roi.dqps[0])); memcpy(encoder->cfg.roi.dqps, cfg->roi.dqps, roi_size * sizeof(*cfg->roi.dqps)); + } + encoder->lcu_dqp_enabled = cfg->target_bitrate > 0 || encoder->cfg.roi.dqps; + + // When tr_depth_inter is equal to 0, inter transform split flag defaults + // to 1 for SMP and AMP partition units. We want to avoid the extra + // transform split so we set tr_depth_inter to 1 when SMP or AMP + // partition modes are enabled. + encoder->tr_depth_inter = (encoder->cfg.smp_enable || encoder->cfg.amp_enable) ? 1 : 0; + //Tiles encoder->tiles_enable = encoder->cfg.tiles_width_count > 1 || encoder->cfg.tiles_height_count > 1; @@ -467,7 +594,7 @@ // lossless coding. if (encoder->cfg.lossless) { encoder->cfg.deblock_enable = false; - encoder->cfg.sao_enable = false; + encoder->cfg.sao_type = false; encoder->cfg.signhide_enable = false; encoder->cfg.trskip_enable = false; } @@ -490,6 +617,12 @@ encoder->cfg.vps_period = -1; } + if(encoder->cfg.optional_key){ + encoder->cfg.optional_key = MALLOC(uint8_t,16); + if (!encoder->cfg.optional_key) goto init_failed; + memcpy(encoder->cfg.optional_key, cfg->optional_key, 16); + } + return encoder; init_failed: @@ -520,13 +653,12 @@ FREE_POINTER(encoder->tiles_tile_id); FREE_POINTER(encoder->cfg.roi.dqps); + FREE_POINTER(encoder->cfg.optional_key); kvz_scalinglist_destroy(&encoder->scaling_list); - if (encoder->threadqueue) { - kvz_threadqueue_finalize(encoder->threadqueue); - } - FREE_POINTER(encoder->threadqueue); + kvz_threadqueue_free(encoder->threadqueue); + encoder->threadqueue = NULL; free(encoder); }
View file
kvazaar-1.1.0.tar.gz/src/encoder.h -> kvazaar-1.2.0.tar.gz/src/encoder.h
Changed
@@ -118,11 +118,21 @@ //! Picture weights when GOP is used. double gop_layer_weights[MAX_GOP_LAYERS]; + bool lcu_dqp_enabled; + + int tr_depth_inter; + //! pic_parameter_set struct { uint8_t dependent_slice_segments_enabled_flag; } pps; + //! Maximum motion vector distance as number of LCUs. + struct { + int right; + int down; + } max_inter_ref_lcu; + } encoder_control_t; encoder_control_t* kvz_encoder_control_init(const kvz_config *cfg);
View file
kvazaar-1.1.0.tar.gz/src/encoder_state-bitstream.c -> kvazaar-1.2.0.tar.gz/src/encoder_state-bitstream.c
Changed
@@ -389,7 +389,7 @@ WRITE_UE(stream, MAX_DEPTH, "log2_diff_max_min_coding_block_size"); WRITE_UE(stream, 0, "log2_min_transform_block_size_minus2"); // 4x4 WRITE_UE(stream, 3, "log2_diff_max_min_transform_block_size"); // 4x4...32x32 - WRITE_UE(stream, TR_DEPTH_INTER, "max_transform_hierarchy_depth_inter"); + WRITE_UE(stream, encoder->tr_depth_inter, "max_transform_hierarchy_depth_inter"); WRITE_UE(stream, encoder->cfg.tr_depth_intra, "max_transform_hierarchy_depth_intra"); // scaling list @@ -401,7 +401,7 @@ WRITE_U(stream, (encoder->cfg.amp_enable ? 1 : 0), 1, "amp_enabled_flag"); - WRITE_U(stream, encoder->cfg.sao_enable ? 1 : 0, 1, + WRITE_U(stream, encoder->cfg.sao_type ? 1 : 0, 1, "sample_adaptive_offset_enabled_flag"); WRITE_U(stream, ENABLE_PCM, 1, "pcm_enabled_flag"); #if ENABLE_PCM == 1 @@ -455,7 +455,7 @@ WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); WRITE_U(stream, encoder->cfg.trskip_enable, 1, "transform_skip_enabled_flag"); - if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps != NULL) { + if (encoder->lcu_dqp_enabled) { // Use separate QP for each LCU when rate control is enabled. WRITE_U(stream, 1, 1, "cu_qp_delta_enabled_flag"); WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); @@ -544,7 +544,7 @@ s += sprintf(s, " %dx%d", cfg->width, cfg->height); s += sprintf(s, " deblock=%d:%d:%d", cfg->deblock_enable, cfg->deblock_beta, cfg->deblock_tc); - s += sprintf(s, " sao=%d", cfg->sao_enable); + s += sprintf(s, " sao=%d", cfg->sao_type); s += sprintf(s, " intra_period=%d", cfg->intra_period); s += sprintf(s, " qp=%d", cfg->qp); s += sprintf(s, " ref=%d", cfg->ref_frames); @@ -731,7 +731,7 @@ WRITE_UE(stream, encoder->cfg.gop_len?delta_poc - last_poc - 1:0, "delta_poc_s0_minus1"); last_poc = delta_poc; - WRITE_U(stream,1,1, "used_by_curr_pic_s0_flag"); + WRITE_U(stream, !state->frame->is_irap, 1, "used_by_curr_pic_s0_flag"); } last_poc = 0; poc_shift = 0; @@ -758,12 +758,12 @@ WRITE_UE(stream, encoder->cfg.gop_len ? delta_poc - last_poc - 1 : 0, "delta_poc_s1_minus1"); last_poc = delta_poc; - WRITE_U(stream, 1, 1, "used_by_curr_pic_s1_flag"); + WRITE_U(stream, !state->frame->is_irap, 1, "used_by_curr_pic_s1_flag"); } //WRITE_UE(stream, 0, "short_term_ref_pic_set_idx"); if (state->encoder_control->cfg.tmvp_enable) { - WRITE_U(stream, ref_negative?1:0, 1, "slice_temporal_mvp_enabled_flag"); + WRITE_U(stream, ref_negative ? 1 : 0, 1, "slice_temporal_mvp_enabled_flag"); } } @@ -771,7 +771,7 @@ //end if - if (encoder->cfg.sao_enable) { + if (encoder->cfg.sao_type) { WRITE_U(stream, 1, 1, "slice_sao_luma_flag"); if (encoder->chroma_format != KVZ_CSP_400) { WRITE_U(stream, 1, 1, "slice_sao_chroma_flag"); @@ -942,9 +942,7 @@ encoder_state_t * state, bool independent) { - uint8_t nal_type = (state->frame->is_idr_frame ? KVZ_NAL_IDR_W_RADL : KVZ_NAL_TRAIL_R); - - kvz_nal_write(stream, nal_type, 0, state->frame->first_nal); + kvz_nal_write(stream, state->frame->pictype, 0, state->frame->first_nal); state->frame->first_nal = false; kvz_encoder_state_write_bitstream_slice_header(stream, state, independent); @@ -1018,19 +1016,13 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } - { - PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); - encoder_state_write_bitstream_children(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->frame->num, state->type); - } - + encoder_state_write_bitstream_children(state); + if (state->encoder_control->cfg.hash != KVZ_HASH_NONE) { - PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); // Calculate checksum add_checksum(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->frame->num, state->type); } - + //Get bitstream length for stats uint64_t newpos = kvz_bitstream_tell(stream); state->stats_bitstream_length = (newpos >> 3) - (curpos >> 3);
View file
kvazaar-1.1.0.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-1.2.0.tar.gz/src/encoder_state-ctors_dtors.c
Changed
@@ -29,7 +29,6 @@ #include "encoder.h" #include "encoder_state-geometry.h" #include "encoderstate.h" -#include "extras/crypto.h" #include "image.h" #include "imagelist.h" #include "kvazaar.h" @@ -82,16 +81,12 @@ printf("Error allocating videoframe!\r\n"); return 0; } - - // Init coeff data table - //FIXME: move them - state->tile->frame->coeff_y = MALLOC(coeff_t, width * height); - state->tile->frame->coeff_u = MALLOC(coeff_t, (width * height) >> 2); - state->tile->frame->coeff_v = MALLOC(coeff_t, (width * height) >> 2); - + state->tile->lcu_offset_x = lcu_offset_x; state->tile->lcu_offset_y = lcu_offset_y; - + state->tile->offset_x = lcu_offset_x * LCU_WIDTH; + state->tile->offset_y = lcu_offset_y * LCU_WIDTH; + state->tile->lcu_offset_in_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_offset_x + lcu_offset_y * encoder->in.width_in_lcu]; // hor_buf_search and ver_buf_search store single row/col from each LCU row/col. @@ -105,13 +100,15 @@ state->tile->hor_buf_search = kvz_yuv_t_alloc(luma_size, chroma_size_hor); state->tile->ver_buf_search = kvz_yuv_t_alloc(luma_size, chroma_size_ver); - - if (encoder->cfg.sao_enable) { + + if (encoder->cfg.sao_type) { state->tile->hor_buf_before_sao = kvz_yuv_t_alloc(luma_size, chroma_size_hor); + state->tile->ver_buf_before_sao = kvz_yuv_t_alloc(luma_size, chroma_size_ver); } else { state->tile->hor_buf_before_sao = NULL; + state->tile->ver_buf_before_sao = NULL; } - + if (encoder->cfg.wpp) { int num_jobs = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu; state->tile->wf_jobs = MALLOC(threadqueue_job_t*, num_jobs); @@ -132,21 +129,27 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) { if (state->tile == NULL) return; - if (state->tile->hor_buf_before_sao) kvz_yuv_t_free(state->tile->hor_buf_before_sao); - kvz_yuv_t_free(state->tile->hor_buf_search); kvz_yuv_t_free(state->tile->ver_buf_search); - + kvz_yuv_t_free(state->tile->hor_buf_before_sao); + kvz_yuv_t_free(state->tile->ver_buf_before_sao); + + if (state->encoder_control->cfg.wpp) { + int num_jobs = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu; + for (int i = 0; i < num_jobs; ++i) { + kvz_threadqueue_free_job(&state->tile->wf_jobs[i]); + } + } + kvz_videoframe_free(state->tile->frame); state->tile->frame = NULL; - if (state->encoder_control->cfg.crypto_features && state->tile->dbs_g) { - DeleteCryptoC(state->tile->dbs_g); - } FREE_POINTER(state->tile->wf_jobs); } -static int encoder_state_config_slice_init(encoder_state_t * const state, - const int start_address_in_ts, const int end_address_in_ts) { +static int encoder_state_config_slice_init(encoder_state_t * const state, + const int start_address_in_ts, + const int end_address_in_ts) +{ state->slice->id = -1; for (int i = 0; i < state->encoder_control->slice_count; ++i) { if (state->encoder_control->slice_addresses_in_ts[i] == start_address_in_ts) { @@ -308,6 +311,7 @@ child_state->parent = parent_state; child_state->children = MALLOC(encoder_state_t, 1); child_state->children[0].encoder_control = NULL; + child_state->crypto_hdl = NULL; child_state->tqj_bitstream_written = NULL; child_state->tqj_recon_done = NULL; @@ -326,7 +330,6 @@ return 0; } - child_state->tile->dbs_g = NULL; // Not used. The used state is in the sub-tile. child_state->slice = MALLOC(encoder_state_config_slice_t, 1); if (!child_state->slice || !encoder_state_config_slice_init(child_state, 0, encoder->in.width_in_lcu * encoder->in.height_in_lcu - 1)) { fprintf(stderr, "Could not initialize encoder_state->slice!\n"); @@ -461,9 +464,6 @@ new_child->type = ENCODER_STATE_TYPE_TILE; new_child->frame = child_state->frame; new_child->tile = MALLOC(encoder_state_config_tile_t, 1); - if (child_state->encoder_control->cfg.crypto_features) { - new_child->tile->dbs_g = CreateC(); - } new_child->slice = child_state->slice; new_child->wfrow = child_state->wfrow; @@ -706,4 +706,7 @@ } kvz_bitstream_finalize(&state->stream); + + kvz_threadqueue_free_job(&state->tqj_recon_done); + kvz_threadqueue_free_job(&state->tqj_bitstream_written); }
View file
kvazaar-1.1.0.tar.gz/src/encoderstate.c -> kvazaar-1.2.0.tar.gz/src/encoderstate.c
Changed
@@ -35,6 +35,10 @@ #include "sao.h" #include "search.h" #include "tables.h" +#include "threadqueue.h" + +#define SAO_BUF_WIDTH (LCU_WIDTH + SAO_DELAY_PX + 2) +#define SAO_BUF_WIDTH_C (SAO_BUF_WIDTH / 2) int kvz_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) { @@ -48,7 +52,127 @@ return 1; } -static void encoder_state_recdata_to_bufs(encoder_state_t * const state, const lcu_order_element_t * const lcu, yuv_t * const hor_buf, yuv_t * const ver_buf) { +/** + * \brief Save edge pixels before SAO to buffers. + * + * Copies pixels at the edges of the area that will be filtered with SAO to + * the given buffers. If deblocking is enabled, the pixels must have been + * deblocked before this. + * + * The saved pixels will be needed later when doing SAO for the neighboring + * areas. + */ +static void encoder_state_recdata_before_sao_to_bufs( + encoder_state_t * const state, + const lcu_order_element_t * const lcu, + yuv_t * const hor_buf, + yuv_t * const ver_buf) +{ + videoframe_t* const frame = state->tile->frame; + + if (hor_buf && lcu->below) { + // Copy the bottommost row that will be filtered with SAO to the + // horizontal buffer. + vector2d_t pos = { + .x = lcu->position_px.x, + .y = lcu->position_px.y + LCU_WIDTH - SAO_DELAY_PX - 1, + }; + // Copy all pixels that have been deblocked. + int length = lcu->size.x - DEBLOCK_DELAY_PX; + + if (!lcu->right) { + // If there is no LCU to the right, the last pixels will be + // filtered too. + length += DEBLOCK_DELAY_PX; + } + + if (lcu->left) { + // The rightmost pixels of the CTU to the left will also be filtered. + pos.x -= DEBLOCK_DELAY_PX; + length += DEBLOCK_DELAY_PX; + } + + const unsigned from_index = pos.x + pos.y * frame->rec->stride; + // NOTE: The horizontal buffer is indexed by + // x_px + y_lcu * frame->width + // where x_px is in pixels and y_lcu in number of LCUs. + const unsigned to_index = pos.x + lcu->position.y * frame->width; + + kvz_pixels_blit(&frame->rec->y[from_index], + &hor_buf->y[to_index], + length, 1, + frame->rec->stride, + frame->width); + + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + const unsigned from_index_c = (pos.x / 2) + (pos.y / 2) * frame->rec->stride / 2; + const unsigned to_index_c = (pos.x / 2) + lcu->position.y * frame->width / 2; + + kvz_pixels_blit(&frame->rec->u[from_index_c], + &hor_buf->u[to_index_c], + length / 2, 1, + frame->rec->stride / 2, + frame->width / 2); + kvz_pixels_blit(&frame->rec->v[from_index_c], + &hor_buf->v[to_index_c], + length / 2, 1, + frame->rec->stride / 2, + frame->width / 2); + } + } + + if (ver_buf && lcu->right) { + // Copy the rightmost column that will be filtered with SAO to the + // vertical buffer. + vector2d_t pos = { + .x = lcu->position_px.x + LCU_WIDTH - SAO_DELAY_PX - 1, + .y = lcu->position_px.y, + }; + int length = lcu->size.y - DEBLOCK_DELAY_PX; + + if (!lcu->below) { + // If there is no LCU below, the last pixels will be filtered too. + length += DEBLOCK_DELAY_PX; + } + + if (lcu->above) { + // The bottommost pixels of the CTU above will also be filtered. + pos.y -= DEBLOCK_DELAY_PX; + length += DEBLOCK_DELAY_PX; + } + + const unsigned from_index = pos.x + pos.y * frame->rec->stride; + // NOTE: The vertical buffer is indexed by + // x_lcu * frame->height + y_px + // where x_lcu is in number of LCUs and y_px in pixels. + const unsigned to_index = lcu->position.x * frame->height + pos.y; + + kvz_pixels_blit(&frame->rec->y[from_index], + &ver_buf->y[to_index], + 1, length, + frame->rec->stride, 1); + + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + const unsigned from_index_c = (pos.x / 2) + (pos.y / 2) * frame->rec->stride / 2; + const unsigned to_index_c = lcu->position.x * frame->height / 2 + pos.y / 2; + + kvz_pixels_blit(&frame->rec->u[from_index_c], + &ver_buf->u[to_index_c], + 1, length / 2, + frame->rec->stride / 2, 1); + kvz_pixels_blit(&frame->rec->v[from_index_c], + &ver_buf->v[to_index_c], + 1, length / 2, + frame->rec->stride / 2, 1); + } + } +} + +static void encoder_state_recdata_to_bufs(encoder_state_t * const state, + const lcu_order_element_t * const lcu, + yuv_t * const hor_buf, + yuv_t * const ver_buf) +{ videoframe_t* const frame = state->tile->frame; if (hor_buf) { @@ -107,6 +231,209 @@ } +/** + * \brief Do SAO reconstuction for all available pixels. + * + * Does SAO reconstruction for all pixels that are available after the + * given LCU has been deblocked. This means the following pixels: + * - bottom-right block of SAO_DELAY_PX times SAO_DELAY_PX in the lcu to + * the left and up + * - the rightmost SAO_DELAY_PX pixels of the LCU to the left (excluding + * the bottommost pixel) + * - the bottommost SAO_DELAY_PX pixels of the LCU above (excluding the + * rightmost pixels) + * - all pixels inside the LCU, excluding the rightmost SAO_DELAY_PX and + * bottommost SAO_DELAY_PX + */ +static void encoder_sao_reconstruct(const encoder_state_t *const state, + const lcu_order_element_t *const lcu) +{ + videoframe_t *const frame = state->tile->frame; + + // Temporary buffers for SAO input pixels. + kvz_pixel sao_buf_y_array[SAO_BUF_WIDTH * SAO_BUF_WIDTH]; + kvz_pixel sao_buf_u_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C]; + kvz_pixel sao_buf_v_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C]; + + // Pointers to the top-left pixel of the LCU in the buffers. + kvz_pixel *const sao_buf_y = &sao_buf_y_array[(SAO_DELAY_PX + 1) * (SAO_BUF_WIDTH + 1)]; + kvz_pixel *const sao_buf_u = &sao_buf_u_array[(SAO_DELAY_PX/2 + 1) * (SAO_BUF_WIDTH_C + 1)]; + kvz_pixel *const sao_buf_v = &sao_buf_v_array[(SAO_DELAY_PX/2 + 1) * (SAO_BUF_WIDTH_C + 1)]; + + const int x_offsets[3] = { + // If there is an lcu to the left, we need to filter its rightmost + // pixels. + lcu->left ? -SAO_DELAY_PX : 0, + 0, + // If there is an lcu to the right, the rightmost pixels of this LCU + // are filtered when filtering that LCU. Otherwise we filter them now. + lcu->size.x - (lcu->right ? SAO_DELAY_PX : 0), + }; + + const int y_offsets[3] = { + // If there is an lcu above, we need to filter its bottommost pixels. + lcu->above ? -SAO_DELAY_PX : 0, + 0, + // If there is an lcu below, the bottommost pixels of this LCU are + // filtered when filtering that LCU. Otherwise we filter them now. + lcu->size.y - (lcu->below ? SAO_DELAY_PX : 0), + }; + + // Number of pixels around the block that need to be copied to the + // buffers. + const int border_left = lcu->left ? 1 : 0; + const int border_right = lcu->right ? 1 : 0; + const int border_above = lcu->above ? 1 : 0; + const int border_below = lcu->below ? 1 : 0; + + // Index of the pixel at the intersection of the top and left borders. + const int border_index = (x_offsets[0] - border_left) + + (y_offsets[0] - border_above) * SAO_BUF_WIDTH; + const int border_index_c = (x_offsets[0]/2 - border_left) + + (y_offsets[0]/2 - border_above) * SAO_BUF_WIDTH_C; + // Width and height of the whole area to filter. + const int width = x_offsets[2] - x_offsets[0]; + const int height = y_offsets[2] - y_offsets[0]; + + // Copy bordering pixels from above and left to buffers. + if (lcu->above) { + const int from_index = (lcu->position_px.x + x_offsets[0] - border_left) + + (lcu->position.y - 1) * frame->width; + kvz_pixels_blit(&state->tile->hor_buf_before_sao->y[from_index], + &sao_buf_y[border_index], + width + border_left + border_right, + 1, + frame->width, + SAO_BUF_WIDTH); + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + const int from_index_c = (lcu->position_px.x + x_offsets[0])/2 - border_left + + (lcu->position.y - 1) * frame->width/2; + kvz_pixels_blit(&state->tile->hor_buf_before_sao->u[from_index_c], + &sao_buf_u[border_index_c], + width/2 + border_left + border_right, + 1, + frame->width/2, + SAO_BUF_WIDTH_C); + kvz_pixels_blit(&state->tile->hor_buf_before_sao->v[from_index_c], + &sao_buf_v[border_index_c], + width/2 + border_left + border_right, + 1, + frame->width/2, + SAO_BUF_WIDTH_C); + } + } + if (lcu->left) { + const int from_index = (lcu->position.x - 1) * frame->height + + (lcu->position_px.y + y_offsets[0] - border_above); + kvz_pixels_blit(&state->tile->ver_buf_before_sao->y[from_index], + &sao_buf_y[border_index], + 1, + height + border_above + border_below, + 1, + SAO_BUF_WIDTH); + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + const int from_index_c = (lcu->position.x - 1) * frame->height/2 + + (lcu->position_px.y + y_offsets[0])/2 - border_above; + kvz_pixels_blit(&state->tile->ver_buf_before_sao->u[from_index_c], + &sao_buf_u[border_index_c], + 1, + height/2 + border_above + border_below, + 1, + SAO_BUF_WIDTH_C); + kvz_pixels_blit(&state->tile->ver_buf_before_sao->v[from_index_c], + &sao_buf_v[border_index_c], + 1, + height/2 + border_above + border_below, + 1, + SAO_BUF_WIDTH_C); + } + } + // Copy pixels that will be filtered and bordering pixels from right and + // below. + const int from_index = (lcu->position_px.x + x_offsets[0]) + + (lcu->position_px.y + y_offsets[0]) * frame->rec->stride; + const int to_index = x_offsets[0] + y_offsets[0] * SAO_BUF_WIDTH; + kvz_pixels_blit(&frame->rec->y[from_index], + &sao_buf_y[to_index], + width + border_right, + height + border_below, + frame->rec->stride, + SAO_BUF_WIDTH); + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + const int from_index_c = (lcu->position_px.x + x_offsets[0])/2 + + (lcu->position_px.y + y_offsets[0])/2 * frame->rec->stride/2; + const int to_index_c = x_offsets[0]/2 + y_offsets[0]/2 * SAO_BUF_WIDTH_C; + kvz_pixels_blit(&frame->rec->u[from_index_c], + &sao_buf_u[to_index_c], + width/2 + border_right, + height/2 + border_below, + frame->rec->stride/2, + SAO_BUF_WIDTH_C); + kvz_pixels_blit(&frame->rec->v[from_index_c], + &sao_buf_v[to_index_c], + width/2 + border_right, + height/2 + border_below, + frame->rec->stride/2, + SAO_BUF_WIDTH_C); + } + + // We filter the pixels in four parts: + // 1. Pixels that belong to the LCU above and to the left + // 2. Pixels that belong to the LCU above + // 3. Pixels that belong to the LCU to the left + // 4. Pixels that belong to the current LCU + for (int y_offset_index = 0; y_offset_index < 2; y_offset_index++) { + for (int x_offset_index = 0; x_offset_index < 2; x_offset_index++) { + const int x = x_offsets[x_offset_index]; + const int y = y_offsets[y_offset_index]; + const int width = x_offsets[x_offset_index + 1] - x; + const int height = y_offsets[y_offset_index + 1] - y; + + if (width == 0 || height == 0) continue; + + const int lcu_x = (lcu->position_px.x + x) >> LOG2_LCU_WIDTH; + const int lcu_y = (lcu->position_px.y + y) >> LOG2_LCU_WIDTH; + const int lcu_index = lcu_x + lcu_y * frame->width_in_lcu; + const sao_info_t *sao_luma = &frame->sao_luma[lcu_index]; + const sao_info_t *sao_chroma = &frame->sao_chroma[lcu_index]; + + kvz_sao_reconstruct(state, + &sao_buf_y[x + y * SAO_BUF_WIDTH], + SAO_BUF_WIDTH, + lcu->position_px.x + x, + lcu->position_px.y + y, + width, + height, + sao_luma, + COLOR_Y); + + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + // Coordinates in chroma pixels. + int x_c = x >> 1; + int y_c = y >> 1; + + kvz_sao_reconstruct(state, + &sao_buf_u[x_c + y_c * SAO_BUF_WIDTH_C], + SAO_BUF_WIDTH_C, + lcu->position_px.x / 2 + x_c, + lcu->position_px.y / 2 + y_c, + width / 2, + height / 2, + sao_chroma, + COLOR_U); + kvz_sao_reconstruct(state, + &sao_buf_v[x_c + y_c * SAO_BUF_WIDTH_C], + SAO_BUF_WIDTH_C, + lcu->position_px.x / 2 + x_c, + lcu->position_px.y / 2 + y_c, + width / 2, + height / 2, + sao_chroma, + COLOR_V); + } + } + } +} static void encode_sao_color(encoder_state_t * const state, sao_info_t *sao, color_t color_i) @@ -273,62 +600,49 @@ kvz_set_lcu_lambda_and_qp(state, lcu->position); + lcu_coeff_t coeff; + state->coeff = &coeff; + //This part doesn't write to bitstream, it's only search, deblock and sao - kvz_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search); - + encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search); if (encoder->cfg.deblock_enable) { - if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps != NULL) { + if (encoder->lcu_dqp_enabled) { set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, false); } kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y); } - if (encoder->cfg.sao_enable) { + if (encoder->cfg.sao_type) { + // Save the post-deblocking but pre-SAO pixels of the LCU to a buffer + // so that they can be used in SAO reconstruction later. + encoder_state_recdata_before_sao_to_bufs(state, + lcu, + state->tile->hor_buf_before_sao, + state->tile->ver_buf_before_sao); kvz_sao_search_lcu(state, lcu->position.x, lcu->position.y); + encoder_sao_reconstruct(state, lcu); } - // Copy LCU cu_array to main states cu_array, because that is the only one - // which is given to the next frame through image_list_t. - { - PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); - - encoder_state_t *main_state = state; - while (main_state->parent) main_state = main_state->parent; - assert(main_state != state); - - const unsigned tile_x_px = state->tile->lcu_offset_x << LOG2_LCU_WIDTH; - const unsigned tile_y_px = state->tile->lcu_offset_y << LOG2_LCU_WIDTH; - const unsigned x_px = lcu->position_px.x; - const unsigned y_px = lcu->position_px.y; - kvz_cu_array_copy(main_state->tile->frame->cu_array, - x_px + tile_x_px, y_px + tile_y_px, - state->tile->frame->cu_array, - x_px, y_px, - LCU_WIDTH, LCU_WIDTH); - - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=copy_cuinfo,frame=%d,tile=%d", state->frame->num, state->tile->id); - } - //Now write data to bitstream (required to have a correct CABAC state) const uint64_t existing_bits = kvz_bitstream_tell(&state->stream); - + //Encode SAO - if (encoder->cfg.sao_enable) { + if (encoder->cfg.sao_type) { encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]); } - // QP delta is not used when rate control is turned off. - state->must_code_qp_delta = ( - state->encoder_control->cfg.target_bitrate > 0 - || state->encoder_control->cfg.roi.dqps != NULL); + state->must_code_qp_delta = encoder->lcu_dqp_enabled; //Encode coding tree - kvz_encode_coding_tree(state, lcu->position.x << MAX_DEPTH, lcu->position.y << MAX_DEPTH, 0); + kvz_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0); + + // Coeffs are not needed anymore. + state->coeff = NULL; bool end_of_slice_segment_flag; if (state->encoder_control->cfg.slices & KVZ_SLICES_WPP) { @@ -366,6 +680,8 @@ kvz_bitstream_align_zero(state->cabac.stream); kvz_cabac_start(&state->cabac); + + kvz_crypto_delete(&state->crypto_hdl); } } @@ -383,38 +699,23 @@ } } } - - if (encoder->cfg.sao_enable && lcu->above) { - // Add the post-deblocking but pre-SAO pixels of the LCU row above this - // row to a buffer so this row can use them on it's own SAO - // reconstruction. - - // The pixels need to be taken to from the LCU to the top-left, because - // not all of the pixels could be deblocked before prediction of this - // LCU was reconstructed. - if (lcu->above->left) { - encoder_state_recdata_to_bufs(state, lcu->above->left, state->tile->hor_buf_before_sao, NULL); - } - // If this is the last LCU in the row, we can save the pixels from the top - // also, as they have been fully deblocked. - if (!lcu->right) { - encoder_state_recdata_to_bufs(state, lcu->above, state->tile->hor_buf_before_sao, NULL); - } - } } -static void encoder_state_encode_leaf(encoder_state_t * const state) { +static void encoder_state_encode_leaf(encoder_state_t * const state) +{ assert(state->is_leaf); assert(state->lcu_order_count > 0); - const kvz_config *cfg = &state->encoder_control->cfg; - if (cfg->crypto_features) { - InitC(state->tile->dbs_g); - state->tile->m_prev_pos = 0; - } + const encoder_control_t *ctrl = state->encoder_control; + const kvz_config *cfg = &ctrl->cfg; state->ref_qp = state->frame->QP; + if (cfg->crypto_features) { + state->crypto_hdl = kvz_crypto_create(cfg); + state->crypto_prev_pos = 0; + } + // Select whether to encode the frame/tile in current thread or to define // wavefront jobs for other threads to handle. bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW; @@ -424,38 +725,24 @@ // frame is encoded. Deblocking and SAO search is done during LCU encoding. for (int i = 0; i < state->lcu_order_count; ++i) { - PERFORMANCE_MEASURE_START(KVZ_PERF_LCU); - encoder_state_worker_encode_lcu(&state->lcu_order[i]); - -#ifdef KVZ_DEBUG - { - const lcu_order_element_t * const lcu = &state->lcu_order[i]; - PERFORMANCE_MEASURE_END(KVZ_PERF_LCU, state->encoder_control->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->frame->num, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1); - } -#endif //KVZ_DEBUG - } - - if (state->encoder_control->cfg.sao_enable) { - PERFORMANCE_MEASURE_START(KVZ_PERF_SAOREC); - kvz_sao_reconstruct_frame(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_SAOREC, state->encoder_control->threadqueue, "type=kvz_sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->frame->num, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count - 1].position.y + state->tile->lcu_offset_y, - state->tile->lcu_offset_x * LCU_WIDTH, state->tile->frame->width + state->tile->lcu_offset_x * LCU_WIDTH - 1, - state->tile->lcu_offset_y * LCU_WIDTH, state->tile->frame->height + state->tile->lcu_offset_y * LCU_WIDTH - 1 - ); } } else { // Add each LCU in the wavefront row as it's own job to the queue. // Select which frame dependancies should be set to. const encoder_state_t * ref_state = NULL; - if (cfg->gop_lowdelay && - cfg->gop_len > 0 && - state->previous_encoder_state != state) + + if (state->frame->slicetype == KVZ_SLICE_I) { + // I-frames have no references. + ref_state = NULL; + } else if (cfg->gop_lowdelay && + cfg->gop_len > 0 && + state->previous_encoder_state != state) { // For LP-gop, depend on the state of the first reference. - int ref_neg = cfg->gop[(state->frame->poc - 1) % cfg->gop_len].ref_neg[0]; - if (ref_neg > state->encoder_control->cfg.owf) { + int ref_neg = cfg->gop[state->frame->gop_offset].ref_neg[0]; + if (ref_neg > cfg->owf) { // If frame is not within OWF range, it's already done. ref_state = NULL; } else { @@ -473,16 +760,12 @@ for (int i = 0; i < state->lcu_order_count; ++i) { const lcu_order_element_t * const lcu = &state->lcu_order[i]; -#ifdef KVZ_DEBUG - char job_description[256]; - sprintf(job_description, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->frame->num, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1); -#else - char* job_description = NULL; -#endif - state->tile->wf_jobs[lcu->id] = kvz_threadqueue_submit(state->encoder_control->threadqueue, encoder_state_worker_encode_lcu, (void*)lcu, 1, job_description); - + kvz_threadqueue_free_job(&state->tile->wf_jobs[lcu->id]); + state->tile->wf_jobs[lcu->id] = kvz_threadqueue_job_create(encoder_state_worker_encode_lcu, (void*)lcu); + threadqueue_job_t **job = &state->tile->wf_jobs[lcu->id]; + // If job object was returned, add dependancies and allow it to run. - if (state->tile->wf_jobs[lcu->id]) { + if (job[0]) { // Add inter frame dependancies when ecoding more than one frame at // once. The added dependancy is for the first LCU of each wavefront // row to depend on the reconstruction status of the row below in the @@ -491,37 +774,39 @@ state->previous_encoder_state->tqj_recon_done && state->frame->slicetype != KVZ_SLICE_I) { - if (!lcu->left) { - const lcu_order_element_t * const ref_lcu = &ref_state->lcu_order[i]; - if (lcu->below) { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], ref_lcu->below->encoder_state->tqj_recon_done); - } else { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], ref_lcu->encoder_state->tqj_recon_done); - } + // We need to wait until the CTUs whose pixels we refer to are + // done before we can start this CTU. + const lcu_order_element_t *dep_lcu = lcu; + for (int i = 0; dep_lcu->below && i < ctrl->max_inter_ref_lcu.down; i++) { + dep_lcu = dep_lcu->below; + } + for (int i = 0; dep_lcu->right && i < ctrl->max_inter_ref_lcu.right; i++) { + dep_lcu = dep_lcu->right; } + kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[dep_lcu->id]); } // Add local WPP dependancy to the LCU on the left. if (lcu->left) { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - 1]); + kvz_threadqueue_job_dep_add(job[0], job[-1]); } // Add local WPP dependancy to the LCU on the top right. if (lcu->above) { if (lcu->above->right) { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu + 1]); + kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu + 1]); } else { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu]); + kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu]); } } - kvz_threadqueue_job_unwait_job(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]); - } + kvz_threadqueue_submit(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]); - // In the case where SAO is not enabled, the wavefront row is - // done when the last LCU in the row is done. - if (!state->encoder_control->cfg.sao_enable && i + 1 == state->lcu_order_count) { - assert(!state->tqj_recon_done); - state->tqj_recon_done = state->tile->wf_jobs[lcu->id]; + // The wavefront row is done when the last LCU in the row is done. + if (i + 1 == state->lcu_order_count) { + assert(!state->tqj_recon_done); + state->tqj_recon_done = + kvz_threadqueue_copy_ref(state->tile->wf_jobs[lcu->id]); + } } } } @@ -541,76 +826,14 @@ int wpp_row = sub_state->wfrow->lcu_offset_y; int tile_width = sub_state->tile->frame->width_in_lcu; int end_of_row = (wpp_row + 1) * tile_width - 1; - threadqueue_job_t *job = sub_state->tile->wf_jobs[end_of_row]; - assert(!sub_state->tqj_bitstream_written); - sub_state->tqj_bitstream_written = job; - return; - } -} - -typedef struct { - int y; - const encoder_state_t * encoder_state; -} worker_sao_reconstruct_lcu_data; - -static void encoder_state_worker_sao_reconstruct_lcu(void *opaque) { - worker_sao_reconstruct_lcu_data *data = opaque; - videoframe_t * const frame = data->encoder_state->tile->frame; - unsigned stride = frame->width_in_lcu; - int x; - - //TODO: copy only needed data - kvz_pixel *new_y_data = MALLOC(kvz_pixel, frame->width * frame->height); - kvz_pixel *new_u_data = NULL; - kvz_pixel *new_v_data = NULL; - if (frame->rec->chroma_format != KVZ_CSP_400) { - new_u_data = MALLOC(kvz_pixel, (frame->width * frame->height) >> 2); - new_v_data = MALLOC(kvz_pixel, (frame->width * frame->height) >> 2); - } - - const int offset = frame->width * (data->y*LCU_WIDTH); - const int offset_c = frame->width/2 * (data->y*LCU_WIDTH_C); - int num_pixels = frame->width * (LCU_WIDTH + 2); - - if (num_pixels + offset > frame->width * frame->height) { - num_pixels = frame->width * frame->height - offset; - } - - memcpy(&new_y_data[offset], &frame->rec->y[offset], sizeof(kvz_pixel) * num_pixels); - if (frame->rec->chroma_format != KVZ_CSP_400) { - memcpy(&new_u_data[offset_c], &frame->rec->u[offset_c], sizeof(kvz_pixel) * num_pixels >> 2); - memcpy(&new_v_data[offset_c], &frame->rec->v[offset_c], sizeof(kvz_pixel) * num_pixels >> 2); - } - - if (data->y>0) { - //copy first row from buffer - memcpy(&new_y_data[frame->width * (data->y*LCU_WIDTH-1)], &data->encoder_state->tile->hor_buf_before_sao->y[frame->width * (data->y-1)], frame->width * sizeof(kvz_pixel)); - if (frame->rec->chroma_format != KVZ_CSP_400) { - memcpy(&new_u_data[frame->width / 2 * (data->y*LCU_WIDTH_C - 1)], &data->encoder_state->tile->hor_buf_before_sao->u[frame->width / 2 * (data->y - 1)], frame->width / 2 * sizeof(kvz_pixel)); - memcpy(&new_v_data[frame->width / 2 * (data->y*LCU_WIDTH_C - 1)], &data->encoder_state->tile->hor_buf_before_sao->v[frame->width / 2 * (data->y - 1)], frame->width / 2 * sizeof(kvz_pixel)); + if (sub_state->tile->wf_jobs[end_of_row]) { + sub_state->tqj_bitstream_written = + kvz_threadqueue_copy_ref(sub_state->tile->wf_jobs[end_of_row]); } } - - for (x = 0; x < frame->width_in_lcu; x++) { - // sao_do_rdo(encoder, lcu.x, lcu.y, sao_luma, sao_chroma); - sao_info_t *sao_luma = &frame->sao_luma[data->y * stride + x]; - sao_info_t *sao_chroma = &frame->sao_chroma[data->y * stride + x]; - kvz_sao_reconstruct(data->encoder_state->encoder_control, frame, new_y_data, x, data->y, sao_luma, COLOR_Y); - if (frame->rec->chroma_format != KVZ_CSP_400) { - kvz_sao_reconstruct(data->encoder_state->encoder_control, frame, new_u_data, x, data->y, sao_chroma, COLOR_U); - kvz_sao_reconstruct(data->encoder_state->encoder_control, frame, new_v_data, x, data->y, sao_chroma, COLOR_V); - } - } - - free(new_y_data); - free(new_u_data); - free(new_v_data); - - free(opaque); } - static int encoder_state_tree_is_a_chain(const encoder_state_t * const state) { if (!state->children[0].encoder_control) return 1; if (state->children[1].encoder_control) return 0; @@ -620,62 +843,66 @@ static void encoder_state_encode(encoder_state_t * const main_state) { //If we have children, encode at child level if (main_state->children[0].encoder_control) { - int i=0; //If we have only one child, than it cannot be the last split in tree int node_is_the_last_split_in_tree = (main_state->children[1].encoder_control != 0); - - for (i=0; main_state->children[i].encoder_control; ++i) { + + for (int i = 0; main_state->children[i].encoder_control; ++i) { encoder_state_t *sub_state = &(main_state->children[i]); - + if (sub_state->tile != main_state->tile) { - const int offset_x = sub_state->tile->lcu_offset_x * LCU_WIDTH; - const int offset_y = sub_state->tile->lcu_offset_y * LCU_WIDTH; + const int offset_x = sub_state->tile->offset_x; + const int offset_y = sub_state->tile->offset_y; const int width = MIN(sub_state->tile->frame->width_in_lcu * LCU_WIDTH, main_state->tile->frame->width - offset_x); const int height = MIN(sub_state->tile->frame->height_in_lcu * LCU_WIDTH, main_state->tile->frame->height - offset_y); - - if (sub_state->tile->frame->source) { - kvz_image_free(sub_state->tile->frame->source); - sub_state->tile->frame->source = NULL; - } - if (sub_state->tile->frame->rec) { - kvz_image_free(sub_state->tile->frame->rec); - sub_state->tile->frame->rec = NULL; - } - - assert(!sub_state->tile->frame->source); - assert(!sub_state->tile->frame->rec); - sub_state->tile->frame->source = kvz_image_make_subimage(main_state->tile->frame->source, offset_x, offset_y, width, height); - sub_state->tile->frame->rec = kvz_image_make_subimage(main_state->tile->frame->rec, offset_x, offset_y, width, height); + + kvz_image_free(sub_state->tile->frame->source); + sub_state->tile->frame->source = NULL; + + kvz_image_free(sub_state->tile->frame->rec); + sub_state->tile->frame->rec = NULL; + + kvz_cu_array_free(&sub_state->tile->frame->cu_array); + + sub_state->tile->frame->source = kvz_image_make_subimage( + main_state->tile->frame->source, + offset_x, + offset_y, + width, + height + ); + sub_state->tile->frame->rec = kvz_image_make_subimage( + main_state->tile->frame->rec, + offset_x, + offset_y, + width, + height + ); + sub_state->tile->frame->cu_array = kvz_cu_subarray( + main_state->tile->frame->cu_array, + offset_x, + offset_y, + sub_state->tile->frame->width_in_lcu * LCU_WIDTH, + sub_state->tile->frame->height_in_lcu * LCU_WIDTH + ); } - + //To be the last split, we require that every child is a chain - node_is_the_last_split_in_tree = node_is_the_last_split_in_tree && encoder_state_tree_is_a_chain(&main_state->children[i]); + node_is_the_last_split_in_tree = + node_is_the_last_split_in_tree && + encoder_state_tree_is_a_chain(&main_state->children[i]); } //If it's the latest split point if (node_is_the_last_split_in_tree) { - for (i=0; main_state->children[i].encoder_control; ++i) { + for (int i = 0; main_state->children[i].encoder_control; ++i) { //If we don't have wavefronts, parallelize encoding of children. if (main_state->children[i].type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) { -#ifdef KVZ_DEBUG - char job_description[256]; - switch (main_state->children[i].type) { - case ENCODER_STATE_TYPE_TILE: - sprintf(job_description, "type=encode_child,frame=%d,tile=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", main_state->children[i].frame->num, main_state->children[i].tile->id, main_state->children[i].lcu_order[0].position.y + main_state->children[i].tile->lcu_offset_y, main_state->children[i].lcu_order[0].position.y + main_state->children[i].tile->lcu_offset_y, - main_state->children[i].lcu_order[0].position_px.x + main_state->children[i].tile->lcu_offset_x * LCU_WIDTH, main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].position_px.x + main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].size.x + main_state->children[i].tile->lcu_offset_x * LCU_WIDTH - 1, - main_state->children[i].lcu_order[0].position_px.y + main_state->children[i].tile->lcu_offset_y * LCU_WIDTH, main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].position_px.y + main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].size.y + main_state->children[i].tile->lcu_offset_y * LCU_WIDTH - 1); - break; - case ENCODER_STATE_TYPE_SLICE: - sprintf(job_description, "type=encode_child,frame=%d,slice=%d,start_in_ts=%d", main_state->children[i].frame->num, main_state->children[i].slice->id, main_state->children[i].slice->start_in_ts); - break; - default: - sprintf(job_description, "type=encode_child,frame=%d,invalid", main_state->children[i].frame->num); - break; - } -#else - char* job_description = NULL; -#endif - main_state->children[i].tqj_recon_done = kvz_threadqueue_submit(main_state->encoder_control->threadqueue, encoder_state_worker_encode_children, &(main_state->children[i]), 1, job_description); - if (main_state->children[i].previous_encoder_state != &main_state->children[i] && main_state->children[i].previous_encoder_state->tqj_recon_done && !main_state->children[i].frame->is_idr_frame) { + kvz_threadqueue_free_job(&main_state->children[i].tqj_recon_done); + main_state->children[i].tqj_recon_done = + kvz_threadqueue_job_create(encoder_state_worker_encode_children, &main_state->children[i]); + if (main_state->children[i].previous_encoder_state != &main_state->children[i] && + main_state->children[i].previous_encoder_state->tqj_recon_done && + !main_state->children[i].frame->is_irap) + { #if 0 // Disabled due to non-determinism. if (main_state->encoder_control->cfg->mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN) @@ -691,70 +918,15 @@ } } } - kvz_threadqueue_job_unwait_job(main_state->encoder_control->threadqueue, main_state->children[i].tqj_recon_done); + kvz_threadqueue_submit(main_state->encoder_control->threadqueue, main_state->children[i].tqj_recon_done); } else { //Wavefront rows have parallelism at LCU level, so we should not launch multiple threads here! //FIXME: add an assert: we can only have wavefront children encoder_state_worker_encode_children(&(main_state->children[i])); } } - - // Add SAO reconstruction jobs and their dependancies when using WPP coding. - if (main_state->encoder_control->cfg.sao_enable && - main_state->children[0].type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) - { - int y; - videoframe_t * const frame = main_state->tile->frame; - threadqueue_job_t *previous_job = NULL; - - for (y = 0; y < frame->height_in_lcu; ++y) { - // Queue a single job performing SAO reconstruction for the whole wavefront row. - - worker_sao_reconstruct_lcu_data *data = MALLOC(worker_sao_reconstruct_lcu_data, 1); - threadqueue_job_t *job; -#ifdef KVZ_DEBUG - char job_description[256]; - sprintf(job_description, "type=sao,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d", main_state->frame->num, main_state->tile->id, main_state->tile->lcu_offset_x * LCU_WIDTH, main_state->tile->lcu_offset_x * LCU_WIDTH + main_state->tile->frame->width - 1, (main_state->tile->lcu_offset_y + y) * LCU_WIDTH, MIN(main_state->tile->lcu_offset_y * LCU_WIDTH + main_state->tile->frame->height, (main_state->tile->lcu_offset_y + y + 1) * LCU_WIDTH)-1); -#else - char* job_description = NULL; -#endif - data->y = y; - data->encoder_state = main_state; - - job = kvz_threadqueue_submit(main_state->encoder_control->threadqueue, encoder_state_worker_sao_reconstruct_lcu, data, 1, job_description); - - // This dependancy is needed, because the pre-SAO pixels from the LCU row - // below this one are read straigh from the frame. - if (previous_job) { - kvz_threadqueue_job_dep_add(job, previous_job); - } - previous_job = job; - - // This depepndancy ensures that the bottom edge of this LCU row - // has been fully deblocked. - if (y < frame->height_in_lcu - 1) { - // Not last row: depend on the last LCU of the row below. - kvz_threadqueue_job_dep_add(job, main_state->tile->wf_jobs[(y + 1) * frame->width_in_lcu + frame->width_in_lcu - 1]); - } else { - // Last row: depend on the last LCU of the row - kvz_threadqueue_job_dep_add(job, main_state->tile->wf_jobs[(y + 0) * frame->width_in_lcu + frame->width_in_lcu - 1]); - } - kvz_threadqueue_job_unwait_job(main_state->encoder_control->threadqueue, job); - - // The wavefront row is finished, when the SAO-reconstruction is - // finished. - main_state->children[y].tqj_recon_done = job; - - if (y == frame->height_in_lcu - 1) { - // This tile is finished, when the reconstruction of the last - // WPP-row is finished. - assert(!main_state->tqj_recon_done); - main_state->tqj_recon_done = job; - } - } - } } else { - for (i=0; main_state->children[i].encoder_control; ++i) { + for (int i = 0; main_state->children[i].encoder_control; ++i) { encoder_state_worker_encode_children(&(main_state->children[i])); } } @@ -773,81 +945,50 @@ } -static void encoder_ref_insertion_sort(int reflist[16], int length) { +static void encoder_ref_insertion_sort(const encoder_state_t *const state, uint8_t reflist[16], uint8_t length) { for (uint8_t i = 1; i < length; ++i) { - const int16_t cur_poc = reflist[i]; - int16_t j = i; - while (j > 0 && cur_poc < reflist[j - 1]) { + const uint8_t cur_idx = reflist[i]; + const int32_t cur_poc = state->frame->ref->pocs[cur_idx]; + int8_t j = i; + while (j > 0 && cur_poc > state->frame->ref->pocs[reflist[j - 1]]) { reflist[j] = reflist[j - 1]; --j; } - reflist[j] = cur_poc; + reflist[j] = cur_idx; } } /** - * \brief Return reference picture lists. + * \brief Generate reference picture lists. * * \param state main encoder state - * \param ref_list_len_out Returns the lengths of the reference lists. - * \param ref_list_poc_out Returns two lists of POCs of the reference pictures. */ -void kvz_encoder_get_ref_lists(const encoder_state_t *const state, - int ref_list_len_out[2], - int ref_list_poc_out[2][16]) +void kvz_encoder_create_ref_lists(const encoder_state_t *const state) { - FILL_ARRAY(ref_list_len_out, 0, 2); + // TODO check possibility to add L0 references to L1 list also + + FILL_ARRAY(state->frame->ref_LX_size, 0, 2); // List all pocs of lists int j = 0; for (j = 0; j < state->frame->ref->used_size; j++) { if (state->frame->ref->pocs[j] < state->frame->poc) { - ref_list_poc_out[0][ref_list_len_out[0]] = state->frame->ref->pocs[j]; - ref_list_len_out[0]++; + state->frame->ref_LX[0][state->frame->ref_LX_size[0]] = j; + state->frame->ref_LX_size[0] += 1; } else { - ref_list_poc_out[1][ref_list_len_out[1]] = state->frame->ref->pocs[j]; - ref_list_len_out[1]++; + state->frame->ref_LX[1][state->frame->ref_LX_size[1]] = j; + state->frame->ref_LX_size[1] += 1; } } - // Fill the rest of ref_list_poc_out array with -1s. + // Fill the rest with -1s. for (; j < 16; j++) { - ref_list_poc_out[0][j] = -1; - ref_list_poc_out[1][j] = -1; + state->frame->ref_LX[0][j] = (uint8_t) -1; + state->frame->ref_LX[1][j] = (uint8_t) -1; } - encoder_ref_insertion_sort(ref_list_poc_out[0], ref_list_len_out[0]); - encoder_ref_insertion_sort(ref_list_poc_out[1], ref_list_len_out[1]); -} - -static void encoder_state_ref_sort(encoder_state_t *state) { - int ref_list_len[2]; - int ref_list_poc[2][16]; - - kvz_encoder_get_ref_lists(state, ref_list_len, ref_list_poc); - - for (int j = 0; j < state->frame->ref->used_size; j++) { - if (state->frame->ref->pocs[j] < state->frame->poc) { - for (int ref_idx = 0; ref_idx < ref_list_len[0]; ref_idx++) { - if (ref_list_poc[0][ref_idx] == state->frame->ref->pocs[j]) { - state->frame->refmap[j].idx = ref_list_len[0] - ref_idx - 1; - break; - } - } - state->frame->refmap[j].list = 1; - - } else { - for (int ref_idx = 0; ref_idx < ref_list_len[1]; ref_idx++) { - if (ref_list_poc[1][ref_idx] == state->frame->ref->pocs[j]) { - state->frame->refmap[j].idx = ref_idx; - break; - } - } - state->frame->refmap[j].list = 2; - } - state->frame->refmap[j].poc = state->frame->ref->pocs[j]; - } + encoder_ref_insertion_sort(state, state->frame->ref_LX[0], state->frame->ref_LX_size[0]); } /** @@ -855,7 +996,7 @@ */ static void encoder_state_remove_refs(encoder_state_t *state) { const encoder_control_t * const encoder = state->encoder_control; - + int neg_refs = encoder->cfg.gop[state->frame->gop_offset].ref_neg_count; int pos_refs = encoder->cfg.gop[state->frame->gop_offset].ref_pos_count; @@ -865,7 +1006,10 @@ } else { target_ref_num = encoder->cfg.ref_frames; } - if (state->frame->slicetype == KVZ_SLICE_I) { + + if (state->frame->pictype == KVZ_NAL_IDR_W_RADL || + state->frame->pictype == KVZ_NAL_IDR_N_LP) + { target_ref_num = 0; } @@ -877,7 +1021,7 @@ bool is_referenced = false; int ref_poc = state->frame->ref->pocs[ref]; - + for (int i = 0; i < neg_refs; i++) { int ref_relative_poc = -encoder->cfg.gop[state->frame->gop_offset].ref_neg[i]; if (ref_poc == state->frame->poc + ref_relative_poc) { @@ -886,7 +1030,6 @@ } } - for (int i = 0; i < pos_refs; i++) { int ref_relative_poc = encoder->cfg.gop[state->frame->gop_offset].ref_pos[i]; if (ref_poc == state->frame->poc + ref_relative_poc) { @@ -895,6 +1038,20 @@ } } + if (ref_poc < state->frame->irap_poc && + state->frame->irap_poc < state->frame->poc) + { + // Trailing frames cannot refer to leading frames. + is_referenced = false; + } + + if (encoder->cfg.intra_period > 0 && + ref_poc < state->frame->irap_poc - encoder->cfg.intra_period) + { + // No frame can refer past the two preceding IRAP frames. + is_referenced = false; + } + if (!is_referenced) { // This reference is not referred to by this frame, it must be removed. kvz_image_list_rem(state->frame->ref, ref); @@ -911,16 +1068,6 @@ assert(state->frame->ref->used_size <= target_ref_num); } -static void encoder_state_reset_poc(encoder_state_t *state) { - state->frame->poc = 0; - kvz_videoframe_set_poc(state->tile->frame, 0); - - for (int i = 0; state->children[i].encoder_control; ++i) { - encoder_state_t *sub_state = &(state->children[i]); - encoder_state_reset_poc(sub_state); - } -} - static void encoder_set_source_picture(encoder_state_t * const state, kvz_picture* frame) { assert(!state->tile->frame->source); @@ -949,8 +1096,8 @@ } //Clear the jobs - state->tqj_bitstream_written = NULL; - state->tqj_recon_done = NULL; + kvz_threadqueue_free_job(&state->tqj_bitstream_written); + kvz_threadqueue_free_job(&state->tqj_recon_done); for (int i = 0; state->children[i].encoder_control; ++i) { encoder_state_init_children(&state->children[i]); @@ -980,56 +1127,71 @@ encoder_set_source_picture(state, frame); + assert(!state->tile->frame->cu_array); + state->tile->frame->cu_array = kvz_cu_array_alloc( + state->tile->frame->width, + state->tile->frame->height + ); + + // Set POC. if (state->frame->num == 0) { - state->frame->is_idr_frame = true; - } else if (cfg->gop_len) { - // Closed GOP / CRA is not yet supported. - state->frame->is_idr_frame = false; - + state->frame->poc = 0; + } else if (cfg->gop_len && !cfg->gop_lowdelay) { // Calculate POC according to the global frame counter and GOP structure int32_t poc = state->frame->num - 1; int32_t poc_offset = cfg->gop[state->frame->gop_offset].poc_offset; state->frame->poc = poc - poc % cfg->gop_len + poc_offset; kvz_videoframe_set_poc(state->tile->frame, state->frame->poc); + } else if (cfg->intra_period > 0) { + state->frame->poc = state->frame->num % cfg->intra_period; } else { - bool is_i_idr = (cfg->intra_period == 1 && state->frame->num % 2 == 0); - bool is_p_idr = (cfg->intra_period > 1 && (state->frame->num % cfg->intra_period) == 0); - state->frame->is_idr_frame = is_i_idr || is_p_idr; + state->frame->poc = state->frame->num; } - - if (state->frame->is_idr_frame) { - encoder_state_reset_poc(state); - state->frame->slicetype = KVZ_SLICE_I; - state->frame->pictype = KVZ_NAL_IDR_W_RADL; + + // Check whether the frame is a keyframe or not. + if (state->frame->num == 0) { + state->frame->is_irap = true; } else { - if (cfg->intra_period == 1) { - state->frame->slicetype = KVZ_SLICE_I; - } else if (cfg->gop_len != 0) { - state->frame->slicetype = KVZ_SLICE_B; - } else { - state->frame->slicetype = KVZ_SLICE_P; - } + state->frame->is_irap = + cfg->intra_period > 0 && + (state->frame->poc % cfg->intra_period) == 0; + } + if (state->frame->is_irap) { + state->frame->irap_poc = state->frame->poc; + } - // Use P-slice for lowdelay. - if (state->frame->slicetype == KVZ_SLICE_B && - cfg->gop_len > 0 && - cfg->gop_lowdelay) { - state->frame->slicetype = KVZ_SLICE_P; + // Set pictype. + if (state->frame->is_irap) { + if (state->frame->num == 0 || + cfg->intra_period == 1 || + cfg->gop_len == 0 || + cfg->gop_lowdelay) + { + state->frame->pictype = KVZ_NAL_IDR_W_RADL; + } else { + state->frame->pictype = KVZ_NAL_CRA_NUT; } - + } else if (state->frame->poc < state->frame->irap_poc) { + state->frame->pictype = KVZ_NAL_RASL_R; + } else { state->frame->pictype = KVZ_NAL_TRAIL_R; - if (state->encoder_control->cfg.gop_len) { - if (cfg->intra_period > 1 && (state->frame->poc % cfg->intra_period) == 0) { - state->frame->slicetype = KVZ_SLICE_I; - } - } - } encoder_state_remove_refs(state); - encoder_state_ref_sort(state); + kvz_encoder_create_ref_lists(state); + + // Set slicetype. + if (state->frame->is_irap) { + state->frame->slicetype = KVZ_SLICE_I; + } else if (state->frame->ref_LX_size[1] > 0) { + state->frame->slicetype = KVZ_SLICE_B; + } else { + state->frame->slicetype = KVZ_SLICE_P; + } - normalize_lcu_weights(state); + if (cfg->target_bitrate > 0 && state->frame->num > cfg->owf) { + normalize_lcu_weights(state); + } kvz_set_picture_lambda_and_qp(state); encoder_state_init_children(state); @@ -1051,39 +1213,22 @@ void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame) { - { - PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); - encoder_state_init_new_frame(state, frame); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=init_new_frame,frame=%d,poc=%d", state->frame->num, state->frame->poc); - } - { - PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); - encoder_state_encode(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=encode,frame=%d", state->frame->num); - } - //kvz_threadqueue_flush(main_state->encoder_control->threadqueue); - { - threadqueue_job_t *job; -#ifdef KVZ_DEBUG - char job_description[256]; - sprintf(job_description, "type=write_bitstream,frame=%d", state->frame->num); -#else - char* job_description = NULL; -#endif - - job = kvz_threadqueue_submit(state->encoder_control->threadqueue, kvz_encoder_state_worker_write_bitstream, (void*) state, 1, job_description); - - _encode_one_frame_add_bitstream_deps(state, job); - if (state->previous_encoder_state != state && state->previous_encoder_state->tqj_bitstream_written) { - //We need to depend on previous bitstream generation - kvz_threadqueue_job_dep_add(job, state->previous_encoder_state->tqj_bitstream_written); - } - kvz_threadqueue_job_unwait_job(state->encoder_control->threadqueue, job); - assert(!state->tqj_bitstream_written); - state->tqj_bitstream_written = job; + encoder_state_init_new_frame(state, frame); + encoder_state_encode(state); + + threadqueue_job_t *job = + kvz_threadqueue_job_create(kvz_encoder_state_worker_write_bitstream, state); + + _encode_one_frame_add_bitstream_deps(state, job); + if (state->previous_encoder_state != state && state->previous_encoder_state->tqj_bitstream_written) { + //We need to depend on previous bitstream generation + kvz_threadqueue_job_dep_add(job, state->previous_encoder_state->tqj_bitstream_written); } + kvz_threadqueue_submit(state->encoder_control->threadqueue, job); + assert(!state->tqj_bitstream_written); + state->tqj_bitstream_written = job; + state->frame->done = 0; - //kvz_threadqueue_flush(main_state->encoder_control->threadqueue); } @@ -1105,9 +1250,11 @@ if (state->frame->num == -1) { // We're at the first frame, so don't care about all this stuff. state->frame->num = 0; - state->frame->poc = 0; + state->frame->poc = 0; + state->frame->irap_poc = 0; assert(!state->tile->frame->source); assert(!state->tile->frame->rec); + assert(!state->tile->frame->cu_array); state->frame->prepared = 1; return; } @@ -1116,13 +1263,13 @@ encoder_state_t *prev_state = state->previous_encoder_state; if (state->previous_encoder_state != state) { - kvz_cu_array_free(state->tile->frame->cu_array); - state->tile->frame->cu_array = NULL; + kvz_cu_array_free(&state->tile->frame->cu_array); unsigned width = state->tile->frame->width_in_lcu * LCU_WIDTH; unsigned height = state->tile->frame->height_in_lcu * LCU_WIDTH; state->tile->frame->cu_array = kvz_cu_array_alloc(width, height); kvz_image_list_copy_contents(state->frame->ref, prev_state->frame->ref); + kvz_encoder_create_ref_lists(state); } if (!encoder->cfg.gop_len || @@ -1136,8 +1283,9 @@ kvz_image_list_add(state->frame->ref, prev_state->tile->frame->rec, prev_state->tile->frame->cu_array, - prev_state->frame->poc); - kvz_cu_array_free(state->tile->frame->cu_array); + prev_state->frame->poc, + prev_state->frame->ref_LX); + kvz_cu_array_free(&state->tile->frame->cu_array); unsigned height = state->tile->frame->height_in_lcu * LCU_WIDTH; unsigned width = state->tile->frame->width_in_lcu * LCU_WIDTH; state->tile->frame->cu_array = kvz_cu_array_alloc(width, height); @@ -1146,12 +1294,16 @@ // Remove source and reconstructed picture. kvz_image_free(state->tile->frame->source); state->tile->frame->source = NULL; + kvz_image_free(state->tile->frame->rec); state->tile->frame->rec = NULL; + kvz_cu_array_free(&state->tile->frame->cu_array); + // Update POC and frame count. state->frame->num = prev_state->frame->num + 1; - state->frame->poc = prev_state->frame->poc + 1; + state->frame->poc = prev_state->frame->poc + 1; + state->frame->irap_poc = prev_state->frame->irap_poc; state->frame->prepared = 1; }
View file
kvazaar-1.1.0.tar.gz/src/encoderstate.h -> kvazaar-1.2.0.tar.gz/src/encoderstate.h
Changed
@@ -81,6 +81,7 @@ int32_t num; /*!< \brief Frame number */ int32_t poc; /*!< \brief Picture order count */ int8_t gop_offset; /*!< \brief Offset in the gop structure */ + int32_t irap_poc; /*!< \brief POC of the associated IRAP picture */ /** * \brief Frame-level quantization parameter @@ -91,17 +92,16 @@ //! \brief quantization factor double QP_factor; - //Current picture available references + //! Current pictures available for references image_list_t *ref; int8_t ref_list; - struct { - int32_t poc; - int8_t list; - int8_t idx; - } refmap[16]; - - bool is_idr_frame; + //! L0 and L1 reference index list + uint8_t ref_LX[2][16]; + //! L0 reference index list size + uint8_t ref_LX_size[2]; + + bool is_irap; uint8_t pictype; enum kvz_slice_type slicetype; @@ -153,11 +153,15 @@ videoframe_t *frame; int32_t id; - + //Tile: offset in LCU for current encoder_state in global coordinates int32_t lcu_offset_x; int32_t lcu_offset_y; - + + //Tile: offset in pixels + int32_t offset_x; + int32_t offset_y; + //Position of the first element in tile scan in global coordinates int32_t lcu_offset_in_ts; @@ -169,18 +173,20 @@ // LCU-column. They are packed such that each LCU-column index maps to the // x-coordinate. yuv_t *ver_buf_search; - - // This is a buffer for the deblocked bottom pixels of every LCU-row in the - // tile. They are packed such that each LCU-row index maps to the y-coordinate. + + // This is a buffer for the deblocked bottom pixels of every LCU in the + // tile. They are packed such that each LCU-row index maps to the + // y-coordinate. yuv_t *hor_buf_before_sao; - + + // This is a buffer for the deblocked right pixels of every LCU in the + // tile. They are packed such that each LCU-column index maps to the + // x-coordinate. + yuv_t *ver_buf_before_sao; + //Jobs for each individual LCU of a wavefront row. threadqueue_job_t **wf_jobs; - // Instance of encryption generator by tile - Crypto_Handle dbs_g; - uint32_t m_prev_pos; - } encoder_state_config_tile_t; typedef struct encoder_state_config_slice_t { @@ -243,6 +249,10 @@ bitstream_t stream; cabac_data_t cabac; + // Crypto stuff + crypto_handle_t *crypto_hdl; + uint32_t crypto_prev_pos; + uint32_t stats_bitstream_length; //Bitstream length written in bytes //! \brief Lambda for SSE @@ -263,6 +273,11 @@ */ int8_t ref_qp; + /** + * \brief Coeffs for the LCU. + */ + lcu_coeff_t *coeff; + //Jobs to wait for threadqueue_job_t * tqj_recon_done; //Reconstruction is done threadqueue_job_t * tqj_bitstream_written; //Bitstream is written @@ -277,9 +292,7 @@ coeff_scan_order_t kvz_get_scan_order(int8_t cu_type, int intra_mode, int depth); -void kvz_encoder_get_ref_lists(const encoder_state_t *const state, - int ref_list_len_out[2], - int ref_list_poc_out[2][16]); +void kvz_encoder_create_ref_lists(const encoder_state_t *const state); lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y);
View file
kvazaar-1.1.0.tar.gz/src/extras/crypto.cpp -> kvazaar-1.2.0.tar.gz/src/extras/crypto.cpp
Changed
@@ -1,132 +1,140 @@ #include <extras/crypto.h> #ifndef KVZ_SEL_ENCRYPTION -extern int kvz_make_vs_ignore_crypto_not_having_symbols; int kvz_make_vs_ignore_crypto_not_having_symbols = 0; #else + #include <cryptopp/aes.h> #include <cryptopp/modes.h> #include <cryptopp/osrng.h> -typedef struct AESDecoder { + #if AESEncryptionStreamMode - CryptoPP::CFB_Mode<CryptoPP::AES>::Encryption *CFBdec; + typedef CryptoPP::CFB_Mode<CryptoPP::AES>::Encryption cipher_t; #else - CryptoPP::CFB_Mode<CryptoPP::AES>::Decryption *CFBdec; + typedef CryptoPP::CFB_Mode<CryptoPP::AES>::Decryption cipher_t; #endif - byte key[CryptoPP::AES::DEFAULT_KEYLENGTH], iv[CryptoPP::AES::BLOCKSIZE], out_stream_counter[CryptoPP::AES::BLOCKSIZE], counter[CryptoPP::AES::BLOCKSIZE]; - int couter_avail, counter_index, counter_index_pos; -} AESDecoder; +struct crypto_handle_t { + cipher_t *cipher; + byte key[CryptoPP::AES::DEFAULT_KEYLENGTH]; + byte iv[CryptoPP::AES::BLOCKSIZE]; + byte out_stream_counter[CryptoPP::AES::BLOCKSIZE]; + byte counter[CryptoPP::AES::BLOCKSIZE]; + int couter_avail; + int counter_index; + int counter_index_pos; +}; -AESDecoder* Create() { - AESDecoder * AESdecoder = (AESDecoder *)malloc(sizeof(AESDecoder)); - return AESdecoder; -} -void Init(AESDecoder* AESdecoder) { - int init_val[32] = {201, 75, 219, 152, 6, 245, 237, 107, 179, 194, 81, 29, 66, 98, 198, 0, 16, 213, 27, 56, 255, 127, 242, 112, 97, 126, 197, 204, 25, 59, 38, 30}; - for(int i=0;i<16; i++) { - AESdecoder->iv [i] = init_val[i]; - AESdecoder->counter[i] = init_val[5+i]; - AESdecoder->key[i] = init_val[i+16]; - } -#if AESEncryptionStreamMode - AESdecoder->CFBdec = new CryptoPP::CFB_Mode<CryptoPP::AES >::Encryption(AESdecoder->key, CryptoPP::AES::DEFAULT_KEYLENGTH, AESdecoder->iv); -#else - AESdecoder->CFBdec = new CryptoPP::CFB_Mode<CryptoPP::AES >::Decryption(AESdecoder->key, CryptoPP::AES::DEFAULT_KEYLENGTH, AESdecoder->iv); -#endif - AESdecoder->couter_avail = 0; - AESdecoder->counter_index = 0; - AESdecoder->counter_index_pos = 0; -} +static uint8_t default_IV[16] = {201, 75, 219, 152, 6, 245, 237, 107, 179, 194, 81, 29, 66, 98, 198, 0}; +static uint8_t default_key[16] = {16, 213, 27, 56, 255, 127, 242, 112, 97, 126, 197, 204, 25, 59, 38, 30}; -void DeleteCrypto(AESDecoder * AESdecoder) { - if(AESdecoder) - free(AESdecoder); -} -void Decrypt(AESDecoder *AESdecoder, const unsigned char *in_stream, int size_bits, unsigned char *out_stream) { - int nb_bytes = ceil((double)size_bits/8); - AESdecoder->CFBdec->ProcessData(out_stream, in_stream, nb_bytes); - if(size_bits&7) - AESdecoder->CFBdec->SetKeyWithIV(AESdecoder->key, CryptoPP::AES::DEFAULT_KEYLENGTH, AESdecoder->iv); - -} -void Incr_counter (unsigned char *counter) { - counter[0]++; -} +crypto_handle_t* kvz_crypto_create(const kvz_config *cfg) +{ + crypto_handle_t* hdl = (crypto_handle_t*)calloc(1, sizeof(crypto_handle_t)); -#if AESEncryptionStreamMode -void Decrypt_counter(AESDecoder * AESdecoder) { - AESdecoder->CFBdec->ProcessData(AESdecoder->out_stream_counter, AESdecoder->counter, 16); - AESdecoder->couter_avail = 128; - AESdecoder->counter_index = 15; - AESdecoder->counter_index_pos = 8; - Incr_counter(AESdecoder->counter); -} -#endif + uint8_t *key; + if(cfg->optional_key!=NULL) + key = cfg->optional_key; + else + key = default_key; -#if AESEncryptionStreamMode -unsigned int get_key (AESDecoder * AESdecoder, int nb_bits) { - unsigned int key_ = 0; - if(nb_bits > 32) { - printf("The Generator can not generate more than 32 bit %d \n", nb_bits); - return 0; - } - if( !nb_bits ) - return 0; - if(!AESdecoder->couter_avail) - Decrypt_counter(AESdecoder); - - if(AESdecoder->couter_avail >= nb_bits) - AESdecoder->couter_avail -= nb_bits; - else - AESdecoder->couter_avail = 0; - int nb = 0; - while( nb_bits ) { - if( nb_bits >= AESdecoder->counter_index_pos ) - nb = AESdecoder->counter_index_pos; - else - nb = nb_bits; - key_ <<= nb; - key_ += (AESdecoder->out_stream_counter[AESdecoder->counter_index] & ((1<<nb)-1)); - AESdecoder->out_stream_counter[AESdecoder->counter_index] >>= nb; - nb_bits -= nb; - - if(AESdecoder->counter_index && nb == AESdecoder->counter_index_pos ) { - AESdecoder->counter_index--; - AESdecoder->counter_index_pos = 8; - } else { - AESdecoder->counter_index_pos -= nb; - if(nb_bits) { - Decrypt_counter(AESdecoder); - AESdecoder->couter_avail -= nb_bits; - } - } - } - return key_; -} -#endif -Crypto_Handle CreateC() { - AESDecoder* AESdecoder = Create(); - return AESdecoder; + for (int i = 0; i < 16; i++) { + hdl->iv [i] = default_IV[i]; + hdl->counter[i] = (i<11)? default_IV[5+i] : key[i-11]; + hdl->key[i] = key[i]; + } + + hdl->cipher = new cipher_t(hdl->key, CryptoPP::AES::DEFAULT_KEYLENGTH, hdl->iv); + + hdl->couter_avail = 0; + hdl->counter_index = 0; + hdl->counter_index_pos = 0; + + return hdl; } -void InitC(Crypto_Handle hdl) { - Init((AESDecoder*)hdl); +void kvz_crypto_delete(crypto_handle_t **hdl) +{ + if (*hdl) { + delete (*hdl)->cipher; + (*hdl)->cipher = NULL; + } + FREE_POINTER(*hdl); } +void kvz_crypto_decrypt(crypto_handle_t* hdl, + const uint8_t *in_stream, + int size_bits, + uint8_t *out_stream) +{ + int num_bytes = ceil((double)size_bits/8); + hdl->cipher->ProcessData(out_stream, in_stream, num_bytes); + if (size_bits & 7) { + hdl->cipher->SetKeyWithIV(hdl->key, CryptoPP::AES::DEFAULT_KEYLENGTH, hdl->iv); + } +} #if AESEncryptionStreamMode -unsigned int ff_get_key (Crypto_Handle *hdl, int nb_bits) { - return get_key ((AESDecoder*)*hdl, nb_bits); +static void increment_counter(unsigned char *counter) +{ + counter[0]++; } -#endif -void DecryptC(Crypto_Handle hdl, const unsigned char *in_stream, int size_bits, unsigned char *out_stream) { - Decrypt((AESDecoder*)hdl, in_stream, size_bits, out_stream); + +static void decrypt_counter(crypto_handle_t *hdl) +{ + hdl->cipher->ProcessData(hdl->out_stream_counter, hdl->counter, 16); + hdl->couter_avail = 128; + hdl->counter_index = 15; + hdl->counter_index_pos = 8; + increment_counter(hdl->counter); } -void DeleteCryptoC(Crypto_Handle hdl) { - DeleteCrypto((AESDecoder *)hdl); +unsigned kvz_crypto_get_key(crypto_handle_t *hdl, int nb_bits) +{ + unsigned key = 0; + if (nb_bits > 32) { + fprintf(stderr, "The generator cannot generate %d bits (max 32 bits)\n", nb_bits); + return 0; + } + if (nb_bits == 0) return 0; + + if (!hdl->couter_avail) { + decrypt_counter(hdl); + } + + if(hdl->couter_avail >= nb_bits) { + hdl->couter_avail -= nb_bits; + } else { + hdl->couter_avail = 0; + } + + int nb = 0; + while (nb_bits) { + if (nb_bits >= hdl->counter_index_pos) { + nb = hdl->counter_index_pos; + } else { + nb = nb_bits; + } + + key <<= nb; + key += hdl->out_stream_counter[hdl->counter_index] & ((1 << nb) - 1); + hdl->out_stream_counter[hdl->counter_index] >>= nb; + nb_bits -= nb; + + if (hdl->counter_index && nb == hdl->counter_index_pos) { + hdl->counter_index--; + hdl->counter_index_pos = 8; + } else { + hdl->counter_index_pos -= nb; + if (nb_bits) { + decrypt_counter(hdl); + hdl->couter_avail -= nb_bits; + } + } + } + return key; } +#endif // AESEncryptionStreamMode #endif // KVZ_SEL_ENCRYPTION
View file
kvazaar-1.1.0.tar.gz/src/extras/crypto.h -> kvazaar-1.2.0.tar.gz/src/extras/crypto.h
Changed
@@ -2,6 +2,10 @@ #define CRYPTO_H_ #include "global.h" +#include "../cfg.h" + +#include <stdio.h> +#include <math.h> #ifdef KVZ_SEL_ENCRYPTION #define STUBBED extern @@ -9,73 +13,60 @@ #define STUBBED static #endif -#include <stdio.h> -#include <math.h> -#define AESEncryptionStreamMode 1 +#define AESEncryptionStreamMode 1 + #ifdef __cplusplus extern "C" { #endif - typedef void* Crypto_Handle; - STUBBED Crypto_Handle CreateC(); - STUBBED void InitC(Crypto_Handle hdl); - STUBBED void DecryptC(Crypto_Handle hdl, const unsigned char *in_stream, int size_bits, unsigned char *out_stream); + +typedef struct crypto_handle_t crypto_handle_t; + +STUBBED crypto_handle_t* kvz_crypto_create(const kvz_config *cfg); +STUBBED void kvz_crypto_decrypt(crypto_handle_t* hdl, + const uint8_t *in_stream, + int size_bits, + uint8_t *out_stream); +STUBBED void kvz_crypto_delete(crypto_handle_t **hdl); + #if AESEncryptionStreamMode - STUBBED unsigned int ff_get_key(Crypto_Handle *hdl, int nb_bits); +STUBBED unsigned kvz_crypto_get_key(crypto_handle_t *hdl, int num_bits); #endif - STUBBED void DeleteCryptoC(Crypto_Handle hdl); #ifdef __cplusplus } #endif +#undef STUBBED + #ifndef KVZ_SEL_ENCRYPTION -// Provide static stubs to allow linking without libcryptopp and allows us to -// avoid sprinkling ifdefs everywhere and having a bunch of code that's not -// compiled during normal development. +// Provide static stubs to allow linking without libcryptopp and allows us +// to avoid sprinkling ifdefs everywhere and having a bunch of code that's +// not compiled during normal development. // Provide them in the header so we can avoid compiling the cpp file, which // means we don't need a C++ compiler when crypto is not enabled. -#include <stdio.h> -#include <stdint.h> -#include <inttypes.h> - -static uintptr_t handle_id = 1; - -static INLINE Crypto_Handle CreateC() { - printf("Crypto CreateC %" PRIuPTR "\n", handle_id); - return (void*)(handle_id++); -} -static INLINE void InitC(Crypto_Handle hdl) { - printf("Crypto InitC %" PRIuPTR "\n", (uintptr_t)hdl); -} - -static INLINE void DecryptC(Crypto_Handle hdl, const unsigned char *in_stream, - int size_bits, unsigned char *out_stream) +static INLINE crypto_handle_t* kvz_crypto_create(const kvz_config *cfg) { - // Stub. - printf("Crypto DecryptC %" PRIuPTR "\n", (uintptr_t)hdl); + return NULL; } +static INLINE void kvz_crypto_decrypt(crypto_handle_t* hdl, + const uint8_t *in_stream, + int size_bits, + uint8_t *out_stream) +{} + +static INLINE void kvz_crypto_delete(crypto_handle_t **hdl) +{} + #if AESEncryptionStreamMode -static INLINE unsigned int ff_get_key(Crypto_Handle *hdl, int nb_bits) +static INLINE unsigned kvz_crypto_get_key(crypto_handle_t *hdl, int num_bits) { - // Stub. - static Crypto_Handle ff_get_key_last_hdl = 0; - if (*hdl != ff_get_key_last_hdl) { - printf("Crypto ff_get_key %" PRIuPTR "\n", (uintptr_t)*hdl); - } - ff_get_key_last_hdl = *hdl; return 0; } #endif -static INLINE void DeleteCryptoC(Crypto_Handle hdl) -{ - // Stub. - printf("Crypto DeleteCryptoC %" PRIuPTR "\n", (uintptr_t)hdl); -} - #endif // KVZ_SEL_ENCRYPTION #endif // CRYPTO_H_
View file
kvazaar-1.1.0.tar.gz/src/filter.c -> kvazaar-1.2.0.tar.gz/src/filter.c
Changed
@@ -168,7 +168,7 @@ int16_t m4 = src[0]; int16_t m5 = src[offset]; - delta = CLIP(-tc,tc, (((m4 - m3) << 2) + m2 - m5 + 4 ) >> 3); + delta = CLIP(-tc,tc, (((m4 - m3) * 4) + m2 - m5 + 4 ) >> 3); if(!part_P_nofilter) { src[-offset] = CLIP(0, (1 << encoder->bitdepth) - 1, m3 + delta); } @@ -262,9 +262,7 @@ static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir) { - if (state->encoder_control->cfg.target_bitrate <= 0 - && state->encoder_control->cfg.roi.dqps == NULL) - { + if (!state->encoder_control->lcu_dqp_enabled) { return state->qp; } @@ -403,10 +401,13 @@ // Non-zero residual/coeffs and transform boundary // Neither CU is intra so tr_depth <= MAX_DEPTH. strength = 1; - } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && ((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= 4) || (abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= 4))) { + } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && + ((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= 4) || + (abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= 4))) { // Absolute motion vector diff between blocks >= 1 (Integer pixel) strength = 1; - } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) { + } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && + cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) { strength = 1; } @@ -431,10 +432,10 @@ cu_p->inter.mv[1][0] = 0; cu_p->inter.mv[1][1] = 0; } - const int refP0 = (cu_p->inter.mv_dir & 1) ? cu_p->inter.mv_ref[0] : -1; - const int refP1 = (cu_p->inter.mv_dir & 2) ? cu_p->inter.mv_ref[1] : -1; - const int refQ0 = (cu_q->inter.mv_dir & 1) ? cu_q->inter.mv_ref[0] : -1; - const int refQ1 = (cu_q->inter.mv_dir & 2) ? cu_q->inter.mv_ref[1] : -1; + const int refP0 = (cu_p->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_p->inter.mv_ref[0]] : -1; + const int refP1 = (cu_p->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_p->inter.mv_ref[1]] : -1; + const int refQ0 = (cu_q->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_q->inter.mv_ref[0]] : -1; + const int refQ1 = (cu_q->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_q->inter.mv_ref[1]] : -1; const int16_t* mvQ0 = cu_q->inter.mv[0]; const int16_t* mvQ1 = cu_q->inter.mv[1];
View file
kvazaar-1.1.0.tar.gz/src/global.h -> kvazaar-1.2.0.tar.gz/src/global.h
Changed
@@ -117,10 +117,6 @@ //! Search is started at depth 0 and goes in Z-order to MAX_PU_DEPTH, see search_cu() #define MAX_PU_DEPTH 4 -//! Minimum log2 transform sizes. -//! spec: max_transform_hierarchy_depth_inter -#define TR_DEPTH_INTER 2 - //! spec: pcm_enabled_flag, Setting to 1 will enable using PCM blocks (current intra-search does not consider PCM) #define ENABLE_PCM 0 @@ -150,6 +146,28 @@ #define LCU_LUMA_SIZE (LCU_WIDTH * LCU_WIDTH) #define LCU_CHROMA_SIZE (LCU_WIDTH * LCU_WIDTH >> 2) +/** + * \brief Number of pixels to delay deblocking. + * + * Number of pixels at the bottom and right side of the LCU that are not + * deblocked until when filtering the neighboring LCU. The last four chroma + * pixels of the horizontal edges within the LCU are deblocked with the LCU + * to the right. Therefore, DEBLOCK_DELAY_PX is set to 8 pixels. + */ +#define DEBLOCK_DELAY_PX 8 + +/** + * \brief Number of pixels to delay SAO in horizontal and vertical + * directions. + * + * Number of pixels at the bottom and right side of the LCU that are not + * filtered with SAO until when filtering the neighboring LCU. SAO + * reconstruction requires that a one pixels border has been deblocked for + * both luma and chroma. Therefore, SAO_DELAY_PX is set to + * DEBLOCK_DELAY_PX + 2. + */ +#define SAO_DELAY_PX (DEBLOCK_DELAY_PX + 2) + #define MAX_REF_PIC_COUNT 16 #define AMVP_MAX_NUM_CANDS 2 @@ -162,6 +180,7 @@ #define MIN(a,b) (((a)<(b))?(a):(b)) #define CLIP(low,high,value) MAX((low),MIN((high),(value))) #define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value)) +#define CLIP_TO_QP(value) CLIP(0, 51, (value)) #define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; } #define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth) #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val)) @@ -181,7 +200,7 @@ // NOTE: When making a release, check to see if incrementing libversion in // configure.ac is necessary. #ifndef KVZ_VERSION -#define KVZ_VERSION 1.1.0 +#define KVZ_VERSION 1.2.0 #endif #define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION) @@ -206,6 +225,12 @@ #define SIMD_ALIGNMENT 32 #ifdef _MSC_VER + #define ALIGNED(alignment) __declspec(align(alignment)) +#else + #define ALIGNED(alignment) __attribute__((aligned (alignment))) +#endif + +#ifdef _MSC_VER // Buggy VS2010 throws intellisense warnings if void* is not casted. #define MALLOC(type, num) (type *)malloc(sizeof(type) * (num)) #else @@ -219,7 +244,11 @@ // Fill a structure or a static array with val bytes. #define FILL(var, val) memset(&(var), (val), sizeof(var)) // Fill a number of elements in an array with val bytes. -#define FILL_ARRAY(ar, val, size) memset((ar), (val), (size) * sizeof(*(ar))) +#define FILL_ARRAY(ar, val, size) \ +{\ + void *temp_ptr = (void*)(ar);\ + memset((temp_ptr), (val), (size) * sizeof(*(ar)));\ +} #define FREE_POINTER(pointer) { free((void*)pointer); pointer = NULL; } #define MOVE_POINTER(dst_pointer,src_pointer) { dst_pointer = src_pointer; src_pointer = NULL; }
View file
kvazaar-1.1.0.tar.gz/src/image.c -> kvazaar-1.2.0.tar.gz/src/image.c
Changed
@@ -23,6 +23,7 @@ #include <limits.h> #include <stdlib.h> +#include "strategies/strategies-ipol.h" #include "strategies/strategies-picture.h" #include "threads.h" @@ -191,12 +192,14 @@ return yuv; } -void kvz_yuv_t_free(yuv_t * yuv) +void kvz_yuv_t_free(yuv_t *yuv) { - free(yuv->y); - free(yuv->u); - free(yuv->v); - free(yuv); + if (yuv) { + FREE_POINTER(yuv->y); + FREE_POINTER(yuv->u); + FREE_POINTER(yuv->v); + } + FREE_POINTER(yuv); } hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size) @@ -447,21 +450,19 @@ * \param pic Image for the block we are trying to find. * \param ref Image where we are trying to find the block. * -* \returns +* \returns Sum of absolute differences */ -unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_x, int pic_y, int ref_x, int ref_y, - int block_width, int block_height, int max_px_below_lcu) { +unsigned kvz_image_calc_sad(const kvz_picture *pic, + const kvz_picture *ref, + int pic_x, + int pic_y, + int ref_x, + int ref_y, + int block_width, + int block_height) +{ assert(pic_x >= 0 && pic_x <= pic->width - block_width); assert(pic_y >= 0 && pic_y <= pic->height - block_height); - - // Check that we are not referencing pixels that are not final. - if (max_px_below_lcu >= 0) { - int next_lcu_row_px = ((pic_y >> LOG2_LCU_WIDTH) + 1) << LOG2_LCU_WIDTH; - int px_below_lcu = ref_y + block_height - next_lcu_row_px; - if (px_below_lcu > max_px_below_lcu) { - return INT_MAX; - } - } if (ref_x >= 0 && ref_x <= ref->width - block_width && ref_y >= 0 && ref_y <= ref->height - block_height) @@ -479,6 +480,74 @@ /** +* \brief Calculate interpolated SATD between two blocks. +* +* \param pic Image for the block we are trying to find. +* \param ref Image where we are trying to find the block. +*/ +unsigned kvz_image_calc_satd(const kvz_picture *pic, + const kvz_picture *ref, + int pic_x, + int pic_y, + int ref_x, + int ref_y, + int block_width, + int block_height) +{ + assert(pic_x >= 0 && pic_x <= pic->width - block_width); + assert(pic_y >= 0 && pic_y <= pic->height - block_height); + + if (ref_x >= 0 && ref_x <= ref->width - block_width && + ref_y >= 0 && ref_y <= ref->height - block_height) + { + // Reference block is completely inside the frame, so just calculate the + // SAD directly. This is the most common case, which is why it's first. + const kvz_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x]; + const kvz_pixel *ref_data = &ref->y[ref_y * ref->stride + ref_x]; + return kvz_satd_any_size(block_width, + block_height, + pic_data, + pic->stride, + ref_data, + ref->stride) >> (KVZ_BIT_DEPTH - 8); + } else { + // Extrapolate pixels from outside the frame. + kvz_extended_block block; + kvz_get_extended_block(pic_x, + pic_y, + ref_x - pic_x, + ref_y - pic_y, + 0, + 0, + ref->y, + ref->width, + ref->height, + 0, + block_width, + block_height, + &block); + + const kvz_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x]; + + unsigned satd = kvz_satd_any_size(block_width, + block_height, + pic_data, + pic->stride, + block.buffer, + block.stride) >> (KVZ_BIT_DEPTH - 8); + + if (block.malloc_used) { + FREE_POINTER(block.buffer); + } + + return satd; + } +} + + + + +/** * \brief BLock Image Transfer from one buffer to another. * * It's a stupidly simple loop that copies pixels.
View file
kvazaar-1.1.0.tar.gz/src/image.h -> kvazaar-1.2.0.tar.gz/src/image.h
Changed
@@ -74,8 +74,24 @@ //Algorithms -unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_x, int pic_y, int ref_x, int ref_y, - int block_width, int block_height, int max_lcu_below); +unsigned kvz_image_calc_sad(const kvz_picture *pic, + const kvz_picture *ref, + int pic_x, + int pic_y, + int ref_x, + int ref_y, + int block_width, + int block_height); + + +unsigned kvz_image_calc_satd(const kvz_picture *pic, + const kvz_picture *ref, + int pic_x, + int pic_y, + int ref_x, + int ref_y, + int block_width, + int block_height); void kvz_pixels_blit(const kvz_pixel* orig, kvz_pixel *dst,
View file
kvazaar-1.1.0.tar.gz/src/imagelist.c -> kvazaar-1.2.0.tar.gz/src/imagelist.c
Changed
@@ -36,9 +36,10 @@ { image_list_t *list = (image_list_t *)malloc(sizeof(image_list_t)); list->size = size; - list->images = malloc(sizeof(kvz_picture*) * size); - list->cu_arrays = malloc(sizeof(cu_array_t*) * size); - list->pocs = malloc(sizeof(int32_t) * size); + list->images = malloc(sizeof(kvz_picture*) * size); + list->cu_arrays = malloc(sizeof(cu_array_t*) * size); + list->pocs = malloc(sizeof(int32_t) * size); + list->ref_LXs = malloc(sizeof(*list->ref_LXs) * size); list->used_size = 0; return list; @@ -55,6 +56,7 @@ list->images = (kvz_picture**)realloc(list->images, sizeof(kvz_picture*) * size); list->cu_arrays = (cu_array_t**)realloc(list->cu_arrays, sizeof(cu_array_t*) * size); list->pocs = realloc(list->pocs, sizeof(int32_t) * size); + list->ref_LXs = realloc(list->ref_LXs, sizeof(*list->ref_LXs) * size); list->size = size; return size == 0 || (list->images && list->cu_arrays && list->pocs); } @@ -71,9 +73,13 @@ for (i = 0; i < list->used_size; ++i) { kvz_image_free(list->images[i]); list->images[i] = NULL; - kvz_cu_array_free(list->cu_arrays[i]); + kvz_cu_array_free(&list->cu_arrays[i]); list->cu_arrays[i] = NULL; list->pocs[i] = 0; + for (int j = 0; j < 16; j++) { + list->ref_LXs[i][0][j] = 0; + list->ref_LXs[i][1][j] = 0; + } } } @@ -81,10 +87,12 @@ free(list->images); free(list->cu_arrays); free(list->pocs); + free(list->ref_LXs); } list->images = NULL; list->cu_arrays = NULL; list->pocs = NULL; + list->ref_LXs = NULL; free(list); return 1; } @@ -95,7 +103,7 @@ * \param picture_list list to use * \return 1 on success */ -int kvz_image_list_add(image_list_t *list, kvz_picture *im, cu_array_t *cua, int32_t poc) +int kvz_image_list_add(image_list_t *list, kvz_picture *im, cu_array_t *cua, int32_t poc, uint8_t ref_LX[2][16]) { int i = 0; if (KVZ_ATOMIC_INC(&(im->refcount)) == 1) { @@ -119,11 +127,19 @@ list->images[i] = list->images[i - 1]; list->cu_arrays[i] = list->cu_arrays[i - 1]; list->pocs[i] = list->pocs[i - 1]; + for (int j = 0; j < 16; j++) { + list->ref_LXs[i][0][j] = list->ref_LXs[i - 1][0][j]; + list->ref_LXs[i][1][j] = list->ref_LXs[i - 1][1][j]; + } } list->images[0] = im; list->cu_arrays[0] = cua; list->pocs[0] = poc; + for (int j = 0; j < 16; j++) { + list->ref_LXs[0][0][j] = ref_LX[0][j]; + list->ref_LXs[0][1][j] = ref_LX[1][j]; + } list->used_size++; return 1; @@ -145,17 +161,17 @@ kvz_image_free(list->images[n]); - if (!kvz_cu_array_free(list->cu_arrays[n])) { - fprintf(stderr, "Could not free cu_array!\n"); - assert(0); //Stop here - return 0; - } + kvz_cu_array_free(&list->cu_arrays[n]); // The last item is easy to remove if (n == list->used_size - 1) { list->images[n] = NULL; list->cu_arrays[n] = NULL; list->pocs[n] = 0; + for (int j = 0; j < 16; j++) { + list->ref_LXs[n][0][j] = 0; + list->ref_LXs[n][1][j] = 0; + } list->used_size--; } else { int i = n; @@ -164,10 +180,18 @@ list->images[i] = list->images[i + 1]; list->cu_arrays[i] = list->cu_arrays[i + 1]; list->pocs[i] = list->pocs[i + 1]; + for (int j = 0; j < 16; j++) { + list->ref_LXs[i][0][j] = list->ref_LXs[i + 1][0][j]; + list->ref_LXs[i][1][j] = list->ref_LXs[i + 1][1][j]; + } } list->images[list->used_size - 1] = NULL; list->cu_arrays[list->used_size - 1] = NULL; list->pocs[list->used_size - 1] = 0; + for (int j = 0; j < 16; j++) { + list->ref_LXs[list->used_size - 1][0][j] = 0; + list->ref_LXs[list->used_size - 1][1][j] = 0; + } list->used_size--; } @@ -181,7 +205,7 @@ } for (i = source->used_size - 1; i >= 0; --i) { - kvz_image_list_add(target, source->images[i], source->cu_arrays[i], source->pocs[i]); + kvz_image_list_add(target, source->images[i], source->cu_arrays[i], source->pocs[i], source->ref_LXs[i]); } return 1; }
View file
kvazaar-1.1.0.tar.gz/src/imagelist.h -> kvazaar-1.2.0.tar.gz/src/imagelist.h
Changed
@@ -39,14 +39,17 @@ struct kvz_picture* *images; //!< \brief Pointer to array of picture pointers. cu_array_t* *cu_arrays; int32_t *pocs; + uint8_t (*ref_LXs)[2][16]; //!< L0 and L1 reference index list for each image uint32_t size; //!< \brief Array size. uint32_t used_size; + + } image_list_t; image_list_t * kvz_image_list_alloc(int size); int kvz_image_list_resize(image_list_t *list, unsigned size); int kvz_image_list_destroy(image_list_t *list); -int kvz_image_list_add(image_list_t *list, kvz_picture *im, cu_array_t* cua, int32_t poc); +int kvz_image_list_add(image_list_t *list, kvz_picture *im, cu_array_t* cua, int32_t poc, uint8_t ref_LX[2][16]); int kvz_image_list_rem(image_list_t *list, unsigned n); int kvz_image_list_copy_contents(image_list_t *target, image_list_t *source);
View file
kvazaar-1.1.0.tar.gz/src/input_frame_buffer.c -> kvazaar-1.2.0.tar.gz/src/input_frame_buffer.c
Changed
@@ -58,8 +58,6 @@ const int gop_buf_size = 3 * cfg->gop_len; - assert(state->frame->num >= 0); - if (cfg->gop_len == 0 || cfg->gop_lowdelay) { // No reordering of output pictures necessary. @@ -69,12 +67,14 @@ state->frame->gop_offset = 0; if (cfg->gop_len > 0) { // Using a low delay GOP structure. - state->frame->gop_offset = (state->frame->num - 1) % cfg->gop_len; - if (state->frame->gop_offset < 0) { - // Set gop_offset of IDR as the highest quality picture. - state->frame->gop_offset += cfg->gop_len; + uint64_t frame_num = buf->num_out; + if (cfg->intra_period) { + frame_num %= cfg->intra_period; } + state->frame->gop_offset = (frame_num + cfg->gop_len - 1) % cfg->gop_len; } + buf->num_in++; + buf->num_out++; return kvz_image_copy_ref(img_in); }
View file
kvazaar-1.1.0.tar.gz/src/inter.c -> kvazaar-1.2.0.tar.gz/src/inter.c
Changed
@@ -31,6 +31,14 @@ #include "videoframe.h" +typedef struct { + const cu_info_t *a[2]; + const cu_info_t *b[3]; + const cu_info_t *c3; + const cu_info_t *h; +} merge_candidates_t; + + static void inter_recon_frac_luma(const encoder_state_t * const state, const kvz_picture * const ref, int32_t xpos, @@ -53,8 +61,8 @@ ypos, mv_param[0] >> 2, mv_param[1] >> 2, - state->tile->lcu_offset_x * LCU_WIDTH, - state->tile->lcu_offset_y * LCU_WIDTH, + state->tile->offset_x, + state->tile->offset_y, ref->y, ref->width, ref->height, @@ -98,8 +106,8 @@ ypos, mv_param[0] >> 2, mv_param[1] >> 2, - state->tile->lcu_offset_x * LCU_WIDTH, - state->tile->lcu_offset_y * LCU_WIDTH, + state->tile->offset_x, + state->tile->offset_y, ref->y, ref->width, ref->height, @@ -146,14 +154,34 @@ kvz_extended_block src_v = { 0, 0, 0, 0 }; //Fractional chroma U - kvz_get_extended_block(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C, - ref->u, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_height, &src_u); + kvz_get_extended_block(xpos, ypos, + (mv_param[0] >> 2) >> 1, + (mv_param[1] >> 2) >> 1, + state->tile->offset_x >> 1, + state->tile->offset_y >> 1, + ref->u, + ref->width >> 1, + ref->height >> 1, + FILTER_SIZE_C, + block_width, + block_height, + &src_u); kvz_sample_octpel_chroma(state->encoder_control, src_u.orig_topleft, src_u.stride, block_width, block_height, lcu->rec.u + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param); //Fractional chroma V - kvz_get_extended_block(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C, - ref->v, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_height, &src_v); + kvz_get_extended_block(xpos, ypos, + (mv_param[0] >> 2) >> 1, + (mv_param[1] >> 2) >> 1, + state->tile->offset_x >> 1, + state->tile->offset_y >> 1, + ref->v, + ref->width >> 1, + ref->height >> 1, + FILTER_SIZE_C, + block_width, + block_height, + &src_v); kvz_sample_octpel_chroma(state->encoder_control, src_v.orig_topleft, src_v.stride, block_width, block_height, lcu->rec.v + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param); @@ -190,8 +218,8 @@ ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, - state->tile->lcu_offset_x * LCU_WIDTH_C, - state->tile->lcu_offset_y * LCU_WIDTH_C, + state->tile->offset_x >> 1, + state->tile->offset_y >> 1, ref->u, ref->width >> 1, ref->height >> 1, @@ -215,8 +243,8 @@ ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, - state->tile->lcu_offset_x * LCU_WIDTH_C, - state->tile->lcu_offset_y * LCU_WIDTH_C, + state->tile->offset_x >> 1, + state->tile->offset_y >> 1, ref->v, ref->width >> 1, ref->height >> 1, @@ -300,17 +328,13 @@ lcu_t *lcu, hi_prec_buf_t *hi_prec_out) { - const vector2d_t tile_in_frame = { - state->tile->lcu_offset_x * LCU_WIDTH, - state->tile->lcu_offset_y * LCU_WIDTH - }; const vector2d_t pu_in_tile = { xpos, ypos }; const vector2d_t pu_in_lcu = { xpos % LCU_WIDTH, ypos % LCU_WIDTH }; const vector2d_t mv_in_pu = { mv_param[0] >> 2, mv_param[1] >> 2 }; const vector2d_t mv_in_frame = { - mv_in_pu.x + pu_in_tile.x + tile_in_frame.x, - mv_in_pu.y + pu_in_tile.y + tile_in_frame.y + mv_in_pu.x + pu_in_tile.x + state->tile->offset_x, + mv_in_pu.y + pu_in_tile.y + state->tile->offset_y }; const bool mv_is_outside_frame = mv_in_frame.x < 0 || @@ -642,24 +666,26 @@ /** -* \brief Get merge candidates for current block -* \param encoder encoder control struct to use -* \param x block x position in SCU -* \param y block y position in SCU -* \param width current block width -* \param height current block height -* \param H candidate H -* \param C1 candidate C1 -*/ -static void kvz_inter_get_temporal_merge_candidates(const encoder_state_t * const state, - int32_t x, - int32_t y, - int32_t width, - int32_t height, - cu_info_t **C3, - cu_info_t **H, - uint8_t ref_list, - uint8_t ref_idx) { + * \brief Get merge candidates for current block + * + * \param state encoder control state to use + * \param x block x position in SCU + * \param y block y position in SCU + * \param width current block width + * \param height current block height + * \param ref_list which reference list, L0 is 1 and L1 is 2 + * \param ref_idx index in the reference list + * \param cand_out will be filled with C3 and H candidates + */ +static void get_temporal_merge_candidates(const encoder_state_t * const state, + int32_t x, + int32_t y, + int32_t width, + int32_t height, + uint8_t ref_list, + uint8_t ref_idx, + merge_candidates_t *cand_out) +{ /* Predictor block locations _________ @@ -670,22 +696,19 @@ |H| */ - *C3 = NULL; - *H = NULL; + cand_out->c3 = cand_out->h = NULL; // Find temporal reference if (state->frame->ref->used_size) { - uint32_t colocated_ref = UINT_MAX; + uint32_t colocated_ref; // Select L0/L1 ref_idx reference - for (int temporal_cand = 0; temporal_cand < state->frame->ref->used_size; temporal_cand++) { - if (state->frame->refmap[temporal_cand].list == ref_list && state->frame->refmap[temporal_cand].idx == ref_idx) { - colocated_ref = temporal_cand; - break; - } + if (state->frame->ref_LX_size[ref_list-1] > ref_idx) { + colocated_ref = state->frame->ref_LX[ref_list - 1][ref_idx]; + } else { + // not found + return; } - - if (colocated_ref == UINT_MAX) return; cu_array_t *ref_cu_array = state->frame->ref->cu_arrays[colocated_ref]; int cu_per_width = ref_cu_array->width / SCU_WIDTH; @@ -707,7 +730,7 @@ if (H_offset >= 0) { // Only use when it's inter block if (ref_cu_array->data[H_offset].type == CU_INTER) { - *H = &ref_cu_array->data[H_offset]; + cand_out->h = &ref_cu_array->data[H_offset]; } } } @@ -718,7 +741,7 @@ if (xColCtr < state->encoder_control->in.width && yColCtr < state->encoder_control->in.height) { uint32_t C3_offset = ((xColCtr >> 4) << 4) / SCU_WIDTH + ((((yColCtr >> 4) << 4) / SCU_WIDTH) * cu_per_width); if (ref_cu_array->data[C3_offset].type == CU_INTER) { - *C3 = &ref_cu_array->data[C3_offset]; + cand_out->c3 = &ref_cu_array->data[C3_offset]; } } } @@ -737,12 +760,8 @@ * \param height block height in pixels * \param picture_width tile width in pixels * \param picture_height tile height in pixels - * \param b0 Returns the b0 candidate. - * \param b1 Returns the b1 candidate. - * \param b2 Returns the b2 candidate. - * \param a0 Returns the a0 candidate. - * \param a1 Returns the a1 candidate. * \param lcu current LCU + * \param cand_out will be filled with A and B candidates */ static void get_spatial_merge_candidates(int32_t x, int32_t y, @@ -750,12 +769,8 @@ int32_t height, int32_t picture_width, int32_t picture_height, - cu_info_t **b0, - cu_info_t **b1, - cu_info_t **b2, - cu_info_t **a0, - cu_info_t **a1, - lcu_t *lcu) + lcu_t *lcu, + merge_candidates_t *cand_out) { /* Predictor block locations @@ -771,59 +786,55 @@ int32_t y_local = SUB_SCU(y); // A0 and A1 availability testing if (x != 0) { - *a1 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1); - // Do not check (*a1)->coded because the block above is always coded before + cu_info_t *a1 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1); + // Do not check a1->coded because the block above is always coded before // the current one and the flag is not set when searching an SMP block. - if ((*a1)->type == CU_INTER) { - inter_clear_cu_unused(*a1); - } else { - *a1 = NULL; + if (a1->type == CU_INTER) { + inter_clear_cu_unused(a1); + cand_out->a[1] = a1; } if (y_local + height < LCU_WIDTH && y + height < picture_height) { - *a0 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height); - if ((*a0)->type == CU_INTER && is_a0_cand_coded(x, y, width, height)) { - inter_clear_cu_unused(*a0); - } else { - *a0 = NULL; + cu_info_t *a0 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height); + if (a0->type == CU_INTER && is_a0_cand_coded(x, y, width, height)) { + inter_clear_cu_unused(a0); + cand_out->a[0] = a0; } } } // B0, B1 and B2 availability testing if (y != 0) { + cu_info_t *b0 = NULL; if (x + width < picture_width) { if (x_local + width < LCU_WIDTH) { - *b0 = LCU_GET_CU_AT_PX(lcu, x_local + width, y_local - 1); + b0 = LCU_GET_CU_AT_PX(lcu, x_local + width, y_local - 1); } else if (y_local == 0) { // Special case, top-right CU - *b0 = LCU_GET_TOP_RIGHT_CU(lcu); + b0 = LCU_GET_TOP_RIGHT_CU(lcu); } } - if ((*b0) && (*b0)->type == CU_INTER && is_b0_cand_coded(x, y, width, height)) { - inter_clear_cu_unused(*b0); - } else { - *b0 = NULL; + if (b0 && b0->type == CU_INTER && is_b0_cand_coded(x, y, width, height)) { + inter_clear_cu_unused(b0); + cand_out->b[0] = b0; } - *b1 = LCU_GET_CU_AT_PX(lcu, x_local + width - 1, y_local - 1); - // Do not check (*b1)->coded because the block to the left is always coded + cu_info_t *b1 = LCU_GET_CU_AT_PX(lcu, x_local + width - 1, y_local - 1); + // Do not check b1->coded because the block to the left is always coded // before the current one and the flag is not set when searching an SMP // block. - if ((*b1)->type == CU_INTER) { - inter_clear_cu_unused(*b1); - } else { - *b1 = NULL; + if (b1->type == CU_INTER) { + inter_clear_cu_unused(b1); + cand_out->b[1] = b1; } if (x != 0) { - *b2 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local - 1); - // Do not check (*b2)->coded because the block above and to the left is + cu_info_t *b2 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local - 1); + // Do not check b2->coded because the block above and to the left is // always coded before the current one. - if ((*b2)->type == CU_INTER) { - inter_clear_cu_unused(*b2); - } else { - *b2 = NULL; + if (b2->type == CU_INTER) { + inter_clear_cu_unused(b2); + cand_out->b[2] = b2; } } } @@ -843,11 +854,7 @@ * \param height block height in pixels * \param picture_width tile width in pixels * \param picture_height tile height in pixels - * \param b0 Returns the b0 candidate. - * \param b1 Returns the b1 candidate. - * \param b2 Returns the b2 candidate. - * \param a0 Returns the a0 candidate. - * \param a1 Returns the a1 candidate. + * \param cand_out will be filled with A and B candidates */ static void get_spatial_merge_candidates_cua(const cu_array_t *cua, int32_t x, @@ -856,11 +863,7 @@ int32_t height, int32_t picture_width, int32_t picture_height, - const cu_info_t **b0, - const cu_info_t **b1, - const cu_info_t **b2, - const cu_info_t **a0, - const cu_info_t **a1) + merge_candidates_t *cand_out) { /* Predictor block locations @@ -876,16 +879,16 @@ int32_t y_local = SUB_SCU(y); // A0 and A1 availability testing if (x != 0) { - *a1 = kvz_cu_array_at_const(cua, x - 1, y + height - 1); + const cu_info_t *a1 = kvz_cu_array_at_const(cua, x - 1, y + height - 1); // The block above is always coded before the current one. - if ((*a1)->type != CU_INTER) { - *a1 = NULL; + if (a1->type == CU_INTER) { + cand_out->a[1] = a1; } if (y_local + height < LCU_WIDTH && y + height < picture_height) { - *a0 = kvz_cu_array_at_const(cua, x - 1, y + height); - if ((*a0)->type != CU_INTER || !is_a0_cand_coded(x, y, width, height)) { - *a0 = NULL; + const cu_info_t *a0 = kvz_cu_array_at_const(cua, x - 1, y + height); + if (a0->type == CU_INTER && is_a0_cand_coded(x, y, width, height)) { + cand_out->a[0] = a0; } } } @@ -893,191 +896,227 @@ // B0, B1 and B2 availability testing if (y != 0) { if (x + width < picture_width && (x_local + width < LCU_WIDTH || y_local == 0)) { - *b0 = kvz_cu_array_at_const(cua, x + width, y - 1); - if ((*b0)->type != CU_INTER || !is_b0_cand_coded(x, y, width, height)) { - *b0 = NULL; + const cu_info_t *b0 = kvz_cu_array_at_const(cua, x + width, y - 1); + if (b0->type == CU_INTER && is_b0_cand_coded(x, y, width, height)) { + cand_out->b[0] = b0; } } - *b1 = kvz_cu_array_at_const(cua, x + width - 1, y - 1); + const cu_info_t *b1 = kvz_cu_array_at_const(cua, x + width - 1, y - 1); // The block to the left is always coded before the current one. - if ((*b1)->type != CU_INTER) { - *b1 = NULL; + if (b1->type == CU_INTER) { + cand_out->b[1] = b1; } if (x != 0) { - *b2 = kvz_cu_array_at_const(cua, x - 1, y - 1); + const cu_info_t *b2 = kvz_cu_array_at_const(cua, x - 1, y - 1); // The block above and to the left is always coded before the current // one. - if ((*b2)->type != CU_INTER) { - *b2 = NULL; + if (b2->type == CU_INTER) { + cand_out->b[2] = b2; } } } } +static INLINE int16_t get_scaled_mv(int16_t mv, int scale) +{ + int32_t scaled = scale * mv; + return CLIP(-32768, 32767, (scaled + 127 + (scaled < 0)) >> 8); +} + +static void apply_mv_scaling_pocs(int32_t current_poc, + int32_t current_ref_poc, + int32_t neighbor_poc, + int32_t neighbor_ref_poc, + int16_t mv_cand[2]) +{ + int32_t diff_current = current_poc - current_ref_poc; + int32_t diff_neighbor = neighbor_poc - neighbor_ref_poc; + + if (diff_current == diff_neighbor) return; + + diff_current = CLIP(-128, 127, diff_current); + diff_neighbor = CLIP(-128, 127, diff_neighbor); + + int scale = CLIP(-4096, 4095, + (diff_current * ((0x4000 + (abs(diff_neighbor) >> 1)) / diff_neighbor) + 32) >> 6); + + mv_cand[0] = get_scaled_mv(mv_cand[0], scale); + mv_cand[1] = get_scaled_mv(mv_cand[1], scale); +} + +static INLINE void apply_mv_scaling(const encoder_state_t *state, + const cu_info_t *current_cu, + const cu_info_t *neighbor_cu, + int8_t current_reflist, + int8_t neighbor_reflist, + int16_t mv_cand[2]) +{ + apply_mv_scaling_pocs(state->frame->poc, + state->frame->ref->pocs[ + state->frame->ref_LX[current_reflist][ + current_cu->inter.mv_ref[current_reflist]]], + state->frame->poc, + state->frame->ref->pocs[ + state->frame->ref_LX[neighbor_reflist][ + neighbor_cu->inter.mv_ref[neighbor_reflist]]], + mv_cand); +} + /** - * \brief Pick two mv candidates from the spatial and temporal candidates. + * \brief Try to add a temporal MVP or merge candidate. + * + * \param state encoder state + * \param current_ref index of the picture referenced by the current CU + * \param colocated colocated CU + * \param reflist either 0 (for L0) or 1 (for L1) + * \param[out] mv_out Returns the motion vector + * + * \return Whether a temporal candidate was added or not. */ -static void get_mv_cand_from_candidates(const encoder_state_t * const state, - int32_t x, - int32_t y, - int32_t width, - int32_t height, - const cu_info_t *b0, - const cu_info_t *b1, - const cu_info_t *b2, - const cu_info_t *a0, - const cu_info_t *a1, - const cu_info_t *c3, - const cu_info_t *h, +static bool add_temporal_candidate(const encoder_state_t *state, + uint8_t current_ref, + const cu_info_t *colocated, + int32_t reflist, + int16_t mv_out[2]) +{ + if (!colocated) return false; + + int colocated_ref; + if (state->frame->ref_LX_size[0] > 0) { + // get the first reference from L0 if it exists + colocated_ref = state->frame->ref_LX[0][0]; + } else { + // otherwise no candidate added + return false; + } + + // When there are reference pictures from the future (POC > current POC) + // in L0 or L1, the primary list for the colocated PU is the inverse of + // collocated_from_l0_flag. Otherwise it is equal to reflist. + // + // In Kvazaar, the L1 list is only used for future pictures and the slice + // type is set to KVZ_SLICE_B if and only if L1 is used. Therefore we can + // simply check the slice type here. Kvazaar always sets + // collocated_from_l0_flag so the list is L1 for B-slices. + int col_list = state->frame->slicetype == KVZ_SLICE_P ? reflist : 1; + + if ((colocated->inter.mv_dir & (col_list + 1)) == 0) { + // Use the other list if the colocated PU does not have a MV for the + // primary list. + col_list = 1 - col_list; + } + + mv_out[0] = colocated->inter.mv[col_list][0]; + mv_out[1] = colocated->inter.mv[col_list][1]; + apply_mv_scaling_pocs( + state->frame->poc, + state->frame->ref->pocs[current_ref], + state->frame->ref->pocs[colocated_ref], + state->frame->ref->images[colocated_ref]->ref_pocs[ + state->frame->ref->ref_LXs[colocated_ref] + [col_list][colocated->inter.mv_ref[col_list]]], + mv_out + ); + + return true; +} + +static INLINE bool add_mvp_candidate(const encoder_state_t *state, const cu_info_t *cur_cu, + const cu_info_t *cand, int8_t reflist, - int16_t mv_cand[2][2]) + bool scaling, + int16_t mv_cand_out[2]) { + if (!cand) return false; + + assert(cand->inter.mv_dir != 0); + const int cand_list = cand->inter.mv_dir & (1 << reflist) ? reflist : !reflist; + + if (scaling) { + mv_cand_out[0] = cand->inter.mv[cand_list][0]; + mv_cand_out[1] = cand->inter.mv[cand_list][1]; + apply_mv_scaling(state, cur_cu, cand, reflist, cand_list, mv_cand_out); + return true; + } + + if (cand->inter.mv_dir & (1 << cand_list) && + state->frame->ref_LX[cand_list][cand->inter.mv_ref[cand_list]] == + state->frame->ref_LX[reflist][cur_cu->inter.mv_ref[reflist]]) + { + mv_cand_out[0] = cand->inter.mv[cand_list][0]; + mv_cand_out[1] = cand->inter.mv[cand_list][1]; + return true; + } + + return false; +} + +/** + * \brief Pick two mv candidates from the spatial and temporal candidates. + */ +static void get_mv_cand_from_candidates(const encoder_state_t * const state, + int32_t x, + int32_t y, + int32_t width, + int32_t height, + const merge_candidates_t *merge_cand, + const cu_info_t *cur_cu, + int8_t reflist, + int16_t mv_cand[2][2]) +{ + const cu_info_t *const *a = merge_cand->a; + const cu_info_t *const *b = merge_cand->b; + const cu_info_t *c3 = merge_cand->c3; + const cu_info_t *h = merge_cand->h; + uint8_t candidates = 0; uint8_t b_candidates = 0; - int8_t reflist2nd = !reflist; - - #define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6) -#define APPLY_MV_SCALING(cu, cand, list) {int td = state->frame->poc - state->frame->ref->pocs[(cu)->inter.mv_ref[list]];\ - int tb = state->frame->poc - state->frame->ref->pocs[cur_cu->inter.mv_ref[reflist]];\ - if (td != tb) { \ - int scale = CALCULATE_SCALE(cu,tb,td); \ - mv_cand[cand][0] = ((scale * (cu)->inter.mv[list][0] + 127 + (scale * (cu)->inter.mv[list][0] < 0)) >> 8 ); \ - mv_cand[cand][1] = ((scale * (cu)->inter.mv[list][1] + 127 + (scale * (cu)->inter.mv[list][1] < 0)) >> 8 ); }} - - // Left predictors - if (a0 && ( - ((a0->inter.mv_dir & 1) && a0->inter.mv_ref[0] == cur_cu->inter.mv_ref[reflist]) || - ((a0->inter.mv_dir & 2) && a0->inter.mv_ref[1] == cur_cu->inter.mv_ref[reflist]))) { - if (a0->inter.mv_dir & (1 << reflist) && a0->inter.mv_ref[reflist] == cur_cu->inter.mv_ref[reflist]) { - mv_cand[candidates][0] = a0->inter.mv[reflist][0]; - mv_cand[candidates][1] = a0->inter.mv[reflist][1]; - } else { - mv_cand[candidates][0] = a0->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = a0->inter.mv[reflist2nd][1]; - } - candidates++; - } else if (a1 && ( - ((a1->inter.mv_dir & 1) && a1->inter.mv_ref[0] == cur_cu->inter.mv_ref[reflist]) || - ((a1->inter.mv_dir & 2) && a1->inter.mv_ref[1] == cur_cu->inter.mv_ref[reflist]))) { - if (a1->inter.mv_dir & (1 << reflist) && a1->inter.mv_ref[reflist] == cur_cu->inter.mv_ref[reflist]) { - mv_cand[candidates][0] = a1->inter.mv[reflist][0]; - mv_cand[candidates][1] = a1->inter.mv[reflist][1]; - } else { - mv_cand[candidates][0] = a1->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = a1->inter.mv[reflist2nd][1]; + + // Left predictors without scaling + for (int i = 0; i < 2; i++) { + if (add_mvp_candidate(state, cur_cu, a[i], reflist, false, mv_cand[candidates])) { + candidates++; + break; } - candidates++; } - if(!candidates) { - // Left predictors - if (a0) { - if (a0->inter.mv_dir & (1 << reflist)) { - mv_cand[candidates][0] = a0->inter.mv[reflist][0]; - mv_cand[candidates][1] = a0->inter.mv[reflist][1]; - APPLY_MV_SCALING(a0, candidates, reflist); - } else { - mv_cand[candidates][0] = a0->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = a0->inter.mv[reflist2nd][1]; - APPLY_MV_SCALING(a0, candidates, reflist2nd); - } - candidates++; - } else if (a1) { - if (a1->inter.mv_dir & (1 << reflist)) { - mv_cand[candidates][0] = a1->inter.mv[reflist][0]; - mv_cand[candidates][1] = a1->inter.mv[reflist][1]; - APPLY_MV_SCALING(a1, candidates, reflist); - } else { - mv_cand[candidates][0] = a1->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = a1->inter.mv[reflist2nd][1]; - APPLY_MV_SCALING(a1, candidates, reflist2nd); + // Left predictors with scaling + if (candidates == 0) { + for (int i = 0; i < 2; i++) { + if (add_mvp_candidate(state, cur_cu, a[i], reflist, true, mv_cand[candidates])) { + candidates++; + break; } - candidates++; } } - // Top predictors - if (b0 && ( - ((b0->inter.mv_dir & 1) && b0->inter.mv_ref[0] == cur_cu->inter.mv_ref[reflist]) || - ((b0->inter.mv_dir & 2) && b0->inter.mv_ref[1] == cur_cu->inter.mv_ref[reflist]))) { - if (b0->inter.mv_dir & (1 << reflist) && b0->inter.mv_ref[reflist] == cur_cu->inter.mv_ref[reflist]) { - mv_cand[candidates][0] = b0->inter.mv[reflist][0]; - mv_cand[candidates][1] = b0->inter.mv[reflist][1]; - } else { - mv_cand[candidates][0] = b0->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = b0->inter.mv[reflist2nd][1]; - } - b_candidates++; - } else if (b1 && ( - ((b1->inter.mv_dir & 1) && b1->inter.mv_ref[0] == cur_cu->inter.mv_ref[reflist]) || - ((b1->inter.mv_dir & 2) && b1->inter.mv_ref[1] == cur_cu->inter.mv_ref[reflist]))) { - if (b1->inter.mv_dir & (1 << reflist) && b1->inter.mv_ref[reflist] == cur_cu->inter.mv_ref[reflist]) { - mv_cand[candidates][0] = b1->inter.mv[reflist][0]; - mv_cand[candidates][1] = b1->inter.mv[reflist][1]; - } else { - mv_cand[candidates][0] = b1->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = b1->inter.mv[reflist2nd][1]; - } - b_candidates++; - } else if (b2 && ( - ((b2->inter.mv_dir & 1) && b2->inter.mv_ref[0] == cur_cu->inter.mv_ref[reflist]) || - ((b2->inter.mv_dir & 2) && b2->inter.mv_ref[1] == cur_cu->inter.mv_ref[reflist]))) { - if (b2->inter.mv_dir & (1 << reflist) && b2->inter.mv_ref[reflist] == cur_cu->inter.mv_ref[reflist]) { - mv_cand[candidates][0] = b2->inter.mv[reflist][0]; - mv_cand[candidates][1] = b2->inter.mv[reflist][1]; - } else { - mv_cand[candidates][0] = b2->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = b2->inter.mv[reflist2nd][1]; + // Top predictors without scaling + for (int i = 0; i < 3; i++) { + if (add_mvp_candidate(state, cur_cu, b[i], reflist, false, mv_cand[candidates])) { + b_candidates++; + break; } - b_candidates++; } + candidates += b_candidates; - // When a1 or a0 is available, we dont check for secondary B candidates - if (a1 || a0) { + // When a1 or a0 is available, we dont check for secondary B candidates. + if (a[0] || a[1]) { b_candidates = 1; - } else if(candidates != 2) { + } else if (candidates != 2) { b_candidates = 0; } - if(!b_candidates) { - // Top predictors - if (b0) { - if (b0->inter.mv_dir & (1 << reflist)) { - mv_cand[candidates][0] = b0->inter.mv[reflist][0]; - mv_cand[candidates][1] = b0->inter.mv[reflist][1]; - APPLY_MV_SCALING(b0, candidates, reflist); - } else { - mv_cand[candidates][0] = b0->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = b0->inter.mv[reflist2nd][1]; - APPLY_MV_SCALING(b0, candidates, reflist2nd); - } - candidates++; - } else if (b1) { - if (b1->inter.mv_dir & (1 << reflist)) { - mv_cand[candidates][0] = b1->inter.mv[reflist][0]; - mv_cand[candidates][1] = b1->inter.mv[reflist][1]; - APPLY_MV_SCALING(b1, candidates, reflist); - } else { - mv_cand[candidates][0] = b1->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = b1->inter.mv[reflist2nd][1]; - APPLY_MV_SCALING(b1, candidates, reflist2nd); - } - candidates++; - } else if (b2) { - if (b2->inter.mv_dir & (1 << reflist)) { - mv_cand[candidates][0] = b2->inter.mv[reflist][0]; - mv_cand[candidates][1] = b2->inter.mv[reflist][1]; - APPLY_MV_SCALING(b2, candidates, reflist); - } else { - mv_cand[candidates][0] = b2->inter.mv[reflist2nd][0]; - mv_cand[candidates][1] = b2->inter.mv[reflist2nd][1]; - APPLY_MV_SCALING(b2, candidates, reflist2nd); + if (!b_candidates) { + // Top predictors with scaling + for (int i = 0; i < 3; i++) { + if (add_mvp_candidate(state, cur_cu, b[i], reflist, true, mv_cand[candidates])) { + candidates++; + break; } - candidates++; } } @@ -1086,70 +1125,22 @@ candidates = 1; } - // Use Temporal Motion Vector Prediction when enabled - if (state->encoder_control->cfg.tmvp_enable) { - /* - Predictor block locations - __________ - |CurrentPU| - | |C0|__ | - | |C3| | - |_________|_ - |H| - */ - - // TMVP required at least two sequential P/B-frames - if (state->frame->poc > 1 && state->frame->ref->used_size && candidates < AMVP_MAX_NUM_CANDS) { - - // Use "H" as the primary predictor and "C3" as secondary - const cu_info_t *selected_CU = (h != NULL) ? h : (c3 != NULL) ? c3 : NULL; - - if (selected_CU) { - uint32_t colocated_ref = UINT_MAX; - uint32_t colocated_ref_poc = 0; - int td, tb; - - //ToDo: allow other than L0[0] for prediction - - //Fetch ref idx of the selected CU in L0[0] ref list - for (int temporal_cand = 0; temporal_cand < state->frame->ref->used_size; temporal_cand++) { - if (state->frame->refmap[temporal_cand].list == 1 && state->frame->refmap[temporal_cand].idx == 0) { - colocated_ref = temporal_cand; - break; - } - } - - if (colocated_ref != UINT_MAX) { - - uint8_t used_reflist = reflist; - - colocated_ref_poc = state->frame->ref->pocs[colocated_ref]; - - if (!(selected_CU->inter.mv_dir & (1 << used_reflist))) { - used_reflist = !reflist; - } - - // The reference id the colocated block is using - uint32_t colocated_ref_mv_ref = selected_CU->inter.mv_ref[used_reflist]; - - td = colocated_ref_poc - state->frame->ref->images[colocated_ref]->ref_pocs[colocated_ref_mv_ref]; - tb = state->frame->poc - state->frame->ref->pocs[cur_cu->inter.mv_ref[reflist]]; - - if (td == tb) { - mv_cand[candidates][0] = selected_CU->inter.mv[used_reflist][0]; - mv_cand[candidates][1] = selected_CU->inter.mv[used_reflist][1]; - } else { - int scale = CALCULATE_SCALE(NULL, tb, td); - mv_cand[candidates][0] = ((scale * selected_CU->inter.mv[used_reflist][0] + 127 + ((scale * selected_CU->inter.mv[used_reflist][0]) < 0)) >> 8); - mv_cand[candidates][1] = ((scale * selected_CU->inter.mv[used_reflist][1] + 127 + ((scale * selected_CU->inter.mv[used_reflist][1]) < 0)) >> 8); - } - - candidates++; - - } - } -#undef CALCULATE_SCALE - } + // Use Temporal Motion Vector Prediction when enabled. + // TMVP required at least two sequential P/B-frames. + bool can_use_tmvp = + state->encoder_control->cfg.tmvp_enable && + state->frame->poc > 1 && + state->frame->ref->used_size && + candidates < AMVP_MAX_NUM_CANDS && + (h != NULL || c3 != NULL); + + if (can_use_tmvp && add_temporal_candidate(state, + state->frame->ref_LX[reflist][cur_cu->inter.mv_ref[reflist]], + (h != NULL) ? h : c3, + reflist, + mv_cand[candidates])) + { + candidates++; } // Fill with (0,0) @@ -1158,8 +1149,6 @@ mv_cand[candidates][1] = 0; candidates++; } -#undef CALCULATE_SCALE -#undef APPLY_MV_SCALING } /** @@ -1185,13 +1174,15 @@ lcu_t *lcu, int8_t reflist) { - cu_info_t *b0, *b1, *b2, *a0, *a1, *c3, *h; - b0 = b1 = b2 = a0 = a1 = c3 = h = NULL; + merge_candidates_t merge_cand = { {0, 0}, {0, 0, 0}, 0, 0 }; + get_spatial_merge_candidates(x, y, width, height, - state->tile->frame->width, state->tile->frame->height, - &b0, &b1, &b2, &a0, &a1, lcu); - kvz_inter_get_temporal_merge_candidates(state, x, y, width, height, &c3, &h, 1, 0); - get_mv_cand_from_candidates(state, x, y, width, height, b0, b1, b2, a0, a1, c3, h, cur_cu, reflist, mv_cand); + state->tile->frame->width, + state->tile->frame->height, + lcu, + &merge_cand); + get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); + get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand); } /** @@ -1215,17 +1206,54 @@ const cu_info_t* cur_cu, int8_t reflist) { - const cu_info_t *b0, *b1, *b2, *a0, *a1; - cu_info_t *c3, *h; - b0 = b1 = b2 = a0 = a1 = c3 = h = NULL; - + merge_candidates_t merge_cand = { {0, 0}, {0, 0, 0}, 0, 0 }; + const cu_array_t *cua = state->tile->frame->cu_array; get_spatial_merge_candidates_cua(cua, x, y, width, height, state->tile->frame->width, state->tile->frame->height, - &b0, &b1, &b2, &a0, &a1); - kvz_inter_get_temporal_merge_candidates(state, x, y, width, height, &c3, &h, 1, 0); - get_mv_cand_from_candidates(state, x, y, width, height, b0, b1, b2, a0, a1, c3, h, cur_cu, reflist, mv_cand); + &merge_cand); + get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); + get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand); +} + +static bool is_duplicate_candidate(const cu_info_t* cu1, const cu_info_t* cu2) +{ + if (!cu2) return false; + if (cu1->inter.mv_dir != cu2->inter.mv_dir) return false; + + for (int reflist = 0; reflist < 2; reflist++) { + if (cu1->inter.mv_dir & (1 << reflist)) { + if (cu1->inter.mv[reflist][0] != cu2->inter.mv[reflist][0] || + cu1->inter.mv[reflist][1] != cu2->inter.mv[reflist][1] || + cu1->inter.mv_ref[reflist] != cu2->inter.mv_ref[reflist]) { + return false; + } + } + } + + return true; +} + +static bool add_merge_candidate(const cu_info_t *cand, + const cu_info_t *possible_duplicate1, + const cu_info_t *possible_duplicate2, + inter_merge_cand_t *merge_cand_out) +{ + if (!cand || + is_duplicate_candidate(cand, possible_duplicate1) || + is_duplicate_candidate(cand, possible_duplicate2)) { + return false; + } + + merge_cand_out->mv[0][0] = cand->inter.mv[0][0]; + merge_cand_out->mv[0][1] = cand->inter.mv[0][1]; + merge_cand_out->mv[1][0] = cand->inter.mv[1][0]; + merge_cand_out->mv[1][1] = cand->inter.mv[1][1]; + merge_cand_out->ref[0] = cand->inter.mv_ref[0]; // L0/L1 references + merge_cand_out->ref[1] = cand->inter.mv_ref[1]; + merge_cand_out->dir = cand->inter.mv_dir; + return true; } /** @@ -1239,7 +1267,6 @@ * \param use_b1 true, if candidate b1 can be used * \param mv_cand Returns the merge candidates. * \param lcu lcu containing the block - * \param ref_idx current reference index (used only by TMVP) * \return number of merge candidates */ uint8_t kvz_inter_get_merge_cand(const encoder_state_t * const state, @@ -1247,228 +1274,63 @@ int32_t width, int32_t height, bool use_a1, bool use_b1, inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS], - lcu_t *lcu, - uint8_t ref_idx) + lcu_t *lcu) { uint8_t candidates = 0; - int8_t duplicate = 0; - - cu_info_t *b0, *b1, *b2, *a0, *a1; int8_t zero_idx = 0; - b0 = b1 = b2 = a0 = a1 = NULL; - get_spatial_merge_candidates(x, y, width, height, - state->tile->frame->width, state->tile->frame->height, - &b0, &b1, &b2, &a0, &a1, lcu); - - if (!use_a1) a1 = NULL; - if (!use_b1) b1 = NULL; - -#define CHECK_DUPLICATE(CU1,CU2) {duplicate = 0; if ((CU2) && \ - (CU1)->inter.mv_dir == (CU2)->inter.mv_dir && \ - (!(((CU1)->inter.mv_dir & 1) && ((CU2)->inter.mv_dir & 1)) || \ - ((CU1)->inter.mv[0][0] == (CU2)->inter.mv[0][0] && \ - (CU1)->inter.mv[0][1] == (CU2)->inter.mv[0][1] && \ - (CU1)->inter.mv_ref[0] == (CU2)->inter.mv_ref[0]) ) && \ - (!(((CU1)->inter.mv_dir & 2) && ((CU2)->inter.mv_dir & 2) ) || \ - ((CU1)->inter.mv[1][0] == (CU2)->inter.mv[1][0] && \ - (CU1)->inter.mv[1][1] == (CU2)->inter.mv[1][1] && \ - (CU1)->inter.mv_ref[1] == (CU2)->inter.mv_ref[1]) ) \ - ) duplicate = 1; } - - if (a1) { - mv_cand[candidates].mv[0][0] = a1->inter.mv[0][0]; - mv_cand[candidates].mv[0][1] = a1->inter.mv[0][1]; - mv_cand[candidates].mv[1][0] = a1->inter.mv[1][0]; - mv_cand[candidates].mv[1][1] = a1->inter.mv[1][1]; - mv_cand[candidates].ref[0] = a1->inter.mv_ref[0]; - mv_cand[candidates].ref[1] = a1->inter.mv_ref[1]; - mv_cand[candidates].dir = a1->inter.mv_dir; - candidates++; - } - - if (b1) { - if(candidates) CHECK_DUPLICATE(b1, a1); - if(!duplicate) { - mv_cand[candidates].mv[0][0] = b1->inter.mv[0][0]; - mv_cand[candidates].mv[0][1] = b1->inter.mv[0][1]; - mv_cand[candidates].mv[1][0] = b1->inter.mv[1][0]; - mv_cand[candidates].mv[1][1] = b1->inter.mv[1][1]; - mv_cand[candidates].ref[0] = b1->inter.mv_ref[0]; - mv_cand[candidates].ref[1] = b1->inter.mv_ref[1]; - mv_cand[candidates].dir = b1->inter.mv_dir; - candidates++; - } - } - - if (b0) { - if(candidates) CHECK_DUPLICATE(b0,b1); - if(!duplicate) { - mv_cand[candidates].mv[0][0] = b0->inter.mv[0][0]; - mv_cand[candidates].mv[0][1] = b0->inter.mv[0][1]; - mv_cand[candidates].mv[1][0] = b0->inter.mv[1][0]; - mv_cand[candidates].mv[1][1] = b0->inter.mv[1][1]; - mv_cand[candidates].ref[0] = b0->inter.mv_ref[0]; - mv_cand[candidates].ref[1] = b0->inter.mv_ref[1]; - mv_cand[candidates].dir = b0->inter.mv_dir; - candidates++; - } - } - if (a0) { - if(candidates) CHECK_DUPLICATE(a0,a1); - if(!duplicate) { - mv_cand[candidates].mv[0][0] = a0->inter.mv[0][0]; - mv_cand[candidates].mv[0][1] = a0->inter.mv[0][1]; - mv_cand[candidates].mv[1][0] = a0->inter.mv[1][0]; - mv_cand[candidates].mv[1][1] = a0->inter.mv[1][1]; - mv_cand[candidates].ref[0] = a0->inter.mv_ref[0]; - mv_cand[candidates].ref[1] = a0->inter.mv_ref[1]; - mv_cand[candidates].dir = a0->inter.mv_dir; - candidates++; - } - } + merge_candidates_t merge_cand = { {0, 0}, {0, 0, 0}, 0, 0 }; - if (candidates != 4) { - if (b2) { - CHECK_DUPLICATE(b2,a1); - if(!duplicate) { - CHECK_DUPLICATE(b2,b1); - if(!duplicate) { - mv_cand[candidates].mv[0][0] = b2->inter.mv[0][0]; - mv_cand[candidates].mv[0][1] = b2->inter.mv[0][1]; - mv_cand[candidates].mv[1][0] = b2->inter.mv[1][0]; - mv_cand[candidates].mv[1][1] = b2->inter.mv[1][1]; - mv_cand[candidates].ref[0] = b2->inter.mv_ref[0]; - mv_cand[candidates].ref[1] = b2->inter.mv_ref[1]; - mv_cand[candidates].dir = b2->inter.mv_dir; - candidates++; - } + get_spatial_merge_candidates(x, y, width, height, + state->tile->frame->width, + state->tile->frame->height, + lcu, + &merge_cand); + + const cu_info_t **a = merge_cand.a; + const cu_info_t **b = merge_cand.b; + + if (!use_a1) a[1] = NULL; + if (!use_b1) b[1] = NULL; + + if (add_merge_candidate(a[1], NULL, NULL, &mv_cand[candidates])) candidates++; + if (add_merge_candidate(b[1], a[1], NULL, &mv_cand[candidates])) candidates++; + if (add_merge_candidate(b[0], b[1], NULL, &mv_cand[candidates])) candidates++; + if (add_merge_candidate(a[0], a[1], NULL, &mv_cand[candidates])) candidates++; + if (candidates < 4 && + add_merge_candidate(b[2], a[1], b[1], &mv_cand[candidates])) candidates++; + + bool can_use_tmvp = + state->encoder_control->cfg.tmvp_enable && + candidates < MRG_MAX_NUM_CANDS && + state->frame->ref->used_size; + + if (can_use_tmvp) { + mv_cand[candidates].dir = 0; + + const int max_reflist = (state->frame->slicetype == KVZ_SLICE_B ? 1 : 0); + for (int reflist = 0; reflist <= max_reflist; reflist++) { + // Fetch temporal candidates for the current CU + get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); + // TODO: enable L1 TMVP candidate + // get_temporal_merge_candidates(state, x, y, width, height, 2, 0, &merge_cand); + + const cu_info_t *temporal_cand = + (merge_cand.h != NULL) ? merge_cand.h : merge_cand.c3; + + if (add_temporal_candidate(state, + // Reference index 0 is always used for + // the temporal merge candidate. + state->frame->ref_LX[reflist][0], + temporal_cand, + reflist, + mv_cand[candidates].mv[reflist])) { + mv_cand[candidates].ref[reflist] = 0; + mv_cand[candidates].dir |= (1 << reflist); } } - } - - if (state->encoder_control->cfg.tmvp_enable) { - - #define CALCULATE_SCALE(tb,td) ((tb * ((0x4000 + (abs(td) >> 1)) / td) + 32) >> 6) - - if (candidates < MRG_MAX_NUM_CANDS && state->frame->ref->used_size) { - - uint32_t colocated_ref = UINT_MAX; - uint32_t colocated_ref_poc = 0; - int32_t td, tb; - uint8_t selected_reflist = 0; - - cu_info_t *c3_L0 = NULL; - cu_info_t *h_L0 = NULL; - - // Fetch temporal candidates for the current CU, , L0[0] - kvz_inter_get_temporal_merge_candidates(state, x, y, width, height, &c3_L0, &h_L0, 1, 0); - cu_info_t *selected_CU = NULL; - - selected_CU = (h_L0 != NULL) ? h_L0 : (c3_L0 != NULL) ? c3_L0 : NULL; - - - mv_cand[candidates].dir = 0; - - // Find LIST_0 reference - if (selected_CU) { - - if (!(selected_CU->inter.mv_dir & (selected_reflist + 1))) { - selected_reflist = !selected_reflist; - } - - uint8_t colocated_ref_found = 0; - - //Fetch ref idx of the selected CU in L0[0] ref list - for (int32_t temporal_cand = 0; temporal_cand < state->frame->ref->used_size; temporal_cand++) { - if (state->frame->refmap[temporal_cand].list == 1 && state->frame->refmap[temporal_cand].idx == 0) { - colocated_ref = temporal_cand; - colocated_ref_found = 1; - break; - } - } - - if (colocated_ref_found) { - colocated_ref_poc = state->frame->ref->pocs[colocated_ref]; - - // The reference id the colocated block is using - uint32_t colocated_ref_mv_ref = selected_CU->inter.mv_ref[selected_reflist]; - - td = colocated_ref_poc - state->frame->ref->images[colocated_ref]->ref_pocs[colocated_ref_mv_ref]; - tb = state->frame->poc - state->frame->ref->pocs[ref_idx]; - - mv_cand[candidates].dir |= 1; - - if (td == tb) { - mv_cand[candidates].mv[0][0] = selected_CU->inter.mv[selected_reflist][0]; - mv_cand[candidates].mv[0][1] = selected_CU->inter.mv[selected_reflist][1]; - } else { - int32_t scale = CALCULATE_SCALE(tb, td); - mv_cand[candidates].mv[0][0] = ((scale * selected_CU->inter.mv[selected_reflist][0] + 127 + ((scale * selected_CU->inter.mv[selected_reflist][0]) < 0)) >> 8); - mv_cand[candidates].mv[0][1] = ((scale * selected_CU->inter.mv[selected_reflist][1] + 127 + ((scale * selected_CU->inter.mv[selected_reflist][1]) < 0)) >> 8); - } - mv_cand[candidates].ref[0] = colocated_ref; - } - } - - - if (state->frame->slicetype == KVZ_SLICE_B) { - - selected_reflist = 1; - - // ToDo: enable L1 TMVP candidate - // Fetch temporal candidates for the current CU, L0[0] - kvz_inter_get_temporal_merge_candidates(state, x, y, width, height, &c3_L0, &h_L0, 1, 0); - //kvz_inter_get_temporal_merge_candidates(state, x, y, width, height, &c3_L1, &h_L1, 2, 0); - - selected_CU = (h_L0 != NULL) ? h_L0 : (c3_L0 != NULL) ? c3_L0 : NULL; - - // Find LIST_1 reference - if (selected_CU) { - if (!(selected_CU->inter.mv_dir & (selected_reflist + 1))) { - selected_reflist = !selected_reflist; - } - uint8_t colocated_ref_found = 0; - - //Fetch ref idx of the selected CU in L0[0] ref list - for (int32_t temporal_cand = 0; temporal_cand < state->frame->ref->used_size; temporal_cand++) { - if (state->frame->refmap[temporal_cand].list == 1 && state->frame->refmap[temporal_cand].idx == 0) { - colocated_ref = temporal_cand; - colocated_ref_found = 1; - break; - } - } - - colocated_ref_poc = state->frame->ref->pocs[colocated_ref]; - - if (colocated_ref_found) { - // The reference id the colocated block is using - uint32_t colocated_ref_mv_ref = selected_CU->inter.mv_ref[selected_reflist]; - - // POC differences in current and in candidate - td = colocated_ref_poc - state->frame->ref->images[colocated_ref]->ref_pocs[colocated_ref_mv_ref]; - tb = state->frame->poc - state->frame->ref->pocs[ref_idx]; - mv_cand[candidates].dir |= 2; - - // No need for scaling when POC difference is the same - if (td == tb) { - mv_cand[candidates].mv[1][0] = selected_CU->inter.mv[selected_reflist][0]; - mv_cand[candidates].mv[1][1] = selected_CU->inter.mv[selected_reflist][1]; - } else { - int32_t scale = CALCULATE_SCALE(tb, td); - mv_cand[candidates].mv[1][0] = ((scale * selected_CU->inter.mv[selected_reflist][0] + 127 + ((scale * selected_CU->inter.mv[selected_reflist][0]) < 0)) >> 8); - mv_cand[candidates].mv[1][1] = ((scale * selected_CU->inter.mv[selected_reflist][1] + 127 + ((scale * selected_CU->inter.mv[selected_reflist][1]) < 0)) >> 8); - } - mv_cand[candidates].ref[1] = colocated_ref; - } - } - } - - if (mv_cand[candidates].dir != 0) candidates++; - - } - #undef CALCULATE_SCALE + if (mv_cand[candidates].dir != 0) candidates++; } if (candidates < MRG_MAX_NUM_CANDS && state->frame->slicetype == KVZ_SLICE_B) { @@ -1493,9 +1355,11 @@ mv_cand[candidates].ref[0] = mv_cand[i].ref[0]; mv_cand[candidates].ref[1] = mv_cand[j].ref[1]; - if (mv_cand[i].ref[0] == mv_cand[j].ref[1] && - mv_cand[i].mv[0][0] == mv_cand[j].mv[1][0] && - mv_cand[i].mv[0][1] == mv_cand[j].mv[1][1]) { + if (state->frame->ref_LX[0][mv_cand[i].ref[0]] == + state->frame->ref_LX[1][mv_cand[j].ref[1]] + && + mv_cand[i].mv[0][0] == mv_cand[j].mv[1][0] && + mv_cand[i].mv[0][1] == mv_cand[j].mv[1][1]) { // Not a candidate } else { candidates++; @@ -1519,12 +1383,12 @@ } num_ref = MIN(ref_negative, ref_positive); } - + // Add (0,0) prediction while (candidates != MRG_MAX_NUM_CANDS) { mv_cand[candidates].mv[0][0] = 0; mv_cand[candidates].mv[0][1] = 0; - mv_cand[candidates].ref[0] = (zero_idx>=num_ref-1)?0:zero_idx; + mv_cand[candidates].ref[0] = (zero_idx >= num_ref - 1) ? 0 : zero_idx; mv_cand[candidates].ref[1] = mv_cand[candidates].ref[0]; mv_cand[candidates].dir = 1; if (state->frame->slicetype == KVZ_SLICE_B) {
View file
kvazaar-1.1.0.tar.gz/src/inter.h -> kvazaar-1.2.0.tar.gz/src/inter.h
Changed
@@ -35,7 +35,7 @@ typedef struct { uint8_t dir; - uint8_t ref[2]; + uint8_t ref[2]; // index to L0/L1 int16_t mv[2][2]; } inter_merge_cand_t; @@ -85,6 +85,5 @@ int32_t width, int32_t height, bool use_a1, bool use_b1, inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS], - lcu_t *lcu, - uint8_t ref_idx); + lcu_t *lcu); #endif
View file
kvazaar-1.1.0.tar.gz/src/intra.c -> kvazaar-1.2.0.tar.gz/src/intra.c
Changed
@@ -114,6 +114,52 @@ return 1; } +#if KVZ_SEL_ENCRYPTION +int8_t kvz_intra_get_dir_luma_predictor_encry( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_pu, + const cu_info_t *const left_pu, + const cu_info_t *const above_pu) +{ + // The default mode if block is not coded yet is INTRA_DC. + int8_t left_intra_dir = 1; + if (left_pu && left_pu->type == CU_INTRA) { + left_intra_dir = left_pu->intra.mode_encry ; + } + + int8_t above_intra_dir = 1; + if (above_pu && above_pu->type == CU_INTRA && y % LCU_WIDTH != 0) { + above_intra_dir = above_pu->intra.mode_encry; + } + + // If the predictions are the same, add new predictions + if (left_intra_dir == above_intra_dir) { + if (left_intra_dir > 1) { // angular modes + preds[0] = left_intra_dir; + preds[1] = ((left_intra_dir + 29) % 32) + 2; + preds[2] = ((left_intra_dir - 1 ) % 32) + 2; + } else { //non-angular + preds[0] = 0;//PLANAR_IDX; + preds[1] = 1;//DC_IDX; + preds[2] = 26;//VER_IDX; + } + } else { // If we have two distinct predictions + preds[0] = left_intra_dir; + preds[1] = above_intra_dir; + + // add planar mode if it's not yet present + if (left_intra_dir && above_intra_dir ) { + preds[2] = 0; // PLANAR_IDX; + } else { // Add DC mode if it's not present, otherwise 26. + preds[2] = (left_intra_dir+above_intra_dir)<2? 26 : 1; + } + } + + return 1; +} +#endif static void intra_filter_reference( int_fast8_t log2_width, @@ -541,126 +587,120 @@ } } -void kvz_intra_recon_lcu_luma( +static void intra_recon_tb_leaf( encoder_state_t *const state, int x, int y, int depth, int8_t intra_mode, - cu_info_t *cur_cu, - lcu_t *lcu) + lcu_t *lcu, + color_t color) { - const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; - if (cur_cu == NULL) { - cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); - } - const int8_t width = LCU_WIDTH >> depth; - - if (depth == 0 || cur_cu->tr_depth > depth) { - int offset = width / 2; - - kvz_intra_recon_lcu_luma(state, x, y, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_luma(state, x + offset, y, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_luma(state, x, y + offset, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_luma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu); - - if (depth < MAX_DEPTH) { - uint16_t child_cbfs[3] = { - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y )->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset)->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf, - }; - cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y); - } + const kvz_config *cfg = &state->encoder_control->cfg; + const int shift = color == COLOR_Y ? 0 : 1; - return; + int log2width = LOG2_LCU_WIDTH - depth; + if (color != COLOR_Y && depth < MAX_PU_DEPTH) { + // Chroma width is half of luma width, when not at maximum depth. + log2width -= 1; } + const int width = 1 << log2width; + const int lcu_width = LCU_WIDTH >> shift; + + const vector2d_t luma_px = { x, y }; + const vector2d_t pic_px = { + state->tile->frame->width, + state->tile->frame->height, + }; + const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift}; - // Perform intra prediction and put the result in correct place lcu. - vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; - vector2d_t luma_px = { x, y }; kvz_intra_references refs; - const int_fast8_t log2_width = kvz_g_convert_to_bit[width] + 2; - kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs); + kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs); kvz_pixel pred[32 * 32]; - const kvz_config *cfg = &state->encoder_control->cfg; - bool filter_boundary = !(cfg->lossless && cfg->implicit_rdpcm); - kvz_intra_predict(&refs, log2_width, intra_mode, COLOR_Y, pred, filter_boundary); - - kvz_pixel *block_in_lcu = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; - kvz_pixels_blit(pred, block_in_lcu, width, width, width, LCU_WIDTH); - - kvz_quantize_lcu_luma_residual(state, x, y, depth, cur_cu, lcu); + const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm); + kvz_intra_predict(&refs, log2width, intra_mode, color, pred, filter_boundary); + + const int index = lcu_px.x + lcu_px.y * lcu_width; + kvz_pixel *block = NULL; + switch (color) { + case COLOR_Y: + block = &lcu->rec.y[index]; + break; + case COLOR_U: + block = &lcu->rec.u[index]; + break; + case COLOR_V: + block = &lcu->rec.v[index]; + break; + } + kvz_pixels_blit(pred, block , width, width, width, lcu_width); } - -void kvz_intra_recon_lcu_chroma( +/** + * \brief Reconstruct an intra CU + * + * \param state encoder state + * \param x x-coordinate of the CU in luma pixels + * \param y y-coordinate of the CU in luma pixels + * \param depth depth in the CU tree + * \param mode_luma intra mode for luma, or -1 to skip luma recon + * \param mode_chroma intra mode for chroma, or -1 to skip chroma recon + * \param cur_cu pointer to the CU, or NULL to fetch CU from LCU + * \param lcu containing LCU + */ +void kvz_intra_recon_cu( encoder_state_t *const state, int x, int y, int depth, - int8_t intra_mode, + int8_t mode_luma, + int8_t mode_chroma, cu_info_t *cur_cu, lcu_t *lcu) { const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const int8_t width = LCU_WIDTH >> depth; - const int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); - if (cur_cu == NULL) { cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } if (depth == 0 || cur_cu->tr_depth > depth) { - int offset = width / 2; - - kvz_intra_recon_lcu_chroma(state, x, y, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_chroma(state, x + offset, y, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_chroma(state, x, y + offset, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_chroma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu); - - if (depth <= MAX_DEPTH) { - uint16_t child_cbfs[3] = { - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y )->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset)->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf, - }; + const int offset = width / 2; + const int32_t x2 = x + offset; + const int32_t y2 = y + offset; + + kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, lcu); + kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, lcu); + kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu); + kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu); + + // Propagate coded block flags from child CUs to parent CU. + uint16_t child_cbfs[3] = { + LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y )->cbf, + LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset)->cbf, + LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf, + }; + + if (mode_luma != -1 && depth < MAX_DEPTH) { + cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y); + } + if (mode_chroma != -1 && depth <= MAX_DEPTH) { cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U); cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V); } - return; - } - - if (!(x & 4 || y & 4)) { - const int_fast8_t log2_width_c = kvz_g_convert_to_bit[width_c] + 2; - const vector2d_t luma_px = { x, y }; - const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; - - // Intra predict U-plane and put the result in lcu buffer. - { - kvz_intra_references refs; - kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs); - - kvz_pixel pred[32 * 32]; - kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_U, pred, false); - - kvz_pixel *pu_in_lcu = &lcu->rec.u[lcu_px.x / 2 + (lcu_px.y * LCU_WIDTH) / 4]; - kvz_pixels_blit(pred, pu_in_lcu, width_c, width_c, width_c, LCU_WIDTH_C); + } else { + const bool has_luma = mode_luma != -1; + const bool has_chroma = mode_chroma != -1 && x % 8 == 0 && y % 8 == 0; + // Process a leaf TU. + if (has_luma) { + intra_recon_tb_leaf(state, x, y, depth, mode_luma, lcu, COLOR_Y); } - - // Intra predict V-plane and put the result in lcu buffer. - { - kvz_intra_references refs; - kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs); - - kvz_pixel pred[32 * 32]; - kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_V, pred, false); - - kvz_pixel *pu_in_lcu = &lcu->rec.v[lcu_px.x / 2 + (lcu_px.y * LCU_WIDTH) / 4]; - kvz_pixels_blit(pred, pu_in_lcu, width_c, width_c, width_c, LCU_WIDTH_C); + if (has_chroma) { + intra_recon_tb_leaf(state, x, y, depth, mode_chroma, lcu, COLOR_U); + intra_recon_tb_leaf(state, x, y, depth, mode_chroma, lcu, COLOR_V); } - kvz_quantize_lcu_chroma_residual(state, x, y, depth, cur_cu, lcu); + kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu); } }
View file
kvazaar-1.1.0.tar.gz/src/intra.h -> kvazaar-1.2.0.tar.gz/src/intra.h
Changed
@@ -62,6 +62,26 @@ const cu_info_t *const left_pu, const cu_info_t *const above_pu); +#if KVZ_SEL_ENCRYPTION +/** +* \brief Function for deriving intra luma predictions with encryption +* \param x x-coordinate of the PU in pixels +* \param y y-coordinate of the PU in pixels +* \param preds output buffer for 3 predictions +* \param cur_pu PU to check +* \param left_pu PU to the left of cur_pu +* \param above_pu PU above cur_pu +* \returns 1 if predictions are found, otherwise 0 +*/ +int8_t kvz_intra_get_dir_luma_predictor_encry( +const uint32_t x, +const uint32_t y, +int8_t *preds, +const cu_info_t *const cur_pu, +const cu_info_t *const left_pu, +const cu_info_t *const above_pu); +#endif + /** * \brief Generage angular predictions. * \param width Width in pixels, range 4..32. @@ -97,27 +117,13 @@ kvz_pixel *dst, bool filter_boundary); -/** - * \brief Do a full intra prediction cycle on a CU in lcu for luma. - */ -void kvz_intra_recon_lcu_luma( - encoder_state_t *const state, - int x, - int y, - int depth, - int8_t intra_mode, - cu_info_t *cur_cu, - lcu_t *lcu); - -/** -* \brief Do a full intra prediction cycle on a CU in lcu for chroma. -*/ -void kvz_intra_recon_lcu_chroma( +void kvz_intra_recon_cu( encoder_state_t *const state, int x, int y, int depth, - int8_t intra_mode, + int8_t mode_luma, + int8_t mode_chroma, cu_info_t *cur_cu, lcu_t *lcu);
View file
kvazaar-1.1.0.tar.gz/src/kvazaar.c -> kvazaar-1.2.0.tar.gz/src/kvazaar.c
Changed
@@ -43,7 +43,21 @@ static void kvazaar_close(kvz_encoder *encoder) { if (encoder) { + // The threadqueue must be stopped before freeing states. + if (encoder->control) { + kvz_threadqueue_stop(encoder->control->threadqueue); + } + if (encoder->states) { + // Flush input frame buffer. + kvz_picture *pic = NULL; + while ((pic = kvz_encoder_feed_frame(&encoder->input_buffer, + &encoder->states[0], + NULL)) != NULL) { + kvz_image_free(pic); + pic = NULL; + } + for (unsigned i = 0; i < encoder->num_encoder_states; ++i) { kvz_encoder_state_finalize(&encoder->states[i]); } @@ -127,7 +141,20 @@ info->qp = state->frame->QP; info->nal_unit_type = state->frame->pictype; info->slice_type = state->frame->slicetype; - kvz_encoder_get_ref_lists(state, info->ref_list_len, info->ref_list); + + memset(info->ref_list[0], 0, 16); + memset(info->ref_list[1], 0, 16); + + for (size_t i = 0; i < state->frame->ref_LX_size[0]; i++) { + info->ref_list[0][i] = state->frame->ref->pocs[state->frame->ref_LX[0][i]]; + } + + for (size_t i = 0; i < state->frame->ref_LX_size[1]; i++) { + info->ref_list[1][i] = state->frame->ref->pocs[state->frame->ref_LX[1][i]]; + } + + info->ref_list_len[0] = state->frame->ref_LX_size[0]; + info->ref_list_len[1] = state->frame->ref_LX_size[1]; } @@ -244,7 +271,7 @@ kvz_threadqueue_waitfor(enc->control->threadqueue, output_state->tqj_bitstream_written); // The job pointer must be set to NULL here since it won't be usable after // the next frame is done. - output_state->tqj_bitstream_written = NULL; + kvz_threadqueue_free_job(&output_state->tqj_bitstream_written); // Get stream length before taking chunks since that clears the stream. if (len_out) *len_out = kvz_bitstream_tell(&output_state->stream) / 8;
View file
kvazaar-1.1.0.tar.gz/src/kvazaar.h -> kvazaar-1.2.0.tar.gz/src/kvazaar.h
Changed
@@ -149,7 +149,8 @@ KVZ_CRYPTO_MV_SIGNS = (1 << 1), KVZ_CRYPTO_TRANSF_COEFFS = (1 << 2), KVZ_CRYPTO_TRANSF_COEFF_SIGNS = (1 << 3), - KVZ_CRYPTO_ON = (1 << 4) - 1, + KVZ_CRYPTO_INTRA_MODE = (1 << 4), + KVZ_CRYPTO_ON = (1 << 5) - 1, }; /** @@ -198,6 +199,13 @@ KVZ_SLICES_WPP = (1 << 1), /*!< \brief Put each row in a slice. */ }; +enum kvz_sao { + KVZ_SAO_OFF = 0, + KVZ_SAO_EDGE = 1, + KVZ_SAO_BAND = 2, + KVZ_SAO_FULL = 3 +}; + // Map from input format to chroma format. #define KVZ_FORMAT2CSP(format) ((enum kvz_chroma_format)"\0\1\2\3"[format]) @@ -245,7 +253,7 @@ int32_t framerate_num; /*!< \brief Framerate numerator */ int32_t framerate_denom; /*!< \brief Framerate denominator */ int32_t deblock_enable; /*!< \brief Flag to enable deblocking filter */ - int32_t sao_enable; /*!< \brief Flag to enable sample adaptive offset filter */ + enum kvz_sao sao_type; /*!< \brief Flag to enable sample adaptive offset filter */ int32_t rdoq_enable; /*!< \brief Flag to enable RD optimized quantization. */ int32_t signhide_enable; /*!< \brief Flag to enable sign hiding. */ int32_t smp_enable; /*!< \brief Flag to enable SMP blocks. */ @@ -311,6 +319,7 @@ enum kvz_cu_split_termination cu_split_termination; /*!< \since 3.8.0 \brief Mode of cu split termination. */ enum kvz_crypto_features crypto_features; /*!< \since 3.7.0 */ + uint8_t *optional_key; enum kvz_me_early_termination me_early_termination; /*!< \since 3.8.0 \brief Mode of me early termination. */ @@ -333,10 +342,15 @@ struct { int32_t width; int32_t height; - uint8_t *dqps; + int8_t *dqps; } roi; /*!< \since 3.14.0 \brief Map of delta QPs for region of interest coding. */ unsigned slices; /*!< \since 3.15.0 \brief How to map slices to frame. */ + + /** + * \brief Use adaptive QP for 360 video with equirectangular projection. + */ + int32_t erp_aqp; } kvz_config; /**
View file
kvazaar-1.1.0.tar.gz/src/rate_control.c -> kvazaar-1.2.0.tar.gz/src/rate_control.c
Changed
@@ -170,7 +170,7 @@ static int8_t lambda_to_qp(const double lambda) { const int8_t qp = 4.2005 * log(lambda) + 13.7223 + 0.5; - return CLIP(0, 51, qp); + return CLIP_TO_QP(qp); } static double qp_to_lamba(encoder_state_t * const state, int qp) @@ -240,10 +240,10 @@ kvz_gop_config const * const gop = &ctrl->cfg.gop[state->frame->gop_offset]; const int gop_len = ctrl->cfg.gop_len; - state->frame->QP = ctrl->cfg.qp; - if (gop_len > 0 && state->frame->slicetype != KVZ_SLICE_I) { - state->frame->QP += gop->qp_offset; + state->frame->QP = CLIP_TO_QP(ctrl->cfg.qp + gop->qp_offset); + } else { + state->frame->QP = ctrl->cfg.qp; } state->frame->lambda = qp_to_lamba(state, state->frame->QP); @@ -291,7 +291,7 @@ }; int roi_index = roi.x + roi.y * ctrl->cfg.roi.width; int dqp = ctrl->cfg.roi.dqps[roi_index]; - state->qp = state->frame->QP + dqp; + state->qp = CLIP_TO_QP(state->frame->QP + dqp); state->lambda = qp_to_lamba(state, state->qp); state->lambda_sqrt = sqrt(state->frame->lambda);
View file
kvazaar-1.1.0.tar.gz/src/rdo.c -> kvazaar-1.2.0.tar.gz/src/rdo.c
Changed
@@ -33,12 +33,16 @@ #include "tables.h" #include "transform.h" +#include "strategies/strategies-quant.h" + #define QUANT_SHIFT 14 #define SCAN_SET_SIZE 16 #define LOG2_SCAN_SET_SIZE 4 #define SBH_THRESHOLD 4 +static const double COEFF_SUM_MULTIPLIER = 1.9; + const uint32_t kvz_g_go_rice_range[5] = { 7, 14, 26, 46, 78 }; const uint32_t kvz_g_go_rice_prefix_len[5] = { 8, 7, 6, 5, 4 }; @@ -140,48 +144,82 @@ }; -/** Calculate actual (or really close to actual) bitcost for coding coefficients +/** + * \brief Calculate actual (or really close to actual) bitcost for coding + * coefficients. + * * \param coeff coefficient array * \param width coeff block width * \param type data type (0 == luma) + * * \returns bits needed to code input coefficients */ -int32_t kvz_get_coeff_cost(const encoder_state_t * const state, coeff_t *coeff, int32_t width, int32_t type, int8_t scan_mode) +static INLINE uint32_t get_coeff_cabac_cost( + const encoder_state_t * const state, + const coeff_t *coeff, + int32_t width, + int32_t type, + int8_t scan_mode) { - int32_t cost = 0; - int i; - int found = 0; - encoder_state_t state_copy; - // Make sure there are coeffs present - for(i = 0; i < width*width; i++) { + bool found = false; + for (int i = 0; i < width*width; i++) { if (coeff[i] != 0) { found = 1; break; } } + if (!found) return 0; - if(!found) return 0; - - // Store cabac state and contexts - memcpy(&state_copy,state,sizeof(encoder_state_t)); + // Take a copy of the CABAC so that we don't overwrite the contexts when + // counting the bits. + cabac_data_t cabac_copy; + memcpy(&cabac_copy, &state->cabac, sizeof(cabac_copy)); // Clear bytes and bits and set mode to "count" - state_copy.cabac.only_count = 1; - state_copy.cabac.num_buffered_bytes = 0; - state_copy.cabac.bits_left = 23; + cabac_copy.only_count = 1; + cabac_copy.num_buffered_bytes = 0; + cabac_copy.bits_left = 23; + + // Execute the coding function. + // It is safe to drop the const modifier since state won't be modified + // when cabac.only_count is set. + kvz_encode_coeff_nxn((encoder_state_t*) state, + &cabac_copy, + coeff, + width, + type, + scan_mode, + 0); + + return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3); +} - // Execute the coding function - kvz_encode_coeff_nxn(&state_copy, coeff, width, type, scan_mode, 0); - // Store bitcost before restoring cabac - cost = (23-state_copy.cabac.bits_left) + (state_copy.cabac.num_buffered_bytes << 3); +/** + * \brief Estimate bitcost for coding coefficients. + * + * \param coeff coefficient array + * \param width coeff block width + * \param type data type (0 == luma) + * + * \returns number of bits needed to code coefficients + */ +uint32_t kvz_get_coeff_cost(const encoder_state_t * const state, + const coeff_t *coeff, + int32_t width, + int32_t type, + int8_t scan_mode) +{ + if (state->encoder_control->cfg.rdo > 0) { + return get_coeff_cabac_cost(state, coeff, width, type, scan_mode); - return cost; + } else { + return COEFF_SUM_MULTIPLIER * kvz_coeff_abs_sum(coeff, width * width) + 0.5; + } } - #define COEF_REMAIN_BIN_REDUCTION 3 /** Calculates the cost for specific absolute transform level * \param abs_level scaled quantized level @@ -191,7 +229,7 @@ * \returns cost of given absolute transform level * From HM 12.0 */ -int32_t kvz_get_ic_rate(encoder_state_t * const state, +INLINE int32_t kvz_get_ic_rate(encoder_state_t * const state, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs, @@ -211,14 +249,14 @@ int32_t length; if (symbol < (COEF_REMAIN_BIN_REDUCTION << abs_go_rice)) { length = symbol>>abs_go_rice; - rate += (length+1+abs_go_rice) << CTX_FRAC_BITS; + rate += (length+1+abs_go_rice) * (1 << CTX_FRAC_BITS); } else { length = abs_go_rice; symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << abs_go_rice); while (symbol >= (1<<length)) { symbol -= (1<<(length++)); } - rate += (COEF_REMAIN_BIN_REDUCTION+length+1-abs_go_rice+length) << CTX_FRAC_BITS; + rate += (COEF_REMAIN_BIN_REDUCTION+length+1-abs_go_rice+length) * (1 << CTX_FRAC_BITS); } if (c1_idx < C1FLAG_NUMBER) { rate += CTX_ENTROPY_BITS(&base_one_ctx[ctx_num_one],1); @@ -255,7 +293,7 @@ * This method calculates the best quantized transform level for a given scan position. * From HM 12.0 */ -uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost, double *coded_cost0, double *coded_cost_sig, +INLINE uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost, double *coded_cost0, double *coded_cost_sig, int32_t level_double, uint32_t max_abs_level, uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs, uint16_t abs_go_rice, @@ -283,7 +321,7 @@ min_abs_level = ( max_abs_level > 1 ? max_abs_level - 1 : 1 ); for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) { - double err = (double)(level_double - ( abs_level << q_bits ) ); + double err = (double)(level_double - ( abs_level * (1 << q_bits) ) ); double cur_cost = err * err * temp + state->lambda * kvz_get_ic_rate( state, abs_level, ctx_num_one, ctx_num_abs, abs_go_rice, c1_idx, c2_idx, type); @@ -450,8 +488,8 @@ dec_bits -= 4 * CTX_FRAC_ONE_BIT; } - inc_bits = -quant_cost_in_bits + (inc_bits << PRECISION_INC); - dec_bits = quant_cost_in_bits + (dec_bits << PRECISION_INC); + inc_bits = -quant_cost_in_bits + inc_bits * (1 << PRECISION_INC); + dec_bits = quant_cost_in_bits + dec_bits * (1 << PRECISION_INC); if (inc_bits < dec_bits) { current.change = 1; @@ -472,7 +510,7 @@ // Add sign bit, other bits and sig_coeff goes to one. int bits = CTX_FRAC_ONE_BIT + sh_rates->inc[current.pos] + sh_rates->sig_coeff_inc[current.pos]; - current.cost = -llabs(quant_cost_in_bits) + (bits << PRECISION_INC); + current.cost = -llabs(quant_cost_in_bits) + bits * (1 << PRECISION_INC); current.change = 1; if (coeff_scan < first_nz_scan) { @@ -558,10 +596,10 @@ // Explicitly tell the only possible numbers of elements to be zeroed. // Hope the compiler is able to utilize this information. switch (cg_num) { - case 1: memset(sig_coeffgroup_flag, 0, 1 * sizeof(sig_coeffgroup_flag[0])); break; - case 4: memset(sig_coeffgroup_flag, 0, 4 * sizeof(sig_coeffgroup_flag[0])); break; - case 16: memset(sig_coeffgroup_flag, 0, 16 * sizeof(sig_coeffgroup_flag[0])); break; - case 64: memset(sig_coeffgroup_flag, 0, 64 * sizeof(sig_coeffgroup_flag[0])); break; + case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); break; + case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); break; + case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break; + case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break; default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups"); } @@ -658,7 +696,7 @@ } if (encoder->cfg.signhide_enable) { - sh_rates.quant_delta[blkpos] = (level_double - (level << q_bits)) >> (q_bits - 8); + sh_rates.quant_delta[blkpos] = (level_double - level * (1 << q_bits)) >> (q_bits - 8); if (level > 0) { int32_t rate_now = kvz_get_ic_rate(state, level, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, type); int32_t rate_up = kvz_get_ic_rate(state, level + 1, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, type); @@ -845,7 +883,9 @@ * \returns int * Calculates cost of actual motion vectors using CABAC coding */ -uint32_t kvz_get_mvd_coding_cost_cabac(encoder_state_t * const state, vector2d_t *mvd, const cabac_data_t* real_cabac) +uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state, + vector2d_t *mvd, + const cabac_data_t* real_cabac) { uint32_t bitcost = 0; const int32_t mvd_hor = mvd->x; @@ -872,13 +912,15 @@ } if (hor_abs_gr0) { if (mvd_hor_abs > 1) { - kvz_cabac_write_ep_ex_golomb(state, cabac, mvd_hor_abs - 2, 1); + // It is safe to drop const here because cabac->only_count is set. + kvz_cabac_write_ep_ex_golomb((encoder_state_t*)state, cabac, mvd_hor_abs - 2, 1); } CABAC_BIN_EP(cabac, (mvd_hor > 0) ? 0 : 1, "mvd_sign_flag_hor"); } if (ver_abs_gr0) { if (mvd_ver_abs > 1) { - kvz_cabac_write_ep_ex_golomb(state, cabac, mvd_ver_abs - 2, 1); + // It is safe to drop const here because cabac->only_count is set. + kvz_cabac_write_ep_ex_golomb((encoder_state_t*)state, cabac, mvd_ver_abs - 2, 1); } CABAC_BIN_EP(cabac, (mvd_ver > 0) ? 0 : 1, "mvd_sign_flag_ver"); } @@ -891,10 +933,16 @@ * \returns int * Calculates Motion Vector cost and related costs using CABAC coding */ -int kvz_calc_mvd_cost_cabac(encoder_state_t * const state, int x, int y, int mv_shift, - int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, int32_t ref_idx, uint32_t *bitcost) { - +uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state, + int x, + int y, + int mv_shift, + int16_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + int32_t ref_idx, + uint32_t *bitcost) +{ cabac_data_t state_cabac_copy; cabac_data_t* cabac; uint32_t merge_idx; @@ -903,15 +951,18 @@ int8_t merged = 0; int8_t cur_mv_cand = 0; - x <<= mv_shift; - y <<= mv_shift; + x *= 1 << mv_shift; + y *= 1 << mv_shift; // Check every candidate to find a match for (merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) { if (merge_cand[merge_idx].dir == 3) continue; if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x && merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y && - merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) { + state->frame->ref_LX[merge_cand[merge_idx].dir - 1][ + merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] + ] == ref_idx) + { merged = 1; break; } @@ -1030,7 +1081,8 @@ if (hor_abs_gr0) { if (mvd_hor_abs > 1) { - kvz_cabac_write_ep_ex_golomb(state, cabac, mvd_hor_abs - 2, 1); + // It is safe to drop const because cabac->only_count is set. + kvz_cabac_write_ep_ex_golomb((encoder_state_t*)state, cabac, mvd_hor_abs - 2, 1); } CABAC_BIN_EP(cabac, (mvd_hor > 0) ? 0 : 1, "mvd_sign_flag_hor"); @@ -1038,7 +1090,8 @@ if (ver_abs_gr0) { if (mvd_ver_abs > 1) { - kvz_cabac_write_ep_ex_golomb(state, cabac, mvd_ver_abs - 2, 1); + // It is safe to drop const because cabac->only_count is set. + kvz_cabac_write_ep_ex_golomb((encoder_state_t*)state, cabac, mvd_ver_abs - 2, 1); } CABAC_BIN_EP(cabac, (mvd_ver > 0) ? 0 : 1, "mvd_sign_flag_ver"); @@ -1056,5 +1109,5 @@ *bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3); // Store bitcost before restoring cabac - return *bitcost * (int32_t)(state->lambda_sqrt + 0.5); + return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5); }
View file
kvazaar-1.1.0.tar.gz/src/rdo.h -> kvazaar-1.2.0.tar.gz/src/rdo.h
Changed
@@ -39,7 +39,11 @@ void kvz_rdoq(encoder_state_t *state, coeff_t *coef, coeff_t *dest_coeff, int32_t width, int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth); -int32_t kvz_get_coeff_cost(const encoder_state_t *state, coeff_t *coeff, int32_t width, int32_t type, int8_t scan_mode); +uint32_t kvz_get_coeff_cost(const encoder_state_t *state, + const coeff_t *coeff, + int32_t width, + int32_t type, + int8_t scan_mode); int32_t kvz_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs, uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type); @@ -52,7 +56,9 @@ kvz_mvd_cost_func kvz_calc_mvd_cost_cabac; -uint32_t kvz_get_mvd_coding_cost_cabac(encoder_state_t * const state, vector2d_t *mvd, const cabac_data_t* cabac); +uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state, + vector2d_t *mvd, + const cabac_data_t* cabac); // Number of fixed point fractional bits used in the fractional bit table. #define CTX_FRAC_BITS 15
View file
kvazaar-1.1.0.tar.gz/src/sao.c -> kvazaar-1.2.0.tar.gz/src/sao.c
Changed
@@ -262,182 +262,81 @@ /** - * \brief Calculate dimensions of the buffer used by sao reconstruction. - - * \param pic Picture. - * \param sao Sao parameters. - * \param rec Top-left corner of the LCU + * \brief Reconstruct SAO. + * + * \param encoder encoder state + * \param buffer Buffer containing the deblocked input pixels. The + * area to filter starts at index 0. + * \param stride stride of buffer + * \param frame_x x-coordinate of the top-left corner in pixels + * \param frame_y y-coordinate of the top-left corner in pixels + * \param width width of the area to filter + * \param height height of the area to filter + * \param sao SAO information + * \param color color plane index */ -static void sao_calc_band_block_dims(const videoframe_t *frame, color_t color_i, - vector2d_t *rec, vector2d_t *block) +void kvz_sao_reconstruct(const encoder_state_t *state, + const kvz_pixel *buffer, + int stride, + int frame_x, + int frame_y, + int width, + int height, + const sao_info_t *sao, + color_t color) { - const int is_chroma = (color_i != COLOR_Y ? 1 : 0); - int width = frame->width >> is_chroma; - int height = frame->height >> is_chroma; - int block_width = LCU_WIDTH >> is_chroma; - + const encoder_control_t *const ctrl = state->encoder_control; + videoframe_t *const frame = state->tile->frame; + const int shift = color == COLOR_Y ? 0 : 1; - // Handle right and bottom, taking care of non-LCU sized CUs. - if (rec->y + block_width >= height) { - if (rec->y + block_width >= height) { - block->y = height - rec->y; - } - } - if (rec->x + block_width >= width) { - if (rec->x + block_width > width) { - block->x = width - rec->x; - } - } + const int frame_width = frame->width >> shift; + const int frame_height = frame->height >> shift; + const int frame_stride = frame->rec->stride >> shift; + kvz_pixel *output = &frame->rec->data[color][frame_x + frame_y * frame_stride]; - rec->x = 0; rec->y = 0; -} + if (sao->type == SAO_TYPE_EDGE) { + const vector2d_t *offset = g_sao_edge_offsets[sao->eo_class]; -/** - * \brief Calculate dimensions of the buffer used by sao reconstruction. - * - * This function calculates 4 vectors that can be used to make the temporary - * buffers required by sao_reconstruct_color. - * - * Vector block is the area affected by sao. Vectors tr and br are top-left - * margin and bottom-right margin, which contain pixels that are not modified - * by the reconstruction of this LCU but are needed by the reconstruction. - * Vector rec is the offset from the CU to the required pixel area. - * - * The margins are always either 0 or 1, depending on the direction of the - * edge offset class. - * - * This also takes into account borders of the picture and non-LCU sized - * CU's at the bottom and right of the picture. - * - * \ CU + rec - * +------+ - * |\ tl | - * | +--+ | - * | |\ block - * | | \| | - * | +--+ | - * | \ br - * +------+ - * - * \param pic Picture. - * \param sao Sao parameters. - * \param rec Top-left corner of the LCU, modified to be top-left corner of - */ -static void sao_calc_edge_block_dims(const videoframe_t * const frame, color_t color_i, - const sao_info_t *sao, vector2d_t *rec, - vector2d_t *tl, vector2d_t *br, - vector2d_t *block) -{ - vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0]; - vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; - const int is_chroma = (color_i != COLOR_Y ? 1 : 0); - int width = frame->width >> is_chroma; - int height = frame->height >> is_chroma; - int block_width = LCU_WIDTH >> is_chroma; - - // Handle top and left. - if (rec->y == 0) { - tl->y = 0; - if (a_ofs.y == -1 || b_ofs.y == -1) { - block->y -= 1; - tl->y += 1; + if (frame_x + width + offset[0].x > frame_width || + frame_x + width + offset[1].x > frame_width) + { + // Nothing to do for the rightmost column. + width -= 1; } - } - if (rec->x == 0) { - tl->x = 0; - if (a_ofs.x == -1 || b_ofs.x == -1) { - block->x -= 1; - tl->x += 1; + if (frame_x + offset[0].x < 0 || frame_x + offset[1].x < 0) { + // Nothing to do for the leftmost column. + buffer += 1; + output += 1; + width -= 1; } - } - - // Handle right and bottom, taking care of non-LCU sized CUs. - if (rec->y + block_width >= height) { - br->y = 0; - block->y -= block_width + rec->y - height; - if (a_ofs.y == 1 || b_ofs.y == 1) { - block->y -= 1; - br->y += 1; + if (frame_y + height + offset[0].y > frame_height || + frame_y + height + offset[1].y > frame_height) + { + // Nothing to do for the bottommost row. + height -= 1; } - } - if (rec->x + block_width >= width) { - br->x = 0; - block->x -= block_width + rec->x - width; - if (a_ofs.x == 1 || b_ofs.x == 1) { - block->x -= 1; - br->x += 1; + if (frame_y + offset[0].y < 0 || frame_y + offset[1].y < 0) { + // Nothing to do for the topmost row. + buffer += stride; + output += frame_stride; + height -= 1; } } - rec->y = (rec->y == 0 ? 0 : -1); - rec->x = (rec->x == 0 ? 0 : -1); -} - -void kvz_sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * frame, const kvz_pixel *old_rec, - unsigned x_ctb, unsigned y_ctb, - const sao_info_t *sao, color_t color_i) -{ - const int is_chroma = (color_i != COLOR_Y ? 1 : 0); - const int pic_stride = frame->width >> is_chroma; - const int lcu_stride = LCU_WIDTH >> is_chroma; - const int buf_stride = lcu_stride + 2; - - kvz_pixel *recdata = frame->rec->data[color_i]; - kvz_pixel buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)]; - kvz_pixel new_rec[LCU_WIDTH * LCU_WIDTH]; - // Calling CU_TO_PIXEL with depth 1 is the same as using block size of 32. - kvz_pixel *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, frame->rec->stride>>is_chroma)]; - const kvz_pixel *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)]; - - vector2d_t ofs; - vector2d_t tl = { 1, 1 }; - vector2d_t br = { 1, 1 }; - vector2d_t block; - - if (sao->type == SAO_TYPE_NONE) { - return; - } - - ofs.x = x_ctb * lcu_stride; - ofs.y = y_ctb * lcu_stride; - block.x = lcu_stride; - block.y = lcu_stride; - if (sao->type == SAO_TYPE_BAND) { - tl.x = 0; tl.y = 0; - br.x = 0; br.y = 0; - sao_calc_band_block_dims(frame, color_i, &ofs, &block); - } - else { - sao_calc_edge_block_dims(frame, color_i, sao, &ofs, &tl, &br, &block); + if (sao->type != SAO_TYPE_NONE) { + kvz_sao_reconstruct_color(ctrl, + buffer, + output, + sao, + stride, + frame_stride, + width, + height, + color); } - - assert(ofs.x + tl.x + block.x + br.x <= frame->width); - assert(ofs.y + tl.y + block.y + br.y <= frame->height); - - CHECKPOINT("ofs.x=%d ofs.y=%d tl.x=%d tl.y=%d block.x=%d block.y=%d br.x=%d br.y=%d", - ofs.x, ofs.y, tl.x, tl.y, block.x, block.y, br.x, br.y); - - // Data to tmp buffer. - kvz_pixels_blit(&old_lcu_rec[ofs.y * pic_stride + ofs.x], - buf_rec, - tl.x + block.x + br.x, - tl.y + block.y + br.y, - pic_stride, buf_stride); - - kvz_sao_reconstruct_color(encoder, &buf_rec[tl.y * buf_stride + tl.x], - &new_rec[(ofs.y + tl.y) * lcu_stride + ofs.x + tl.x], - sao, - buf_stride, lcu_stride, - block.x, block.y, color_i); - - // Copy reconstructed block from tmp buffer to rec image. - kvz_pixels_blit(&new_rec[(tl.y + ofs.y) * lcu_stride + (tl.x + ofs.x)], - &lcu_rec[(tl.y + ofs.y) * (frame->rec->stride >> is_chroma) + (tl.x + ofs.x)], - block.x, block.y, lcu_stride, frame->rec->stride >> is_chroma); } - static void sao_search_edge_sao(const encoder_state_t * const state, const kvz_pixel * data[], const kvz_pixel * recdata[], int block_width, int block_height, @@ -584,10 +483,8 @@ band_sao.offsets[5] = 0; band_sao.eo_class = SAO_EO0; - sao_search_edge_sao(state, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left); - sao_search_band_sao(state, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left); - - { + if (state->encoder_control->cfg.sao_type & 1){ + sao_search_edge_sao(state, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left); float mode_bits = sao_mode_bits_edge(state, edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left, buf_cnt); int ddistortion = (int)(mode_bits * state->lambda + 0.5); unsigned buf_i; @@ -600,8 +497,12 @@ edge_sao.ddistortion = ddistortion; } + else{ + edge_sao.ddistortion = INT_MAX; + } - { + if (state->encoder_control->cfg.sao_type & 2){ + sao_search_band_sao(state, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left); float mode_bits = sao_mode_bits_band(state, band_sao.band_position, band_sao.offsets, sao_top, sao_left, buf_cnt); int ddistortion = (int)(mode_bits * state->lambda + 0.5); unsigned buf_i; @@ -614,6 +515,9 @@ band_sao.ddistortion = ddistortion; } + else{ + band_sao.ddistortion = INT_MAX; + } if (edge_sao.ddistortion <= band_sao.ddistortion) { *sao_out = edge_sao; @@ -749,7 +653,8 @@ int32_t merge_cost_chroma[3] = { INT32_MAX }; sao_info_t *sao_luma = &frame->sao_luma[lcu_y * stride + lcu_x]; sao_info_t *sao_chroma = NULL; - if (state->encoder_control->chroma_format != KVZ_CSP_400) { + int enable_chroma = state->encoder_control->chroma_format != KVZ_CSP_400; + if (enable_chroma) { sao_chroma = &frame->sao_chroma[lcu_y * stride + lcu_x]; } @@ -758,13 +663,13 @@ sao_info_t *sao_left_luma = lcu_x != 0 ? &frame->sao_luma [lcu_y * stride + lcu_x - 1] : NULL; sao_info_t *sao_top_chroma = NULL; sao_info_t *sao_left_chroma = NULL; - if (state->encoder_control->chroma_format != KVZ_CSP_400) { + if (enable_chroma) { if (lcu_y != 0) sao_top_chroma = &frame->sao_chroma[(lcu_y - 1) * stride + lcu_x]; if (lcu_x != 0) sao_left_chroma = &frame->sao_chroma[lcu_y * stride + lcu_x - 1]; } sao_search_luma(state, frame, lcu_x, lcu_y, sao_luma, sao_top_luma, sao_left_luma, merge_cost_luma); - if (state->encoder_control->chroma_format != KVZ_CSP_400) { + if (enable_chroma) { sao_search_chroma(state, frame, lcu_x, lcu_y, sao_chroma, sao_top_chroma, sao_left_chroma, merge_cost_chroma); } else { merge_cost_chroma[0] = 0; @@ -803,46 +708,3 @@ CHECKPOINT_SAO_INFO("sao_chroma", *sao_chroma); } } - -void kvz_sao_reconstruct_frame(encoder_state_t * const state) -{ - vector2d_t lcu; - videoframe_t * const frame = state->tile->frame; - - // These are needed because SAO needs the pre-SAO pixels form left and - // top LCUs. Single pixel wide buffers, like what kvz_search_lcu takes, would - // be enough though. - kvz_pixel *new_y_data = MALLOC(kvz_pixel, frame->rec->width * frame->rec->height); - kvz_pixels_blit(frame->rec->y, new_y_data, frame->rec->width, frame->rec->height, frame->rec->stride, frame->rec->width); - for (lcu.y = 0; lcu.y < frame->height_in_lcu; lcu.y++) { - for (lcu.x = 0; lcu.x < frame->width_in_lcu; lcu.x++) { - unsigned stride = frame->width_in_lcu; - sao_info_t *sao_luma = &frame->sao_luma[lcu.y * stride + lcu.x]; - - // sao_do_rdo(encoder, lcu.x, lcu.y, sao_luma, sao_chroma); - kvz_sao_reconstruct(state->encoder_control, frame, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y); - } - } - free(new_y_data); - - if (state->encoder_control->chroma_format != KVZ_CSP_400) { - kvz_pixel *new_u_data = MALLOC(kvz_pixel, (frame->rec->width * frame->rec->height) >> 2); - kvz_pixel *new_v_data = MALLOC(kvz_pixel, (frame->rec->width * frame->rec->height) >> 2); - - kvz_pixels_blit(frame->rec->u, new_u_data, frame->rec->width / 2, frame->rec->height / 2, frame->rec->stride / 2, frame->rec->width / 2); - kvz_pixels_blit(frame->rec->v, new_v_data, frame->rec->width / 2, frame->rec->height / 2, frame->rec->stride / 2, frame->rec->width / 2); - - for (lcu.y = 0; lcu.y < frame->height_in_lcu; lcu.y++) { - for (lcu.x = 0; lcu.x < frame->width_in_lcu; lcu.x++) { - unsigned stride = frame->width_in_lcu; - sao_info_t *sao_chroma = &frame->sao_chroma[lcu.y * stride + lcu.x]; - - kvz_sao_reconstruct(state->encoder_control, frame, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U); - kvz_sao_reconstruct(state->encoder_control, frame, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V); - } - } - - free(new_u_data); - free(new_v_data); - } -}
View file
kvazaar-1.1.0.tar.gz/src/sao.h -> kvazaar-1.2.0.tar.gz/src/sao.h
Changed
@@ -72,10 +72,16 @@ (sao).offsets[0], (sao).offsets[1], (sao).offsets[2], (sao).offsets[3], (sao).offsets[4]) -void kvz_sao_reconstruct(const encoder_control_t * encoder, videoframe_t *frame, const kvz_pixel *old_rec, - unsigned x_ctb, unsigned y_ctb, - const sao_info_t *sao, color_t color_i); -void kvz_sao_reconstruct_frame(encoder_state_t *state); +void kvz_sao_reconstruct(const encoder_state_t *state, + const kvz_pixel *buffer, + int stride, + int frame_x, + int frame_y, + int width, + int height, + const sao_info_t *sao, + color_t color); + void kvz_sao_search_lcu(const encoder_state_t* const state, int lcu_x, int lcu_y); void kvz_calc_sao_offset_array(const encoder_control_t * const encoder, const sao_info_t *sao, int *offset, color_t color_i);
View file
kvazaar-1.1.0.tar.gz/src/search.c -> kvazaar-1.2.0.tar.gz/src/search.c
Changed
@@ -36,6 +36,7 @@ #include "transform.h" #include "videoframe.h" #include "strategies/strategies-picture.h" +#include "strategies/strategies-quant.h" #define IN_FRAME(x, y, width, height, block_width, block_height) \ @@ -43,11 +44,8 @@ && (x) + (block_width) <= (width) \ && (y) + (block_height) <= (height)) -// Cost treshold for doing intra search in inter frames with --rd=0. -#ifndef INTRA_TRESHOLD -# define INTRA_TRESHOLD 20 -#endif - +// Cost threshold for doing intra search in inter frames with --rd=0. +static const int INTRA_THRESHOLD = 8; // Modify weight of luma SSD. #ifndef LUMA_MULT @@ -58,216 +56,133 @@ # define CHROMA_MULT 1.5 #endif - -/** - * Copy all non-reference CU data from depth+1 to depth. - */ -static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1]) +static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to) { - assert(depth >= 0 && depth < MAX_PU_DEPTH); - - // Copy non-reference CUs. - { - const int x_orig = SUB_SCU(x_px); - const int y_orig = SUB_SCU(y_px); - const int width_cu = LCU_WIDTH >> depth; - for (int y = y_orig; y < y_orig + width_cu; y += SCU_WIDTH) { - for (int x = x_orig; x < x_orig + width_cu; x += SCU_WIDTH) { - const cu_info_t *from_cu = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x, y); - cu_info_t *to_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x, y); - memcpy(to_cu, from_cu, sizeof(*to_cu)); - } - } - } - - // Copy reconstructed pixels. - { - const int x = SUB_SCU(x_px); - const int y = SUB_SCU(y_px); - const int width_px = LCU_WIDTH >> depth; - const int luma_index = x + y * LCU_WIDTH; - const int chroma_index = (x / 2) + (y / 2) * (LCU_WIDTH / 2); - - const lcu_yuv_t *from = &work_tree[depth + 1].rec; - lcu_yuv_t *to = &work_tree[depth].rec; - - const lcu_coeff_t *from_coeff = &work_tree[depth + 1].coeff; - lcu_coeff_t *to_coeff = &work_tree[depth].coeff; - - kvz_pixels_blit(&from->y[luma_index], &to->y[luma_index], - width_px, width_px, LCU_WIDTH, LCU_WIDTH); - if (from->chroma_format != KVZ_CSP_400) { - kvz_pixels_blit(&from->u[chroma_index], &to->u[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - kvz_pixels_blit(&from->v[chroma_index], &to->v[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - } - - // Copy coefficients up. They do not have to be copied down because they - // are not used for the search. - kvz_coefficients_blit(&from_coeff->y[luma_index], &to_coeff->y[luma_index], - width_px, width_px, LCU_WIDTH, LCU_WIDTH); - if (from->chroma_format != KVZ_CSP_400) { - kvz_coefficients_blit(&from_coeff->u[chroma_index], &to_coeff->u[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - kvz_coefficients_blit(&from_coeff->v[chroma_index], &to_coeff->v[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); + for (int y = y_local; y < y_local + width; y += SCU_WIDTH) { + for (int x = x_local; x < x_local + width; x += SCU_WIDTH) { + *LCU_GET_CU_AT_PX(to, x, y) = *LCU_GET_CU_AT_PX(from, x, y); } } } - -/** - * Copy all non-reference CU data from depth to depth+1..MAX_PU_DEPTH. - */ -static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1]) +static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *from, lcu_t *to) { - assert(depth >= 0 && depth < MAX_PU_DEPTH); - - // TODO: clean up to remove the copy pasta - const int width_px = LCU_WIDTH >> depth; + const int luma_index = x_local + y_local * LCU_WIDTH; + const int chroma_index = (x_local / 2) + (y_local / 2) * (LCU_WIDTH / 2); - int d; - - for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) { - const int x_orig = SUB_SCU(x_px); - const int y_orig = SUB_SCU(y_px); - - for (int y = y_orig; y < y_orig + width_px; y += SCU_WIDTH) { - for (int x = x_orig; x < x_orig + width_px; x += SCU_WIDTH) { - const cu_info_t *from_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x, y); - cu_info_t *to_cu = LCU_GET_CU_AT_PX(&work_tree[d], x, y); - memcpy(to_cu, from_cu, sizeof(*to_cu)); - } - } + kvz_pixels_blit(&from->rec.y[luma_index], &to->rec.y[luma_index], + width, width, LCU_WIDTH, LCU_WIDTH); + if (from->rec.chroma_format != KVZ_CSP_400) { + kvz_pixels_blit(&from->rec.u[chroma_index], &to->rec.u[chroma_index], + width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); + kvz_pixels_blit(&from->rec.v[chroma_index], &to->rec.v[chroma_index], + width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); } +} - // Copy reconstructed pixels. - for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) { - const int x = SUB_SCU(x_px); - const int y = SUB_SCU(y_px); - - const int luma_index = x + y * LCU_WIDTH; - const int chroma_index = (x / 2) + (y / 2) * (LCU_WIDTH / 2); - - lcu_yuv_t *from = &work_tree[depth].rec; - lcu_yuv_t *to = &work_tree[d].rec; +static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *from, lcu_t *to) +{ + const int luma_z = xy_to_zorder(LCU_WIDTH, x_local, y_local); + copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], width); - kvz_pixels_blit(&from->y[luma_index], &to->y[luma_index], - width_px, width_px, LCU_WIDTH, LCU_WIDTH); - if (from->chroma_format != KVZ_CSP_400) { - kvz_pixels_blit(&from->u[chroma_index], &to->u[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - kvz_pixels_blit(&from->v[chroma_index], &to->v[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - } + if (from->rec.chroma_format != KVZ_CSP_400) { + const int chroma_z = xy_to_zorder(LCU_WIDTH_C, x_local >> 1, y_local >> 1); + copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], width >> 1); + copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], width >> 1); } } - -void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth) +/** + * Copy all non-reference CU data from next level to current level. + */ +static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_tree) { const int width = LCU_WIDTH >> depth; - const vector2d_t lcu_cu = { SUB_SCU(x_px), SUB_SCU(y_px) }; - - // Depth 4 doesn't go inside the loop. Set the top-left CU. - LCU_GET_CU_AT_PX(lcu, lcu_cu.x, lcu_cu.y)->tr_depth = tr_depth; - - for (unsigned y = 0; y < width; y += SCU_WIDTH) { - for (unsigned x = 0; x < width; x += SCU_WIDTH) { - cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, lcu_cu.x + x, lcu_cu.y + y); - cu->tr_depth = tr_depth; - } - } + copy_cu_info (x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); + copy_cu_pixels(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); + copy_cu_coeffs(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); } -static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pred_mode, int chroma_mode, int part_mode) +/** + * Copy all non-reference CU data from current level to all lower levels. + */ +static void work_tree_copy_down(int x_local, int y_local, int depth, lcu_t *work_tree) { const int width = LCU_WIDTH >> depth; - const int x_cu = SUB_SCU(x_px); - const int y_cu = SUB_SCU(y_px); - - if (part_mode == SIZE_NxN) { - assert(depth == MAX_DEPTH + 1); - assert(width == SCU_WIDTH); + for (int i = depth + 1; i <= MAX_PU_DEPTH; i++) { + copy_cu_info (x_local, y_local, width, &work_tree[depth], &work_tree[i]); + copy_cu_pixels(x_local, y_local, width, &work_tree[depth], &work_tree[i]); } +} - if (depth > MAX_DEPTH) { - depth = MAX_DEPTH; - assert(part_mode == SIZE_NxN); - } +void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth) +{ + const int x_local = SUB_SCU(x_px); + const int y_local = SUB_SCU(y_px); + const int width = LCU_WIDTH >> depth; - // Set mode in every CU covered by part_mode in this depth. - for (int y = y_cu; y < y_cu + width; y += SCU_WIDTH) { - for (int x = x_cu; x < x_cu + width; x += SCU_WIDTH) { - cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x, y); - cu->depth = depth; - cu->type = CU_INTRA; - cu->intra.mode = pred_mode; - cu->intra.mode_chroma = chroma_mode; - cu->part_size = part_mode; + for (unsigned y = 0; y < width; y += SCU_WIDTH) { + for (unsigned x = 0; x < width; x += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x_local + x, y_local + y)->tr_depth = tr_depth; } } } - -static void lcu_set_inter_pu(lcu_t *lcu, int x_px, int y_px, int width, int height, cu_info_t *cur_pu) +static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, int height, cu_info_t *cu) { // Set mode in every CU covered by part_mode in this depth. - for (int y = y_px; y < y_px + height; y += SCU_WIDTH) { - for (int x = x_px; x < x_px + width; x += SCU_WIDTH) { - cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x, y); - //Check if this could be moved inside the if - if (cu != cur_pu) { - cu->depth = cur_pu->depth; - cu->part_size = cur_pu->part_size; - cu->type = CU_INTER; - cu->tr_depth = cur_pu->tr_depth; - cu->merged = cur_pu->merged; - cu->skipped = cur_pu->skipped; - memcpy(&cu->inter, &cur_pu->inter, sizeof(cur_pu->inter)); + for (int y = y_local; y < y_local + height; y += SCU_WIDTH) { + for (int x = x_local; x < x_local + width; x += SCU_WIDTH) { + cu_info_t *to = LCU_GET_CU_AT_PX(lcu, x, y); + to->type = cu->type; + to->depth = cu->depth; + to->part_size = cu->part_size; + + if (cu->type == CU_INTRA) { + to->intra.mode = cu->intra.mode; + to->intra.mode_chroma = cu->intra.mode_chroma; + } else { + to->skipped = cu->skipped; + to->merged = cu->merged; + to->merge_idx = cu->merge_idx; + to->inter = cu->inter; } } } } - -static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *cur_cu) +static void lcu_set_inter(lcu_t *lcu, int x_local, int y_local, int cu_width) { - const int width = LCU_WIDTH >> depth; - const int x_local = SUB_SCU(x_px); - const int y_local = SUB_SCU(y_px); - const int num_pu = kvz_part_mode_num_parts[cur_cu->part_size]; + const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size; + const int num_pu = kvz_part_mode_num_parts[part_mode]; for (int i = 0; i < num_pu; ++i) { - const int x_pu = PU_GET_X(cur_cu->part_size, width, x_local, i); - const int y_pu = PU_GET_Y(cur_cu->part_size, width, y_local, i); - const int width_pu = PU_GET_W(cur_cu->part_size, width, i); - const int height_pu = PU_GET_H(cur_cu->part_size, width, i); - cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu); - lcu_set_inter_pu(lcu, x_pu, y_pu, width_pu, height_pu, cur_pu); + const int x_pu = PU_GET_X(part_mode, cu_width, x_local, i); + const int y_pu = PU_GET_Y(part_mode, cu_width, y_local, i); + const int width_pu = PU_GET_W(part_mode, cu_width, i); + const int height_pu = PU_GET_H(part_mode, cu_width, i); + + cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu); + pu->type = CU_INTER; + lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu); } } - -static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *cur_cu) +static void lcu_set_coeff(lcu_t *lcu, int x_local, int y_local, int width, cu_info_t *cur_cu) { - const uint32_t width = LCU_WIDTH >> depth; - const uint32_t x_local = SUB_SCU(x_px); - const uint32_t y_local = SUB_SCU(y_px); - const uint32_t tr_split = cur_cu->tr_depth-cur_cu->depth; + const uint32_t tr_split = cur_cu->tr_depth - cur_cu->depth; const uint32_t mask = ~((width >> tr_split)-1); // Set coeff flags in every CU covered by part_mode in this depth. for (uint32_t y = y_local; y < y_local + width; y += SCU_WIDTH) { for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) { - cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x, y); // Use TU top-left CU to propagate coeff flags cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask); - if (cu != cu_from) { + cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y); + if (cu_from != cu_to) { // Chroma coeff data is not used, luma is needed for deblocking - cbf_copy(&cu->cbf, cu_from->cbf, COLOR_Y); + cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_Y); } } } @@ -344,12 +259,10 @@ } { - coeff_t coeff_temp[32 * 32]; int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); + const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; - // Code coeffs using cabac to get a better estimate of real coding costs. - kvz_coefficients_blit(&lcu->coeff.y[(y_px*LCU_WIDTH) + x_px], coeff_temp, width, width, LCU_WIDTH, width); - coeff_bits += kvz_get_coeff_cost(state, coeff_temp, width, 0, luma_scan_mode); + coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode); } double bits = tr_tree_bits + coeff_bits; @@ -415,16 +328,11 @@ } { - coeff_t coeff_temp[16 * 16]; int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); - - kvz_coefficients_blit(&lcu->coeff.u[(lcu_px.y*(LCU_WIDTH_C)) + lcu_px.x], - coeff_temp, width, width, LCU_WIDTH_C, width); - coeff_bits += kvz_get_coeff_cost(state, coeff_temp, width, 2, scan_order); + const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); - kvz_coefficients_blit(&lcu->coeff.v[(lcu_px.y*(LCU_WIDTH_C)) + lcu_px.x], - coeff_temp, width, width, LCU_WIDTH_C, width); - coeff_bits += kvz_get_coeff_cost(state, coeff_temp, width, 2, scan_order); + coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order); + coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order); } double bits = tr_tree_bits + coeff_bits; @@ -478,7 +386,7 @@ * - All the final data for the LCU gets eventually copied to depth 0, which * will be the final output of the recursion. */ -static double search_cu(encoder_state_t * const state, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1]) +static double search_cu(encoder_state_t * const state, int x, int y, int depth, lcu_t *work_tree) { const encoder_control_t* ctrl = state->encoder_control; const videoframe_t * const frame = state->tile->frame; @@ -491,10 +399,6 @@ int x_local = SUB_SCU(x); int y_local = SUB_SCU(y); -#ifdef KVZ_DEBUG - int debug_split = 0; -#endif - PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHCU); // Stop recursion if the CU is completely outside the frame. if (x >= frame->width || y >= frame->height) { @@ -502,21 +406,27 @@ return 0; } - cur_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x_local, y_local); + cur_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); // Assign correct depth cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth; cur_cu->tr_depth = depth > 0 ? depth : 1; cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N; + // If the CU is completely inside the frame at this depth, search for // prediction modes at this depth. if (x + cu_width <= frame->width && y + cu_width <= frame->height) { - - bool can_use_inter = - state->frame->slicetype != KVZ_SLICE_I - && WITHIN(depth, ctrl->cfg.pu_depth_inter.min, ctrl->cfg.pu_depth_inter.max); + int cu_width_inter_min = LCU_WIDTH >> ctrl->cfg.pu_depth_inter.max; + bool can_use_inter = state->frame->slicetype != KVZ_SLICE_I && ( + WITHIN(depth, ctrl->cfg.pu_depth_inter.min, ctrl->cfg.pu_depth_inter.max) || + // When the split was forced because the CTU is partially outside the + // frame, we permit inter coding even if pu_depth_inter would + // otherwise forbid it. + (x & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame->width || + (y & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame->height + ); if (can_use_inter) { double mode_cost; @@ -524,7 +434,7 @@ kvz_search_cu_inter(state, x, y, depth, - &work_tree[depth], + lcu, &mode_cost, &mode_bitcost); if (mode_cost < cost) { cost = mode_cost; @@ -555,7 +465,7 @@ cost = mode_cost; inter_bitcost = mode_bitcost; // TODO: only copy inter prediction info, not pixels - work_tree_copy_up(x, y, depth, work_tree); + work_tree_copy_up(x_local, y_local, depth, work_tree); } } } @@ -565,13 +475,21 @@ // decision after reconstructing the inter frame. bool skip_intra = state->encoder_control->cfg.rdo == 0 && cur_cu->type != CU_NOTSET - && cost / (cu_width * cu_width) < INTRA_TRESHOLD; - if (!skip_intra - && WITHIN(depth, ctrl->cfg.pu_depth_intra.min, ctrl->cfg.pu_depth_intra.max)) - { + && cost / (cu_width * cu_width) < INTRA_THRESHOLD; + + int32_t cu_width_intra_min = LCU_WIDTH >> ctrl->cfg.pu_depth_intra.max; + bool can_use_intra = + WITHIN(depth, ctrl->cfg.pu_depth_intra.min, ctrl->cfg.pu_depth_intra.max) || + // When the split was forced because the CTU is partially outside + // the frame, we permit intra coding even if pu_depth_intra would + // otherwise forbid it. + (x & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->width || + (y & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->height; + + if (can_use_intra && !skip_intra) { int8_t intra_mode; double intra_cost; - kvz_search_cu_intra(state, x, y, depth, &work_tree[depth], + kvz_search_cu_intra(state, x, y, depth, lcu, &intra_mode, &intra_cost); if (intra_cost < cost) { cost = intra_cost; @@ -585,38 +503,37 @@ // mode search of adjacent CUs. if (cur_cu->type == CU_INTRA) { assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN); - int8_t intra_mode = cur_cu->intra.mode; - lcu_set_intra_mode(&work_tree[depth], x, y, depth, - intra_mode, - intra_mode, - cur_cu->part_size); - kvz_intra_recon_lcu_luma(state, x, y, depth, intra_mode, NULL, &work_tree[depth]); + cur_cu->intra.mode_chroma = cur_cu->intra.mode; + lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); + kvz_intra_recon_cu(state, + x, y, + depth, + cur_cu->intra.mode, -1, // skip chroma + NULL, lcu); if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != KVZ_CSP_400) { - int8_t intra_mode_chroma = intra_mode; - // There is almost no benefit to doing the chroma mode search for // rd2. Possibly because the luma mode search already takes chroma // into account, so there is less of a chanse of luma mode being // really bad for chroma. if (state->encoder_control->cfg.rdo == 3) { - intra_mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, &work_tree[depth]); - lcu_set_intra_mode(&work_tree[depth], x, y, depth, - intra_mode, intra_mode_chroma, - cur_cu->part_size); + cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu); + lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); } - kvz_intra_recon_lcu_chroma(state, x, y, depth, intra_mode_chroma, NULL, &work_tree[depth]); + kvz_intra_recon_cu(state, + x, y, + depth, + -1, cur_cu->intra.mode_chroma, // skip luma + NULL, lcu); } } else if (cur_cu->type == CU_INTER) { // Reset transform depth because intra messes with them. // This will no longer be necessary if the transform depths are not shared. int tr_depth = depth > 0 ? depth : 1; - kvz_lcu_set_trdepth(&work_tree[depth], x, y, depth, tr_depth); + kvz_lcu_set_trdepth(lcu, x, y, depth, tr_depth); - const int cu_width = LCU_WIDTH >> depth; const int num_pu = kvz_part_mode_num_parts[cur_cu->part_size]; - for (int i = 0; i < num_pu; ++i) { const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, i); const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, i); @@ -627,33 +544,43 @@ if (cur_pu->inter.mv_dir == 3) { const kvz_picture *const refs[2] = { - state->frame->ref->images[cur_pu->inter.mv_ref[0]], - state->frame->ref->images[cur_pu->inter.mv_ref[1]], + state->frame->ref->images[ + state->frame->ref_LX[0][ + cur_pu->inter.mv_ref[0]]], + state->frame->ref->images[ + state->frame->ref_LX[1][ + cur_pu->inter.mv_ref[1]]], }; kvz_inter_recon_lcu_bipred(state, refs[0], refs[1], pu_x, pu_y, pu_w, pu_h, cur_pu->inter.mv, - &work_tree[depth]); + lcu); } else { const int mv_idx = cur_pu->inter.mv_dir - 1; + const kvz_picture *const ref = - state->frame->ref->images[cur_pu->inter.mv_ref[mv_idx]]; + state->frame->ref->images[ + state->frame->ref_LX[mv_idx][ + cur_pu->inter.mv_ref[mv_idx]]]; + kvz_inter_recon_lcu(state, ref, pu_x, pu_y, pu_w, pu_h, cur_pu->inter.mv[mv_idx], - &work_tree[depth], + lcu, 0); } } - kvz_quantize_lcu_luma_residual(state, x, y, depth, NULL, &work_tree[depth]); - if (state->encoder_control->chroma_format != KVZ_CSP_400) { - kvz_quantize_lcu_chroma_residual(state, x, y, depth, NULL, &work_tree[depth]); - } + const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400; + kvz_quantize_lcu_residual(state, + true, has_chroma, + x, y, depth, + NULL, + lcu); int cbf = cbf_is_set_any(cur_cu->cbf, depth); @@ -665,30 +592,36 @@ inter_bitcost -= 1; } } - lcu_set_inter(&work_tree[depth], x, y, depth, cur_cu); - lcu_set_coeff(&work_tree[depth], x, y, depth, cur_cu); + lcu_set_inter(lcu, x_local, y_local, cu_width); + lcu_set_coeff(lcu, x_local, y_local, cu_width, cur_cu); } } if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) { - cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, &work_tree[depth]); + cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu); if (state->encoder_control->chroma_format != KVZ_CSP_400) { - cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, &work_tree[depth]); + cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu); } double mode_bits; if (cur_cu->type == CU_INTRA) { - mode_bits = calc_mode_bits(state, &work_tree[depth], cur_cu, x, y); + mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y); } else { mode_bits = inter_bitcost; } cost += mode_bits * state->lambda; } - + + bool can_split_cu = + // If the CU is partially outside the frame, we need to split it even + // if pu_depth_intra and pu_depth_inter would not permit it. + cur_cu->type == CU_NOTSET || + depth < ctrl->cfg.pu_depth_intra.max || + (state->frame->slicetype != KVZ_SLICE_I && + depth < ctrl->cfg.pu_depth_inter.max); + // Recursively split all the way to max search depth. - if (depth < ctrl->cfg.pu_depth_intra.max || - (depth < ctrl->cfg.pu_depth_inter.max && state->frame->slicetype != KVZ_SLICE_I)) - { + if (can_split_cu) { int half_cu = cu_width / 2; double split_cost = 0.0; int cbf = cbf_is_set_any(cur_cu->cbf, depth); @@ -739,16 +672,20 @@ cur_cu->type = CU_INTRA; cur_cu->part_size = SIZE_2Nx2N; - kvz_lcu_set_trdepth(&work_tree[depth], x, y, depth, cur_cu->tr_depth); - lcu_set_intra_mode(&work_tree[depth], x, y, depth, - cur_cu->intra.mode, cur_cu->intra.mode_chroma, - cur_cu->part_size); - kvz_intra_recon_lcu_luma(state, x, y, depth, cur_cu->intra.mode, NULL, &work_tree[depth]); - cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, &work_tree[depth]); + kvz_lcu_set_trdepth(lcu, x, y, depth, cur_cu->tr_depth); + lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); - if (state->encoder_control->chroma_format != KVZ_CSP_400) { - kvz_intra_recon_lcu_chroma(state, x, y, depth, cur_cu->intra.mode_chroma, NULL, &work_tree[depth]); - cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, &work_tree[depth]); + const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400; + const int8_t mode_chroma = has_chroma ? cur_cu->intra.mode_chroma : -1; + kvz_intra_recon_cu(state, + x, y, + depth, + cur_cu->intra.mode, mode_chroma, + NULL, lcu); + + cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu); + if (has_chroma) { + cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu); } // Add the cost of coding no-split. @@ -757,7 +694,7 @@ cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // Add the cost of coding intra mode only once. - double mode_bits = calc_mode_bits(state, &work_tree[depth], cur_cu, x, y); + double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y); cost += mode_bits * state->lambda; } } @@ -765,27 +702,22 @@ if (split_cost < cost) { // Copy split modes to this depth. cost = split_cost; - work_tree_copy_up(x, y, depth, work_tree); + work_tree_copy_up(x_local, y_local, depth, work_tree); #if KVZ_DEBUG debug_split = 1; #endif } else if (depth > 0) { // Copy this CU's mode all the way down for use in adjacent CUs mode // search. - work_tree_copy_down(x, y, depth, work_tree); + work_tree_copy_down(x_local, y_local, depth, work_tree); } } else if (depth >= 0 && depth < MAX_PU_DEPTH) { // Need to copy modes down since the lower level of the work tree is used // when searching SMP and AMP blocks. - work_tree_copy_down(x, y, depth, work_tree); + work_tree_copy_down(x_local, y_local, depth, work_tree); } - PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHCU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->frame->num, state->tile->id, state->slice->id, - (state->tile->lcu_offset_x * LCU_WIDTH) + x, - (state->tile->lcu_offset_x * LCU_WIDTH) + x + (LCU_WIDTH >> depth), - (state->tile->lcu_offset_y * LCU_WIDTH) + y, - (state->tile->lcu_offset_y * LCU_WIDTH) + y + (LCU_WIDTH >> depth), - depth, debug_split, (cur_cu->type==CU_INTRA)?1:0); + assert(cur_cu->type != CU_NOTSET); return cost; } @@ -911,23 +843,15 @@ const int pic_width = pic->width; const int x_max = MIN(x_px + LCU_WIDTH, pic_width) - x_px; const int y_max = MIN(y_px + LCU_WIDTH, pic->height) - y_px; - const int luma_index = x_px + y_px * pic_width; - const int chroma_index = (x_px / 2) + (y_px / 2) * (pic_width / 2); kvz_pixels_blit(lcu->rec.y, &pic->rec->y[x_px + y_px * pic->rec->stride], x_max, y_max, LCU_WIDTH, pic->rec->stride); - kvz_coefficients_blit(lcu->coeff.y, &pic->coeff_y[luma_index], - x_max, y_max, LCU_WIDTH, pic_width); if (state->encoder_control->chroma_format != KVZ_CSP_400) { kvz_pixels_blit(lcu->rec.u, &pic->rec->u[(x_px / 2) + (y_px / 2) * (pic->rec->stride / 2)], x_max / 2, y_max / 2, LCU_WIDTH / 2, pic->rec->stride / 2); kvz_pixels_blit(lcu->rec.v, &pic->rec->v[(x_px / 2) + (y_px / 2) * (pic->rec->stride / 2)], x_max / 2, y_max / 2, LCU_WIDTH / 2, pic->rec->stride / 2); - kvz_coefficients_blit(lcu->coeff.u, &pic->coeff_u[chroma_index], - x_max / 2, y_max / 2, LCU_WIDTH / 2, pic_width / 2); - kvz_coefficients_blit(lcu->coeff.v, &pic->coeff_v[chroma_index], - x_max / 2, y_max / 2, LCU_WIDTH / 2, pic_width / 2); } } } @@ -961,4 +885,9 @@ // The best decisions through out the LCU got propagated back to depth 0, // so copy those back to the frame. copy_lcu_to_cu_data(state, x, y, &work_tree[0]); + + // Copy coeffs to encoder state. + copy_coeffs(work_tree[0].coeff.y, state->coeff->y, LCU_WIDTH); + copy_coeffs(work_tree[0].coeff.u, state->coeff->u, LCU_WIDTH_C); + copy_coeffs(work_tree[0].coeff.v, state->coeff->v, LCU_WIDTH_C); }
View file
kvazaar-1.1.0.tar.gz/src/search_inter.c -> kvazaar-1.2.0.tar.gz/src/search_inter.c
Changed
@@ -35,68 +35,199 @@ #include "videoframe.h" +typedef struct { + encoder_state_t *state; + + /** + * \brief Current frame + */ + const kvz_picture *pic; + /** + * \brief Reference frame + */ + const kvz_picture *ref; + + /** + * \brief Index of the reference frame + */ + int32_t ref_idx; + + /** + * \brief Top-left corner of the PU + */ + const vector2d_t origin; + int32_t width; + int32_t height; + + int16_t mv_cand[2][2]; + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS]; + int32_t num_merge_cand; + + kvz_mvd_cost_func *mvd_cost_func; + + /** + * \brief Best motion vector among the ones tested so far + */ + vector2d_t best_mv; + /** + * \brief Cost of best_mv + */ + uint32_t best_cost; + /** + * \brief Bit cost of best_mv + */ + uint32_t best_bitcost; +} inter_search_info_t; + + /** * \return True if referred block is within current tile. */ -static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int height, int wpp_limit) +static INLINE bool fracmv_within_tile(const inter_search_info_t *info, int x, int y) { - if (state->encoder_control->cfg.mv_constraint == KVZ_MV_CONSTRAIN_NONE) { - return (wpp_limit == -1 || y + (height << 2) <= (wpp_limit << 2)); - }; + const encoder_control_t *ctrl = info->state->encoder_control; + + const bool is_frac_luma = x % 4 != 0 || y % 4 != 0; + const bool is_frac_chroma = x % 8 != 0 || y % 8 != 0; + + if (ctrl->cfg.owf && ctrl->cfg.wpp) { + // Check that the block does not reference pixels that are not final. + + // Margin as luma pixels. + int margin = 0; + if (is_frac_luma) { + // Fractional motion estimation needs up to 4 pixels outside the + // block. + margin = 4; + } else if (is_frac_chroma) { + // Odd chroma interpolation needs up to 2 luma pixels outside the + // block. + margin = 2; + } + + if (ctrl->cfg.sao_type) { + // Make sure we don't refer to pixels for which SAO reconstruction + // has not been done. + margin += SAO_DELAY_PX; + } else if (ctrl->cfg.deblock_enable) { + // Make sure we don't refer to pixels that have not been deblocked. + margin += DEBLOCK_DELAY_PX; + } + + // Coordinates of the top-left corner of the containing LCU. + const vector2d_t orig_lcu = { + .x = info->origin.x / LCU_WIDTH, + .y = info->origin.y / LCU_WIDTH, + }; + // Difference between the coordinates of the LCU containing the + // bottom-left corner of the referenced block and the LCU containing + // this block. + const vector2d_t mv_lcu = { + ((info->origin.x + info->width + margin) * 4 + x) / (LCU_WIDTH << 2) - orig_lcu.x, + ((info->origin.y + info->height + margin) * 4 + y) / (LCU_WIDTH << 2) - orig_lcu.y, + }; + if (mv_lcu.y > ctrl->max_inter_ref_lcu.down) { + return false; + } + + if (mv_lcu.x + mv_lcu.y > + ctrl->max_inter_ref_lcu.down + ctrl->max_inter_ref_lcu.right) + { + return false; + } + } + + if (ctrl->cfg.mv_constraint == KVZ_MV_CONSTRAIN_NONE) { + return true; + } + + // Margin as luma quater pixels. int margin = 0; - if (state->encoder_control->cfg.mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN) { - // Enforce a distance of 8 from any tile boundary. - margin = 4 * 4; + if (ctrl->cfg.mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN) { + if (is_frac_luma) { + margin = 4 << 2; + } else if (is_frac_chroma) { + margin = 2 << 2; + } } // TODO implement KVZ_MV_CONSTRAIN_FRAM and KVZ_MV_CONSTRAIN_TILE. - const vector2d_t abs_mv = { (orig->x << 2) + x, (orig->y << 2) + y }; + const vector2d_t abs_mv = { + info->origin.x * 4 + x, + info->origin.y * 4 + y, + }; - // Check that both margin and wpp_limit constraints are satisfied. - if (abs_mv.x >= margin && abs_mv.x + (width << 2) <= (state->tile->frame->width << 2) - margin && - abs_mv.y >= margin && abs_mv.y + (height << 2) <= (state->tile->frame->height << 2) - margin && - (wpp_limit == -1 || y + (height << 2) <= (wpp_limit << 2))) - { - return true; - } else { - return false; - } + // Check that both margin constraints are satisfied. + const int from_right = + (info->state->tile->frame->width << 2) - (abs_mv.x + (info->width << 2)); + const int from_bottom = + (info->state->tile->frame->height << 2) - (abs_mv.y + (info->height << 2)); + + return abs_mv.x >= margin && + abs_mv.y >= margin && + from_right >= margin && + from_bottom >= margin; } -static INLINE int get_wpp_limit(const encoder_state_t *state, const vector2d_t* orig) +/** + * \return True if referred block is within current tile. + */ +static INLINE bool intmv_within_tile(const inter_search_info_t *info, int x, int y) { - const encoder_control_t *ctrl = state->encoder_control; - if (ctrl->cfg.owf && ctrl->cfg.wpp) { - // Limit motion vectors to the LCU-row below this row. - // To avoid fractional pixel interpolation depending on things outside - // this range, add a margin of 4 pixels. - // - fme needs 4 pixels - // - odd chroma interpolation needs 4 pixels - int wpp_limit = 2 * LCU_WIDTH - 4 - orig->y % LCU_WIDTH; - if (ctrl->cfg.deblock_enable && !ctrl->cfg.sao_enable) { - // As a special case, when deblocking is enabled but SAO is not, we have - // to avoid the possibility of interpolation filters reaching the - // non-deblocked pixels. The deblocking for the horizontal edge on the - // LCU boundary can reach 4 pixels. If SAO is enabled, this WPP-row - // depends on the SAO job, which depends on the deblocking having - // already been done. - wpp_limit -= 4; - } - return wpp_limit; - } else { - return -1; - } + return fracmv_within_tile(info, x * 4, y * 4); } /** - * \return True if referred block is within current tile. + * \brief Calculate cost for an integer motion vector. + * + * Updates info->best_mv, info->best_cost and info->best_bitcost to the new + * motion vector if it yields a lower cost than the current one. + * + * If the motion vector violates the MV constraints for tiles or WPP, the + * cost is not set. + * + * \return true if info->best_mv was changed, false otherwise */ -static INLINE bool intmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int height, int wpp_limit) +static bool check_mv_cost(inter_search_info_t *info, int x, int y) { - return fracmv_within_tile(state, orig, x << 2, y << 2, width, height, wpp_limit); + if (!intmv_within_tile(info, x, y)) return false; + + uint32_t bitcost = 0; + uint32_t cost = kvz_image_calc_sad( + info->pic, + info->ref, + info->origin.x, + info->origin.y, + info->state->tile->offset_x + info->origin.x + x, + info->state->tile->offset_y + info->origin.y + y, + info->width, + info->height + ); + + if (cost >= info->best_cost) return false; + + cost += info->mvd_cost_func( + info->state, + x, y, 2, + info->mv_cand, + info->merge_cand, + info->num_merge_cand, + info->ref_idx, + &bitcost + ); + + if (cost >= info->best_cost) return false; + + // Set to motion vector in quarter pixel precision. + info->best_mv.x = x * 4; + info->best_mv.y = y * 4; + info->best_cost = cost; + info->best_bitcost = bitcost; + + return true; } @@ -121,18 +252,19 @@ } -/**Checks if mv is one of the merge candidates -* \return true if found else return false -*/ -static bool mv_in_merge(const inter_merge_cand_t* merge_cand, int16_t num_cand, const vector2d_t* mv) +/** + * \brief Checks if mv is one of the merge candidates. + * \return true if found else return false + */ +static bool mv_in_merge(const inter_search_info_t *info, vector2d_t mv) { - for (int i = 0; i < num_cand; ++i) { - if (merge_cand[i].dir == 3) continue; + for (int i = 0; i < info->num_merge_cand; ++i) { + if (info->merge_cand[i].dir == 3) continue; const vector2d_t merge_mv = { - merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2, - merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2 + info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0] >> 2, + info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1] >> 2 }; - if (merge_mv.x == mv->x && merge_mv.y == mv->y) { + if (merge_mv.x == mv.x && merge_mv.y == mv.y) { return true; } } @@ -140,49 +272,43 @@ } -static unsigned select_starting_point(int16_t num_cand, inter_merge_cand_t *merge_cand, vector2d_t *mv_in_out, vector2d_t *mv, encoder_state_t *const state, - const vector2d_t *orig, unsigned width, unsigned height, int wpp_limit, const kvz_picture *pic, const kvz_picture *ref, - int16_t mv_cand[2][2], int32_t ref_idx, unsigned best_cost, unsigned *best_index, uint32_t *best_bitcost, - kvz_mvd_cost_func *calc_mvd){ +/** + * \brief Select starting point for integer motion estimation search. + * + * Checks the zero vector, extra_mv and merge candidates and updates + * info->best_mv to the best one. + */ +static void select_starting_point(inter_search_info_t *info, vector2d_t extra_mv) +{ + // Check the 0-vector, so we can ignore all 0-vectors in the merge cand list. + check_mv_cost(info, 0, 0); + + // Change to integer precision. + extra_mv.x >>= 2; + extra_mv.y >>= 2; + + // Check mv_in if it's not one of the merge candidates. + if ((extra_mv.x != 0 || extra_mv.y != 0) && !mv_in_merge(info, extra_mv)) { + check_mv_cost(info, extra_mv.x, extra_mv.y); + } + // Go through candidates - for (unsigned i = 0; i < num_cand; ++i) { - if (merge_cand[i].dir == 3) continue; - mv->x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2; - mv->y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2; - - if (mv->x == 0 && mv->y == 0) continue; - if (!intmv_within_tile(state, orig, mv->x, mv->y, width, height, wpp_limit)) { - continue; - } + for (unsigned i = 0; i < info->num_merge_cand; ++i) { + if (info->merge_cand[i].dir == 3) continue; - uint32_t bitcost = 0; - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y, - width, height, -1); - cost += calc_mvd(state, mv->x, mv->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - - if (cost < best_cost) { - best_cost = cost; - *best_index = i; - *best_bitcost = bitcost; - } - } - if (*best_index < num_cand) { - mv->x = merge_cand[*best_index].mv[merge_cand[*best_index].dir - 1][0] >> 2; - mv->y = merge_cand[*best_index].mv[merge_cand[*best_index].dir - 1][1] >> 2; - } else if (*best_index == num_cand) { - mv->x = mv_in_out->x >> 2; - mv->y = mv_in_out->y >> 2; - } else { - mv->x = 0; - mv->y = 0; + int x = info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0] >> 2; + int y = info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1] >> 2; + + if (x == 0 && y == 0) continue; + + check_mv_cost(info, x, y); } - return best_cost; } -static uint32_t get_mvd_coding_cost(encoder_state_t * const state, vector2d_t *mvd, const cabac_data_t* cabac) +static uint32_t get_mvd_coding_cost(const encoder_state_t *state, + vector2d_t *mvd, + const cabac_data_t* cabac) { unsigned bitcost = 0; const vector2d_t abs_mvd = { abs(mvd->x), abs(mvd->y) }; @@ -210,9 +336,15 @@ } -static int calc_mvd_cost(encoder_state_t * const state, int x, int y, int mv_shift, - int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand,int32_t ref_idx, uint32_t *bitcost) +static uint32_t calc_mvd_cost(const encoder_state_t *state, + int x, + int y, + int mv_shift, + int16_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + int32_t ref_idx, + uint32_t *bitcost) { uint32_t temp_bitcost = 0; uint32_t merge_idx; @@ -221,15 +353,17 @@ int8_t merged = 0; int8_t cur_mv_cand = 0; - x <<= mv_shift; - y <<= mv_shift; + x *= 1 << mv_shift; + y *= 1 << mv_shift; // Check every candidate to find a match for(merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) { if (merge_cand[merge_idx].dir == 3) continue; if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x && merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y && - merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) { + state->frame->ref_LX[merge_cand[merge_idx].dir - 1][ + merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] + ] == ref_idx) { temp_bitcost += merge_idx; merged = 1; break; @@ -257,81 +391,63 @@ } -static bool early_terminate(int16_t num_cand, inter_merge_cand_t *merge_cand, vector2d_t *mv_in_out, vector2d_t *mv, encoder_state_t *const state, - const vector2d_t *orig, unsigned width, unsigned height, int wpp_limit, const kvz_picture *pic, const kvz_picture *ref, - int16_t mv_cand[2][2], int32_t ref_idx, unsigned *best_cost, uint32_t *bitcost_out, uint32_t *best_bitcost, - kvz_mvd_cost_func *calc_mvd) +static bool early_terminate(inter_search_info_t *info) { - static const vector2d_t small_hexbs[5] = { - { 0, 0 }, - { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 }, + static const vector2d_t small_hexbs[7] = { + { 0, -1 }, { -1, 0 }, { 0, 1 }, { 1, 0 }, + { 0, -1 }, { -1, 0 }, { 0, 0 }, }; - double multiplier = 1; - // If early termination is set to fast set multiplier to 0.9 - if (state->encoder_control->cfg.me_early_termination == KVZ_ME_EARLY_TERMINATION_SENSITIVE){ - multiplier = 0.95; - } - const vector2d_t *offset; - for (int k = 0; k < 2; ++k){ - unsigned best_index = 0; - for (int i = 1; i < 5; ++i) { - offset = &small_hexbs[i]; - if (!intmv_within_tile(state, orig, mv->x + offset->x, mv->y + offset->y, width, height, wpp_limit)) { - continue; - } - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + offset->x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + offset->y, - width, height, -1); - unsigned bitcost; - cost += calc_mvd(state, mv->x + offset->x, mv->y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + vector2d_t mv = { info->best_mv.x >> 2, info->best_mv.y >> 2 }; - if (cost < multiplier * *best_cost ) { - *best_cost = cost; + int first_index = 0; + int last_index = 3; + + for (int k = 0; k < 2; ++k) { + double threshold; + if (info->state->encoder_control->cfg.me_early_termination == + KVZ_ME_EARLY_TERMINATION_SENSITIVE) + { + threshold = info->best_cost * 0.95; + } else { + threshold = info->best_cost; + } + + int best_index = 6; + for (int i = first_index; i <= last_index; i++) { + int x = mv.x + small_hexbs[i].x; + int y = mv.y + small_hexbs[i].y; + + if (check_mv_cost(info, x, y)) { best_index = i; - *best_bitcost = bitcost; } } - // Adjust the movement vector - mv->x += small_hexbs[best_index].x; - mv->y += small_hexbs[best_index].y; - // if best match is at center we stop the search - if (best_index == 0){ - // Return final movement vector in quarter-pixel precision. - mv_in_out->x = mv->x << 2; - mv_in_out->y = mv->y << 2; + // Adjust the movement vector + mv.x += small_hexbs[best_index].x; + mv.y += small_hexbs[best_index].y; - *bitcost_out = *best_bitcost; + // If best match is not better than threshold, we stop the search. + if (info->best_cost >= threshold) { return true; } + + first_index = (best_index + 3) % 4; + last_index = first_index + 2; } return false; } -unsigned kvz_tz_pattern_search(encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref, unsigned pattern_type, - const vector2d_t *orig, const int iDist, vector2d_t *mv, unsigned best_cost, int *best_dist, - int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost, - int width, int height, int wpp_limit) +void kvz_tz_pattern_search(inter_search_info_t *info, + unsigned pattern_type, + const int iDist, + int *best_dist) { - int n_points; - int best_index = -1; - int i; - - vector2d_t mv_best = { 0, 0 }; - - - kvz_mvd_cost_func *calc_mvd = calc_mvd_cost; - if (state->encoder_control->cfg.mv_rdo) { - calc_mvd = kvz_calc_mvd_cost_cabac; - } - assert(pattern_type < 4); //implemented search patterns - vector2d_t pattern[4][8] = { + const vector2d_t pattern[4][8] = { //diamond (8 points) //[ ][ ][ ][ ][1][ ][ ][ ][ ] //[ ][ ][ ][ ][ ][ ][ ][ ][ ] @@ -391,14 +507,12 @@ { iDist / 2, iDist }, { iDist, 0 }, { iDist / 2, -iDist }, { -iDist, 0 }, { iDist / 2, iDist }, { -iDist / 2, -iDist }, { 0, 0 }, { 0, 0 } } - }; - //set the number of points to be checked - if (iDist == 1) - { - switch (pattern_type) - { + // Set the number of points to be checked. + int n_points; + if (iDist == 1) { + switch (pattern_type) { case 0: n_points = 4; break; @@ -412,11 +526,8 @@ n_points = 8; break; }; - } - else - { - switch (pattern_type) - { + } else { + switch (pattern_type) { case 3: n_points = 6; break; @@ -426,248 +537,110 @@ }; } - //compute SAD values for all chosen points - for (i = 0; i < n_points; i++) - { - vector2d_t *current = &pattern[pattern_type][i]; - if (!intmv_within_tile(state, orig, mv->x + current->x, mv->y + current->y, width, height, wpp_limit)) { - continue; - } - - unsigned cost; - uint32_t bitcost; + const vector2d_t mv = { info->best_mv.x >> 2, info->best_mv.y >> 2 }; - { - cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y, - width, height, -1); - cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - } + // Compute SAD values for all chosen points. + int best_index = -1; + for (int i = 0; i < n_points; i++) { + vector2d_t offset = pattern[pattern_type][i]; + int x = mv.x + offset.x; + int y = mv.y + offset.y; - if (cost < best_cost) - { - best_cost = cost; - *best_bitcost = bitcost; + if (check_mv_cost(info, x, y)) { best_index = i; } - } - if (best_index >= 0) - { - mv_best = pattern[pattern_type][best_index]; + if (best_index >= 0) { *best_dist = iDist; } - - mv->x += mv_best.x; - mv->y += mv_best.y; - - return best_cost; - } -unsigned kvz_tz_raster_search(encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref, - const vector2d_t *orig, vector2d_t *mv, unsigned best_cost, - int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost, - int width, int height, int iSearchRange, int iRaster, int wpp_limit) +void kvz_tz_raster_search(inter_search_info_t *info, + int iSearchRange, + int iRaster) { - int i; - int k; - - vector2d_t mv_best = { 0, 0 }; + const vector2d_t mv = { info->best_mv.x >> 2, info->best_mv.y >> 2 }; - kvz_mvd_cost_func *calc_mvd = calc_mvd_cost; - if (state->encoder_control->cfg.mv_rdo) { - calc_mvd = kvz_calc_mvd_cost_cabac; - } - //compute SAD values for every point in the iRaster downsampled version of the current search area - for (i = iSearchRange; i >= -iSearchRange; i -= iRaster) - { - for (k = -iSearchRange; k <= iSearchRange; k += iRaster) - { - vector2d_t current = { k, i }; - if (!intmv_within_tile(state, orig, mv->x + current.x, mv->y + current.y, width, height, wpp_limit)) { - continue; - } - - unsigned cost; - uint32_t bitcost; - - { - cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i, - width, height, -1); - cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - } - - if (cost < best_cost) - { - best_cost = cost; - *best_bitcost = bitcost; - mv_best = current; - } - + for (int y = iSearchRange; y >= -iSearchRange; y -= iRaster) { + for (int x = -iSearchRange; x <= iSearchRange; x += iRaster) { + check_mv_cost(info, mv.x + x, mv.y + y); } } - - mv->x += mv_best.x; - mv->y += mv_best.y; - - return best_cost; - } -static unsigned tz_search(encoder_state_t * const state, - unsigned width, unsigned height, - const kvz_picture *pic, const kvz_picture *ref, - const vector2d_t *orig, vector2d_t *mv_in_out, - int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, int32_t ref_idx, uint32_t *bitcost_out) +static void tz_search(inter_search_info_t *info, vector2d_t extra_mv) { - //TZ parameters const int iSearchRange = 96; // search range for each stage - const int iRaster = 5; // search distance limit and downsampling factor for step 3 + const int iRaster = 5; // search distance limit and downsampling factor for step 3 const unsigned step2_type = 0; // search patterns for steps 2 and 4 const unsigned step4_type = 0; const bool bRasterRefinementEnable = true; // enable step 4 mode 1 const bool bStarRefinementEnable = false; // enable step 4 mode 2 (only one mode will be executed) - vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 }; - - unsigned best_cost = UINT32_MAX; - uint32_t best_bitcost = 0; - int iDist; int best_dist = 0; - unsigned best_index = num_cand + 1; - int wpp_limit = get_wpp_limit(state, orig); - - kvz_mvd_cost_func *calc_mvd = calc_mvd_cost; - if (state->encoder_control->cfg.mv_rdo) { - calc_mvd = kvz_calc_mvd_cost_cabac; - } - - // Check the 0-vector, so we can ignore all 0-vectors in the merge cand list. - if (intmv_within_tile(state, orig, 0, 0, width, height, wpp_limit)) { - best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y, - width, height, -1); - best_cost += calc_mvd(state, 0, 0, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost); - best_index = num_cand + 1; - } + info->best_cost = UINT32_MAX; - // Check mv_in if it's not one of the merge candidates. - if (!mv_in_merge(merge_cand, num_cand, &mv) && - intmv_within_tile(state, orig, mv.x, mv.y, width, height, wpp_limit)) - { - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, - width, height, -1); - unsigned bitcost; - cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - if (cost < best_cost) { - best_cost = cost; - best_index = num_cand; - best_bitcost = bitcost; - } - } - - // Select starting point from among merge candidates. These should include - // both mv_cand vectors and (0, 0). - best_cost = select_starting_point(num_cand, merge_cand, mv_in_out, &mv, state, orig, width, height, wpp_limit, - pic, ref, mv_cand, ref_idx, best_cost, &best_index, &best_bitcost, calc_mvd); + // Select starting point from among merge candidates. These should + // include both mv_cand vectors and (0, 0). + select_starting_point(info, extra_mv); // Check if we should stop search - if (state->encoder_control->cfg.me_early_termination){ - if (early_terminate(num_cand, merge_cand, mv_in_out, &mv, state, orig, width, height, wpp_limit, - pic, ref, mv_cand, ref_idx, &best_cost, bitcost_out, &best_bitcost, calc_mvd)) return best_cost; + if (info->state->encoder_control->cfg.me_early_termination && + early_terminate(info)) + { + return; } //step 2, grid search - for (iDist = 1; iDist <= iSearchRange; iDist *= 2) - { - best_cost = kvz_tz_pattern_search(state, pic, ref, step2_type, orig, iDist, &mv, best_cost, &best_dist, - mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit); + for (int iDist = 1; iDist <= iSearchRange; iDist *= 2) { + kvz_tz_pattern_search(info, step2_type, iDist, &best_dist); } //step 3, raster scan - if (best_dist > iRaster) - { + if (best_dist > iRaster) { best_dist = iRaster; - - best_cost = kvz_tz_raster_search(state, pic, ref, orig, &mv, best_cost, mv_cand, merge_cand, - num_cand, ref_idx, &best_bitcost, width, height, iSearchRange, iRaster, wpp_limit); + kvz_tz_raster_search(info, iSearchRange, iRaster); } //step 4 //raster refinement - if (bRasterRefinementEnable && best_dist > 0) - { - iDist = best_dist >> 1; - while (iDist > 0) - { - best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist, - mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit); - - iDist = iDist >> 1; + if (bRasterRefinementEnable && best_dist > 0) { + for (int iDist = best_dist >> 1; iDist > 0; iDist >>= 1) { + kvz_tz_pattern_search(info, step4_type, iDist, &best_dist); } } //star refinement (repeat step 2 for the current starting point) - if (bStarRefinementEnable && best_dist > 0) - { - for (iDist = 1; iDist <= iSearchRange; iDist *= 2) - { - best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist, - mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit); + if (bStarRefinementEnable && best_dist > 0) { + for (int iDist = 1; iDist <= iSearchRange; iDist *= 2) { + kvz_tz_pattern_search(info, step4_type, iDist, &best_dist); } } - - mv.x = mv.x << 2; - mv.y = mv.y << 2; - - *mv_in_out = mv; - *bitcost_out = best_bitcost; - - return best_cost; } /** * \brief Do motion search using the HEXBS algorithm. * - * \param width width of the block to search - * \param height height of the block to search - * \param pic Picture motion vector is searched for. - * \param ref Picture motion vector is searched from. - * \param orig Top left corner of the searched for block. - * \param mv_in_out Predicted mv in and best out. Quarter pixel precision. - * - * \returns Cost of the motion vector. + * \param info search info + * \param extra_mv extra motion vector to check * * Motion vector is searched by first searching iteratively with the large * hexagon pattern until the best match is at the center of the hexagon. * As a final step a smaller hexagon is used to check the adjacent pixels. * - * If a non 0,0 predicted motion vector predictor is given as mv_in_out, + * If a non 0,0 predicted motion vector predictor is given as extra_mv, * the 0,0 vector is also tried. This is hoped to help in the case where * the predicted motion vector is way off. In the future even more additional * points like 0,0 might be used, such as vectors from top or left. */ -static unsigned hexagon_search(encoder_state_t * const state, - unsigned width, unsigned height, - const kvz_picture *pic, const kvz_picture *ref, - const vector2d_t *orig, vector2d_t *mv_in_out, - int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, int32_t ref_idx, uint32_t *bitcost_out) +static void hexagon_search(inter_search_info_t *info, vector2d_t extra_mv) { // The start of the hexagonal pattern has been repeated at the end so that // the indices between 1-6 can be used as the start of a 3-point list of new @@ -691,83 +664,36 @@ { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } }; - vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 }; - unsigned best_cost = UINT32_MAX; - uint32_t best_bitcost = 0, bitcost; - unsigned i; - // Current best index, either to merge_cands, large_hebx or small_hexbs. - unsigned best_index = num_cand + 1; - int wpp_limit = get_wpp_limit(state, orig); + info->best_cost = UINT32_MAX; - kvz_mvd_cost_func *calc_mvd = calc_mvd_cost; - if (state->encoder_control->cfg.mv_rdo) { - calc_mvd = kvz_calc_mvd_cost_cabac; - } + // Select starting point from among merge candidates. These should + // include both mv_cand vectors and (0, 0). + select_starting_point(info, extra_mv); - // Check the 0-vector, so we can ignore all 0-vectors in the merge cand list. - if (intmv_within_tile(state, orig, 0, 0, width, height, wpp_limit)) { - best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y, - width, height, -1); - best_cost += calc_mvd(state, 0, 0, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - best_bitcost = bitcost; - best_index = num_cand + 1; - } - - // Check mv_in if it's not one of the merge candidates. - if (!mv_in_merge(merge_cand, num_cand, &mv) && - intmv_within_tile(state, orig, mv.x, mv.y, width, height, wpp_limit)) + // Check if we should stop search + if (info->state->encoder_control->cfg.me_early_termination && + early_terminate(info)) { - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, - width, height, -1); - cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - - if (cost < best_cost) { - best_cost = cost; - best_index = num_cand; - best_bitcost = bitcost; - } + return; } - // Select starting point from among merge candidates. These should include - // both mv_cand vectors and (0, 0). - best_cost = select_starting_point(num_cand, merge_cand, mv_in_out, &mv, state, orig, width, height, wpp_limit, - pic, ref, mv_cand, ref_idx, best_cost, &best_index, &best_bitcost, calc_mvd); + vector2d_t mv = { info->best_mv.x >> 2, info->best_mv.y >> 2 }; - // Check if we should stop search - if (state->encoder_control->cfg.me_early_termination){ - if (early_terminate(num_cand, merge_cand, mv_in_out, &mv, state, orig, width, height, wpp_limit, - pic, ref, mv_cand, ref_idx, &best_cost, bitcost_out, &best_bitcost, calc_mvd)) return best_cost; - } + // Current best index, either to merge_cands, large_hebx or small_hexbs. + int best_index = 0; // Search the initial 7 points of the hexagon. - best_index = 0; - for (i = 0; i < 7; ++i) { - const vector2d_t *pattern = &large_hexbs[i]; - if (!intmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width, height, wpp_limit)) { - continue; - } - - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, - width, height, -1); - cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - - if (cost < best_cost) { - best_cost = cost; - best_index = i; - best_bitcost = bitcost; + for (int i = 1; i < 7; ++i) { + if (check_mv_cost(info, mv.x + large_hexbs[i].x, mv.y + large_hexbs[i].y)) { + best_index = i; } } // Iteratively search the 3 new points around the best match, until the best // match is in the center. while (best_index != 0) { - unsigned start; // Starting point of the 3 offsets to be searched. + // Starting point of the 3 offsets to be searched. + unsigned start; if (best_index == 1) { start = 6; } else if (best_index == 8) { @@ -782,22 +708,10 @@ best_index = 0; // Iterate through the next 3 points. - for (i = 0; i < 3; ++i) { - const vector2d_t *offset = &large_hexbs[start + i]; - if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width, height, wpp_limit)) { - continue; - } - - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, - width, height, -1); - cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - - if (cost < best_cost) { - best_cost = cost; - best_index = start + i; - best_bitcost = bitcost; + for (int i = 0; i < 3; ++i) { + vector2d_t offset = large_hexbs[start + i]; + if (check_mv_cost(info, mv.x + offset.x, mv.y + offset.y)) { + best_index = start + i; } } } @@ -808,115 +722,45 @@ best_index = 0; // Do the final step of the search with a small pattern. - for (i = 1; i < 5; ++i) { - const vector2d_t *offset = &small_hexbs[i]; - if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width, height, wpp_limit)) { - continue; - } - - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, - (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, - width, height, -1); - cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - - if (cost > 0 && cost < best_cost) { - best_cost = cost; - best_index = i; - best_bitcost = bitcost; - } + for (int i = 1; i < 5; ++i) { + check_mv_cost(info, mv.x + small_hexbs[i].x, mv.y + small_hexbs[i].y); } - - // Adjust the movement vector according to the final best match. - mv.x += small_hexbs[best_index].x; - mv.y += small_hexbs[best_index].y; - - // Return final movement vector in quarter-pixel precision. - mv_in_out->x = mv.x << 2; - mv_in_out->y = mv.y << 2; - - *bitcost_out = best_bitcost; - - return best_cost; } -static unsigned search_mv_full(encoder_state_t * const state, - unsigned width, unsigned height, - const kvz_picture *pic, const kvz_picture *ref, - const vector2d_t *orig, vector2d_t *mv_in_out, - int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, int32_t ref_idx, const int32_t search_range, uint32_t *bitcost_out) +static void search_mv_full(inter_search_info_t *info, + int32_t search_range, + vector2d_t extra_mv) { - vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 }; - vector2d_t best_mv = { 0, 0 }; - unsigned best_cost = UINT32_MAX; - uint32_t best_bitcost = 0, bitcost; - int wpp_limit = get_wpp_limit(state, orig); - - kvz_mvd_cost_func *calc_mvd = calc_mvd_cost; - if (state->encoder_control->cfg.mv_rdo) { - calc_mvd = kvz_calc_mvd_cost_cabac; - } - - // Check the 0-vector, so we can ignore all 0-vectors in the merge cand list. - if (intmv_within_tile(state, orig, 0, 0, width, height, wpp_limit)) { - vector2d_t min_mv = { 0 - search_range, 0 - search_range }; - vector2d_t max_mv = { 0 + search_range, 0 + search_range }; - - for (int y = min_mv.y; y <= max_mv.y; ++y) { - for (int x = min_mv.x; x <= max_mv.x; ++x) { - if (!intmv_within_tile(state, orig, x, y, width, height, wpp_limit)) { - continue; - } - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - orig->x + x, - orig->y + y, - width, height, -1); - cost += calc_mvd(state, x, y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - if (cost < best_cost) { - best_cost = cost; - best_bitcost = bitcost; - best_mv.x = x; - best_mv.y = y; - } - } + // Search around the 0-vector. + for (int y = -search_range; y <= search_range; y++) { + for (int x = -search_range; x <= search_range; x++) { + check_mv_cost(info, x, y); } } - // Check mv_in if it's not one of the merge candidates. - if (!mv_in_merge(merge_cand, num_cand, &mv) && - intmv_within_tile(state, orig, mv.x, mv.y, width, height, wpp_limit)) - { - vector2d_t min_mv = { mv.x - search_range, mv.y - search_range }; - vector2d_t max_mv = { mv.x + search_range, mv.y + search_range }; + // Change to integer precision. + extra_mv.x >>= 2; + extra_mv.y >>= 2; - for (int y = min_mv.y; y <= max_mv.y; ++y) { - for (int x = min_mv.x; x <= max_mv.x; ++x) { - if (!intmv_within_tile(state, orig, x, y, width, height, wpp_limit)) { - continue; - } - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - orig->x + x, - orig->y + y, - width, height, -1); - cost += calc_mvd(state, x, y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - if (cost < best_cost) { - best_cost = cost; - best_bitcost = bitcost; - best_mv.x = x; - best_mv.y = y; - } + // Check around extra_mv if it's not one of the merge candidates. + if (!mv_in_merge(info, extra_mv)) { + for (int y = -search_range; y <= search_range; y++) { + for (int x = -search_range; x <= search_range; x++) { + check_mv_cost(info, extra_mv.x + x, extra_mv.y + y); } } } // Select starting point from among merge candidates. These should include // both mv_cand vectors and (0, 0). - for (int i = 0; i < num_cand; ++i) { - if (merge_cand[i].dir == 3) continue; - mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2; - mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2; + for (int i = 0; i < info->num_merge_cand; ++i) { + if (info->merge_cand[i].dir == 3) continue; + + vector2d_t mv = { + .x = info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0] >> 2, + .y = info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1] >> 2, + }; // Ignore 0-vector because it has already been checked. if (mv.x == 0 && mv.y == 0) continue; @@ -926,7 +770,7 @@ for (int y = min_mv.y; y <= max_mv.y; ++y) { for (int x = min_mv.x; x <= max_mv.x; ++x) { - if (!intmv_within_tile(state, orig, x, y, width, height, wpp_limit)) { + if (!intmv_within_tile(info, x, y)) { continue; } @@ -936,9 +780,9 @@ int xx = 0; int yy = 0; if (j >= 0) { - if (merge_cand[j].dir == 3) continue; - xx = merge_cand[j].mv[merge_cand[j].dir - 1][0] >> 2; - yy = merge_cand[j].mv[merge_cand[j].dir - 1][1] >> 2; + if (info->merge_cand[j].dir == 3) continue; + xx = info->merge_cand[j].mv[info->merge_cand[j].dir - 1][0] >> 2; + yy = info->merge_cand[j].mv[info->merge_cand[j].dir - 1][1] >> 2; } if (x >= xx - search_range && x <= xx + search_range && y >= yy - search_range && y <= yy + search_range) @@ -950,51 +794,20 @@ } if (already_tested) continue; - unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, - orig->x + x, - orig->y + y, - width, height, -1); - cost += calc_mvd(state, x, y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - if (cost < best_cost) { - best_cost = cost; - best_bitcost = bitcost; - best_mv.x = x; - best_mv.y = y; - } + check_mv_cost(info, x, y); } } } - - mv_in_out->x = best_mv.x << 2; - mv_in_out->y = best_mv.y << 2; - - *bitcost_out = best_bitcost; - - return best_cost; } /** * \brief Do fractional motion estimation * - * \param width width of the block - * \param height height of the block - * \param pic Picture motion vector is searched for. - * \param ref Picture motion vector is searched from. - * \param orig Top left corner of the searched for block. - * \param mv_in_out Predicted mv in and best out. Quarter pixel precision. - * - * \returns Cost of the motion vector. - * * Algoritm first searches 1/2-pel positions around integer mv and after best match is found, * refines the search by searching best 1/4-pel postion around best 1/2-pel position. */ -static unsigned search_frac(encoder_state_t * const state, - unsigned width, unsigned height, - const kvz_picture *pic, const kvz_picture *ref, - const vector2d_t *orig, vector2d_t *mv_in_out, - int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, int32_t ref_idx, uint32_t *bitcost_out) +static void search_frac(inter_search_info_t *info) { // Map indexes to relative coordinates in the following way: // 5 3 6 @@ -1006,14 +819,12 @@ { 1, -1 }, { -1, 1 }, { 1, 1 } }; - int wpp_limit = get_wpp_limit(state, orig); + // Set mv to pixel precision + vector2d_t mv = { info->best_mv.x >> 2, info->best_mv.y >> 2 }; - //Set mv to halfpel precision - vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 }; unsigned best_cost = UINT32_MAX; uint32_t best_bitcost = 0; uint32_t bitcosts[4] = { 0 }; - unsigned i; unsigned best_index = 0; unsigned costs[4] = { 0 }; @@ -1043,69 +854,99 @@ hpel_pos[6] = fracpel_blocks[HPEL_POS_DIA] + (LCU_WIDTH + 1); hpel_pos[7] = fracpel_blocks[HPEL_POS_DIA] + (LCU_WIDTH + 1) + 1; - int fme_level = state->encoder_control->cfg.fme_level; + const kvz_picture *ref = info->ref; + const kvz_picture *pic = info->pic; + vector2d_t orig = info->origin; + const int width = info->width; + const int height = info->height; - kvz_mvd_cost_func *calc_mvd = calc_mvd_cost; - if (state->encoder_control->cfg.mv_rdo) { - calc_mvd = kvz_calc_mvd_cost_cabac; - } - - kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1, - state->tile->lcu_offset_x * LCU_WIDTH, - state->tile->lcu_offset_y * LCU_WIDTH, - ref->y, ref->width, ref->height, FILTER_SIZE, width+1, height+1, &src); + const encoder_state_t *state = info->state; + int fme_level = state->encoder_control->cfg.fme_level; - kvz_filter_frac_blocks_luma(state->encoder_control, src.orig_topleft, src.stride, width, - height, fracpel_blocks, fme_level); + kvz_get_extended_block(orig.x, orig.y, mv.x - 1, mv.y - 1, + state->tile->offset_x, + state->tile->offset_y, + ref->y, ref->width, ref->height, FILTER_SIZE, + width+1, height+1, + &src); + + kvz_filter_frac_blocks_luma(state->encoder_control, + src.orig_topleft, + src.stride, + width, + height, + fracpel_blocks, + fme_level); kvz_pixel tmp_pic[LCU_WIDTH*LCU_WIDTH]; - kvz_pixels_blit(pic->y + orig->y * pic->stride + orig->x, tmp_pic, width, height, pic->stride, width); + kvz_pixels_blit(pic->y + orig.y * pic->stride + orig.x, + tmp_pic, + width, + height, + pic->stride, + width); // Search integer position costs[0] = kvz_satd_any_size(width, height, tmp_pic, width, src.orig_topleft + src.stride + 1, src.stride); - costs[0] += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[0]); + costs[0] += info->mvd_cost_func(state, + mv.x, mv.y, 2, + info->mv_cand, + info->merge_cand, + info->num_merge_cand, + info->ref_idx, + &bitcosts[0]); best_cost = costs[0]; best_bitcost = bitcosts[0]; int last_hpel_index = (fme_level == 1) ? 4 : 8; //Set mv to half-pixel precision - mv.x <<= 1; - mv.y <<= 1; + mv.x *= 2; + mv.y *= 2; // Search halfpel positions around best integer mv - for (i = 1; i <= last_hpel_index; i+=4) { + for (int i = 1; i <= last_hpel_index; i += 4) { const vector2d_t *pattern[4] = { &square[i], &square[i + 1], &square[i + 2], &square[i + 3] }; - - int8_t within_tile[4] = { - fracmv_within_tile(state, orig, (mv.x + pattern[0]->x) << 1, (mv.y + pattern[0]->y) << 1, width, height, wpp_limit), - fracmv_within_tile(state, orig, (mv.x + pattern[1]->x) << 1, (mv.y + pattern[1]->y) << 1, width, height, wpp_limit), - fracmv_within_tile(state, orig, (mv.x + pattern[2]->x) << 1, (mv.y + pattern[2]->y) << 1, width, height, wpp_limit), - fracmv_within_tile(state, orig, (mv.x + pattern[3]->x) << 1, (mv.y + pattern[3]->y) << 1, width, height, wpp_limit), + + int8_t within_tile[4]; + for (int j = 0; j < 4; j++) { + within_tile[j] = + fracmv_within_tile(info, (mv.x + pattern[j]->x) * 2, (mv.y + pattern[j]->y) * 2); }; int hpel_strides[4] = { - (LCU_WIDTH + 1), - (LCU_WIDTH + 1), - (LCU_WIDTH + 1), + (LCU_WIDTH + 1), + (LCU_WIDTH + 1), + (LCU_WIDTH + 1), (LCU_WIDTH + 1) }; kvz_satd_any_size_quad(width, height, (const kvz_pixel**)(hpel_pos + i - 1), hpel_strides, tmp_pic, width, 4, costs, within_tile); - costs[0] += calc_mvd(state, mv.x + pattern[0]->x, mv.y + pattern[0]->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[0]); - costs[1] += calc_mvd(state, mv.x + pattern[1]->x, mv.y + pattern[1]->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[1]); - costs[2] += calc_mvd(state, mv.x + pattern[2]->x, mv.y + pattern[2]->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[2]); - costs[3] += calc_mvd(state, mv.x + pattern[3]->x, mv.y + pattern[3]->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[3]); + for (int j = 0; j < 4; j++) { + if (within_tile[j]) { + costs[j] += info->mvd_cost_func( + state, + mv.x + pattern[j]->x, + mv.y + pattern[j]->y, + 1, + info->mv_cand, + info->merge_cand, + info->num_merge_cand, + info->ref_idx, + &bitcosts[j] + ); + } + } for (int j = 0; j < 4; ++j) { if (within_tile[j] && costs[j] < best_cost) { best_cost = costs[j]; - best_index = i + j; best_bitcost = bitcosts[j]; + best_index = i + j; } } } @@ -1117,8 +958,8 @@ mv.y += square[best_index].y; //Set mv to quarterpel precision - mv.x <<= 1; - mv.y <<= 1; + mv.x *= 2; + mv.y *= 2; if (fme_level >= 3) { @@ -1127,15 +968,14 @@ int last_qpel_index = (fme_level == 3) ? 4 : 8; //Search quarterpel points around best halfpel mv - for (i = 1; i <= last_qpel_index; i += 4) { + for (int i = 1; i <= last_qpel_index; i += 4) { const vector2d_t *pattern[4] = { &square[i], &square[i + 1], &square[i + 2], &square[i + 3] }; - int8_t within_tile[4] = { - fracmv_within_tile(state, orig, (mv.x + pattern[0]->x), (mv.y + pattern[0]->y), width, height, wpp_limit), - fracmv_within_tile(state, orig, (mv.x + pattern[1]->x), (mv.y + pattern[1]->y), width, height, wpp_limit), - fracmv_within_tile(state, orig, (mv.x + pattern[2]->x), (mv.y + pattern[2]->y), width, height, wpp_limit), - fracmv_within_tile(state, orig, (mv.x + pattern[3]->x), (mv.y + pattern[3]->y), width, height, wpp_limit), - }; + int8_t within_tile[4]; + for (int j = 0; j < 4; j++) { + within_tile[j] = + fracmv_within_tile(info, mv.x + pattern[j]->x, mv.y + pattern[j]->y); + } int qpel_indices[4] = { 0 }; int int_offset_x[4] = { 0 }; @@ -1183,16 +1023,27 @@ kvz_satd_any_size_quad(width, height, (const kvz_pixel**)qpel_pos, qpel_strides, tmp_pic, width, 4, costs, within_tile); - costs[0] += calc_mvd(state, mv.x + pattern[0]->x, mv.y + pattern[0]->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[0]); - costs[1] += calc_mvd(state, mv.x + pattern[1]->x, mv.y + pattern[1]->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[1]); - costs[2] += calc_mvd(state, mv.x + pattern[2]->x, mv.y + pattern[2]->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[2]); - costs[3] += calc_mvd(state, mv.x + pattern[3]->x, mv.y + pattern[3]->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcosts[3]); + for (int j = 0; j < 4; j++) { + if (within_tile[j]) { + costs[j] += info->mvd_cost_func( + state, + mv.x + pattern[j]->x, + mv.y + pattern[j]->y, + 0, + info->mv_cand, + info->merge_cand, + info->num_merge_cand, + info->ref_idx, + &bitcosts[j] + ); + } + } for (int j = 0; j < 4; ++j) { if (within_tile[j] && costs[j] < best_cost) { best_cost = costs[j]; - best_index = i + j; best_bitcost = bitcosts[j]; + best_index = i + j; } } } @@ -1202,61 +1053,79 @@ mv.y += square[best_index].y; } - mv_in_out->x = mv.x; - mv_in_out->y = mv.y; - - *bitcost_out = best_bitcost; + info->best_mv = mv; + info->best_cost = best_cost; + info->best_bitcost = best_bitcost; if (src.malloc_used) free(src.buffer); - - return best_cost; } /** * \brief Perform inter search for a single reference frame. */ -static void search_pu_inter_ref(encoder_state_t * const state, - int x, int y, - int width, int height, +static void search_pu_inter_ref(inter_search_info_t *info, int depth, lcu_t *lcu, cu_info_t *cur_cu, - int16_t mv_cand[2][2], - inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, - unsigned ref_idx, - uint32_t(*get_mvd_cost)(encoder_state_t * const, vector2d_t *, const cabac_data_t*), double *inter_cost, uint32_t *inter_bitcost) { - const videoframe_t * const frame = state->tile->frame; - kvz_picture *ref_image = state->frame->ref->images[ref_idx]; - const vector2d_t orig = { x, y }; - uint32_t temp_bitcost = 0; - uint32_t temp_cost = 0; - int32_t merged = 0; - uint8_t cu_mv_cand = 0; - int8_t merge_idx = 0; - int8_t ref_list = state->frame->refmap[ref_idx].list-1; + const kvz_config *cfg = &info->state->encoder_control->cfg; + + // which list, L0 or L1, ref_idx is in and in what index + int8_t ref_list = -1; + // the index of the ref_idx in L0 or L1 list + int8_t LX_idx; + // max value of LX_idx plus one + const int8_t LX_IDX_MAX_PLUS_1 = MAX(info->state->frame->ref_LX_size[0], + info->state->frame->ref_LX_size[1]); + + for (LX_idx = 0; LX_idx < LX_IDX_MAX_PLUS_1; LX_idx++) + { + // check if ref_idx is in L0 + if (LX_idx < info->state->frame->ref_LX_size[0] && + info->state->frame->ref_LX[0][LX_idx] == info->ref_idx) { + ref_list = 0; + break; + } + + // check if ref_idx is in L1 + if (LX_idx < info->state->frame->ref_LX_size[1] && + info->state->frame->ref_LX[1][LX_idx] == info->ref_idx) { + ref_list = 1; + break; + } + } + // ref_idx has to be found in either L0 or L1 + assert(LX_idx < LX_IDX_MAX_PLUS_1); + + // store temp values to be stored back later int8_t temp_ref_idx = cur_cu->inter.mv_ref[ref_list]; + // Get MV candidates - cur_cu->inter.mv_ref[ref_list] = ref_idx; - kvz_inter_get_mv_cand(state, x, y, width, height, mv_cand, cur_cu, lcu, ref_list); + cur_cu->inter.mv_ref[ref_list] = LX_idx; + + kvz_inter_get_mv_cand(info->state, + info->origin.x, + info->origin.y, + info->width, + info->height, + info->mv_cand, + cur_cu, + lcu, + ref_list); + + // store old values back cur_cu->inter.mv_ref[ref_list] = temp_ref_idx; - vector2d_t mv = { 0, 0 }; { // Take starting point for MV search from previous frame. // When temporal motion vector candidates are added, there is probably // no point to this anymore, but for now it helps. - const vector2d_t tile_top_left_corner = { - (state->tile->lcu_offset_x << LOG2_LCU_WIDTH), - (state->tile->lcu_offset_y << LOG2_LCU_WIDTH) - }; - const int mid_x = tile_top_left_corner.x + x + (width >> 1); - const int mid_y = tile_top_left_corner.y + y + (height >> 1); - const cu_array_t* ref_array = state->frame->ref->cu_arrays[ref_idx]; + const int mid_x = info->state->tile->offset_x + info->origin.x + (info->width >> 1); + const int mid_y = info->state->tile->offset_y + info->origin.y + (info->height >> 1); + const cu_array_t* ref_array = info->state->frame->ref->cu_arrays[info->ref_idx]; const cu_info_t* ref_cu = kvz_cu_array_at_const(ref_array, mid_x, mid_y); if (ref_cu->type == CU_INTER) { if (ref_cu->inter.mv_dir & 1) { @@ -1270,7 +1139,7 @@ } int search_range = 32; - switch (state->encoder_control->cfg.ime_algorithm) { + switch (cfg->ime_algorithm) { case KVZ_IME_FULL64: search_range = 64; break; case KVZ_IME_FULL32: search_range = 32; break; case KVZ_IME_FULL16: search_range = 16; break; @@ -1278,94 +1147,81 @@ default: break; } - switch (state->encoder_control->cfg.ime_algorithm) { + info->best_cost = UINT32_MAX; + + switch (cfg->ime_algorithm) { case KVZ_IME_TZ: - temp_cost += tz_search(state, - width, height, - frame->source, - ref_image, - &orig, - &mv, - mv_cand, - merge_cand, - num_cand, - ref_idx, - &temp_bitcost); + tz_search(info, mv); break; - case KVZ_IME_FULL64: case KVZ_IME_FULL32: case KVZ_IME_FULL16: case KVZ_IME_FULL8: case KVZ_IME_FULL: - temp_cost += search_mv_full(state, - width, height, - frame->source, - ref_image, - &orig, - &mv, - mv_cand, - merge_cand, - num_cand, - ref_idx, - search_range, - &temp_bitcost); + search_mv_full(info, search_range, mv); break; default: - temp_cost += hexagon_search(state, - width, height, - frame->source, - ref_image, - &orig, - &mv, - mv_cand, - merge_cand, - num_cand, - ref_idx, - &temp_bitcost); + hexagon_search(info, mv); break; } - if (state->encoder_control->cfg.fme_level > 0 && temp_cost < *inter_cost) { - temp_cost = search_frac(state, - width, height, - frame->source, - ref_image, - &orig, - &mv, - mv_cand, - merge_cand, - num_cand, - ref_idx, - &temp_bitcost); + if (cfg->fme_level > 0 && info->best_cost < *inter_cost) { + search_frac(info); + + } else if (info->best_cost < UINT32_MAX) { + // Recalculate inter cost with SATD. + info->best_cost = kvz_image_calc_satd( + info->state->tile->frame->source, + info->ref, + info->origin.x, + info->origin.y, + info->state->tile->offset_x + info->origin.x + (info->best_mv.x >> 2), + info->state->tile->offset_y + info->origin.y + (info->best_mv.y >> 2), + info->width, + info->height); + info->best_cost += info->best_bitcost * (int)(info->state->lambda_sqrt + 0.5); } - - merged = 0; + + mv = info->best_mv; + + int merged = 0; + int merge_idx = 0; // Check every candidate to find a match - for(merge_idx = 0; merge_idx < num_cand; merge_idx++) { - if (merge_cand[merge_idx].dir != 3 && - merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == mv.x && - merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == mv.y && - (uint32_t)merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) { + for (merge_idx = 0; merge_idx < info->num_merge_cand; merge_idx++) { + if (info->merge_cand[merge_idx].dir != 3 && + info->merge_cand[merge_idx].mv[info->merge_cand[merge_idx].dir - 1][0] == mv.x && + info->merge_cand[merge_idx].mv[info->merge_cand[merge_idx].dir - 1][1] == mv.y && + (uint32_t)info->state->frame->ref_LX[info->merge_cand[merge_idx].dir - 1][ + info->merge_cand[merge_idx].ref[info->merge_cand[merge_idx].dir - 1]] == info->ref_idx) + { merged = 1; break; } } // Only check when candidates are different - if (!merged && (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) { + int cu_mv_cand = 0; + if (!merged && ( + info->mv_cand[0][0] != info->mv_cand[1][0] || + info->mv_cand[0][1] != info->mv_cand[1][1])) + { + uint32_t (*mvd_coding_cost)(const encoder_state_t * const state, + vector2d_t *, + const cabac_data_t*) = + cfg->mv_rdo ? kvz_get_mvd_coding_cost_cabac : get_mvd_coding_cost; + vector2d_t mvd_temp1, mvd_temp2; int cand1_cost,cand2_cost; - mvd_temp1.x = mv.x - mv_cand[0][0]; - mvd_temp1.y = mv.y - mv_cand[0][1]; - cand1_cost = get_mvd_cost(state, &mvd_temp1, &state->cabac); + mvd_temp1.x = mv.x - info->mv_cand[0][0]; + mvd_temp1.y = mv.y - info->mv_cand[0][1]; + cand1_cost = mvd_coding_cost(info->state, &mvd_temp1, &info->state->cabac); - mvd_temp2.x = mv.x - mv_cand[1][0]; - mvd_temp2.y = mv.y - mv_cand[1][1]; - cand2_cost = get_mvd_cost(state, &mvd_temp2, &state->cabac); + mvd_temp2.x = mv.x - info->mv_cand[1][0]; + mvd_temp2.y = mv.y - info->mv_cand[1][1]; + cand2_cost = mvd_coding_cost(info->state, &mvd_temp2, &info->state->cabac); // Select candidate 1 if it has lower cost if (cand2_cost < cand1_cost) { @@ -1373,20 +1229,21 @@ } } - if (temp_cost < *inter_cost) { + if (info->best_cost < *inter_cost) { // Map reference index to L0/L1 pictures cur_cu->inter.mv_dir = ref_list+1; - uint8_t mv_ref_coded = state->frame->refmap[ref_idx].idx; + uint8_t mv_ref_coded = LX_idx; + + cur_cu->merged = merged; + cur_cu->merge_idx = merge_idx; + cur_cu->inter.mv_ref[ref_list] = LX_idx; + cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x; + cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y; - cur_cu->merged = merged; - cur_cu->merge_idx = merge_idx; - cur_cu->inter.mv_ref[ref_list] = ref_idx; - cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x; - cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y; CU_SET_MV_CAND(cur_cu, ref_list, cu_mv_cand); - *inter_cost = temp_cost; - *inter_bitcost = temp_bitcost + cur_cu->inter.mv_dir - 1 + mv_ref_coded; + *inter_cost = info->best_cost; + *inter_bitcost = info->best_bitcost + cur_cu->inter.mv_dir - 1 + mv_ref_coded; } } @@ -1417,6 +1274,7 @@ *inter_cost = MAX_INT; *inter_bitcost = MAX_INT; + const kvz_config *cfg = &state->encoder_control->cfg; const videoframe_t * const frame = state->tile->frame; const int width_cu = LCU_WIDTH >> depth; const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu); @@ -1435,58 +1293,39 @@ const int y_local = SUB_SCU(y); cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); - int16_t mv_cand[2][2]; - // Search for merge mode candidate - inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS]; - // Get list of candidates - int16_t num_cand = 0; - if (!state->encoder_control->cfg.tmvp_enable) { - num_cand = kvz_inter_get_merge_cand(state, - x, y, - width, height, - merge_a1, merge_b1, - merge_cand, - lcu, - 0); - } + inter_search_info_t info = { + .state = state, + .pic = frame->source, + .origin = { x, y }, + .width = width, + .height = height, + .mvd_cost_func = cfg->mv_rdo ? kvz_calc_mvd_cost_cabac : calc_mvd_cost, + }; - uint32_t(*get_mvd_cost)(encoder_state_t * const state, vector2d_t *, const cabac_data_t*) = get_mvd_coding_cost; - if (state->encoder_control->cfg.mv_rdo) { - get_mvd_cost = kvz_get_mvd_coding_cost_cabac; - } + // Search for merge mode candidates + info.num_merge_cand = kvz_inter_get_merge_cand( + state, + x, y, + width, height, + merge_a1, merge_b1, + info.merge_cand, + lcu + ); // Default to candidate 0 CU_SET_MV_CAND(cur_cu, 0, 0); CU_SET_MV_CAND(cur_cu, 1, 0); - uint32_t ref_idx; - for (ref_idx = 0; ref_idx < state->frame->ref->used_size; ref_idx++) { - if (state->encoder_control->cfg.tmvp_enable) { - // Get list of candidates, TMVP required MV scaling for each reference - num_cand = kvz_inter_get_merge_cand(state, - x, y, - width, height, - merge_a1, merge_b1, - merge_cand, - lcu, - ref_idx); - } - search_pu_inter_ref(state, - x, y, - width, height, - depth, - lcu, cur_cu, - mv_cand, merge_cand, - num_cand, - ref_idx, - get_mvd_cost, - inter_cost, - inter_bitcost); + for (int ref_idx = 0; ref_idx < state->frame->ref->used_size; ref_idx++) { + info.ref_idx = ref_idx; + info.ref = state->frame->ref->images[ref_idx]; + + search_pu_inter_ref(&info, depth, lcu, cur_cu, inter_cost, inter_bitcost); } // Search bi-pred positions bool can_use_bipred = state->frame->slicetype == KVZ_SLICE_B - && state->encoder_control->cfg.bipred + && cfg->bipred && width + height >= 16; // 4x8 and 8x4 PBs are restricted to unipred if (can_use_bipred) { @@ -1494,50 +1333,50 @@ unsigned cu_width = LCU_WIDTH >> depth; static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }; static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }; - const unsigned num_cand_pairs = MIN(num_cand * (num_cand - 1), 12); + const unsigned num_cand_pairs = + MIN(info.num_merge_cand * (info.num_merge_cand - 1), 12); - kvz_mvd_cost_func *calc_mvd = calc_mvd_cost; - if (state->encoder_control->cfg.mv_rdo) { - calc_mvd = kvz_calc_mvd_cost_cabac; - } + inter_merge_cand_t *merge_cand = info.merge_cand; for (int32_t idx = 0; idx < num_cand_pairs; idx++) { uint8_t i = priorityList0[idx]; uint8_t j = priorityList1[idx]; - if (i >= num_cand || j >= num_cand) break; + if (i >= info.num_merge_cand || j >= info.num_merge_cand) break; // Find one L0 and L1 candidate according to the priority list if ((merge_cand[i].dir & 0x1) && (merge_cand[j].dir & 0x2)) { - if (merge_cand[i].ref[0] != merge_cand[j].ref[1] || - merge_cand[i].mv[0][0] != merge_cand[j].mv[1][0] || - merge_cand[i].mv[0][1] != merge_cand[j].mv[1][1]) { + if (state->frame->ref_LX[0][merge_cand[i].ref[0]] != + state->frame->ref_LX[1][merge_cand[j].ref[1]] || + + merge_cand[i].mv[0][0] != merge_cand[j].mv[1][0] || + merge_cand[i].mv[0][1] != merge_cand[j].mv[1][1]) + { uint32_t bitcost[2]; uint32_t cost = 0; int8_t cu_mv_cand = 0; int16_t mv[2][2]; kvz_pixel tmp_block[64 * 64]; kvz_pixel tmp_pic[64 * 64]; - // Force L0 and L1 references - if (state->frame->refmap[merge_cand[i].ref[0]].list == 2 || state->frame->refmap[merge_cand[j].ref[1]].list == 1) continue; mv[0][0] = merge_cand[i].mv[0][0]; mv[0][1] = merge_cand[i].mv[0][1]; mv[1][0] = merge_cand[j].mv[1][0]; mv[1][1] = merge_cand[j].mv[1][1]; + // Don't try merge candidates that don't satisfy mv constraints. + if (!fracmv_within_tile(&info, mv[0][0], mv[0][1]) || + !fracmv_within_tile(&info, mv[1][0], mv[1][1])) { - // Don't try merge candidates that don't satisfy mv constraints. - vector2d_t orig = { x, y }; - if (!fracmv_within_tile(state, &orig, mv[0][0], mv[0][1], width, height, -1) || - !fracmv_within_tile(state, &orig, mv[1][0], mv[1][1], width, height, -1)) - { - continue; - } + continue; } kvz_inter_recon_lcu_bipred(state, - state->frame->ref->images[merge_cand[i].ref[0]], - state->frame->ref->images[merge_cand[j].ref[1]], + state->frame->ref->images[ + state->frame->ref_LX[0][merge_cand[i].ref[0]] + ], + state->frame->ref->images[ + state->frame->ref_LX[1][merge_cand[j].ref[1]] + ], x, y, width, height, @@ -1555,16 +1394,31 @@ cost = kvz_satd_any_size(cu_width, cu_width, tmp_pic, cu_width, tmp_block, cu_width); - cost += calc_mvd(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]); - cost += calc_mvd(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]); + cost += info.mvd_cost_func(state, + merge_cand[i].mv[0][0], + merge_cand[i].mv[0][1], + 0, + info.mv_cand, + NULL, 0, 0, + &bitcost[0]); + cost += info.mvd_cost_func(state, + merge_cand[i].mv[1][0], + merge_cand[i].mv[1][1], + 0, + info.mv_cand, + NULL, 0, 0, + &bitcost[1]); + + const uint8_t mv_ref_coded[2] = { + merge_cand[i].ref[0], + merge_cand[j].ref[1] + }; + const int extra_bits = mv_ref_coded[0] + mv_ref_coded[1] + 2 /* mv dir cost */; + cost += state->lambda_sqrt * extra_bits + 0.5; - if (cost < *inter_cost) { + if (cost < *inter_cost) { cur_cu->inter.mv_dir = 3; - uint8_t mv_ref_coded[2] = { - state->frame->refmap[merge_cand[i].ref[0]].idx, - state->frame->refmap[merge_cand[j].ref[1]].idx - }; cur_cu->inter.mv_ref[0] = merge_cand[i].ref[0]; cur_cu->inter.mv_ref[1] = merge_cand[j].ref[1]; @@ -1574,16 +1428,16 @@ cur_cu->inter.mv[1][0] = merge_cand[j].mv[1][0]; cur_cu->inter.mv[1][1] = merge_cand[j].mv[1][1]; cur_cu->merged = 0; - + // Check every candidate to find a match - for(int merge_idx = 0; merge_idx < num_cand; merge_idx++) { - if ( - merge_cand[merge_idx].mv[0][0] == cur_cu->inter.mv[0][0] && - merge_cand[merge_idx].mv[0][1] == cur_cu->inter.mv[0][1] && + for (int merge_idx = 0; merge_idx < info.num_merge_cand; merge_idx++) { + if (merge_cand[merge_idx].mv[0][0] == cur_cu->inter.mv[0][0] && + merge_cand[merge_idx].mv[0][1] == cur_cu->inter.mv[0][1] && merge_cand[merge_idx].mv[1][0] == cur_cu->inter.mv[1][0] && - merge_cand[merge_idx].mv[1][1] == cur_cu->inter.mv[1][1] && - merge_cand[merge_idx].ref[0] == cur_cu->inter.mv_ref[0] && - merge_cand[merge_idx].ref[1] == cur_cu->inter.mv_ref[1]) { + merge_cand[merge_idx].mv[1][1] == cur_cu->inter.mv[1][1] && + merge_cand[merge_idx].ref[0] == cur_cu->inter.mv_ref[0] && + merge_cand[merge_idx].ref[1] == cur_cu->inter.mv_ref[1]) + { cur_cu->merged = 1; cur_cu->merge_idx = merge_idx; break; @@ -1593,28 +1447,36 @@ // Each motion vector has its own candidate for (int reflist = 0; reflist < 2; reflist++) { cu_mv_cand = 0; - kvz_inter_get_mv_cand(state, x, y, width, height, mv_cand, cur_cu, lcu, reflist); - if ((mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) { + kvz_inter_get_mv_cand(state, x, y, width, height, info.mv_cand, cur_cu, lcu, reflist); + if (info.mv_cand[0][0] != info.mv_cand[1][0] || + info.mv_cand[0][1] != info.mv_cand[1][1]) + { + uint32_t (*mvd_coding_cost)(const encoder_state_t * const state, + vector2d_t *, + const cabac_data_t*) = + cfg->mv_rdo ? kvz_get_mvd_coding_cost_cabac : get_mvd_coding_cost; + vector2d_t mvd_temp1, mvd_temp2; int cand1_cost, cand2_cost; - mvd_temp1.x = cur_cu->inter.mv[reflist][0] - mv_cand[0][0]; - mvd_temp1.y = cur_cu->inter.mv[reflist][1] - mv_cand[0][1]; - cand1_cost = get_mvd_cost(state, &mvd_temp1, (cabac_data_t*)&state->cabac); + mvd_temp1.x = cur_cu->inter.mv[reflist][0] - info.mv_cand[0][0]; + mvd_temp1.y = cur_cu->inter.mv[reflist][1] - info.mv_cand[0][1]; + cand1_cost = mvd_coding_cost(state, &mvd_temp1, (cabac_data_t*)&state->cabac); - mvd_temp2.x = cur_cu->inter.mv[reflist][0] - mv_cand[1][0]; - mvd_temp2.y = cur_cu->inter.mv[reflist][1] - mv_cand[1][1]; - cand2_cost = get_mvd_cost(state, &mvd_temp2, (cabac_data_t*)&state->cabac); + mvd_temp2.x = cur_cu->inter.mv[reflist][0] - info.mv_cand[1][0]; + mvd_temp2.y = cur_cu->inter.mv[reflist][1] - info.mv_cand[1][1]; + cand2_cost = mvd_coding_cost(state, &mvd_temp2, (cabac_data_t*)&state->cabac); // Select candidate 1 if it has lower cost if (cand2_cost < cand1_cost) { - cu_mv_cand = 1; + cu_mv_cand = 1; } } CU_SET_MV_CAND(cur_cu, reflist, cu_mv_cand); } + *inter_cost = cost; - *inter_bitcost = bitcost[0] + bitcost[1] + cur_cu->inter.mv_dir - 1 + mv_ref_coded[0] + mv_ref_coded[1]; + *inter_bitcost = bitcost[0] + bitcost[1] + extra_bits; } } } @@ -1622,11 +1484,8 @@ FREE_POINTER(templcu); } - if (*inter_cost < INT_MAX) { - const vector2d_t orig = { x, y }; - if (cur_cu->inter.mv_dir == 1) { - assert(fracmv_within_tile(state, &orig, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1], width, height, -1)); - } + if (*inter_cost < INT_MAX && cur_cu->inter.mv_dir == 1) { + assert(fracmv_within_tile(&info, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1])); } } @@ -1707,6 +1566,13 @@ search_pu_inter(state, x, y, depth, part_mode, i, lcu, &cost, &bitcost); + if (cost >= MAX_INT) { + // Could not find any motion vector. + *inter_cost = MAX_INT; + *inter_bitcost = MAX_INT; + return; + } + *inter_cost += cost; *inter_bitcost += bitcost;
View file
kvazaar-1.1.0.tar.gz/src/search_inter.h -> kvazaar-1.2.0.tar.gz/src/search_inter.h
Changed
@@ -50,14 +50,14 @@ HPEL_POS_DIA = 2 }; -typedef int kvz_mvd_cost_func(encoder_state_t * const state, - int x, int y, - int mv_shift, - int16_t mv_cand[2][2], - inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, - int32_t ref_idx, - uint32_t *bitcost); +typedef uint32_t kvz_mvd_cost_func(const encoder_state_t *state, + int x, int y, + int mv_shift, + int16_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + int32_t ref_idx, + uint32_t *bitcost); void kvz_search_cu_inter(encoder_state_t * const state, int x, int y, int depth, @@ -73,4 +73,10 @@ double *inter_cost, uint32_t *inter_bitcost); + +unsigned kvz_inter_satd_cost(const encoder_state_t* state, + const lcu_t *lcu, + int x, + int y); + #endif // SEARCH_INTER_H_
View file
kvazaar-1.1.0.tar.gz/src/search_intra.c -> kvazaar-1.2.0.tar.gz/src/search_intra.c
Changed
@@ -220,15 +220,20 @@ nosplit_cost = 0.0; cbf_clear(&pred_cu->cbf, depth, COLOR_Y); - - kvz_intra_recon_lcu_luma(state, x_px, y_px, depth, intra_mode, pred_cu, lcu); - nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); - if (reconstruct_chroma) { cbf_clear(&pred_cu->cbf, depth, COLOR_U); cbf_clear(&pred_cu->cbf, depth, COLOR_V); + } - kvz_intra_recon_lcu_chroma(state, x_px, y_px, depth, intra_mode, pred_cu, lcu); + const int8_t chroma_mode = reconstruct_chroma ? intra_mode : -1; + kvz_intra_recon_cu(state, + x_px, y_px, + depth, + intra_mode, chroma_mode, + pred_cu, lcu); + + nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + if (reconstruct_chroma) { nosplit_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); } @@ -697,7 +702,11 @@ for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) { chroma.mode = modes[chroma_mode_i]; - kvz_intra_recon_lcu_chroma(state, x_px, y_px, depth, chroma.mode, NULL, lcu); + kvz_intra_recon_cu(state, + x_px, y_px, + depth, + -1, chroma.mode, // skip luma + NULL, lcu); chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode); @@ -836,7 +845,7 @@ // Set transform depth to current depth, meaning no transform splits. kvz_lcu_set_trdepth(lcu, x_px, y_px, depth, depth); - + double best_rough_cost = costs[select_best_mode_index(modes, costs, number_of_modes)]; // Refine results with slower search or get some results if rough search was skipped. const int32_t rdo_level = state->encoder_control->cfg.rdo; if (rdo_level >= 2 || skip_rough_search) { @@ -844,7 +853,7 @@ if (rdo_level == 3) { number_of_modes_to_search = 35; } else if (rdo_level == 2) { - number_of_modes_to_search = (cu_width <= 8) ? 8 : 3; + number_of_modes_to_search = (cu_width == 4) ? 3 : 2; } else { // Check only the predicted modes. number_of_modes_to_search = 0; @@ -863,5 +872,5 @@ uint8_t best_mode_i = select_best_mode_index(modes, costs, number_of_modes); *mode_out = modes[best_mode_i]; - *cost_out = costs[best_mode_i]; + *cost_out = skip_rough_search ? costs[best_mode_i]:best_rough_cost; }
View file
kvazaar-1.1.0.tar.gz/src/strategies/avx2/ipol-avx2.c -> kvazaar-1.2.0.tar.gz/src/strategies/avx2/ipol-avx2.c
Changed
@@ -1384,7 +1384,9 @@ int sample_out_of_bounds = out_of_bounds_y || out_of_bounds_x; if (sample_out_of_bounds){ - out->buffer = MALLOC(kvz_pixel, (width + filter_size) * (height + filter_size)); + // Alloc 5 pixels more than we actually use because AVX2 filter + // functions read up to 5 pixels past the last pixel. + out->buffer = MALLOC(kvz_pixel, (width + filter_size) * (height + filter_size) + 5); if (!out->buffer){ fprintf(stderr, "Memory allocation failed!\n"); assert(0);
View file
kvazaar-1.1.0.tar.gz/src/strategies/avx2/quant-avx2.c -> kvazaar-1.2.0.tar.gz/src/strategies/avx2/quant-avx2.c
Changed
@@ -343,7 +343,7 @@ * \param color Color. * \param scan_order Coefficient scan order. * \param use_trskip Whether transform skip is used. -* \param stride Stride for ref_in, pred_in rec_out and coeff_out. +* \param stride Stride for ref_in, pred_in and rec_out. * \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Reconstructed pixels. @@ -360,7 +360,6 @@ { // Temporary arrays to pass data to and from kvz_quant and transform functions. int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; - coeff_t quant_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; int has_coeffs = 0; @@ -379,35 +378,32 @@ kvz_transform2d(state->encoder_control, residual, coeff, width, (color == COLOR_Y ? 0 : 65535)); } - // Quantize coeffs. (coeff -> quant_coeff) + // Quantize coeffs. (coeff -> coeff_out) if (state->encoder_control->cfg.rdoq_enable && (width > 4 || !state->encoder_control->cfg.rdoq_skip)) { int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0); - kvz_rdoq(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2), + kvz_rdoq(state, coeff, coeff_out, width, width, (color == COLOR_Y ? 0 : 2), scan_order, cur_cu->type, tr_depth); } else { - kvz_quant(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2), + kvz_quant(state, coeff, coeff_out, width, width, (color == COLOR_Y ? 0 : 2), scan_order, cur_cu->type); } // Check if there are any non-zero coefficients. for (int i = 0; i < width * width; i += 8) { - __m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(quant_coeff[i])); + __m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(coeff_out[i])); has_coeffs = !_mm_testz_si128(_mm_set1_epi8(0xFF), v_quant_coeff); if(has_coeffs) break; } - // Copy coefficients to coeff_out. - kvz_coefficients_blit(quant_coeff, coeff_out, width, width, width, out_stride); - // Do the inverse quantization and transformation and the reconstruction to // rec_out. if (has_coeffs) { - // Get quantized residual. (quant_coeff -> coeff -> residual) - kvz_dequant(state, quant_coeff, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type); + // Get quantized residual. (coeff_out -> coeff -> residual) + kvz_dequant(state, coeff_out, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type); if (use_trskip) { kvz_itransformskip(state->encoder_control, residual, coeff, width); } @@ -506,8 +502,29 @@ } } -#endif //COMPILE_INTEL_AVX2 && defined X86_64 +static uint32_t coeff_abs_sum_avx2(const coeff_t *coeffs, const size_t length) +{ + assert(length % 8 == 0); + + __m256i total = _mm256_abs_epi32(_mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*) coeffs))); + + for (int i = 8; i < length; i += 8) { + __m256i temp = _mm256_abs_epi32(_mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*) &coeffs[i]))); + total = _mm256_add_epi32(total, temp); + } + __m128i result128 = _mm_add_epi32( + _mm256_castsi256_si128(total), + _mm256_extractf128_si256(total, 1) + ); + + uint32_t parts[4]; + _mm_storeu_si128((__m128i*) parts, result128); + + return parts[0] + parts[1] + parts[2] + parts[3]; +} + +#endif //COMPILE_INTEL_AVX2 && defined X86_64 int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth) { @@ -519,6 +536,7 @@ success &= kvz_strategyselector_register(opaque, "quantize_residual", "avx2", 40, &kvz_quantize_residual_avx2); success &= kvz_strategyselector_register(opaque, "dequant", "avx2", 40, &kvz_dequant_avx2); } + success &= kvz_strategyselector_register(opaque, "coeff_abs_sum", "avx2", 0, &coeff_abs_sum_avx2); #endif //COMPILE_INTEL_AVX2 && defined X86_64 return success;
View file
kvazaar-1.1.0.tar.gz/src/strategies/avx2/sao-avx2.c -> kvazaar-1.2.0.tar.gz/src/strategies/avx2/sao-avx2.c
Changed
@@ -36,18 +36,13 @@ // is difficult to understand. -static INLINE __m256i load_6_offsets(const int* offsets){ - - return _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((__m128i*) offsets)), _mm_loadl_epi64((__m128i*)&(offsets[4])), 1); -} - -static INLINE __m128i load_6_pixels(const kvz_pixel* data){ - +static INLINE __m128i load_6_pixels(const kvz_pixel* data) +{ return _mm_insert_epi16(_mm_cvtsi32_si128(*(int32_t*)&(data[0])), *(int16_t*)&(data[4]), 2); } -static INLINE __m256i load_5_offsets(const int* offsets){ - +static INLINE __m256i load_5_offsets(const int* offsets) +{ return _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((__m128i*) offsets)), _mm_insert_epi32(_mm_setzero_si128(), offsets[4], 0), 1); } @@ -73,9 +68,12 @@ } -int kvz_sao_edge_ddistortion_avx2(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES]) +static int sao_edge_ddistortion_avx2(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int eo_class, + int offsets[NUM_SAO_EDGE_CATEGORIES]) { int y, x; int sum = 0; @@ -96,7 +94,7 @@ __m256i v_cat = _mm256_cvtepu8_epi32(sao_calc_eo_cat_avx2(&v_a, &v_b, &v_c)); - __m256i v_offset = _mm256_loadu_si256((__m256i*) offsets); + __m256i v_offset = load_5_offsets(offsets); v_offset = _mm256_permutevar8x32_epi32(v_offset, v_cat); __m256i v_diff = _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)&(orig_data[y * block_width + x]))); @@ -117,7 +115,7 @@ __m256i v_cat = _mm256_cvtepu8_epi32(sao_calc_eo_cat_avx2(&v_a, &v_b, &v_c)); - __m256i v_offset = load_6_offsets(offsets); + __m256i v_offset = load_5_offsets(offsets); v_offset = _mm256_permutevar8x32_epi32(v_offset, v_cat); const kvz_pixel* orig_ptr = &(orig_data[y * block_width + x]); @@ -139,7 +137,12 @@ } -static INLINE void accum_count_eo_cat_avx2(__m256i* __restrict v_diff_accum, __m256i* __restrict v_count, __m256i* __restrict v_cat, __m256i* __restrict v_diff, int eo_cat){ +static INLINE void accum_count_eo_cat_avx2(__m256i* __restrict v_diff_accum, + __m256i* __restrict v_count, + __m256i* __restrict v_cat, + __m256i* __restrict v_diff, + int eo_cat) +{ __m256i v_mask = _mm256_cmpeq_epi32(*v_cat, _mm256_set1_epi32(eo_cat)); *v_diff_accum = _mm256_add_epi32(*v_diff_accum, _mm256_and_si256(*v_diff, v_mask)); *v_count = _mm256_sub_epi32(*v_count, v_mask); @@ -151,9 +154,12 @@ accum_count_eo_cat_avx2(&(v_diff_accum[ EO_CAT ]), &(v_count[ EO_CAT ]), &V_CAT , &v_diff, EO_CAT); -void kvz_calc_sao_edge_dir_avx2(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int eo_class, int block_width, int block_height, - int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) +static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int eo_class, + int block_width, + int block_height, + int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) { int y, x; vector2d_t a_ofs = g_sao_edge_offsets[eo_class][0]; @@ -240,30 +246,29 @@ } -void kvz_sao_reconstruct_color_avx2(const encoder_control_t * const encoder, - const kvz_pixel *rec_data, kvz_pixel *new_rec_data, - const sao_info_t *sao, - int stride, int new_stride, - int block_width, int block_height, - color_t color_i) +static void sao_reconstruct_color_avx2(const encoder_control_t * const encoder, + const kvz_pixel *rec_data, kvz_pixel *new_rec_data, + const sao_info_t *sao, + int stride, int new_stride, + int block_width, int block_height, + color_t color_i) { - int y, x; // Arrays orig_data and rec_data are quarter size for chroma. int offset_v = color_i == COLOR_V ? 5 : 0; - if(sao->type == SAO_TYPE_BAND) { - int offsets[1<<KVZ_BIT_DEPTH]; + if (sao->type == SAO_TYPE_BAND) { + int offsets[1 << KVZ_BIT_DEPTH]; kvz_calc_sao_offset_array(encoder, sao, offsets, color_i); - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; ++x) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; ++x) { new_rec_data[y * new_stride + x] = offsets[rec_data[y * stride + x]]; } } } else { // Don't sample the edge pixels because this function doesn't have access to // their neighbours. - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; x+=8) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; x+=8) { vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0]; vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; const kvz_pixel *c_data = &rec_data[y * stride + x]; @@ -299,9 +304,13 @@ } -int kvz_sao_band_ddistortion_avx2(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int band_pos, int sao_bands[4]) +static int sao_band_ddistortion_avx2(const encoder_state_t * const state, + const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int band_pos, + int sao_bands[4]) { int y, x; int shift = state->encoder_control->bitdepth-5; @@ -348,10 +357,10 @@ bool success = true; #if COMPILE_INTEL_AVX2 if (bitdepth == 8) { - success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "avx2", 40, &kvz_sao_edge_ddistortion_avx2); - success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "avx2", 40, &kvz_calc_sao_edge_dir_avx2); - success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "avx2", 40, &kvz_sao_reconstruct_color_avx2); - success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "avx2", 40, &kvz_sao_band_ddistortion_avx2); + success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "avx2", 40, &sao_edge_ddistortion_avx2); + success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "avx2", 40, &calc_sao_edge_dir_avx2); + success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "avx2", 40, &sao_reconstruct_color_avx2); + success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "avx2", 40, &sao_band_ddistortion_avx2); } #endif //COMPILE_INTEL_AVX2 return success;
View file
kvazaar-1.1.0.tar.gz/src/strategies/generic/quant-generic.c -> kvazaar-1.2.0.tar.gz/src/strategies/generic/quant-generic.c
Changed
@@ -169,7 +169,7 @@ * \param color Color. * \param scan_order Coefficient scan order. * \param use_trskip Whether transform skip is used. -* \param stride Stride for ref_in, pred_in rec_out and coeff_out. +* \param stride Stride for ref_in, pred_in and rec_out. * \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Reconstructed pixels. @@ -186,7 +186,6 @@ { // Temporary arrays to pass data to and from kvz_quant and transform functions. int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; - coeff_t quant_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; int has_coeffs = 0; @@ -212,16 +211,16 @@ kvz_transform2d(state->encoder_control, residual, coeff, width, (color == COLOR_Y ? 0 : 65535)); } - // Quantize coeffs. (coeff -> quant_coeff) + // Quantize coeffs. (coeff -> coeff_out) if (state->encoder_control->cfg.rdoq_enable && (width > 4 || !state->encoder_control->cfg.rdoq_skip)) { int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0); - kvz_rdoq(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2), + kvz_rdoq(state, coeff, coeff_out, width, width, (color == COLOR_Y ? 0 : 2), scan_order, cur_cu->type, tr_depth); } else { - kvz_quant(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2), + kvz_quant(state, coeff, coeff_out, width, width, (color == COLOR_Y ? 0 : 2), scan_order, cur_cu->type); } @@ -229,23 +228,20 @@ { int i; for (i = 0; i < width * width; ++i) { - if (quant_coeff[i] != 0) { + if (coeff_out[i] != 0) { has_coeffs = 1; break; } } } - // Copy coefficients to coeff_out. - kvz_coefficients_blit(quant_coeff, coeff_out, width, width, width, out_stride); - // Do the inverse quantization and transformation and the reconstruction to // rec_out. if (has_coeffs) { int y, x; - // Get quantized residual. (quant_coeff -> coeff -> residual) - kvz_dequant(state, quant_coeff, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type); + // Get quantized residual. (coeff_out -> coeff -> residual) + kvz_dequant(state, coeff_out, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type); if (use_trskip) { kvz_itransformskip(state->encoder_control, residual, coeff, width); } @@ -324,6 +320,15 @@ } } +static uint32_t coeff_abs_sum_generic(const coeff_t *coeffs, size_t length) +{ + uint32_t sum = 0; + for (int i = 0; i < length; i++) { + sum += abs(coeffs[i]); + } + return sum; +} + int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth) { bool success = true; @@ -331,6 +336,7 @@ success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic); success &= kvz_strategyselector_register(opaque, "quantize_residual", "generic", 0, &kvz_quantize_residual_generic); success &= kvz_strategyselector_register(opaque, "dequant", "generic", 0, &kvz_dequant_generic); + success &= kvz_strategyselector_register(opaque, "coeff_abs_sum", "generic", 0, &coeff_abs_sum_generic); return success; }
View file
kvazaar-1.1.0.tar.gz/src/strategies/generic/sao-generic.c -> kvazaar-1.2.0.tar.gz/src/strategies/generic/sao-generic.c
Changed
@@ -40,9 +40,12 @@ } -int kvz_sao_edge_ddistortion_generic(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES]) +static int sao_edge_ddistortion_generic(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int eo_class, + int offsets[NUM_SAO_EDGE_CATEGORIES]) { int y, x; int sum = 0; @@ -76,9 +79,12 @@ * \param dir_offsets * \param is_chroma 0 for luma, 1 for chroma. Indicates */ -void kvz_calc_sao_edge_dir_generic(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int eo_class, int block_width, int block_height, - int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) +static void calc_sao_edge_dir_generic(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int eo_class, + int block_width, + int block_height, + int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) { int y, x; vector2d_t a_ofs = g_sao_edge_offsets[eo_class][0]; @@ -103,30 +109,32 @@ } -void kvz_sao_reconstruct_color_generic(const encoder_control_t * const encoder, - const kvz_pixel *rec_data, kvz_pixel *new_rec_data, - const sao_info_t *sao, - int stride, int new_stride, - int block_width, int block_height, - color_t color_i) +static void sao_reconstruct_color_generic(const encoder_control_t * const encoder, + const kvz_pixel *rec_data, + kvz_pixel *new_rec_data, + const sao_info_t *sao, + int stride, + int new_stride, + int block_width, + int block_height, + color_t color_i) { - int y, x; // Arrays orig_data and rec_data are quarter size for chroma. int offset_v = color_i == COLOR_V ? 5 : 0; - if(sao->type == SAO_TYPE_BAND) { + if (sao->type == SAO_TYPE_BAND) { int offsets[1<<KVZ_BIT_DEPTH]; kvz_calc_sao_offset_array(encoder, sao, offsets, color_i); - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; ++x) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; ++x) { new_rec_data[y * new_stride + x] = offsets[rec_data[y * stride + x]]; } } } else { // Don't sample the edge pixels because this function doesn't have access to // their neighbours. - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; ++x) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; ++x) { vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0]; vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; const kvz_pixel *c_data = &rec_data[y * stride + x]; @@ -144,9 +152,13 @@ } -int kvz_sao_band_ddistortion_generic(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int band_pos, int sao_bands[4]) +static int sao_band_ddistortion_generic(const encoder_state_t * const state, + const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int band_pos, + int sao_bands[4]) { int y, x; int shift = state->encoder_control->bitdepth-5; @@ -174,11 +186,11 @@ int kvz_strategy_register_sao_generic(void* opaque, uint8_t bitdepth) { bool success = true; - - success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "generic", 0, &kvz_sao_edge_ddistortion_generic); - success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "generic", 0, &kvz_calc_sao_edge_dir_generic); - success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "generic", 0, &kvz_sao_reconstruct_color_generic); - success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "generic", 0, &kvz_sao_band_ddistortion_generic); + + success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "generic", 0, &sao_edge_ddistortion_generic); + success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "generic", 0, &calc_sao_edge_dir_generic); + success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "generic", 0, &sao_reconstruct_color_generic); + success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "generic", 0, &sao_band_ddistortion_generic); return success; }
View file
kvazaar-1.1.0.tar.gz/src/strategies/strategies-quant.c -> kvazaar-1.2.0.tar.gz/src/strategies/strategies-quant.c
Changed
@@ -29,6 +29,7 @@ quant_func *kvz_quant; quant_residual_func *kvz_quantize_residual; dequant_func *kvz_dequant; +coeff_abs_sum_func *kvz_coeff_abs_sum; int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth) { @@ -40,4 +41,4 @@ success &= kvz_strategy_register_quant_avx2(opaque, bitdepth); } return success; -} \ No newline at end of file +}
View file
kvazaar-1.1.0.tar.gz/src/strategies/strategies-quant.h -> kvazaar-1.2.0.tar.gz/src/strategies/strategies-quant.h
Changed
@@ -45,10 +45,13 @@ typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type); +typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length); + // Declare function pointers. extern quant_func * kvz_quant; extern quant_residual_func * kvz_quantize_residual; extern dequant_func *kvz_dequant; +extern coeff_abs_sum_func *kvz_coeff_abs_sum; int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth); @@ -57,6 +60,7 @@ {"quant", (void**) &kvz_quant}, \ {"quantize_residual", (void**) &kvz_quantize_residual}, \ {"dequant", (void**) &kvz_dequant}, \ + {"coeff_abs_sum", (void**) &kvz_coeff_abs_sum}, \
View file
kvazaar-1.1.0.tar.gz/src/threadqueue.c -> kvazaar-1.2.0.tar.gz/src/threadqueue.c
Changed
@@ -30,668 +30,601 @@ #include "threads.h" -typedef struct { - threadqueue_queue_t * threadqueue; - int worker_id; -} threadqueue_worker_spec; +/** + * \file + * + * Lock acquisition order: + * + * 1. When locking a job and its dependency, the dependecy must be locked + * first and then the job depending on it. + * + * 2. When locking a job and the thread queue, the thread queue must be + * locked first and then the job. + * + * 3. When accessing threadqueue_job_t.next, the thread queue must be + * locked. + */ #define THREADQUEUE_LIST_REALLOC_SIZE 32 -//#define PTHREAD_COND_SIGNAL(c) fprintf(stderr, "%s:%d pthread_cond_signal(%s=%p)\n", __FUNCTION__, __LINE__, #c, c); if (pthread_cond_signal((c)) != 0) { fprintf(stderr, "pthread_cond_signal(%s=%p) failed!\n", #c, c); assert(0); return 0; } -//#define PTHREAD_COND_BROADCAST(c) fprintf(stderr, "%s:%d pthread_cond_broadcast(%s=%p)\n", __FUNCTION__, __LINE__, #c, c); if (pthread_cond_broadcast((c)) != 0) { fprintf(stderr, "pthread_cond_broadcast(%s=%p) failed!\n", #c, c); assert(0); return 0; } -//#define PTHREAD_COND_WAIT(c,l) fprintf(stderr, "%s:%d pthread_cond_wait(%s=%p, %s=%p)\n", __FUNCTION__, __LINE__, #c, c, #l, l); if (pthread_cond_wait((c),(l)) != 0) { fprintf(stderr, "pthread_cond_wait(%s=%p, %s=%p) failed!\n", #c, c, #l, l); assert(0); return 0; } else {fprintf(stderr, "%s:%d pthread_cond_wait(%s=%p, %s=%p) (done)\n", __FUNCTION__, __LINE__, #c, c, #l, l);} -//#define PTHREAD_LOCK(l) fprintf(stderr, "%s:%d pthread_mutex_lock(%s=%p) (try)\n", __FUNCTION__, __LINE__, #l, l); if (pthread_mutex_lock((l)) != 0) { fprintf(stderr, "pthread_mutex_lock(%s=%p) failed!\n", #l, l); assert(0); return 0; } else {fprintf(stderr, "%s:%d pthread_mutex_lock(%s=%p)\n", __FUNCTION__, __LINE__, #l, l);} -//#define PTHREAD_UNLOCK(l) if (pthread_mutex_unlock((l)) != 0) { fprintf(stderr, "pthread_mutex_unlock(%s=%p) failed!\n", #l, l); assert(0); return 0; } else {fprintf(stderr, "%s:%d pthread_mutex_unlock(%s=%p)\n", __FUNCTION__, __LINE__, #l, l);} - - -#define PTHREAD_COND_SIGNAL(c) if (pthread_cond_signal((c)) != 0) { fprintf(stderr, "pthread_cond_signal(%s=%p) failed!\n", #c, c); assert(0); return 0; } -#define PTHREAD_COND_BROADCAST(c) if (pthread_cond_broadcast((c)) != 0) { fprintf(stderr, "pthread_cond_broadcast(%s=%p) failed!\n", #c, c); assert(0); return 0; } - -#ifndef _PTHREAD_DUMP -#define PTHREAD_COND_WAIT(c,l) if (pthread_cond_wait((c),(l)) != 0) { fprintf(stderr, "pthread_cond_wait(%s=%p, %s=%p) failed!\n", #c, c, #l, l); assert(0); return 0; } -#define PTHREAD_LOCK(l) if (pthread_mutex_lock((l)) != 0) { fprintf(stderr, "pthread_mutex_lock(%s) failed!\n", #l); assert(0); return 0; } -#define PTHREAD_UNLOCK(l) if (pthread_mutex_unlock((l)) != 0) { fprintf(stderr, "pthread_mutex_unlock(%s) failed!\n", #l); assert(0); return 0; } - -#else //PTHREAD_DUMP -#define PTHREAD_LOCK(l) do { \ - PERFORMANCE_MEASURE_START(); \ - if (pthread_mutex_lock((l)) != 0) { fprintf(stderr, "pthread_mutex_lock(%s) failed!\n", #l); assert(0); return 0; } \ - PERFORMANCE_MEASURE_END(NULL, "pthread_mutex_lock(%s=%p)@%s:%d",#l,l,__FUNCTION__, __LINE__); \ -} while (0); - -#define PTHREAD_UNLOCK(l) do { \ - PERFORMANCE_MEASURE_START(); \ - if (pthread_mutex_unlock((l)) != 0) { fprintf(stderr, "pthread_mutex_unlock(%s) failed!\n", #l); assert(0); return 0; } \ - PERFORMANCE_MEASURE_END(NULL, "pthread_mutex_unlock(%s=%p)@%s:%d",#l,l,__FUNCTION__, __LINE__); \ -} while (0); - -#define PTHREAD_COND_WAIT(c,l) do { \ - PERFORMANCE_MEASURE_START(); \ - if (pthread_cond_wait((c),(l)) != 0) { fprintf(stderr, "pthread_cond_wait(%s=%p, %s=%p) failed!\n", #c, c, #l, l); assert(0); return 0;} \ - PERFORMANCE_MEASURE_END(NULL, "pthread_cond_wait(%s=%p, %s=%p)@%s:%d",#c, c, #l, l,__FUNCTION__, __LINE__); \ -} while (0); -#endif //PTHREAD_DUMP - -static void* threadqueue_worker(void* threadqueue_worker_spec_opaque) +#define PTHREAD_COND_SIGNAL(c) \ + if (pthread_cond_signal((c)) != 0) { \ + fprintf(stderr, "pthread_cond_signal(%s=%p) failed!\n", #c, c); \ + assert(0); \ + return 0; \ + } + +#define PTHREAD_COND_BROADCAST(c) \ + if (pthread_cond_broadcast((c)) != 0) { \ + fprintf(stderr, "pthread_cond_broadcast(%s=%p) failed!\n", #c, c); \ + assert(0); \ + return 0; \ + } + +#define PTHREAD_COND_WAIT(c,l) \ + if (pthread_cond_wait((c),(l)) != 0) { \ + fprintf(stderr, "pthread_cond_wait(%s=%p, %s=%p) failed!\n", #c, c, #l, l); \ + assert(0); \ + return 0; \ + } + +#define PTHREAD_LOCK(l) \ + if (pthread_mutex_lock((l)) != 0) { \ + fprintf(stderr, "pthread_mutex_lock(%s) failed!\n", #l); \ + assert(0); \ + return 0; \ + } + +#define PTHREAD_UNLOCK(l) \ + if (pthread_mutex_unlock((l)) != 0) { \ + fprintf(stderr, "pthread_mutex_unlock(%s) failed!\n", #l); \ + assert(0); \ + return 0; \ + } + + +typedef enum { + /** + * \brief Job has been submitted, but is not allowed to run yet. + */ + THREADQUEUE_JOB_STATE_PAUSED, + + /** + * \brief Job is waiting for dependencies. + */ + THREADQUEUE_JOB_STATE_WAITING, + + /** + * \brief Job is ready to run. + */ + THREADQUEUE_JOB_STATE_READY, + + /** + * \brief Job is running. + */ + THREADQUEUE_JOB_STATE_RUNNING, + + /** + * \brief Job is completed. + */ + THREADQUEUE_JOB_STATE_DONE, + +} threadqueue_job_state; + + +struct threadqueue_job_t { + pthread_mutex_t lock; + + threadqueue_job_state state; + + /** + * \brief Number of dependencies that have not been completed yet. + */ + int ndepends; + + /** + * \brief Reverse dependencies. + * + * Array of pointers to jobs that depend on this one. They have to exist + * when the thread finishes, because they cannot be run before. + */ + struct threadqueue_job_t **rdepends; + + /** + * \brief Number of elements in rdepends. + */ + int rdepends_count; + + /** + * \brief Allocated size of rdepends. + */ + int rdepends_size; + + /** + * \brief Reference count + */ + int refcount; + + /** + * \brief Pointer to the function to execute. + */ + void (*fptr)(void *arg); + + /** + * \brief Argument for fptr. + */ + void *arg; + + /** + * \brief Pointer to the next job in the queue. + */ + struct threadqueue_job_t *next; + +}; + + +struct threadqueue_queue_t { + pthread_mutex_t lock; + + /** + * \brief Job available condition variable + * + * Signalled when there is a new job to do. + */ + pthread_cond_t job_available; + + /** + * \brief Job done condition variable + * + * Signalled when a job has been completed. + */ + pthread_cond_t job_done; + + /** + * Array containing spawned threads + */ + pthread_t *threads; + + /** + * \brief Number of threads spawned + */ + int thread_count; + + /** + * \brief Number of threads running + */ + int thread_running_count; + + /** + * \brief If true, threads should stop ASAP. + */ + bool stop; + + /** + * \brief Pointer to the first ready job + */ + threadqueue_job_t *first; + + /** + * \brief Pointer to the last ready job + */ + threadqueue_job_t *last; +}; + + +/** + * \brief Add a job to the queue of jobs ready to run. + * + * The caller must have locked the thread queue and the job. This function + * takes the ownership of the job. + */ +static void threadqueue_push_job(threadqueue_queue_t * threadqueue, + threadqueue_job_t *job) { - threadqueue_worker_spec * const threadqueue_worker_spec = threadqueue_worker_spec_opaque; - threadqueue_queue_t * const threadqueue = threadqueue_worker_spec->threadqueue; - threadqueue_job_t * next_job = NULL; + assert(job->ndepends == 0); + job->state = THREADQUEUE_JOB_STATE_READY; -#ifdef KVZ_DEBUG - KVZ_GET_TIME(&threadqueue->debug_clock_thread_start[threadqueue_worker_spec->worker_id]); -#endif //KVZ_DEBUG + if (threadqueue->first == NULL) { + threadqueue->first = job; + } else { + threadqueue->last->next = job; + } - for(;;) { - threadqueue_job_t * job = NULL; + threadqueue->last = job; + job->next = NULL; +} - PTHREAD_LOCK(&threadqueue->lock); - while(!threadqueue->stop && threadqueue->queue_waiting_execution == 0 && !next_job) { +/** + * \brief Retrieve a job from the queue of jobs ready to run. + * + * The caller must have locked the thread queue. The calling function + * receives the ownership of the job. + */ +static threadqueue_job_t * threadqueue_pop_job(threadqueue_queue_t * threadqueue) +{ + assert(threadqueue->first != NULL); + + threadqueue_job_t *job = threadqueue->first; + threadqueue->first = job->next; + job->next = NULL; + + if (threadqueue->first == NULL) { + threadqueue->last = NULL; + } + + return job; +} + + +/** + * \brief Function executed by worker threads. + */ +static void* threadqueue_worker(void* threadqueue_opaque) +{ + threadqueue_queue_t * const threadqueue = (threadqueue_queue_t *) threadqueue_opaque; + + PTHREAD_LOCK(&threadqueue->lock); + + for (;;) { + while (!threadqueue->stop && threadqueue->first == NULL) { // Wait until there is something to do in the queue. - PTHREAD_COND_WAIT(&threadqueue->cond, &threadqueue->lock); + PTHREAD_COND_WAIT(&threadqueue->job_available, &threadqueue->lock); } - if(threadqueue->stop) { - if (next_job) { - // Put a job we had already reserved back into the queue. - // FIXME: This lock should be unnecessary, as nobody else is allowed - // to touch this job when it's running. - PTHREAD_LOCK(&next_job->lock); - next_job->state = THREADQUEUE_JOB_STATE_QUEUED; - PTHREAD_UNLOCK(&next_job->lock); - } + if (threadqueue->stop) { break; } - //Find a task (should be fast enough) - job = NULL; - if (next_job) { - assert(next_job->ndepends == 0); - job = next_job; - } else { - //FIXME: if not using OWF, the first is better than the second, otherwise we should use the second order - //for (i = threadqueue->queue_count - 1; i >= threadqueue->queue_start; --i) { - //for (i = threadqueue->queue_start; i < threadqueue->queue_count; ++i) { - - for (int i = (threadqueue->fifo ? threadqueue->queue_start : threadqueue->queue_count - 1); - (threadqueue->fifo ? i < threadqueue->queue_count : i >= threadqueue->queue_start); - (threadqueue->fifo ? ++i : --i)) { - threadqueue_job_t * const i_job = threadqueue->queue[i]; - - if (i_job->state == THREADQUEUE_JOB_STATE_QUEUED && i_job->ndepends == 0) { - // Once we found the job with no dependancies, lock it and change - // its state to running, so nobody else can claim it. - PTHREAD_LOCK(&i_job->lock); - if (i_job->state == THREADQUEUE_JOB_STATE_QUEUED && i_job->ndepends == 0) { - job = i_job; - job->state = THREADQUEUE_JOB_STATE_RUNNING; - } - PTHREAD_UNLOCK(&i_job->lock); - if (job) break; - } - } - } - - if (!job) { - // We have no job. Probably because more threads were woken up than - // there were jobs to do. - PTHREAD_UNLOCK(&threadqueue->lock); - } else { - // We have a job with ndepends==0 and its state is running. - assert(job->state == THREADQUEUE_JOB_STATE_RUNNING); - - // Advance queue_start to skip all the running jobs. - while (threadqueue->queue_start < threadqueue->queue_count && - threadqueue->queue[threadqueue->queue_start]->state != THREADQUEUE_JOB_STATE_QUEUED) - { - threadqueue->queue_start++; - } - - if (!next_job) { - --threadqueue->queue_waiting_execution; - ++threadqueue->queue_running; - } - - PTHREAD_UNLOCK(&threadqueue->lock); - -#ifdef KVZ_DEBUG - job->debug_worker_id = threadqueue_worker_spec->worker_id; - KVZ_GET_TIME(&job->debug_clock_start); -#endif //KVZ_DEBUG - - job->fptr(job->arg); - -#ifdef KVZ_DEBUG - job->debug_worker_id = threadqueue_worker_spec->worker_id; - KVZ_GET_TIME(&job->debug_clock_stop); -#endif //KVZ_DEBUG - - // FIXME: This lock should be unnecessary, as nobody else is allowed - // to touch this job when it's running. - PTHREAD_LOCK(&job->lock); - assert(job->state == THREADQUEUE_JOB_STATE_RUNNING); - - job->state = THREADQUEUE_JOB_STATE_DONE; - - next_job = NULL; - - int queue_waiting_dependency_decr = 0; - int queue_waiting_execution_incr = 0; - - // Go throught all the jobs that depend on this one, decresing their ndepends. - for (int i = 0; i < job->rdepends_count; ++i) { - threadqueue_job_t * const depjob = job->rdepends[i]; - // Note that we lock the dependency AFTER locking the source. This avoids a deadlock in dep_add. - PTHREAD_LOCK(&depjob->lock); - - assert(depjob->state == THREADQUEUE_JOB_STATE_QUEUED); - assert(depjob->ndepends > 0); - --depjob->ndepends; - - // Count how many jobs can now start executing so we know how many - // threads to wake up. - if (depjob->ndepends == 0) { - if (!next_job) { - // Avoid having to find a new job for this worker through the - // queue by taking one of the jobs that depended on current job. - next_job = depjob; - depjob->state = THREADQUEUE_JOB_STATE_RUNNING; - } else { - ++queue_waiting_execution_incr; - } - ++queue_waiting_dependency_decr; - } - - PTHREAD_UNLOCK(&depjob->lock); - } - - PTHREAD_UNLOCK(&job->lock); + // Get a job and remove it from the queue. + threadqueue_job_t *job = threadqueue_pop_job(threadqueue); - PTHREAD_LOCK(&threadqueue->lock); + PTHREAD_LOCK(&job->lock); + assert(job->state == THREADQUEUE_JOB_STATE_READY); + job->state = THREADQUEUE_JOB_STATE_RUNNING; + PTHREAD_UNLOCK(&job->lock); + PTHREAD_UNLOCK(&threadqueue->lock); - assert(threadqueue->queue_waiting_dependency >= queue_waiting_dependency_decr); + job->fptr(job->arg); - // This thread will - if (!next_job) { - // We didn't find a new job, so this thread will have to go wait. - threadqueue->queue_running--; + PTHREAD_LOCK(&threadqueue->lock); + PTHREAD_LOCK(&job->lock); + assert(job->state == THREADQUEUE_JOB_STATE_RUNNING); + job->state = THREADQUEUE_JOB_STATE_DONE; + + PTHREAD_COND_SIGNAL(&threadqueue->job_done); + + // Go through all the jobs that depend on this one, decreasing their + // ndepends. Count how many jobs can now start executing so we know how + // many threads to wake up. + int num_new_jobs = 0; + for (int i = 0; i < job->rdepends_count; ++i) { + threadqueue_job_t * const depjob = job->rdepends[i]; + // The dependency (job) is locked before the job depending on it. + // This must be the same order as in kvz_threadqueue_job_dep_add. + PTHREAD_LOCK(&depjob->lock); + + assert(depjob->state == THREADQUEUE_JOB_STATE_WAITING || + depjob->state == THREADQUEUE_JOB_STATE_PAUSED); + assert(depjob->ndepends > 0); + depjob->ndepends--; + + if (depjob->ndepends == 0 && depjob->state == THREADQUEUE_JOB_STATE_WAITING) { + // Move the job to ready jobs. + threadqueue_push_job(threadqueue, kvz_threadqueue_copy_ref(depjob)); + num_new_jobs++; } - threadqueue->queue_waiting_dependency -= queue_waiting_dependency_decr; - threadqueue->queue_waiting_execution += queue_waiting_execution_incr; - // Wake up enough threads to take care of the tasks now lacking dependancies. - for (int i = 0; i < queue_waiting_execution_incr; ++i) { - PTHREAD_COND_SIGNAL(&threadqueue->cond); - } + // Clear this reference to the job. + PTHREAD_UNLOCK(&depjob->lock); + kvz_threadqueue_free_job(&job->rdepends[i]); + } + job->rdepends_count = 0; - // Signal main thread that a job has been completed. - pthread_cond_signal(&threadqueue->cb_cond); + PTHREAD_UNLOCK(&job->lock); + kvz_threadqueue_free_job(&job); - PTHREAD_UNLOCK(&threadqueue->lock); + // The current thread will process one of the new jobs so we wake up + // one threads less than the the number of new jobs. + for (int i = 0; i < num_new_jobs - 1; i++) { + pthread_cond_signal(&threadqueue->job_available); } } - // We got out of the loop because threadqueue->stop == 1. The queue is locked. - assert(threadqueue->stop); - --threadqueue->threads_running; - -#ifdef KVZ_DEBUG - KVZ_GET_TIME(&threadqueue->debug_clock_thread_end[threadqueue_worker_spec->worker_id]); - - fprintf(threadqueue->debug_log, "\t%d\t-\t%lf\t+%lf\t-\tthread\n", threadqueue_worker_spec->worker_id, KVZ_CLOCK_T_AS_DOUBLE(threadqueue->debug_clock_thread_start[threadqueue_worker_spec->worker_id]), KVZ_CLOCK_T_DIFF(threadqueue->debug_clock_thread_start[threadqueue_worker_spec->worker_id], threadqueue->debug_clock_thread_end[threadqueue_worker_spec->worker_id])); -#endif //KVZ_DEBUG - + threadqueue->thread_running_count--; PTHREAD_UNLOCK(&threadqueue->lock); - - free(threadqueue_worker_spec_opaque); - - pthread_exit(NULL); - return NULL; } -int kvz_threadqueue_init(threadqueue_queue_t * const threadqueue, int thread_count, int fifo) { - int i; +/** + * \brief Initialize the queue. + * + * \return 1 on success, 0 on failure + */ +threadqueue_queue_t * kvz_threadqueue_init(int thread_count) +{ + threadqueue_queue_t *threadqueue = MALLOC(threadqueue_queue_t, 1); + if (!threadqueue) { + goto failed; + } + if (pthread_mutex_init(&threadqueue->lock, NULL) != 0) { fprintf(stderr, "pthread_mutex_init failed!\n"); - assert(0); - return 0; + goto failed; } - if (pthread_cond_init(&threadqueue->cond, NULL) != 0) { + + if (pthread_cond_init(&threadqueue->job_available, NULL) != 0) { fprintf(stderr, "pthread_cond_init failed!\n"); - assert(0); - return 0; + goto failed; } - - if (pthread_cond_init(&threadqueue->cb_cond, NULL) != 0) { + + if (pthread_cond_init(&threadqueue->job_done, NULL) != 0) { fprintf(stderr, "pthread_cond_init failed!\n"); - assert(0); - return 0; + goto failed; } - - threadqueue->stop = 0; - threadqueue->fifo = !!fifo; - threadqueue->threads_running = 0; - threadqueue->threads_count = thread_count; - + threadqueue->threads = MALLOC(pthread_t, thread_count); if (!threadqueue->threads) { fprintf(stderr, "Could not malloc threadqueue->threads!\n"); - return 0; + goto failed; } -#ifdef KVZ_DEBUG - threadqueue->debug_clock_thread_start = MALLOC(KVZ_CLOCK_T, thread_count); - assert(threadqueue->debug_clock_thread_start); - threadqueue->debug_clock_thread_end = MALLOC(KVZ_CLOCK_T, thread_count); - assert(threadqueue->debug_clock_thread_end); - threadqueue->debug_log = fopen("threadqueue.log", "w"); -#endif //KVZ_DEBUG - - threadqueue->queue = NULL; - threadqueue->queue_size = 0; - threadqueue->queue_count = 0; - threadqueue->queue_start = 0; - threadqueue->queue_waiting_execution = 0; - threadqueue->queue_waiting_dependency = 0; - threadqueue->queue_running = 0; - - //Lock the queue before creating threads, to ensure they all have correct information + threadqueue->thread_count = 0; + threadqueue->thread_running_count = 0; + + threadqueue->stop = false; + + threadqueue->first = NULL; + threadqueue->last = NULL; + + // Lock the queue before creating threads, to ensure they all have correct information. PTHREAD_LOCK(&threadqueue->lock); - - for(i = 0; i < thread_count; i++) { - threadqueue_worker_spec *tqws = MALLOC(threadqueue_worker_spec, 1); - if (tqws) { - tqws->threadqueue = threadqueue; - tqws->worker_id = i; - if(pthread_create(&(threadqueue->threads[i]), NULL, threadqueue_worker, (void*)tqws) != 0) { - fprintf(stderr, "pthread_create failed!\n"); - assert(0); - return 0; - } - threadqueue->threads_running++; - } else { - fprintf(stderr, "Could not allocate threadqueue_worker_spec structure!\n"); - PTHREAD_UNLOCK(&threadqueue->lock); - return 0; + for (int i = 0; i < thread_count; i++) { + if (pthread_create(&threadqueue->threads[i], NULL, threadqueue_worker, threadqueue) != 0) { + fprintf(stderr, "pthread_create failed!\n"); + goto failed; } + threadqueue->thread_count++; + threadqueue->thread_running_count++; } - PTHREAD_UNLOCK(&threadqueue->lock); - return 1; + return threadqueue; + +failed: + kvz_threadqueue_free(threadqueue); + return NULL; } + /** - * \brief Free a single job from the threadqueue index i, destroying it. + * \brief Create a job and return a pointer to it. + * + * The job is created in a paused state. Function kvz_threadqueue_submit + * must be called on the job in order to have it run. + * + * \return pointer to the job, or NULL on failure */ -static void threadqueue_free_job(threadqueue_queue_t * const threadqueue, int i) +threadqueue_job_t * kvz_threadqueue_job_create(void (*fptr)(void *arg), void *arg) { -#ifdef KVZ_DEBUG -#if KVZ_DEBUG & KVZ_PERF_JOB - int j; - KVZ_GET_TIME(&threadqueue->queue[i]->debug_clock_dequeue); - fprintf(threadqueue->debug_log, "%p\t%d\t%lf\t+%lf\t+%lf\t+%lf\t%s\n", threadqueue->queue[i], threadqueue->queue[i]->debug_worker_id, KVZ_CLOCK_T_AS_DOUBLE(threadqueue->queue[i]->debug_clock_enqueue), KVZ_CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_enqueue, threadqueue->queue[i]->debug_clock_start), KVZ_CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_start, threadqueue->queue[i]->debug_clock_stop), KVZ_CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_stop, threadqueue->queue[i]->debug_clock_dequeue), threadqueue->queue[i]->debug_description); - - for (j = 0; j < threadqueue->queue[i]->rdepends_count; ++j) { - fprintf(threadqueue->debug_log, "%p->%p\n", threadqueue->queue[i], threadqueue->queue[i]->rdepends[j]); + threadqueue_job_t *job = MALLOC(threadqueue_job_t, 1); + if (!job) { + fprintf(stderr, "Could not alloc job!\n"); + return NULL; + } + + if (pthread_mutex_init(&job->lock, NULL) != 0) { + fprintf(stderr, "pthread_mutex_init(job) failed!\n"); + return NULL; } - FREE_POINTER(threadqueue->queue[i]->debug_description); -#endif -#endif - FREE_POINTER(threadqueue->queue[i]->rdepends); - - pthread_mutex_destroy(&threadqueue->queue[i]->lock); + job->state = THREADQUEUE_JOB_STATE_PAUSED; + job->ndepends = 0; + job->rdepends = NULL; + job->rdepends_count = 0; + job->rdepends_size = 0; + job->refcount = 1; + job->fptr = fptr; + job->arg = arg; - FREE_POINTER(threadqueue->queue[i]); + return job; } -static void threadqueue_free_jobs(threadqueue_queue_t * const threadqueue) { - int i; - for (i=0; i < threadqueue->queue_count; ++i) { - threadqueue_free_job(threadqueue, i); - } - threadqueue->queue_count = 0; - threadqueue->queue_start = 0; -#ifdef KVZ_DEBUG -#if KVZ_DEBUG & KVZ_PERF_JOB - { - KVZ_CLOCK_T time; - KVZ_GET_TIME(&time); - - fprintf(threadqueue->debug_log, "\t\t-\t-\t%lf\t-\tFLUSH\n", KVZ_CLOCK_T_AS_DOUBLE(time)); - } -#endif -#endif -} -int kvz_threadqueue_finalize(threadqueue_queue_t * const threadqueue) { - int i; - - //Flush the queue - if (!kvz_threadqueue_flush(threadqueue)) { - fprintf(stderr, "Unable to flush threadqueue!\n"); - return 0; - } - - //Lock threadqueue +int kvz_threadqueue_submit(threadqueue_queue_t * const threadqueue, threadqueue_job_t *job) +{ PTHREAD_LOCK(&threadqueue->lock); - - //Free job memory - threadqueue_free_jobs(threadqueue); - - if (threadqueue->stop) { - fprintf(stderr, "threadqueue already stopping\n"); - - if (pthread_mutex_unlock(&threadqueue->lock) != 0) { - fprintf(stderr, "pthread_mutex_unlock failed!\n"); - assert(0); - return 0; - } - assert(0); //We should get here... - return 0; - } - - threadqueue->stop = 1; - - if (pthread_cond_broadcast(&(threadqueue->cond)) != 0) { - fprintf(stderr, "pthread_cond_broadcast failed!\n"); - PTHREAD_UNLOCK(&threadqueue->lock); - assert(0); - return 0; + PTHREAD_LOCK(&job->lock); + assert(job->state == THREADQUEUE_JOB_STATE_PAUSED); + + if (threadqueue->thread_count == 0) { + // When not using threads, run the job immediately. + job->fptr(job->arg); + job->state = THREADQUEUE_JOB_STATE_DONE; + } else if (job->ndepends == 0) { + threadqueue_push_job(threadqueue, kvz_threadqueue_copy_ref(job)); + pthread_cond_signal(&threadqueue->job_available); + } else { + job->state = THREADQUEUE_JOB_STATE_WAITING; } - //Unlock it now, since all jobs have to stpo + PTHREAD_UNLOCK(&job->lock); PTHREAD_UNLOCK(&threadqueue->lock); - - //Join threads - for(i = 0; i < threadqueue->threads_count; i++) { - if(pthread_join(threadqueue->threads[i], NULL) != 0) { - fprintf(stderr, "pthread_join failed!\n"); - return 0; - } - } - -#ifdef KVZ_DEBUG - FREE_POINTER(threadqueue->debug_clock_thread_start); - FREE_POINTER(threadqueue->debug_clock_thread_end); - fclose(threadqueue->debug_log); -#endif - - //Free allocated stuff - FREE_POINTER(threadqueue->queue); - threadqueue->queue_count = 0; - threadqueue->queue_size = 0; - threadqueue->queue_start = 0; - - FREE_POINTER(threadqueue->threads); - threadqueue->threads_count = 0; - - if (pthread_mutex_destroy(&threadqueue->lock) != 0) { - fprintf(stderr, "pthread_mutex_destroy failed!\n"); - assert(0); - return 0; - } - if (pthread_cond_destroy(&threadqueue->cond) != 0) { - fprintf(stderr, "pthread_cond_destroy failed!\n"); - assert(0); - return 0; - } - - if (pthread_cond_destroy(&threadqueue->cb_cond) != 0) { - fprintf(stderr, "pthread_cond_destroy failed!\n"); - assert(0); - return 0; - } - + return 1; } -int kvz_threadqueue_flush(threadqueue_queue_t * const threadqueue) { - int notdone = 1; - - //Lock the queue - PTHREAD_LOCK(&threadqueue->lock); - - do { - notdone = threadqueue->queue_waiting_execution + threadqueue->queue_waiting_dependency + threadqueue->queue_running; - if (notdone > 0) { - PTHREAD_COND_BROADCAST(&(threadqueue->cond)); - PTHREAD_COND_WAIT(&threadqueue->cb_cond, &threadqueue->lock); - } - } while (notdone > 0); - - threadqueue_free_jobs(threadqueue); +/** + * \brief Add a dependency between two jobs. + * + * \param job job that should be executed after dependency + * \param dependency job that should be executed before job + * + * \return 1 on success, 0 on failure + * + */ +int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *dependency) +{ + // Lock the dependency first and then the job depending on it. + // This must be the same order as in threadqueue_worker. + PTHREAD_LOCK(&dependency->lock); + + if (dependency->state == THREADQUEUE_JOB_STATE_DONE) { + // The dependency has been completed already so there is nothing to do. + PTHREAD_UNLOCK(&dependency->lock); + return 1; + } - assert(threadqueue->queue_waiting_dependency == 0 && threadqueue->queue_waiting_execution == 0 && threadqueue->queue_running == 0); + PTHREAD_LOCK(&job->lock); + job->ndepends++; + PTHREAD_UNLOCK(&job->lock); - PTHREAD_UNLOCK(&threadqueue->lock); + // Add the reverse dependency + if (dependency->rdepends_count >= dependency->rdepends_size) { + dependency->rdepends_size += THREADQUEUE_LIST_REALLOC_SIZE; + size_t bytes = dependency->rdepends_size * sizeof(threadqueue_job_t*); + dependency->rdepends = realloc(dependency->rdepends, bytes); + } + dependency->rdepends[dependency->rdepends_count++] = kvz_threadqueue_copy_ref(job); + + PTHREAD_UNLOCK(&dependency->lock); return 1; } -int kvz_threadqueue_waitfor(threadqueue_queue_t * const threadqueue, threadqueue_job_t * const job) { - int job_done = 0; - - //NULL job is clearly OK :-) - if (!job) return 1; - - //Lock the queue - PTHREAD_LOCK(&threadqueue->lock); - do { - - PTHREAD_LOCK(&job->lock); - job_done = (job->state == THREADQUEUE_JOB_STATE_DONE); - PTHREAD_UNLOCK(&job->lock); - - if (!job_done) { - PTHREAD_COND_BROADCAST(&(threadqueue->cond)); - PTHREAD_COND_WAIT(&threadqueue->cb_cond, &threadqueue->lock); - } - } while (!job_done); - - // Free jobs submitted before this job. - int i; - for (i = 0; i < threadqueue->queue_count; ++i) { - if (threadqueue->queue[i] == job) break; - threadqueue_free_job(threadqueue, i); - } - // Move remaining jobs to the beginning of the array. - if (i > 0) { - threadqueue->queue_count -= i; - threadqueue->queue_start = 0; - memmove(threadqueue->queue, &threadqueue->queue[i], threadqueue->queue_count * sizeof(*threadqueue->queue)); - FILL_ARRAY(&threadqueue->queue[threadqueue->queue_count], 0, i); - } - PTHREAD_UNLOCK(&threadqueue->lock); - - return 1; +/** + * \brief Get a new pointer to a job. + * + * Increment reference count and return the job. + */ +threadqueue_job_t *kvz_threadqueue_copy_ref(threadqueue_job_t *job) +{ + // The caller should have had another reference. + assert(job->refcount > 0); + KVZ_ATOMIC_INC(&job->refcount); + return job; } -threadqueue_job_t * kvz_threadqueue_submit(threadqueue_queue_t * const threadqueue, void (*fptr)(void *arg), void *arg, int wait, const char* const debug_description) { - threadqueue_job_t *job; - //No lock here... this should be constant - if (threadqueue->threads_count == 0) { - //FIXME: This should be improved in order to handle dependencies - PERFORMANCE_MEASURE_START(KVZ_PERF_JOB); - fptr(arg); - PERFORMANCE_MEASURE_END(KVZ_PERF_JOB, threadqueue, "%s", debug_description); - return NULL; - } - - assert(wait == 0 || wait == 1); - - job = MALLOC(threadqueue_job_t, 1); - -#ifdef KVZ_DEBUG - if (debug_description) { - size_t desc_len = MIN(255, strlen(debug_description)); - char* desc; - - //Copy description - desc = MALLOC(char, desc_len + 1); - assert(desc); - memcpy(desc, debug_description, desc_len); - desc[desc_len] = 0; - - job->debug_description = desc; - } else { - char* desc; - desc = MALLOC(char, 255); - sprintf(desc, "(*%p)(%p)", fptr, arg); - - job->debug_description = desc; - } - KVZ_GET_TIME(&job->debug_clock_enqueue); -#endif //KVZ_DEBUG - - if (!job) { - fprintf(stderr, "Could not alloc job!\n"); - assert(0); - return NULL; + +/** + * \brief Free a job. + * + * Decrement reference count of the job. If no references exist any more, + * deallocate associated memory and destroy mutexes. + * + * Sets the job pointer to NULL. + */ +void kvz_threadqueue_free_job(threadqueue_job_t **job_ptr) +{ + threadqueue_job_t *job = *job_ptr; + if (job == NULL) return; + *job_ptr = NULL; + + int new_refcount = KVZ_ATOMIC_DEC(&job->refcount); + if (new_refcount > 0) { + // There are still references so we don't free the data yet. + return; } - - job->fptr = fptr; - job->arg = arg; - if (pthread_mutex_init(&job->lock, NULL) != 0) { - fprintf(stderr, "pthread_mutex_init(job) failed!\n"); - assert(0); - return NULL; + + assert(new_refcount == 0); + + for (int i = 0; i < job->rdepends_count; i++) { + kvz_threadqueue_free_job(&job->rdepends[i]); } - job->ndepends = wait; - job->rdepends = NULL; job->rdepends_count = 0; - job->rdepends_size = 0; - job->state = THREADQUEUE_JOB_STATE_QUEUED; - - PTHREAD_LOCK(&threadqueue->lock); - - //Add the reverse dependency - if (threadqueue->queue_count >= threadqueue->queue_size) { - threadqueue->queue = realloc(threadqueue->queue, sizeof(threadqueue_job_t *) * (threadqueue->queue_size + THREADQUEUE_LIST_REALLOC_SIZE)); - if (!threadqueue->queue) { - fprintf(stderr, "Could not realloc queue!\n"); - assert(0); - return NULL; - } - threadqueue->queue_size += THREADQUEUE_LIST_REALLOC_SIZE; - } - threadqueue->queue[threadqueue->queue_count++] = job; - - if (job->ndepends == 0) { - ++threadqueue->queue_waiting_execution; - //Hope a thread can do it... - PTHREAD_COND_SIGNAL(&(threadqueue->cond)); - } else { - ++threadqueue->queue_waiting_dependency; - } - - PTHREAD_UNLOCK(&threadqueue->lock); - - return job; + + FREE_POINTER(job->rdepends); + pthread_mutex_destroy(&job->lock); + FREE_POINTER(job); } -int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *depends_on) { - //If we are not using threads, job are NULL pointers, so we can skip that - if (!job && !depends_on) return 1; - - assert(job && depends_on); - - //Lock first the job, and then the dependency + +/** + * \brief Wait for a job to be completed. + * + * \return 1 on success, 0 on failure + */ +int kvz_threadqueue_waitfor(threadqueue_queue_t * threadqueue, threadqueue_job_t * job) +{ PTHREAD_LOCK(&job->lock); - PTHREAD_LOCK(&depends_on->lock); - - if (depends_on->state != THREADQUEUE_JOB_STATE_DONE) { - job->ndepends++; - } - - //Add the reverse dependency (FIXME: this may be moved in the if above... but we would lose ability to track) - if (depends_on->rdepends_count >= depends_on->rdepends_size) { - depends_on->rdepends = realloc(depends_on->rdepends, sizeof(threadqueue_job_t *) * (depends_on->rdepends_size + THREADQUEUE_LIST_REALLOC_SIZE)); - if (!depends_on->rdepends) { - fprintf(stderr, "Could not realloc rdepends!\n"); - assert(0); - return 0; - } - depends_on->rdepends_size += THREADQUEUE_LIST_REALLOC_SIZE; + while (job->state != THREADQUEUE_JOB_STATE_DONE) { + PTHREAD_COND_WAIT(&threadqueue->job_done, &job->lock); } - depends_on->rdepends[depends_on->rdepends_count++] = job; - - PTHREAD_UNLOCK(&depends_on->lock); PTHREAD_UNLOCK(&job->lock); - + return 1; } -int kvz_threadqueue_job_unwait_job(threadqueue_queue_t * const threadqueue, threadqueue_job_t *job) { - int ndepends = 0; - - //NULL job => no threads, nothing to do - if (!job) return 1; - PTHREAD_LOCK(&job->lock); - job->ndepends--; - ndepends = job->ndepends; - PTHREAD_UNLOCK(&job->lock); - - if (ndepends == 0) { - PTHREAD_LOCK(&threadqueue->lock); - assert(threadqueue->queue_waiting_dependency > 0); - --threadqueue->queue_waiting_dependency; - ++threadqueue->queue_waiting_execution; - //Hope a thread can do it... - PTHREAD_COND_SIGNAL(&(threadqueue->cond)); - + +/** + * \brief Stop all threads after they finish the current jobs. + * + * Block until all threads have stopped. + * + * \return 1 on success, 0 on failure + */ +int kvz_threadqueue_stop(threadqueue_queue_t * const threadqueue) +{ + PTHREAD_LOCK(&threadqueue->lock); + + if (threadqueue->stop) { + // The threadqueue should have stopped already. + assert(threadqueue->thread_running_count == 0); PTHREAD_UNLOCK(&threadqueue->lock); + return 1; + } + + // Tell all threads to stop. + threadqueue->stop = true; + PTHREAD_COND_BROADCAST(&threadqueue->job_available); + PTHREAD_UNLOCK(&threadqueue->lock); + + // Wait for them to stop. + for (int i = 0; i < threadqueue->thread_count; i++) { + if (pthread_join(threadqueue->threads[i], NULL) != 0) { + fprintf(stderr, "pthread_join failed!\n"); + return 0; + } } - + return 1; } -#ifdef KVZ_DEBUG -int threadqueue_log(threadqueue_queue_t * threadqueue, const KVZ_CLOCK_T *start, const KVZ_CLOCK_T *stop, const char* debug_description) { - int i, thread_id = -1; - FILE* output; - - assert(start); - - if (threadqueue) { - //We need to lock to output safely - PTHREAD_LOCK(&threadqueue->lock); - - output = threadqueue->debug_log; - - //Find the thread - for(i = 0; i < threadqueue->threads_count; i++) { - if(pthread_equal(threadqueue->threads[i], pthread_self()) != 0) { - thread_id = i; - break; - } - } - } else { - thread_id = -1; - output = stderr; + +/** + * \brief Stop all threads and free allocated resources. + * + * \return 1 on success, 0 on failure + */ +void kvz_threadqueue_free(threadqueue_queue_t *threadqueue) +{ + if (threadqueue == NULL) return; + + kvz_threadqueue_stop(threadqueue); + + // Free all jobs. + while (threadqueue->first) { + threadqueue_job_t *next = threadqueue->first->next; + kvz_threadqueue_free_job(&threadqueue->first); + threadqueue->first = next; } - - if (thread_id >= 0) { - if (stop) { - fprintf(output, "\t%d\t-\t%lf\t+%lf\t-\t%s\n", thread_id, KVZ_CLOCK_T_AS_DOUBLE(*start), KVZ_CLOCK_T_DIFF(*start, *stop), debug_description); - } else { - fprintf(output, "\t%d\t-\t%lf\t-\t-\t%s\n", thread_id, KVZ_CLOCK_T_AS_DOUBLE(*start), debug_description); - } - } else { - if (stop) { - fprintf(output, "\t\t-\t%lf\t+%lf\t-\t%s\n", KVZ_CLOCK_T_AS_DOUBLE(*start), KVZ_CLOCK_T_DIFF(*start, *stop), debug_description); - } else { - fprintf(output, "\t\t-\t%lf\t-\t-\t%s\n", KVZ_CLOCK_T_AS_DOUBLE(*start), debug_description); - } + threadqueue->last = NULL; + + FREE_POINTER(threadqueue->threads); + threadqueue->thread_count = 0; + + if (pthread_mutex_destroy(&threadqueue->lock) != 0) { + fprintf(stderr, "pthread_mutex_destroy failed!\n"); } - - if (threadqueue) { - PTHREAD_UNLOCK(&threadqueue->lock); + + if (pthread_cond_destroy(&threadqueue->job_available) != 0) { + fprintf(stderr, "pthread_cond_destroy failed!\n"); } - return 1; + + if (pthread_cond_destroy(&threadqueue->job_done) != 0) { + fprintf(stderr, "pthread_cond_destroy failed!\n"); + } + + FREE_POINTER(threadqueue); } -#endif //KVZ_DEBUG
View file
kvazaar-1.1.0.tar.gz/src/threadqueue.h -> kvazaar-1.2.0.tar.gz/src/threadqueue.h
Changed
@@ -30,140 +30,22 @@ #include "global.h" // IWYU pragma: keep -typedef enum { - THREADQUEUE_JOB_STATE_QUEUED = 0, - THREADQUEUE_JOB_STATE_RUNNING = 1, - THREADQUEUE_JOB_STATE_DONE = 2 -} threadqueue_job_state; +typedef struct threadqueue_job_t threadqueue_job_t; +typedef struct threadqueue_queue_t threadqueue_queue_t; -typedef struct threadqueue_job_t { - pthread_mutex_t lock; - - threadqueue_job_state state; - - unsigned int ndepends; //Number of active dependencies that this job wait for - - struct threadqueue_job_t **rdepends; //array of pointer to jobs that depend on this one. They have to exist when the thread finishes, because they cannot be run before. - unsigned int rdepends_count; //number of rdepends - unsigned int rdepends_size; //allocated size of rdepends - - //Job function and state to use - void (*fptr)(void *arg); - void *arg; - -#ifdef KVZ_DEBUG - const char* debug_description; - - int debug_worker_id; - - KVZ_CLOCK_T debug_clock_enqueue; - KVZ_CLOCK_T debug_clock_start; - KVZ_CLOCK_T debug_clock_stop; - KVZ_CLOCK_T debug_clock_dequeue; -#endif -} threadqueue_job_t; +threadqueue_queue_t * kvz_threadqueue_init(int thread_count); +threadqueue_job_t * kvz_threadqueue_job_create(void (*fptr)(void *arg), void *arg); +int kvz_threadqueue_submit(threadqueue_queue_t * threadqueue, threadqueue_job_t *job); - +int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *dependency); -typedef struct { - pthread_mutex_t lock; - pthread_cond_t cond; - pthread_cond_t cb_cond; - - pthread_t *threads; - int threads_count; - int threads_running; +threadqueue_job_t *kvz_threadqueue_copy_ref(threadqueue_job_t *job); - int stop; //=>1: threads should stop asap - - int fifo; - - threadqueue_job_t **queue; - unsigned int queue_start; - unsigned int queue_count; - unsigned int queue_size; - unsigned int queue_waiting_execution; //Number of jobs without any dependency which could be run - unsigned int queue_waiting_dependency; //Number of jobs waiting for a dependency to complete - unsigned int queue_running; //Number of jobs running - -#ifdef KVZ_DEBUG - //Format: pointer <tab> worker id <tab> time enqueued <tab> time started <tab> time stopped <tab> time dequeued <tab> job description - //For threads, pointer = "" and job description == "thread", time enqueued and time dequeued are equal to "-" - //For flush, pointer = "" and job description == "FLUSH", time enqueued, time dequeued and time started are equal to "-" - //Each time field, except the first one in the line be expressed in a relative way, by prepending the number of seconds by +. - //Dependencies: pointer -> pointer +void kvz_threadqueue_free_job(threadqueue_job_t **job_ptr); - FILE *debug_log; - - KVZ_CLOCK_T *debug_clock_thread_start; - KVZ_CLOCK_T *debug_clock_thread_end; -#endif -} threadqueue_queue_t; - -//Init a threadqueue (if fifo, then behave as a FIFO with dependencies, otherwise as a LIFO with dependencies) -int kvz_threadqueue_init(threadqueue_queue_t * threadqueue, int thread_count, int fifo); - -//Add a job to the queue, and returs a threadqueue_job handle. If wait == 1, one has to run kvz_threadqueue_job_unwait_job in order to have it run -threadqueue_job_t * kvz_threadqueue_submit(threadqueue_queue_t * threadqueue, void (*fptr)(void *arg), void *arg, int wait, const char* debug_description); - -int kvz_threadqueue_job_unwait_job(threadqueue_queue_t * threadqueue, threadqueue_job_t *job); - -//Add a dependency between two jobs. -int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *depends_on); - -//Blocking call until the queue is empty. Previously set threadqueue_job handles should not be used anymore -int kvz_threadqueue_flush(threadqueue_queue_t * threadqueue); - -//Blocking call until job is executed. Job handles submitted before job should not be used any more as they are removed from the queue. int kvz_threadqueue_waitfor(threadqueue_queue_t * threadqueue, threadqueue_job_t * job); +int kvz_threadqueue_stop(threadqueue_queue_t * threadqueue); +void kvz_threadqueue_free(threadqueue_queue_t * threadqueue); -//Free ressources in a threadqueue -int kvz_threadqueue_finalize(threadqueue_queue_t * threadqueue); - -#ifdef KVZ_DEBUG -int threadqueue_log(threadqueue_queue_t * threadqueue, const KVZ_CLOCK_T *start, const KVZ_CLOCK_T *stop, const char* debug_description); - -// Bitmasks for PERFORMANCE_MEASURE_START and PERFORMANCE_MEASURE_END. -#define KVZ_PERF_FRAME (1 << 0) -#define KVZ_PERF_JOB (1 << 1) -#define KVZ_PERF_LCU (1 << 2) -#define KVZ_PERF_SAOREC (1 << 3) -#define KVZ_PERF_BSLEAF (1 << 4) -#define KVZ_PERF_SEARCHCU (1 << 5) - -#define IMPL_PERFORMANCE_MEASURE_START(mask) KVZ_CLOCK_T start, stop; if ((KVZ_DEBUG) & mask) { KVZ_GET_TIME(&start); } -#define IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) { if ((KVZ_DEBUG) & mask) { KVZ_GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}} } \ - -#ifdef _MSC_VER -// Disable VS conditional expression warning from debug code. -# define WITHOUT_CONSTANT_EXP_WARNING(macro) \ - __pragma(warning(push)) \ - __pragma(warning(disable:4127)) \ - macro \ - __pragma(warning(pop)) -# define PERFORMANCE_MEASURE_START(mask) \ - WITHOUT_CONSTANT_EXP_WARNING(IMPL_PERFORMANCE_MEASURE_START(mask)) -# define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) \ - WITHOUT_CONSTANT_EXP_WARNING(IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ##__VA_ARGS__)) -#else -# define PERFORMANCE_MEASURE_START(mask) \ - IMPL_PERFORMANCE_MEASURE_START(mask) -# define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) \ - IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ##__VA_ARGS__) -#endif - -#else -#define PERFORMANCE_MEASURE_START(mask) -#define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) -#endif - -/* Constraints: - * - * - Always first lock threadqueue, than a job inside it - * - When job A depends on job B, always lock first job B and then job A - * - Jobs should be submitted in an order which is compatible with serial execution. - * - * */ - -#endif //THREADQUEUE_H_ +#endif // THREADQUEUE_H_
View file
kvazaar-1.1.0.tar.gz/src/threads.h -> kvazaar-1.2.0.tar.gz/src/threads.h
Changed
@@ -30,10 +30,6 @@ #include <pthread.h> -#define E3 1000 -#define E9 1000000000 -#define FILETIME_TO_EPOCH 0x19DB1DED53E8000LL - #if defined(__GNUC__) && !defined(__MINGW32__) #include <unistd.h> // IWYU pragma: export #include <time.h> // IWYU pragma: export @@ -76,7 +72,64 @@ #endif //__GNUC__ -#undef E9 -#undef E3 +#ifdef __APPLE__ +// POSIX semaphores are deprecated on Mac so we use Grand Central Dispatch +// semaphores instead. +#include <dispatch/dispatch.h> +typedef dispatch_semaphore_t kvz_sem_t; + +static INLINE void kvz_sem_init(kvz_sem_t *sem, int value) +{ + assert(value >= 0); + *sem = dispatch_semaphore_create(value); +} + +static INLINE void kvz_sem_wait(kvz_sem_t *sem) +{ + dispatch_semaphore_wait(*sem, DISPATCH_TIME_FOREVER); +} + +static INLINE void kvz_sem_post(kvz_sem_t *sem) +{ + dispatch_semaphore_signal(*sem); +} + + +static INLINE void kvz_sem_destroy(kvz_sem_t *sem) +{ + // Do nothing for GCD semaphores. +} + +#else +// Use POSIX semaphores. +#include <semaphore.h> + +typedef sem_t kvz_sem_t; + +static INLINE void kvz_sem_init(kvz_sem_t *sem, int value) +{ + assert(value >= 0); + // Pthreads-w32 does not support process-shared semaphores, so pshared + // must always be zero. + int pshared = 0; + sem_init(sem, pshared, value); +} + +static INLINE void kvz_sem_wait(kvz_sem_t *sem) +{ + sem_wait(sem); +} + +static INLINE void kvz_sem_post(kvz_sem_t *sem) +{ + sem_post(sem); +} + +static INLINE void kvz_sem_destroy(kvz_sem_t *sem) +{ + sem_destroy(sem); +} + +#endif #endif //THREADS_H_
View file
kvazaar-1.1.0.tar.gz/src/transform.c -> kvazaar-1.2.0.tar.gz/src/transform.c
Changed
@@ -62,7 +62,7 @@ * * \param width Transform width. * \param in_stride Stride for ref_in and pred_in - * \param out_stride Stride for rec_out and coeff_out. + * \param out_stride Stride for rec_out. * \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Returns the reconstructed pixels. @@ -82,14 +82,15 @@ for (int y = 0; y < width; ++y) { for (int x = 0; x < width; ++x) { - int32_t in_idx = x + y * in_stride; - int32_t out_idx = x + y * out_stride; + int32_t in_idx = x + y * in_stride; + int32_t out_idx = x + y * out_stride; + int32_t coeff_idx = x + y * width; // The residual must be computed before writing to rec_out because // pred_in and rec_out may point to the same array. - coeff_t coeff = (coeff_t)(ref_in[in_idx] - pred_in[in_idx]); - coeff_out[out_idx] = coeff; - rec_out[out_idx] = ref_in[in_idx]; + coeff_t coeff = (coeff_t)(ref_in[in_idx] - pred_in[in_idx]); + coeff_out[coeff_idx] = coeff; + rec_out[out_idx] = ref_in[in_idx]; nonzero_coeffs |= (coeff != 0); } @@ -102,22 +103,20 @@ * Apply DPCM to residual. * * \param width width of the block - * \param stride stride of coeff array * \param dir RDPCM direction * \param coeff coefficients (residual) to filter */ static void rdpcm(const int width, - const int stride, const rdpcm_dir dir, coeff_t *coeff) { - const int offset = (dir == RDPCM_HOR) ? 1 : stride; + const int offset = (dir == RDPCM_HOR) ? 1 : width; const int min_x = (dir == RDPCM_HOR) ? 1 : 0; const int min_y = (dir == RDPCM_HOR) ? 0 : 1; for (int y = width - 1; y >= min_y; y--) { for (int x = width - 1; x >= min_x; x--) { - const int index = x + y * stride; + const int index = x + y * width; coeff[index] -= coeff[index - offset]; } } @@ -209,7 +208,7 @@ * \param color Color. * \param scan_order Coefficient scan order. * \param trskip_out Whether transform skip is used. - * \param stride Stride for ref_in, pred_in rec_out and coeff_out. + * \param stride Stride for ref_in, pred_in and rec_out. * \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Reconstructed pixels. @@ -261,19 +260,142 @@ // we can skip this. kvz_pixels_blit(best->rec, rec_out, width, width, 4, out_stride); } - kvz_coefficients_blit(best->coeff, coeff_out, width, width, 4, out_stride); + copy_coeffs(best->coeff, coeff_out, width); return best->has_coeffs; } +/** + * Calculate the residual coefficients for a single TU. + */ +static void quantize_tr_residual(encoder_state_t * const state, + const color_t color, + const int32_t x, + const int32_t y, + const uint8_t depth, + cu_info_t *cur_pu, + lcu_t* lcu) +{ + const kvz_config *cfg = &state->encoder_control->cfg; + const int32_t shift = color == COLOR_Y ? 0 : 1; + const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift }; + + // If luma is 4x4, do chroma for the 8x8 luma area when handling the top + // left PU because the coordinates are correct. + bool handled_elsewhere = color != COLOR_Y && + depth > MAX_DEPTH && + (lcu_px.x % 4 != 0 || lcu_px.y % 4 != 0); + if (handled_elsewhere) { + return; + } + + // Clear coded block flag structures for depths lower than current depth. + // This should ensure that the CBF data doesn't get corrupted if this function + // is called more than once. + cbf_clear(&cur_pu->cbf, depth, color); + + int32_t tr_width; + if (color == COLOR_Y) { + tr_width = LCU_WIDTH >> depth; + } else { + const int chroma_depth = (depth == MAX_PU_DEPTH ? depth - 1 : depth); + tr_width = LCU_WIDTH_C >> chroma_depth; + } + const int32_t lcu_width = LCU_WIDTH >> shift; + const int8_t mode = + (color == COLOR_Y) ? cur_pu->intra.mode : cur_pu->intra.mode_chroma; + const coeff_scan_order_t scan_idx = + kvz_get_scan_order(cur_pu->type, mode, depth); + const int offset = lcu_px.x + lcu_px.y * lcu_width; + const int z_index = xy_to_zorder(lcu_width, lcu_px.x, lcu_px.y); + + // Pointers to current location in arrays with prediction. The + // reconstruction will be written to this array. + kvz_pixel *pred = NULL; + // Pointers to current location in arrays with reference. + const kvz_pixel *ref = NULL; + // Pointers to current location in arrays with quantized coefficients. + coeff_t *coeff = NULL; + + switch (color) { + case COLOR_Y: + pred = &lcu->rec.y[offset]; + ref = &lcu->ref.y[offset]; + coeff = &lcu->coeff.y[z_index]; + break; + case COLOR_U: + pred = &lcu->rec.u[offset]; + ref = &lcu->ref.u[offset]; + coeff = &lcu->coeff.u[z_index]; + break; + case COLOR_V: + pred = &lcu->rec.v[offset]; + ref = &lcu->ref.v[offset]; + coeff = &lcu->coeff.v[z_index]; + break; + } + + const bool can_use_trskip = tr_width == 4 && + color == COLOR_Y && + cfg->trskip_enable; + + bool has_coeffs; + + if (cfg->lossless) { + has_coeffs = bypass_transquant(tr_width, + lcu_width, // in stride + lcu_width, // out stride + ref, + pred, + pred, + coeff); + if (cfg->implicit_rdpcm && cur_pu->type == CU_INTRA) { + // implicit rdpcm for horizontal and vertical intra modes + if (mode == 10) { + rdpcm(tr_width, RDPCM_HOR, coeff); + } else if (mode == 26) { + rdpcm(tr_width, RDPCM_VER, coeff); + } + } + + } else if (can_use_trskip) { + // Try quantization with trskip and use it if it's better. + has_coeffs = kvz_quantize_residual_trskip(state, + cur_pu, + tr_width, + color, + scan_idx, + &cur_pu->intra.tr_skip, + lcu_width, + lcu_width, + ref, + pred, + pred, + coeff); + } else { + has_coeffs = kvz_quantize_residual(state, + cur_pu, + tr_width, + color, + scan_idx, + false, // tr skip + lcu_width, + lcu_width, + ref, + pred, + pred, + coeff); + } + + if (has_coeffs) { + cbf_set(&cur_pu->cbf, depth, color); + } +} /** * This function calculates the residual coefficients for a region of the LCU * (defined by x, y and depth) and updates the reconstruction with the - * kvantized residual. - * - * It handles recursion for transform split, but that is currently only work - * for 64x64 inter to 32x32 transform blocks. + * kvantized residual. Processes the TU tree recursively. * * Inputs are: * - lcu->rec pixels after prediction for the area @@ -281,196 +403,69 @@ * - lcu->cu for the area * * Outputs are: - * - lcu->rec reconstruction after quantized residual - * - lcu->coeff quantized coefficients for the area - * - lcu->cbf coded block flags for the area - * - lcu->cu.intra[].tr_skip for the area + * - lcu->rec reconstruction after quantized residual + * - lcu->coeff quantized coefficients for the area + * - lcu->cbf coded block flags for the area + * - lcu->cu.intra.tr_skip tr skip flags for the area (in case of luma) */ -void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_pu, lcu_t* lcu) +void kvz_quantize_lcu_residual(encoder_state_t * const state, + const bool luma, + const bool chroma, + const int32_t x, + const int32_t y, + const uint8_t depth, + cu_info_t *cur_pu, + lcu_t* lcu) { - // we have 64>>depth transform size - const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; + const int32_t width = LCU_WIDTH >> depth; + const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; + if (cur_pu == NULL) { cur_pu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } - const int8_t width = LCU_WIDTH>>depth; - + // Tell clang-analyzer what is up. For some reason it can't figure out from // asserting just depth. - assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64); + assert(width == 4 || + width == 8 || + width == 16 || + width == 32 || + width == 64); - // Split transform and increase depth if (depth == 0 || cur_pu->tr_depth > depth) { - int offset = width / 2; - kvz_quantize_lcu_luma_residual(state, x, y, depth+1, NULL, lcu); - kvz_quantize_lcu_luma_residual(state, x + offset, y, depth+1, NULL, lcu); - kvz_quantize_lcu_luma_residual(state, x, y + offset, depth+1, NULL, lcu); - kvz_quantize_lcu_luma_residual(state, x + offset, y + offset, depth+1, NULL, lcu); + // Split transform and increase depth + const int offset = width / 2; + const int32_t x2 = x + offset; + const int32_t y2 = y + offset; + + kvz_quantize_lcu_residual(state, luma, chroma, x, y, depth + 1, NULL, lcu); + kvz_quantize_lcu_residual(state, luma, chroma, x2, y, depth + 1, NULL, lcu); + kvz_quantize_lcu_residual(state, luma, chroma, x, y2, depth + 1, NULL, lcu); + kvz_quantize_lcu_residual(state, luma, chroma, x2, y2, depth + 1, NULL, lcu); // Propagate coded block flags from child CUs to parent CU. - if (depth <= MAX_DEPTH) { - uint16_t child_cbfs[3] = { - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y )->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset)->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf, - }; + uint16_t child_cbfs[3] = { + LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y )->cbf, + LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset)->cbf, + LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf, + }; + + if (luma && depth < MAX_DEPTH) { cbf_set_conditionally(&cur_pu->cbf, child_cbfs, depth, COLOR_Y); } - - return; - } - - { - const int luma_offset = lcu_px.x + lcu_px.y * LCU_WIDTH; - - // Pointers to current location in arrays with prediction. - kvz_pixel *recbase_y = &lcu->rec.y[luma_offset]; - // Pointers to current location in arrays with reference. - const kvz_pixel *base_y = &lcu->ref.y[luma_offset]; - // Pointers to current location in arrays with kvantized coefficients. - coeff_t *orig_coeff_y = &lcu->coeff.y[luma_offset]; - - coeff_scan_order_t scan_idx_luma = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth); - - #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD - uint32_t residual_sum = 0; - #endif - - // Clear coded block flag structures for depths lower than current depth. - // This should ensure that the CBF data doesn't get corrupted if this function - // is called more than once. - cbf_clear(&cur_pu->cbf, depth, COLOR_Y); - - - if (state->encoder_control->cfg.lossless) { - if (bypass_transquant(width, - LCU_WIDTH, LCU_WIDTH, - base_y, recbase_y, - recbase_y, orig_coeff_y)) { - cbf_set(&cur_pu->cbf, depth, COLOR_Y); - } - if (state->encoder_control->cfg.implicit_rdpcm && cur_pu->type == CU_INTRA) { - // implicit rdpcm for horizontal and vertical intra modes - if (cur_pu->intra.mode == 10) { - rdpcm(width, LCU_WIDTH, RDPCM_HOR, orig_coeff_y); - - } else if (cur_pu->intra.mode == 26) { - rdpcm(width, LCU_WIDTH, RDPCM_VER, orig_coeff_y); - } - } - } else if (width == 4 && state->encoder_control->cfg.trskip_enable) { - // Try quantization with trskip and use it if it's better. - int has_coeffs = kvz_quantize_residual_trskip( - state, cur_pu, width, COLOR_Y, scan_idx_luma, - &cur_pu->intra.tr_skip, - LCU_WIDTH, LCU_WIDTH, - base_y, recbase_y, recbase_y, orig_coeff_y - ); - if (has_coeffs) { - cbf_set(&cur_pu->cbf, depth, COLOR_Y); - } - } else { - int has_coeffs = kvz_quantize_residual( - state, cur_pu, width, COLOR_Y, scan_idx_luma, - 0, - LCU_WIDTH, LCU_WIDTH, - base_y, recbase_y, recbase_y, orig_coeff_y - ); - if (has_coeffs) { - cbf_set(&cur_pu->cbf, depth, COLOR_Y); - } + if (chroma && depth <= MAX_DEPTH) { + cbf_set_conditionally(&cur_pu->cbf, child_cbfs, depth, COLOR_U); + cbf_set_conditionally(&cur_pu->cbf, child_cbfs, depth, COLOR_V); } - } -} - - -void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu) -{ - // we have 64>>depth transform size - const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; - const int8_t width = LCU_WIDTH>>depth; - if (cur_cu == NULL) { - cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); - } - - // Tell clang-analyzer what is up. For some reason it can't figure out from - // asserting just depth. - assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64); - - // Split transform and increase depth - if (depth == 0 || cur_cu->tr_depth > depth) { - int offset = width / 2; - kvz_quantize_lcu_chroma_residual(state, x, y, depth+1, NULL, lcu); - kvz_quantize_lcu_chroma_residual(state, x + offset, y, depth+1, NULL, lcu); - kvz_quantize_lcu_chroma_residual(state, x, y + offset, depth+1, NULL, lcu); - kvz_quantize_lcu_chroma_residual(state, x + offset, y + offset, depth+1, NULL, lcu); - // Propagate coded block flags from child CUs to parent CU. - if (depth < MAX_DEPTH) { - uint16_t child_cbfs[3] = { - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y )->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset)->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf, - }; - cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U); - cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V); + } else { + // Process a leaf TU. + if (luma) { + quantize_tr_residual(state, COLOR_Y, x, y, depth, cur_pu, lcu); } - - return; - } - - // If luma is 4x4, do chroma for the 8x8 luma area when handling the top - // left PU because the coordinates are correct. - if (depth <= MAX_DEPTH || (lcu_px.x % 8 == 0 && lcu_px.y % 8 == 0)) { - cbf_clear(&cur_cu->cbf, depth, COLOR_U); - cbf_clear(&cur_cu->cbf, depth, COLOR_V); - - const int chroma_offset = lcu_px.x / 2 + lcu_px.y / 2 * LCU_WIDTH_C; - kvz_pixel *recbase_u = &lcu->rec.u[chroma_offset]; - kvz_pixel *recbase_v = &lcu->rec.v[chroma_offset]; - const kvz_pixel *base_u = &lcu->ref.u[chroma_offset]; - const kvz_pixel *base_v = &lcu->ref.v[chroma_offset]; - coeff_t *orig_coeff_u = &lcu->coeff.u[chroma_offset]; - coeff_t *orig_coeff_v = &lcu->coeff.v[chroma_offset]; - coeff_scan_order_t scan_idx_chroma; - int tr_skip = 0; - int chroma_depth = (depth == MAX_PU_DEPTH ? depth - 1 : depth); - int chroma_width = LCU_WIDTH_C >> chroma_depth; - - scan_idx_chroma = kvz_get_scan_order(cur_cu->type, cur_cu->intra.mode_chroma, depth); - - if (state->encoder_control->cfg.lossless) { - if (bypass_transquant(chroma_width, - LCU_WIDTH_C, LCU_WIDTH_C, - base_u, recbase_u, - recbase_u, orig_coeff_u)) { - cbf_set(&cur_cu->cbf, depth, COLOR_U); - } - if (bypass_transquant(chroma_width, - LCU_WIDTH_C, LCU_WIDTH_C, - base_v, recbase_v, - recbase_v, orig_coeff_v)) { - cbf_set(&cur_cu->cbf, depth, COLOR_V); - } - if (state->encoder_control->cfg.implicit_rdpcm && cur_cu->type == CU_INTRA) { - // implicit rdpcm for horizontal and vertical intra modes - if (cur_cu->intra.mode_chroma == 10) { - rdpcm(chroma_width, LCU_WIDTH_C, RDPCM_HOR, orig_coeff_u); - rdpcm(chroma_width, LCU_WIDTH_C, RDPCM_HOR, orig_coeff_v); - - } else if (cur_cu->intra.mode_chroma == 26) { - rdpcm(chroma_width, LCU_WIDTH_C, RDPCM_VER, orig_coeff_u); - rdpcm(chroma_width, LCU_WIDTH_C, RDPCM_VER, orig_coeff_v); - } - } - } else { - if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_U, scan_idx_chroma, tr_skip, LCU_WIDTH_C, LCU_WIDTH_C, base_u, recbase_u, recbase_u, orig_coeff_u)) { - cbf_set(&cur_cu->cbf, depth, COLOR_U); - } - if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_V, scan_idx_chroma, tr_skip, LCU_WIDTH_C, LCU_WIDTH_C, base_v, recbase_v, recbase_v, orig_coeff_v)) { - cbf_set(&cur_cu->cbf, depth, COLOR_V); - } + if (chroma) { + quantize_tr_residual(state, COLOR_U, x, y, depth, cur_pu, lcu); + quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu); } } } -
View file
kvazaar-1.1.0.tar.gz/src/transform.h -> kvazaar-1.2.0.tar.gz/src/transform.h
Changed
@@ -43,7 +43,13 @@ int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset); -void kvz_quantize_lcu_luma_residual(encoder_state_t *state, int32_t x, int32_t y, uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu); -void kvz_quantize_lcu_chroma_residual(encoder_state_t *state, int32_t x, int32_t y, uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu); +void kvz_quantize_lcu_residual(encoder_state_t *state, + bool luma, + bool chroma, + int32_t x, + int32_t y, + uint8_t depth, + cu_info_t *cur_cu, + lcu_t* lcu); #endif
View file
kvazaar-1.1.0.tar.gz/src/videoframe.c -> kvazaar-1.2.0.tar.gz/src/videoframe.c
Changed
@@ -35,26 +35,13 @@ int32_t height, enum kvz_chroma_format chroma_format) { - videoframe_t *frame = MALLOC(videoframe_t, 1); - + videoframe_t *frame = calloc(1, sizeof(videoframe_t)); if (!frame) return 0; - FILL(*frame, 0); - frame->width = width; frame->height = height; - frame->width_in_lcu = frame->width / LCU_WIDTH; - if (frame->width_in_lcu * LCU_WIDTH < frame->width) frame->width_in_lcu++; - frame->height_in_lcu = frame->height / LCU_WIDTH; - if (frame->height_in_lcu * LCU_WIDTH < frame->height) frame->height_in_lcu++; - - { - unsigned cu_array_width = frame->width_in_lcu * LCU_WIDTH; - unsigned cu_array_height = frame->height_in_lcu * LCU_WIDTH; - frame->cu_array = kvz_cu_array_alloc(cu_array_width, cu_array_height); - } - - frame->coeff_y = NULL; frame->coeff_u = NULL; frame->coeff_v = NULL; + frame->width_in_lcu = CEILDIV(frame->width, LCU_WIDTH); + frame->height_in_lcu = CEILDIV(frame->height, LCU_WIDTH); frame->sao_luma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu); if (chroma_format != KVZ_CSP_400) { @@ -76,11 +63,7 @@ kvz_image_free(frame->rec); frame->rec = NULL; - kvz_cu_array_free(frame->cu_array); - - FREE_POINTER(frame->coeff_y); - FREE_POINTER(frame->coeff_u); - FREE_POINTER(frame->coeff_v); + kvz_cu_array_free(&frame->cu_array); FREE_POINTER(frame->sao_luma); FREE_POINTER(frame->sao_chroma); @@ -93,17 +76,3 @@ void kvz_videoframe_set_poc(videoframe_t * const frame, const int32_t poc) { frame->poc = poc; } - -const cu_info_t* kvz_videoframe_get_cu_const(const videoframe_t * const frame, - unsigned int x_in_scu, - unsigned int y_in_scu) -{ - return kvz_cu_array_at_const(frame->cu_array, x_in_scu << 3, y_in_scu << 3); -} - -cu_info_t* kvz_videoframe_get_cu(videoframe_t * const frame, - const unsigned int x_in_scu, - const unsigned int y_in_scu) -{ - return kvz_cu_array_at(frame->cu_array, x_in_scu << 3, y_in_scu << 3); -}
View file
kvazaar-1.1.0.tar.gz/src/videoframe.h -> kvazaar-1.2.0.tar.gz/src/videoframe.h
Changed
@@ -39,10 +39,6 @@ kvz_picture *source; //!< \brief Source image. kvz_picture *rec; //!< \brief Reconstructed image. - coeff_t* coeff_y; //!< \brief coefficient pointer Y - coeff_t* coeff_u; //!< \brief coefficient pointer U - coeff_t* coeff_v; //!< \brief coefficient pointer V - int32_t width; //!< \brief Luma pixel array width. int32_t height; //!< \brief Luma pixel array height. int32_t height_in_lcu; //!< \brief Picture width in number of LCU's. @@ -60,7 +56,4 @@ void kvz_videoframe_set_poc(videoframe_t * frame, int32_t poc); -const cu_info_t* kvz_videoframe_get_cu_const(const videoframe_t * const frame, unsigned int x_in_scu, unsigned int y_in_scu); -cu_info_t* kvz_videoframe_get_cu(videoframe_t * const frame, const unsigned int x_in_scu, const unsigned int y_in_scu); - #endif
View file
kvazaar-1.1.0.tar.gz/tests/Makefile.am -> kvazaar-1.2.0.tar.gz/tests/Makefile.am
Changed
@@ -1,9 +1,22 @@ -TESTS = $(check_PROGRAMS) +TESTS = $(check_PROGRAMS) \ + test_external_symbols.sh \ + test_gop.sh \ + test_interlace.sh \ + test_intra.sh \ + test_invalid_input.sh \ + test_mv_constraint.sh \ + test_owf_wpp_tiles.sh \ + test_rate_control.sh \ + test_slices.sh \ + test_smp.sh \ + test_tools.sh \ + test_weird_shapes.sh check_PROGRAMS = kvazaar_tests kvazaar_tests_SOURCES = \ + coeff_sum_tests.c \ dct_tests.c \ intra_sad_tests.c \ mv_cand_tests.c \ @@ -18,3 +31,15 @@ kvazaar_tests_CFLAGS = -I$(srcdir) -I$(top_srcdir) -I$(top_srcdir)/src kvazaar_tests_LDFLAGS = -static $(top_builddir)/src/libkvazaar.la $(LIBS) +# This makes sure that CXXLD gets defined. +nodist_EXTRA_kvazaar_tests_SOURCES = cpp.cpp + +if USE_CRYPTOPP +kvazaar_tests_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(kvazaar_tests_CFLAGS) $(CXXFLAGS) \ + $(kvazaar_tests_LDFLAGS) $(LDFLAGS) -o $@ +else +kvazaar_tests_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(kvazaar_tests_CFLAGS) $(CFLAGS) \ + $(kvazaar_tests_LDFLAGS) $(LDFLAGS) -o $@ +endif
View file
kvazaar-1.2.0.tar.gz/tests/coeff_sum_tests.c
Added
@@ -0,0 +1,63 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2017 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Kvazaar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "greatest/greatest.h" + +#include "test_strategies.h" + +#include <string.h> + +static coeff_t coeff_test_data[64 * 64]; +static uint32_t expected_test_result; + +static void setup() +{ + // Fill test data. + coeff_t value = INT16_MIN; + for (int i = 0; i < 64 * 64; i++) { + coeff_test_data[i] = value; + value += 16; + } + + // Calculate expected result using the formula for an arithmetic sum. + expected_test_result = + 2048 * (16 - INT16_MIN) / 2 + + 2048 * 2047 * 16 / 2; +} + +TEST test_coeff_abs_sum() +{ + uint32_t sum = kvz_coeff_abs_sum(coeff_test_data, 64 * 64); + ASSERT_EQ(sum, expected_test_result); + PASS(); +} + +SUITE(coeff_sum_tests) +{ + setup(); + + for (volatile int i = 0; i < strategies.count; ++i) { + if (strcmp(strategies.strategies[i].type, "coeff_abs_sum") != 0) { + continue; + } + + kvz_coeff_abs_sum = strategies.strategies[i].fptr; + RUN_TEST(test_coeff_abs_sum); + } +}
View file
kvazaar-1.1.0.tar.gz/tests/intra_sad_tests.c -> kvazaar-1.2.0.tar.gz/tests/intra_sad_tests.c
Changed
@@ -177,7 +177,7 @@ // Loop through all strategies picking out the intra sad ones and run // selectec strategies though all tests. - for (unsigned i = 0; i < strategies.count; ++i) { + for (volatile unsigned i = 0; i < strategies.count; ++i) { const char * type = strategies.strategies[i].type; if (strcmp(type, "sad_4x4") == 0) {
View file
kvazaar-1.1.0.tar.gz/tests/mv_cand_tests.c -> kvazaar-1.2.0.tar.gz/tests/mv_cand_tests.c
Changed
@@ -31,22 +31,19 @@ lcu.cu[i].type = CU_INTER; } - cu_info_t *mv_cand[5] = { NULL }; + merge_candidates_t cand = { {0, 0}, {0, 0, 0}, 0, 0 }; + get_spatial_merge_candidates(64 + 32, 64, // x, y 32, 24, // width, height 1920, 1080, // picture size - &mv_cand[0], // b0 - &mv_cand[1], // b1 - &mv_cand[2], // b2 - &mv_cand[3], // a0 - &mv_cand[4], // a1 - &lcu); - - ASSERT_EQ(mv_cand[0], &lcu.cu[289]); // b0 - ASSERT_EQ(mv_cand[1], &lcu.cu[ 16]); // b1 - ASSERT_EQ(mv_cand[2], &lcu.cu[ 8]); // b2 - ASSERT_EQ(mv_cand[3], &lcu.cu[127]); // a0 - ASSERT_EQ(mv_cand[4], &lcu.cu[110]); // a1 + &lcu, + &cand); + + ASSERT_EQ(cand.b[0], &lcu.cu[289]); + ASSERT_EQ(cand.b[1], &lcu.cu[ 16]); + ASSERT_EQ(cand.b[2], &lcu.cu[ 8]); + ASSERT_EQ(cand.a[0], &lcu.cu[127]); + ASSERT_EQ(cand.a[1], &lcu.cu[110]); PASS(); }
View file
kvazaar-1.1.0.tar.gz/tests/sad_tests.c -> kvazaar-1.2.0.tar.gz/tests/sad_tests.c
Changed
@@ -31,7 +31,7 @@ ////////////////////////////////////////////////////////////////////////// // DEFINES -#define TEST_SAD(X, Y) kvz_image_calc_sad(g_pic, g_ref, 0, 0, (X), (Y), 8, 8, -1) +#define TEST_SAD(X, Y) kvz_image_calc_sad(g_pic, g_ref, 0, 0, (X), (Y), 8, 8) ////////////////////////////////////////////////////////////////////////// // GLOBALS @@ -378,7 +378,7 @@ sad_test_env.tested_func = strategies.strategies[i].fptr; sad_test_env.strategy = &strategies.strategies[i]; int num_dim_tests = sizeof(tested_dims) / sizeof(tested_dims[0]); - for (int dim_test = 0; dim_test < num_dim_tests; ++dim_test) { + for (volatile int dim_test = 0; dim_test < num_dim_tests; ++dim_test) { sad_test_env.width = tested_dims[dim_test].width; sad_test_env.height = tested_dims[dim_test].height; RUN_TEST(test_reg_sad);
View file
kvazaar-1.1.0.tar.gz/tests/satd_tests.c -> kvazaar-1.2.0.tar.gz/tests/satd_tests.c
Changed
@@ -167,7 +167,7 @@ // Loop through all strategies picking out the intra sad ones and run // selectec strategies though all tests. - for (unsigned i = 0; i < strategies.count; ++i) { + for (volatile unsigned i = 0; i < strategies.count; ++i) { const char * type = strategies.strategies[i].type; if (strcmp(type, "satd_4x4") == 0) {
View file
kvazaar-1.1.0.tar.gz/tests/speed_tests.c -> kvazaar-1.2.0.tar.gz/tests/speed_tests.c
Changed
@@ -405,7 +405,7 @@ int num_tested_dims = sizeof(tested_dims) / sizeof(*tested_dims); // Call reg_sad with all the sizes it is actually called with. - for (int dim_i = 0; dim_i < num_tested_dims; ++dim_i) { + for (volatile int dim_i = 0; dim_i < num_tested_dims; ++dim_i) { test_env.width = tested_dims[dim_i].x; test_env.height = tested_dims[dim_i].y; RUN_TEST(inter_sad);
View file
kvazaar-1.2.0.tar.gz/tests/test_external_symbols.sh
Added
@@ -0,0 +1,10 @@ +#!/bin/sh + +# Check for external symbols without kvz_ prefix. + +set -eu${BASH+o pipefail} + +if nm -go --defined-only ../src/.libs/libkvazaar.a | grep -v ' kvz_'; then + printf '%s\n' 'Only symbols prefixed with "kvz_" should be exported from libkvazaar.' + false +fi
View file
kvazaar-1.2.0.tar.gz/tests/test_gop.sh
Added
@@ -0,0 +1,12 @@ +#!/bin/sh + +# Test GOP, with and without OWF. + +set -eu +. "${0%/*}/util.sh" + +common_args='-p0 --threads=2 --wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3' +valgrind_test 264x130 10 $common_args --gop=8 -p0 --owf=1 +valgrind_test 264x130 10 $common_args --gop=8 -p0 --owf=4 +valgrind_test 264x130 20 $common_args --gop=8 -p16 --owf=0 +valgrind_test 264x130 10 $common_args --gop=lp-g4d3t1 -p5 --owf=4
View file
kvazaar-1.2.0.tar.gz/tests/test_interlace.sh
Added
@@ -0,0 +1,6 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 264x130 10 --source-scan-type=tff -p0 --preset=ultrafast --threads=2 --owf=1 --wpp
View file
kvazaar-1.2.0.tar.gz/tests/test_intra.sh
Added
@@ -0,0 +1,11 @@ +#!/bin/sh + +# Test all-intra coding. + +set -eu + +. "${0%/*}/util.sh" + +common_args='264x130 10 -p1 --threads=2 --owf=1 --no-rdoq --no-deblock --no-sao --no-signhide' +valgrind_test $common_args --rd=1 +valgrind_test $common_args --rd=2 --no-transform-skip
View file
kvazaar-1.2.0.tar.gz/tests/test_invalid_input.sh
Added
@@ -0,0 +1,8 @@ +#!/bin/sh + +# Test trying to use invalid input dimensions. + +set -eu +. "${0%/*}/util.sh" + +encode_test 1x65 1 1
View file
kvazaar-1.2.0.tar.gz/tests/test_mv_constraint.sh
Added
@@ -0,0 +1,7 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --pu-depth-inter=0-3 --mv-constraint=frametilemargin +valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --subme=4 --mv-constraint=frametilemargin
View file
kvazaar-1.2.0.tar.gz/tests/test_owf_wpp_tiles.sh
Added
@@ -0,0 +1,18 @@ +#!/bin/sh + +# Test OWF, WPP and tiles. There is lots of separate branches of code +# related to owf == 0 and owf != 0, which is why all permutations are +# tried. + +set -eu +. "${0%/*}/util.sh" + +common_args='-p4 --rd=0 --no-rdoq --no-signhide --subme=0 --deblock --sao --pu-depth-inter=1-3 --pu-depth-intra=2-3' +valgrind_test 264x130 10 $common_args -r1 --owf=1 --threads=0 --no-wpp +valgrind_test 264x130 10 $common_args -r1 --owf=0 --threads=0 --no-wpp +valgrind_test 264x130 10 $common_args -r2 --owf=1 --threads=2 --wpp +valgrind_test 264x130 10 $common_args -r2 --owf=0 --threads=2 --no-wpp +valgrind_test 264x130 10 $common_args -r2 --owf=1 --threads=2 --tiles-height-split=u2 --no-wpp +valgrind_test 264x130 10 $common_args -r2 --owf=0 --threads=2 --tiles-height-split=u2 --no-wpp +valgrind_test 512x512 3 $common_args -r2 --owf=1 --threads=2 --tiles=2x2 --no-wpp +valgrind_test 512x512 3 $common_args -r2 --owf=0 --threads=2 --tiles=2x2 --no-wpp
View file
kvazaar-1.2.0.tar.gz/tests/test_rate_control.sh
Added
@@ -0,0 +1,6 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 264x130 10 --bitrate=500000 -p0 -r1 --owf=1 --threads=2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3
View file
kvazaar-1.2.0.tar.gz/tests/test_slices.sh
Added
@@ -0,0 +1,7 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 512x256 10 --threads=2 --owf=1 --preset=ultrafast --tiles=2x2 --slices=tiles +valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --slices=wpp
View file
kvazaar-1.2.0.tar.gz/tests/test_smp.sh
Added
@@ -0,0 +1,10 @@ +#!/bin/sh + +# Test SMP and AMP blocks. + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 264x130 4 --threads=2 --owf=1 --wpp --smp +valgrind_test 264x130 4 --threads=2 --owf=1 --wpp --amp +valgrind_test 264x130 4 --threads=2 --owf=1 --wpp --smp --amp
View file
kvazaar-1.1.0.tar.gz/tests/test_strategies.c -> kvazaar-1.2.0.tar.gz/tests/test_strategies.c
Changed
@@ -44,4 +44,9 @@ fprintf(stderr, "strategy_register_dct failed!\n"); return; } + + if (!kvz_strategy_register_quant(&strategies, KVZ_BIT_DEPTH)) { + fprintf(stderr, "strategy_register_quant failed!\n"); + return; + } }
View file
kvazaar-1.2.0.tar.gz/tests/test_tools.sh
Added
@@ -0,0 +1,12 @@ +#!/bin/sh + +# Test RDOQ, SAO, deblock and signhide and subme. + +set -eu +. "${0%/*}/util.sh" + +common_args='264x130 10 -p0 -r1 --threads=2 --wpp --owf=1 --rd=0' + +valgrind_test $common_args --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3 +valgrind_test $common_args --no-rdoq --no-signhide --subme=0 +valgrind_test $common_args --rdoq --no-deblock --no-sao --subme=0
View file
kvazaar-1.2.0.tar.gz/tests/test_weird_shapes.sh
Added
@@ -0,0 +1,8 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 16x16 10 --threads=2 --owf=1 --preset=veryslow +valgrind_test 256x16 10 --threads=2 --owf=1 --preset=veryslow +valgrind_test 16x256 10 --threads=2 --owf=1 --preset=veryslow
View file
kvazaar-1.1.0.tar.gz/tests/tests_main.c -> kvazaar-1.2.0.tar.gz/tests/tests_main.c
Changed
@@ -30,6 +30,7 @@ extern SUITE(dct_tests); #endif //KVZ_BIT_DEPTH == 8 +extern SUITE(coeff_sum_tests); extern SUITE(mv_cand_tests); int main(int argc, char **argv) @@ -52,6 +53,8 @@ printf("10-bit tests are not yet supported\n"); #endif //KVZ_BIT_DEPTH == 8 + RUN_SUITE(coeff_sum_tests); + RUN_SUITE(mv_cand_tests); GREATEST_MAIN_END();
View file
kvazaar-1.2.0.tar.gz/tests/util.sh
Added
@@ -0,0 +1,65 @@ +#!/bin/sh + +# Helper functions for test scripts. + +set -eu${BASH+o pipefail} + +# Temporary files for encoder input and output. +yuvfile="$(mktemp)" +hevcfile="$(mktemp)" + +cleanup() { + rm -rf "${yuvfile}" "${hevcfile}" +} +trap cleanup EXIT + +print_and_run() { + printf '\n\n$ %s\n' "$*" + "$@" +} + +prepare() { + cleanup + print_and_run \ + ffmpeg -f lavfi -i "mandelbrot=size=${1}" \ + -vframes "${2}" -pix_fmt yuv420p -f yuv4mpegpipe \ + "${yuvfile}" +} + +valgrind_test() { + dimensions="$1" + shift + frames="$1" + shift + + prepare "${dimensions}" "${frames}" + + print_and_run \ + libtool execute \ + valgrind --leak-check=full --error-exitcode=1 -- \ + ../src/kvazaar -i "${yuvfile}" "--input-res=${dimensions}" -o "${hevcfile}" "$@" + + print_and_run \ + TAppDecoderStatic -b "${hevcfile}" + + cleanup +} + +encode_test() { + dimensions="$1" + shift + frames="$1" + shift + expected_status="$1" + shift + + prepare "${dimensions}" "${frames}" + + set +e + print_and_run \ + libtool execute \ + ../src/kvazaar -i "${yuvfile}" "--input-res=${dimensions}" -o "${hevcfile}" "$@" + actual_status="$?" + set -e + [ ${actual_status} -eq ${expected_status} ] +}
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.