Projects
Essentials
kvazaar
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file,
click here
.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 22
View file
kvazaar.changes
Changed
@@ -1,4 +1,29 @@ ------------------------------------------------------------------- +Wed Sep 17 16:30:44 UTC 2025 - Luigi Baldoni <aloisio@gmx.com> + +- Update to version 2.3.2 + Fixes: + * Several fixes to 10 bit encoding + * Fix SATD calculation + * Ensure proper stack size + * Fix typing for threadwrapper + * Separate .pdb files for exe and library + * Fix y4m reading + * Update minimum CMake version + * Fix --no-early-skip + * Fix lossless incorrectly selecting skip + * Rate control improvements + * Fix bit cost calculation for tr splits and smp + * Update the default configuration to match the medium preset + External contributions: + * add missing files in distribution for compilation with cmake + * ax_pthread/clang: move -pthread to LIBS + * Fix emscripten build + * Avoid double free of roi.file_path + * Fix passing too many arguments to "init_test_strategies". +- Rebased kvazaar.patch + +------------------------------------------------------------------- Wed Apr 10 11:16:02 UTC 2024 - Luigi Baldoni <aloisio@gmx.com> - Update to version 2.3.1
View file
kvazaar.spec
Changed
@@ -1,7 +1,7 @@ # # spec file for package kvazaar # -# Copyright (c) 2024 Packman Team <packman@links2linux.de> +# Copyright (c) 2025 Packman Team <packman@links2linux.de> # Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties @@ -19,14 +19,14 @@ %define libname libkvazaar %define libmver 7 Name: kvazaar -Version: 2.3.1 +Version: 2.3.2 Release: 0 Summary: HEVC encoder License: BSD-3-Clause URL: http://ultravideo.cs.tut.fi/#encoder Source0: https://github.com/ultravideo/%{name}/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz -Patch0: %name.patch -BuildRequires: cmake >= 3.12 +Patch0: %%name.patch +BuildRequires: cmake >= 3.25 BuildRequires: pkgconfig %if 0%{?suse_version} > 1500 BuildRequires: gcc-c++
View file
kvazaar.patch
Changed
@@ -1,10 +1,12 @@ ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -235,7 +235,7 @@ +Index: kvazaar-2.3.2/CMakeLists.txt +=================================================================== +--- kvazaar-2.3.2.orig/CMakeLists.txt ++++ kvazaar-2.3.2/CMakeLists.txt +@@ -258,7 +258,7 @@ else() # CYGWIN, MSYS, and MINGW seem to be needing this but in some cases # it might be that the toolset is not properly set, so also use this # in cases where we are not sure that it is not needed -- if((NOT MSVC AND NOT LINUX AND NOT APPLE) OR (CYGWIN OR MSYS OR MINGW)) +- if((NOT MSVC AND NOT LINUX AND NOT APPLE AND NOT ANDROID AND NOT EMSCRIPTEN AND NOT BSD) OR (CYGWIN OR MSYS OR MINGW)) + if(FALSE) set(CMAKE_C_FLAGS "-Wa,-muse-unaligned-vector-move ${CMAKE_C_FLAGS}") endif()
View file
kvazaar-2.3.1.tar.gz/.gitignore -> kvazaar-2.3.2.tar.gz/.gitignore
Changed
@@ -54,6 +54,7 @@ src/kvazaar src/libkvazaar.so.* src/kvazaar.pc +src/kvazaar.pc.temp src/version.h tests/kvazaar_tests tests/kvazaar_tests.trs
View file
kvazaar-2.3.1.tar.gz/CMakeLists.txt -> kvazaar-2.3.2.tar.gz/CMakeLists.txt
Changed
@@ -1,10 +1,10 @@ -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.25) project(kvazaar LANGUAGES C CXX HOMEPAGE_URL https://github.com/ultravideo/kvazaar DESCRIPTION "An open-source HEVC encoder licensed under 3-clause BSD" -VERSION 2.3.1 ) +VERSION 2.3.2 ) option(BUILD_SHARED_LIBS "Build using shared kvazaar library" ON) @@ -12,6 +12,8 @@ option(USE_CRYPTO "Use crypto library" OFF) +option(BUILD_KVAZAAR_BINARY "Build kvazaar-bin" ON) # To build only the lib, useful for iOS-builds + include(GNUInstallDirs) #Helps to define correct distro specific install directories set(DEFERRED "@") @@ -213,20 +215,41 @@ list(APPEND CLI_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/extras/getopt.c ${CMAKE_CURRENT_SOURCE_DIR}/src/threadwrapper/src/pthread.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/threadwrapper/src/semaphore.cpp) endif() -add_executable(kvazaar-bin ${CLI_SOURCES}) +if(BUILD_KVAZAAR_BINARY) + add_executable(kvazaar-bin ${CLI_SOURCES}) + + if(MSVC AND BUILD_SHARED_LIBS) + set_target_properties(kvazaar PROPERTIES + OUTPUT_NAME kvazaar + PDB_NAME kvazaar-lib + ) + + set_target_properties(kvazaar-bin PROPERTIES + OUTPUT_NAME kvazaar + PDB_NAME kvazaar-bin + ) + else() + set_target_properties(kvazaar-bin PROPERTIES + OUTPUT_NAME kvazaar + ) + endif() + set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_NAME kvazaar) -set_target_properties(kvazaar-bin PROPERTIES OUTPUT_NAME kvazaar) -set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_NAME kvazaar) + target_link_libraries(kvazaar-bin PUBLIC kvazaar) +endif() -target_link_libraries(kvazaar-bin PUBLIC kvazaar) +list(APPEND ALLOW_AVX2 "x86_64" "AMD64") if(MSVC) target_include_directories(kvazaar PUBLIC src/threadwrapper/include) - set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) + set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + endif() else() - 
set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src) + if(BUILD_KVAZAAR_BINARY) + set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src) + endif() set_target_properties(kvazaar PROPERTIES SOVERSION "7" VERSION "7.4.0") - list(APPEND ALLOW_AVX2 "x86_64" "AMD64") if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" ) set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) @@ -235,14 +258,15 @@ # CYGWIN, MSYS, and MINGW seem to be needing this but in some cases # it might be that the toolset is not properly set, so also use this # in cases where we are not sure that it is not needed - if((NOT MSVC AND NOT LINUX AND NOT APPLE) OR (CYGWIN OR MSYS OR MINGW)) + if((NOT MSVC AND NOT LINUX AND NOT APPLE AND NOT ANDROID AND NOT EMSCRIPTEN AND NOT BSD) OR (CYGWIN OR MSYS OR MINGW)) set(CMAKE_C_FLAGS "-Wa,-muse-unaligned-vector-move ${CMAKE_C_FLAGS}") endif() - - set(THREADS_PREFER_PTHREAD_FLAG ON) - find_package(Threads REQUIRED) - target_link_libraries(kvazaar PUBLIC Threads::Threads) + if(NOT ANDROID AND NOT EMSCRIPTEN) + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) + target_link_libraries(kvazaar PUBLIC Threads::Threads) + endif() include(CheckLibraryExists) @@ -256,7 +280,9 @@ endif () target_link_libraries(kvazaar PUBLIC ${EXTRA_LIBS}) - target_link_libraries(kvazaar-bin PUBLIC ${EXTRA_LIBS} ) + if(BUILD_KVAZAAR_BINARY) + target_link_libraries(kvazaar-bin PUBLIC ${EXTRA_LIBS} ) + endif() endif() @@ -308,7 +334,9 @@ install(CODE "configure_file(\"${PROJECT_SOURCE_DIR}/src/kvazaar.pc.temp\" \"${PROJECT_SOURCE_DIR}/src/kvazaar.pc\" @ONLY)") install(FILES ${PROJECT_SOURCE_DIR}/src/kvazaar.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) -install(TARGETS kvazaar-bin DESTINATION 
${CMAKE_INSTALL_BINDIR}) +if(BUILD_KVAZAAR_BINARY) + install(TARGETS kvazaar-bin DESTINATION ${CMAKE_INSTALL_BINDIR}) +endif() install(TARGETS kvazaar RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
View file
kvazaar-2.3.1.tar.gz/README.md -> kvazaar-2.3.2.tar.gz/README.md
Changed
@@ -51,7 +51,7 @@ comment: # "BEGIN KVAZAAR HELP MESSAGE" ``` -Kvazaar v2.3.1 2024-04-10 +Kvazaar v2.3.2 2025-09-16 Kvazaar license: 3-clause BSD Usage: kvazaar -i <input> --input-res <width>x<height> -o <output> @@ -126,7 +126,7 @@ - 0: Only send VPS with the first frame. - N: Send VPS with every Nth intra frame. -r, --ref <integer> : Number of reference frames, in range 1..15 4 - --gop <string> : GOP structure lp-g4d3t1 + --gop <string> : GOP structure 16 - 0: Disabled - 8: B-frame pyramid of length 8 - 16: B-frame pyramid of length 16 @@ -262,7 +262,7 @@ guaranteed to produce sensible bitstream or work at all. disabled --tr-depth-intra <int> : Transform split depth for intra blocks 0 - --(no-)bipred : Bi-prediction disabled + --(no-)bipred : Bi-prediction enabled --cu-split-termination <string> : CU split search termination zero - off: Don't terminate early. - zero: Terminate when residual is zero. @@ -383,7 +383,7 @@ | | 0-uf | 1-sf | 2-vf | 3-fr | 4-f | 5-m | 6-s | 7-sr | 8-vs | 9-p | | -------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| rd | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 2 | 2 | +| rd | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 3 | 3 | | pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-4 | 1-4 | 1-4 | 1-4 | 1-4 | | pu-depth-inter | 1-2 | 1-2 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 | 0-3 | 0-3 | | me | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz | tz | @@ -400,7 +400,7 @@ | mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | -| amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | | cu-split-termination | zero | zero | zero | zero | zero | zero | zero | zero | zero | off | | me-early-termination | sens. | sens. | sens. | sens. | sens. 
| on | on | off | off | off | | intra-rdo-et | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -491,7 +491,7 @@ Please cite [this paper](https://dl.acm.org/citation.cfm?doid=2964284.2973796) for Kvazaar: -```M. Viitanen, A. Koivula, A. Lemmetti, A. Ylä-Outinen, J. Vanne, and T. D. Hämäläinen, Kvazaar: open-source HEVC/H.265 encoder, in Proc. ACM Int. Conf. Multimedia, Amsterdam, The Netherlands, Oct. 2016.``` +```M. Viitanen, A. Koivula, A. Lemmetti, A. Ylä-Outinen, J. Vanne, and T. D. Hämäläinen, “Kvazaar: open-source HEVC/H.265 encoder,” in Proc. ACM Int. Conf. Multimedia, Amsterdam, The Netherlands, Oct. 2016.``` Or in BibTex:
View file
kvazaar-2.3.1.tar.gz/configure.ac -> kvazaar-2.3.2.tar.gz/configure.ac
Changed
@@ -23,7 +23,7 @@ # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html ver_major=7 -ver_minor=4 +ver_minor=5 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS
View file
kvazaar-2.3.1.tar.gz/doc/kvazaar.1 -> kvazaar-2.3.2.tar.gz/doc/kvazaar.1
Changed
@@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "April 2024" "kvazaar v2.3.1" "User Commands" +.TH KVAZAAR "1" "September 2025" "kvazaar v2.3.2" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -133,7 +133,7 @@ Number of reference frames, in range 1..15 4 .TP \fB\-\-gop <string> -GOP structure lp\-g4d3t1 +GOP structure 16 \- 0: Disabled \- 8: B\-frame pyramid of length 8 \- 16: B\-frame pyramid of length 16 @@ -347,7 +347,7 @@ Transform split depth for intra blocks 0 .TP \fB\-\-(no\-)bipred -Bi\-prediction disabled +Bi\-prediction enabled .TP \fB\-\-cu\-split\-termination <string> CU split search termination zero
View file
kvazaar-2.3.1.tar.gz/m4/ax_pthread.m4 -> kvazaar-2.3.2.tar.gz/m4/ax_pthread.m4
Changed
@@ -261,8 +261,8 @@ # -pthread does define _REENTRANT, and while the Darwin headers # ignore this macro, third-party headers might not.) - PTHREAD_CFLAGS="-pthread" - PTHREAD_LIBS= + PTHREAD_CFLAGS= + PTHREAD_LIBS="-pthread" ax_pthread_ok=yes
View file
kvazaar-2.3.1.tar.gz/src/Makefile.am -> kvazaar-2.3.2.tar.gz/src/Makefile.am
Changed
@@ -4,8 +4,10 @@ EXTRA_DIST = \ extras/getopt.c \ - extras/getopt.h - + extras/getopt.h \ + version.h.in \ + kvazaarCMake.pc.in \ + threadwrapper pkgconfigdir = $(libdir)/pkgconfig nodist_pkgconfig_DATA = kvazaar.pc
View file
kvazaar-2.3.1.tar.gz/src/cfg.c -> kvazaar-2.3.2.tar.gz/src/cfg.c
Changed
@@ -63,10 +63,10 @@ cfg->sao_type = 3; cfg->rdoq_enable = 1; cfg->rdoq_skip = 1; - cfg->signhide_enable = true; + cfg->signhide_enable = false; cfg->smp_enable = false; cfg->amp_enable = false; - cfg->rdo = 1; + cfg->rdo = 0; cfg->mv_rdo = 0; cfg->full_intra_search = 0; cfg->trskip_enable = 0; @@ -86,9 +86,7 @@ cfg->aud_enable = 0; cfg->cqmfile = NULL; cfg->fast_coeff_table_fn = NULL; - cfg->ref_frames = 1; - cfg->gop_len = 4; - cfg->gop_lowdelay = true; + cfg->ref_frames = 4; cfg->bipred = 0; cfg->target_bitrate = 0; cfg->hash = KVZ_HASH_CHECKSUM; @@ -118,10 +116,10 @@ memset( cfg->pu_depth_inter.max, -1, sizeof( cfg->pu_depth_inter.max ) ); memset( cfg->pu_depth_intra.min, -1, sizeof( cfg->pu_depth_intra.min ) ); memset( cfg->pu_depth_intra.max, -1, sizeof( cfg->pu_depth_intra.max ) ); - *cfg->pu_depth_inter.min = 2; // 0-3 + *cfg->pu_depth_inter.min = 0; // 0-3 *cfg->pu_depth_inter.max = 3; // 0-3 - *cfg->pu_depth_intra.min = 2; // 0-4 - *cfg->pu_depth_intra.max = 3; // 0-4 + *cfg->pu_depth_intra.min = 1; // 0-4 + *cfg->pu_depth_intra.max = 4; // 0-4 cfg->add_encoder_info = true; cfg->calc_psnr = true; @@ -135,9 +133,11 @@ cfg->input_format = KVZ_FORMAT_P420; cfg->input_bitdepth = 8; - cfg->gop_lp_definition.d = 3; - cfg->gop_lp_definition.t = 1; + cfg->gop_lowdelay = 0; + cfg->gop_len = sizeof(kvz_gop_ra16) / sizeof(kvz_gop_ra160); + memcpy(cfg->gop, kvz_gop_ra16, sizeof(kvz_gop_ra16)); cfg->open_gop = true; + cfg->roi.file_path = NULL; cfg->roi.format = KVZ_ROI_TXT; @@ -725,7 +725,7 @@ "mv-rdo", "0", "full-intra-search", "0", "smp", "1", - "amp", "0", + "amp", "1", "cu-split-termination", "zero", "me-early-termination", "off", "intra-rdo-et", "0", @@ -793,10 +793,12 @@ if (sscanf(value, "%d/%d", &fps_num, &fps_denom) == 2) { cfg->framerate_num = fps_num; cfg->framerate_denom = fps_denom; + cfg->framerate = (double)fps_num / fps_denom; } else { // Accept decimal notation, making sure not to round 0 to 1. 
cfg->framerate_num = (int)(atof(value) * 1000 + 0.49); cfg->framerate_denom = 1000; + cfg->framerate = atof(value); } } else if OPT("qp")
View file
kvazaar-2.3.1.tar.gz/src/cli.c -> kvazaar-2.3.2.tar.gz/src/cli.c
Changed
@@ -485,7 +485,7 @@ " - 0: Only send VPS with the first frame.\n" " - N: Send VPS with every Nth intra frame.\n" " -r, --ref <integer> : Number of reference frames, in range 1..15 4\n" - " --gop <string> : GOP structure lp-g4d3t1\n" + " --gop <string> : GOP structure 16\n" " - 0: Disabled\n" " - 8: B-frame pyramid of length 8\n" " - 16: B-frame pyramid of length 16\n" @@ -622,7 +622,7 @@ " guaranteed to produce sensible bitstream or\n" " work at all. disabled\n" " --tr-depth-intra <int> : Transform split depth for intra blocks 0\n" - " --(no-)bipred : Bi-prediction disabled\n" + " --(no-)bipred : Bi-prediction enabled\n" " --cu-split-termination <string> : CU split search termination zero\n" " - off: Don't terminate early.\n" " - zero: Terminate when residual is zero.\n"
View file
kvazaar-2.3.1.tar.gz/src/encmain.c -> kvazaar-2.3.2.tar.gz/src/encmain.c
Changed
@@ -37,8 +37,12 @@ #ifdef _WIN32 /* The following two defines must be located before the inclusion of any system header files. */ +#ifndef WINVER #define WINVER 0x0500 +#endif +#ifndef _WIN32_WINNT #define _WIN32_WINNT 0x0500 +#endif #include "global.h" // IWYU pragma: keep @@ -341,9 +345,11 @@ bufferi = getc(input); // Start code of frame data if (bufferi == 0x0A) { - for (; i > 0; i--) { - ungetc(bufferi, input); - } + // There should not be any reason to ungetc the last parameter, but this was there for + // some reason in the original code. Leave as a comment for now, in case it is needed later. + //for (; i > 0; i--) { + // ungetc(bufferi, input); + //} end_of_header = true; break; }
View file
kvazaar-2.3.1.tar.gz/src/encoder.c -> kvazaar-2.3.2.tar.gz/src/encoder.c
Changed
@@ -41,6 +41,7 @@ #include "strategyselector.h" #include "kvz_math.h" #include "fast_coeff_cost.h" +#include "rate_control.h" static int encoder_control_init_gop_layer_weights(encoder_control_t * const); @@ -153,6 +154,12 @@ // Take a copy of the config. memcpy(&encoder->cfg, cfg, sizeof(encoder->cfg)); + + // Copy the ROI file path + if (cfg->roi.file_path) { + encoder->cfg.roi.file_path = strdup(cfg->roi.file_path); + } + // Set fields that are not copied to NULL. encoder->cfg.cqmfile = NULL; encoder->cfg.tiles_width_split = NULL; @@ -596,6 +603,15 @@ memcpy(encoder->cfg.optional_key, cfg->optional_key, 16); } + if (encoder->cfg.target_bitrate > 0 && cfg->rc_algorithm != KVZ_NO_RC) + { + encoder->rc_data = kvz_get_rc_data(encoder); + if (!encoder->rc_data) { + fprintf(stderr, "Failed to initialize rate control.\n"); + goto init_failed; + } + } + return encoder; init_failed: @@ -639,6 +655,8 @@ fclose(encoder->roi_file); } + kvz_free_rc_data(encoder->rc_data); + free(encoder); }
View file
kvazaar-2.3.1.tar.gz/src/encoder.h -> kvazaar-2.3.2.tar.gz/src/encoder.h
Changed
@@ -44,6 +44,8 @@ #include "threadqueue.h" #include "fast_coeff_cost.h" +struct rc_data; + /* Encoder control options, the main struct */ typedef struct encoder_control_t { @@ -149,6 +151,8 @@ fast_coeff_table_t fast_coeff_table; + struct kvz_rc_data *rc_data; + } encoder_control_t; encoder_control_t* kvz_encoder_control_init(const kvz_config *cfg);
View file
kvazaar-2.3.1.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-2.3.2.tar.gz/src/encoder_state-ctors_dtors.c
Changed
@@ -94,7 +94,7 @@ pthread_mutex_init(&state->frame->rc_lock, NULL); - state->frame->new_ratecontrol = kvz_get_rc_data(NULL); + state->frame->new_ratecontrol = state->encoder_control->rc_data; return 1; }
View file
kvazaar-2.3.1.tar.gz/src/global.h -> kvazaar-2.3.2.tar.gz/src/global.h
Changed
@@ -220,7 +220,7 @@ #define QUOTE_EXPAND(x) QUOTE(x) #ifndef KVZ_VERSION -#define KVZ_VERSION 2.3.1 +#define KVZ_VERSION 2.3.2 #endif #define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION) @@ -365,7 +365,7 @@ # define COMPILE_POWERPC 0 #endif -#if defined (_M_ARM) || defined(__arm__) || defined(__thumb__) +#if defined (_M_ARM) || defined(_M_ARM64) || defined(__arm__) || defined(__thumb__) # define COMPILE_ARM 1 #else # define COMPILE_ARM 0
View file
kvazaar-2.3.1.tar.gz/src/image.c -> kvazaar-2.3.2.tar.gz/src/image.c
Changed
@@ -472,13 +472,13 @@ pic_data, pic->stride, ref_data, - ref->stride) >> (KVZ_BIT_DEPTH - 8); + ref->stride); } else { // Extrapolate pixels from outside the frame. // Space for extrapolated pixels and the part from the picture // The extrapolation function will set the pointers and stride. - kvz_pixel ext_buffer[LCU_LUMA_SIZE]; + kvz_pixel ext_buffer[LCU_LUMA_SIZE + 1]; kvz_pixel *ext = NULL; kvz_pixel *ext_origin = NULL; int ext_s = 0; @@ -514,7 +514,7 @@ pic_data, pic->stride, ext_origin, - ext_s) >> (KVZ_BIT_DEPTH - 8); + ext_s); return satd; }
View file
kvazaar-2.3.1.tar.gz/src/intra.c -> kvazaar-2.3.2.tar.gz/src/intra.c
Changed
@@ -632,8 +632,9 @@ { const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const int8_t width = LCU_WIDTH >> depth; + cu_info_t* cur_tu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); if (cur_cu == NULL) { - cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); + cur_cu = cur_tu; } // Reset CBFs because CBFs might have been set @@ -684,5 +685,14 @@ } kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false); + if (cur_cu != cur_tu) + { + if (has_luma) cbf_copy(&cur_tu->cbf, cur_cu->cbf, COLOR_Y); + if (has_chroma) + { + cbf_copy(&cur_tu->cbf, cur_cu->cbf, COLOR_U); + cbf_copy(&cur_tu->cbf, cur_cu->cbf, COLOR_V); + } + } } }
View file
kvazaar-2.3.1.tar.gz/src/kvazaar.c -> kvazaar-2.3.2.tar.gz/src/kvazaar.c
Changed
@@ -78,7 +78,6 @@ } FREE_POINTER(encoder->states); - kvz_free_rc_data(); // Discard const from the pointer. kvz_encoder_control_free((void*) encoder->control); encoder->control = NULL; @@ -114,11 +113,6 @@ encoder->frames_started = 0; encoder->frames_done = 0; - // Assure that the rc data allocation was successful - if(!kvz_get_rc_data(encoder->control)) { - goto kvazaar_open_failure; - } - kvz_init_input_frame_buffer(&encoder->input_buffer); encoder->states = calloc(encoder->num_encoder_states, sizeof(encoder_state_t));
View file
kvazaar-2.3.1.tar.gz/src/rate_control.c -> kvazaar-2.3.2.tar.gz/src/rate_control.c
Changed
@@ -40,17 +40,10 @@ static const int MIN_SMOOTHING_WINDOW = 40; -static int smoothing_window = 40; static const double MIN_LAMBDA = 0.1; static const double MAX_LAMBDA = 10000; #define BETA1 1.2517 -static kvz_rc_data *data; - -static FILE *dist_file; -static FILE *bits_file; -static FILE *qp_file; -static FILE *lambda_file; /** * \brief Clip lambda value to a valid range. @@ -61,9 +54,7 @@ } kvz_rc_data * kvz_get_rc_data(const encoder_control_t * const encoder) { - if (data != NULL || encoder == NULL) return data; - - data = calloc(1, sizeof(kvz_rc_data)); + kvz_rc_data* data = calloc(1, sizeof(kvz_rc_data)); if (data == NULL) return NULL; if (pthread_mutex_init(&data->ck_frame_lock, NULL) != 0) return NULL; @@ -107,18 +98,21 @@ if(encoder->cfg.stats_file_prefix) { char buff128; sprintf(buff, "%sbits.txt", encoder->cfg.stats_file_prefix); - bits_file = fopen(buff, "w"); + data->bits_file = fopen(buff, "w"); sprintf(buff, "%sdist.txt", encoder->cfg.stats_file_prefix); - dist_file = fopen(buff, "w"); + data->dist_file = fopen(buff, "w"); sprintf(buff, "%sqp.txt", encoder->cfg.stats_file_prefix); - qp_file = fopen(buff, "w"); + data->qp_file = fopen(buff, "w"); sprintf(buff, "%slambda.txt", encoder->cfg.stats_file_prefix); - lambda_file = fopen(buff, "w"); + data->lambda_file = fopen(buff, "w"); } + + data->smoothing_window = MIN_SMOOTHING_WINDOW; + return data; } -void kvz_free_rc_data() { +void kvz_free_rc_data(kvz_rc_data *data) { if (data == NULL) return; pthread_mutex_destroy(&data->ck_frame_lock); @@ -189,16 +183,16 @@ bits_coded -= state->frame->cur_gop_bits_coded; } - smoothing_window = MAX(MIN_SMOOTHING_WINDOW, smoothing_window - MAX(encoder->cfg.gop_len / 2, 1)); + state->frame->new_ratecontrol->smoothing_window = MAX(MIN_SMOOTHING_WINDOW, state->frame->new_ratecontrol->smoothing_window - MAX(encoder->cfg.gop_len / 2, 1)); double gop_target_bits = -1; - while( gop_target_bits < 0 && smoothing_window < 150) { + while( gop_target_bits < 0 && 
state->frame->new_ratecontrol->smoothing_window < 150) { // Equation 12 from https://doi.org/10.1109/TIP.2014.2336550 gop_target_bits = - (encoder->target_avg_bppic * (pictures_coded + smoothing_window) - bits_coded) - * MAX(1, encoder->cfg.gop_len) / smoothing_window; + (encoder->target_avg_bppic * (pictures_coded + state->frame->new_ratecontrol->smoothing_window) - bits_coded) + * MAX(1, encoder->cfg.gop_len) / state->frame->new_ratecontrol->smoothing_window; if(gop_target_bits < 0) { - smoothing_window += 10; + state->frame->new_ratecontrol->smoothing_window += 10; } } // Allocate at least 200 bits for each GOP like HM does. @@ -375,7 +369,12 @@ else { alpha = 0.3; } - return MIN(MAX(100, alpha*pow(state->frame->icost * 4 / bits, beta)*bits), encoder->cfg.gop_len >= 2 ? 0.85 * state->frame->cur_gop_target_bits : state->frame->cur_gop_target_bits); + + double low_limit = (encoder->cfg.rc_algorithm == KVZ_LAMBDA && encoder->cfg.rdo < 2 ? 1.0 : 1.2) * pow(state->encoder_control->cfg.framerate, -0.873) * state->encoder_control->cfg.target_bitrate; + double high_limit = (encoder->cfg.rdo < 2 ? (encoder->cfg.rc_algorithm == KVZ_LAMBDA ? 1.1 : 3.5) : 2.25) * pow(state->encoder_control->cfg.framerate, -0.61) * state->encoder_control->cfg.target_bitrate; + double original_bits = alpha * pow(state->frame->icost * 4 / bits, beta) * bits; + double limited = MIN(MAX(low_limit, original_bits), high_limit); + return limited; } if (encoder->cfg.gop_len <= 0) { @@ -387,7 +386,8 @@ const double pic_target_bits = state->frame->cur_gop_target_bits * pic_weight - pic_header_bits(state); // Allocate at least 100 bits for each picture like HM does. - return MAX(100, pic_target_bits); + const double intra_bits = state->frame->is_irap ? (encoder->cfg.rdo < 2 ? 
4 : 6) * state->encoder_control->target_avg_bppic : pic_target_bits; + return MAX(MAX(100, pic_target_bits), intra_bits); } static int8_t lambda_to_qp(const double lambda) @@ -716,7 +716,7 @@ alpha = -state->frame->c_paraindex * state->frame->k_paraindex; beta = state->frame->k_paraindex - 1; } - + alpha *= (double)(1 << (KVZ_BIT_DEPTH - 8)); double est_lambda; int est_qp; if (state->frame->is_irap && encoder->cfg.intra_bit_allocation) { @@ -924,10 +924,10 @@ if (encoder->cfg.stats_file_prefix) { int poc = calc_poc(state); - fprintf(dist_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); - fprintf(bits_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); - fprintf(qp_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); - fprintf(lambda_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + fprintf(state->frame->new_ratecontrol->dist_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + fprintf(state->frame->new_ratecontrol->bits_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + fprintf(state->frame->new_ratecontrol->qp_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + fprintf(state->frame->new_ratecontrol->lambda_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); } for(int y_ctu = 0; y_ctu < state->encoder_control->in.height_in_lcu; y_ctu++) { @@ -945,17 +945,17 @@ total_distortion += (double)ctu_distortion / ctu->pixels; lambda += ctu->lambda / (state->encoder_control->in.width_in_lcu * state->encoder_control->in.height_in_lcu); if(encoder->cfg.stats_file_prefix) { - fprintf(dist_file, "%f ", ctu->distortion); - fprintf(bits_file, "%d ", ctu->bits); - fprintf(qp_file, "%d ", ctu->adjust_qp ? ctu->adjust_qp : ctu->qp); - fprintf(lambda_file, "%f ", ctu->adjust_lambda ? 
ctu->adjust_lambda : ctu->lambda); + fprintf(state->frame->new_ratecontrol->dist_file, "%f ", ctu->distortion); + fprintf(state->frame->new_ratecontrol->bits_file, "%d ", ctu->bits); + fprintf(state->frame->new_ratecontrol->qp_file, "%d ", ctu->adjust_qp ? ctu->adjust_qp : ctu->qp); + fprintf(state->frame->new_ratecontrol->lambda_file, "%f ", ctu->adjust_lambda ? ctu->adjust_lambda : ctu->lambda); } } if (encoder->cfg.stats_file_prefix) { - fprintf(dist_file, "\n"); - fprintf(bits_file, "\n"); - fprintf(qp_file, "\n"); - fprintf(lambda_file, "\n"); + fprintf(state->frame->new_ratecontrol->dist_file, "\n"); + fprintf(state->frame->new_ratecontrol->bits_file, "\n"); + fprintf(state->frame->new_ratecontrol->qp_file, "\n"); + fprintf(state->frame->new_ratecontrol->lambda_file, "\n"); } }
View file
kvazaar-2.3.1.tar.gz/src/rate_control.h -> kvazaar-2.3.2.tar.gz/src/rate_control.h
Changed
@@ -62,10 +62,17 @@ pthread_mutex_t ck_frame_lock; pthread_mutex_t lambda_lock; pthread_mutex_t intra_lock; + + int smoothing_window; + + FILE* dist_file; + FILE* bits_file; + FILE* qp_file; + FILE* lambda_file; } kvz_rc_data; kvz_rc_data * kvz_get_rc_data(const encoder_control_t * const encoder); -void kvz_free_rc_data(); +void kvz_free_rc_data(kvz_rc_data* data); void kvz_set_picture_lambda_and_qp(encoder_state_t * const state);
View file
kvazaar-2.3.1.tar.gz/src/sao.c -> kvazaar-2.3.2.tar.gz/src/sao.c
Changed
@@ -385,7 +385,7 @@ // Call calc_sao_edge_dir once for luma and twice for chroma. for (i = 0; i < buf_cnt; ++i) { FILL(cat_sum_cnt, 0); - kvz_calc_sao_edge_dir(data[i], recdata[i], edge_class, + kvz_calc_sao_edge_dir(state->encoder_control, data[i], recdata[i], edge_class, block_width, block_height, cat_sum_cnt); @@ -514,7 +514,7 @@ unsigned buf_i; for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { - ddistortion += kvz_sao_edge_ddistortion(data[buf_i], recdata[buf_i], + ddistortion += kvz_sao_edge_ddistortion(state->encoder_control, data[buf_i], recdata[buf_i], block_width, block_height, edge_sao.eo_class, &edge_sao.offsets[5 * buf_i]); } @@ -577,7 +577,7 @@ switch (merge_cand->type) { case SAO_TYPE_EDGE: for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { - ddistortion += kvz_sao_edge_ddistortion(data[buf_i], recdata[buf_i], + ddistortion += kvz_sao_edge_ddistortion(state->encoder_control, data[buf_i], recdata[buf_i], block_width, block_height, merge_cand->eo_class, &merge_cand->offsets[5 * buf_i]); }
View file
kvazaar-2.3.1.tar.gz/src/search.c -> kvazaar-2.3.2.tar.gz/src/search.c
Changed
@@ -233,6 +233,7 @@ * Calculate RD cost for a Coding Unit. * \return Cost of block * \param ref_cu CU used for prediction parameters. +* \param parent_tu * * Calculates the RDO cost of a single CU that will not be split further. * Takes into account SSD of reconstruction and the cost of encoding whatever @@ -241,10 +242,10 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu) + const cu_info_t* const parent_tu, lcu_t *const lcu) { const int width = LCU_WIDTH >> depth; - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0); // cur_cu is used for TU parameters. cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); @@ -283,10 +284,10 @@ int offset = width / 2; double sum = 0; - sum += kvz_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); + sum += kvz_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); return sum + tr_tree_bits * state->lambda; } @@ -302,8 +303,8 @@ if (state->encoder_control->chroma_format != KVZ_CSP_400) { cabac_ctx_t* cr_ctx = &(cabac->ctx.qt_cbf_model_chromadepth - tr_cu->depth); cabac->cur_ctx = cr_ctx; - int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); - int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V); + int 
u_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_U); + int v_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_V); CABAC_FBITS_UPDATE(cabac, cr_ctx, u_is_set, tr_tree_bits, "cbf_cb_search"); CABAC_FBITS_UPDATE(cabac, cr_ctx, v_is_set, tr_tree_bits, "cbf_cb_search"); } @@ -311,7 +312,7 @@ // Add transform_tree cbf_luma bit cost. const int is_tr_split = tr_cu->tr_depth - tr_cu->depth; - int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y); + int is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_Y); if (pred_cu->type == CU_INTRA || is_tr_split || cbf_is_set(tr_cu->cbf, depth, COLOR_U) || @@ -348,12 +349,12 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu) + const cu_info_t* const parent_tu, lcu_t *const lcu) { const vector2d_t lcu_px = { x_px / 2, y_px / 2 }; const int width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth; cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0); double tr_tree_bits = 0; double coeff_bits = 0; @@ -367,18 +368,18 @@ return 0; } - int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); - int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V); + int u_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_U); + int v_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_V); // See luma for why the second condition if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth) && !skip_residual_coding) { const int tr_depth = depth - pred_cu->depth; cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac; cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_chromatr_depth); cabac->cur_ctx = ctx; - if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { + if (tr_depth == 0 || 
cbf_is_set(tr_cu->cbf, depth - 1, COLOR_U)) { CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search"); } - if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { + if (tr_depth == 0 || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_V)) { CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search"); } } @@ -387,10 +388,10 @@ int offset = LCU_WIDTH >> (depth + 1); double sum = 0; - sum += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); + sum += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); return sum + tr_tree_bits * state->lambda; } @@ -424,10 +425,10 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state, const int x_px, const int y_px, const int depth, const cu_info_t* const pred_cu, - lcu_t* const lcu) { + const cu_info_t* const parent_tu, lcu_t* const lcu) { const int width = LCU_WIDTH >> depth; - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0); // cur_cu is used for TU parameters. 
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); @@ -445,7 +446,7 @@ cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac; - { + if (depth == pred_cu->depth){ int cbf = cbf_is_set_any(pred_cu->cbf, depth); // Only need to signal coded block flag if not skipped or merged // skip = no coded residual, merge = coded residual @@ -474,10 +475,10 @@ } if(state->encoder_control->chroma_format != KVZ_CSP_400 && !skip_residual_coding) { - if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { + if(tr_cu->depth == depth || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_U)) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_chromadepth - tr_cu->depth), cb_flag_u, tr_tree_bits, "cbf_cb"); } - if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { + if(tr_cu->depth == depth || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_V)) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_chromadepth - tr_cu->depth), cb_flag_v, tr_tree_bits, "cbf_cr"); } } @@ -486,10 +487,10 @@ int offset = LCU_WIDTH >> (depth + 1); double sum = 0; - sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu); - sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu); - sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); - sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); + sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); + sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); return sum + tr_tree_bits * state->lambda; } const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) ; @@ -515,7 +516,7 @@ width); } - { + 
if(!skip_residual_coding) { int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); const coeff_t* coeffs = &lcu->coeff.yxy_to_zorder(LCU_WIDTH, x_px, y_px); @@ -538,7 +539,7 @@ chroma_ssd = ssd_u + ssd_v; } - { + if (!skip_residual_coding) { int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); @@ -864,6 +865,26 @@ inter_bitcost += cur_cu->merge_idx; } } + else if (!state->encoder_control->cfg.early_skip) + { + int tr_depth = depth; + kvz_lcu_fill_trdepth(lcu, x, y, depth, tr_depth); + const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400; + kvz_inter_recon_cu(state, lcu, x, y, cu_width, true, has_chroma); + for (int i = 0; i < cu_width; i += (cu_width >> (depth == 0))) + { + for (int j = 0; j < cu_width; j += (cu_width >> (depth == 0))) + { + cu_info_t* tmp_cu = LCU_GET_CU_AT_PX(lcu, x_local + i, y_local + j); + cbf_clear(&tmp_cu->cbf, depth, COLOR_Y); + if (has_chroma) + { + cbf_clear(&tmp_cu->cbf, depth, COLOR_U); + cbf_clear(&tmp_cu->cbf, depth, COLOR_V); + } + } + }
View file
kvazaar-2.3.1.tar.gz/src/search.h -> kvazaar-2.3.2.tar.gz/src/search.h
Changed
@@ -81,11 +81,11 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu); + const cu_info_t* parent_tu, lcu_t *const lcu); double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu); + const cu_info_t* parent_tu, lcu_t *const lcu); void kvz_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth); void kvz_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
View file
kvazaar-2.3.1.tar.gz/src/search_inter.c -> kvazaar-2.3.2.tar.gz/src/search_inter.c
Changed
@@ -220,8 +220,9 @@ info->height, info->optimized_sad ); - - if (cost >= *best_cost) return false; + // On some platforms comparing two doubles give weird results, so add an offset +#define KVZ_TEMP_DOUBLE_PRECISION 0.001 + if (cost + KVZ_TEMP_DOUBLE_PRECISION >= *best_cost) return false; cost += info->mvd_cost_func( info->state, @@ -233,7 +234,8 @@ &bitcost ); - if (cost >= *best_cost) return false; + if (cost + KVZ_TEMP_DOUBLE_PRECISION >= *best_cost) return false; +#undef KVZ_TEMP_DOUBLE_PRECISION // Set to motion vector in quarter pixel precision. best_mv->x = x * 4; @@ -552,7 +554,7 @@ // 6 3 { { iDist / 2, iDist }, { iDist, 0 }, { iDist / 2, -iDist }, { -iDist, 0 }, - { iDist / 2, iDist }, { -iDist / 2, -iDist }, { 0, 0 }, { 0, 0 } + { -iDist / 2, iDist }, { -iDist / 2, -iDist }, { 0, 0 }, { 0, 0 } } }; @@ -1045,6 +1047,11 @@ epol_args.ext_origin = &ext_origin; epol_args.ext_s = &ext_s; + // In case the half of the fractional positions are legal + // and the other half illegal, the extension will be performed + // for pixels that might be uninitialized. + // Nevertheless, in that case the search will not use those pixels, + // but the thread/address sanitizer will not know that. 
kvz_get_extended_block(&epol_args); kvz_pixel *tmp_pic = pic->y + orig.y * pic->stride + orig.x; @@ -1879,6 +1886,12 @@ double frac_bits = MAX_INT; vector2d_t frac_mv = { unipred_pu->inter.mvlist0, unipred_pu->inter.mvlist1 }; + // Check that at least one quarter-pel step is possible + if (!fracmv_within_tile(info, frac_mv.x + 3, frac_mv.y + 3) && + !fracmv_within_tile(info, frac_mv.x - 3, frac_mv.y - 3)) { + continue; + } + search_frac(info, &frac_cost, &frac_bits, &frac_mv); uint8_t mv_ref_coded = LX_idx; @@ -2083,7 +2096,7 @@ const int width = LCU_WIDTH >> depth; cabac_data_t cabac_copy; memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy)); - cabac_copy.update = 1; + state->search_cabac.update = 1; cu_info_t* cur_pu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); *cur_pu = *cur_cu; @@ -2109,12 +2122,16 @@ double bits = 0; const int skip_context = kvz_get_skip_context(x, y, lcu, NULL); if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { - no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_modelskip_context, 1) + *inter_bitcost; - bits += kvz_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu); + no_cbf_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_modelskip_context, 1) + *inter_bitcost; + bits += kvz_mock_encode_coding_unit(state, &state->search_cabac, x, y, depth, lcu, cur_cu); } else { - no_cbf_bits = kvz_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu); - bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 1); + no_cbf_bits = kvz_mock_encode_coding_unit(state, &state->search_cabac, x, y, depth, lcu, cur_cu); + bits += no_cbf_bits; + if (!cur_cu->merged || cur_cu->part_size != SIZE_2Nx2N) { + bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_qt_root_cbf_model, 1); + no_cbf_bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_qt_root_cbf_model, 0); + } } double no_cbf_cost = ssd + no_cbf_bits * 
state->lambda; @@ -2124,12 +2141,23 @@ lcu, false); + + if (tr_depth == depth) + { + cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_Y); + if (reconstruct_chroma) + { + cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_U); + cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_V); + } + } + int cbf = cbf_is_set_any(cur_cu->cbf, depth); if(cbf) { - *inter_cost = kvz_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu); + *inter_cost = kvz_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, cur_cu, lcu); if (reconstruct_chroma) { - *inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu); + *inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, cur_cu, lcu); } } else { @@ -2137,13 +2165,14 @@ *inter_cost = no_cbf_cost; cur_cu->cbf = 0; *inter_bitcost = no_cbf_bits; + memcpy(&state->search_cabac, &cabac_copy, sizeof(cabac_copy)); return; } *inter_cost += (bits)* state->lambda; *inter_bitcost = bits; - if(no_cbf_cost < *inter_cost) { + if(no_cbf_cost < *inter_cost && !state->encoder_control->cfg.lossless) { cur_cu->cbf = 0; if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { cur_cu->skipped = 1; @@ -2152,6 +2181,7 @@ *inter_bitcost = no_cbf_bits; } + memcpy(&state->search_cabac, &cabac_copy, sizeof(cabac_copy)); }
View file
kvazaar-2.3.1.tar.gz/src/search_intra.c -> kvazaar-2.3.2.tar.gz/src/search_intra.c
Changed
@@ -221,9 +221,9 @@ intra_mode, chroma_mode, pred_cu, lcu); - nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, pred_cu, lcu); if (reconstruct_chroma) { - nosplit_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + nosplit_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, pred_cu, lcu); } // Early stop codition for the recursive search. @@ -334,11 +334,9 @@ cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); - kvz_pixel _pred32 * 32 + SIMD_ALIGNMENT; - kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); + ALIGNED(SIMD_ALIGNMENT) kvz_pixel pred32 * 32; - kvz_pixel _orig_block32 * 32 + SIMD_ALIGNMENT; - kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); + ALIGNED(SIMD_ALIGNMENT) kvz_pixel orig_block32 * 32; kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { @@ -408,11 +406,9 @@ const bool filter_boundary = !(cfg->lossless && cfg->implicit_rdpcm); // Temporary block arrays - kvz_pixel _predsPARALLEL_BLKS * 32 * 32 + SIMD_ALIGNMENT; - pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT); - - kvz_pixel _orig_block32 * 32 + SIMD_ALIGNMENT; - kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); + ALIGNED(SIMD_ALIGNMENT) kvz_pixel _predsPARALLEL_BLKS * 32 * 32; + pred_buffer preds = (pred_buffer)_preds; + ALIGNED(SIMD_ALIGNMENT) kvz_pixel orig_block32 * 32; // Store original block for SAD computation kvz_pixels_blit(orig, orig_block, width, width, origstride, width); @@ -731,7 +727,7 @@ -1, chroma.mode, // skip luma NULL, lcu); double bits = 0; - chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); + chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, tr_cu, lcu); 
double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode); bits += mode_bits;
View file
kvazaar-2.3.1.tar.gz/src/strategies/avx2/dct-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/dct-avx2.c
Changed
@@ -38,7 +38,6 @@ #if COMPILE_INTEL_AVX2 #include "kvazaar.h" -#if KVZ_BIT_DEPTH == 8 #include <immintrin.h> #include "strategyselector.h" @@ -938,30 +937,28 @@ TRANSFORM(dct, 32); ITRANSFORM(dct, 32); -#endif // KVZ_BIT_DEPTH == 8 + #endif //COMPILE_INTEL_AVX2 int kvz_strategy_register_dct_avx2(void* opaque, uint8_t bitdepth) { bool success = true; #if COMPILE_INTEL_AVX2 -#if KVZ_BIT_DEPTH == 8 - if (bitdepth == 8){ - success &= kvz_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2); + // Coefficients are the same for all bitdepths, no need to disable for 10-bit + success &= kvz_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2); - success &= kvz_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2); - success &= kvz_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2); - success &= kvz_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2); - success &= kvz_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2); + success &= kvz_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2); + success &= kvz_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2); + success &= kvz_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2); + success &= kvz_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2); - success &= kvz_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2); + success &= kvz_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2); - success &= kvz_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2); - success &= kvz_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2); - success &= kvz_strategyselector_register(opaque, "idct_16x16", 
"avx2", 40, &matrix_idct_16x16_avx2); - success &= kvz_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2); - } -#endif // KVZ_BIT_DEPTH == 8 + success &= kvz_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2); + success &= kvz_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2); + success &= kvz_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2); + success &= kvz_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2); + #endif //COMPILE_INTEL_AVX2 return success; }
View file
kvazaar-2.3.1.tar.gz/src/strategies/avx2/picture-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/picture-avx2.c
Changed
@@ -631,48 +631,51 @@ static cost_pixel_any_size_multi_func satd_any_size_## suffix; \ static void satd_any_size_ ## suffix ( \ int width, int height, \ - const uint8_t **preds, \ + const kvz_pixel **preds, \ const int stride, \ - const uint8_t *orig, \ + const kvz_pixel *orig, \ const int orig_stride, \ unsigned num_modes, \ unsigned *costs_out, \ int8_t *valid) \ { \ unsigned sumsnum_parallel_blocks = { 0 }; \ - const uint8_t *pred_ptrs4 = { preds0, preds1, preds2, preds3 };\ - const uint8_t *orig_ptr = orig; \ + const kvz_pixel *pred_ptrs4 = { preds0, preds1, preds2, preds3 };\ + const kvz_pixel *orig_ptr = orig; \ costs_out0 = 0; costs_out1 = 0; costs_out2 = 0; costs_out3 = 0; \ - if (width % 8 != 0) { \ + const int width_mod_8 = width % 8; \ + if (width_mod_8 != 0) { \ + const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\ /* Process the first column using 4x4 blocks. */ \ for (int y = 0; y < height; y += 4) { \ - kvz_satd_4x4_subblock_ ## suffix(preds, stride, orig, orig_stride, sums); \ - } \ - orig_ptr += 4; \ + kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &origy*orig_stride, orig_stride, sums); \ + for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4*stride; costs_outblk += sumsblk; }\ + } \ for(int blk = 0; blk < num_parallel_blocks; ++blk){\ pred_ptrsblk += 4; \ - }\ + }\ width -= 4; \ - } \ + } \ if (height % 8 != 0) { \ + const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\ /* Process the first row using 4x4 blocks. 
*/ \ for (int x = 0; x < width; x += 4 ) { \ - kvz_satd_4x4_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \ - } \ - orig_ptr += 4 * orig_stride; \ + kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &orig_ptrx, orig_stride, sums); \ + for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4; costs_outblk += sumsblk; }\ + } \ for(int blk = 0; blk < num_parallel_blocks; ++blk){\ pred_ptrsblk += 4 * stride; \ - }\ + }\ height -= 4; \ - } \ + } \ /* The rest can now be processed with 8x8 blocks. */ \ - for (int y = 0; y < height; y += 8) { \ - orig_ptr = &origy * orig_stride; \ - pred_ptrs0 = &preds0y * stride; \ - pred_ptrs1 = &preds1y * stride; \ - pred_ptrs2 = &preds2y * stride; \ - pred_ptrs3 = &preds3y * stride; \ - for (int x = 0; x < width; x += 8) { \ + for (int y = height % 8; y < height; y += 8) { \ + orig_ptr = &origy * orig_stride + width_mod_8; \ + pred_ptrs0 = &preds0y * stride + width_mod_8; \ + pred_ptrs1 = &preds1y * stride + width_mod_8; \ + pred_ptrs2 = &preds2y * stride + width_mod_8; \ + pred_ptrs3 = &preds3y * stride + width_mod_8; \ + for (int x = width_mod_8; x < width; x += 8) { \ satd_8x8_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \ orig_ptr += 8; \ pred_ptrs0 += 8; \
View file
kvazaar-2.3.1.tar.gz/src/strategies/avx2/sao-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/sao-avx2.c
Changed
@@ -284,7 +284,8 @@ return calc_diff_off_delta(diff_lo, diff_hi, offset, orig); } -static int32_t sao_edge_ddistortion_avx2(const uint8_t *orig_data, +static int32_t sao_edge_ddistortion_avx2(const encoder_control_t* const encoder, + const uint8_t *orig_data, const uint8_t *rec_data, int32_t block_width, int32_t block_height, @@ -316,7 +317,8 @@ assert(NUM_SAO_EDGE_CATEGORIES == 5); if (offsets_ok != 0xffff) { - return sao_edge_ddistortion_generic(orig_data, + return sao_edge_ddistortion_generic(encoder, + orig_data, rec_data, block_width, block_height, @@ -420,7 +422,8 @@ } } -static void calc_sao_edge_dir_avx2(const uint8_t *orig_data, +static void calc_sao_edge_dir_avx2(const encoder_control_t* const encoder, + const uint8_t *orig_data, const uint8_t *rec_data, int32_t eo_class, int32_t block_width,
View file
kvazaar-2.3.1.tar.gz/src/strategies/generic/ipol-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/ipol-generic.c
Changed
@@ -800,6 +800,10 @@ kvz_pixel *dst = args->buf + (y + args->pad_t + y_simd) * (*args->ext_s); FILL_ARRAY(dst, 0, *args->ext_s); } + // Set the last element to zero because the avx2 code reads it, + // though it does not use it, this is purely to prevent a false positive in + // address sanitizer. + args->buf[(args->blk_h + args->pad_b + args->pad_t + args->pad_b_simd - 1) * *args->ext_s + args->pad_l + args->blk_w + args->pad_r] = 0; } else {
View file
kvazaar-2.3.1.tar.gz/src/strategies/generic/picture-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/picture-generic.c
Changed
@@ -417,36 +417,39 @@ const kvz_pixel *pred_ptrs4 = { preds0, preds1, preds2, preds3 };\ const kvz_pixel *orig_ptr = orig; \ costs_out0 = 0; costs_out1 = 0; costs_out2 = 0; costs_out3 = 0; \ - if (width % 8 != 0) { \ + const int width_mod_8 = width % 8; \ + if (width_mod_8 != 0) { \ + const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\ /* Process the first column using 4x4 blocks. */ \ for (int y = 0; y < height; y += 4) { \ - kvz_satd_4x4_subblock_ ## suffix(preds, stride, orig, orig_stride, sums); \ - } \ - orig_ptr += 4; \ + kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &origy*orig_stride, orig_stride, sums); \ + for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4*stride; costs_outblk += sumsblk; }\ + } \ for(int blk = 0; blk < num_parallel_blocks; ++blk){\ pred_ptrsblk += 4; \ - }\ + }\ width -= 4; \ - } \ + } \ if (height % 8 != 0) { \ + const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\ /* Process the first row using 4x4 blocks. */ \ for (int x = 0; x < width; x += 4 ) { \ - kvz_satd_4x4_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \ - } \ - orig_ptr += 4 * orig_stride; \ + kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &orig_ptrx, orig_stride, sums); \ + for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4; costs_outblk += sumsblk; }\ + } \ for(int blk = 0; blk < num_parallel_blocks; ++blk){\ pred_ptrsblk += 4 * stride; \ - }\ + }\ height -= 4; \ - } \ + } \ /* The rest can now be processed with 8x8 blocks. 
*/ \ - for (int y = 0; y < height; y += 8) { \ - orig_ptr = &origy * orig_stride; \ - pred_ptrs0 = &preds0y * stride; \ - pred_ptrs1 = &preds1y * stride; \ - pred_ptrs2 = &preds2y * stride; \ - pred_ptrs3 = &preds3y * stride; \ - for (int x = 0; x < width; x += 8) { \ + for (int y = height % 8; y < height; y += 8) { \ + orig_ptr = &origy * orig_stride + width_mod_8; \ + pred_ptrs0 = &preds0y * stride + width_mod_8; \ + pred_ptrs1 = &preds1y * stride + width_mod_8; \ + pred_ptrs2 = &preds2y * stride + width_mod_8; \ + pred_ptrs3 = &preds3y * stride + width_mod_8; \ + for (int x = width_mod_8; x < width; x += 8) { \ satd_8x8_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \ orig_ptr += 8; \ pred_ptrs0 += 8; \
View file
kvazaar-2.3.1.tar.gz/src/strategies/generic/sao-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/sao-generic.c
Changed
@@ -47,7 +47,8 @@ * \param dir_offsets * \param is_chroma 0 for luma, 1 for chroma. Indicates */ -static void calc_sao_edge_dir_generic(const kvz_pixel *orig_data, +static void calc_sao_edge_dir_generic(const encoder_control_t* const encoder, + const kvz_pixel *orig_data, const kvz_pixel *rec_data, int eo_class, int block_width, @@ -62,7 +63,7 @@ // Don't sample the edge pixels because this function doesn't have access to // their neighbours. - + const int offset = encoder->bitdepth != 8 ? 1 << (encoder->bitdepth - 9) : 0; for (y = 1; y < block_height - 1; ++y) { for (x = 1; x < block_width - 1; ++x) { @@ -73,7 +74,7 @@ int eo_cat = sao_calc_eo_cat(a, b, c); - cat_sum_cnt[0][eo_cat] += orig_data[y * block_width + x] - c; + cat_sum_cnt[0][eo_cat] += (orig_data[y * block_width + x] - c + offset) >> (encoder->bitdepth - 8); cat_sum_cnt[1][eo_cat] += 1; } }
View file
kvazaar-2.3.1.tar.gz/src/strategies/generic/sao_shared_generics.h -> kvazaar-2.3.2.tar.gz/src/strategies/generic/sao_shared_generics.h
Changed
@@ -49,7 +49,8 @@ return sao_eo_idx_to_eo_category[eo_idx]; } -static int sao_edge_ddistortion_generic(const kvz_pixel *orig_data, +static int sao_edge_ddistortion_generic(const encoder_control_t* const encoder, + const kvz_pixel *orig_data, const kvz_pixel *rec_data, int32_t block_width, int32_t block_height, @@ -61,22 +62,24 @@ vector2d_t a_ofs = g_sao_edge_offsets[eo_class][0]; vector2d_t b_ofs = g_sao_edge_offsets[eo_class][1]; + const int bit_offset = encoder->bitdepth != 8 ? 1 << (encoder->bitdepth - 9) : 0; + for (y = 1; y < block_height - 1; y++) { for (x = 1; x < block_width - 1; x++) { uint32_t c_pos = y * block_width + x; uint32_t a_pos = (y + a_ofs.y) * block_width + x + a_ofs.x; uint32_t b_pos = (y + b_ofs.y) * block_width + x + b_ofs.x; - uint8_t a = rec_data[a_pos]; - uint8_t b = rec_data[b_pos]; - uint8_t c = rec_data[c_pos]; - uint8_t orig = orig_data[c_pos]; + kvz_pixel a = rec_data[a_pos]; + kvz_pixel b = rec_data[b_pos]; + kvz_pixel c = rec_data[c_pos]; + kvz_pixel orig = orig_data[c_pos]; int32_t eo_cat = sao_calc_eo_cat(a, b, c); int32_t offset = offsets[eo_cat]; if (offset != 0) { - int32_t diff = orig - c; + int32_t diff = (orig - c + bit_offset) >> (encoder->bitdepth - 8); int32_t delta = diff - offset; int32_t curr = delta * delta - diff * diff;
View file
kvazaar-2.3.1.tar.gz/src/strategies/strategies-ipol.h -> kvazaar-2.3.2.tar.gz/src/strategies/strategies-ipol.h
Changed
@@ -50,14 +50,14 @@ // error because AVX2 reads one extra byte in the end. #define KVZ_IPOL_MAX_INPUT_SIZE_LUMA_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * KVZ_EXT_BLOCK_W_LUMA + 1) #define KVZ_IPOL_MAX_INPUT_SIZE_CHROMA_SIMD ((KVZ_EXT_BLOCK_W_CHROMA + 3) * KVZ_EXT_BLOCK_W_CHROMA + 1) -#define KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH) +#define KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH + 1) #define KVZ_IPOL_MAX_IM_SIZE_CHROMA_SIMD ((KVZ_EXT_BLOCK_W_CHROMA + 3) * LCU_WIDTH_C) // On top of basic interpolation, FME needs one extra // column and row for ME (left and up). Adding the // extra row happens to satisfy AVX2 requirements for // row count. No other extra rows are needed. -#define KVZ_FME_MAX_INPUT_SIZE_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * (KVZ_EXT_BLOCK_W_LUMA + 1)) +#define KVZ_FME_MAX_INPUT_SIZE_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * (KVZ_EXT_BLOCK_W_LUMA + 1) + 1) typedef struct { kvz_pixel *buffer; kvz_pixel *orig_topleft; unsigned stride; unsigned malloc_used; } kvz_extended_block;
View file
kvazaar-2.3.1.tar.gz/src/strategies/strategies-sao.h -> kvazaar-2.3.2.tar.gz/src/strategies/strategies-sao.h
Changed
@@ -46,11 +46,13 @@ // Declare function pointers. -typedef int (sao_edge_ddistortion_func)(const kvz_pixel *orig_data, const kvz_pixel *rec_data, +typedef int (sao_edge_ddistortion_func)(const encoder_control_t* const encoder, + const kvz_pixel *orig_data, const kvz_pixel *rec_data, int block_width, int block_height, int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES]); -typedef void (calc_sao_edge_dir_func)(const kvz_pixel *orig_data, const kvz_pixel *rec_data, +typedef void (calc_sao_edge_dir_func)(const encoder_control_t* const encoder, + const kvz_pixel *orig_data, const kvz_pixel *rec_data, int eo_class, int block_width, int block_height, int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]);
View file
kvazaar-2.3.1.tar.gz/src/threadqueue.c -> kvazaar-2.3.2.tar.gz/src/threadqueue.c
Changed
@@ -389,10 +389,31 @@ threadqueue->first = NULL; threadqueue->last = NULL; +#ifndef _MSC_VER + pthread_attr_t attr; + if (pthread_attr_init(&attr) != 0) { + fprintf(stderr, "pthread_attr_init failed!\n"); + goto failed; + } + size_t default_stack_size; + if (pthread_attr_getstacksize(&attr, &default_stack_size) != 0) { + fprintf(stderr, "pthread_attr_getstacksize failed!\n"); + goto failed; + } + if (default_stack_size < 1024 * 1024) { + if (pthread_attr_setstacksize(&attr, 1024 * 1024) != 0) { + fprintf(stderr, "pthread_attr_setstacksize failed!\n"); + goto failed; + } + } +#else + pthread_attr_t attr; +#endif + // Lock the queue before creating threads, to ensure they all have correct information. PTHREAD_LOCK(&threadqueue->lock); for (int i = 0; i < thread_count; i++) { - if (pthread_create(&threadqueue->threads[i], NULL, threadqueue_worker, threadqueue) != 0) { + if (pthread_create(&threadqueue->threads[i], &attr, threadqueue_worker, threadqueue) != 0) { fprintf(stderr, "pthread_create failed!\n"); goto failed; } @@ -404,6 +425,11 @@ return threadqueue; failed: +#ifndef _MSC_VER + if (pthread_attr_destroy(&attr) != 0) { + fprintf(stderr, "pthread_attr_destroy failed!\n"); + } +#endif kvz_threadqueue_free(threadqueue); return NULL; }
View file
kvazaar-2.3.1.tar.gz/src/threadwrapper/include/pthread.h -> kvazaar-2.3.2.tar.gz/src/threadwrapper/include/pthread.h
Changed
@@ -27,10 +27,10 @@ typedef void* pthread_t; typedef void*(voidp_voidp_func)(void*); -typedef void pthread_attr_t; -typedef void pthread_condattr_t; -typedef void pthread_mutexattr_t; -typedef void pthread_rwlockattr_t; +typedef void* pthread_attr_t; +typedef void* pthread_condattr_t; +typedef void* pthread_mutexattr_t; +typedef void* pthread_rwlockattr_t; // Parameter names that have been commented away do nothing, // as they are always null when the functions are used in Kvazaar.
View file
kvazaar-2.3.1.tar.gz/src/yuv_io.c -> kvazaar-2.3.2.tar.gz/src/yuv_io.c
Changed
@@ -57,24 +57,43 @@ unsigned width, unsigned height, unsigned bytes_per_sample, unsigned array_width, kvz_pixel *data) { - kvz_pixel* p = data; - kvz_pixel* end = data + array_width * height; - kvz_pixel fill_char; - unsigned i; - while (p < end) { - // Read the beginning of the line from input. - if (width != fread(p, bytes_per_sample, width, file)) - return 0; - - // Fill the rest with the last pixel value. - fill_char = p[width - 1]; + unsigned i; + // Handle separately the case where we use KVZ_BIT_DEPTH 10+ but the input is 8-bit. + if (bytes_per_sample != sizeof(kvz_pixel)) { + uint8_t* p = (uint8_t*)data; + uint8_t* end = (uint8_t*)data + array_width * height; + uint8_t fill_char; + while (p < end) { + // Read the beginning of the line from input. + if (width != fread(p, bytes_per_sample, width, file)) return 0; + // Fill the rest with the last pixel value. + // Fill the rest with the last pixel value. + fill_char = p[width - 1]; + + for (i = width; i < array_width; ++i) { + p[i] = fill_char; + } - for (i = width; i < array_width; ++i) { - p[i] = fill_char; + p += array_width; } + } + else { + kvz_pixel* p = data; + kvz_pixel* end = data + array_width * height; + kvz_pixel fill_char; + while (p < end) { + // Read the beginning of the line from input. + if (width != fread(p, bytes_per_sample, width, file)) return 0; + // Fill the rest with the last pixel value. + fill_char = p[width - 1]; + + for (i = width; i < array_width; ++i) { + p[i] = fill_char; + } - p += array_width; + p += array_width; + } } return 1; } @@ -201,24 +220,23 @@ static int read_frame_header(FILE* input) { - char buffer[256]; - bool frame_start = false; - - while (!frame_start) { - for (int i = 0; i < 256; i++) { - buffer[i] = getc(input); - if (buffer[i] == EOF) return 0; - // ToDo: frame headers can have some information structured same as start headers - // This info is just skipped for now, since it's not clear what it could be. 
- if (buffer[i] == 0x0A) { - frame_start = true; - break; - } + int c; + int count = 0; + const int max_scan = 4096; // prevent infinite loops + + while ((c = getc(input)) != EOF && count < max_scan) { + count++; + // ToDo: frame headers can have some information structured same as start headers + // This info is just skipped for now, since it's not clear what it could be. + if (c == 0x0A) { + return 1; // Found frame start } } - return 1; + + return 0; // EOF or scan limit reached } + /** * \brief Read a single frame from a file. * @@ -313,7 +331,7 @@ // Seek failed. Skip data by reading. error = 0; - unsigned char* tmp[4096]; + unsigned char tmp[4096]; size_t bytes_left = skip_bytes; while (bytes_left > 0 && !error) { const size_t skip = MIN(4096, bytes_left);
View file
kvazaar-2.3.1.tar.gz/tests/CMakeLists.txt -> kvazaar-2.3.2.tar.gz/tests/CMakeLists.txt
Changed
@@ -15,18 +15,24 @@ add_definitions(-DPIC) endif() +list(APPEND ALLOW_AVX2 "x86_64" "AMD64") + if(MSVC) target_include_directories(kvazaar_tests PUBLIC ../src/threadwrapper/include) - set_property( SOURCE ${TEST_SOURCES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) + set_property( SOURCE ${TEST_SOURCES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + endif() add_definitions(-DWIN32_LEAN_AND_MEAN -D_WIN32 -DWIN32 -DWIN64) else() - list(APPEND ALLOW_AVX2 "x86_64" "AMD64") if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) set_property( SOURCE ${TEST_SOURCES} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" ) endif() - find_package(Threads REQUIRED) - target_link_libraries(kvazaar_tests PUBLIC Threads::Threads) + + if(NOT ANDROID) + find_package(Threads REQUIRED) + target_link_libraries(kvazaar_tests PUBLIC Threads::Threads) + endif() include(CheckLibraryExists)
View file
kvazaar-2.3.1.tar.gz/tests/Makefile.am -> kvazaar-2.3.2.tar.gz/tests/Makefile.am
Changed
@@ -28,7 +28,8 @@ test_tools.sh \ test_weird_shapes.sh \ test_pu_depth_constraints.sh \ - util.sh + util.sh \ + CMakeLists.txt check_PROGRAMS = kvazaar_tests
View file
kvazaar-2.3.1.tar.gz/tests/test_owf_wpp_tiles.sh -> kvazaar-2.3.2.tar.gz/tests/test_owf_wpp_tiles.sh
Changed
@@ -7,7 +7,7 @@ set -eu . "${0%/*}/util.sh" -common_args='-p4 --rd=0 --no-rdoq --no-signhide --subme=0 --deblock --sao --pu-depth-inter=1-3 --pu-depth-intra=2-3' +common_args='--gop lp-g8d3t1 -p4 --rd=0 --no-rdoq --no-signhide --subme=0 --deblock --sao --pu-depth-inter=1-3 --pu-depth-intra=2-3' valgrind_test 264x130 10 $common_args -r1 --owf=1 --threads=0 --no-wpp valgrind_test 264x130 10 $common_args -r1 --owf=0 --threads=0 --no-wpp valgrind_test 264x130 10 $common_args -r2 --owf=1 --threads=2 --wpp
View file
kvazaar-2.3.1.tar.gz/tests/tests_main.c -> kvazaar-2.3.2.tar.gz/tests/tests_main.c
Changed
@@ -51,7 +51,7 @@ { GREATEST_MAIN_BEGIN(); - init_test_strategies(1); + init_test_strategies(); #if KVZ_BIT_DEPTH == 8 RUN_SUITE(sad_tests); RUN_SUITE(intra_sad_tests);
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.