Projects
Essentials
kvazaar
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 22
View file
kvazaar.changes
Changed
@@ -1,4 +1,29 @@ ------------------------------------------------------------------- +Wed Sep 17 16:30:44 UTC 2025 - Luigi Baldoni <aloisio@gmx.com> + +- Update to version 2.3.2 + Fixes: + * Several fixes to 10 bit encoding + * Fix SATD calculation + * Ensure proper stack size + * Fix typing for threadwrapper + * Separate .pdb files for exe and library + * Fix y4m reading + * Update minimum CMake version + * Fix --no-early-skip + * Fix lossless incorrectly selecting skip + * Rate control improvements + * Fix bit cost calculation for tr splits and smp + * Update the default configuration to match the medium preset + External contributions: + * add missing files in distribution for compilation with cmake + * ax_pthread/clang: move -pthread to LIBS + * Fix emscripten build + * Avoid double free of roi.file_path + * Fix passing too many arguments to "init_test_strategies". +- Rebased kvazaar.patch + +------------------------------------------------------------------- Wed Apr 10 11:16:02 UTC 2024 - Luigi Baldoni <aloisio@gmx.com> - Update to version 2.3.1
View file
kvazaar.spec
Changed
@@ -1,7 +1,7 @@ # # spec file for package kvazaar # -# Copyright (c) 2024 Packman Team <packman@links2linux.de> +# Copyright (c) 2025 Packman Team <packman@links2linux.de> # Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties @@ -19,14 +19,14 @@ %define libname libkvazaar %define libmver 7 Name: kvazaar -Version: 2.3.1 +Version: 2.3.2 Release: 0 Summary: HEVC encoder License: BSD-3-Clause URL: http://ultravideo.cs.tut.fi/#encoder Source0: https://github.com/ultravideo/%{name}/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz -Patch0: %name.patch -BuildRequires: cmake >= 3.12 +Patch0: %%name.patch +BuildRequires: cmake >= 3.25 BuildRequires: pkgconfig %if 0%{?suse_version} > 1500 BuildRequires: gcc-c++
View file
kvazaar.patch
Changed
@@ -1,10 +1,12 @@ ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -235,7 +235,7 @@ +Index: kvazaar-2.3.2/CMakeLists.txt +=================================================================== +--- kvazaar-2.3.2.orig/CMakeLists.txt ++++ kvazaar-2.3.2/CMakeLists.txt +@@ -258,7 +258,7 @@ else() # CYGWIN, MSYS, and MINGW seem to be needing this but in some cases # it might be that the toolset is not properly set, so also use this # in cases where we are not sure that it is not needed -- if((NOT MSVC AND NOT LINUX AND NOT APPLE) OR (CYGWIN OR MSYS OR MINGW)) +- if((NOT MSVC AND NOT LINUX AND NOT APPLE AND NOT ANDROID AND NOT EMSCRIPTEN AND NOT BSD) OR (CYGWIN OR MSYS OR MINGW)) + if(FALSE) set(CMAKE_C_FLAGS "-Wa,-muse-unaligned-vector-move ${CMAKE_C_FLAGS}") endif()
View file
kvazaar-2.3.1.tar.gz/.gitignore -> kvazaar-2.3.2.tar.gz/.gitignore
Changed
@@ -54,6 +54,7 @@ src/kvazaar src/libkvazaar.so.* src/kvazaar.pc +src/kvazaar.pc.temp src/version.h tests/kvazaar_tests tests/kvazaar_tests.trs
View file
kvazaar-2.3.1.tar.gz/CMakeLists.txt -> kvazaar-2.3.2.tar.gz/CMakeLists.txt
Changed
@@ -1,10 +1,10 @@ -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.25) project(kvazaar LANGUAGES C CXX HOMEPAGE_URL https://github.com/ultravideo/kvazaar DESCRIPTION "An open-source HEVC encoder licensed under 3-clause BSD" -VERSION 2.3.1 ) +VERSION 2.3.2 ) option(BUILD_SHARED_LIBS "Build using shared kvazaar library" ON) @@ -12,6 +12,8 @@ option(USE_CRYPTO "Use crypto library" OFF) +option(BUILD_KVAZAAR_BINARY "Build kvazaar-bin" ON) # To build only the lib, useful for iOS-builds + include(GNUInstallDirs) #Helps to define correct distro specific install directories set(DEFERRED "@") @@ -213,20 +215,41 @@ list(APPEND CLI_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/extras/getopt.c ${CMAKE_CURRENT_SOURCE_DIR}/src/threadwrapper/src/pthread.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/threadwrapper/src/semaphore.cpp) endif() -add_executable(kvazaar-bin ${CLI_SOURCES}) +if(BUILD_KVAZAAR_BINARY) + add_executable(kvazaar-bin ${CLI_SOURCES}) + + if(MSVC AND BUILD_SHARED_LIBS) + set_target_properties(kvazaar PROPERTIES + OUTPUT_NAME kvazaar + PDB_NAME kvazaar-lib + ) + + set_target_properties(kvazaar-bin PROPERTIES + OUTPUT_NAME kvazaar + PDB_NAME kvazaar-bin + ) + else() + set_target_properties(kvazaar-bin PROPERTIES + OUTPUT_NAME kvazaar + ) + endif() + set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_NAME kvazaar) -set_target_properties(kvazaar-bin PROPERTIES OUTPUT_NAME kvazaar) -set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_NAME kvazaar) + target_link_libraries(kvazaar-bin PUBLIC kvazaar) +endif() -target_link_libraries(kvazaar-bin PUBLIC kvazaar) +list(APPEND ALLOW_AVX2 "x86_64" "AMD64") if(MSVC) target_include_directories(kvazaar PUBLIC src/threadwrapper/include) - set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) + set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + endif() else() - 
set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src) + if(BUILD_KVAZAAR_BINARY) + set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src) + endif() set_target_properties(kvazaar PROPERTIES SOVERSION "7" VERSION "7.4.0") - list(APPEND ALLOW_AVX2 "x86_64" "AMD64") if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" ) set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) @@ -235,14 +258,15 @@ # CYGWIN, MSYS, and MINGW seem to be needing this but in some cases # it might be that the toolset is not properly set, so also use this # in cases where we are not sure that it is not needed - if((NOT MSVC AND NOT LINUX AND NOT APPLE) OR (CYGWIN OR MSYS OR MINGW)) + if((NOT MSVC AND NOT LINUX AND NOT APPLE AND NOT ANDROID AND NOT EMSCRIPTEN AND NOT BSD) OR (CYGWIN OR MSYS OR MINGW)) set(CMAKE_C_FLAGS "-Wa,-muse-unaligned-vector-move ${CMAKE_C_FLAGS}") endif() - - set(THREADS_PREFER_PTHREAD_FLAG ON) - find_package(Threads REQUIRED) - target_link_libraries(kvazaar PUBLIC Threads::Threads) + if(NOT ANDROID AND NOT EMSCRIPTEN) + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) + target_link_libraries(kvazaar PUBLIC Threads::Threads) + endif() include(CheckLibraryExists) @@ -256,7 +280,9 @@ endif () target_link_libraries(kvazaar PUBLIC ${EXTRA_LIBS}) - target_link_libraries(kvazaar-bin PUBLIC ${EXTRA_LIBS} ) + if(BUILD_KVAZAAR_BINARY) + target_link_libraries(kvazaar-bin PUBLIC ${EXTRA_LIBS} ) + endif() endif() @@ -308,7 +334,9 @@ install(CODE "configure_file(\"${PROJECT_SOURCE_DIR}/src/kvazaar.pc.temp\" \"${PROJECT_SOURCE_DIR}/src/kvazaar.pc\" @ONLY)") install(FILES ${PROJECT_SOURCE_DIR}/src/kvazaar.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) -install(TARGETS kvazaar-bin DESTINATION 
${CMAKE_INSTALL_BINDIR}) +if(BUILD_KVAZAAR_BINARY) + install(TARGETS kvazaar-bin DESTINATION ${CMAKE_INSTALL_BINDIR}) +endif() install(TARGETS kvazaar RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
View file
kvazaar-2.3.1.tar.gz/README.md -> kvazaar-2.3.2.tar.gz/README.md
Changed
@@ -51,7 +51,7 @@ comment: # "BEGIN KVAZAAR HELP MESSAGE" ``` -Kvazaar v2.3.1 2024-04-10 +Kvazaar v2.3.2 2025-09-16 Kvazaar license: 3-clause BSD Usage: kvazaar -i <input> --input-res <width>x<height> -o <output> @@ -126,7 +126,7 @@ - 0: Only send VPS with the first frame. - N: Send VPS with every Nth intra frame. -r, --ref <integer> : Number of reference frames, in range 1..15 4 - --gop <string> : GOP structure lp-g4d3t1 + --gop <string> : GOP structure 16 - 0: Disabled - 8: B-frame pyramid of length 8 - 16: B-frame pyramid of length 16 @@ -262,7 +262,7 @@ guaranteed to produce sensible bitstream or work at all. disabled --tr-depth-intra <int> : Transform split depth for intra blocks 0 - --(no-)bipred : Bi-prediction disabled + --(no-)bipred : Bi-prediction enabled --cu-split-termination <string> : CU split search termination zero - off: Don't terminate early. - zero: Terminate when residual is zero. @@ -383,7 +383,7 @@ | | 0-uf | 1-sf | 2-vf | 3-fr | 4-f | 5-m | 6-s | 7-sr | 8-vs | 9-p | | -------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| rd | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 2 | 2 | +| rd | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 3 | 3 | | pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-4 | 1-4 | 1-4 | 1-4 | 1-4 | | pu-depth-inter | 1-2 | 1-2 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 | 0-3 | 0-3 | | me | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz | tz | @@ -400,7 +400,7 @@ | mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | -| amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | | cu-split-termination | zero | zero | zero | zero | zero | zero | zero | zero | zero | off | | me-early-termination | sens. | sens. | sens. | sens. | sens. 
| on | on | off | off | off | | intra-rdo-et | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -491,7 +491,7 @@ Please cite this paper(https://dl.acm.org/citation.cfm?doid=2964284.2973796) for Kvazaar: -```M. Viitanen, A. Koivula, A. Lemmetti, A. Ylä-Outinen, J. Vanne, and T. D. Hämäläinen, Kvazaar: open-source HEVC/H.265 encoder, in Proc. ACM Int. Conf. Multimedia, Amsterdam, The Netherlands, Oct. 2016.``` +```M. Viitanen, A. Koivula, A. Lemmetti, A. Ylä-Outinen, J. Vanne, and T. D. Hämäläinen, “Kvazaar: open-source HEVC/H.265 encoder,” in Proc. ACM Int. Conf. Multimedia, Amsterdam, The Netherlands, Oct. 2016.``` Or in BibTex:
View file
kvazaar-2.3.1.tar.gz/configure.ac -> kvazaar-2.3.2.tar.gz/configure.ac
Changed
@@ -23,7 +23,7 @@ # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html ver_major=7 -ver_minor=4 +ver_minor=5 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS
View file
kvazaar-2.3.1.tar.gz/doc/kvazaar.1 -> kvazaar-2.3.2.tar.gz/doc/kvazaar.1
Changed
@@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "April 2024" "kvazaar v2.3.1" "User Commands" +.TH KVAZAAR "1" "September 2025" "kvazaar v2.3.2" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -133,7 +133,7 @@ Number of reference frames, in range 1..15 4 .TP \fB\-\-gop <string> -GOP structure lp\-g4d3t1 +GOP structure 16 \- 0: Disabled \- 8: B\-frame pyramid of length 8 \- 16: B\-frame pyramid of length 16 @@ -347,7 +347,7 @@ Transform split depth for intra blocks 0 .TP \fB\-\-(no\-)bipred -Bi\-prediction disabled +Bi\-prediction enabled .TP \fB\-\-cu\-split\-termination <string> CU split search termination zero
View file
kvazaar-2.3.1.tar.gz/m4/ax_pthread.m4 -> kvazaar-2.3.2.tar.gz/m4/ax_pthread.m4
Changed
@@ -261,8 +261,8 @@ # -pthread does define _REENTRANT, and while the Darwin headers # ignore this macro, third-party headers might not.) - PTHREAD_CFLAGS="-pthread" - PTHREAD_LIBS= + PTHREAD_CFLAGS= + PTHREAD_LIBS="-pthread" ax_pthread_ok=yes
View file
kvazaar-2.3.1.tar.gz/src/Makefile.am -> kvazaar-2.3.2.tar.gz/src/Makefile.am
Changed
@@ -4,8 +4,10 @@ EXTRA_DIST = \ extras/getopt.c \ - extras/getopt.h - + extras/getopt.h \ + version.h.in \ + kvazaarCMake.pc.in \ + threadwrapper pkgconfigdir = $(libdir)/pkgconfig nodist_pkgconfig_DATA = kvazaar.pc
View file
kvazaar-2.3.1.tar.gz/src/cfg.c -> kvazaar-2.3.2.tar.gz/src/cfg.c
Changed
@@ -63,10 +63,10 @@ cfg->sao_type = 3; cfg->rdoq_enable = 1; cfg->rdoq_skip = 1; - cfg->signhide_enable = true; + cfg->signhide_enable = false; cfg->smp_enable = false; cfg->amp_enable = false; - cfg->rdo = 1; + cfg->rdo = 0; cfg->mv_rdo = 0; cfg->full_intra_search = 0; cfg->trskip_enable = 0; @@ -86,9 +86,7 @@ cfg->aud_enable = 0; cfg->cqmfile = NULL; cfg->fast_coeff_table_fn = NULL; - cfg->ref_frames = 1; - cfg->gop_len = 4; - cfg->gop_lowdelay = true; + cfg->ref_frames = 4; cfg->bipred = 0; cfg->target_bitrate = 0; cfg->hash = KVZ_HASH_CHECKSUM; @@ -118,10 +116,10 @@ memset( cfg->pu_depth_inter.max, -1, sizeof( cfg->pu_depth_inter.max ) ); memset( cfg->pu_depth_intra.min, -1, sizeof( cfg->pu_depth_intra.min ) ); memset( cfg->pu_depth_intra.max, -1, sizeof( cfg->pu_depth_intra.max ) ); - *cfg->pu_depth_inter.min = 2; // 0-3 + *cfg->pu_depth_inter.min = 0; // 0-3 *cfg->pu_depth_inter.max = 3; // 0-3 - *cfg->pu_depth_intra.min = 2; // 0-4 - *cfg->pu_depth_intra.max = 3; // 0-4 + *cfg->pu_depth_intra.min = 1; // 0-4 + *cfg->pu_depth_intra.max = 4; // 0-4 cfg->add_encoder_info = true; cfg->calc_psnr = true; @@ -135,9 +133,11 @@ cfg->input_format = KVZ_FORMAT_P420; cfg->input_bitdepth = 8; - cfg->gop_lp_definition.d = 3; - cfg->gop_lp_definition.t = 1; + cfg->gop_lowdelay = 0; + cfg->gop_len = sizeof(kvz_gop_ra16) / sizeof(kvz_gop_ra160); + memcpy(cfg->gop, kvz_gop_ra16, sizeof(kvz_gop_ra16)); cfg->open_gop = true; + cfg->roi.file_path = NULL; cfg->roi.format = KVZ_ROI_TXT; @@ -725,7 +725,7 @@ "mv-rdo", "0", "full-intra-search", "0", "smp", "1", - "amp", "0", + "amp", "1", "cu-split-termination", "zero", "me-early-termination", "off", "intra-rdo-et", "0", @@ -793,10 +793,12 @@ if (sscanf(value, "%d/%d", &fps_num, &fps_denom) == 2) { cfg->framerate_num = fps_num; cfg->framerate_denom = fps_denom; + cfg->framerate = (double)fps_num / fps_denom; } else { // Accept decimal notation, making sure not to round 0 to 1. 
cfg->framerate_num = (int)(atof(value) * 1000 + 0.49); cfg->framerate_denom = 1000; + cfg->framerate = atof(value); } } else if OPT("qp")
View file
kvazaar-2.3.1.tar.gz/src/cli.c -> kvazaar-2.3.2.tar.gz/src/cli.c
Changed
@@ -485,7 +485,7 @@ " - 0: Only send VPS with the first frame.\n" " - N: Send VPS with every Nth intra frame.\n" " -r, --ref <integer> : Number of reference frames, in range 1..15 4\n" - " --gop <string> : GOP structure lp-g4d3t1\n" + " --gop <string> : GOP structure 16\n" " - 0: Disabled\n" " - 8: B-frame pyramid of length 8\n" " - 16: B-frame pyramid of length 16\n" @@ -622,7 +622,7 @@ " guaranteed to produce sensible bitstream or\n" " work at all. disabled\n" " --tr-depth-intra <int> : Transform split depth for intra blocks 0\n" - " --(no-)bipred : Bi-prediction disabled\n" + " --(no-)bipred : Bi-prediction enabled\n" " --cu-split-termination <string> : CU split search termination zero\n" " - off: Don't terminate early.\n" " - zero: Terminate when residual is zero.\n"
View file
kvazaar-2.3.1.tar.gz/src/encmain.c -> kvazaar-2.3.2.tar.gz/src/encmain.c
Changed
@@ -37,8 +37,12 @@ #ifdef _WIN32 /* The following two defines must be located before the inclusion of any system header files. */ +#ifndef WINVER #define WINVER 0x0500 +#endif +#ifndef _WIN32_WINNT #define _WIN32_WINNT 0x0500 +#endif #include "global.h" // IWYU pragma: keep @@ -341,9 +345,11 @@ bufferi = getc(input); // Start code of frame data if (bufferi == 0x0A) { - for (; i > 0; i--) { - ungetc(bufferi, input); - } + // There should not be any reason to ungetc the last parameter, but this was there for + // some reason in the original code. Leave as a comment for now, in case it is needed later. + //for (; i > 0; i--) { + // ungetc(bufferi, input); + //} end_of_header = true; break; }
View file
kvazaar-2.3.1.tar.gz/src/encoder.c -> kvazaar-2.3.2.tar.gz/src/encoder.c
Changed
@@ -41,6 +41,7 @@ #include "strategyselector.h" #include "kvz_math.h" #include "fast_coeff_cost.h" +#include "rate_control.h" static int encoder_control_init_gop_layer_weights(encoder_control_t * const); @@ -153,6 +154,12 @@ // Take a copy of the config. memcpy(&encoder->cfg, cfg, sizeof(encoder->cfg)); + + // Copy the ROI file path + if (cfg->roi.file_path) { + encoder->cfg.roi.file_path = strdup(cfg->roi.file_path); + } + // Set fields that are not copied to NULL. encoder->cfg.cqmfile = NULL; encoder->cfg.tiles_width_split = NULL; @@ -596,6 +603,15 @@ memcpy(encoder->cfg.optional_key, cfg->optional_key, 16); } + if (encoder->cfg.target_bitrate > 0 && cfg->rc_algorithm != KVZ_NO_RC) + { + encoder->rc_data = kvz_get_rc_data(encoder); + if (!encoder->rc_data) { + fprintf(stderr, "Failed to initialize rate control.\n"); + goto init_failed; + } + } + return encoder; init_failed: @@ -639,6 +655,8 @@ fclose(encoder->roi_file); } + kvz_free_rc_data(encoder->rc_data); + free(encoder); }
View file
kvazaar-2.3.1.tar.gz/src/encoder.h -> kvazaar-2.3.2.tar.gz/src/encoder.h
Changed
@@ -44,6 +44,8 @@ #include "threadqueue.h" #include "fast_coeff_cost.h" +struct rc_data; + /* Encoder control options, the main struct */ typedef struct encoder_control_t { @@ -149,6 +151,8 @@ fast_coeff_table_t fast_coeff_table; + struct kvz_rc_data *rc_data; + } encoder_control_t; encoder_control_t* kvz_encoder_control_init(const kvz_config *cfg);
View file
kvazaar-2.3.1.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-2.3.2.tar.gz/src/encoder_state-ctors_dtors.c
Changed
@@ -94,7 +94,7 @@ pthread_mutex_init(&state->frame->rc_lock, NULL); - state->frame->new_ratecontrol = kvz_get_rc_data(NULL); + state->frame->new_ratecontrol = state->encoder_control->rc_data; return 1; }
View file
kvazaar-2.3.1.tar.gz/src/global.h -> kvazaar-2.3.2.tar.gz/src/global.h
Changed
@@ -220,7 +220,7 @@ #define QUOTE_EXPAND(x) QUOTE(x) #ifndef KVZ_VERSION -#define KVZ_VERSION 2.3.1 +#define KVZ_VERSION 2.3.2 #endif #define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION) @@ -365,7 +365,7 @@ # define COMPILE_POWERPC 0 #endif -#if defined (_M_ARM) || defined(__arm__) || defined(__thumb__) +#if defined (_M_ARM) || defined(_M_ARM64) || defined(__arm__) || defined(__thumb__) # define COMPILE_ARM 1 #else # define COMPILE_ARM 0
View file
kvazaar-2.3.1.tar.gz/src/image.c -> kvazaar-2.3.2.tar.gz/src/image.c
Changed
@@ -472,13 +472,13 @@ pic_data, pic->stride, ref_data, - ref->stride) >> (KVZ_BIT_DEPTH - 8); + ref->stride); } else { // Extrapolate pixels from outside the frame. // Space for extrapolated pixels and the part from the picture // The extrapolation function will set the pointers and stride. - kvz_pixel ext_bufferLCU_LUMA_SIZE; + kvz_pixel ext_bufferLCU_LUMA_SIZE + 1; kvz_pixel *ext = NULL; kvz_pixel *ext_origin = NULL; int ext_s = 0; @@ -514,7 +514,7 @@ pic_data, pic->stride, ext_origin, - ext_s) >> (KVZ_BIT_DEPTH - 8); + ext_s); return satd; }
View file
kvazaar-2.3.1.tar.gz/src/intra.c -> kvazaar-2.3.2.tar.gz/src/intra.c
Changed
@@ -632,8 +632,9 @@ { const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const int8_t width = LCU_WIDTH >> depth; + cu_info_t* cur_tu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); if (cur_cu == NULL) { - cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); + cur_cu = cur_tu; } // Reset CBFs because CBFs might have been set @@ -684,5 +685,14 @@ } kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false); + if (cur_cu != cur_tu) + { + if (has_luma) cbf_copy(&cur_tu->cbf, cur_cu->cbf, COLOR_Y); + if (has_chroma) + { + cbf_copy(&cur_tu->cbf, cur_cu->cbf, COLOR_U); + cbf_copy(&cur_tu->cbf, cur_cu->cbf, COLOR_V); + } + } } }
View file
kvazaar-2.3.1.tar.gz/src/kvazaar.c -> kvazaar-2.3.2.tar.gz/src/kvazaar.c
Changed
@@ -78,7 +78,6 @@ } FREE_POINTER(encoder->states); - kvz_free_rc_data(); // Discard const from the pointer. kvz_encoder_control_free((void*) encoder->control); encoder->control = NULL; @@ -114,11 +113,6 @@ encoder->frames_started = 0; encoder->frames_done = 0; - // Assure that the rc data allocation was successful - if(!kvz_get_rc_data(encoder->control)) { - goto kvazaar_open_failure; - } - kvz_init_input_frame_buffer(&encoder->input_buffer); encoder->states = calloc(encoder->num_encoder_states, sizeof(encoder_state_t));
View file
kvazaar-2.3.1.tar.gz/src/rate_control.c -> kvazaar-2.3.2.tar.gz/src/rate_control.c
Changed
@@ -40,17 +40,10 @@ static const int MIN_SMOOTHING_WINDOW = 40; -static int smoothing_window = 40; static const double MIN_LAMBDA = 0.1; static const double MAX_LAMBDA = 10000; #define BETA1 1.2517 -static kvz_rc_data *data; - -static FILE *dist_file; -static FILE *bits_file; -static FILE *qp_file; -static FILE *lambda_file; /** * \brief Clip lambda value to a valid range. @@ -61,9 +54,7 @@ } kvz_rc_data * kvz_get_rc_data(const encoder_control_t * const encoder) { - if (data != NULL || encoder == NULL) return data; - - data = calloc(1, sizeof(kvz_rc_data)); + kvz_rc_data* data = calloc(1, sizeof(kvz_rc_data)); if (data == NULL) return NULL; if (pthread_mutex_init(&data->ck_frame_lock, NULL) != 0) return NULL; @@ -107,18 +98,21 @@ if(encoder->cfg.stats_file_prefix) { char buff128; sprintf(buff, "%sbits.txt", encoder->cfg.stats_file_prefix); - bits_file = fopen(buff, "w"); + data->bits_file = fopen(buff, "w"); sprintf(buff, "%sdist.txt", encoder->cfg.stats_file_prefix); - dist_file = fopen(buff, "w"); + data->dist_file = fopen(buff, "w"); sprintf(buff, "%sqp.txt", encoder->cfg.stats_file_prefix); - qp_file = fopen(buff, "w"); + data->qp_file = fopen(buff, "w"); sprintf(buff, "%slambda.txt", encoder->cfg.stats_file_prefix); - lambda_file = fopen(buff, "w"); + data->lambda_file = fopen(buff, "w"); } + + data->smoothing_window = MIN_SMOOTHING_WINDOW; + return data; } -void kvz_free_rc_data() { +void kvz_free_rc_data(kvz_rc_data *data) { if (data == NULL) return; pthread_mutex_destroy(&data->ck_frame_lock); @@ -189,16 +183,16 @@ bits_coded -= state->frame->cur_gop_bits_coded; } - smoothing_window = MAX(MIN_SMOOTHING_WINDOW, smoothing_window - MAX(encoder->cfg.gop_len / 2, 1)); + state->frame->new_ratecontrol->smoothing_window = MAX(MIN_SMOOTHING_WINDOW, state->frame->new_ratecontrol->smoothing_window - MAX(encoder->cfg.gop_len / 2, 1)); double gop_target_bits = -1; - while( gop_target_bits < 0 && smoothing_window < 150) { + while( gop_target_bits < 0 && 
state->frame->new_ratecontrol->smoothing_window < 150) { // Equation 12 from https://doi.org/10.1109/TIP.2014.2336550 gop_target_bits = - (encoder->target_avg_bppic * (pictures_coded + smoothing_window) - bits_coded) - * MAX(1, encoder->cfg.gop_len) / smoothing_window; + (encoder->target_avg_bppic * (pictures_coded + state->frame->new_ratecontrol->smoothing_window) - bits_coded) + * MAX(1, encoder->cfg.gop_len) / state->frame->new_ratecontrol->smoothing_window; if(gop_target_bits < 0) { - smoothing_window += 10; + state->frame->new_ratecontrol->smoothing_window += 10; } } // Allocate at least 200 bits for each GOP like HM does. @@ -375,7 +369,12 @@ else { alpha = 0.3; } - return MIN(MAX(100, alpha*pow(state->frame->icost * 4 / bits, beta)*bits), encoder->cfg.gop_len >= 2 ? 0.85 * state->frame->cur_gop_target_bits : state->frame->cur_gop_target_bits); + + double low_limit = (encoder->cfg.rc_algorithm == KVZ_LAMBDA && encoder->cfg.rdo < 2 ? 1.0 : 1.2) * pow(state->encoder_control->cfg.framerate, -0.873) * state->encoder_control->cfg.target_bitrate; + double high_limit = (encoder->cfg.rdo < 2 ? (encoder->cfg.rc_algorithm == KVZ_LAMBDA ? 1.1 : 3.5) : 2.25) * pow(state->encoder_control->cfg.framerate, -0.61) * state->encoder_control->cfg.target_bitrate; + double original_bits = alpha * pow(state->frame->icost * 4 / bits, beta) * bits; + double limited = MIN(MAX(low_limit, original_bits), high_limit); + return limited; } if (encoder->cfg.gop_len <= 0) { @@ -387,7 +386,8 @@ const double pic_target_bits = state->frame->cur_gop_target_bits * pic_weight - pic_header_bits(state); // Allocate at least 100 bits for each picture like HM does. - return MAX(100, pic_target_bits); + const double intra_bits = state->frame->is_irap ? (encoder->cfg.rdo < 2 ? 
4 : 6) * state->encoder_control->target_avg_bppic : pic_target_bits; + return MAX(MAX(100, pic_target_bits), intra_bits); } static int8_t lambda_to_qp(const double lambda) @@ -716,7 +716,7 @@ alpha = -state->frame->c_paraindex * state->frame->k_paraindex; beta = state->frame->k_paraindex - 1; } - + alpha *= (double)(1 << (KVZ_BIT_DEPTH - 8)); double est_lambda; int est_qp; if (state->frame->is_irap && encoder->cfg.intra_bit_allocation) { @@ -924,10 +924,10 @@ if (encoder->cfg.stats_file_prefix) { int poc = calc_poc(state); - fprintf(dist_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); - fprintf(bits_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); - fprintf(qp_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); - fprintf(lambda_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + fprintf(state->frame->new_ratecontrol->dist_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + fprintf(state->frame->new_ratecontrol->bits_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + fprintf(state->frame->new_ratecontrol->qp_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + fprintf(state->frame->new_ratecontrol->lambda_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu); } for(int y_ctu = 0; y_ctu < state->encoder_control->in.height_in_lcu; y_ctu++) { @@ -945,17 +945,17 @@ total_distortion += (double)ctu_distortion / ctu->pixels; lambda += ctu->lambda / (state->encoder_control->in.width_in_lcu * state->encoder_control->in.height_in_lcu); if(encoder->cfg.stats_file_prefix) { - fprintf(dist_file, "%f ", ctu->distortion); - fprintf(bits_file, "%d ", ctu->bits); - fprintf(qp_file, "%d ", ctu->adjust_qp ? ctu->adjust_qp : ctu->qp); - fprintf(lambda_file, "%f ", ctu->adjust_lambda ? 
ctu->adjust_lambda : ctu->lambda); + fprintf(state->frame->new_ratecontrol->dist_file, "%f ", ctu->distortion); + fprintf(state->frame->new_ratecontrol->bits_file, "%d ", ctu->bits); + fprintf(state->frame->new_ratecontrol->qp_file, "%d ", ctu->adjust_qp ? ctu->adjust_qp : ctu->qp); + fprintf(state->frame->new_ratecontrol->lambda_file, "%f ", ctu->adjust_lambda ? ctu->adjust_lambda : ctu->lambda); } } if (encoder->cfg.stats_file_prefix) { - fprintf(dist_file, "\n"); - fprintf(bits_file, "\n"); - fprintf(qp_file, "\n"); - fprintf(lambda_file, "\n"); + fprintf(state->frame->new_ratecontrol->dist_file, "\n"); + fprintf(state->frame->new_ratecontrol->bits_file, "\n"); + fprintf(state->frame->new_ratecontrol->qp_file, "\n"); + fprintf(state->frame->new_ratecontrol->lambda_file, "\n"); } }
View file
kvazaar-2.3.1.tar.gz/src/rate_control.h -> kvazaar-2.3.2.tar.gz/src/rate_control.h
Changed
@@ -62,10 +62,17 @@ pthread_mutex_t ck_frame_lock; pthread_mutex_t lambda_lock; pthread_mutex_t intra_lock; + + int smoothing_window; + + FILE* dist_file; + FILE* bits_file; + FILE* qp_file; + FILE* lambda_file; } kvz_rc_data; kvz_rc_data * kvz_get_rc_data(const encoder_control_t * const encoder); -void kvz_free_rc_data(); +void kvz_free_rc_data(kvz_rc_data* data); void kvz_set_picture_lambda_and_qp(encoder_state_t * const state);
View file
kvazaar-2.3.1.tar.gz/src/sao.c -> kvazaar-2.3.2.tar.gz/src/sao.c
Changed
@@ -385,7 +385,7 @@ // Call calc_sao_edge_dir once for luma and twice for chroma. for (i = 0; i < buf_cnt; ++i) { FILL(cat_sum_cnt, 0); - kvz_calc_sao_edge_dir(datai, recdatai, edge_class, + kvz_calc_sao_edge_dir(state->encoder_control, datai, recdatai, edge_class, block_width, block_height, cat_sum_cnt); @@ -514,7 +514,7 @@ unsigned buf_i; for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { - ddistortion += kvz_sao_edge_ddistortion(databuf_i, recdatabuf_i, + ddistortion += kvz_sao_edge_ddistortion(state->encoder_control, databuf_i, recdatabuf_i, block_width, block_height, edge_sao.eo_class, &edge_sao.offsets5 * buf_i); } @@ -577,7 +577,7 @@ switch (merge_cand->type) { case SAO_TYPE_EDGE: for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { - ddistortion += kvz_sao_edge_ddistortion(databuf_i, recdatabuf_i, + ddistortion += kvz_sao_edge_ddistortion(state->encoder_control, databuf_i, recdatabuf_i, block_width, block_height, merge_cand->eo_class, &merge_cand->offsets5 * buf_i); }
View file
kvazaar-2.3.1.tar.gz/src/search.c -> kvazaar-2.3.2.tar.gz/src/search.c
Changed
@@ -233,6 +233,7 @@ * Calculate RD cost for a Coding Unit. * \return Cost of block * \param ref_cu CU used for prediction parameters. +* \param parent_tu * * Calculates the RDO cost of a single CU that will not be split further. * Takes into account SSD of reconstruction and the cost of encoding whatever @@ -241,10 +242,10 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu) + const cu_info_t* const parent_tu, lcu_t *const lcu) { const int width = LCU_WIDTH >> depth; - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0); // cur_cu is used for TU parameters. cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); @@ -283,10 +284,10 @@ int offset = width / 2; double sum = 0; - sum += kvz_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); + sum += kvz_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); return sum + tr_tree_bits * state->lambda; } @@ -302,8 +303,8 @@ if (state->encoder_control->chroma_format != KVZ_CSP_400) { cabac_ctx_t* cr_ctx = &(cabac->ctx.qt_cbf_model_chromadepth - tr_cu->depth); cabac->cur_ctx = cr_ctx; - int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); - int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V); + int 
u_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_U); + int v_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_V); CABAC_FBITS_UPDATE(cabac, cr_ctx, u_is_set, tr_tree_bits, "cbf_cb_search"); CABAC_FBITS_UPDATE(cabac, cr_ctx, v_is_set, tr_tree_bits, "cbf_cb_search"); } @@ -311,7 +312,7 @@ // Add transform_tree cbf_luma bit cost. const int is_tr_split = tr_cu->tr_depth - tr_cu->depth; - int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y); + int is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_Y); if (pred_cu->type == CU_INTRA || is_tr_split || cbf_is_set(tr_cu->cbf, depth, COLOR_U) || @@ -348,12 +349,12 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu) + const cu_info_t* const parent_tu, lcu_t *const lcu) { const vector2d_t lcu_px = { x_px / 2, y_px / 2 }; const int width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth; cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0); double tr_tree_bits = 0; double coeff_bits = 0; @@ -367,18 +368,18 @@ return 0; } - int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); - int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V); + int u_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_U); + int v_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_V); // See luma for why the second condition if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth) && !skip_residual_coding) { const int tr_depth = depth - pred_cu->depth; cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac; cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_chromatr_depth); cabac->cur_ctx = ctx; - if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { + if (tr_depth == 0 || 
cbf_is_set(tr_cu->cbf, depth - 1, COLOR_U)) { CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search"); } - if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { + if (tr_depth == 0 || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_V)) { CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search"); } } @@ -387,10 +388,10 @@ int offset = LCU_WIDTH >> (depth + 1); double sum = 0; - sum += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); - sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); + sum += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); + sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); return sum + tr_tree_bits * state->lambda; } @@ -424,10 +425,10 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state, const int x_px, const int y_px, const int depth, const cu_info_t* const pred_cu, - lcu_t* const lcu) { + const cu_info_t* const parent_tu, lcu_t* const lcu) { const int width = LCU_WIDTH >> depth; - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0); // cur_cu is used for TU parameters. 
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); @@ -445,7 +446,7 @@ cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac; - { + if (depth == pred_cu->depth){ int cbf = cbf_is_set_any(pred_cu->cbf, depth); // Only need to signal coded block flag if not skipped or merged // skip = no coded residual, merge = coded residual @@ -474,10 +475,10 @@ } if(state->encoder_control->chroma_format != KVZ_CSP_400 && !skip_residual_coding) { - if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { + if(tr_cu->depth == depth || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_U)) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_chromadepth - tr_cu->depth), cb_flag_u, tr_tree_bits, "cbf_cb"); } - if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { + if(tr_cu->depth == depth || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_V)) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_chromadepth - tr_cu->depth), cb_flag_v, tr_tree_bits, "cbf_cr"); } } @@ -486,10 +487,10 @@ int offset = LCU_WIDTH >> (depth + 1); double sum = 0; - sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu); - sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu); - sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); - sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); + sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu); + sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); + sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu); return sum + tr_tree_bits * state->lambda; } const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) ; @@ -515,7 +516,7 @@ width); } - { + 
if(!skip_residual_coding) { int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); const coeff_t* coeffs = &lcu->coeff.yxy_to_zorder(LCU_WIDTH, x_px, y_px); @@ -538,7 +539,7 @@ chroma_ssd = ssd_u + ssd_v; } - { + if (!skip_residual_coding) { int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); @@ -864,6 +865,26 @@ inter_bitcost += cur_cu->merge_idx; } } + else if (!state->encoder_control->cfg.early_skip) + { + int tr_depth = depth; + kvz_lcu_fill_trdepth(lcu, x, y, depth, tr_depth); + const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400; + kvz_inter_recon_cu(state, lcu, x, y, cu_width, true, has_chroma); + for (int i = 0; i < cu_width; i += (cu_width >> (depth == 0))) + { + for (int j = 0; j < cu_width; j += (cu_width >> (depth == 0))) + { + cu_info_t* tmp_cu = LCU_GET_CU_AT_PX(lcu, x_local + i, y_local + j); + cbf_clear(&tmp_cu->cbf, depth, COLOR_Y); + if (has_chroma) + { + cbf_clear(&tmp_cu->cbf, depth, COLOR_U); + cbf_clear(&tmp_cu->cbf, depth, COLOR_V); + } + } + } + } lcu_fill_inter(lcu, x_local, y_local, cu_width); lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu); } @@ -893,7 +914,7 @@ cost = bits * state->lambda; - cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu); + cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, cur_cu, lcu); if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) { cost = inter_zero_coeff_cost; @@ -1015,7 +1036,7 @@ double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y) + bits; cost += mode_bits * state->lambda; - cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu); + cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, cur_cu, lcu); memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac)); memcpy(&state->search_cabac, &temp_cabac, 
sizeof(temp_cabac));
View file
kvazaar-2.3.1.tar.gz/src/search.h -> kvazaar-2.3.2.tar.gz/src/search.h
Changed
@@ -81,11 +81,11 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu); + const cu_info_t* parent_tu, lcu_t *const lcu); double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu); + const cu_info_t* parent_tu, lcu_t *const lcu); void kvz_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth); void kvz_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
View file
kvazaar-2.3.1.tar.gz/src/search_inter.c -> kvazaar-2.3.2.tar.gz/src/search_inter.c
Changed
@@ -220,8 +220,9 @@ info->height, info->optimized_sad ); - - if (cost >= *best_cost) return false; + // On some platforms comparing two doubles give weird results, so add an offset +#define KVZ_TEMP_DOUBLE_PRECISION 0.001 + if (cost + KVZ_TEMP_DOUBLE_PRECISION >= *best_cost) return false; cost += info->mvd_cost_func( info->state, @@ -233,7 +234,8 @@ &bitcost ); - if (cost >= *best_cost) return false; + if (cost + KVZ_TEMP_DOUBLE_PRECISION >= *best_cost) return false; +#undef KVZ_TEMP_DOUBLE_PRECISION // Set to motion vector in quarter pixel precision. best_mv->x = x * 4; @@ -552,7 +554,7 @@ // 6 3 { { iDist / 2, iDist }, { iDist, 0 }, { iDist / 2, -iDist }, { -iDist, 0 }, - { iDist / 2, iDist }, { -iDist / 2, -iDist }, { 0, 0 }, { 0, 0 } + { -iDist / 2, iDist }, { -iDist / 2, -iDist }, { 0, 0 }, { 0, 0 } } }; @@ -1045,6 +1047,11 @@ epol_args.ext_origin = &ext_origin; epol_args.ext_s = &ext_s; + // In case the half of the fractional positions are legal + // and the other half illegal, the extension will be performed + // for pixels that might be uninitialized. + // Nevertheless, in that case the search will not use those pixels, + // but the thread/address sanitizer will not know that. 
kvz_get_extended_block(&epol_args); kvz_pixel *tmp_pic = pic->y + orig.y * pic->stride + orig.x; @@ -1879,6 +1886,12 @@ double frac_bits = MAX_INT; vector2d_t frac_mv = { unipred_pu->inter.mvlist0, unipred_pu->inter.mvlist1 }; + // Check that at least one quarter-pel step is possible + if (!fracmv_within_tile(info, frac_mv.x + 3, frac_mv.y + 3) && + !fracmv_within_tile(info, frac_mv.x - 3, frac_mv.y - 3)) { + continue; + } + search_frac(info, &frac_cost, &frac_bits, &frac_mv); uint8_t mv_ref_coded = LX_idx; @@ -2083,7 +2096,7 @@ const int width = LCU_WIDTH >> depth; cabac_data_t cabac_copy; memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy)); - cabac_copy.update = 1; + state->search_cabac.update = 1; cu_info_t* cur_pu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); *cur_pu = *cur_cu; @@ -2109,12 +2122,16 @@ double bits = 0; const int skip_context = kvz_get_skip_context(x, y, lcu, NULL); if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { - no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_modelskip_context, 1) + *inter_bitcost; - bits += kvz_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu); + no_cbf_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_modelskip_context, 1) + *inter_bitcost; + bits += kvz_mock_encode_coding_unit(state, &state->search_cabac, x, y, depth, lcu, cur_cu); } else { - no_cbf_bits = kvz_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu); - bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 1); + no_cbf_bits = kvz_mock_encode_coding_unit(state, &state->search_cabac, x, y, depth, lcu, cur_cu); + bits += no_cbf_bits; + if (!cur_cu->merged || cur_cu->part_size != SIZE_2Nx2N) { + bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_qt_root_cbf_model, 1); + no_cbf_bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_qt_root_cbf_model, 0); + } } double no_cbf_cost = ssd + no_cbf_bits * 
state->lambda; @@ -2124,12 +2141,23 @@ lcu, false); + + if (tr_depth == depth) + { + cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_Y); + if (reconstruct_chroma) + { + cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_U); + cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_V); + } + } + int cbf = cbf_is_set_any(cur_cu->cbf, depth); if(cbf) { - *inter_cost = kvz_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu); + *inter_cost = kvz_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, cur_cu, lcu); if (reconstruct_chroma) { - *inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu); + *inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, cur_cu, lcu); } } else { @@ -2137,13 +2165,14 @@ *inter_cost = no_cbf_cost; cur_cu->cbf = 0; *inter_bitcost = no_cbf_bits; + memcpy(&state->search_cabac, &cabac_copy, sizeof(cabac_copy)); return; } *inter_cost += (bits)* state->lambda; *inter_bitcost = bits; - if(no_cbf_cost < *inter_cost) { + if(no_cbf_cost < *inter_cost && !state->encoder_control->cfg.lossless) { cur_cu->cbf = 0; if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { cur_cu->skipped = 1; @@ -2152,6 +2181,7 @@ *inter_bitcost = no_cbf_bits; } + memcpy(&state->search_cabac, &cabac_copy, sizeof(cabac_copy)); }
View file
kvazaar-2.3.1.tar.gz/src/search_intra.c -> kvazaar-2.3.2.tar.gz/src/search_intra.c
Changed
@@ -221,9 +221,9 @@ intra_mode, chroma_mode, pred_cu, lcu); - nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, pred_cu, lcu); if (reconstruct_chroma) { - nosplit_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + nosplit_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, pred_cu, lcu); } // Early stop codition for the recursive search. @@ -334,11 +334,9 @@ cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); - kvz_pixel _pred32 * 32 + SIMD_ALIGNMENT; - kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); + ALIGNED(SIMD_ALIGNMENT) kvz_pixel pred32 * 32; - kvz_pixel _orig_block32 * 32 + SIMD_ALIGNMENT; - kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); + ALIGNED(SIMD_ALIGNMENT) kvz_pixel orig_block32 * 32; kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { @@ -408,11 +406,9 @@ const bool filter_boundary = !(cfg->lossless && cfg->implicit_rdpcm); // Temporary block arrays - kvz_pixel _predsPARALLEL_BLKS * 32 * 32 + SIMD_ALIGNMENT; - pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT); - - kvz_pixel _orig_block32 * 32 + SIMD_ALIGNMENT; - kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); + ALIGNED(SIMD_ALIGNMENT) kvz_pixel _predsPARALLEL_BLKS * 32 * 32; + pred_buffer preds = (pred_buffer)_preds; + ALIGNED(SIMD_ALIGNMENT) kvz_pixel orig_block32 * 32; // Store original block for SAD computation kvz_pixels_blit(orig, orig_block, width, width, origstride, width); @@ -731,7 +727,7 @@ -1, chroma.mode, // skip luma NULL, lcu); double bits = 0; - chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); + chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, tr_cu, lcu); 
double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode); bits += mode_bits;
View file
kvazaar-2.3.1.tar.gz/src/strategies/avx2/dct-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/dct-avx2.c
Changed
@@ -38,7 +38,6 @@ #if COMPILE_INTEL_AVX2 #include "kvazaar.h" -#if KVZ_BIT_DEPTH == 8 #include <immintrin.h> #include "strategyselector.h" @@ -938,30 +937,28 @@ TRANSFORM(dct, 32); ITRANSFORM(dct, 32); -#endif // KVZ_BIT_DEPTH == 8 + #endif //COMPILE_INTEL_AVX2 int kvz_strategy_register_dct_avx2(void* opaque, uint8_t bitdepth) { bool success = true; #if COMPILE_INTEL_AVX2 -#if KVZ_BIT_DEPTH == 8 - if (bitdepth == 8){ - success &= kvz_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2); + // Coefficients are the same for all bitdepths, no need to disable for 10-bit + success &= kvz_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2); - success &= kvz_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2); - success &= kvz_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2); - success &= kvz_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2); - success &= kvz_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2); + success &= kvz_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2); + success &= kvz_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2); + success &= kvz_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2); + success &= kvz_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2); - success &= kvz_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2); + success &= kvz_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2); - success &= kvz_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2); - success &= kvz_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2); - success &= kvz_strategyselector_register(opaque, "idct_16x16", 
"avx2", 40, &matrix_idct_16x16_avx2); - success &= kvz_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2); - } -#endif // KVZ_BIT_DEPTH == 8 + success &= kvz_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2); + success &= kvz_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2); + success &= kvz_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2); + success &= kvz_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2); + #endif //COMPILE_INTEL_AVX2 return success; }
View file
kvazaar-2.3.1.tar.gz/src/strategies/avx2/picture-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/picture-avx2.c
Changed
@@ -631,48 +631,51 @@ static cost_pixel_any_size_multi_func satd_any_size_## suffix; \ static void satd_any_size_ ## suffix ( \ int width, int height, \ - const uint8_t **preds, \ + const kvz_pixel **preds, \ const int stride, \ - const uint8_t *orig, \ + const kvz_pixel *orig, \ const int orig_stride, \ unsigned num_modes, \ unsigned *costs_out, \ int8_t *valid) \ { \ unsigned sumsnum_parallel_blocks = { 0 }; \ - const uint8_t *pred_ptrs4 = { preds0, preds1, preds2, preds3 };\ - const uint8_t *orig_ptr = orig; \ + const kvz_pixel *pred_ptrs4 = { preds0, preds1, preds2, preds3 };\ + const kvz_pixel *orig_ptr = orig; \ costs_out0 = 0; costs_out1 = 0; costs_out2 = 0; costs_out3 = 0; \ - if (width % 8 != 0) { \ + const int width_mod_8 = width % 8; \ + if (width_mod_8 != 0) { \ + const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\ /* Process the first column using 4x4 blocks. */ \ for (int y = 0; y < height; y += 4) { \ - kvz_satd_4x4_subblock_ ## suffix(preds, stride, orig, orig_stride, sums); \ - } \ - orig_ptr += 4; \ + kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &origy*orig_stride, orig_stride, sums); \ + for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4*stride; costs_outblk += sumsblk; }\ + } \ for(int blk = 0; blk < num_parallel_blocks; ++blk){\ pred_ptrsblk += 4; \ - }\ + }\ width -= 4; \ - } \ + } \ if (height % 8 != 0) { \ + const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\ /* Process the first row using 4x4 blocks. 
*/ \ for (int x = 0; x < width; x += 4 ) { \ - kvz_satd_4x4_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \ - } \ - orig_ptr += 4 * orig_stride; \ + kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &orig_ptrx, orig_stride, sums); \ + for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4; costs_outblk += sumsblk; }\ + } \ for(int blk = 0; blk < num_parallel_blocks; ++blk){\ pred_ptrsblk += 4 * stride; \ - }\ + }\ height -= 4; \ - } \ + } \ /* The rest can now be processed with 8x8 blocks. */ \ - for (int y = 0; y < height; y += 8) { \ - orig_ptr = &origy * orig_stride; \ - pred_ptrs0 = &preds0y * stride; \ - pred_ptrs1 = &preds1y * stride; \ - pred_ptrs2 = &preds2y * stride; \ - pred_ptrs3 = &preds3y * stride; \ - for (int x = 0; x < width; x += 8) { \ + for (int y = height % 8; y < height; y += 8) { \ + orig_ptr = &origy * orig_stride + width_mod_8; \ + pred_ptrs0 = &preds0y * stride + width_mod_8; \ + pred_ptrs1 = &preds1y * stride + width_mod_8; \ + pred_ptrs2 = &preds2y * stride + width_mod_8; \ + pred_ptrs3 = &preds3y * stride + width_mod_8; \ + for (int x = width_mod_8; x < width; x += 8) { \ satd_8x8_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \ orig_ptr += 8; \ pred_ptrs0 += 8; \
View file
kvazaar-2.3.1.tar.gz/src/strategies/avx2/sao-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/sao-avx2.c
Changed
@@ -284,7 +284,8 @@ return calc_diff_off_delta(diff_lo, diff_hi, offset, orig); } -static int32_t sao_edge_ddistortion_avx2(const uint8_t *orig_data, +static int32_t sao_edge_ddistortion_avx2(const encoder_control_t* const encoder, + const uint8_t *orig_data, const uint8_t *rec_data, int32_t block_width, int32_t block_height, @@ -316,7 +317,8 @@ assert(NUM_SAO_EDGE_CATEGORIES == 5); if (offsets_ok != 0xffff) { - return sao_edge_ddistortion_generic(orig_data, + return sao_edge_ddistortion_generic(encoder, + orig_data, rec_data, block_width, block_height, @@ -420,7 +422,8 @@ } } -static void calc_sao_edge_dir_avx2(const uint8_t *orig_data, +static void calc_sao_edge_dir_avx2(const encoder_control_t* const encoder, + const uint8_t *orig_data, const uint8_t *rec_data, int32_t eo_class, int32_t block_width,
View file
kvazaar-2.3.1.tar.gz/src/strategies/generic/ipol-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/ipol-generic.c
Changed
@@ -800,6 +800,10 @@ kvz_pixel *dst = args->buf + (y + args->pad_t + y_simd) * (*args->ext_s); FILL_ARRAY(dst, 0, *args->ext_s); } + // Set the last element to zero because the avx2 code reads it, + // though it does not use it, this is purely to prevent a false positive in + // address sanitizer. + args->buf(args->blk_h + args->pad_b + args->pad_t + args->pad_b_simd - 1) * *args->ext_s + args->pad_l + args->blk_w + args->pad_r = 0; } else {
View file
kvazaar-2.3.1.tar.gz/src/strategies/generic/picture-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/picture-generic.c
Changed
@@ -417,36 +417,39 @@ const kvz_pixel *pred_ptrs4 = { preds0, preds1, preds2, preds3 };\ const kvz_pixel *orig_ptr = orig; \ costs_out0 = 0; costs_out1 = 0; costs_out2 = 0; costs_out3 = 0; \ - if (width % 8 != 0) { \ + const int width_mod_8 = width % 8; \ + if (width_mod_8 != 0) { \ + const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\ /* Process the first column using 4x4 blocks. */ \ for (int y = 0; y < height; y += 4) { \ - kvz_satd_4x4_subblock_ ## suffix(preds, stride, orig, orig_stride, sums); \ - } \ - orig_ptr += 4; \ + kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &origy*orig_stride, orig_stride, sums); \ + for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4*stride; costs_outblk += sumsblk; }\ + } \ for(int blk = 0; blk < num_parallel_blocks; ++blk){\ pred_ptrsblk += 4; \ - }\ + }\ width -= 4; \ - } \ + } \ if (height % 8 != 0) { \ + const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\ /* Process the first row using 4x4 blocks. */ \ for (int x = 0; x < width; x += 4 ) { \ - kvz_satd_4x4_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \ - } \ - orig_ptr += 4 * orig_stride; \ + kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &orig_ptrx, orig_stride, sums); \ + for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4; costs_outblk += sumsblk; }\ + } \ for(int blk = 0; blk < num_parallel_blocks; ++blk){\ pred_ptrsblk += 4 * stride; \ - }\ + }\ height -= 4; \ - } \ + } \ /* The rest can now be processed with 8x8 blocks. 
*/ \ - for (int y = 0; y < height; y += 8) { \ - orig_ptr = &origy * orig_stride; \ - pred_ptrs0 = &preds0y * stride; \ - pred_ptrs1 = &preds1y * stride; \ - pred_ptrs2 = &preds2y * stride; \ - pred_ptrs3 = &preds3y * stride; \ - for (int x = 0; x < width; x += 8) { \ + for (int y = height % 8; y < height; y += 8) { \ + orig_ptr = &origy * orig_stride + width_mod_8; \ + pred_ptrs0 = &preds0y * stride + width_mod_8; \ + pred_ptrs1 = &preds1y * stride + width_mod_8; \ + pred_ptrs2 = &preds2y * stride + width_mod_8; \ + pred_ptrs3 = &preds3y * stride + width_mod_8; \ + for (int x = width_mod_8; x < width; x += 8) { \ satd_8x8_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \ orig_ptr += 8; \ pred_ptrs0 += 8; \
View file
kvazaar-2.3.1.tar.gz/src/strategies/generic/sao-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/sao-generic.c
Changed
@@ -47,7 +47,8 @@ * \param dir_offsets * \param is_chroma 0 for luma, 1 for chroma. Indicates */ -static void calc_sao_edge_dir_generic(const kvz_pixel *orig_data, +static void calc_sao_edge_dir_generic(const encoder_control_t* const encoder, + const kvz_pixel *orig_data, const kvz_pixel *rec_data, int eo_class, int block_width, @@ -62,7 +63,7 @@ // Don't sample the edge pixels because this function doesn't have access to // their neighbours. - + const int offset = encoder->bitdepth != 8 ? 1 << (encoder->bitdepth - 9) : 0; for (y = 1; y < block_height - 1; ++y) { for (x = 1; x < block_width - 1; ++x) { @@ -73,7 +74,7 @@ int eo_cat = sao_calc_eo_cat(a, b, c); - cat_sum_cnt0eo_cat += orig_datay * block_width + x - c; + cat_sum_cnt0eo_cat += (orig_datay * block_width + x - c + offset) >> (encoder->bitdepth - 8); cat_sum_cnt1eo_cat += 1; } }
View file
kvazaar-2.3.1.tar.gz/src/strategies/generic/sao_shared_generics.h -> kvazaar-2.3.2.tar.gz/src/strategies/generic/sao_shared_generics.h
Changed
@@ -49,7 +49,8 @@ return sao_eo_idx_to_eo_categoryeo_idx; } -static int sao_edge_ddistortion_generic(const kvz_pixel *orig_data, +static int sao_edge_ddistortion_generic(const encoder_control_t* const encoder, + const kvz_pixel *orig_data, const kvz_pixel *rec_data, int32_t block_width, int32_t block_height, @@ -61,22 +62,24 @@ vector2d_t a_ofs = g_sao_edge_offsetseo_class0; vector2d_t b_ofs = g_sao_edge_offsetseo_class1; + const int bit_offset = encoder->bitdepth != 8 ? 1 << (encoder->bitdepth - 9) : 0; + for (y = 1; y < block_height - 1; y++) { for (x = 1; x < block_width - 1; x++) { uint32_t c_pos = y * block_width + x; uint32_t a_pos = (y + a_ofs.y) * block_width + x + a_ofs.x; uint32_t b_pos = (y + b_ofs.y) * block_width + x + b_ofs.x; - uint8_t a = rec_dataa_pos; - uint8_t b = rec_datab_pos; - uint8_t c = rec_datac_pos; - uint8_t orig = orig_datac_pos; + kvz_pixel a = rec_dataa_pos; + kvz_pixel b = rec_datab_pos; + kvz_pixel c = rec_datac_pos; + kvz_pixel orig = orig_datac_pos; int32_t eo_cat = sao_calc_eo_cat(a, b, c); int32_t offset = offsetseo_cat; if (offset != 0) { - int32_t diff = orig - c; + int32_t diff = (orig - c + bit_offset) >> (encoder->bitdepth - 8); int32_t delta = diff - offset; int32_t curr = delta * delta - diff * diff;
View file
kvazaar-2.3.1.tar.gz/src/strategies/strategies-ipol.h -> kvazaar-2.3.2.tar.gz/src/strategies/strategies-ipol.h
Changed
@@ -50,14 +50,14 @@ // error because AVX2 reads one extra byte in the end. #define KVZ_IPOL_MAX_INPUT_SIZE_LUMA_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * KVZ_EXT_BLOCK_W_LUMA + 1) #define KVZ_IPOL_MAX_INPUT_SIZE_CHROMA_SIMD ((KVZ_EXT_BLOCK_W_CHROMA + 3) * KVZ_EXT_BLOCK_W_CHROMA + 1) -#define KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH) +#define KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH + 1) #define KVZ_IPOL_MAX_IM_SIZE_CHROMA_SIMD ((KVZ_EXT_BLOCK_W_CHROMA + 3) * LCU_WIDTH_C) // On top of basic interpolation, FME needs one extra // column and row for ME (left and up). Adding the // extra row happens to satisfy AVX2 requirements for // row count. No other extra rows are needed. -#define KVZ_FME_MAX_INPUT_SIZE_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * (KVZ_EXT_BLOCK_W_LUMA + 1)) +#define KVZ_FME_MAX_INPUT_SIZE_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * (KVZ_EXT_BLOCK_W_LUMA + 1) + 1) typedef struct { kvz_pixel *buffer; kvz_pixel *orig_topleft; unsigned stride; unsigned malloc_used; } kvz_extended_block;
View file
kvazaar-2.3.1.tar.gz/src/strategies/strategies-sao.h -> kvazaar-2.3.2.tar.gz/src/strategies/strategies-sao.h
Changed
@@ -46,11 +46,13 @@ // Declare function pointers. -typedef int (sao_edge_ddistortion_func)(const kvz_pixel *orig_data, const kvz_pixel *rec_data, +typedef int (sao_edge_ddistortion_func)(const encoder_control_t* const encoder, + const kvz_pixel *orig_data, const kvz_pixel *rec_data, int block_width, int block_height, int eo_class, int offsetsNUM_SAO_EDGE_CATEGORIES); -typedef void (calc_sao_edge_dir_func)(const kvz_pixel *orig_data, const kvz_pixel *rec_data, +typedef void (calc_sao_edge_dir_func)(const encoder_control_t* const encoder, + const kvz_pixel *orig_data, const kvz_pixel *rec_data, int eo_class, int block_width, int block_height, int cat_sum_cnt2NUM_SAO_EDGE_CATEGORIES);
View file
kvazaar-2.3.1.tar.gz/src/threadqueue.c -> kvazaar-2.3.2.tar.gz/src/threadqueue.c
Changed
@@ -389,10 +389,31 @@ threadqueue->first = NULL; threadqueue->last = NULL; +#ifndef _MSC_VER + pthread_attr_t attr; + if (pthread_attr_init(&attr) != 0) { + fprintf(stderr, "pthread_attr_init failed!\n"); + goto failed; + } + size_t default_stack_size; + if (pthread_attr_getstacksize(&attr, &default_stack_size) != 0) { + fprintf(stderr, "pthread_attr_getstacksize failed!\n"); + goto failed; + } + if (default_stack_size < 1024 * 1024) { + if (pthread_attr_setstacksize(&attr, 1024 * 1024) != 0) { + fprintf(stderr, "pthread_attr_setstacksize failed!\n"); + goto failed; + } + } +#else + pthread_attr_t attr; +#endif + // Lock the queue before creating threads, to ensure they all have correct information. PTHREAD_LOCK(&threadqueue->lock); for (int i = 0; i < thread_count; i++) { - if (pthread_create(&threadqueue->threads[i], NULL, threadqueue_worker, threadqueue) != 0) { + if (pthread_create(&threadqueue->threads[i], &attr, threadqueue_worker, threadqueue) != 0) { fprintf(stderr, "pthread_create failed!\n"); goto failed; } @@ -404,6 +425,11 @@ return threadqueue; failed: +#ifndef _MSC_VER + if (pthread_attr_destroy(&attr) != 0) { + fprintf(stderr, "pthread_attr_destroy failed!\n"); + } +#endif kvz_threadqueue_free(threadqueue); return NULL; }
View file
kvazaar-2.3.1.tar.gz/src/threadwrapper/include/pthread.h -> kvazaar-2.3.2.tar.gz/src/threadwrapper/include/pthread.h
Changed
@@ -27,10 +27,10 @@ typedef void* pthread_t; typedef void*(voidp_voidp_func)(void*); -typedef void pthread_attr_t; -typedef void pthread_condattr_t; -typedef void pthread_mutexattr_t; -typedef void pthread_rwlockattr_t; +typedef void* pthread_attr_t; +typedef void* pthread_condattr_t; +typedef void* pthread_mutexattr_t; +typedef void* pthread_rwlockattr_t; // Parameter names that have been commented away do nothing, // as they are always null when the functions are used in Kvazaar.
View file
kvazaar-2.3.1.tar.gz/src/yuv_io.c -> kvazaar-2.3.2.tar.gz/src/yuv_io.c
Changed
@@ -57,24 +57,43 @@ unsigned width, unsigned height, unsigned bytes_per_sample, unsigned array_width, kvz_pixel *data) { - kvz_pixel* p = data; - kvz_pixel* end = data + array_width * height; - kvz_pixel fill_char; - unsigned i; - while (p < end) { - // Read the beginning of the line from input. - if (width != fread(p, bytes_per_sample, width, file)) - return 0; - - // Fill the rest with the last pixel value. - fill_char = p[width - 1]; + unsigned i; + // Handle separately the case where we use KVZ_BIT_DEPTH 10+ but the input is 8-bit. + if (bytes_per_sample != sizeof(kvz_pixel)) { + uint8_t* p = (uint8_t*)data; + uint8_t* end = (uint8_t*)data + array_width * height; + uint8_t fill_char; + while (p < end) { + // Read the beginning of the line from input. + if (width != fread(p, bytes_per_sample, width, file)) return 0; + // Fill the rest with the last pixel value. + // Fill the rest with the last pixel value. + fill_char = p[width - 1]; + + for (i = width; i < array_width; ++i) { + p[i] = fill_char; + } - for (i = width; i < array_width; ++i) { - p[i] = fill_char; + p += array_width; } + } + else { + kvz_pixel* p = data; + kvz_pixel* end = data + array_width * height; + kvz_pixel fill_char; + while (p < end) { + // Read the beginning of the line from input. + if (width != fread(p, bytes_per_sample, width, file)) return 0; + // Fill the rest with the last pixel value. + fill_char = p[width - 1]; + + for (i = width; i < array_width; ++i) { + p[i] = fill_char; + } - p += array_width; + p += array_width; + } } return 1; } @@ -201,24 +220,23 @@ static int read_frame_header(FILE* input) { - char buffer[256]; - bool frame_start = false; - - while (!frame_start) { - for (int i = 0; i < 256; i++) { - buffer[i] = getc(input); - if (buffer[i] == EOF) return 0; - // ToDo: frame headers can have some information structured same as start headers - // This info is just skipped for now, since it's not clear what it could be.
- if (buffer[i] == 0x0A) { - frame_start = true; - break; - } + int c; + int count = 0; + const int max_scan = 4096; // prevent infinite loops + + while ((c = getc(input)) != EOF && count < max_scan) { + count++; + // ToDo: frame headers can have some information structured same as start headers + // This info is just skipped for now, since it's not clear what it could be. + if (c == 0x0A) { + return 1; // Found frame start } } - return 1; + + return 0; // EOF or scan limit reached } + /** * \brief Read a single frame from a file. * @@ -313,7 +331,7 @@ // Seek failed. Skip data by reading. error = 0; - unsigned char* tmp[4096]; + unsigned char tmp[4096]; size_t bytes_left = skip_bytes; while (bytes_left > 0 && !error) { const size_t skip = MIN(4096, bytes_left);
View file
kvazaar-2.3.1.tar.gz/tests/CMakeLists.txt -> kvazaar-2.3.2.tar.gz/tests/CMakeLists.txt
Changed
@@ -15,18 +15,24 @@ add_definitions(-DPIC) endif() +list(APPEND ALLOW_AVX2 "x86_64" "AMD64") + if(MSVC) target_include_directories(kvazaar_tests PUBLIC ../src/threadwrapper/include) - set_property( SOURCE ${TEST_SOURCES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) + set_property( SOURCE ${TEST_SOURCES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) + endif() add_definitions(-DWIN32_LEAN_AND_MEAN -D_WIN32 -DWIN32 -DWIN64) else() - list(APPEND ALLOW_AVX2 "x86_64" "AMD64") if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) set_property( SOURCE ${TEST_SOURCES} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" ) endif() - find_package(Threads REQUIRED) - target_link_libraries(kvazaar_tests PUBLIC Threads::Threads) + + if(NOT ANDROID) + find_package(Threads REQUIRED) + target_link_libraries(kvazaar_tests PUBLIC Threads::Threads) + endif() include(CheckLibraryExists)
View file
kvazaar-2.3.1.tar.gz/tests/Makefile.am -> kvazaar-2.3.2.tar.gz/tests/Makefile.am
Changed
@@ -28,7 +28,8 @@ test_tools.sh \ test_weird_shapes.sh \ test_pu_depth_constraints.sh \ - util.sh + util.sh \ + CMakeLists.txt check_PROGRAMS = kvazaar_tests
View file
kvazaar-2.3.1.tar.gz/tests/test_owf_wpp_tiles.sh -> kvazaar-2.3.2.tar.gz/tests/test_owf_wpp_tiles.sh
Changed
@@ -7,7 +7,7 @@ set -eu . "${0%/*}/util.sh" -common_args='-p4 --rd=0 --no-rdoq --no-signhide --subme=0 --deblock --sao --pu-depth-inter=1-3 --pu-depth-intra=2-3' +common_args='--gop lp-g8d3t1 -p4 --rd=0 --no-rdoq --no-signhide --subme=0 --deblock --sao --pu-depth-inter=1-3 --pu-depth-intra=2-3' valgrind_test 264x130 10 $common_args -r1 --owf=1 --threads=0 --no-wpp valgrind_test 264x130 10 $common_args -r1 --owf=0 --threads=0 --no-wpp valgrind_test 264x130 10 $common_args -r2 --owf=1 --threads=2 --wpp
View file
kvazaar-2.3.1.tar.gz/tests/tests_main.c -> kvazaar-2.3.2.tar.gz/tests/tests_main.c
Changed
@@ -51,7 +51,7 @@ { GREATEST_MAIN_BEGIN(); - init_test_strategies(1); + init_test_strategies(); #if KVZ_BIT_DEPTH == 8 RUN_SUITE(sad_tests); RUN_SUITE(intra_sad_tests);
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.