Packman Build Service PMBS

kvazaar.changes Changed

@@ -1,4 +1,29 @@
 -------------------------------------------------------------------
+Wed Sep 17 16:30:44 UTC 2025 - Luigi Baldoni <aloisio@gmx.com>
+
+- Update to version 2.3.2
+  Fixes:
+  * Several fixes to 10 bit encoding
+  * Fix SATD calculation
+  * Ensure proper stack size
+  * Fix typing for threadwrapper
+  * Separate .pdb files for exe and library
+  * Fix y4m reading
+  * Update minimum CMake version
+  * Fix --no-early-skip
+  * Fix lossless incorrectly selecting skip
+  * Rate control improvements
+  * Fix bit cost calculation for tr splits and smp
+  * Update the default configuration to match the medium preset
+  External contributions:
+  * add missing files in distribution for compilation with cmake
+  * ax_pthread/clang: move -pthread to LIBS
+  * Fix emscripten build
+  * Avoid double free of roi.file_path
+  * Fix passing too many arguments to "init_test_strategies".
+- Rebased kvazaar.patch
+
+-------------------------------------------------------------------
 Wed Apr 10 11:16:02 UTC 2024 - Luigi Baldoni <aloisio@gmx.com>
 
 - Update to version 2.3.1

kvazaar.spec Changed

kvazaar.patch Changed

kvazaar-2.3.1.tar.gz/.gitignore -> kvazaar-2.3.2.tar.gz/.gitignore Changed

kvazaar-2.3.1.tar.gz/CMakeLists.txt -> kvazaar-2.3.2.tar.gz/CMakeLists.txt Changed

@@ -1,10 +1,10 @@
-cmake_minimum_required(VERSION 3.12)
+cmake_minimum_required(VERSION 3.25)
 
 project(kvazaar
 LANGUAGES C CXX
 HOMEPAGE_URL https://github.com/ultravideo/kvazaar
 DESCRIPTION "An open-source HEVC encoder licensed under 3-clause BSD"
-VERSION 2.3.1 )
+VERSION 2.3.2 )
 
 option(BUILD_SHARED_LIBS "Build using shared kvazaar library" ON)
 
@@ -12,6 +12,8 @@
 
 option(USE_CRYPTO "Use crypto library" OFF)
 
+option(BUILD_KVAZAAR_BINARY "Build kvazaar-bin" ON) # To build only the lib, useful for iOS-builds
+
 include(GNUInstallDirs) #Helps to define correct distro specific install directories
 set(DEFERRED "@")
 
@@ -213,20 +215,41 @@
   list(APPEND CLI_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/extras/getopt.c ${CMAKE_CURRENT_SOURCE_DIR}/src/threadwrapper/src/pthread.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/threadwrapper/src/semaphore.cpp)
 endif()
 
-add_executable(kvazaar-bin ${CLI_SOURCES})
+if(BUILD_KVAZAAR_BINARY)
+  add_executable(kvazaar-bin ${CLI_SOURCES})
+
+  if(MSVC AND BUILD_SHARED_LIBS)
+    set_target_properties(kvazaar PROPERTIES
+      OUTPUT_NAME kvazaar
+      PDB_NAME kvazaar-lib
+    )
+
+    set_target_properties(kvazaar-bin PROPERTIES
+      OUTPUT_NAME kvazaar
+      PDB_NAME kvazaar-bin
+    )
+  else()
+    set_target_properties(kvazaar-bin PROPERTIES
+      OUTPUT_NAME kvazaar
+    )
+  endif()
+  set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_NAME kvazaar)
 
-set_target_properties(kvazaar-bin PROPERTIES OUTPUT_NAME kvazaar)
-set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_NAME kvazaar)
+  target_link_libraries(kvazaar-bin PUBLIC kvazaar)
+endif()
 
-target_link_libraries(kvazaar-bin PUBLIC kvazaar)
+list(APPEND ALLOW_AVX2 "x86_64" "AMD64")
 
 if(MSVC)
   target_include_directories(kvazaar PUBLIC src/threadwrapper/include)
-  set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" )  
+  if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) 
+    set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" )  
+  endif()
 else()
-  set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src)
+  if(BUILD_KVAZAAR_BINARY)
+    set_target_properties(kvazaar-bin PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src)
+  endif()
   set_target_properties(kvazaar PROPERTIES SOVERSION "7" VERSION "7.4.0")
-  list(APPEND ALLOW_AVX2 "x86_64" "AMD64")
   if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) 
     set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" )
     set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" )    
@@ -235,14 +258,15 @@
   # CYGWIN, MSYS, and MINGW seem to be needing this but in some cases 
   # it might be that the toolset is not properly set, so also use this
   # in cases where we are not sure that it is not needed
-  if((NOT MSVC AND NOT LINUX AND NOT APPLE) OR (CYGWIN OR MSYS OR MINGW))
+  if((NOT MSVC AND NOT LINUX AND NOT APPLE AND NOT ANDROID AND NOT EMSCRIPTEN AND NOT BSD) OR (CYGWIN OR MSYS OR MINGW))
     set(CMAKE_C_FLAGS "-Wa,-muse-unaligned-vector-move ${CMAKE_C_FLAGS}")
   endif()
 
-  
-  set(THREADS_PREFER_PTHREAD_FLAG ON)
-  find_package(Threads REQUIRED)
-  target_link_libraries(kvazaar PUBLIC Threads::Threads)
+  if(NOT ANDROID AND NOT EMSCRIPTEN)
+    set(THREADS_PREFER_PTHREAD_FLAG ON)
+    find_package(Threads REQUIRED)
+    target_link_libraries(kvazaar PUBLIC Threads::Threads)
+  endif()
 
   include(CheckLibraryExists)
 
@@ -256,7 +280,9 @@
   endif ()
 
   target_link_libraries(kvazaar PUBLIC ${EXTRA_LIBS})
-  target_link_libraries(kvazaar-bin PUBLIC ${EXTRA_LIBS} )
+  if(BUILD_KVAZAAR_BINARY)
+    target_link_libraries(kvazaar-bin PUBLIC ${EXTRA_LIBS} )
+  endif()
 endif()
 
 
@@ -308,7 +334,9 @@
 install(CODE "configure_file(\"${PROJECT_SOURCE_DIR}/src/kvazaar.pc.temp\" \"${PROJECT_SOURCE_DIR}/src/kvazaar.pc\" @ONLY)")
 
 install(FILES ${PROJECT_SOURCE_DIR}/src/kvazaar.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
-install(TARGETS kvazaar-bin DESTINATION ${CMAKE_INSTALL_BINDIR})
+if(BUILD_KVAZAAR_BINARY)
+  install(TARGETS kvazaar-bin DESTINATION ${CMAKE_INSTALL_BINDIR})
+endif()
 install(TARGETS kvazaar
   RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}

kvazaar-2.3.1.tar.gz/README.md -> kvazaar-2.3.2.tar.gz/README.md Changed

@@ -51,7 +51,7 @@
 
 comment: # "BEGIN KVAZAAR HELP MESSAGE"
 ```
-Kvazaar v2.3.1 2024-04-10
+Kvazaar v2.3.2 2025-09-16
 Kvazaar license: 3-clause BSD
 Usage:
 kvazaar -i <input> --input-res <width>x<height> -o <output>
@@ -126,7 +126,7 @@
                                    - 0: Only send VPS with the first frame.
                                    - N: Send VPS with every Nth intra frame.
   -r, --ref <integer>        : Number of reference frames, in range 1..15 4
-      --gop <string>         : GOP structure lp-g4d3t1
+      --gop <string>         : GOP structure 16
                                    -  0: Disabled
                                    -  8: B-frame pyramid of length 8
                                    - 16: B-frame pyramid of length 16
@@ -262,7 +262,7 @@
                                guaranteed to produce sensible bitstream or
                                work at all. disabled
       --tr-depth-intra <int> : Transform split depth for intra blocks 0
-      --(no-)bipred          : Bi-prediction disabled
+      --(no-)bipred          : Bi-prediction enabled
       --cu-split-termination <string> : CU split search termination zero
                                    - off: Don't terminate early.
                                    - zero: Terminate when residual is zero.
@@ -383,7 +383,7 @@
 
 |                      | 0-uf  | 1-sf  | 2-vf  | 3-fr  | 4-f   | 5-m   | 6-s   | 7-sr  | 8-vs  | 9-p   |
 | -------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- |
-| rd                   | 0     | 0     | 0     | 0     | 0     | 0     | 1     | 2     | 2     | 2     |
+| rd                   | 0     | 0     | 0     | 0     | 0     | 0     | 1     | 2     | 3     | 3     |
 | pu-depth-intra       | 2-3   | 2-3   | 2-3   | 2-3   | 1-3   | 1-4   | 1-4   | 1-4   | 1-4   | 1-4   |
 | pu-depth-inter       | 1-2   | 1-2   | 1-3   | 1-3   | 1-3   | 0-3   | 0-3   | 0-3   | 0-3   | 0-3   |
 | me                   | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz    | tz    |
@@ -400,7 +400,7 @@
 | mv-rdo               | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1     |
 | full-intra-search    | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     |
 | smp                  | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1     | 1     |
-| amp                  | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1     |
+| amp                  | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1     | 1     |
 | cu-split-termination | zero  | zero  | zero  | zero  | zero  | zero  | zero  | zero  | zero  | off   |
 | me-early-termination | sens. | sens. | sens. | sens. | sens. | on    | on    | off   | off   | off   |
 | intra-rdo-et         | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     |
@@ -491,7 +491,7 @@
 
 Please cite this paper(https://dl.acm.org/citation.cfm?doid=2964284.2973796) for Kvazaar:
 
-```M. Viitanen, A. Koivula, A. Lemmetti, A. Ylä-Outinen, J. Vanne, and T. D. Hämäläinen, Kvazaar: open-source HEVC/H.265 encoder, in Proc. ACM Int. Conf. Multimedia, Amsterdam, The Netherlands, Oct. 2016.```
+```M. Viitanen, A. Koivula, A. Lemmetti, A. Ylä-Outinen, J. Vanne, and T. D. Hämäläinen, “Kvazaar: open-source HEVC/H.265 encoder,” in Proc. ACM Int. Conf. Multimedia, Amsterdam, The Netherlands, Oct. 2016.```
 
 Or in BibTex:

kvazaar-2.3.1.tar.gz/configure.ac -> kvazaar-2.3.2.tar.gz/configure.ac Changed

kvazaar-2.3.1.tar.gz/doc/kvazaar.1 -> kvazaar-2.3.2.tar.gz/doc/kvazaar.1 Changed

kvazaar-2.3.1.tar.gz/m4/ax_pthread.m4 -> kvazaar-2.3.2.tar.gz/m4/ax_pthread.m4 Changed

kvazaar-2.3.1.tar.gz/src/Makefile.am -> kvazaar-2.3.2.tar.gz/src/Makefile.am Changed

kvazaar-2.3.1.tar.gz/src/cfg.c -> kvazaar-2.3.2.tar.gz/src/cfg.c Changed

@@ -63,10 +63,10 @@
   cfg->sao_type        = 3;
   cfg->rdoq_enable     = 1;
   cfg->rdoq_skip       = 1;
-  cfg->signhide_enable = true;
+  cfg->signhide_enable = false;
   cfg->smp_enable      = false;
   cfg->amp_enable      = false;
-  cfg->rdo             = 1;
+  cfg->rdo             = 0;
   cfg->mv_rdo          = 0;
   cfg->full_intra_search = 0;
   cfg->trskip_enable   = 0;
@@ -86,9 +86,7 @@
   cfg->aud_enable      = 0;
   cfg->cqmfile         = NULL;
   cfg->fast_coeff_table_fn = NULL;
-  cfg->ref_frames      = 1;
-  cfg->gop_len         = 4;
-  cfg->gop_lowdelay    = true;
+  cfg->ref_frames      = 4;
   cfg->bipred          = 0;
   cfg->target_bitrate  = 0;
   cfg->hash            = KVZ_HASH_CHECKSUM;
@@ -118,10 +116,10 @@
   memset( cfg->pu_depth_inter.max, -1, sizeof( cfg->pu_depth_inter.max ) );
   memset( cfg->pu_depth_intra.min, -1, sizeof( cfg->pu_depth_intra.min ) );
   memset( cfg->pu_depth_intra.max, -1, sizeof( cfg->pu_depth_intra.max ) );
-  *cfg->pu_depth_inter.min = 2; // 0-3
+  *cfg->pu_depth_inter.min = 0; // 0-3
   *cfg->pu_depth_inter.max = 3; // 0-3
-  *cfg->pu_depth_intra.min = 2; // 0-4
-  *cfg->pu_depth_intra.max = 3; // 0-4
+  *cfg->pu_depth_intra.min = 1; // 0-4
+  *cfg->pu_depth_intra.max = 4; // 0-4
 
   cfg->add_encoder_info = true;
   cfg->calc_psnr = true;
@@ -135,9 +133,11 @@
   cfg->input_format = KVZ_FORMAT_P420;
   cfg->input_bitdepth = 8;
 
-  cfg->gop_lp_definition.d = 3;
-  cfg->gop_lp_definition.t = 1;
+  cfg->gop_lowdelay = 0;
+  cfg->gop_len = sizeof(kvz_gop_ra16) / sizeof(kvz_gop_ra16[0]);
+  memcpy(cfg->gop, kvz_gop_ra16, sizeof(kvz_gop_ra16));
   cfg->open_gop = true;
+  
 
   cfg->roi.file_path = NULL;
   cfg->roi.format = KVZ_ROI_TXT;
@@ -725,7 +725,7 @@
         "mv-rdo", "0",
         "full-intra-search", "0",
         "smp", "1",
-        "amp", "0",
+        "amp", "1",
         "cu-split-termination", "zero",
         "me-early-termination", "off",
         "intra-rdo-et", "0",
@@ -793,10 +793,12 @@
     if (sscanf(value, "%d/%d", &fps_num, &fps_denom) == 2) {
       cfg->framerate_num = fps_num;
       cfg->framerate_denom = fps_denom;
+      cfg->framerate = (double)fps_num / fps_denom;
     } else {
       // Accept decimal notation, making sure not to round 0 to 1.
       cfg->framerate_num = (int)(atof(value) * 1000 + 0.49);
       cfg->framerate_denom = 1000;
+      cfg->framerate = atof(value);
     }
   }
   else if OPT("qp")

kvazaar-2.3.1.tar.gz/src/cli.c -> kvazaar-2.3.2.tar.gz/src/cli.c Changed

@@ -485,7 +485,7 @@
     "                                   - 0: Only send VPS with the first frame.\n"
     "                                   - N: Send VPS with every Nth intra frame.\n"
     "  -r, --ref <integer>        : Number of reference frames, in range 1..15 4\n"
-    "      --gop <string>         : GOP structure lp-g4d3t1\n"
+    "      --gop <string>         : GOP structure 16\n"
     "                                   -  0: Disabled\n"
     "                                   -  8: B-frame pyramid of length 8\n"
     "                                   - 16: B-frame pyramid of length 16\n"
@@ -622,7 +622,7 @@
     "                               guaranteed to produce sensible bitstream or\n"
     "                               work at all. disabled\n"
     "      --tr-depth-intra <int> : Transform split depth for intra blocks 0\n"
-    "      --(no-)bipred          : Bi-prediction disabled\n"
+    "      --(no-)bipred          : Bi-prediction enabled\n"
     "      --cu-split-termination <string> : CU split search termination zero\n"
     "                                   - off: Don't terminate early.\n"
     "                                   - zero: Terminate when residual is zero.\n"

kvazaar-2.3.1.tar.gz/src/encmain.c -> kvazaar-2.3.2.tar.gz/src/encmain.c Changed

kvazaar-2.3.1.tar.gz/src/encoder.c -> kvazaar-2.3.2.tar.gz/src/encoder.c Changed

@@ -41,6 +41,7 @@
 #include "strategyselector.h"
 #include "kvz_math.h"
 #include "fast_coeff_cost.h"
+#include "rate_control.h"
 
 static int encoder_control_init_gop_layer_weights(encoder_control_t * const);
 
@@ -153,6 +154,12 @@
 
   // Take a copy of the config.
   memcpy(&encoder->cfg, cfg, sizeof(encoder->cfg));
+
+  // Copy the ROI file path
+  if (cfg->roi.file_path) {
+    encoder->cfg.roi.file_path = strdup(cfg->roi.file_path);
+  }
+
   // Set fields that are not copied to NULL.
   encoder->cfg.cqmfile = NULL;
   encoder->cfg.tiles_width_split = NULL;
@@ -596,6 +603,15 @@
     memcpy(encoder->cfg.optional_key, cfg->optional_key, 16);
   }
 
+  if (encoder->cfg.target_bitrate > 0 && cfg->rc_algorithm != KVZ_NO_RC)
+  {
+    encoder->rc_data = kvz_get_rc_data(encoder);
+    if (!encoder->rc_data) {
+      fprintf(stderr, "Failed to initialize rate control.\n");
+      goto init_failed;
+    }
+  }
+
   return encoder;
 
 init_failed:
@@ -639,6 +655,8 @@
     fclose(encoder->roi_file);
   }
 
+  kvz_free_rc_data(encoder->rc_data);
+
   free(encoder);
 }

kvazaar-2.3.1.tar.gz/src/encoder.h -> kvazaar-2.3.2.tar.gz/src/encoder.h Changed

kvazaar-2.3.1.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-2.3.2.tar.gz/src/encoder_state-ctors_dtors.c Changed

kvazaar-2.3.1.tar.gz/src/global.h -> kvazaar-2.3.2.tar.gz/src/global.h Changed

kvazaar-2.3.1.tar.gz/src/image.c -> kvazaar-2.3.2.tar.gz/src/image.c Changed

kvazaar-2.3.1.tar.gz/src/intra.c -> kvazaar-2.3.2.tar.gz/src/intra.c Changed

kvazaar-2.3.1.tar.gz/src/kvazaar.c -> kvazaar-2.3.2.tar.gz/src/kvazaar.c Changed

kvazaar-2.3.1.tar.gz/src/rate_control.c -> kvazaar-2.3.2.tar.gz/src/rate_control.c Changed

@@ -40,17 +40,10 @@
 
 
 static const int MIN_SMOOTHING_WINDOW = 40;
-static int smoothing_window = 40;
 static const double MIN_LAMBDA    = 0.1;
 static const double MAX_LAMBDA    = 10000;
 #define BETA1 1.2517
 
-static kvz_rc_data *data;
-
-static FILE *dist_file;
-static FILE *bits_file;
-static FILE *qp_file;
-static FILE *lambda_file;
 
 /**
  * \brief Clip lambda value to a valid range.
@@ -61,9 +54,7 @@
 }
 
 kvz_rc_data * kvz_get_rc_data(const encoder_control_t * const encoder) {
-  if (data != NULL || encoder == NULL) return data;
-
-  data = calloc(1, sizeof(kvz_rc_data));
+  kvz_rc_data* data = calloc(1, sizeof(kvz_rc_data));
 
   if (data == NULL) return NULL;
   if (pthread_mutex_init(&data->ck_frame_lock, NULL) != 0) return NULL;
@@ -107,18 +98,21 @@
   if(encoder->cfg.stats_file_prefix) {
     char buff[128];
     sprintf(buff, "%sbits.txt", encoder->cfg.stats_file_prefix);
-    bits_file = fopen(buff, "w");
+    data->bits_file = fopen(buff, "w");
     sprintf(buff, "%sdist.txt", encoder->cfg.stats_file_prefix);
-    dist_file = fopen(buff, "w");
+    data->dist_file = fopen(buff, "w");
     sprintf(buff, "%sqp.txt", encoder->cfg.stats_file_prefix);
-    qp_file = fopen(buff, "w");
+    data->qp_file = fopen(buff, "w");
     sprintf(buff, "%slambda.txt", encoder->cfg.stats_file_prefix);
-    lambda_file = fopen(buff, "w");
+    data->lambda_file = fopen(buff, "w");
   }
+
+  data->smoothing_window = MIN_SMOOTHING_WINDOW;
+
   return data;
 }
 
-void kvz_free_rc_data() {
+void kvz_free_rc_data(kvz_rc_data *data) {
   if (data == NULL) return;
 
   pthread_mutex_destroy(&data->ck_frame_lock);
@@ -189,16 +183,16 @@
     bits_coded -= state->frame->cur_gop_bits_coded;
   }
 
-  smoothing_window = MAX(MIN_SMOOTHING_WINDOW, smoothing_window - MAX(encoder->cfg.gop_len / 2, 1));
+  state->frame->new_ratecontrol->smoothing_window = MAX(MIN_SMOOTHING_WINDOW, state->frame->new_ratecontrol->smoothing_window - MAX(encoder->cfg.gop_len / 2, 1));
   double gop_target_bits = -1;
 
-  while( gop_target_bits < 0 && smoothing_window < 150) {
+  while( gop_target_bits < 0 && state->frame->new_ratecontrol->smoothing_window < 150) {
     // Equation 12 from https://doi.org/10.1109/TIP.2014.2336550
     gop_target_bits =
-      (encoder->target_avg_bppic * (pictures_coded + smoothing_window) - bits_coded)
-      * MAX(1, encoder->cfg.gop_len) / smoothing_window;
+      (encoder->target_avg_bppic * (pictures_coded + state->frame->new_ratecontrol->smoothing_window) - bits_coded)
+      * MAX(1, encoder->cfg.gop_len) / state->frame->new_ratecontrol->smoothing_window;
     if(gop_target_bits < 0) {
-      smoothing_window += 10;
+      state->frame->new_ratecontrol->smoothing_window += 10;
     }
   }
   // Allocate at least 200 bits for each GOP like HM does.
@@ -375,7 +369,12 @@
     else {
       alpha = 0.3;
     }
-    return MIN(MAX(100, alpha*pow(state->frame->icost * 4 / bits, beta)*bits), encoder->cfg.gop_len >= 2 ? 0.85 * state->frame->cur_gop_target_bits : state->frame->cur_gop_target_bits);
+
+    double low_limit = (encoder->cfg.rc_algorithm == KVZ_LAMBDA && encoder->cfg.rdo < 2 ? 1.0 : 1.2) * pow(state->encoder_control->cfg.framerate, -0.873) * state->encoder_control->cfg.target_bitrate;
+    double high_limit = (encoder->cfg.rdo < 2 ? (encoder->cfg.rc_algorithm == KVZ_LAMBDA ? 1.1 : 3.5) : 2.25) * pow(state->encoder_control->cfg.framerate, -0.61) * state->encoder_control->cfg.target_bitrate;
+    double original_bits = alpha * pow(state->frame->icost * 4 / bits, beta) * bits;
+    double limited = MIN(MAX(low_limit, original_bits), high_limit);
+    return limited;
   }
 
   if (encoder->cfg.gop_len <= 0) {
@@ -387,7 +386,8 @@
   const double pic_target_bits =
     state->frame->cur_gop_target_bits * pic_weight - pic_header_bits(state);
   // Allocate at least 100 bits for each picture like HM does.
-  return MAX(100, pic_target_bits);
+  const double intra_bits = state->frame->is_irap ? (encoder->cfg.rdo < 2 ? 4 : 6) * state->encoder_control->target_avg_bppic : pic_target_bits;
+  return MAX(MAX(100, pic_target_bits), intra_bits);
 }
 
 static int8_t lambda_to_qp(const double lambda)
@@ -716,7 +716,7 @@
     alpha = -state->frame->c_paraindex * state->frame->k_paraindex;
     beta = state->frame->k_paraindex - 1;
   }
-
+  alpha *= (double)(1 << (KVZ_BIT_DEPTH - 8));
   double est_lambda;
   int est_qp;
   if (state->frame->is_irap && encoder->cfg.intra_bit_allocation) {
@@ -924,10 +924,10 @@
 
   if (encoder->cfg.stats_file_prefix) {
     int poc = calc_poc(state);
-    fprintf(dist_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
-    fprintf(bits_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
-    fprintf(qp_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
-    fprintf(lambda_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
+    fprintf(state->frame->new_ratecontrol->dist_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
+    fprintf(state->frame->new_ratecontrol->bits_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
+    fprintf(state->frame->new_ratecontrol->qp_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
+    fprintf(state->frame->new_ratecontrol->lambda_file, "%d %d %d\n", poc, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
   }
 
   for(int y_ctu = 0; y_ctu < state->encoder_control->in.height_in_lcu; y_ctu++) {
@@ -945,17 +945,17 @@
       total_distortion += (double)ctu_distortion / ctu->pixels;
       lambda += ctu->lambda / (state->encoder_control->in.width_in_lcu * state->encoder_control->in.height_in_lcu);
       if(encoder->cfg.stats_file_prefix) {
-        fprintf(dist_file, "%f ", ctu->distortion);
-        fprintf(bits_file, "%d ", ctu->bits);
-        fprintf(qp_file, "%d ", ctu->adjust_qp ? ctu->adjust_qp : ctu->qp);
-        fprintf(lambda_file, "%f ", ctu->adjust_lambda ? ctu->adjust_lambda : ctu->lambda);
+        fprintf(state->frame->new_ratecontrol->dist_file, "%f ", ctu->distortion);
+        fprintf(state->frame->new_ratecontrol->bits_file, "%d ", ctu->bits);
+        fprintf(state->frame->new_ratecontrol->qp_file, "%d ", ctu->adjust_qp ? ctu->adjust_qp : ctu->qp);
+        fprintf(state->frame->new_ratecontrol->lambda_file, "%f ", ctu->adjust_lambda ? ctu->adjust_lambda : ctu->lambda);
       }
     }
     if (encoder->cfg.stats_file_prefix) {
-      fprintf(dist_file, "\n");
-      fprintf(bits_file, "\n");
-      fprintf(qp_file, "\n");
-      fprintf(lambda_file, "\n");
+      fprintf(state->frame->new_ratecontrol->dist_file, "\n");
+      fprintf(state->frame->new_ratecontrol->bits_file, "\n");
+      fprintf(state->frame->new_ratecontrol->qp_file, "\n");
+      fprintf(state->frame->new_ratecontrol->lambda_file, "\n");
     }
   }

kvazaar-2.3.1.tar.gz/src/rate_control.h -> kvazaar-2.3.2.tar.gz/src/rate_control.h Changed

kvazaar-2.3.1.tar.gz/src/sao.c -> kvazaar-2.3.2.tar.gz/src/sao.c Changed

@@ -385,7 +385,7 @@
     // Call calc_sao_edge_dir once for luma and twice for chroma.
     for (i = 0; i < buf_cnt; ++i) {
       FILL(cat_sum_cnt, 0);
-      kvz_calc_sao_edge_dir(data[i], recdata[i], edge_class,
+      kvz_calc_sao_edge_dir(state->encoder_control, data[i], recdata[i], edge_class,
                         block_width, block_height, cat_sum_cnt);
     
 
@@ -514,7 +514,7 @@
     unsigned buf_i;
     
     for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
-      ddistortion += kvz_sao_edge_ddistortion(data[buf_i], recdata[buf_i], 
+      ddistortion += kvz_sao_edge_ddistortion(state->encoder_control, data[buf_i], recdata[buf_i], 
                                           block_width, block_height,
                                           edge_sao.eo_class, &edge_sao.offsets[5 * buf_i]);
     }
@@ -577,7 +577,7 @@
         switch (merge_cand->type) {
           case SAO_TYPE_EDGE:
                 for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
-                  ddistortion += kvz_sao_edge_ddistortion(data[buf_i], recdata[buf_i],
+                  ddistortion += kvz_sao_edge_ddistortion(state->encoder_control, data[buf_i], recdata[buf_i],
                     block_width, block_height,
                     merge_cand->eo_class, &merge_cand->offsets[5 * buf_i]);
                 }

kvazaar-2.3.1.tar.gz/src/search.c -> kvazaar-2.3.2.tar.gz/src/search.c Changed

@@ -233,6 +233,7 @@
 * Calculate RD cost for a Coding Unit.
 * \return Cost of block
 * \param ref_cu  CU used for prediction parameters.
+* \param parent_tu
 *
 * Calculates the RDO cost of a single CU that will not be split further.
 * Takes into account SSD of reconstruction and the cost of encoding whatever
@@ -241,10 +242,10 @@
 double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
                            const int x_px, const int y_px, const int depth,
                            const cu_info_t *const pred_cu,
-                           lcu_t *const lcu)
+                           const cu_info_t* const parent_tu, lcu_t *const lcu)
 {
   const int width = LCU_WIDTH >> depth;
-  const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
+  const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0);
 
   // cur_cu is used for TU parameters.
   cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
@@ -283,10 +284,10 @@
     int offset = width / 2;
     double sum = 0;
 
-    sum += kvz_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, lcu);
-    sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
-    sum += kvz_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
-    sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
+    sum += kvz_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu);
+    sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu);
+    sum += kvz_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu);
+    sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu);
 
     return sum + tr_tree_bits * state->lambda;
   }
@@ -302,8 +303,8 @@
     if (state->encoder_control->chroma_format != KVZ_CSP_400) {
       cabac_ctx_t* cr_ctx = &(cabac->ctx.qt_cbf_model_chroma[depth - tr_cu->depth]);
       cabac->cur_ctx = cr_ctx;
-      int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
-      int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
+      int u_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_U);
+      int v_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_V);
       CABAC_FBITS_UPDATE(cabac, cr_ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
       CABAC_FBITS_UPDATE(cabac, cr_ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
     }
@@ -311,7 +312,7 @@
 
   // Add transform_tree cbf_luma bit cost.
   const int is_tr_split = tr_cu->tr_depth - tr_cu->depth;
-  int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y);
+  int is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_Y);
   if (pred_cu->type == CU_INTRA ||
       is_tr_split ||
       cbf_is_set(tr_cu->cbf, depth, COLOR_U) ||
@@ -348,12 +349,12 @@
 double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
                              const int x_px, const int y_px, const int depth,
                              const cu_info_t *const pred_cu,
-                             lcu_t *const lcu)
+                             const cu_info_t* const parent_tu, lcu_t *const lcu)
 {
   const vector2d_t lcu_px = { x_px / 2, y_px / 2 };
   const int width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
   cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
-  const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
+  const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0);
 
   double tr_tree_bits = 0;
   double coeff_bits = 0;
@@ -367,18 +368,18 @@
     return 0;
   }
 
-  int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
-  int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
+  int u_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_U);
+  int v_is_set = cbf_is_set(tr_cu->cbf, depth, COLOR_V);
   // See luma for why the second condition
   if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth) && !skip_residual_coding) {
     const int tr_depth = depth - pred_cu->depth;
     cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
     cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_chroma[tr_depth]);
     cabac->cur_ctx = ctx;
-    if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
+    if (tr_depth == 0 || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_U)) {
       CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
     }
-    if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
+    if (tr_depth == 0 || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_V)) {
       CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
     }
   }
@@ -387,10 +388,10 @@
     int offset = LCU_WIDTH >> (depth + 1);
     double sum = 0;
 
-    sum += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu);
-    sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
-    sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
-    sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
+    sum += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu);
+    sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu);
+    sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu);
+    sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu);
 
     return sum + tr_tree_bits * state->lambda;
   }
@@ -424,10 +425,10 @@
 static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
                                            const int x_px, const int y_px, const int depth,
                                            const cu_info_t* const pred_cu,
-                                           lcu_t* const lcu) {
+                                           const cu_info_t* const parent_tu, lcu_t* const lcu) {
   const int width = LCU_WIDTH >> depth;
 
-  const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
+  const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && parent_tu->cbf == 0);
   // cur_cu is used for TU parameters.
   cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
 
@@ -445,7 +446,7 @@
 
   cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
 
-  {
+  if (depth == pred_cu->depth){
     int cbf = cbf_is_set_any(pred_cu->cbf, depth);
     // Only need to signal coded block flag if not skipped or merged
     // skip = no coded residual, merge = coded residual
@@ -474,10 +475,10 @@
   }
 
   if(state->encoder_control->chroma_format != KVZ_CSP_400 && !skip_residual_coding) {
-    if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
+    if(tr_cu->depth == depth || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_U)) {
       CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_chroma[depth - tr_cu->depth]), cb_flag_u, tr_tree_bits, "cbf_cb");
     } 
-    if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
+    if(tr_cu->depth == depth || cbf_is_set(tr_cu->cbf, depth - 1, COLOR_V)) {
       CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_chroma[depth - tr_cu->depth]), cb_flag_v, tr_tree_bits, "cbf_cr");
     } 
   }
@@ -486,10 +487,10 @@
     int offset = LCU_WIDTH >> (depth + 1);
     double sum = 0;
 
-    sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu);
-    sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
-    sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
-    sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
+    sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, tr_cu, lcu);
+    sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, tr_cu, lcu);
+    sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, tr_cu, lcu);
+    sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, tr_cu, lcu);
     return sum + tr_tree_bits * state->lambda;
   }
   const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) ;
@@ -515,7 +516,7 @@
       width);
   }
 
-  {
+  if(!skip_residual_coding) {
     int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
     const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
 
@@ -538,7 +539,7 @@
       chroma_ssd = ssd_u + ssd_v;
     }
 
-     {
+    if (!skip_residual_coding) {
       int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
       const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
 
@@ -864,6 +865,26 @@
           inter_bitcost += cur_cu->merge_idx;        
         }
       }
+      else if (!state->encoder_control->cfg.early_skip)
+      {
+        int tr_depth = depth;
+        kvz_lcu_fill_trdepth(lcu, x, y, depth, tr_depth);
+        const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
+        kvz_inter_recon_cu(state, lcu, x, y, cu_width, true, has_chroma);
+        for (int i = 0; i < cu_width; i += (cu_width >> (depth == 0)))
+        {
+          for (int j = 0; j < cu_width; j += (cu_width >> (depth == 0)))
+          {
+            cu_info_t* tmp_cu = LCU_GET_CU_AT_PX(lcu, x_local + i, y_local + j);
+            cbf_clear(&tmp_cu->cbf, depth, COLOR_Y);
+            if (has_chroma)
+            {              
+             cbf_clear(&tmp_cu->cbf, depth, COLOR_U);
+             cbf_clear(&tmp_cu->cbf, depth, COLOR_V);
+            }
+          }
+        }
+      }
       lcu_fill_inter(lcu, x_local, y_local, cu_width);
       lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
     }
@@ -893,7 +914,7 @@
     
     cost = bits * state->lambda;
 
-    cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu);
+    cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, cur_cu, lcu);
     
     if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) {
       cost = inter_zero_coeff_cost;
@@ -1015,7 +1036,7 @@
         double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y) + bits;
         cost += mode_bits * state->lambda;
 
-        cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu);
+        cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, cur_cu, lcu);
 
         memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
         memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac));

kvazaar-2.3.1.tar.gz/src/search.h -> kvazaar-2.3.2.tar.gz/src/search.h Changed

kvazaar-2.3.1.tar.gz/src/search_inter.c -> kvazaar-2.3.2.tar.gz/src/search_inter.c Changed

@@ -220,8 +220,9 @@
       info->height,
       info->optimized_sad
   );
-
-  if (cost >= *best_cost) return false;
+  // On some platforms comparing two doubles gives weird results, so add an offset
+#define KVZ_TEMP_DOUBLE_PRECISION 0.001
+  if (cost + KVZ_TEMP_DOUBLE_PRECISION >= *best_cost) return false;
 
   cost += info->mvd_cost_func(
       info->state,
@@ -233,7 +234,8 @@
       &bitcost
   );
 
-  if (cost >= *best_cost) return false;
+  if (cost + KVZ_TEMP_DOUBLE_PRECISION >= *best_cost) return false;
+#undef KVZ_TEMP_DOUBLE_PRECISION
 
   // Set to motion vector in quarter pixel precision.
   best_mv->x = x * 4;
@@ -552,7 +554,7 @@
       //  6   3  
       {
         { iDist / 2, iDist }, { iDist, 0 }, { iDist / 2, -iDist }, { -iDist, 0 },
-        { iDist / 2, iDist }, { -iDist / 2, -iDist }, { 0, 0 }, { 0, 0 }
+        { -iDist / 2, iDist }, { -iDist / 2, -iDist }, { 0, 0 }, { 0, 0 }
       }
   };
 
@@ -1045,6 +1047,11 @@
   epol_args.ext_origin = &ext_origin;
   epol_args.ext_s = &ext_s;
 
+  // In case the half of the fractional positions are legal
+  // and the other half illegal, the extension will be performed
+  // for pixels that might be uninitialized.
+  // Nevertheless, in that case the search will not use those pixels,
+  // but the thread/address sanitizer will not know that.
   kvz_get_extended_block(&epol_args);
 
   kvz_pixel *tmp_pic = pic->y + orig.y * pic->stride + orig.x;
@@ -1879,6 +1886,12 @@
         double   frac_bits = MAX_INT;
         vector2d_t frac_mv = { unipred_pu->inter.mv[list][0], unipred_pu->inter.mv[list][1] };
 
+        // Check that at least one quarter-pel step is possible
+        if (!fracmv_within_tile(info, frac_mv.x + 3, frac_mv.y + 3) &&
+            !fracmv_within_tile(info, frac_mv.x - 3, frac_mv.y - 3)) {
+          continue;
+        }
+
         search_frac(info, &frac_cost, &frac_bits, &frac_mv);
 
         uint8_t mv_ref_coded = LX_idx;
@@ -2083,7 +2096,7 @@
   const int width = LCU_WIDTH >> depth;
   cabac_data_t cabac_copy;
   memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
-  cabac_copy.update = 1;
+  state->search_cabac.update = 1;
 
   cu_info_t* cur_pu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
   *cur_pu = *cur_cu;
@@ -2109,12 +2122,16 @@
   double bits = 0;
   const int skip_context = kvz_get_skip_context(x, y, lcu, NULL);
   if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
-    no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost;
-    bits += kvz_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu);
+    no_cbf_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost;
+    bits += kvz_mock_encode_coding_unit(state, &state->search_cabac, x, y, depth, lcu, cur_cu);
   }
   else {
-    no_cbf_bits = kvz_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu);
-    bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 1);
+    no_cbf_bits = kvz_mock_encode_coding_unit(state, &state->search_cabac, x, y, depth, lcu, cur_cu);
+    bits += no_cbf_bits;
+    if (!cur_cu->merged || cur_cu->part_size != SIZE_2Nx2N) {
+      bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_qt_root_cbf_model, 1);
+      no_cbf_bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_qt_root_cbf_model, 0);
+    }
   }
   double no_cbf_cost = ssd + no_cbf_bits * state->lambda;
 
@@ -2124,12 +2141,23 @@
                             lcu,
                             false);
 
+
+  if (tr_depth == depth)
+  {
+    cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_Y);
+    if (reconstruct_chroma)
+    {      
+     cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_U);
+     cbf_copy(&cur_pu->cbf, cur_cu->cbf, COLOR_V);
+    }
+  }
+
   int cbf = cbf_is_set_any(cur_cu->cbf, depth);
   
   if(cbf) {
-    *inter_cost = kvz_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu);
+    *inter_cost = kvz_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, cur_cu, lcu);
     if (reconstruct_chroma) {
-      *inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu);
+      *inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, cur_cu, lcu);
     }
   }
   else {
@@ -2137,13 +2165,14 @@
     *inter_cost = no_cbf_cost;
     cur_cu->cbf = 0;
     *inter_bitcost = no_cbf_bits;
+    memcpy(&state->search_cabac, &cabac_copy, sizeof(cabac_copy));
     return;
   }
   
   *inter_cost += (bits)* state->lambda;
   *inter_bitcost = bits;
 
-  if(no_cbf_cost < *inter_cost) {
+  if(no_cbf_cost < *inter_cost && !state->encoder_control->cfg.lossless) {
     cur_cu->cbf = 0;
     if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
       cur_cu->skipped = 1;
@@ -2152,6 +2181,7 @@
     *inter_bitcost = no_cbf_bits;
     
   }
+  memcpy(&state->search_cabac, &cabac_copy, sizeof(cabac_copy));
 }

kvazaar-2.3.1.tar.gz/src/search_intra.c -> kvazaar-2.3.2.tar.gz/src/search_intra.c Changed

@@ -221,9 +221,9 @@
                        intra_mode, chroma_mode,
                        pred_cu, lcu);
 
-    nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
+    nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, pred_cu, lcu);
     if (reconstruct_chroma) {
-      nosplit_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
+      nosplit_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, pred_cu, lcu);
     }
 
     // Early stop condition for the recursive search.
@@ -334,11 +334,9 @@
   cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width);
   //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width);
 
-  kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT];
-  kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);
+  ALIGNED(SIMD_ALIGNMENT) kvz_pixel pred[32 * 32];
 
-  kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT];
-  kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
+  ALIGNED(SIMD_ALIGNMENT) kvz_pixel orig_block[32 * 32];
 
   kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width);
   for (int i = 0; i < 5; ++i) {
@@ -408,11 +406,9 @@
   const bool filter_boundary = !(cfg->lossless && cfg->implicit_rdpcm);
 
   // Temporary block arrays
-  kvz_pixel _preds[PARALLEL_BLKS * 32 * 32 + SIMD_ALIGNMENT];
-  pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT);
-  
-  kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT];
-  kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
+  ALIGNED(SIMD_ALIGNMENT) kvz_pixel _preds[PARALLEL_BLKS * 32 * 32];
+  pred_buffer preds = (pred_buffer)_preds;
+  ALIGNED(SIMD_ALIGNMENT) kvz_pixel orig_block[32 * 32];
 
   // Store original block for SAD computation
   kvz_pixels_blit(orig, orig_block, width, width, origstride, width);
@@ -731,7 +727,7 @@
                          -1, chroma.mode, // skip luma
                          NULL, lcu);
       double bits = 0;
-      chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
+      chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, tr_cu, lcu);
 
       double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode);
       bits += mode_bits;

kvazaar-2.3.1.tar.gz/src/strategies/avx2/dct-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/dct-avx2.c Changed

@@ -38,7 +38,6 @@
 
 #if COMPILE_INTEL_AVX2
 #include "kvazaar.h"
-#if KVZ_BIT_DEPTH == 8
 #include <immintrin.h>
 
 #include "strategyselector.h"
@@ -938,30 +937,28 @@
 TRANSFORM(dct, 32);
 ITRANSFORM(dct, 32);
 
-#endif // KVZ_BIT_DEPTH == 8
+
 #endif //COMPILE_INTEL_AVX2
 
 int kvz_strategy_register_dct_avx2(void* opaque, uint8_t bitdepth)
 {
   bool success = true;
 #if COMPILE_INTEL_AVX2
-#if KVZ_BIT_DEPTH == 8
-  if (bitdepth == 8){
-    success &= kvz_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2);
+  // Coefficients are the same for all bitdepths, no need to disable for 10-bit
+  success &= kvz_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2);
 
-    success &= kvz_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2);
-    success &= kvz_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2);
-    success &= kvz_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2);
-    success &= kvz_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2);
+  success &= kvz_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2);
+  success &= kvz_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2);
+  success &= kvz_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2);
+  success &= kvz_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2);
 
-    success &= kvz_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2);
+  success &= kvz_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2);
 
-    success &= kvz_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2);
-    success &= kvz_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2);
-    success &= kvz_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2);
-    success &= kvz_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2);
-  }
-#endif // KVZ_BIT_DEPTH == 8
+  success &= kvz_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2);
+  success &= kvz_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2);
+  success &= kvz_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2);
+  success &= kvz_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2);
+ 
 #endif //COMPILE_INTEL_AVX2  
   return success;
 }

kvazaar-2.3.1.tar.gz/src/strategies/avx2/picture-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/picture-avx2.c Changed

@@ -631,48 +631,51 @@
   static cost_pixel_any_size_multi_func satd_any_size_## suffix; \
   static void satd_any_size_ ## suffix ( \
       int width, int height, \
-      const uint8_t **preds, \
+      const kvz_pixel **preds, \
       const int stride, \
-      const uint8_t *orig, \
+      const kvz_pixel *orig, \
       const int orig_stride, \
       unsigned num_modes, \
       unsigned *costs_out, \
       int8_t *valid) \
   { \
     unsigned sums[num_parallel_blocks] = { 0 }; \
-    const uint8_t *pred_ptrs[4] = { preds[0], preds[1], preds[2], preds[3] };\
-    const uint8_t *orig_ptr = orig; \
+    const kvz_pixel *pred_ptrs[4] = { preds[0], preds[1], preds[2], preds[3] };\
+    const kvz_pixel *orig_ptr = orig; \
     costs_out[0] = 0; costs_out[1] = 0; costs_out[2] = 0; costs_out[3] = 0; \
-    if (width % 8 != 0) { \
+    const int width_mod_8 = width % 8; \
+    if (width_mod_8 != 0) { \
+      const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\
       /* Process the first column using 4x4 blocks. */ \
       for (int y = 0; y < height; y += 4) { \
-        kvz_satd_4x4_subblock_ ## suffix(preds, stride, orig, orig_stride, sums); \
-            } \
-      orig_ptr += 4; \
+        kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &origy*orig_stride, orig_stride, sums); \
+        for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4*stride; costs_outblk += sumsblk; }\
+      } \
       for(int blk = 0; blk < num_parallel_blocks; ++blk){\
         pred_ptrsblk += 4; \
-            }\
+      }\
       width -= 4; \
-            } \
+    } \
     if (height % 8 != 0) { \
+      const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\
       /* Process the first row using 4x4 blocks. */ \
       for (int x = 0; x < width; x += 4 ) { \
-        kvz_satd_4x4_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \
-            } \
-      orig_ptr += 4 * orig_stride; \
+        kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &orig_ptrx, orig_stride, sums); \
+        for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4;  costs_outblk += sumsblk; }\
+      } \
       for(int blk = 0; blk < num_parallel_blocks; ++blk){\
         pred_ptrsblk += 4 * stride; \
-            }\
+      }\
       height -= 4; \
-        } \
+    } \
     /* The rest can now be processed with 8x8 blocks. */ \
-    for (int y = 0; y < height; y += 8) { \
-      orig_ptr = &origy * orig_stride; \
-      pred_ptrs0 = &preds0y * stride; \
-      pred_ptrs1 = &preds1y * stride; \
-      pred_ptrs2 = &preds2y * stride; \
-      pred_ptrs3 = &preds3y * stride; \
-      for (int x = 0; x < width; x += 8) { \
+    for (int y = height % 8; y < height; y += 8) { \
+      orig_ptr = &origy * orig_stride + width_mod_8; \
+      pred_ptrs0 = &preds0y * stride + width_mod_8; \
+      pred_ptrs1 = &preds1y * stride + width_mod_8; \
+      pred_ptrs2 = &preds2y * stride + width_mod_8; \
+      pred_ptrs3 = &preds3y * stride + width_mod_8; \
+      for (int x = width_mod_8; x < width; x += 8) { \
         satd_8x8_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \
         orig_ptr += 8; \
         pred_ptrs0 += 8; \

kvazaar-2.3.1.tar.gz/src/strategies/avx2/sao-avx2.c -> kvazaar-2.3.2.tar.gz/src/strategies/avx2/sao-avx2.c Changed

@@ -284,7 +284,8 @@
   return calc_diff_off_delta(diff_lo, diff_hi, offset, orig);
 }
 
-static int32_t sao_edge_ddistortion_avx2(const uint8_t *orig_data,
+static int32_t sao_edge_ddistortion_avx2(const encoder_control_t* const encoder, 
+                                         const uint8_t *orig_data,
                                          const uint8_t *rec_data,
                                                int32_t  block_width,
                                                int32_t  block_height,
@@ -316,7 +317,8 @@
   assert(NUM_SAO_EDGE_CATEGORIES == 5);
 
   if (offsets_ok != 0xffff) {
-    return sao_edge_ddistortion_generic(orig_data,
+    return sao_edge_ddistortion_generic(encoder, 
+                                        orig_data,
                                         rec_data,
                                         block_width,
                                         block_height,
@@ -420,7 +422,8 @@
   }
 }
 
-static void calc_sao_edge_dir_avx2(const uint8_t *orig_data,
+static void calc_sao_edge_dir_avx2(const encoder_control_t* const encoder, 
+                                   const uint8_t *orig_data,
                                    const uint8_t *rec_data,
                                          int32_t  eo_class,
                                          int32_t  block_width,

kvazaar-2.3.1.tar.gz/src/strategies/generic/ipol-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/ipol-generic.c Changed

kvazaar-2.3.1.tar.gz/src/strategies/generic/picture-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/picture-generic.c Changed

@@ -417,36 +417,39 @@
     const kvz_pixel *pred_ptrs[4] = { preds[0], preds[1], preds[2], preds[3] };\
     const kvz_pixel *orig_ptr = orig; \
     costs_out[0] = 0; costs_out[1] = 0; costs_out[2] = 0; costs_out[3] = 0; \
-    if (width % 8 != 0) { \
+    const int width_mod_8 = width % 8; \
+    if (width_mod_8 != 0) { \
+      const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\
       /* Process the first column using 4x4 blocks. */ \
       for (int y = 0; y < height; y += 4) { \
-        kvz_satd_4x4_subblock_ ## suffix(preds, stride, orig, orig_stride, sums); \
-            } \
-      orig_ptr += 4; \
+        kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &origy*orig_stride, orig_stride, sums); \
+        for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4*stride; costs_outblk += sumsblk; }\
+      } \
       for(int blk = 0; blk < num_parallel_blocks; ++blk){\
         pred_ptrsblk += 4; \
-            }\
+      }\
       width -= 4; \
-            } \
+    } \
     if (height % 8 != 0) { \
+      const kvz_pixel *pred_ptrs_tmp4 = { preds0, preds1, preds2, preds3 };\
       /* Process the first row using 4x4 blocks. */ \
       for (int x = 0; x < width; x += 4 ) { \
-        kvz_satd_4x4_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \
-            } \
-      orig_ptr += 4 * orig_stride; \
+        kvz_satd_4x4_subblock_ ## suffix(pred_ptrs_tmp, stride, &orig_ptrx, orig_stride, sums); \
+        for(int blk = 0; blk < num_parallel_blocks; ++blk){ pred_ptrs_tmpblk += 4;  costs_outblk += sumsblk; }\
+      } \
       for(int blk = 0; blk < num_parallel_blocks; ++blk){\
         pred_ptrsblk += 4 * stride; \
-            }\
+      }\
       height -= 4; \
-        } \
+    } \
     /* The rest can now be processed with 8x8 blocks. */ \
-    for (int y = 0; y < height; y += 8) { \
-      orig_ptr = &origy * orig_stride; \
-      pred_ptrs0 = &preds0y * stride; \
-      pred_ptrs1 = &preds1y * stride; \
-      pred_ptrs2 = &preds2y * stride; \
-      pred_ptrs3 = &preds3y * stride; \
-      for (int x = 0; x < width; x += 8) { \
+    for (int y = height % 8; y < height; y += 8) { \
+      orig_ptr = &origy * orig_stride + width_mod_8; \
+      pred_ptrs0 = &preds0y * stride + width_mod_8; \
+      pred_ptrs1 = &preds1y * stride + width_mod_8; \
+      pred_ptrs2 = &preds2y * stride + width_mod_8; \
+      pred_ptrs3 = &preds3y * stride + width_mod_8; \
+      for (int x = width_mod_8; x < width; x += 8) { \
         satd_8x8_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \
         orig_ptr += 8; \
         pred_ptrs0 += 8; \

kvazaar-2.3.1.tar.gz/src/strategies/generic/sao-generic.c -> kvazaar-2.3.2.tar.gz/src/strategies/generic/sao-generic.c Changed

@@ -47,7 +47,8 @@
  * \param dir_offsets
  * \param is_chroma  0 for luma, 1 for chroma. Indicates
  */
-static void calc_sao_edge_dir_generic(const kvz_pixel *orig_data,
+static void calc_sao_edge_dir_generic(const encoder_control_t* const encoder, 
+                                      const kvz_pixel *orig_data,
                                       const kvz_pixel *rec_data,
                                       int eo_class,
                                       int block_width,
@@ -62,7 +63,7 @@
   // Don't sample the edge pixels because this function doesn't have access to
   // their neighbours.
 
-  
+  const int offset = encoder->bitdepth != 8 ? 1 << (encoder->bitdepth - 9) : 0;
 
   for (y = 1; y < block_height - 1; ++y) {
     for (x = 1; x < block_width - 1; ++x) {
@@ -73,7 +74,7 @@
 
       int eo_cat = sao_calc_eo_cat(a, b, c);
 
-      cat_sum_cnt[0][eo_cat] += orig_data[y * block_width + x] - c;
+      cat_sum_cnt[0][eo_cat] += (orig_data[y * block_width + x] - c + offset) >> (encoder->bitdepth - 8);
       cat_sum_cnt[1][eo_cat] += 1;
     }
   }

kvazaar-2.3.1.tar.gz/src/strategies/generic/sao_shared_generics.h -> kvazaar-2.3.2.tar.gz/src/strategies/generic/sao_shared_generics.h Changed

@@ -49,7 +49,8 @@
   return sao_eo_idx_to_eo_category[eo_idx];
 }
 
-static int sao_edge_ddistortion_generic(const kvz_pixel *orig_data,
+static int sao_edge_ddistortion_generic(const encoder_control_t* const encoder, 
+                                        const kvz_pixel *orig_data,
                                         const kvz_pixel *rec_data,
                                               int32_t    block_width,
                                               int32_t    block_height,
@@ -61,22 +62,24 @@
   vector2d_t a_ofs = g_sao_edge_offsets[eo_class][0];
   vector2d_t b_ofs = g_sao_edge_offsets[eo_class][1];
 
+  const int bit_offset = encoder->bitdepth != 8 ? 1 << (encoder->bitdepth - 9) : 0;
+
   for (y = 1; y < block_height - 1; y++) {
     for (x = 1; x < block_width - 1; x++) {
       uint32_t c_pos =  y            * block_width + x;
       uint32_t a_pos = (y + a_ofs.y) * block_width + x + a_ofs.x;
       uint32_t b_pos = (y + b_ofs.y) * block_width + x + b_ofs.x;
 
-      uint8_t   a    =  rec_data[a_pos];
-      uint8_t   b    =  rec_data[b_pos];
-      uint8_t   c    =  rec_data[c_pos];
-      uint8_t   orig = orig_data[c_pos];
+      kvz_pixel a    =  rec_data[a_pos];
+      kvz_pixel b    =  rec_data[b_pos];
+      kvz_pixel c    =  rec_data[c_pos];
+      kvz_pixel orig = orig_data[c_pos];
 
       int32_t eo_cat = sao_calc_eo_cat(a, b, c);
       int32_t offset = offsets[eo_cat];
 
       if (offset != 0) {
-        int32_t diff   = orig - c;
+        int32_t diff   = (orig - c + bit_offset) >> (encoder->bitdepth - 8);
         int32_t delta  = diff - offset;
         int32_t curr   = delta * delta - diff * diff;

kvazaar-2.3.1.tar.gz/src/strategies/strategies-ipol.h -> kvazaar-2.3.2.tar.gz/src/strategies/strategies-ipol.h Changed

@@ -50,14 +50,14 @@
  // error because AVX2 reads one extra byte in the end.
 #define KVZ_IPOL_MAX_INPUT_SIZE_LUMA_SIMD   ((KVZ_EXT_BLOCK_W_LUMA   + 1) * KVZ_EXT_BLOCK_W_LUMA   + 1)
 #define KVZ_IPOL_MAX_INPUT_SIZE_CHROMA_SIMD ((KVZ_EXT_BLOCK_W_CHROMA + 3) * KVZ_EXT_BLOCK_W_CHROMA + 1)
-#define KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD      ((KVZ_EXT_BLOCK_W_LUMA   + 1) * LCU_WIDTH)
+#define KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD      ((KVZ_EXT_BLOCK_W_LUMA   + 1) * LCU_WIDTH + 1)
 #define KVZ_IPOL_MAX_IM_SIZE_CHROMA_SIMD    ((KVZ_EXT_BLOCK_W_CHROMA + 3) * LCU_WIDTH_C)
 
 // On top of basic interpolation, FME needs one extra
 // column and row for ME (left and up). Adding the
 // extra row happens to satisfy AVX2 requirements for
 // row count. No other extra rows are needed.
-#define KVZ_FME_MAX_INPUT_SIZE_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * (KVZ_EXT_BLOCK_W_LUMA + 1))
+#define KVZ_FME_MAX_INPUT_SIZE_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * (KVZ_EXT_BLOCK_W_LUMA + 1) + 1)
 
 typedef struct { kvz_pixel *buffer; kvz_pixel *orig_topleft; unsigned stride; unsigned malloc_used; } kvz_extended_block;

kvazaar-2.3.1.tar.gz/src/strategies/strategies-sao.h -> kvazaar-2.3.2.tar.gz/src/strategies/strategies-sao.h Changed

kvazaar-2.3.1.tar.gz/src/threadqueue.c -> kvazaar-2.3.2.tar.gz/src/threadqueue.c Changed

@@ -389,10 +389,31 @@
   threadqueue->first              = NULL;
   threadqueue->last               = NULL;
 
+#ifndef _MSC_VER
+  pthread_attr_t attr;
+  if (pthread_attr_init(&attr) != 0) {
+    fprintf(stderr, "pthread_attr_init failed!\n");
+    goto failed;
+  }
+  size_t default_stack_size;
+  if (pthread_attr_getstacksize(&attr, &default_stack_size) != 0) {
+    fprintf(stderr, "pthread_attr_getstacksize failed!\n");
+    goto failed;
+  }
+  if (default_stack_size < 1024 * 1024) {
+    if (pthread_attr_setstacksize(&attr, 1024 * 1024) != 0) {
+      fprintf(stderr, "pthread_attr_setstacksize failed!\n");
+      goto failed;
+    }
+  }
+#else
+  pthread_attr_t attr;
+#endif
+
   // Lock the queue before creating threads, to ensure they all have correct information.
   PTHREAD_LOCK(&threadqueue->lock);
   for (int i = 0; i < thread_count; i++) {
-    if (pthread_create(&threadqueue->threads[i], NULL, threadqueue_worker, threadqueue) != 0) {
+    if (pthread_create(&threadqueue->threads[i], &attr, threadqueue_worker, threadqueue) != 0) {
         fprintf(stderr, "pthread_create failed!\n");
         goto failed;
     }
@@ -404,6 +425,11 @@
   return threadqueue;
 
 failed:
+#ifndef _MSC_VER
+  if (pthread_attr_destroy(&attr) != 0) {
+    fprintf(stderr, "pthread_attr_destroy failed!\n");
+  }
+#endif
   kvz_threadqueue_free(threadqueue);
   return NULL;
 }

kvazaar-2.3.1.tar.gz/src/threadwrapper/include/pthread.h -> kvazaar-2.3.2.tar.gz/src/threadwrapper/include/pthread.h Changed

kvazaar-2.3.1.tar.gz/src/yuv_io.c -> kvazaar-2.3.2.tar.gz/src/yuv_io.c Changed

@@ -57,24 +57,43 @@
                                     unsigned width, unsigned height, unsigned bytes_per_sample,
                                     unsigned array_width, kvz_pixel *data)
 {
-  kvz_pixel* p = data;
-  kvz_pixel* end = data + array_width * height;
-  kvz_pixel fill_char;
-  unsigned i;
 
-  while (p < end) {
-    // Read the beginning of the line from input.
-    if (width != fread(p, bytes_per_sample, width, file))
-      return 0;
-
-    // Fill the rest with the last pixel value.
-    fill_char = p[width - 1];
+  unsigned   i;
+  // Handle separately the case where we use KVZ_BIT_DEPTH 10+ but the input is 8-bit.
+  if (bytes_per_sample != sizeof(kvz_pixel)) {
+    uint8_t* p = (uint8_t*)data;
+    uint8_t* end = (uint8_t*)data + array_width * height;
+    uint8_t  fill_char;
+    while (p < end) {
+      // Read the beginning of the line from input.
+      if (width != fread(p, bytes_per_sample, width, file)) return 0;
+      // Fill the rest with the last pixel value.
+      // Fill the rest with the last pixel value.
+      fill_char = p[width - 1];
+
+      for (i = width; i < array_width; ++i) {
+        p[i] = fill_char;
+      }
 
-    for (i = width; i < array_width; ++i) {
-      p[i] = fill_char;
+      p += array_width;
     }
+  }
+  else {
+    kvz_pixel* p = data;
+    kvz_pixel* end = data + array_width * height;
+    kvz_pixel  fill_char;
+    while (p < end) {
+      // Read the beginning of the line from input.
+      if (width != fread(p, bytes_per_sample, width, file)) return 0;
+      // Fill the rest with the last pixel value.
+      fill_char = p[width - 1];
+
+      for (i = width; i < array_width; ++i) {
+        p[i] = fill_char;
+      }
 
-    p += array_width;
+      p += array_width;
+    }
   }
   return 1;
 }
@@ -201,24 +220,23 @@
 
 
 static int read_frame_header(FILE* input) {
-  char buffer[256];
-  bool frame_start = false;
-
-  while (!frame_start) {
-    for (int i = 0; i < 256; i++) {
-      buffer[i] = getc(input);
-      if (buffer[i] == EOF) return 0;
-      // ToDo: frame headers can have some information structured same as start headers
-      // This info is just skipped for now, since it's not clear what it could be.
-      if (buffer[i] == 0x0A) {
-        frame_start = true;
-        break;
-      }
+  int c;
+  int count = 0;
+  const int max_scan = 4096; // prevent infinite loops
+
+  while ((c = getc(input)) != EOF && count < max_scan) {
+    count++;
+    // ToDo: frame headers can have some information structured same as start headers
+    // This info is just skipped for now, since it's not clear what it could be.
+    if (c == 0x0A) {
+      return 1; // Found frame start
     }
   }
-  return 1;
+
+  return 0; // EOF or scan limit reached
 }
 
+
 /**
  * \brief Read a single frame from a file.
  *
@@ -313,7 +331,7 @@
 
     // Seek failed. Skip data by reading.
     error = 0;
-    unsigned char* tmp[4096];
+    unsigned char tmp[4096];
     size_t bytes_left = skip_bytes;
     while (bytes_left > 0 && !error) {
       const size_t skip = MIN(4096, bytes_left);

kvazaar-2.3.1.tar.gz/tests/CMakeLists.txt -> kvazaar-2.3.2.tar.gz/tests/CMakeLists.txt Changed

kvazaar-2.3.1.tar.gz/tests/Makefile.am -> kvazaar-2.3.2.tar.gz/tests/Makefile.am Changed

kvazaar-2.3.1.tar.gz/tests/test_owf_wpp_tiles.sh -> kvazaar-2.3.2.tar.gz/tests/test_owf_wpp_tiles.sh Changed

kvazaar-2.3.1.tar.gz/tests/tests_main.c -> kvazaar-2.3.2.tar.gz/tests/tests_main.c Changed

Changes of Revision 22