Projects
Essentials
kvazaar
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 4
View file
kvazaar.changes
Changed
@@ -1,4 +1,10 @@ ------------------------------------------------------------------- +Sun Oct 25 06:12:37 UTC 2015 - aloisio@gmx.com + +- Update to version 0.7.1 + * bumped library version to 2 + +------------------------------------------------------------------- Sat Oct 10 06:21:46 UTC 2015 - aloisio@gmx.com - Update to version 0.7.0
View file
kvazaar.spec
Changed
@@ -16,9 +16,9 @@ # %define libname libkvazaar -%define libmver 1 +%define libmver 2 Name: kvazaar -Version: 0.7.0 +Version: 0.7.1 Release: 0 Summary: HEVC encoder License: LGPL-2.1
View file
kvazaar-0.7.0.tar.gz/README.md -> kvazaar-0.7.1.tar.gz/README.md
Changed
@@ -55,7 +55,7 @@ --subme <integer> : Set fractional pixel motion estimation level [1]. 0: only integer motion estimation 1: fractional pixel motion estimation enabled - --source-scan-type <string> : Set source scan type [\"progressive\"]. + --source-scan-type <string> : Set source scan type ["progressive"]. "progressive": progressive scan "tff": top field first "bff": bottom field first
View file
kvazaar-0.7.0.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj -> kvazaar-0.7.1.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj
Changed
@@ -153,6 +153,22 @@ <ClCompile Include="..\..\src\search.c" /> <ClCompile Include="..\..\src\search_inter.c" /> <ClCompile Include="..\..\src\search_intra.c" /> + <ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c"> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + </ClCompile> + <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c"> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + </ClCompile> + <ClCompile Include="..\..\src\strategies\generic\intra-generic.c" /> + <ClCompile Include="..\..\src\strategies\generic\quant-generic.c" /> + <ClCompile Include="..\..\src\strategies\strategies-intra.c" /> + <ClCompile Include="..\..\src\strategies\strategies-quant.c" /> <ClCompile Include="..\..\src\yuv_io.c" /> <ClInclude Include="..\..\src\checkpoint.h" /> <ClInclude Include="..\..\src\cli.h" /> @@ -201,6 +217,13 @@ <ClInclude Include="..\..\src\kvazaar_internal.h" /> <ClInclude 
Include="..\..\src\search_inter.h" /> <ClInclude Include="..\..\src\search_intra.h" /> + <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" /> + <ClInclude Include="..\..\src\strategies\generic\intra-generic.h" /> + <ClInclude Include="..\..\src\strategies\strategies-common.h" /> + <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" /> + <ClInclude Include="..\..\src\strategies\generic\quant-generic.h" /> + <ClInclude Include="..\..\src\strategies\strategies-intra.h" /> + <ClInclude Include="..\..\src\strategies\strategies-quant.h" /> <ClInclude Include="..\..\src\yuv_io.h" /> </ItemGroup> <ItemGroup>
View file
kvazaar-0.7.0.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj.filters -> kvazaar-0.7.1.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj.filters
Changed
@@ -207,6 +207,24 @@ <ClCompile Include="..\..\src\input_frame_buffer.c"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="..\..\src\strategies\strategies-quant.c"> + <Filter>Source Files\strategies</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\generic\quant-generic.c"> + <Filter>Source Files\strategies\generic</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c"> + <Filter>Source Files\strategies\avx2</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\strategies-intra.c"> + <Filter>Source Files\strategies</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\generic\intra-generic.c"> + <Filter>Source Files\strategies\generic</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c"> + <Filter>Source Files\strategies\avx2</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="..\..\src\global.h"> @@ -374,6 +392,27 @@ <ClInclude Include="..\..\src\input_frame_buffer.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="..\..\src\strategies\strategies-common.h"> + <Filter>Header Files\strategies</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\strategies-quant.h"> + <Filter>Header Files\strategies</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\generic\quant-generic.h"> + <Filter>Header Files\strategies\generic</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h"> + <Filter>Header Files\strategies\avx2</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\strategies-intra.h"> + <Filter>Header Files\strategies</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\generic\intra-generic.h"> + <Filter>Header Files\strategies\generic</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h"> + <Filter>Header Files\strategies\avx2</Filter> + </ClInclude> </ItemGroup> <ItemGroup> <YASM 
Include="..\..\src\extras\x86inc.asm">
View file
kvazaar-0.7.0.tar.gz/src/Makefile -> kvazaar-0.7.1.tar.gz/src/Makefile
Changed
@@ -12,7 +12,7 @@ DLLDIR = $(BINDIR) # Library version number -VER_MAJOR = 1 +VER_MAJOR = 2 VER_MINOR = 0 VER_RELEASE = 0 @@ -152,18 +152,21 @@ # directories. If the instruction set is supported by the # architecture, compile the files in these directories with the # apropriate flags to cause the intrinsics to work. +# Note: Using LTO on strategies caused issues on some older +# compilers, -fno-lto on these files seemed to fix the issue. +# LTO is no longer used and -fno-lto shouldn't be needed. ifeq ($(TARGET_CPU_ARCH), ppc) - strategies/altivec/%.o: EXTRA_FLAGS += -maltivec -fno-lto - strategies/altivec/%.lo: EXTRA_FLAGS += -maltivec -fno-lto + strategies/altivec/%.o: EXTRA_FLAGS += -maltivec + strategies/altivec/%.lo: EXTRA_FLAGS += -maltivec else ifeq ($(TARGET_CPU_ARCH), x86) - strategies/sse2/%.o: EXTRA_FLAGS += -msse2 -fno-lto - strategies/sse41/%.o: EXTRA_FLAGS += -msse4.1 -fno-lto - strategies/sse2/%.lo: EXTRA_FLAGS += -msse2 -fno-lto - strategies/sse41/%.lo: EXTRA_FLAGS += -msse4.1 -fno-lto + strategies/sse2/%.o: EXTRA_FLAGS += -msse2 + strategies/sse41/%.o: EXTRA_FLAGS += -msse4.1 + strategies/sse2/%.lo: EXTRA_FLAGS += -msse2 + strategies/sse41/%.lo: EXTRA_FLAGS += -msse4.1 # To disable avx2 on old compilers that don't support it. 
ifndef KVZ_DISABLE_AVX2 - strategies/avx2/%.o: EXTRA_FLAGS += -mavx2 -fno-lto - strategies/avx2/%.lo: EXTRA_FLAGS += -mavx2 -fno-lto + strategies/avx2/%.o: EXTRA_FLAGS += -mavx2 + strategies/avx2/%.lo: EXTRA_FLAGS += -mavx2 endif endif @@ -203,6 +206,8 @@ strategies/strategies-nal.o \ strategies/strategies-dct.o \ strategies/strategies-ipol.o \ + strategies/strategies-quant.o \ + strategies/strategies-intra.o \ strategies/generic/nal-generic.o \ strategies/generic/picture-generic.o \ strategies/sse2/picture-sse2.o \ @@ -213,11 +218,15 @@ strategies/generic/dct-generic.o \ strategies/avx2/dct-avx2.o \ strategies/generic/ipol-generic.o \ - strategies/avx2/ipol-avx2.o + strategies/avx2/ipol-avx2.o \ + strategies/generic/quant-generic.o \ + strategies/avx2/quant-avx2.o \ + strategies/generic/intra-generic.o \ + strategies/avx2/intra-avx2.o ifndef KVZ_DISABLE_ASM # Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause - # the registration function to register the function pointers in the ASM + # the registration function to register the function pointers in the ASM # files. strategies/x86_asm/%.o: EXTRA_FLAGS += -DKVZ_COMPILE_ASM @@ -321,7 +330,7 @@ .PHONY: kvazaar.pc init_submodules install install-pc install-prog install-lib .PHONY: install-dylib install-dll clean -kvazaar.pc: KVZ_VERSION = $(shell sed -n 's/^#define\s\+KVZ_VERSION\s\+\(.*\)/\1/ p' global.h) +kvazaar.pc: KVZ_VERSION = $(shell awk '/#define KVZ_VERSION/ { print $$3 }' global.h) kvazaar.pc: kvazaar.pc.in Makefile sed -e "s;@prefix@;$(PREFIX);" -e "s;@libdir@;$(LIBDIR);" \ -e "s;@VERSION@;$(KVZ_VERSION);" \
View file
kvazaar-0.7.0.tar.gz/src/cli.c -> kvazaar-0.7.1.tar.gz/src/cli.c
Changed
@@ -198,6 +198,14 @@ goto done; } + if (opts->config->vps_period < 0) { + // Disabling parameter sets is only possible when using Kvazaar as + // a library. + fprintf(stderr, "Input error: vps_period must be non-negative\n"); + ok = 0; + goto done; + } + // Set resolution automatically if necessary if (opts->config->width == 0 && opts->config->width == 0){ ok = select_input_res_auto(opts->input, &opts->config->width, &opts->config->height);
View file
kvazaar-0.7.0.tar.gz/src/config.c -> kvazaar-0.7.1.tar.gz/src/config.c
Changed
@@ -157,7 +157,7 @@ //If the arg starts with u, we want an uniform split if (arg[0]=='u') { *ntiles = atoi(arg+1)-1; - if (MAX_TILES_PER_DIM <= *ntiles || 0 >= *ntiles) { + if (MAX_TILES_PER_DIM <= *ntiles || 0 > *ntiles) { fprintf(stderr, "Invalid number of tiles (0 < %d <= %d = MAX_TILES_PER_DIM)!\n", *ntiles + 1, MAX_TILES_PER_DIM); return 0; } @@ -504,8 +504,8 @@ error = 1; } - if (cfg->rdo < 0 || cfg->rdo > 2) { - fprintf(stderr, "Input error: --rd parameter out of range [0..2]\n"); + if (cfg->rdo < 0 || cfg->rdo > 3) { + fprintf(stderr, "Input error: --rd parameter out of range [0..3]\n"); error = 1; }
View file
kvazaar-0.7.0.tar.gz/src/encoder.c -> kvazaar-0.7.1.tar.gz/src/encoder.c
Changed
@@ -433,7 +433,11 @@ // AUD encoder->aud_enable = (int8_t)encoder->cfg->aud_enable; - encoder->vps_period = encoder->cfg->vps_period * encoder->cfg->intra_period; + if (encoder->cfg->vps_period >= 0) { + encoder->vps_period = encoder->cfg->vps_period * encoder->cfg->intra_period; + } else { + encoder->vps_period = -1; + } return encoder;
View file
kvazaar-0.7.0.tar.gz/src/encoder.h -> kvazaar-0.7.1.tar.gz/src/encoder.h
Changed
@@ -140,7 +140,7 @@ } pu_depth_inter, pu_depth_intra; // How often Video Parameter Set is re-sent. - uint32_t vps_period; + int32_t vps_period; bool sign_hiding;
View file
kvazaar-0.7.0.tar.gz/src/encoder_state-bitstream.c -> kvazaar-0.7.1.tar.gz/src/encoder_state-bitstream.c
Changed
@@ -41,10 +41,9 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_PTL(encoder_state_t * const state) +static void encoder_state_write_bitstream_PTL(bitstream_t *stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; - int i; // PTL // Profile Tier WRITE_U(stream, 0, 2, "general_profile_space"); @@ -74,17 +73,16 @@ WRITE_U(stream, 0, 1, "sub_layer_profile_present_flag"); WRITE_U(stream, 0, 1, "sub_layer_level_present_flag"); - for (i = 1; i < 8; i++) { + for (int i = 1; i < 8; i++) { WRITE_U(stream, 0, 2, "reserved_zero_2bits"); } // end PTL } -static void encoder_state_write_bitstream_vid_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_vid_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; - int i; #ifdef KVZ_DEBUG printf("=========== Video Parameter Set ID: 0 ===========\n"); #endif @@ -96,12 +94,12 @@ WRITE_U(stream, 0, 1, "vps_temporal_id_nesting_flag"); WRITE_U(stream, 0xffff, 16, "vps_reserved_ffff_16bits"); - encoder_state_write_bitstream_PTL(state); + encoder_state_write_bitstream_PTL(stream, state); WRITE_U(stream, 0, 1, "vps_sub_layer_ordering_info_present_flag"); //for each layer - for (i = 0; i < 1; i++) { + for (int i = 0; i < 1; i++) { WRITE_UE(stream, 1, "vps_max_dec_pic_buffering"); WRITE_UE(stream, 0, "vps_num_reorder_pics"); WRITE_UE(stream, 0, "vps_max_latency_increase"); @@ -119,10 +117,10 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_scaling_list(encoder_state_t * const state) +static void encoder_state_write_bitstream_scaling_list(bitstream_t *stream, + encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; - bitstream_t * const stream = &state->stream; uint32_t size_id; for (size_id = 0; size_id < SCALING_LIST_SIZE_NUM; size_id++) { int32_t list_id; @@ -177,9 +175,9 @@ } 
-static void encoder_state_write_bitstream_VUI(encoder_state_t * const state) +static void encoder_state_write_bitstream_VUI(bitstream_t *stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; const encoder_control_t * const encoder = state->encoder_control; #ifdef KVZ_DEBUG printf("=========== VUI Set ID: 0 ===========\n"); @@ -260,8 +258,8 @@ //ENDIF WRITE_U(stream, 0, 1, "neutral_chroma_indication_flag"); - WRITE_U(stream, state->encoder_control->vui.field_seq_flag, 1, "field_seq_flag"); // 0: frames, 1: fields - WRITE_U(stream, state->encoder_control->vui.frame_field_info_present_flag, 1, "frame_field_info_present_flag"); + WRITE_U(stream, encoder->vui.field_seq_flag, 1, "field_seq_flag"); // 0: frames, 1: fields + WRITE_U(stream, encoder->vui.frame_field_info_present_flag, 1, "frame_field_info_present_flag"); WRITE_U(stream, 0, 1, "default_display_window_flag"); //IF default display window @@ -278,9 +276,9 @@ //ENDIF } -static void encoder_state_write_bitstream_seq_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; const encoder_control_t * encoder = state->encoder_control; #ifdef KVZ_DEBUG @@ -292,13 +290,13 @@ WRITE_U(stream, 1, 3, "sps_max_sub_layers_minus1"); WRITE_U(stream, 0, 1, "sps_temporal_id_nesting_flag"); - encoder_state_write_bitstream_PTL(state); + encoder_state_write_bitstream_PTL(stream, state); WRITE_UE(stream, 0, "sps_seq_parameter_set_id"); - WRITE_UE(stream, state->encoder_control->in.video_format, + WRITE_UE(stream, encoder->in.video_format, "chroma_format_idc"); - if (state->encoder_control->in.video_format == 3) { + if (encoder->in.video_format == 3) { WRITE_U(stream, 0, 1, "separate_colour_plane_flag"); } @@ -331,7 +329,7 @@ WRITE_U(stream, 0, 1, "sps_sub_layer_ordering_info_present_flag"); //for each layer - WRITE_UE(stream, 
state->encoder_control->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering"); + WRITE_UE(stream, encoder->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering"); WRITE_UE(stream, encoder->cfg->gop_len, "sps_num_reorder_pics"); WRITE_UE(stream, 0, "sps_max_latency_increase"); //end for @@ -344,14 +342,14 @@ WRITE_UE(stream, encoder->tr_depth_intra, "max_transform_hierarchy_depth_intra"); // scaling list - WRITE_U(stream, state->encoder_control->scaling_list.enable, 1, "scaling_list_enable_flag"); - if (state->encoder_control->scaling_list.enable) { + WRITE_U(stream, encoder->scaling_list.enable, 1, "scaling_list_enable_flag"); + if (encoder->scaling_list.enable) { WRITE_U(stream, 1, 1, "sps_scaling_list_data_present_flag"); - encoder_state_write_bitstream_scaling_list(state); + encoder_state_write_bitstream_scaling_list(stream, state); } WRITE_U(stream, 0, 1, "amp_enabled_flag"); - WRITE_U(stream, state->encoder_control->sao_enable ? 1 : 0, 1, + WRITE_U(stream, encoder->sao_enable ? 
1 : 0, 1, "sample_adaptive_offset_enabled_flag"); WRITE_U(stream, ENABLE_PCM, 1, "pcm_enabled_flag"); #if ENABLE_PCM == 1 @@ -377,17 +375,17 @@ WRITE_U(stream, 0, 1, "sps_strong_intra_smoothing_enable_flag"); WRITE_U(stream, 1, 1, "vui_parameters_present_flag"); - encoder_state_write_bitstream_VUI(state); + encoder_state_write_bitstream_VUI(stream, state); WRITE_U(stream, 0, 1, "sps_extension_flag"); kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_pic_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; - bitstream_t * const stream = &state->stream; #ifdef KVZ_DEBUG printf("=========== Picture Parameter Set ID: 0 ===========\n"); #endif @@ -403,7 +401,7 @@ WRITE_UE(stream, 0, "num_ref_idx_l1_default_active_minus1"); WRITE_SE(stream, ((int8_t)encoder->cfg->qp) - 26, "pic_init_qp_minus26"); WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); - WRITE_U(stream, state->encoder_control->trskip_enable, 1, "transform_skip_enabled_flag"); + WRITE_U(stream, encoder->trskip_enable, 1, "transform_skip_enabled_flag"); WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); //if cu_qp_delta_enabled_flag //WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); @@ -445,13 +443,13 @@ //IF deblocking_filter WRITE_U(stream, 0, 1, "deblocking_filter_override_enabled_flag"); - WRITE_U(stream, state->encoder_control->deblock_enable ? 0 : 1, 1, + WRITE_U(stream, encoder->deblock_enable ? 
0 : 1, 1, "pps_disable_deblocking_filter_flag"); //IF !disabled - if (state->encoder_control->deblock_enable) { - WRITE_SE(stream, state->encoder_control->beta_offset_div2, "beta_offset_div2"); - WRITE_SE(stream, state->encoder_control->tc_offset_div2, "tc_offset_div2"); + if (encoder->deblock_enable) { + WRITE_SE(stream, encoder->beta_offset_div2, "beta_offset_div2"); + WRITE_SE(stream, encoder->tc_offset_div2, "tc_offset_div2"); } //ENDIF @@ -639,7 +637,7 @@ int j; int ref_negative = 0; int ref_positive = 0; - if (state->encoder_control->cfg->gop_len) { + if (encoder->cfg->gop_len) { for (j = 0; j < state->global->ref->used_size; j++) { if (state->global->ref->pocs[j] < state->global->poc) { ref_negative++; @@ -686,10 +684,10 @@
View file
kvazaar-0.7.0.tar.gz/src/encoder_state-bitstream.h -> kvazaar-0.7.1.tar.gz/src/encoder_state-bitstream.h
Changed
@@ -26,17 +26,18 @@ #include "global.h" - // Forward declare because including the header would lead to a cyclic // dependency. struct encoder_state_t; +struct bitstream_t; void kvz_encoder_state_write_bitstream_slice_header(struct encoder_state_t * const state); void kvz_encoder_state_write_bitstream(struct encoder_state_t * const state); void kvz_encoder_state_write_bitstream_leaf(struct encoder_state_t * const state); void kvz_encoder_state_worker_write_bitstream_leaf(void * opaque); void kvz_encoder_state_worker_write_bitstream(void * opaque); - +void kvz_encoder_state_write_parameter_sets(struct bitstream_t *stream, + struct encoder_state_t * const state); #endif // ENCODER_STATE_BITSTREAM_H_
View file
kvazaar-0.7.0.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-0.7.1.tar.gz/src/encoder_state-ctors_dtors.c
Changed
@@ -113,15 +113,14 @@ static int encoder_state_config_slice_init(encoder_state_t * const state, const int start_address_in_ts, const int end_address_in_ts) { - int i = 0, slice_found=0; - for (i = 0; i < state->encoder_control->slice_count; ++i) { + state->slice->id = -1; + for (int i = 0; i < state->encoder_control->slice_count; ++i) { if (state->encoder_control->slice_addresses_in_ts[i] == start_address_in_ts) { state->slice->id = i; - slice_found = 1; break; } } - assert(slice_found); + assert(state->slice->id != -1); state->slice->start_in_ts = start_address_in_ts; state->slice->end_in_ts = end_address_in_ts;
View file
kvazaar-0.7.0.tar.gz/src/global.h -> kvazaar-0.7.1.tar.gz/src/global.h
Changed
@@ -124,6 +124,7 @@ #define MAX(a,b) (((a)>(b))?(a):(b)) #define MIN(a,b) (((a)<(b))?(a):(b)) #define CLIP(low,high,value) MAX((low),MIN((high),(value))) +#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value)) #define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; } #define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth) #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val)) @@ -143,7 +144,7 @@ // NOTE: When making a release, remember to also bump library version in // Makefile, if necessary. -#define KVZ_VERSION 0.7.0 +#define KVZ_VERSION 0.7.1 #define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION) //#define VERBOSE 1
View file
kvazaar-0.7.0.tar.gz/src/image.c -> kvazaar-0.7.1.tar.gz/src/image.c
Changed
@@ -114,10 +114,9 @@ */ kvz_picture *kvz_image_copy_ref(kvz_picture *im) { - int32_t new_refcount = ATOMIC_INC(&(im->refcount)); - // The caller should have had another reference. - assert(new_refcount > 1); + assert(im->refcount > 0); + ATOMIC_INC(&(im->refcount)); return im; }
View file
kvazaar-0.7.0.tar.gz/src/intra.c -> kvazaar-0.7.1.tar.gz/src/intra.c
Changed
@@ -28,91 +28,19 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> -#include "config.h" #include "encoder.h" #include "transform.h" -#include "rdo.h" +#include "strategies/strategies-intra.h" -const uint8_t kvz_intra_hor_ver_dist_thres[5] = {0,7,1,0,0}; - - -/** - * \brief Set intrablock mode (and init typedata) - * \param pic picture to use - * \param xCtb x CU position (smallest CU) - * \param yCtb y CU position (smallest CU) - * \param depth current CU depth - * \param mode mode to set - * \returns Void - */ -void kvz_intra_set_block_mode(videoframe_t *frame,uint32_t x_cu, uint32_t y_cu, uint8_t depth, uint8_t mode, uint8_t part_mode) -{ - uint32_t x, y; - int block_scu_width = (LCU_WIDTH>>depth)/(LCU_WIDTH>>MAX_DEPTH); - - if (part_mode == SIZE_NxN) { - cu_info_t *cur_cu = kvz_videoframe_get_cu(frame, x_cu, y_cu); - // Modes are already set. - cur_cu->depth = depth; - cur_cu->type = CU_INTRA; - cur_cu->tr_depth = depth + 1; - return; - } - - // Loop through all the blocks in the area of cur_cu - for (y = y_cu; y < y_cu + block_scu_width; y++) { - for (x = x_cu; x < x_cu + block_scu_width; x++) { - cu_info_t *cur_cu = kvz_videoframe_get_cu(frame, x_cu, y_cu); - cur_cu->depth = depth; - cur_cu->type = CU_INTRA; - cur_cu->intra[0].mode = mode; - cur_cu->intra[1].mode = mode; - cur_cu->intra[2].mode = mode; - cur_cu->intra[3].mode = mode; - cur_cu->part_size = part_mode; - cur_cu->tr_depth = depth; - } - } -} - -/** - * \brief get intrablock mode - * \param pic picture data to use - * \param picwidth width of the picture data - * \param xpos x-position - * \param ypos y-position - * \param width block width - * \returns DC prediction -*/ -kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel *pic, uint16_t picwidth, uint8_t width) -{ - int32_t i, sum = 0; - - // pixels on top and left - for (i = -picwidth; i < width - picwidth; i++) { - sum += pic[i]; - } - for (i = -1; i < width * picwidth - 1; i += picwidth) { - sum += pic[i]; - } - - 
// return the average - return (kvz_pixel)((sum + width) / (width + width)); -} - -/** - * \brief Function for deriving intra luma predictions - * \param pic picture to use - * \param x_cu x CU position (smallest CU) - * \param y_cu y CU position (smallest CU) - * \param preds output buffer for 3 predictions - * \returns (predictions are found)?1:0 - */ -int8_t kvz_intra_get_dir_luma_predictor(const uint32_t x, const uint32_t y, int8_t* preds, - const cu_info_t * const cur_cu, const cu_info_t * const left_cu, const cu_info_t * const above_cu) +int8_t kvz_intra_get_dir_luma_predictor( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_cu, + const cu_info_t *const left_cu, + const cu_info_t *const above_cu) { int y_cu = y>>3; @@ -166,158 +94,235 @@ return 1; } + +static void intra_filter_reference( + int_fast8_t log2_width, + kvz_intra_references *refs) +{ + if (refs->filtered_initialized) { + return; + } else { + refs->filtered_initialized = true; + } + + const int_fast8_t ref_width = 2 * (1 << log2_width) + 1; + kvz_intra_ref *ref = &refs->ref; + kvz_intra_ref *filtered_ref = &refs->filtered_ref; + + filtered_ref->left[0] = (ref->left[1] + 2 * ref->left[0] + ref->top[1] + 2) / 4; + filtered_ref->top[0] = filtered_ref->left[0]; + + for (int_fast8_t y = 1; y < ref_width - 1; ++y) { + kvz_pixel *p = &ref->left[y]; + filtered_ref->left[y] = (p[-1] + 2 * p[0] + p[1] + 2) / 4; + } + filtered_ref->left[ref_width - 1] = ref->left[ref_width - 1]; + + for (int_fast8_t x = 1; x < ref_width - 1; ++x) { + kvz_pixel *p = &ref->top[x]; + filtered_ref->top[x] = (p[-1] + 2 * p[0] + p[1] + 2) / 4; + } + filtered_ref->top[ref_width - 1] = ref->top[ref_width - 1]; +} + + +static void intra_post_process_angular( + unsigned width, + unsigned stride, + const kvz_pixel *ref, + kvz_pixel *block) +{ + kvz_pixel ref2 = ref[0]; + for (unsigned i = 0; i < width; i++) { + kvz_pixel val = block[i * stride]; + kvz_pixel ref1 = ref[i + 1]; + block[i * stride] = 
CLIP_TO_PIXEL(val + ((ref1 - ref2) >> 1)); + } +} + + /** - * \brief Intra filtering of the border samples - * \param ref reference picture data - * \param x_cu x CU position (smallest CU) - * \param y_cu y CU position (smallest CU) - * \param depth current CU depth - * \param preds output buffer for 3 predictions - * \returns (predictions are found)?1:0 + * \brief Generage planar prediction. + * \param log2_width Log2 of width, range 2..5. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. */ -void kvz_intra_filter(kvz_pixel *ref, int32_t stride,int32_t width, int8_t mode) +static void intra_pred_planar( + const int_fast8_t log2_width, + const kvz_pixel *const ref_top, + const kvz_pixel *const ref_left, + kvz_pixel *const dst) { - #define FWIDTH (LCU_WIDTH*2+1) - kvz_pixel filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples - kvz_pixel *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1) - int x,y; - - if (!mode) { - // pF[ -1 ][ -1 ] = ( p[ -1 ][ 0 ] + 2*p[ -1 ][ -1 ] + p[ 0 ][ -1 ] + 2 ) >> 2 (8 35) - filteredShift[-FWIDTH-1] = (ref[-1] + 2*ref[-(int32_t)stride-1] + ref[-(int32_t)stride] + 2) >> 2; - - // pF[ -1 ][ y ] = ( p[ -1 ][ y + 1 ] + 2*p[ -1 ][ y ] + p[ -1 ][ y - 1 ] + 2 ) >> 2 for y = 0..nTbS * 2 - 2 (8 36) - for (y = 0; y < (int32_t)width * 2 - 1; y++) { - filteredShift[y*FWIDTH-1] = (ref[(y + 1) * stride - 1] + 2*ref[y * stride - 1] + ref[(y - 1) * stride - 1] + 2) >> 2; + assert(log2_width >= 2 && log2_width <= 5); + + const int_fast8_t width = 1 << log2_width; + const kvz_pixel top_right = ref_top[width + 1]; + const kvz_pixel bottom_left = ref_left[width + 1]; + +#if 0 + // Unoptimized version for reference. 
+ for (int y = 0; y < width; ++y) { + for (int x = 0; x < width; ++x) { + int_fast16_t hor = (width - 1 - x) * ref_left[y + 1] + (x + 1) * top_right; + int_fast16_t ver = (width - 1 - y) * ref_top[x + 1] + (y + 1) * bottom_left; + dst[y * width + x] = (ver + hor + width) >> (log2_width + 1); } + } +#else
View file
kvazaar-0.7.0.tar.gz/src/intra.h -> kvazaar-0.7.1.tar.gz/src/intra.h
Changed
@@ -27,29 +27,91 @@ #include "global.h" -#include "image.h" -#include "encoder.h" #include "encoderstate.h" -//void kvz_intra_set_block_mode(image* im,uint32_t x_ctb, uint32_t y_ctb, uint8_t depth, uint8_t mode, uint8_t part_mode); +typedef struct { + kvz_pixel left[2 * 32 + 1]; + kvz_pixel top[2 * 32 + 1]; +} kvz_intra_ref; +typedef struct +{ + kvz_intra_ref ref; + kvz_intra_ref filtered_ref; + bool filtered_initialized; +} kvz_intra_references; -int8_t kvz_intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds, - const cu_info_t* cur_cu, const cu_info_t* left_cu, const cu_info_t* above_cu); -void kvz_intra_dc_pred_filtering(const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t height ); -void kvz_intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, kvz_pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu); -void kvz_intra_filter(kvz_pixel* ref, int32_t stride, int32_t width, int8_t mode); +/** +* \brief Function for deriving intra luma predictions +* \param pic picture to use +* \param x_cu x CU position (smallest CU) +* \param y_cu y CU position (smallest CU) +* \param preds output buffer for 3 predictions +* \returns (predictions are found)?1:0 +*/ +int8_t kvz_intra_get_dir_luma_predictor( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_cu, + const cu_info_t *const left_cu, + const cu_info_t *const above_cu); -/* Predictions */ -void kvz_intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma); +/** +* \brief Generage angular predictions. +* \param width Width in pixels, range 4..32. +* \param color What color pixels to use. +* \param luma_px Luma coordinates of the prediction block. +* \param pic_px Picture dimensions in luma pixels. 
+* \param lcu LCU struct. +* \param out_left_ref Left reference pixels, index 0 is the top-left. +* \param out_top_ref Top reference pixels, index 0 is the top-left. +*/ +void kvz_intra_build_reference( + const int_fast8_t log2_width, + const color_t color, + const vector2d_t *const luma_px, + const vector2d_t *const pic_px, + const lcu_t *const lcu, + kvz_intra_references *const refs); -kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel* pic, uint16_t pic_width, uint8_t width); -void kvz_intra_get_planar_pred(const kvz_pixel* src,int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride); -void kvz_intra_get_angular_pred(const encoder_control_t *encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter); +/** + * \brief Generate intra predictions. + * \param refs Reference pixels used for the prediction. + * \param log2_width Width of the predicted block. + * \param mode Intra mode used for the prediction. + * \param color Color of the prediction. + * \param dst Buffer for the predicted pixels. + */ +void kvz_intra_predict( + kvz_intra_references *refs, + int_fast8_t log2_width, + int_fast8_t mode, + color_t color, + kvz_pixel *dst); -void kvz_intra_recon(const encoder_control_t *encoder, kvz_pixel* rec, int32_t rec_stride, uint32_t width, kvz_pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); +/** + * \brief Do a full intra prediction cycle on a CU in lcu for luma. + */ +void kvz_intra_recon_lcu_luma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu); -void kvz_intra_recon_lcu_luma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); -void kvz_intra_recon_lcu_chroma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); +/** +* \brief Do a full intra prediction cycle on a CU in lcu for chroma. 
+*/ +void kvz_intra_recon_lcu_chroma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu); #endif
View file
kvazaar-0.7.0.tar.gz/src/kvazaar.c -> kvazaar-0.7.1.tar.gz/src/kvazaar.c
Changed
@@ -123,6 +123,27 @@ } +static int kvazaar_headers(kvz_encoder *enc, + kvz_data_chunk **data_out, + uint32_t *len_out) +{ + if (data_out) *data_out = NULL; + if (len_out) *len_out = 0; + + bitstream_t stream; + kvz_bitstream_init(&stream); + + kvz_encoder_state_write_parameter_sets(&stream, &enc->states[enc->cur_state_num]); + + // Get stream length before taking chunks since that clears the stream. + if (len_out) *len_out = kvz_bitstream_tell(&stream) / 8; + if (data_out) *data_out = kvz_bitstream_take_chunks(&stream); + + kvz_bitstream_finalize(&stream); + return 1; +} + + static int kvazaar_encode(kvz_encoder *enc, kvz_picture *pic_in, kvz_data_chunk **data_out, @@ -204,6 +225,7 @@ .encoder_open = kvazaar_open, .encoder_close = kvazaar_close, + .encoder_headers = kvazaar_headers, .encoder_encode = kvazaar_encode, };
View file
kvazaar-0.7.0.tar.gz/src/kvazaar.h -> kvazaar-0.7.1.tar.gz/src/kvazaar.h
Changed
@@ -106,7 +106,18 @@ { int32_t qp; /*!< \brief Quantization parameter */ int32_t intra_period; /*!< \brief the period of intra frames in stream */ - int32_t vps_period; /*!< \brief how often the vps is re-sent */ + + /** \brief How often the VPS, SPS and PPS are re-sent + * + * -1: never + * 0: first frame only + * 1: every intra frame + * 2: every other intra frame + * 3: every third intra frame + * and so on + */ + int32_t vps_period; + int32_t width; /*!< \brief frame width, must be a multiple of 8 */ int32_t height; /*!< \brief frame height, must be a multiple of 8 */ double framerate; /*!< \brief Input framerate */ @@ -410,6 +421,26 @@ void (*encoder_close)(kvz_encoder *encoder); /** + * \brief Get parameter sets. + * + * Encode the VPS, SPS and PPS. + * + * If data_out is set to non-NULL values, the caller is responsible for + * calling chunk_free on it. + * + * A null pointer may be passed in place of the parameter data_out or len_out + * to skip returning the corresponding value. + * + * \param encoder encoder + * \param data_out Returns the encoded parameter sets. + * \param len_out Returns number of bytes in the encoded data. + * \return 1 on success, 0 on error. + */ + int (*encoder_headers)(kvz_encoder *encoder, + kvz_data_chunk **data_out, + uint32_t *len_out); + + /** * \brief Encode one frame. * * Add pic_in to the encoding pipeline. If an encoded frame is ready, return
View file
kvazaar-0.7.0.tar.gz/src/kvazaar_version.h -> kvazaar-0.7.1.tar.gz/src/kvazaar_version.h
Changed
@@ -21,6 +21,6 @@ ****************************************************************************/ // KVZ_API_VERSION is incremented every time the public api changes. -#define KVZ_API_VERSION 7 +#define KVZ_API_VERSION 8 #endif // KVAZAAR_VERSION_H_
View file
kvazaar-0.7.0.tar.gz/src/rdo.c -> kvazaar-0.7.1.tar.gz/src/rdo.c
Changed
@@ -31,6 +31,7 @@ #include "context.h" #include "cabac.h" #include "transform.h" +#include "strategies/strategies-quant.h" #define QUANT_SHIFT 14
View file
kvazaar-0.7.0.tar.gz/src/search_intra.c -> kvazaar-0.7.1.tar.gz/src/search_intra.c
Changed
@@ -271,14 +271,14 @@ static void search_intra_chroma_rough(encoder_state_t * const state, int x_px, int y_px, int depth, const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride, - const kvz_pixel *rec_u, const kvz_pixel *rec_v, int16_t recstride, + kvz_intra_references *refs_u, kvz_intra_references *refs_v, int8_t luma_mode, int8_t modes[5], double costs[5]) { - const bool reconstruct_chroma = !(x_px & 4 || y_px & 4); - if (!reconstruct_chroma) return; + assert(!(x_px & 4 || y_px & 4)); const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); + const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2); for (int i = 0; i < 5; ++i) { costs[i] = 0; @@ -287,16 +287,16 @@ cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); - kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_get_pred(state->encoder_control, rec_u, NULL, recstride, pred, width, modes[i], 1); + kvz_intra_predict(refs_u, log2_width_c, modes[i], COLOR_U, pred); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -304,7 +304,7 @@ kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_get_pred(state->encoder_control, rec_v, NULL, recstride, pred, width, modes[i], 2); + kvz_intra_predict(refs_v, log2_width_c, modes[i], COLOR_V, pred); //costs[i] += get_cost(encoder_state, 
pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -343,41 +343,25 @@ */ static int8_t search_intra_rough(encoder_state_t * const state, kvz_pixel *orig, int32_t origstride, - kvz_pixel *rec, int16_t recstride, - int width, int8_t *intra_preds, + kvz_intra_references *refs, + int log2_width, int8_t *intra_preds, int8_t modes[35], double costs[35]) { + assert(log2_width >= 2 && log2_width <= 5); + int_fast8_t width = 1 << log2_width; cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width); cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width); // Temporary block arrays - kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); - - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - assert(width == 4 || width == 8 || width == 16 || width == 32); // Store original block for SAD computation kvz_pixels_blit(orig, orig_block, width, width, origstride, width); - // Generate filtered reference pixels. 
- { - int16_t x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } - int8_t modes_selected = 0; unsigned min_cost = UINT_MAX; unsigned max_cost = 0; @@ -387,18 +371,15 @@ int offset; if (state->encoder_control->full_intra_search) { offset = 1; - } else if (width == 4) { - offset = 2; - } else if (width == 8) { - offset = 4; } else { - offset = 8; + static const int8_t offsets[4] = { 2, 4, 8, 8 }; + offset = offsets[log2_width - 2]; } // Calculate SAD for evenly spaced modes to select the starting point for // the recursive search. for (int mode = 2; mode <= 34; mode += offset) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; @@ -421,7 +402,7 @@ int8_t center_node = best_mode; int8_t mode = center_node - offset; if (mode >= 2) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; if (costs[modes_selected] < best_cost) { @@ -433,7 +414,7 @@ mode = center_node + offset; if (mode <= 34) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; if (costs[modes_selected] < best_cost) { @@ -460,7 +441,7 @@ } if (!has_mode) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, 
pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; ++modes_selected; @@ -507,7 +488,6 @@ static int8_t search_intra_rdo(encoder_state_t * const state, int x_px, int y_px, int depth, kvz_pixel *orig, int32_t origstride, - kvz_pixel *rec, int16_t recstride, int8_t *intra_preds, int modes_to_check, int8_t modes[35], double costs[35], @@ -517,31 +497,14 @@ const int width = LCU_WIDTH >> depth; kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; - int rdo_mode; - int pred_mode; - - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - // Generate filtered reference pixels. - { - int x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } kvz_pixels_blit(orig, orig_block, width, width, origstride, width); // Check that the predicted modes are in the RDO mode list if (modes_to_check < 35) { - for (pred_mode = 0; pred_mode < 3; pred_mode++) { + for (int pred_mode = 0; pred_mode < 3; pred_mode++) { int mode_found = 0; - for (rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { + for (int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { if (intra_preds[pred_mode] == modes[rdo_mode]) { mode_found = 1; break; @@ -555,42 +518,27 @@ } } - for(rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { + for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds); costs[rdo_mode] = rdo_bitcost * (int)(state->global->cur_lambda_cost + 0.5); -#if 0 - if (width != 4 && tr_depth == depth) { - // This code path has been disabled for now because it increases bdrate - // by 1-2 %. Possibly due to not taking chroma into account during luma - // mode search. 
Enabling separate chroma search compensates a little, - // but not enough.
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/intra-avx2.c
Added
@@ -0,0 +1,176 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +/* + * \file + */ + +#include <stdlib.h> + +#include "intra-avx2.h" +#include "strategyselector.h" + +#if COMPILE_INTEL_AVX2 +#include <immintrin.h> + + /** + * \brief Generate angular predictions. + * \param log2_width Log2 of width, range 2..5. + * \param intra_mode Angular mode in range 2..34. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. 
+ */ +static void kvz_angular_pred_avx2( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst) +{ + assert(log2_width >= 2 && log2_width <= 5); + assert(intra_mode >= 2 && intra_mode <= 34); + + static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 }; + static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp + + // Temporary buffer for modes 11-25. + // It only needs to be big enough to hold indices from -width to width-1. + kvz_pixel tmp_ref[2 * 32]; + const int_fast8_t width = 1 << log2_width; + + // Whether to swap references to always project on the left reference row. + const bool vertical_mode = intra_mode >= 18; + // Modes distance to horizontal or vertical mode. + const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode; + // Sample displacement per column in fractions of 32. + const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; + + // Pointer for the reference we are interpolating from. + const kvz_pixel *ref_main; + // Pointer for the other reference. + const kvz_pixel *ref_side; + + // Set ref_main and ref_side such that, when indexed with 0, they point to + // index 0 in block coordinates. + if (sample_disp < 0) { + // Negative sample_disp means, we need to use both references. + + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + + // Move the reference pixels to start from the middle to the later half of + // the tmp_ref, so there is room for negative indices. + for (int_fast8_t x = -1; x < width; ++x) { + tmp_ref[x + width] = ref_main[x]; + } + // Get a pointer to block index 0 in tmp_ref. + ref_main = &tmp_ref[width]; + + // Extend the side reference to the negative indices of main reference. 
+ int_fast32_t col_sample_disp = 128; // rounding for the ">> 8" + int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)]; + int_fast8_t most_negative_index = (width * sample_disp) >> 5; + for (int_fast8_t x = -2; x >= most_negative_index; --x) { + col_sample_disp += inv_abs_sample_disp; + int_fast8_t side_index = col_sample_disp >> 8; + tmp_ref[x + width] = ref_side[side_index - 1]; + } + } + else { + // sample_disp >= 0 means we don't need to refer to negative indices, + // which means we can just use the references as is. + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + } + + if (sample_disp != 0) { + // The mode is not horizontal or vertical, we have to do interpolation. + + int_fast16_t delta_pos = 0; + for (int_fast8_t y = 0; y < width; ++y) { + delta_pos += sample_disp; + int_fast8_t delta_int = delta_pos >> 5; + int_fast8_t delta_fract = delta_pos & (32 - 1); + + if (delta_fract) { + // Do linear filtering + if (width < 8) { + for (int_fast8_t x = 0; x < width; ++x) { + kvz_pixel ref1 = ref_main[x + delta_int]; + kvz_pixel ref2 = ref_main[x + delta_int + 1]; + dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5; + } + } else { + struct { uint8_t w1; uint8_t w2; } packed_weights = { 32 - delta_fract, delta_fract }; + __m128i v_weights = _mm_set1_epi16(*(int16_t*)&packed_weights); + + for (int_fast8_t x = 0; x < width; x += 8) { + __m128i v_ref1 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int])); + __m128i v_ref2 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int + 1])); + __m128i v_refs = _mm_unpacklo_epi8(v_ref1, v_ref2); + __m128i v_tmp = _mm_maddubs_epi16(v_refs, v_weights); + v_tmp = _mm_add_epi16(v_tmp, _mm_set1_epi16(16)); + v_tmp = _mm_srli_epi16(v_tmp, 5); + v_tmp = _mm_packus_epi16(v_tmp, v_tmp); + _mm_storel_epi64((__m128i*)(dst + y * width + x), v_tmp); + } + } + } + else { + // Just copy the integer samples + for 
(int_fast8_t x = 0; x < width; x+=4) { + *(int32_t*)(&dst[y * width + x]) = *(int32_t*)(&ref_main[x + delta_int]); + } + } + } + } + else { + // Mode is horizontal or vertical, just copy the pixels. + + for (int_fast8_t y = 0; y < width; ++y) { + for (int_fast8_t x = 0; x < width; x+=4) { + *(int32_t*)&(dst[y * width + x]) = *(int32_t*)&(ref_main[x]); + } + } + } + + // Flip the block if this is was a horizontal mode. + if (!vertical_mode) { + for (int_fast8_t y = 0; y < width - 1; ++y) { + for (int_fast8_t x = y + 1; x < width; ++x) { + SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel); + } + } + } +} + +#endif //COMPILE_INTEL_AVX2 + +int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth) +{ + bool success = true; +#if COMPILE_INTEL_AVX2 + if (bitdepth == 8) { + success &= kvz_strategyselector_register(opaque, "angular_pred", "avx2", 40, &kvz_angular_pred_avx2); + } +#endif //COMPILE_INTEL_AVX2 + return success; +}
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/intra-avx2.h
Added
@@ -0,0 +1,27 @@ +#ifndef STRATEGIES_INTRA_AVX2_H_ +#define STRATEGIES_INTRA_AVX2_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_INTRA_AVX2_H_
View file
kvazaar-0.7.0.tar.gz/src/strategies/avx2/picture-avx2.c -> kvazaar-0.7.1.tar.gz/src/strategies/avx2/picture-avx2.c
Changed
@@ -26,6 +26,7 @@ #if COMPILE_INTEL_AVX2 # include "image.h" +# include "strategies/strategies-common.h" # include <immintrin.h> @@ -136,6 +137,186 @@ return m256i_horizontal_sum(sum0); } +static unsigned satd_8bit_4x4_avx2(const kvz_pixel *org, const kvz_pixel *cur) +{ + + __m128i original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)org)); + __m128i current = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)cur)); + + __m128i diff_lo = _mm_sub_epi16(current, original); + + original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)(org + 8))); + current = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)(cur + 8))); + + __m128i diff_hi = _mm_sub_epi16(current, original); + + + //Hor + __m128i row0 = _mm_hadd_epi16(diff_lo, diff_hi); + __m128i row1 = _mm_hsub_epi16(diff_lo, diff_hi); + + __m128i row2 = _mm_hadd_epi16(row0, row1); + __m128i row3 = _mm_hsub_epi16(row0, row1); + + //Ver + row0 = _mm_hadd_epi16(row2, row3); + row1 = _mm_hsub_epi16(row2, row3); + + row2 = _mm_hadd_epi16(row0, row1); + row3 = _mm_hsub_epi16(row0, row1); + + //Abs and sum + row2 = _mm_abs_epi16(row2); + row3 = _mm_abs_epi16(row3); + + row3 = _mm_add_epi16(row2, row3); + + row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, KVZ_PERMUTE(2, 3, 0, 1) )); + row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, KVZ_PERMUTE(1, 0, 1, 0) )); + row3 = _mm_add_epi16(row3, _mm_shufflelo_epi16(row3, KVZ_PERMUTE(1, 0, 1, 0) )); + + unsigned sum = _mm_extract_epi16(row3, 0); + unsigned satd = (sum + 1) >> 1; + + return satd; +} + +static void hor_add_sub_avx2(__m128i *row0, __m128i *row1){ + + __m128i a = _mm_hadd_epi16(*row0, *row1); + __m128i b = _mm_hsub_epi16(*row0, *row1); + + __m128i c = _mm_hadd_epi16(a, b); + __m128i d = _mm_hsub_epi16(a, b); + + *row0 = _mm_hadd_epi16(c, d); + *row1 = _mm_hsub_epi16(c, d); +} + +static INLINE void ver_add_sub_avx2(__m128i temp_hor[8], __m128i temp_ver[8]){ + + // First stage + for (int i = 0; i < 8; i += 2){ + temp_ver[i+0] = _mm_hadd_epi16(temp_hor[i + 0], temp_hor[i + 1]); + 
temp_ver[i+1] = _mm_hsub_epi16(temp_hor[i + 0], temp_hor[i + 1]); + } + + // Second stage + for (int i = 0; i < 8; i += 4){ + temp_hor[i + 0] = _mm_add_epi16(temp_ver[i + 0], temp_ver[i + 2]); + temp_hor[i + 1] = _mm_add_epi16(temp_ver[i + 1], temp_ver[i + 3]); + temp_hor[i + 2] = _mm_sub_epi16(temp_ver[i + 0], temp_ver[i + 2]); + temp_hor[i + 3] = _mm_sub_epi16(temp_ver[i + 1], temp_ver[i + 3]); + } + + // Third stage + for (int i = 0; i < 4; ++i){ + temp_ver[i + 0] = _mm_add_epi16(temp_hor[0 + i], temp_hor[4 + i]); + temp_ver[i + 4] = _mm_sub_epi16(temp_hor[0 + i], temp_hor[4 + i]); + } +} + +INLINE static void haddwd_accumulate_avx2(__m128i *accumulate, __m128i *ver_row) +{ + __m128i abs_value = _mm_abs_epi16(*ver_row); + *accumulate = _mm_add_epi32(*accumulate, _mm_madd_epi16(abs_value, _mm_set1_epi16(1))); +} + +INLINE static unsigned sum_block_avx2(__m128i *ver_row) +{ + __m128i sad = _mm_setzero_si128(); + haddwd_accumulate_avx2(&sad, ver_row + 0); + haddwd_accumulate_avx2(&sad, ver_row + 1); + haddwd_accumulate_avx2(&sad, ver_row + 2); + haddwd_accumulate_avx2(&sad, ver_row + 3); + haddwd_accumulate_avx2(&sad, ver_row + 4); + haddwd_accumulate_avx2(&sad, ver_row + 5); + haddwd_accumulate_avx2(&sad, ver_row + 6); + haddwd_accumulate_avx2(&sad, ver_row + 7); + + sad = _mm_add_epi32(sad, _mm_shuffle_epi32(sad, KVZ_PERMUTE(2, 3, 0, 1))); + sad = _mm_add_epi32(sad, _mm_shuffle_epi32(sad, KVZ_PERMUTE(1, 0, 1, 0))); + + return _mm_cvtsi128_si32(sad); +} + +INLINE static __m128i diff_row_avx2(const kvz_pixel *buf1, const kvz_pixel *buf2) +{ + __m128i buf1_row = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)buf1)); + __m128i buf2_row = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)buf2)); + return _mm_sub_epi16(buf1_row, buf2_row); +} + +INLINE static void diff_blocks_and_hor_transform_avx2(__m128i row_diff[8], const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) +{ + row_diff[0] = diff_row_avx2(buf1 + 0 * stride1, buf2 + 0 * stride2); + 
row_diff[1] = diff_row_avx2(buf1 + 1 * stride1, buf2 + 1 * stride2); + hor_add_sub_avx2(row_diff + 0, row_diff + 1); + + row_diff[2] = diff_row_avx2(buf1 + 2 * stride1, buf2 + 2 * stride2); + row_diff[3] = diff_row_avx2(buf1 + 3 * stride1, buf2 + 3 * stride2); + hor_add_sub_avx2(row_diff + 2, row_diff + 3); + + row_diff[4] = diff_row_avx2(buf1 + 4 * stride1, buf2 + 4 * stride2); + row_diff[5] = diff_row_avx2(buf1 + 5 * stride1, buf2 + 5 * stride2); + hor_add_sub_avx2(row_diff + 4, row_diff + 5); + + row_diff[6] = diff_row_avx2(buf1 + 6 * stride1, buf2 + 6 * stride2); + row_diff[7] = diff_row_avx2(buf1 + 7 * stride1, buf2 + 7 * stride2); + hor_add_sub_avx2(row_diff + 6, row_diff + 7); +} + +static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) +{ + __m128i temp_hor[8]; + __m128i temp_ver[8]; + + diff_blocks_and_hor_transform_avx2(temp_hor, buf1, stride1, buf2, stride2); + + ver_add_sub_avx2(temp_hor, temp_ver); + + unsigned sad = sum_block_avx2(temp_ver); + + unsigned result = (sad + 2) >> 2; + return result; +} + +// Function macro for defining hadamard calculating functions +// for fixed size blocks. They calculate hadamard for integer +// multiples of 8x8 with the 8x8 hadamard function. 
+#define SATD_NXN_AVX2(n) \ +static unsigned satd_8bit_ ## n ## x ## n ## _avx2( \ + const kvz_pixel * const block1, const kvz_pixel * const block2) \ +{ \ + unsigned x, y; \ + unsigned sum = 0; \ + for (y = 0; y < (n); y += 8) { \ + unsigned row = y * (n); \ + for (x = 0; x < (n); x += 8) { \ + sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (n), &block2[row + x], (n)); \ + } \ + } \ + return sum>>(KVZ_BIT_DEPTH-8); \ +} + +static unsigned satd_8bit_8x8_avx2( + const kvz_pixel * const block1, const kvz_pixel * const block2) +{ + unsigned x, y; + unsigned sum = 0; + for (y = 0; y < (8); y += 8) { + unsigned row = y * (8); + for (x = 0; x < (8); x += 8) { + sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (8), &block2[row + x], (8)); + } + } + return sum>>(KVZ_BIT_DEPTH-8); \ +} + +//SATD_NXN_AVX2(8) //Use the non-macro version +SATD_NXN_AVX2(16) +SATD_NXN_AVX2(32) +SATD_NXN_AVX2(64) #endif //COMPILE_INTEL_AVX2 @@ -153,6 +334,12 @@ success &= kvz_strategyselector_register(opaque, "sad_16x16", "avx2", 40, &sad_8bit_16x16_avx2); success &= kvz_strategyselector_register(opaque, "sad_32x32", "avx2", 40, &sad_8bit_32x32_avx2); success &= kvz_strategyselector_register(opaque, "sad_64x64", "avx2", 40, &sad_8bit_64x64_avx2); +
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/quant-avx2.c
Added
@@ -0,0 +1,220 @@ +/***************************************************************************** +* This file is part of Kvazaar HEVC encoder. +* +* Copyright (C) 2013-2015 Tampere University of Technology and others (see +* COPYING file). +* +* Kvazaar is free software: you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the +* Free Software Foundation; either version 2.1 of the License, or (at your +* option) any later version. +* +* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY +* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for +* more details. +* +* You should have received a copy of the GNU General Public License along +* with Kvazaar. If not, see <http://www.gnu.org/licenses/>. +****************************************************************************/ + +/* +* \file +*/ + +#include <stdlib.h> + +#include "quant-avx2.h" +#include "../generic/quant-generic.h" +#include "../strategies-common.h" +#include "strategyselector.h" +#include "encoder.h" +#include "transform.h" + +#if COMPILE_INTEL_AVX2 +#include <immintrin.h> + +/** +* \brief quantize transformed coefficents +* +*/ + +void kvz_quant_flat_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{ + const encoder_control_t * const encoder = state->encoder_control; + const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; + const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; + + int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth - 8) * 6); + const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2; + const int32_t scalinglist_type = (block_type == CU_INTRA ? 
0 : 3) + (int8_t)("\0\3\1\2"[type]); + const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6]; + const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform + const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + transform_shift; + const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9); + const int32_t q_bits8 = q_bits - 8; + + assert(quant_coeff[0] <= (1 << 15) - 1 && quant_coeff[0] >= -(1 << 15)); //Assuming flat values to fit int16_t + + uint32_t ac_sum = 0; + + __m256i v_ac_sum = _mm256_setzero_si256(); + __m256i v_quant_coeff = _mm256_set1_epi16(quant_coeff[0]); + + for (int32_t n = 0; n < width * height; n += 16) { + + __m256i v_level = _mm256_loadu_si256((__m256i*)&(coef[n])); + __m256i v_sign = _mm256_cmpgt_epi16(_mm256_setzero_si256(), v_level); + v_sign = _mm256_or_si256(v_sign, _mm256_set1_epi16(1)); + + v_level = _mm256_abs_epi16(v_level); + __m256i low_a = _mm256_unpacklo_epi16(v_level, _mm256_set1_epi16(0)); + __m256i high_a = _mm256_unpackhi_epi16(v_level, _mm256_set1_epi16(0)); + + __m256i low_b = _mm256_unpacklo_epi16(v_quant_coeff, _mm256_set1_epi16(0)); + __m256i high_b = _mm256_unpackhi_epi16(v_quant_coeff, _mm256_set1_epi16(0)); + + __m256i v_level32_a = _mm256_madd_epi16(low_a, low_b); + __m256i v_level32_b = _mm256_madd_epi16(high_a, high_b); + + v_level32_a = _mm256_add_epi32(v_level32_a, _mm256_set1_epi32(add)); + v_level32_b = _mm256_add_epi32(v_level32_b, _mm256_set1_epi32(add)); + + v_level32_a = _mm256_srai_epi32(v_level32_a, q_bits); + v_level32_b = _mm256_srai_epi32(v_level32_b, q_bits); + + v_level = _mm256_packs_epi32(v_level32_a, v_level32_b); + v_level = _mm256_sign_epi16(v_level, v_sign); + + _mm256_storeu_si256((__m256i*)&(q_coef[n]), v_level); + + v_ac_sum = _mm256_add_epi32(v_ac_sum, v_level32_a); + v_ac_sum = _mm256_add_epi32(v_ac_sum, v_level32_b); + } + 
+ __m128i temp = _mm_add_epi32(_mm256_castsi256_si128(v_ac_sum), _mm256_extracti128_si256(v_ac_sum, 1)); + temp = _mm_add_epi32(temp, _mm_shuffle_epi32(temp, KVZ_PERMUTE(2, 3, 0, 1))); + temp = _mm_add_epi32(temp, _mm_shuffle_epi32(temp, KVZ_PERMUTE(1, 0, 1, 0))); + ac_sum += _mm_cvtsi128_si32(temp); + + if (!(encoder->sign_hiding && ac_sum >= 2)) return; + + int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; + + for (int32_t n = 0; n < width * height; n++) { + int32_t level; + level = coef[n]; + level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; + delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); + } + + if (ac_sum >= 2) { +#define SCAN_SET_SIZE 16 +#define LOG2_SCAN_SET_SIZE 4 + int32_t n, last_cg = -1, abssum = 0, subset, subpos; + for (subset = (width*height - 1) >> LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { + int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg = -1; + subpos = subset << LOG2_SCAN_SET_SIZE; + abssum = 0; + + // Find last coeff pos + for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { + if (q_coef[scan[n + subpos]]) { + last_nz_pos_in_cg = n; + break; + } + } + + // First coeff pos + for (n = 0; n <SCAN_SET_SIZE; n++) { + if (q_coef[scan[n + subpos]]) { + first_nz_pos_in_cg = n; + break; + } + } + + // Sum all kvz_quant coeffs between first and last + for (n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { + abssum += q_coef[scan[n + subpos]]; + } + + if (last_nz_pos_in_cg >= 0 && last_cg == -1) { + last_cg = 1; + } + + if (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { + int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1); + if (signbit != (abssum & 0x1)) { // compare signbit with sum_parity + int32_t min_cost_inc = 0x7fffffff, min_pos = -1, cur_cost = 0x7fffffff; + int16_t final_change = 0, cur_change = 0; + for (n = (last_cg == 1 ? 
last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { + uint32_t blkPos = scan[n + subpos]; + if (q_coef[blkPos] != 0) { + if (delta_u[blkPos] > 0) { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + else if (n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = delta_u[blkPos]; + cur_change = -1; + } + } + else if (n < first_nz_pos_in_cg && ((coef[blkPos] >= 0) ? 0 : 1) != signbit) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + + if (cur_cost < min_cost_inc) { + min_cost_inc = cur_cost; + final_change = cur_change; + min_pos = blkPos; + } + } // CG loop + + if (q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { + final_change = -1; + } + + if (coef[min_pos] >= 0) q_coef[min_pos] += final_change; + else q_coef[min_pos] -= final_change; + } // Hide + } + if (last_cg == 1) last_cg = 0; + } + +#undef SCAN_SET_SIZE +#undef LOG2_SCAN_SET_SIZE + } +} + +void kvz_quant_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/quant-avx2.h
Added
#ifndef STRATEGIES_QUANT_AVX2_H_
#define STRATEGIES_QUANT_AVX2_H_
/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

#include <stdint.h>

/**
 * \brief Register the AVX2 quantization implementations with the strategy
 *        selector.
 * \param opaque   Opaque pointer to the strategy list (strategyselector).
 * \param bitdepth Encoder bit depth; implementations may decline depths
 *                 they do not support.
 * \return Nonzero on success, 0 on failure.
 */
int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth);

#endif // STRATEGIES_QUANT_AVX2_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/intra-generic.c
Added
/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

/*
 * \file
 * Generic (portable C) implementation of the intra-prediction strategies.
 */

#include <stdlib.h>

#include "intra-generic.h"
#include "strategyselector.h"


/**
 * \brief Generate angular predictions.
 * \param log2_width    Log2 of width, range 2..5.
 * \param intra_mode    Angular mode in range 2..34.
 * \param in_ref_above  Pointer to -1 index of above reference, length=width*2+1.
 * \param in_ref_left   Pointer to -1 index of left reference, length=width*2+1.
 * \param dst           Buffer of size width*width.
 */
static void kvz_angular_pred_generic(
  const int_fast8_t log2_width,
  const int_fast8_t intra_mode,
  const kvz_pixel *const in_ref_above,
  const kvz_pixel *const in_ref_left,
  kvz_pixel *const dst)
{
  assert(log2_width >= 2 && log2_width <= 5);
  assert(intra_mode >= 2 && intra_mode <= 34);

  // Displacement tables indexed by the mode's distance from pure
  // horizontal/vertical (0..8).
  static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
  static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp

  // Temporary buffer for modes 11-25.
  // It only needs to be big enough to hold indices from -width to width-1.
  kvz_pixel tmp_ref[2 * 32];
  const int_fast8_t width = 1 << log2_width;

  // Whether to swap references to always project on the left reference row.
  const bool vertical_mode = intra_mode >= 18;
  // Modes distance to horizontal or vertical mode.
  const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
  // Sample displacement per column in fractions of 32.
  const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];

  // Pointer for the reference we are interpolating from.
  const kvz_pixel *ref_main;
  // Pointer for the other reference.
  const kvz_pixel *ref_side;

  // Set ref_main and ref_side such that, when indexed with 0, they point to
  // index 0 in block coordinates.
  if (sample_disp < 0) {
    // Negative sample_disp means, we need to use both references.

    ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
    ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;

    // Move the reference pixels to start from the middle to the later half of
    // the tmp_ref, so there is room for negative indices.
    for (int_fast8_t x = -1; x < width; ++x) {
      tmp_ref[x + width] = ref_main[x];
    }
    // Get a pointer to block index 0 in tmp_ref.
    ref_main = &tmp_ref[width];

    // Extend the side reference to the negative indices of main reference
    // by projecting side samples with the inverse displacement.
    int_fast32_t col_sample_disp = 128; // rounding for the ">> 8"
    int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)];
    int_fast8_t most_negative_index = (width * sample_disp) >> 5;
    for (int_fast8_t x = -2; x >= most_negative_index; --x) {
      col_sample_disp += inv_abs_sample_disp;
      int_fast8_t side_index = col_sample_disp >> 8;
      tmp_ref[x + width] = ref_side[side_index - 1];
    }
  }
  else {
    // sample_disp >= 0 means we don't need to refer to negative indices,
    // which means we can just use the references as is.
    ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
    ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
  }

  if (sample_disp != 0) {
    // The mode is not horizontal or vertical, we have to do interpolation.

    // delta_pos accumulates the projected position in 1/32 sample units;
    // the high bits give the integer offset, the low 5 bits the fraction.
    int_fast16_t delta_pos = 0;
    for (int_fast8_t y = 0; y < width; ++y) {
      delta_pos += sample_disp;
      int_fast8_t delta_int = delta_pos >> 5;
      int_fast8_t delta_fract = delta_pos & (32 - 1);

      if (delta_fract) {
        // Do linear filtering
        for (int_fast8_t x = 0; x < width; ++x) {
          kvz_pixel ref1 = ref_main[x + delta_int];
          kvz_pixel ref2 = ref_main[x + delta_int + 1];
          dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5;
        }
      }
      else {
        // Just copy the integer samples
        for (int_fast8_t x = 0; x < width; x++) {
          dst[y * width + x] = ref_main[x + delta_int];
        }
      }
    }
  }
  else {
    // Mode is horizontal or vertical, just copy the pixels.

    for (int_fast8_t y = 0; y < width; ++y) {
      for (int_fast8_t x = 0; x < width; ++x) {
        dst[y * width + x] = ref_main[x];
      }
    }
  }

  // Flip the block if this was a horizontal mode (the prediction above was
  // computed as if it were vertical, so transpose in place).
  if (!vertical_mode) {
    for (int_fast8_t y = 0; y < width - 1; ++y) {
      for (int_fast8_t x = y + 1; x < width; ++x) {
        SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel);
      }
    }
  }
}

/**
 * \brief Register the generic intra-prediction implementation.
 * \return Nonzero on success, 0 on failure.
 */
int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth)
{
  bool success = true;

  // Priority 0: always available, lowest preference.
  success &= kvz_strategyselector_register(opaque, "angular_pred", "generic", 0, &kvz_angular_pred_generic);

  return success;
}
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/intra-generic.h
Added
@@ -0,0 +1,27 @@ +#ifndef STRATEGIES_INTRA_GENERIC_H_ +#define STRATEGIES_INTRA_GENERIC_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_INTRA_GENERIC_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/quant-generic.c
Added
/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

/*
 * \file
 * Generic (portable C) implementation of the quantization strategy.
 */

#include <stdlib.h>

#include "quant-generic.h"
#include "strategyselector.h"
#include "encoder.h"
#include "transform.h"

#define QUANT_SHIFT 14
/**
 * \brief Quantize transformed coefficients.
 *
 * \param state      Encoder state (QP, slice type, encoder control).
 * \param coef       Input transform coefficients, width*height entries.
 * \param q_coef     Output quantized coefficients, width*height entries.
 * \param width      Block width.
 * \param height     Block height.
 * \param type       Component type; selects QP scaling and scaling list row.
 * \param scan_idx   Coefficient scan-order index.
 * \param block_type CU_INTRA selects the intra scaling lists.
 *
 * When sign-bit hiding is enabled, the second pass below may adjust one
 * coefficient per 4x4 coefficient group by +-1 so that the parity of the
 * group's coefficient sum encodes the sign of its first nonzero coefficient.
 */
void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
  int32_t height, int8_t type, int8_t scan_idx, int8_t block_type)
{
  const encoder_control_t * const encoder = state->encoder_control;
  const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
  const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];

  int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth - 8) * 6);
  const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2;
  // Scaling list row: intra lists are 0..2, inter lists 3..5; the string
  // literal maps component type to the list column.
  const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
  const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6];
  const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform
  const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + transform_shift;
  // Rounding offset: 171/512 for I-slices, 85/512 otherwise (in q_bits scale).
  const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9);
  const int32_t q_bits8 = q_bits - 8;

  uint32_t ac_sum = 0;

  // First pass: quantize every coefficient and accumulate the sum of
  // absolute quantized levels.
  for (int32_t n = 0; n < width * height; n++) {
    int32_t level;
    int32_t sign;

    level = coef[n];
    sign = (level < 0 ? -1 : 1);

    // 64-bit intermediate avoids overflow of abs(level) * quant_coeff[n].
    level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits;
    ac_sum += level;

    level *= sign;
    q_coef[n] = (coeff_t)(CLIP(-32768, 32767, level));
  }

  // Sign-bit hiding is only worthwhile when there is at least one
  // nontrivial coefficient.
  if (!(encoder->sign_hiding && ac_sum >= 2)) return;

  // Rounding residue of each coefficient, used as the cost of nudging it.
  int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2];

  for (int32_t n = 0; n < width * height; n++) {
    int32_t level;
    level = coef[n];
    level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits;
    delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8);
  }

  if (ac_sum >= 2) { // always true here (guarded by the early return above)
#define SCAN_SET_SIZE 16
#define LOG2_SCAN_SET_SIZE 4
    int32_t n, last_cg = -1, abssum = 0, subset, subpos;
    // Walk the 4x4 coefficient groups in reverse scan order.
    for (subset = (width*height - 1) >> LOG2_SCAN_SET_SIZE; subset >= 0; subset--) {
      int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg = -1;
      subpos = subset << LOG2_SCAN_SET_SIZE;
      abssum = 0;

      // Find last coeff pos
      for (n = SCAN_SET_SIZE - 1; n >= 0; n--) {
        if (q_coef[scan[n + subpos]]) {
          last_nz_pos_in_cg = n;
          break;
        }
      }

      // First coeff pos
      for (n = 0; n < SCAN_SET_SIZE; n++) {
        if (q_coef[scan[n + subpos]]) {
          first_nz_pos_in_cg = n;
          break;
        }
      }

      // Sum all kvz_quant coeffs between first and last
      for (n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) {
        abssum += q_coef[scan[n + subpos]];
      }

      if (last_nz_pos_in_cg >= 0 && last_cg == -1) {
        last_cg = 1;
      }

      // Only hide the sign if the nonzero span is wide enough.
      if (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) {
        int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1);
        if (signbit != (abssum & 0x1)) { // compare signbit with sum_parity
          // Find the coefficient whose +-1 adjustment costs the least.
          int32_t min_cost_inc = 0x7fffffff, min_pos = -1, cur_cost = 0x7fffffff;
          int16_t final_change = 0, cur_change = 0;
          for (n = (last_cg == 1 ? last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) {
            uint32_t blkPos = scan[n + subpos];
            if (q_coef[blkPos] != 0) {
              if (delta_u[blkPos] > 0) {
                cur_cost = -delta_u[blkPos];
                cur_change = 1;
              }
              else if (n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) {
                // Changing the first nonzero +-1 coefficient would flip the
                // hidden sign itself; forbid it.
                cur_cost = 0x7fffffff;
              }
              else {
                cur_cost = delta_u[blkPos];
                cur_change = -1;
              }
            }
            else if (n < first_nz_pos_in_cg && ((coef[blkPos] >= 0) ? 0 : 1) != signbit) {
              // A new nonzero before the first would take over the hidden
              // sign with the wrong polarity; forbid it.
              cur_cost = 0x7fffffff;
            }
            else {
              cur_cost = -delta_u[blkPos];
              cur_change = 1;
            }

            if (cur_cost < min_cost_inc) {
              min_cost_inc = cur_cost;
              final_change = cur_change;
              min_pos = blkPos;
            }
          } // CG loop

          // Don't push a level past the coeff_t range.
          if (q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) {
            final_change = -1;
          }

          if (coef[min_pos] >= 0) q_coef[min_pos] += final_change;
          else q_coef[min_pos] -= final_change;
        } // Hide
      }
      if (last_cg == 1) last_cg = 0;
    }

#undef SCAN_SET_SIZE
#undef LOG2_SCAN_SET_SIZE
  }
}


/**
 * \brief Register the generic quantization implementation.
 * \return Nonzero on success, 0 on failure.
 */
int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth)
{
  bool success = true;

  // Priority 0: always available, lowest preference.
  success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic);

  return success;
}
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/quant-generic.h
Added
@@ -0,0 +1,31 @@ +#ifndef STRATEGIES_QUANT_GENERIC_H_ +#define STRATEGIES_QUANT_GENERIC_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +#define QUANT_SHIFT 14 + +int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth); +void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); + +#endif //STRATEGIES_QUANT_GENERIC_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-common.h
Added
#ifndef STRATEGIES_COMMON_H_
#define STRATEGIES_COMMON_H_

/**
 * Build the 8-bit immediate selector used by x86 shuffle and permutation
 * intrinsics (same layout as _MM_SHUFFLE, but in argument order a..d for
 * element slots 0..3).
 *
 * Parameters are indices to packed elements; each must evaluate to 0, 1, 2
 * or 3. Arguments are fully parenthesized so that expression arguments
 * (e.g. KVZ_PERMUTE(x + 1, 0, 0, 0) or ternaries) expand correctly.
 */
#define KVZ_PERMUTE(a, b, c, d) ( ((a) << 0) | ((b) << 2) | ((c) << 4) | ((d) << 6) )

#endif // STRATEGIES_COMMON_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-intra.c
Added
@@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "strategies-intra.h" +#include "strategyselector.h" + +// Define function pointers. +angular_pred_func *kvz_angular_pred; + +// Headers for platform optimizations. +#include "generic/intra-generic.h" +#include "avx2/intra-avx2.h" + + +int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_intra_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + success &= kvz_strategy_register_intra_avx2(opaque, bitdepth); + } + return success; +} \ No newline at end of file
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-intra.h
Added
@@ -0,0 +1,43 @@ +#ifndef STRATEGIES_INTRA_H_ +#define STRATEGIES_INTRA_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "encoderstate.h" + +typedef void (angular_pred_func)( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst); + +// Declare function pointers. +extern angular_pred_func * kvz_angular_pred; + +int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_INTRA_EXPORTS \ + {"angular_pred", (void**) &kvz_angular_pred}, \ + + + +#endif //STRATEGIES_INTRA_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-quant.c
Added
@@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "strategies-quant.h" +#include "strategyselector.h" + +// Define function pointers. +quant_func *kvz_quant; + +// Headers for platform optimizations. +#include "generic/quant-generic.h" +#include "avx2/quant-avx2.h" + + +int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_quant_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + success &= kvz_strategy_register_quant_avx2(opaque, bitdepth); + } + return success; +} \ No newline at end of file
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-quant.h
Added
@@ -0,0 +1,40 @@ +#ifndef STRATEGIES_QUANT_H_ +#define STRATEGIES_QUANT_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "encoderstate.h" + +// Declare function pointers. +typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); + +// Declare function pointers. +extern quant_func * kvz_quant; + +int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_QUANT_EXPORTS \ + {"quant", (void**) &kvz_quant}, \ + + + +#endif //STRATEGIES_QUANT_H_
View file
kvazaar-0.7.0.tar.gz/src/strategyselector.c -> kvazaar-0.7.1.tar.gz/src/strategyselector.c
Changed
@@ -69,6 +69,16 @@ fprintf(stderr, "kvz_strategy_register_ipol failed!\n"); return 0; } + + if (!kvz_strategy_register_quant(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_quant failed!\n"); + return 0; + } + + if (!kvz_strategy_register_intra(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_intra failed!\n"); + return 0; + } while(cur_strategy_to_select->fptr) { *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
View file
kvazaar-0.7.0.tar.gz/src/strategyselector.h -> kvazaar-0.7.1.tar.gz/src/strategyselector.h
Changed
@@ -148,12 +148,16 @@ #include "strategies/strategies-picture.h" #include "strategies/strategies-dct.h" #include "strategies/strategies-ipol.h" +#include "strategies/strategies-quant.h" +#include "strategies/strategies-intra.h" static const strategy_to_select_t strategies_to_select[] = { STRATEGIES_NAL_EXPORTS STRATEGIES_PICTURE_EXPORTS STRATEGIES_DCT_EXPORTS STRATEGIES_IPOL_EXPORTS + STRATEGIES_QUANT_EXPORTS + STRATEGIES_INTRA_EXPORTS { NULL, NULL }, };
View file
kvazaar-0.7.0.tar.gz/src/transform.c -> kvazaar-0.7.1.tar.gz/src/transform.c
Changed
@@ -33,6 +33,8 @@ #include "nal.h" #include "rdo.h" #include "strategies/strategies-dct.h" +#include "strategies/strategies-quant.h" +#include "strategies/generic/quant-generic.h" ////////////////////////////////////////////////////////////////////////// // INITIALIZATIONS @@ -127,137 +129,6 @@ idct_func(encoder->bitdepth, coeff, block); } - -#define QUANT_SHIFT 14 -/** - * \brief quantize transformed coefficents - * - */ -void kvz_quant(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type ) -{ - const encoder_control_t * const encoder = state->encoder_control; - const uint32_t log2_block_size = kvz_g_convert_to_bit[ width ] + 2; - const uint32_t * const scan = kvz_g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ]; - - int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth-8)*6); - - const uint32_t log2_tr_size = kvz_g_convert_to_bit[ width ] + 2; - const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); - const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size-2][scalinglist_type][qp_scaled%6]; - const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform - const int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; - const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9); - const int32_t q_bits8 = q_bits - 8; - - uint32_t ac_sum = 0; - - for (int32_t n = 0; n < width * height; n++) { - int32_t level; - int32_t sign; - - level = coef[n]; - sign = (level < 0 ? 
-1: 1); - - level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; - ac_sum += level; - - level *= sign; - q_coef[n] = (coeff_t)(CLIP( -32768, 32767, level)); - } - - if (!(encoder->sign_hiding && ac_sum >= 2)) return; - - int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; - - for (int32_t n = 0; n < width * height; n++) { - int32_t level; - level = coef[n]; - level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; - delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); - } - - if(ac_sum >= 2) { - #define SCAN_SET_SIZE 16 - #define LOG2_SCAN_SET_SIZE 4 - int32_t n,last_cg = -1, abssum = 0, subset, subpos; - for(subset = (width*height - 1)>>LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { - int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg=-1; - subpos = subset<<LOG2_SCAN_SET_SIZE; - abssum = 0; - - // Find last coeff pos - for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { - if (q_coef[scan[n + subpos]]) { - last_nz_pos_in_cg = n; - break; - } - } - - // First coeff pos - for (n = 0; n <SCAN_SET_SIZE; n++) { - if (q_coef[scan[n + subpos]]) { - first_nz_pos_in_cg = n; - break; - } - } - - // Sum all kvz_quant coeffs between first and last - for(n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { - abssum += q_coef[scan[n + subpos]]; - } - - if(last_nz_pos_in_cg >= 0 && last_cg == -1) { - last_cg = 1; - } - - if(last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { - int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1) ; - if(signbit != (abssum&0x1)) { // compare signbit with sum_parity - int32_t min_cost_inc = 0x7fffffff, min_pos =-1, cur_cost=0x7fffffff; - int16_t final_change = 0, cur_change=0; - for(n = (last_cg == 1 ? 
last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { - uint32_t blkPos = scan[n + subpos]; - if(q_coef[blkPos] != 0) { - if(delta_u[blkPos] > 0) { - cur_cost = -delta_u[blkPos]; - cur_change=1; - } else if(n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { - cur_cost=0x7fffffff; - } else { - cur_cost = delta_u[blkPos]; - cur_change =-1; - } - } else if(n < first_nz_pos_in_cg && ((coef[blkPos] >= 0)?0:1) != signbit) { - cur_cost = 0x7fffffff; - } else { - cur_cost = -delta_u[blkPos]; - cur_change = 1; - } - - if(cur_cost < min_cost_inc) { - min_cost_inc = cur_cost; - final_change = cur_change; - min_pos = blkPos; - } - } // CG loop - - if(q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { - final_change = -1; - } - - if(coef[min_pos] >= 0) q_coef[min_pos] += final_change; - else q_coef[min_pos] -= final_change; - } // Hide - } - if (last_cg == 1) last_cg=0; - } - - #undef SCAN_SET_SIZE - #undef LOG2_SCAN_SET_SIZE - } -} - /** * \brief inverse quantize transformed and quantized coefficents *
View file
kvazaar-0.7.0.tar.gz/src/transform.h -> kvazaar-0.7.1.tar.gz/src/transform.h
Changed
@@ -35,8 +35,6 @@ -void kvz_quant(const encoder_state_t *state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); void kvz_dequant(const encoder_state_t *state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type); void kvz_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
View file
kvazaar-0.7.0.tar.gz/tests/test_strategies.c -> kvazaar-0.7.1.tar.gz/tests/test_strategies.c
Changed
@@ -41,7 +41,7 @@ } if (!kvz_strategy_register_dct(&strategies, KVZ_BIT_DEPTH)) { - fprintf(stderr, "strategy_register_partial_butterfly failed!\n"); + fprintf(stderr, "strategy_register_dct failed!\n"); return; } }
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.