kvazaar
Changes of Revision 4
kvazaar.changes
Changed
@@ -1,4 +1,10 @@
 -------------------------------------------------------------------
+Sun Oct 25 06:12:37 UTC 2015 - aloisio@gmx.com
+
+- Update to version 0.7.1
+  * bumped library version to 2
+
+-------------------------------------------------------------------
 Sat Oct 10 06:21:46 UTC 2015 - aloisio@gmx.com
 
 - Update to version 0.7.0
kvazaar.spec
Changed
@@ -16,9 +16,9 @@
 #
 %define libname libkvazaar
-%define libmver 1
+%define libmver 2
 
 Name: kvazaar
-Version: 0.7.0
+Version: 0.7.1
 Release: 0
 Summary: HEVC encoder
 License: LGPL-2.1
kvazaar-0.7.0.tar.gz/README.md -> kvazaar-0.7.1.tar.gz/README.md
Changed
@@ -55,7 +55,7 @@
     --subme <integer>            : Set fractional pixel motion estimation level [1].
                                      0: only integer motion estimation
                                      1: fractional pixel motion estimation enabled
-    --source-scan-type <string>  : Set source scan type [\"progressive\"].
+    --source-scan-type <string>  : Set source scan type ["progressive"].
                                      "progressive": progressive scan
                                      "tff": top field first
                                      "bff": bottom field first
kvazaar-0.7.0.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj -> kvazaar-0.7.1.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj
Changed
@@ -153,6 +153,22 @@
     <ClCompile Include="..\..\src\search.c" />
     <ClCompile Include="..\..\src\search_inter.c" />
     <ClCompile Include="..\..\src\search_intra.c" />
+    <ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c">
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\generic\intra-generic.c" />
+    <ClCompile Include="..\..\src\strategies\generic\quant-generic.c" />
+    <ClCompile Include="..\..\src\strategies\strategies-intra.c" />
+    <ClCompile Include="..\..\src\strategies\strategies-quant.c" />
     <ClCompile Include="..\..\src\yuv_io.c" />
     <ClInclude Include="..\..\src\checkpoint.h" />
     <ClInclude Include="..\..\src\cli.h" />
@@ -201,6 +217,13 @@
     <ClInclude Include="..\..\src\kvazaar_internal.h" />
     <ClInclude Include="..\..\src\search_inter.h" />
     <ClInclude Include="..\..\src\search_intra.h" />
+    <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" />
+    <ClInclude Include="..\..\src\strategies\generic\intra-generic.h" />
+    <ClInclude Include="..\..\src\strategies\strategies-common.h" />
+    <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" />
+    <ClInclude Include="..\..\src\strategies\generic\quant-generic.h" />
+    <ClInclude Include="..\..\src\strategies\strategies-intra.h" />
+    <ClInclude Include="..\..\src\strategies\strategies-quant.h" />
     <ClInclude Include="..\..\src\yuv_io.h" />
   </ItemGroup>
   <ItemGroup>
kvazaar-0.7.0.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj.filters -> kvazaar-0.7.1.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj.filters
Changed
@@ -207,6 +207,24 @@
     <ClCompile Include="..\..\src\input_frame_buffer.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\strategies\strategies-quant.c">
+      <Filter>Source Files\strategies</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\generic\quant-generic.c">
+      <Filter>Source Files\strategies\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
+      <Filter>Source Files\strategies\avx2</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\strategies-intra.c">
+      <Filter>Source Files\strategies</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\generic\intra-generic.c">
+      <Filter>Source Files\strategies\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c">
+      <Filter>Source Files\strategies\avx2</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\src\global.h">
@@ -374,6 +392,27 @@
     <ClInclude Include="..\..\src\input_frame_buffer.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\src\strategies\strategies-common.h">
+      <Filter>Header Files\strategies</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\strategies-quant.h">
+      <Filter>Header Files\strategies</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\generic\quant-generic.h">
+      <Filter>Header Files\strategies\generic</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h">
+      <Filter>Header Files\strategies\avx2</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\strategies-intra.h">
+      <Filter>Header Files\strategies</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\generic\intra-generic.h">
+      <Filter>Header Files\strategies\generic</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h">
+      <Filter>Header Files\strategies\avx2</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <YASM Include="..\..\src\extras\x86inc.asm">
kvazaar-0.7.0.tar.gz/src/Makefile -> kvazaar-0.7.1.tar.gz/src/Makefile
Changed
@@ -12,7 +12,7 @@
 DLLDIR = $(BINDIR)
 
 # Library version number
-VER_MAJOR = 1
+VER_MAJOR = 2
 VER_MINOR = 0
 VER_RELEASE = 0
 
@@ -152,18 +152,21 @@
 # directories. If the instruction set is supported by the
 # architecture, compile the files in these directories with the
 # apropriate flags to cause the intrinsics to work.
+# Note: Using LTO on strategies caused issues on some older
+# compilers, -fno-lto on these files seemed to fix the issue.
+# LTO is no longer used and -fno-lto shouldn't be needed.
 ifeq ($(TARGET_CPU_ARCH), ppc)
-  strategies/altivec/%.o: EXTRA_FLAGS += -maltivec -fno-lto
-  strategies/altivec/%.lo: EXTRA_FLAGS += -maltivec -fno-lto
+  strategies/altivec/%.o: EXTRA_FLAGS += -maltivec
+  strategies/altivec/%.lo: EXTRA_FLAGS += -maltivec
 else ifeq ($(TARGET_CPU_ARCH), x86)
-  strategies/sse2/%.o: EXTRA_FLAGS += -msse2 -fno-lto
-  strategies/sse41/%.o: EXTRA_FLAGS += -msse4.1 -fno-lto
-  strategies/sse2/%.lo: EXTRA_FLAGS += -msse2 -fno-lto
-  strategies/sse41/%.lo: EXTRA_FLAGS += -msse4.1 -fno-lto
+  strategies/sse2/%.o: EXTRA_FLAGS += -msse2
+  strategies/sse41/%.o: EXTRA_FLAGS += -msse4.1
+  strategies/sse2/%.lo: EXTRA_FLAGS += -msse2
+  strategies/sse41/%.lo: EXTRA_FLAGS += -msse4.1
   # To disable avx2 on old compilers that don't support it.
   ifndef KVZ_DISABLE_AVX2
-    strategies/avx2/%.o: EXTRA_FLAGS += -mavx2 -fno-lto
-    strategies/avx2/%.lo: EXTRA_FLAGS += -mavx2 -fno-lto
+    strategies/avx2/%.o: EXTRA_FLAGS += -mavx2
+    strategies/avx2/%.lo: EXTRA_FLAGS += -mavx2
   endif
 endif
 
@@ -203,6 +206,8 @@
   strategies/strategies-nal.o \
   strategies/strategies-dct.o \
   strategies/strategies-ipol.o \
+  strategies/strategies-quant.o \
+  strategies/strategies-intra.o \
   strategies/generic/nal-generic.o \
   strategies/generic/picture-generic.o \
   strategies/sse2/picture-sse2.o \
@@ -213,11 +218,15 @@
   strategies/generic/dct-generic.o \
   strategies/avx2/dct-avx2.o \
   strategies/generic/ipol-generic.o \
-  strategies/avx2/ipol-avx2.o
+  strategies/avx2/ipol-avx2.o \
+  strategies/generic/quant-generic.o \
+  strategies/avx2/quant-avx2.o \
+  strategies/generic/intra-generic.o \
+  strategies/avx2/intra-avx2.o
 
 ifndef KVZ_DISABLE_ASM
   # Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause
-  # the registration function to register the function pointers in the ASM 
+  # the registration function to register the function pointers in the ASM
   # files.
   strategies/x86_asm/%.o: EXTRA_FLAGS += -DKVZ_COMPILE_ASM
 
@@ -321,7 +330,7 @@
 .PHONY: kvazaar.pc init_submodules install install-pc install-prog install-lib
 .PHONY: install-dylib install-dll clean
 
-kvazaar.pc: KVZ_VERSION = $(shell sed -n 's/^#define\s\+KVZ_VERSION\s\+\(.*\)/\1/ p' global.h)
+kvazaar.pc: KVZ_VERSION = $(shell awk '/#define KVZ_VERSION/ { print $$3 }' global.h)
 kvazaar.pc: kvazaar.pc.in Makefile
 	sed -e "s;@prefix@;$(PREFIX);" -e "s;@libdir@;$(LIBDIR);" \
 	    -e "s;@VERSION@;$(KVZ_VERSION);" \
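The last hunk above swaps a sed expression for awk when extracting KVZ_VERSION for kvazaar.pc, presumably for portability, since \s and \+ are GNU sed extensions. For reference, the line being matched in src/global.h (also bumped in this revision, see the global.h hunk further down) looks like this, and the awk one-liner simply prints its third whitespace-separated field:

/* src/global.h -- the definition the kvazaar.pc rule reads; field 3 is "0.7.1". */
#define KVZ_VERSION 0.7.1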
kvazaar-0.7.0.tar.gz/src/cli.c -> kvazaar-0.7.1.tar.gz/src/cli.c
Changed
@@ -198,6 +198,14 @@
     goto done;
   }
 
+  if (opts->config->vps_period < 0) {
+    // Disabling parameter sets is only possible when using Kvazaar as
+    // a library.
+    fprintf(stderr, "Input error: vps_period must be non-negative\n");
+    ok = 0;
+    goto done;
+  }
+
   // Set resolution automatically if necessary
   if (opts->config->width == 0 && opts->config->width == 0){
     ok = select_input_res_auto(opts->input, &opts->config->width, &opts->config->height);
kvazaar-0.7.0.tar.gz/src/config.c -> kvazaar-0.7.1.tar.gz/src/config.c
Changed
@@ -157,7 +157,7 @@
   //If the arg starts with u, we want an uniform split
   if (arg[0]=='u') {
     *ntiles = atoi(arg+1)-1;
-    if (MAX_TILES_PER_DIM <= *ntiles || 0 >= *ntiles) {
+    if (MAX_TILES_PER_DIM <= *ntiles || 0 > *ntiles) {
       fprintf(stderr, "Invalid number of tiles (0 < %d <= %d = MAX_TILES_PER_DIM)!\n", *ntiles + 1, MAX_TILES_PER_DIM);
       return 0;
     }
@@ -504,8 +504,8 @@
     error = 1;
   }
 
-  if (cfg->rdo < 0 || cfg->rdo > 2) {
-    fprintf(stderr, "Input error: --rd parameter out of range [0..2]\n");
+  if (cfg->rdo < 0 || cfg->rdo > 3) {
+    fprintf(stderr, "Input error: --rd parameter out of range [0..3]\n");
     error = 1;
   }
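A quick worked example of the uniform-tile check fixed in the first hunk: "u4" gives *ntiles = atoi("4") - 1 = 3 split positions, i.e. four uniform tiles, while "u1" gives *ntiles = 0; the old "0 >= *ntiles" test wrongly rejected that single-tile case and the new "0 > *ntiles" test accepts it. A self-contained sketch of the same logic, not code from the patch (MAX_TILES_PER_DIM's real value lives in kvazaar's headers and is only assumed here):

#include <assert.h>
#include <stdlib.h>

#define MAX_TILES_PER_DIM 16  /* assumed value, for this sketch only */

/* Mirrors the fixed check for a "uN" tile-split argument. */
static int uniform_tiles_ok(const char *arg)
{
  int ntiles = atoi(arg + 1) - 1;  /* number of split positions */
  return !(MAX_TILES_PER_DIM <= ntiles || 0 > ntiles);
}

int main(void)
{
  assert(uniform_tiles_ok("u1"));    /* single tile: rejected before the fix, accepted now */
  assert(uniform_tiles_ok("u4"));    /* four uniform tiles */
  assert(!uniform_tiles_ok("u99"));  /* too many tiles */
  return 0;
}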
kvazaar-0.7.0.tar.gz/src/encoder.c -> kvazaar-0.7.1.tar.gz/src/encoder.c
Changed
@@ -433,7 +433,11 @@
   // AUD
   encoder->aud_enable = (int8_t)encoder->cfg->aud_enable;
 
-  encoder->vps_period = encoder->cfg->vps_period * encoder->cfg->intra_period;
+  if (encoder->cfg->vps_period >= 0) {
+    encoder->vps_period = encoder->cfg->vps_period * encoder->cfg->intra_period;
+  } else {
+    encoder->vps_period = -1;
+  }
 
   return encoder;
 
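Taken together with the cli.c and encoder.h hunks, vps_period is now signed and a negative configured value means parameter sets are never emitted in-band, which is only meaningful when Kvazaar is used as a library. A minimal sketch of the resulting semantics, not code from the patch:

#include <stdbool.h>
#include <stdint.h>

/* Sketch: map the configured value to the encoder's effective vps_period. */
static int32_t effective_vps_period(int32_t cfg_vps_period, int32_t intra_period)
{
  if (cfg_vps_period < 0) return -1;      /* never write VPS/SPS/PPS in-band */
  return cfg_vps_period * intra_period;   /* 0: only with the first frame */
}

/* Sketch: which frames get VPS/SPS/PPS, matching the condition used in the
 * encoder_state-bitstream.c hunk later in this revision. */
static bool frame_gets_parameter_sets(int32_t vps_period, int32_t frame)
{
  if (vps_period > 0) return frame % vps_period == 0;
  return vps_period == 0 && frame == 0;
}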
kvazaar-0.7.0.tar.gz/src/encoder.h -> kvazaar-0.7.1.tar.gz/src/encoder.h
Changed
@@ -140,7 +140,7 @@
   } pu_depth_inter, pu_depth_intra;
 
   // How often Video Parameter Set is re-sent.
-  uint32_t vps_period;
+  int32_t vps_period;
 
   bool sign_hiding;
 
kvazaar-0.7.0.tar.gz/src/encoder_state-bitstream.c -> kvazaar-0.7.1.tar.gz/src/encoder_state-bitstream.c
Changed
@@ -41,10 +41,9 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_PTL(encoder_state_t * const state) +static void encoder_state_write_bitstream_PTL(bitstream_t *stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; - int i; // PTL // Profile Tier WRITE_U(stream, 0, 2, "general_profile_space"); @@ -74,17 +73,16 @@ WRITE_U(stream, 0, 1, "sub_layer_profile_present_flag"); WRITE_U(stream, 0, 1, "sub_layer_level_present_flag"); - for (i = 1; i < 8; i++) { + for (int i = 1; i < 8; i++) { WRITE_U(stream, 0, 2, "reserved_zero_2bits"); } // end PTL } -static void encoder_state_write_bitstream_vid_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_vid_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; - int i; #ifdef KVZ_DEBUG printf("=========== Video Parameter Set ID: 0 ===========\n"); #endif @@ -96,12 +94,12 @@ WRITE_U(stream, 0, 1, "vps_temporal_id_nesting_flag"); WRITE_U(stream, 0xffff, 16, "vps_reserved_ffff_16bits"); - encoder_state_write_bitstream_PTL(state); + encoder_state_write_bitstream_PTL(stream, state); WRITE_U(stream, 0, 1, "vps_sub_layer_ordering_info_present_flag"); //for each layer - for (i = 0; i < 1; i++) { + for (int i = 0; i < 1; i++) { WRITE_UE(stream, 1, "vps_max_dec_pic_buffering"); WRITE_UE(stream, 0, "vps_num_reorder_pics"); WRITE_UE(stream, 0, "vps_max_latency_increase"); @@ -119,10 +117,10 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_scaling_list(encoder_state_t * const state) +static void encoder_state_write_bitstream_scaling_list(bitstream_t *stream, + encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; - bitstream_t * const stream = &state->stream; uint32_t size_id; for (size_id = 0; size_id < SCALING_LIST_SIZE_NUM; size_id++) { int32_t list_id; @@ -177,9 +175,9 @@ } -static void encoder_state_write_bitstream_VUI(encoder_state_t * const state) +static void encoder_state_write_bitstream_VUI(bitstream_t *stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; const encoder_control_t * const encoder = state->encoder_control; #ifdef KVZ_DEBUG printf("=========== VUI Set ID: 0 ===========\n"); @@ -260,8 +258,8 @@ //ENDIF WRITE_U(stream, 0, 1, "neutral_chroma_indication_flag"); - WRITE_U(stream, state->encoder_control->vui.field_seq_flag, 1, "field_seq_flag"); // 0: frames, 1: fields - WRITE_U(stream, state->encoder_control->vui.frame_field_info_present_flag, 1, "frame_field_info_present_flag"); + WRITE_U(stream, encoder->vui.field_seq_flag, 1, "field_seq_flag"); // 0: frames, 1: fields + WRITE_U(stream, encoder->vui.frame_field_info_present_flag, 1, "frame_field_info_present_flag"); WRITE_U(stream, 0, 1, "default_display_window_flag"); //IF default display window @@ -278,9 +276,9 @@ //ENDIF } -static void encoder_state_write_bitstream_seq_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; const encoder_control_t * encoder = state->encoder_control; #ifdef KVZ_DEBUG @@ -292,13 +290,13 @@ WRITE_U(stream, 1, 3, "sps_max_sub_layers_minus1"); WRITE_U(stream, 0, 1, "sps_temporal_id_nesting_flag"); - encoder_state_write_bitstream_PTL(state); + encoder_state_write_bitstream_PTL(stream, state); WRITE_UE(stream, 0, 
"sps_seq_parameter_set_id"); - WRITE_UE(stream, state->encoder_control->in.video_format, + WRITE_UE(stream, encoder->in.video_format, "chroma_format_idc"); - if (state->encoder_control->in.video_format == 3) { + if (encoder->in.video_format == 3) { WRITE_U(stream, 0, 1, "separate_colour_plane_flag"); } @@ -331,7 +329,7 @@ WRITE_U(stream, 0, 1, "sps_sub_layer_ordering_info_present_flag"); //for each layer - WRITE_UE(stream, state->encoder_control->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering"); + WRITE_UE(stream, encoder->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering"); WRITE_UE(stream, encoder->cfg->gop_len, "sps_num_reorder_pics"); WRITE_UE(stream, 0, "sps_max_latency_increase"); //end for @@ -344,14 +342,14 @@ WRITE_UE(stream, encoder->tr_depth_intra, "max_transform_hierarchy_depth_intra"); // scaling list - WRITE_U(stream, state->encoder_control->scaling_list.enable, 1, "scaling_list_enable_flag"); - if (state->encoder_control->scaling_list.enable) { + WRITE_U(stream, encoder->scaling_list.enable, 1, "scaling_list_enable_flag"); + if (encoder->scaling_list.enable) { WRITE_U(stream, 1, 1, "sps_scaling_list_data_present_flag"); - encoder_state_write_bitstream_scaling_list(state); + encoder_state_write_bitstream_scaling_list(stream, state); } WRITE_U(stream, 0, 1, "amp_enabled_flag"); - WRITE_U(stream, state->encoder_control->sao_enable ? 1 : 0, 1, + WRITE_U(stream, encoder->sao_enable ? 1 : 0, 1, "sample_adaptive_offset_enabled_flag"); WRITE_U(stream, ENABLE_PCM, 1, "pcm_enabled_flag"); #if ENABLE_PCM == 1 @@ -377,17 +375,17 @@ WRITE_U(stream, 0, 1, "sps_strong_intra_smoothing_enable_flag"); WRITE_U(stream, 1, 1, "vui_parameters_present_flag"); - encoder_state_write_bitstream_VUI(state); + encoder_state_write_bitstream_VUI(stream, state); WRITE_U(stream, 0, 1, "sps_extension_flag"); kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_pic_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; - bitstream_t * const stream = &state->stream; #ifdef KVZ_DEBUG printf("=========== Picture Parameter Set ID: 0 ===========\n"); #endif @@ -403,7 +401,7 @@ WRITE_UE(stream, 0, "num_ref_idx_l1_default_active_minus1"); WRITE_SE(stream, ((int8_t)encoder->cfg->qp) - 26, "pic_init_qp_minus26"); WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); - WRITE_U(stream, state->encoder_control->trskip_enable, 1, "transform_skip_enabled_flag"); + WRITE_U(stream, encoder->trskip_enable, 1, "transform_skip_enabled_flag"); WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); //if cu_qp_delta_enabled_flag //WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); @@ -445,13 +443,13 @@ //IF deblocking_filter WRITE_U(stream, 0, 1, "deblocking_filter_override_enabled_flag"); - WRITE_U(stream, state->encoder_control->deblock_enable ? 0 : 1, 1, + WRITE_U(stream, encoder->deblock_enable ? 
0 : 1, 1, "pps_disable_deblocking_filter_flag"); //IF !disabled - if (state->encoder_control->deblock_enable) { - WRITE_SE(stream, state->encoder_control->beta_offset_div2, "beta_offset_div2"); - WRITE_SE(stream, state->encoder_control->tc_offset_div2, "tc_offset_div2"); + if (encoder->deblock_enable) { + WRITE_SE(stream, encoder->beta_offset_div2, "beta_offset_div2"); + WRITE_SE(stream, encoder->tc_offset_div2, "tc_offset_div2"); } //ENDIF @@ -639,7 +637,7 @@ int j; int ref_negative = 0; int ref_positive = 0; - if (state->encoder_control->cfg->gop_len) { + if (encoder->cfg->gop_len) { for (j = 0; j < state->global->ref->used_size; j++) { if (state->global->ref->pocs[j] < state->global->poc) { ref_negative++; @@ -686,10 +684,10 @@ for (j = 0; j < ref_negative; j++) { int8_t delta_poc = 0; - if (state->encoder_control->cfg->gop_len) { + if (encoder->cfg->gop_len) { int8_t found = 0; do { - delta_poc = state->encoder_control->cfg->gop[state->global->gop_offset].ref_neg[j + poc_shift]; + delta_poc = encoder->cfg->gop[state->global->gop_offset].ref_neg[j + poc_shift]; for (int i = 0; i < state->global->ref->used_size; i++) { if (state->global->ref->pocs[i] == state->global->poc - delta_poc) { found = 1; @@ -704,7 +702,7 @@ } while (!found); } - WRITE_UE(stream, state->encoder_control->cfg->gop_len?delta_poc - last_poc - 1:0, "delta_poc_s0_minus1"); + WRITE_UE(stream, encoder->cfg->gop_len?delta_poc - last_poc - 1:0, "delta_poc_s0_minus1"); last_poc = delta_poc; WRITE_U(stream,1,1, "used_by_curr_pic_s0_flag"); } @@ -713,10 +711,10 @@ for (j = 0; j < ref_positive; j++) { int8_t delta_poc = 0; - if (state->encoder_control->cfg->gop_len) { + if (encoder->cfg->gop_len) { int8_t found = 0; do { - delta_poc = state->encoder_control->cfg->gop[state->global->gop_offset].ref_pos[j + poc_shift]; + delta_poc = encoder->cfg->gop[state->global->gop_offset].ref_pos[j + poc_shift]; for (int i = 0; i < state->global->ref->used_size; i++) { if (state->global->ref->pocs[i] == state->global->poc + delta_poc) { found = 1; @@ -731,7 +729,7 @@ } while (!found); } - WRITE_UE(stream, state->encoder_control->cfg->gop_len ? delta_poc - last_poc - 1 : 0, "delta_poc_s1_minus1"); + WRITE_UE(stream, encoder->cfg->gop_len ? delta_poc - last_poc - 1 : 0, "delta_poc_s1_minus1"); last_poc = delta_poc; WRITE_U(stream, 1, 1, "used_by_curr_pic_s1_flag"); } @@ -756,7 +754,7 @@ } { - int slice_qp_delta = state->global->QP - state->encoder_control->cfg->qp; + int slice_qp_delta = state->global->QP - encoder->cfg->qp; WRITE_SE(stream, slice_qp_delta, "slice_qp_delta"); } @@ -840,25 +838,14 @@ } if ((encoder->vps_period > 0 && state->global->frame % encoder->vps_period == 0) - || state->global->frame == 0) + || (state->global->frame == 0 && encoder->vps_period >= 0)) { first_nal_in_au = false; - - // Video Parameter Set (VPS) - kvz_nal_write(stream, KVZ_NAL_VPS_NUT, 0, 1); - encoder_state_write_bitstream_vid_parameter_set(state); - - // Sequence Parameter Set (SPS) - kvz_nal_write(stream, KVZ_NAL_SPS_NUT, 0, 1); - encoder_state_write_bitstream_seq_parameter_set(state); - - // Picture Parameter Set (PPS) - kvz_nal_write(stream, KVZ_NAL_PPS_NUT, 0, 1); - encoder_state_write_bitstream_pic_parameter_set(state); + kvz_encoder_state_write_parameter_sets(&state->stream, state); } // Send Kvazaar version information only in the first frame. 
- if (state->global->frame == 0 && state->encoder_control->cfg->add_encoder_info) { + if (state->global->frame == 0 && encoder->cfg->add_encoder_info) { kvz_nal_write(stream, KVZ_NAL_PREFIX_SEI_NUT, 0, first_nal_in_au); encoder_state_write_bitstream_prefix_sei_version(state); @@ -867,7 +854,7 @@ } //SEI messages for interlacing - if (state->encoder_control->vui.frame_field_info_present_flag){ + if (encoder->vui.frame_field_info_present_flag){ // These should be optional, needed for earlier versions // of HM decoder to accept bitstream //kvz_nal_write(stream, KVZ_NAL_PREFIX_SEI_NUT, 0, 0); @@ -889,14 +876,14 @@ { PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); encoder_state_write_bitstream_children(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type); + PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type); } { PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); // Calculate checksum add_checksum(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type); + PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type); } //Get bitstream length for stats @@ -982,3 +969,19 @@ { kvz_encoder_state_write_bitstream((encoder_state_t *) opaque); } + +void kvz_encoder_state_write_parameter_sets(bitstream_t *stream, + encoder_state_t * const state) +{ + // Video Parameter Set (VPS) + kvz_nal_write(stream, KVZ_NAL_VPS_NUT, 0, 1); + encoder_state_write_bitstream_vid_parameter_set(stream, state); + + // Sequence Parameter Set (SPS) + kvz_nal_write(stream, KVZ_NAL_SPS_NUT, 0, 1); + encoder_state_write_bitstream_seq_parameter_set(stream, state); + + // Picture Parameter Set (PPS) + kvz_nal_write(stream, KVZ_NAL_PPS_NUT, 0, 1); + encoder_state_write_bitstream_pic_parameter_set(stream, state); +}
kvazaar-0.7.0.tar.gz/src/encoder_state-bitstream.h -> kvazaar-0.7.1.tar.gz/src/encoder_state-bitstream.h
Changed
@@ -26,17 +26,18 @@
 
 #include "global.h"
 
-
 // Forward declare because including the header would lead to a cyclic
 // dependency.
 struct encoder_state_t;
+struct bitstream_t;
 
 void kvz_encoder_state_write_bitstream_slice_header(struct encoder_state_t * const state);
 void kvz_encoder_state_write_bitstream(struct encoder_state_t * const state);
 void kvz_encoder_state_write_bitstream_leaf(struct encoder_state_t * const state);
 
 void kvz_encoder_state_worker_write_bitstream_leaf(void * opaque);
 void kvz_encoder_state_worker_write_bitstream(void * opaque);
-
+void kvz_encoder_state_write_parameter_sets(struct bitstream_t *stream,
+                                            struct encoder_state_t * const state);
 
 #endif // ENCODER_STATE_BITSTREAM_H_
kvazaar-0.7.0.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-0.7.1.tar.gz/src/encoder_state-ctors_dtors.c
Changed
@@ -113,15 +113,14 @@
 
 static int encoder_state_config_slice_init(encoder_state_t * const state,
                                            const int start_address_in_ts, const int end_address_in_ts) {
-  int i = 0, slice_found=0;
-  for (i = 0; i < state->encoder_control->slice_count; ++i) {
+  state->slice->id = -1;
+  for (int i = 0; i < state->encoder_control->slice_count; ++i) {
     if (state->encoder_control->slice_addresses_in_ts[i] == start_address_in_ts) {
       state->slice->id = i;
-      slice_found = 1;
       break;
     }
   }
-  assert(slice_found);
+  assert(state->slice->id != -1);
 
   state->slice->start_in_ts = start_address_in_ts;
   state->slice->end_in_ts = end_address_in_ts;
kvazaar-0.7.0.tar.gz/src/global.h -> kvazaar-0.7.1.tar.gz/src/global.h
Changed
@@ -124,6 +124,7 @@
 #define MAX(a,b) (((a)>(b))?(a):(b))
 #define MIN(a,b) (((a)<(b))?(a):(b))
 #define CLIP(low,high,value) MAX((low),MIN((high),(value)))
+#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))
 #define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; }
 #define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth)
 #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
@@ -143,7 +144,7 @@
 // NOTE: When making a release, remember to also bump library version in
 // Makefile, if necessary.
-#define KVZ_VERSION 0.7.0
+#define KVZ_VERSION 0.7.1
 
 #define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION)
 
 //#define VERBOSE 1
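The new CLIP_TO_PIXEL macro is a small convenience wrapper around the existing CLIP; the sketch below shows its behaviour under the assumption of 8-bit content, where PIXEL_MAX would be 255 (the macro definitions are copied from the hunk, PIXEL_MAX's value is the assumed part):

#include <assert.h>

#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
#define CLIP(low,high,value) MAX((low),MIN((high),(value)))
#define PIXEL_MAX 255                                    /* assumed: 8-bit depth */
#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))

int main(void)
{
  assert(CLIP_TO_PIXEL(-3) == 0);     /* clamped up to 0 */
  assert(CLIP_TO_PIXEL(100) == 100);  /* in range, unchanged */
  assert(CLIP_TO_PIXEL(300) == 255);  /* clamped down to PIXEL_MAX */
  return 0;
}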
kvazaar-0.7.0.tar.gz/src/image.c -> kvazaar-0.7.1.tar.gz/src/image.c
Changed
@@ -114,10 +114,9 @@
  */
 kvz_picture *kvz_image_copy_ref(kvz_picture *im)
 {
-  int32_t new_refcount = ATOMIC_INC(&(im->refcount));
-  // The caller should have had another reference.
-  assert(new_refcount > 1);
+  assert(im->refcount > 0);
+  ATOMIC_INC(&(im->refcount));
 
   return im;
 }
 
kvazaar-0.7.0.tar.gz/src/intra.c -> kvazaar-0.7.1.tar.gz/src/intra.c
Changed
@@ -28,91 +28,19 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> -#include "config.h" #include "encoder.h" #include "transform.h" -#include "rdo.h" +#include "strategies/strategies-intra.h" -const uint8_t kvz_intra_hor_ver_dist_thres[5] = {0,7,1,0,0}; - - -/** - * \brief Set intrablock mode (and init typedata) - * \param pic picture to use - * \param xCtb x CU position (smallest CU) - * \param yCtb y CU position (smallest CU) - * \param depth current CU depth - * \param mode mode to set - * \returns Void - */ -void kvz_intra_set_block_mode(videoframe_t *frame,uint32_t x_cu, uint32_t y_cu, uint8_t depth, uint8_t mode, uint8_t part_mode) -{ - uint32_t x, y; - int block_scu_width = (LCU_WIDTH>>depth)/(LCU_WIDTH>>MAX_DEPTH); - - if (part_mode == SIZE_NxN) { - cu_info_t *cur_cu = kvz_videoframe_get_cu(frame, x_cu, y_cu); - // Modes are already set. - cur_cu->depth = depth; - cur_cu->type = CU_INTRA; - cur_cu->tr_depth = depth + 1; - return; - } - - // Loop through all the blocks in the area of cur_cu - for (y = y_cu; y < y_cu + block_scu_width; y++) { - for (x = x_cu; x < x_cu + block_scu_width; x++) { - cu_info_t *cur_cu = kvz_videoframe_get_cu(frame, x_cu, y_cu); - cur_cu->depth = depth; - cur_cu->type = CU_INTRA; - cur_cu->intra[0].mode = mode; - cur_cu->intra[1].mode = mode; - cur_cu->intra[2].mode = mode; - cur_cu->intra[3].mode = mode; - cur_cu->part_size = part_mode; - cur_cu->tr_depth = depth; - } - } -} - -/** - * \brief get intrablock mode - * \param pic picture data to use - * \param picwidth width of the picture data - * \param xpos x-position - * \param ypos y-position - * \param width block width - * \returns DC prediction -*/ -kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel *pic, uint16_t picwidth, uint8_t width) -{ - int32_t i, sum = 0; - - // pixels on top and left - for (i = -picwidth; i < width - picwidth; i++) { - sum += pic[i]; - } - for (i = -1; i < width * picwidth - 1; i += picwidth) { - sum += pic[i]; - } - - // return the average - return (kvz_pixel)((sum + width) / (width + width)); -} - -/** - * \brief Function for deriving intra luma predictions - * \param pic picture to use - * \param x_cu x CU position (smallest CU) - * \param y_cu y CU position (smallest CU) - * \param preds output buffer for 3 predictions - * \returns (predictions are found)?1:0 - */ -int8_t kvz_intra_get_dir_luma_predictor(const uint32_t x, const uint32_t y, int8_t* preds, - const cu_info_t * const cur_cu, const cu_info_t * const left_cu, const cu_info_t * const above_cu) +int8_t kvz_intra_get_dir_luma_predictor( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_cu, + const cu_info_t *const left_cu, + const cu_info_t *const above_cu) { int y_cu = y>>3; @@ -166,158 +94,235 @@ return 1; } + +static void intra_filter_reference( + int_fast8_t log2_width, + kvz_intra_references *refs) +{ + if (refs->filtered_initialized) { + return; + } else { + refs->filtered_initialized = true; + } + + const int_fast8_t ref_width = 2 * (1 << log2_width) + 1; + kvz_intra_ref *ref = &refs->ref; + kvz_intra_ref *filtered_ref = &refs->filtered_ref; + + filtered_ref->left[0] = (ref->left[1] + 2 * ref->left[0] + ref->top[1] + 2) / 4; + filtered_ref->top[0] = filtered_ref->left[0]; + + for (int_fast8_t y = 1; y < ref_width - 1; ++y) { + kvz_pixel *p = &ref->left[y]; + filtered_ref->left[y] = (p[-1] + 2 * p[0] + p[1] + 2) / 4; + } + filtered_ref->left[ref_width - 1] = ref->left[ref_width - 1]; + + for (int_fast8_t x = 1; x < ref_width - 1; ++x) { + 
kvz_pixel *p = &ref->top[x]; + filtered_ref->top[x] = (p[-1] + 2 * p[0] + p[1] + 2) / 4; + } + filtered_ref->top[ref_width - 1] = ref->top[ref_width - 1]; +} + + +static void intra_post_process_angular( + unsigned width, + unsigned stride, + const kvz_pixel *ref, + kvz_pixel *block) +{ + kvz_pixel ref2 = ref[0]; + for (unsigned i = 0; i < width; i++) { + kvz_pixel val = block[i * stride]; + kvz_pixel ref1 = ref[i + 1]; + block[i * stride] = CLIP_TO_PIXEL(val + ((ref1 - ref2) >> 1)); + } +} + + /** - * \brief Intra filtering of the border samples - * \param ref reference picture data - * \param x_cu x CU position (smallest CU) - * \param y_cu y CU position (smallest CU) - * \param depth current CU depth - * \param preds output buffer for 3 predictions - * \returns (predictions are found)?1:0 + * \brief Generage planar prediction. + * \param log2_width Log2 of width, range 2..5. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. */ -void kvz_intra_filter(kvz_pixel *ref, int32_t stride,int32_t width, int8_t mode) +static void intra_pred_planar( + const int_fast8_t log2_width, + const kvz_pixel *const ref_top, + const kvz_pixel *const ref_left, + kvz_pixel *const dst) { - #define FWIDTH (LCU_WIDTH*2+1) - kvz_pixel filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples - kvz_pixel *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1) - int x,y; - - if (!mode) { - // pF[ -1 ][ -1 ] = ( p[ -1 ][ 0 ] + 2*p[ -1 ][ -1 ] + p[ 0 ][ -1 ] + 2 ) >> 2 (8 35) - filteredShift[-FWIDTH-1] = (ref[-1] + 2*ref[-(int32_t)stride-1] + ref[-(int32_t)stride] + 2) >> 2; - - // pF[ -1 ][ y ] = ( p[ -1 ][ y + 1 ] + 2*p[ -1 ][ y ] + p[ -1 ][ y - 1 ] + 2 ) >> 2 for y = 0..nTbS * 2 - 2 (8 36) - for (y = 0; y < (int32_t)width * 2 - 1; y++) { - filteredShift[y*FWIDTH-1] = (ref[(y + 1) * stride - 1] + 2*ref[y * stride - 1] + ref[(y - 1) * stride - 1] + 2) >> 2; + assert(log2_width >= 2 && log2_width <= 5); + + const int_fast8_t width = 1 << log2_width; + const kvz_pixel top_right = ref_top[width + 1]; + const kvz_pixel bottom_left = ref_left[width + 1]; + +#if 0 + // Unoptimized version for reference. 
+ for (int y = 0; y < width; ++y) { + for (int x = 0; x < width; ++x) { + int_fast16_t hor = (width - 1 - x) * ref_left[y + 1] + (x + 1) * top_right; + int_fast16_t ver = (width - 1 - y) * ref_top[x + 1] + (y + 1) * bottom_left; + dst[y * width + x] = (ver + hor + width) >> (log2_width + 1); } + } +#else + int_fast16_t top[32]; + for (int i = 0; i < width; ++i) { + top[i] = ref_top[i + 1] << log2_width; + } - // pF[ -1 ][ nTbS * 2 - 1 ] = p[ -1 ][ nTbS * 2 - 1 ] (8 37) - filteredShift[(width * 2 - 1) * FWIDTH - 1] = ref[(width * 2 - 1) * stride - 1]; - - // pF[ x ][ -1 ] = ( p[ x - 1 ][ -1 ] + 2*p[ x ][ -1 ] + p[ x + 1 ][ -1 ] + 2 ) >> 2 for x = 0..nTbS * 2 - 2 (8 38) - for(x = 0; x < (int32_t)width*2-1; x++) { - filteredShift[x - FWIDTH] = (ref[x - 1 - stride] + 2*ref[x - stride] + ref[x + 1 - stride] + 2) >> 2; + for (int y = 0; y < width; ++y) { + int_fast16_t hor = (ref_left[y + 1] << log2_width) + width; + for (int x = 0; x < width; ++x) { + hor += top_right - ref_left[y + 1]; + top[x] += bottom_left - ref_top[x + 1]; + dst[y * width + x] = (hor + top[x]) >> (log2_width + 1); } + } +#endif +} - // pF[ nTbS * 2 - 1 ][ -1 ] = p[ nTbS * 2 - 1 ][ -1 ] - filteredShift[(width * 2 - 1) - FWIDTH] = ref[(width * 2 - 1) - stride]; - // Copy filtered samples to the input array - for (x = -1; x < (int32_t)width * 2; x++) { - ref[x - stride] = filtered[x + 1]; - } - for(y = 0; y < (int32_t)width * 2; y++) { - ref[y * stride - 1] = filtered[(y + 1) * FWIDTH]; - } - } else { - printf("UNHANDLED: %s: %d\r\n", __FILE__, __LINE__); - exit(1); +/** +* \brief Generage planar prediction. +* \param log2_width Log2 of width, range 2..5. +* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. +* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. +* \param dst Buffer of size width*width. +*/ +static void intra_pred_dc( + const int_fast8_t log2_width, + const kvz_pixel *const ref_top, + const kvz_pixel *const ref_left, + kvz_pixel *const out_block) +{ + int_fast8_t width = 1 << log2_width; + + int_fast16_t sum = 0; + for (int_fast8_t i = 0; i < width; ++i) { + sum += ref_top[i + 1]; + sum += ref_left[i + 1]; + } + + const kvz_pixel dc_val = (sum + width) >> (log2_width + 1); + const int_fast16_t block_size = 1 << (log2_width * 2); + + for (int_fast16_t i = 0; i < block_size; ++i) { + out_block[i] = dc_val; } - #undef FWIDTH } /** - * \param rec Reference pixel. 0 points to unfiltered and 1 to filtered. - * \param recstride Stride for rec pixel arrays. - * \param dst - */ -void kvz_intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma) +* \brief Generage intra DC prediction with post filtering applied. +* \param log2_width Log2 of width, range 2..5. +* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. +* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. +* \param dst Buffer of size width*width. 
+*/ +static void intra_pred_filtered_dc( + const int_fast8_t log2_width, + const kvz_pixel *const ref_top, + const kvz_pixel *const ref_left, + kvz_pixel *const out_block) { - const kvz_pixel *ref_pixels = rec; - if (is_chroma || mode == 1 || width == 4) { + assert(log2_width >= 2 && log2_width <= 5); + + const int_fast8_t width = 1 << log2_width; + + int_fast16_t sum = 0; + for (int_fast8_t i = 0; i < width; ++i) { + sum += ref_top[i + 1]; + sum += ref_left[i + 1]; + } + + const kvz_pixel dc_val = (sum + width) >> (log2_width + 1); + + // Filter top-left with ([1 2 1] / 4) + out_block[0] = (ref_left[1] + 2 * dc_val + ref_top[1] + 2) / 4; + + // Filter rest of the boundary with ([1 3] / 4) + for (int_fast8_t x = 1; x < width; ++x) { + out_block[x] = (ref_top[x + 1] + 3 * dc_val + 2) / 4; + } + for (int_fast8_t y = 1; y < width; ++y) { + out_block[y * width] = (ref_left[y + 1] + 3 * dc_val + 2) / 4; + for (int_fast8_t x = 1; x < width; ++x) { + out_block[y * width + x] = dc_val; + } + } +} + + +void kvz_intra_predict( + kvz_intra_references *refs, + int_fast8_t log2_width, + int_fast8_t mode, + color_t color, + kvz_pixel *dst) +{ + const int_fast8_t width = 1 << log2_width; + + const kvz_intra_ref *used_ref = &refs->ref; + if (color != COLOR_Y || mode == 1 || width == 4) { // For chroma, DC and 4x4 blocks, always use unfiltered reference. } else if (mode == 0) { // Otherwise, use filtered for planar. - ref_pixels = rec_filtered; + used_ref = &refs->filtered_ref; } else { // Angular modes use smoothed reference pixels, unless the mode is close // to being either vertical or horizontal. + static const int kvz_intra_hor_ver_dist_thres[5] = { 0, 7, 1, 0, 0 }; int filter_threshold = kvz_intra_hor_ver_dist_thres[g_to_bits[width]]; int dist_from_vert_or_hor = MIN(abs(mode - 26), abs(mode - 10)); if (dist_from_vert_or_hor > filter_threshold) { - ref_pixels = rec_filtered; + used_ref = &refs->filtered_ref; } } + if (used_ref == &refs->filtered_ref && !refs->filtered_initialized) { + intra_filter_reference(log2_width, refs); + } + if (mode == 0) { - kvz_intra_get_planar_pred(ref_pixels, recstride, width, dst, width); + intra_pred_planar(log2_width, used_ref->top, used_ref->left, dst); } else if (mode == 1) { - int i; - kvz_pixel val = kvz_intra_get_dc_pred(ref_pixels, recstride, width); - for (i = 0; i < width * width; i++) { - dst[i] = val; - } // Do extra post filtering for edge pixels of luma DC mode. 
- if (!is_chroma && width < 32) { - kvz_intra_dc_pred_filtering(ref_pixels, recstride, dst, width, width, width); + if (color == COLOR_Y && width < 32) { + intra_pred_filtered_dc(log2_width, used_ref->top, used_ref->left, dst); + } else { + intra_pred_dc(log2_width, used_ref->top, used_ref->left, dst); } } else { - int filter = !is_chroma && width < 32; - kvz_intra_get_angular_pred(encoder, ref_pixels, recstride, dst, width, width, mode, filter); + kvz_angular_pred(log2_width, mode, used_ref->top, used_ref->left, dst); + if (color == COLOR_Y && width < 32) { + if (mode == 10) { + intra_post_process_angular(width, 1, used_ref->top, dst); + } else if (mode == 26) { + intra_post_process_angular(width, width, used_ref->left, dst); + } + } } } - -/** - * \brief Reconstruct intra block according to prediction - * \param rec reconstructed picture data - * \param recstride reconstructed picture stride - * \param width block size to predict - * \param dst destination buffer for best prediction - * \param dststride destination width - * \param mode intra mode to use - * \param chroma chroma-block flag - -*/ -void kvz_intra_recon(const encoder_control_t * const encoder, kvz_pixel* rec, int32_t recstride, uint32_t width, kvz_pixel* dst, int32_t dststride, int8_t mode, int8_t chroma) +void kvz_intra_build_reference( + const int_fast8_t log2_width, + const color_t color, + const vector2d_t *const luma_px, + const vector2d_t *const pic_px, + const lcu_t *const lcu, + kvz_intra_references *const refs) { - kvz_pixel pred[LCU_WIDTH * LCU_WIDTH]; - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - // Generate filtered reference pixels. - { - int x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } - - kvz_intra_get_pred(encoder, rec, recf, recstride, pred, width, mode, chroma); - - kvz_pixels_blit(pred, dst, width, width, width, dststride); -} + assert(log2_width >= 2 && log2_width <= 5); -/** - * \brief Build top and left borders for a reference block. - * \param pic picture to use as a source - * \param outwidth width of the prediction block - * \param chroma signaling if chroma is used, 0 = luma, 1 = U and 2 = V - * - * The end result is 2*width+8 x 2*width+8 array, with only the top and left - * edge pixels filled with the reconstructed pixels. - */ -void kvz_intra_build_reference_border(const encoder_control_t * const encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, - kvz_pixel *dst, int32_t dst_stride, int8_t chroma, - int32_t pic_width, int32_t pic_height, - lcu_t *lcu) -{ - // Some other function might make use of the arrays num_ref_pixels_top and - // num_ref_pixels_left in the future, but until that happens lets leave - // them here. - - /** - * \brief Table for looking up the number of intra reference pixels based on - * prediction units coordinate within an LCU. - * - * This table was generated by "tools/generate_ref_pixel_tables.py". - */ + // Tables for looking up the number of intra reference pixels based on + // prediction units coordinate within an LCU. + // generated by "tools/generate_ref_pixel_tables.py". 
static const uint8_t num_ref_pixels_top[16][16] = { { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, { 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, @@ -336,329 +341,149 @@ { 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 }, { 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 } }; - - /** - * \brief Table for looking up the number of intra reference pixels based on - * prediction units coordinate within an LCU. - * - * This table was generated by "tools/generate_ref_pixel_tables.py". - */ static const uint8_t num_ref_pixels_left[16][16] = { { 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 }, - { 64, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, - { 64, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, - { 64, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, - { 64, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, - { 64, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, - { 64, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, - { 64, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, - { 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 }, - { 64, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, - { 64, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, - { 64, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, - { 64, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, - { 64, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, - { 64, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, - { 64, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } + { 60, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, + { 56, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, + { 52, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, + { 48, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, + { 44, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, + { 40, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, + { 36, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, + { 32, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 }, + { 28, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, + { 24, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, + { 20, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, + { 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, + { 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, + { 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, + { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }; - const kvz_pixel dc_val = 1 << (encoder->bitdepth - 1); - const int is_chroma = chroma ? 1 : 0; + refs->filtered_initialized = false; + kvz_pixel *out_left_ref = &refs->ref.left[0]; + kvz_pixel *out_top_ref = &refs->ref.top[0]; - // input picture pointer - //const pixel * const src = (!chroma) ? pic->y_recdata : ((chroma == 1) ? pic->u_recdata : pic->v_recdata); + const kvz_pixel dc_val = 1 << (KVZ_BIT_DEPTH - 1); + const int is_chroma = color != COLOR_Y ? 1 : 0; + const int_fast8_t width = 1 << log2_width; // Convert luma coordinates to chroma coordinates for chroma. - const int x = chroma ? x_luma / 2 : x_luma; - const int y = chroma ? y_luma / 2 : y_luma; - - const int y_in_lcu = y_luma % LCU_WIDTH; - const int x_in_lcu = x_luma % LCU_WIDTH; - - int x_local = (x_luma&0x3f)>>is_chroma, y_local = (y_luma&0x3f)>>is_chroma; - - kvz_pixel *left_ref = !chroma ? &lcu->left_ref.y[1] : (chroma == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1]; - kvz_pixel *top_ref = !chroma ? &lcu->top_ref.y[1] : (chroma == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1]; - kvz_pixel *rec_ref = !chroma ? lcu->rec.y : (chroma == 1) ? 
lcu->rec.u : lcu->rec.v; - - kvz_pixel *left_border = &left_ref[y_local]; - kvz_pixel *top_border = &top_ref[x_local]; - uint32_t left_stride = 1; + const vector2d_t lcu_px = { + luma_px->x % LCU_WIDTH, + luma_px->y % LCU_WIDTH + }; + const vector2d_t px = { + lcu_px.x >> is_chroma, + lcu_px.y >> is_chroma, + }; - if(x_local) { - left_border = &rec_ref[x_local - 1 + y_local * (LCU_WIDTH>>is_chroma)]; - left_stride = LCU_WIDTH>>is_chroma; - } + // Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0. + const kvz_pixel *left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1]; + const kvz_pixel *top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1]; + const kvz_pixel *rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v; - if(y_local) { - top_border = &rec_ref[x_local + (y_local - 1) * (LCU_WIDTH>>is_chroma)]; + // Init top borders pointer to point to the correct place in the correct reference array. + const kvz_pixel *top_border; + if (px.y) { + top_border = &rec_ref[px.x + (px.y - 1) * (LCU_WIDTH >> is_chroma)]; + } else { + top_border = &top_ref[px.x]; } - // Copy pixels for left edge. - if (x > 0) { - // Get the number of reference pixels based on the PU coordinate within the LCU. - int num_ref_pixels = num_ref_pixels_left[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma; - int i; - kvz_pixel nearest_pixel; - - // Max pixel we can copy from src is yy + outwidth - 1 because the dst - // extends one pixel to the left. - num_ref_pixels = MIN(num_ref_pixels, out_width - 1); - // There are no coded pixels below the frame. - num_ref_pixels = MIN(num_ref_pixels, pic_height - y); - // There are no coded pixels below the bottom of the LCU due to raster - // scan order. - num_ref_pixels = MIN(num_ref_pixels, (LCU_WIDTH - y_in_lcu) >> is_chroma); - - // Copy pixels from coded CUs. - for (i = 0; i < num_ref_pixels; ++i) { - dst[(i + 1) * dst_stride] = left_border[i*left_stride]; - } - // Extend the last pixel for the rest of the reference values. - nearest_pixel = dst[i * dst_stride]; - for (i = num_ref_pixels; i < out_width - 1; ++i) { - dst[i * dst_stride] = nearest_pixel; - } + // Init left borders pointer to point to the correct place in the correct reference array. + const kvz_pixel *left_border; + int left_stride; // Distance between reference samples. + if (px.x) { + left_border = &rec_ref[px.x - 1 + px.y * (LCU_WIDTH >> is_chroma)]; + left_stride = LCU_WIDTH >> is_chroma; } else { - // If we are on the left edge, extend the first pixel of the top row. - kvz_pixel nearest_pixel = y > 0 ? top_border[0] : dc_val; - int i; - for (i = 1; i < out_width - 1; i++) { - dst[i * dst_stride] = nearest_pixel; - } + left_border = &left_ref[px.y]; + left_stride = 1; } - // Copy pixels for top edge. - if (y > 0) { + // Generate left reference. + if (luma_px->x > 0) { // Get the number of reference pixels based on the PU coordinate within the LCU. - int num_ref_pixels = num_ref_pixels_top[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma; - int i; - kvz_pixel nearest_pixel; + int px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma; - // Max pixel we can copy from src is yy + outwidth - 1 because the dst - // extends one pixel to the left. - num_ref_pixels = MIN(num_ref_pixels, out_width - 1); - // All LCUs in the row above have been coded. 
- num_ref_pixels = MIN(num_ref_pixels, pic_width - x); + // Limit the number of available pixels based on block size and dimensions + // of the picture. + px_available_left = MIN(px_available_left, width * 2); + px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma); // Copy pixels from coded CUs. - for (i = 0; i < num_ref_pixels; ++i) { - dst[i + 1] = top_border[i]; + for (int i = 0; i < px_available_left; ++i) { + out_left_ref[i + 1] = left_border[i * left_stride]; } // Extend the last pixel for the rest of the reference values. - nearest_pixel = top_border[num_ref_pixels - 1]; - for (; i < out_width - 1; ++i) { - dst[i + 1] = nearest_pixel; + kvz_pixel nearest_pixel = out_left_ref[px_available_left]; + for (int i = px_available_left; i < width * 2; ++i) { + out_left_ref[i + 1] = nearest_pixel; } } else { - // Extend nearest pixel. - kvz_pixel nearest_pixel = x > 0 ? left_border[0] : dc_val; - int i; - for(i = 1; i < out_width; i++) - { - dst[i] = nearest_pixel; + // If we are on the left edge, extend the first pixel of the top row. + kvz_pixel nearest_pixel = luma_px->y > 0 ? top_border[0] : dc_val; + for (int i = 0; i < width * 2; i++) { + out_left_ref[i + 1] = nearest_pixel; } } - // If top-left corner sample doesn't exist, use the sample from below. - // Unavailable samples on the left boundary are copied from below if - // available. This is the only place they are available because we don't - // support constrained intra prediction. - if (x > 0 && y > 0) { - // Make sure we always take the top-left pixel from the LCU reference - // pixel arrays if they are available. - if (x_local == 0) { - dst[0] = left_border[-1]; + // Generate top-left reference. + if (luma_px->x > 0 && luma_px->y > 0) { + // If the block is at an LCU border, the top-left must be copied from + // the border that points to the LCUs 1D reference buffer. + if (px.x == 0) { + out_left_ref[0] = left_border[-1 * left_stride]; + out_top_ref[0] = left_border[-1 * left_stride]; } else { - dst[0] = top_border[-1]; + out_left_ref[0] = top_border[-1]; + out_top_ref[0] = top_border[-1]; } } else { - dst[0] = dst[dst_stride]; + // Copy reference clockwise. + out_left_ref[0] = out_left_ref[1]; + out_top_ref[0] = out_left_ref[1]; } -} - -const int32_t kvz_ang_table[9] = {0, 2, 5, 9, 13, 17, 21, 26, 32}; -const int32_t kvz_inv_ang_table[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / Angle -/** - * \brief this functions constructs the angular intra prediction from border samples - * - */ -void kvz_intra_get_angular_pred(const encoder_control_t * const encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter) -{ - int32_t k,l; - int32_t blk_size = width; - - // Map the mode index to main prediction direction and angle - bool mode_ver = dir_mode >= 18; - int32_t intra_pred_angle = mode_ver ? dir_mode - 26 : 10 - dir_mode; - int32_t abs_ang = abs(intra_pred_angle); - int32_t sign_ang = intra_pred_angle < 0 ? -1 : 1; - - // Set bitshifts and scale the angle parameter to block size - int32_t inv_angle = kvz_inv_ang_table[abs_ang]; - - // Do angular predictions - kvz_pixel *ref_main; - kvz_pixel *ref_side; - kvz_pixel ref_above[2 * LCU_WIDTH + 1]; - kvz_pixel ref_left[2 * LCU_WIDTH + 1]; - - // Tell clang-analyzer that everything is ok. 
- assert(width == 4 || width == 8 || width == 16 || width == 32); - - abs_ang = kvz_ang_table[abs_ang]; - intra_pred_angle = sign_ang * abs_ang; - - // Initialise the Main and Left reference array. - if (intra_pred_angle < 0) { - int32_t invAngleSum = 128; // rounding for (shift by 8) - for (k = 0; k < blk_size + 1; k++) { - ref_above[k + blk_size - 1] = src[k - src_stride - 1]; - ref_left[k + blk_size - 1] = src[(k - 1) * src_stride - 1]; - } - - ref_main = (mode_ver ? ref_above : ref_left) + (blk_size - 1); - ref_side = (mode_ver ? ref_left : ref_above) + (blk_size - 1); + // Generate top reference. + if (luma_px->y > 0) { + // Get the number of reference pixels based on the PU coordinate within the LCU. + int px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma; - // Extend the Main reference to the left. - for (k = -1; k > blk_size * intra_pred_angle>>5; k--) { - invAngleSum += inv_angle; - ref_main[k] = ref_side[invAngleSum>>8]; - } - } else { - for (k = 0; k < 2 * blk_size + 1; k++) { - ref_above[k] = src[k - src_stride - 1]; - ref_left[k] = src[(k - 1) * src_stride - 1]; - } - ref_main = mode_ver ? ref_above : ref_left; - ref_side = mode_ver ? ref_left : ref_above; - } + // Limit the number of available pixels based on block size and dimensions + // of the picture. + px_available_top = MIN(px_available_top, width * 2); + px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma); - if (intra_pred_angle == 0) { - for (k = 0; k < blk_size; k++) { - for (l = 0; l < blk_size; l++) { - dst[k * dst_stride + l] = ref_main[l + 1]; - } + // Copy all the pixels we can. + for (int i = 0; i < px_available_top; ++i) { + out_top_ref[i + 1] = top_border[i]; } - - if (filter) { - for (k=0;k<blk_size;k++) { - dst[k * dst_stride] = CLIP(0, (1<<encoder->bitdepth) - 1, dst[k * dst_stride] + (( ref_side[k + 1] - ref_side[0]) >> 1)); - } + // Extend the last pixel for the rest of the reference values. + kvz_pixel nearest_pixel = top_border[px_available_top - 1]; + for (int i = px_available_top; i < width * 2; ++i) { + out_top_ref[i + 1] = nearest_pixel; } } else { - int32_t delta_pos=0; - int32_t delta_int; - int32_t delta_fract; - int32_t minus_delta_fract; - int32_t ref_main_index; - for (k = 0; k < blk_size; k++) { - delta_pos += intra_pred_angle; - delta_int = delta_pos >> 5; - delta_fract = delta_pos & (32 - 1); - - - if (delta_fract) { - minus_delta_fract = (32 - delta_fract); - // Do linear filtering - for (l = 0; l < blk_size; l++) { - ref_main_index = l + delta_int + 1; - dst[k * dst_stride + l] = (kvz_pixel) ( (minus_delta_fract * ref_main[ref_main_index] - + delta_fract * ref_main[ref_main_index + 1] + 16) >> 5); - } - } else { - // Just copy the integer samples - for (l = 0; l < blk_size; l++) { - dst[k * dst_stride + l] = ref_main[l + delta_int + 1]; - } - } - } - } - - // Flip the block if this is the horizontal mode - if (!mode_ver) { - kvz_pixel tmp; - for (k=0;k<blk_size-1;k++) { - for (l=k+1;l<blk_size;l++) { - tmp = dst[k * dst_stride + l]; - dst[k * dst_stride + l] = dst[l * dst_stride + k]; - dst[l * dst_stride + k] = tmp; - } + // Extend nearest pixel. + kvz_pixel nearest_pixel = luma_px->x > 0 ? 
left_border[0] : dc_val; + for (int i = 0; i < width * 2; i++) { + out_top_ref[i + 1] = nearest_pixel; } } } - - -void kvz_intra_dc_pred_filtering(const kvz_pixel *src, int32_t src_stride, kvz_pixel *dst, int32_t dst_stride, int32_t width, int32_t height ) +void kvz_intra_recon_lcu_luma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu) { - int32_t x, y, dst_stride2, src_stride2; - - // boundary pixels processing - dst[0] = ((src[-src_stride] + src[-1] + 2 * dst[0] + 2) >> 2); - - for (x = 1; x < width; x++) { - dst[x] = ((src[x - src_stride] + 3 * dst[x] + 2) >> 2); - } - for ( y = 1, dst_stride2 = dst_stride, src_stride2 = src_stride-1; - y < height; y++, dst_stride2+=dst_stride, src_stride2+=src_stride ) { - dst[dst_stride2] = ((src[src_stride2] + 3 * dst[dst_stride2] + 2) >> 2); - } - return; -} - -/** - * \brief Function for deriving planar intra prediction. - * \param src source pixel array - * \param srcstride source width - * \param width block size to predict - * \param dst destination buffer for prediction - * \param dststride destination width - - This function derives the prediction samples for planar mode (intra coding). -*/ -void kvz_intra_get_planar_pred(const kvz_pixel* src, int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride) -{ - int32_t k, l, bottom_left, top_right; - int32_t hor_pred; - int32_t left_column[LCU_WIDTH+1], top_row[LCU_WIDTH+1], bottom_row[LCU_WIDTH+1], right_column[LCU_WIDTH+1]; - uint32_t blk_size = width; - uint32_t offset_2d = width; - uint32_t shift_1d = kvz_g_convert_to_bit[ width ] + 2; - uint32_t shift_2d = shift_1d + 1; - - // Get left and above reference column and row - for (k = 0; k < (int32_t)blk_size + 1; k++) { - top_row[k] = src[k - srcstride]; - left_column[k] = src[k * srcstride - 1]; - } - - // Prepare intermediate variables used in interpolation - bottom_left = left_column[blk_size]; - top_right = top_row[blk_size]; - for (k = 0; k < (int32_t)blk_size; k++) { - bottom_row[k] = bottom_left - top_row[k]; - right_column[k] = top_right - left_column[k]; - top_row[k] <<= shift_1d; - left_column[k] <<= shift_1d; - } - - // Generate prediction signal - for (k = 0; k < (int32_t)blk_size; k++) { - hor_pred = left_column[k] + offset_2d; - for (l = 0; l < (int32_t)blk_size; l++) { - hor_pred += right_column[k]; - top_row[l] += bottom_row[l]; - dst[k * dststride + l] = (kvz_pixel)((hor_pred + top_row[l]) >> shift_2d); - } - } -} - -void kvz_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu) -{ - const encoder_control_t * const encoder = state->encoder_control; const vector2d_t lcu_px = { x & 0x3f, y & 0x3f }; if (cur_cu == NULL) { cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH]; @@ -684,30 +509,33 @@ return; } - { - const uint32_t pic_width = state->tile->frame->width; - const uint32_t pic_height = state->tile->frame->height; - - // Pointers to reconstruction arrays - kvz_pixel *recbase_y = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; - kvz_pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - kvz_pixel *rec_shift = &rec[width * 2 + 8 + 1]; + // Perform intra prediction and put the result in correct place lcu. 
+ vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; + vector2d_t luma_px = { x, y }; + kvz_intra_references refs; + const int_fast8_t log2_width = kvz_g_convert_to_bit[width] + 2; + kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs); - int32_t rec_stride = LCU_WIDTH; + kvz_pixel pred[32 * 32]; + kvz_intra_predict(&refs, log2_width, intra_mode, COLOR_Y, pred); + + kvz_pixel *block_in_lcu = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; + kvz_pixels_blit(pred, block_in_lcu, width, width, width, LCU_WIDTH); - kvz_intra_build_reference_border(encoder, x, y,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0, - pic_width, pic_height, lcu); - kvz_intra_recon(encoder, rec_shift, width * 2 + 8, - width, recbase_y, rec_stride, intra_mode, 0); - - kvz_quantize_lcu_luma_residual(state, x, y, depth, cur_cu, lcu); - } + kvz_quantize_lcu_luma_residual(state, x, y, depth, cur_cu, lcu); } -void kvz_intra_recon_lcu_chroma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu) + +void kvz_intra_recon_lcu_chroma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu) { - const encoder_control_t * const encoder = state->encoder_control; const vector2d_t lcu_px = { x & 0x3f, y & 0x3f }; const int8_t width = LCU_WIDTH >> depth; const int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); @@ -739,44 +567,35 @@ return; } - { - const uint32_t pic_width = state->tile->frame->width; - const uint32_t pic_height = state->tile->frame->height; - - // Pointers to reconstruction arrays - kvz_pixel *recbase_u = &lcu->rec.u[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4]; - kvz_pixel *recbase_v = &lcu->rec.v[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4]; - - kvz_pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - - int32_t rec_stride = LCU_WIDTH; - - // Reconstruct chroma. - if (!(x & 4 || y & 4)) { - kvz_pixel *rec_shift_c = &rec[width_c * 2 + 8 + 1]; - kvz_intra_build_reference_border(encoder, x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1, - pic_width/2, pic_height/2, lcu); - kvz_intra_recon(encoder, - rec_shift_c, - width_c * 2 + 8, - width_c, - recbase_u, - rec_stride >> 1, - intra_mode, - 1); - - kvz_intra_build_reference_border(encoder, x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 2, - pic_width/2, pic_height/2, lcu); - kvz_intra_recon(encoder, - rec_shift_c, - width_c * 2 + 8, - width_c, - recbase_v, - rec_stride >> 1, - intra_mode, - 2); - - kvz_quantize_lcu_chroma_residual(state, x, y, depth, cur_cu, lcu); + if (!(x & 4 || y & 4)) { + const int_fast8_t log2_width_c = kvz_g_convert_to_bit[width_c] + 2; + const vector2d_t luma_px = { x, y }; + const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; + + // Intra predict U-plane and put the result in lcu buffer. + { + kvz_intra_references refs; + kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs); + + kvz_pixel pred[32 * 32]; + kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_U, pred); + + kvz_pixel *pu_in_lcu = &lcu->rec.u[lcu_px.x / 2 + (lcu_px.y * LCU_WIDTH) / 4]; + kvz_pixels_blit(pred, pu_in_lcu, width_c, width_c, width_c, LCU_WIDTH_C); } + + // Intra predict V-plane and put the result in lcu buffer. 
+ { + kvz_intra_references refs; + kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs); + + kvz_pixel pred[32 * 32]; + kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_V, pred); + + kvz_pixel *pu_in_lcu = &lcu->rec.v[lcu_px.x / 2 + (lcu_px.y * LCU_WIDTH) / 4]; + kvz_pixels_blit(pred, pu_in_lcu, width_c, width_c, width_c, LCU_WIDTH_C); + } + + kvz_quantize_lcu_chroma_residual(state, x, y, depth, cur_cu, lcu); } }
View file
kvazaar-0.7.0.tar.gz/src/intra.h -> kvazaar-0.7.1.tar.gz/src/intra.h
Changed
@@ -27,29 +27,91 @@ #include "global.h" -#include "image.h" -#include "encoder.h" #include "encoderstate.h" -//void kvz_intra_set_block_mode(image* im,uint32_t x_ctb, uint32_t y_ctb, uint8_t depth, uint8_t mode, uint8_t part_mode); +typedef struct { + kvz_pixel left[2 * 32 + 1]; + kvz_pixel top[2 * 32 + 1]; +} kvz_intra_ref; +typedef struct +{ + kvz_intra_ref ref; + kvz_intra_ref filtered_ref; + bool filtered_initialized; +} kvz_intra_references; -int8_t kvz_intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds, - const cu_info_t* cur_cu, const cu_info_t* left_cu, const cu_info_t* above_cu); -void kvz_intra_dc_pred_filtering(const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t height ); -void kvz_intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, kvz_pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu); -void kvz_intra_filter(kvz_pixel* ref, int32_t stride, int32_t width, int8_t mode); +/** +* \brief Function for deriving intra luma predictions +* \param pic picture to use +* \param x_cu x CU position (smallest CU) +* \param y_cu y CU position (smallest CU) +* \param preds output buffer for 3 predictions +* \returns (predictions are found)?1:0 +*/ +int8_t kvz_intra_get_dir_luma_predictor( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_cu, + const cu_info_t *const left_cu, + const cu_info_t *const above_cu); -/* Predictions */ -void kvz_intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma); +/** +* \brief Generage angular predictions. +* \param width Width in pixels, range 4..32. +* \param color What color pixels to use. +* \param luma_px Luma coordinates of the prediction block. +* \param pic_px Picture dimensions in luma pixels. +* \param lcu LCU struct. +* \param out_left_ref Left reference pixels, index 0 is the top-left. +* \param out_top_ref Top reference pixels, index 0 is the top-left. +*/ +void kvz_intra_build_reference( + const int_fast8_t log2_width, + const color_t color, + const vector2d_t *const luma_px, + const vector2d_t *const pic_px, + const lcu_t *const lcu, + kvz_intra_references *const refs); -kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel* pic, uint16_t pic_width, uint8_t width); -void kvz_intra_get_planar_pred(const kvz_pixel* src,int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride); -void kvz_intra_get_angular_pred(const encoder_control_t *encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter); +/** + * \brief Generate intra predictions. + * \param refs Reference pixels used for the prediction. + * \param log2_width Width of the predicted block. + * \param mode Intra mode used for the prediction. + * \param color Color of the prediction. + * \param dst Buffer for the predicted pixels. + */ +void kvz_intra_predict( + kvz_intra_references *refs, + int_fast8_t log2_width, + int_fast8_t mode, + color_t color, + kvz_pixel *dst); -void kvz_intra_recon(const encoder_control_t *encoder, kvz_pixel* rec, int32_t rec_stride, uint32_t width, kvz_pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); +/** + * \brief Do a full intra prediction cycle on a CU in lcu for luma. 
+ */ +void kvz_intra_recon_lcu_luma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu); -void kvz_intra_recon_lcu_luma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); -void kvz_intra_recon_lcu_chroma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); +/** +* \brief Do a full intra prediction cycle on a CU in lcu for chroma. +*/ +void kvz_intra_recon_lcu_chroma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu); #endif
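The reworked header above replaces the old border-buffer interface with a two-step API: kvz_intra_build_reference() gathers the left and top reference rows into a kvz_intra_references struct, and kvz_intra_predict() turns them into a prediction block. A minimal caller, modelled on kvz_intra_recon_lcu_luma() in the intra.c hunk above, might look like the sketch below; the helper name is hypothetical, and kvz_g_convert_to_bit and kvz_pixels_blit are assumed to be visible through the encoder's other headers.

/* Hypothetical helper around the 0.7.1 intra API; only intra.h is certain,
 * the other declarations are assumed to come from the encoder's headers. */
#include "intra.h"

static void predict_luma_block(encoder_state_t *state, lcu_t *lcu,
                               int x, int y, int width, int8_t mode,
                               kvz_pixel *dst, int dst_stride)
{
  const vector2d_t pic_px = { state->tile->frame->width,
                              state->tile->frame->height };
  const vector2d_t luma_px = { x, y };
  const int_fast8_t log2_width = kvz_g_convert_to_bit[width] + 2;

  // Step 1: collect the left and top reference pixels for this block.
  kvz_intra_references refs;
  kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs);

  // Step 2: predict into a temporary buffer (32x32 is the largest PU size).
  kvz_pixel pred[32 * 32];
  kvz_intra_predict(&refs, log2_width, mode, COLOR_Y, pred);

  // Copy the prediction to the destination with the caller's stride.
  kvz_pixels_blit(pred, dst, width, width, width, dst_stride);
}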
View file
kvazaar-0.7.0.tar.gz/src/kvazaar.c -> kvazaar-0.7.1.tar.gz/src/kvazaar.c
Changed
@@ -123,6 +123,27 @@ } +static int kvazaar_headers(kvz_encoder *enc, + kvz_data_chunk **data_out, + uint32_t *len_out) +{ + if (data_out) *data_out = NULL; + if (len_out) *len_out = 0; + + bitstream_t stream; + kvz_bitstream_init(&stream); + + kvz_encoder_state_write_parameter_sets(&stream, &enc->states[enc->cur_state_num]); + + // Get stream length before taking chunks since that clears the stream. + if (len_out) *len_out = kvz_bitstream_tell(&stream) / 8; + if (data_out) *data_out = kvz_bitstream_take_chunks(&stream); + + kvz_bitstream_finalize(&stream); + return 1; +} + + static int kvazaar_encode(kvz_encoder *enc, kvz_picture *pic_in, kvz_data_chunk **data_out, @@ -204,6 +225,7 @@ .encoder_open = kvazaar_open, .encoder_close = kvazaar_close, + .encoder_headers = kvazaar_headers, .encoder_encode = kvazaar_encode, };
View file
kvazaar-0.7.0.tar.gz/src/kvazaar.h -> kvazaar-0.7.1.tar.gz/src/kvazaar.h
Changed
@@ -106,7 +106,18 @@ { int32_t qp; /*!< \brief Quantization parameter */ int32_t intra_period; /*!< \brief the period of intra frames in stream */ - int32_t vps_period; /*!< \brief how often the vps is re-sent */ + + /** \brief How often the VPS, SPS and PPS are re-sent + * + * -1: never + * 0: first frame only + * 1: every intra frame + * 2: every other intra frame + * 3: every third intra frame + * and so on + */ + int32_t vps_period; + int32_t width; /*!< \brief frame width, must be a multiple of 8 */ int32_t height; /*!< \brief frame height, must be a multiple of 8 */ double framerate; /*!< \brief Input framerate */ @@ -410,6 +421,26 @@ void (*encoder_close)(kvz_encoder *encoder); /** + * \brief Get parameter sets. + * + * Encode the VPS, SPS and PPS. + * + * If data_out is set to non-NULL values, the caller is responsible for + * calling chunk_free on it. + * + * A null pointer may be passed in place of the parameter data_out or len_out + * to skip returning the corresponding value. + * + * \param encoder encoder + * \param data_out Returns the encoded parameter sets. + * \param len_out Returns number of bytes in the encoded data. + * \return 1 on success, 0 on error. + */ + int (*encoder_headers)(kvz_encoder *encoder, + kvz_data_chunk **data_out, + uint32_t *len_out); + + /** * \brief Encode one frame. * * Add pic_in to the encoding pipeline. If an encoded frame is ready, return
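The encoder_headers entry added above makes it possible to emit the VPS, SPS and PPS without feeding in a picture, which pairs naturally with vps_period = -1 (never re-send) when the parameter sets are delivered out of band. A rough usage sketch, assuming the encoder was opened through the same kvz_api table and that kvz_data_chunk is the usual linked list of byte buffers with data, len and next fields, could be:

/* Illustrative only; error handling beyond the return value is omitted. */
#include <stdio.h>
#include <stdint.h>
#include "kvazaar.h"

static int write_parameter_sets(const kvz_api *api, kvz_encoder *enc, FILE *out)
{
  kvz_data_chunk *chunks = NULL;
  uint32_t len = 0;

  // Encode the VPS, SPS and PPS into a chunk list.
  if (!api->encoder_headers(enc, &chunks, &len)) return 0;

  // Write the chunks out in order; len is the total size in bytes.
  for (kvz_data_chunk *chunk = chunks; chunk != NULL; chunk = chunk->next) {
    fwrite(chunk->data, 1, chunk->len, out);
  }

  // Per the documentation above, the caller must free the chunk list.
  api->chunk_free(chunks);
  return 1;
}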
View file
kvazaar-0.7.0.tar.gz/src/kvazaar_version.h -> kvazaar-0.7.1.tar.gz/src/kvazaar_version.h
Changed
@@ -21,6 +21,6 @@ ****************************************************************************/ // KVZ_API_VERSION is incremented every time the public api changes. -#define KVZ_API_VERSION 7 +#define KVZ_API_VERSION 8 #endif // KVAZAAR_VERSION_H_
View file
kvazaar-0.7.0.tar.gz/src/rdo.c -> kvazaar-0.7.1.tar.gz/src/rdo.c
Changed
@@ -31,6 +31,7 @@ #include "context.h" #include "cabac.h" #include "transform.h" +#include "strategies/strategies-quant.h" #define QUANT_SHIFT 14
View file
kvazaar-0.7.0.tar.gz/src/search_intra.c -> kvazaar-0.7.1.tar.gz/src/search_intra.c
Changed
@@ -271,14 +271,14 @@ static void search_intra_chroma_rough(encoder_state_t * const state, int x_px, int y_px, int depth, const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride, - const kvz_pixel *rec_u, const kvz_pixel *rec_v, int16_t recstride, + kvz_intra_references *refs_u, kvz_intra_references *refs_v, int8_t luma_mode, int8_t modes[5], double costs[5]) { - const bool reconstruct_chroma = !(x_px & 4 || y_px & 4); - if (!reconstruct_chroma) return; + assert(!(x_px & 4 || y_px & 4)); const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); + const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2); for (int i = 0; i < 5; ++i) { costs[i] = 0; @@ -287,16 +287,16 @@ cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); - kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_get_pred(state->encoder_control, rec_u, NULL, recstride, pred, width, modes[i], 1); + kvz_intra_predict(refs_u, log2_width_c, modes[i], COLOR_U, pred); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -304,7 +304,7 @@ kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_get_pred(state->encoder_control, rec_v, NULL, recstride, pred, width, modes[i], 2); + kvz_intra_predict(refs_v, log2_width_c, modes[i], COLOR_V, pred); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -343,41 +343,25 @@ */ static int8_t search_intra_rough(encoder_state_t * const state, kvz_pixel *orig, int32_t origstride, - kvz_pixel *rec, int16_t recstride, - int width, int8_t *intra_preds, + kvz_intra_references *refs, + int log2_width, int8_t *intra_preds, int8_t modes[35], double costs[35]) { + assert(log2_width >= 2 && log2_width <= 5); + int_fast8_t width = 1 << log2_width; cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width); cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width); // Temporary block arrays - kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); - - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - assert(width == 4 || width == 8 || width == 16 || width == 32); // Store original block for SAD computation kvz_pixels_blit(orig, orig_block, width, width, origstride, width); - // Generate filtered reference pixels. 
- { - int16_t x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } - int8_t modes_selected = 0; unsigned min_cost = UINT_MAX; unsigned max_cost = 0; @@ -387,18 +371,15 @@ int offset; if (state->encoder_control->full_intra_search) { offset = 1; - } else if (width == 4) { - offset = 2; - } else if (width == 8) { - offset = 4; } else { - offset = 8; + static const int8_t offsets[4] = { 2, 4, 8, 8 }; + offset = offsets[log2_width - 2]; } // Calculate SAD for evenly spaced modes to select the starting point for // the recursive search. for (int mode = 2; mode <= 34; mode += offset) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; @@ -421,7 +402,7 @@ int8_t center_node = best_mode; int8_t mode = center_node - offset; if (mode >= 2) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; if (costs[modes_selected] < best_cost) { @@ -433,7 +414,7 @@ mode = center_node + offset; if (mode <= 34) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; if (costs[modes_selected] < best_cost) { @@ -460,7 +441,7 @@ } if (!has_mode) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; ++modes_selected; @@ -507,7 +488,6 @@ static int8_t search_intra_rdo(encoder_state_t * const state, int x_px, int y_px, int depth, kvz_pixel *orig, int32_t origstride, - kvz_pixel *rec, int16_t recstride, int8_t *intra_preds, int modes_to_check, int8_t modes[35], double costs[35], @@ -517,31 +497,14 @@ const int width = LCU_WIDTH >> depth; kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; - int rdo_mode; - int pred_mode; - - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - // Generate filtered reference pixels. 
- { - int x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } kvz_pixels_blit(orig, orig_block, width, width, origstride, width); // Check that the predicted modes are in the RDO mode list if (modes_to_check < 35) { - for (pred_mode = 0; pred_mode < 3; pred_mode++) { + for (int pred_mode = 0; pred_mode < 3; pred_mode++) { int mode_found = 0; - for (rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { + for (int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { if (intra_preds[pred_mode] == modes[rdo_mode]) { mode_found = 1; break; @@ -555,42 +518,27 @@ } } - for(rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { + for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds); costs[rdo_mode] = rdo_bitcost * (int)(state->global->cur_lambda_cost + 0.5); -#if 0 - if (width != 4 && tr_depth == depth) { - // This code path has been disabled for now because it increases bdrate - // by 1-2 %. Possibly due to not taking chroma into account during luma - // mode search. Enabling separate chroma search compensates a little, - // but not enough. - - // The idea for this code path is, that it would do the same thing as - // the more general search_intra_trdepth, but would only handle cases - // where transform split or transform skip don't need to be handled. - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, modes[rdo_mode], 0); - costs[rdo_mode] += kvz_rdo_cost_intra(state, pred, orig_block, width, modes[rdo_mode], width == 4 ? 1 : 0); - } else -#endif - { - // Perform transform split search and save mode RD cost for the best one. - cu_info_t pred_cu; - pred_cu.depth = depth; - pred_cu.type = CU_INTRA; - pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N); - pred_cu.intra[0].mode = modes[rdo_mode]; - pred_cu.intra[1].mode = modes[rdo_mode]; - pred_cu.intra[2].mode = modes[rdo_mode]; - pred_cu.intra[3].mode = modes[rdo_mode]; - pred_cu.intra[0].mode_chroma = modes[rdo_mode]; - FILL(pred_cu.cbf, 0); - - // Reset transform split data in lcu.cu for this area. - kvz_lcu_set_trdepth(lcu, x_px, y_px, depth, depth); - - double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu); - costs[rdo_mode] += mode_cost; - } + + // Perform transform split search and save mode RD cost for the best one. + cu_info_t pred_cu; + pred_cu.depth = depth; + pred_cu.type = CU_INTRA; + pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N); + pred_cu.intra[0].mode = modes[rdo_mode]; + pred_cu.intra[1].mode = modes[rdo_mode]; + pred_cu.intra[2].mode = modes[rdo_mode]; + pred_cu.intra[3].mode = modes[rdo_mode]; + pred_cu.intra[0].mode_chroma = modes[rdo_mode]; + FILL(pred_cu.cbf, 0); + + // Reset transform split data in lcu.cu for this area. 
+ kvz_lcu_set_trdepth(lcu, x_px, y_px, depth, depth); + + double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu); + costs[rdo_mode] += mode_cost; } // The best transform split hierarchy is not saved anywhere, so to get the @@ -697,7 +645,6 @@ const int x_px, const int y_px, const int depth, lcu_t *lcu) { - const videoframe_t * const frame = state->tile->frame; const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH; @@ -726,23 +673,15 @@ // FIXME: It might make more sense to only disable rough search if // num_modes is 0.is 0. if (num_modes != 1 && num_modes != 5) { - kvz_pixel rec_u[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; - kvz_pixel rec_v[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; - - const int16_t width_c = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); - const int16_t rec_stride = width_c * 2 + 8; - const int16_t out_stride = rec_stride; - - kvz_intra_build_reference_border(state->encoder_control, - x_px, y_px, out_stride, - rec_u, rec_stride, COLOR_U, - frame->width / 2, frame->height / 2, - lcu); - kvz_intra_build_reference_border(state->encoder_control, - x_px, y_px, out_stride, - rec_v, rec_stride, COLOR_V, - frame->width / 2, frame->height / 2, - lcu); + const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2); + const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; + const vector2d_t luma_px = { x_px, y_px }; + + kvz_intra_references refs_u; + kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs_u); + + kvz_intra_references refs_v; + kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs_v); vector2d_t lcu_cpx = { lcu_px.x / 2, lcu_px.y / 2 }; kvz_pixel *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; @@ -750,7 +689,7 @@ search_intra_chroma_rough(state, x_px, y_px, depth, ref_u, ref_v, LCU_WIDTH_C, - &rec_u[rec_stride + 1], &rec_v[rec_stride + 1], rec_stride, + &refs_u, &refs_v, intra_mode, modes, costs); } @@ -771,16 +710,15 @@ const int x_px, const int y_px, const int depth, lcu_t *lcu) { - const videoframe_t * const frame = state->tile->frame; const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const int8_t cu_width = (LCU_WIDTH >> (depth)); const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH; + const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth; cu_info_t *cur_cu = &lcu->cu[cu_index]; - kvz_pixel rec_buffer[(LCU_WIDTH * 2 + 1) * (LCU_WIDTH * 2 + 1)]; - kvz_pixel *cu_in_rec_buffer = &rec_buffer[cu_width * 2 + 8 + 1]; + kvz_intra_references refs; int8_t candidate_modes[3]; @@ -798,12 +736,9 @@ kvz_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu); if (depth > 0) { - // Build reconstructed block to use in prediction with extrapolated borders - kvz_intra_build_reference_border(state->encoder_control, x_px, y_px, cu_width * 2 + 8, - rec_buffer, cu_width * 2 + 8, 0, - frame->width, - frame->height, - lcu); + const vector2d_t luma_px = { x_px, y_px }; + const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; + kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs); } int8_t modes[35]; @@ -817,10 +752,10 @@ bool skip_rough_search = (depth == 0 || state->encoder_control->rdo >= 3); if (!skip_rough_search) { 
number_of_modes = search_intra_rough(state, - ref_pixels, LCU_WIDTH, - cu_in_rec_buffer, cu_width * 2 + 8, - cu_width, candidate_modes, - modes, costs); + ref_pixels, LCU_WIDTH, + &refs, + log2_width, candidate_modes, + modes, costs); } else { number_of_modes = 35; for (int i = 0; i < number_of_modes; ++i) { @@ -849,7 +784,6 @@ number_of_modes = search_intra_rdo(state, x_px, y_px, depth, ref_pixels, LCU_WIDTH, - cu_in_rec_buffer, cu_width * 2 + 8, candidate_modes, num_modes_to_check, modes, costs, lcu);
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/intra-avx2.c
Added
@@ -0,0 +1,176 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +/* + * \file + */ + +#include <stdlib.h> + +#include "intra-avx2.h" +#include "strategyselector.h" + +#if COMPILE_INTEL_AVX2 +#include <immintrin.h> + + /** + * \brief Generage angular predictions. + * \param log2_width Log2 of width, range 2..5. + * \param intra_mode Angular mode in range 2..34. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. + */ +static void kvz_angular_pred_avx2( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst) +{ + assert(log2_width >= 2 && log2_width <= 5); + assert(intra_mode >= 2 && intra_mode <= 34); + + static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 }; + static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp + + // Temporary buffer for modes 11-25. + // It only needs to be big enough to hold indices from -width to width-1. + kvz_pixel tmp_ref[2 * 32]; + const int_fast8_t width = 1 << log2_width; + + // Whether to swap references to always project on the left reference row. + const bool vertical_mode = intra_mode >= 18; + // Modes distance to horizontal or vertical mode. + const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode; + // Sample displacement per column in fractions of 32. + const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; + + // Pointer for the reference we are interpolating from. + const kvz_pixel *ref_main; + // Pointer for the other reference. + const kvz_pixel *ref_side; + + // Set ref_main and ref_side such that, when indexed with 0, they point to + // index 0 in block coordinates. + if (sample_disp < 0) { + // Negative sample_disp means, we need to use both references. + + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + + // Move the reference pixels to start from the middle to the later half of + // the tmp_ref, so there is room for negative indices. + for (int_fast8_t x = -1; x < width; ++x) { + tmp_ref[x + width] = ref_main[x]; + } + // Get a pointer to block index 0 in tmp_ref. + ref_main = &tmp_ref[width]; + + // Extend the side reference to the negative indices of main reference. 
+ int_fast32_t col_sample_disp = 128; // rounding for the ">> 8" + int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)]; + int_fast8_t most_negative_index = (width * sample_disp) >> 5; + for (int_fast8_t x = -2; x >= most_negative_index; --x) { + col_sample_disp += inv_abs_sample_disp; + int_fast8_t side_index = col_sample_disp >> 8; + tmp_ref[x + width] = ref_side[side_index - 1]; + } + } + else { + // sample_disp >= 0 means we don't need to refer to negative indices, + // which means we can just use the references as is. + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + } + + if (sample_disp != 0) { + // The mode is not horizontal or vertical, we have to do interpolation. + + int_fast16_t delta_pos = 0; + for (int_fast8_t y = 0; y < width; ++y) { + delta_pos += sample_disp; + int_fast8_t delta_int = delta_pos >> 5; + int_fast8_t delta_fract = delta_pos & (32 - 1); + + if (delta_fract) { + // Do linear filtering + if (width < 8) { + for (int_fast8_t x = 0; x < width; ++x) { + kvz_pixel ref1 = ref_main[x + delta_int]; + kvz_pixel ref2 = ref_main[x + delta_int + 1]; + dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5; + } + } else { + struct { uint8_t w1; uint8_t w2; } packed_weights = { 32 - delta_fract, delta_fract }; + __m128i v_weights = _mm_set1_epi16(*(int16_t*)&packed_weights); + + for (int_fast8_t x = 0; x < width; x += 8) { + __m128i v_ref1 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int])); + __m128i v_ref2 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int + 1])); + __m128i v_refs = _mm_unpacklo_epi8(v_ref1, v_ref2); + __m128i v_tmp = _mm_maddubs_epi16(v_refs, v_weights); + v_tmp = _mm_add_epi16(v_tmp, _mm_set1_epi16(16)); + v_tmp = _mm_srli_epi16(v_tmp, 5); + v_tmp = _mm_packus_epi16(v_tmp, v_tmp); + _mm_storel_epi64((__m128i*)(dst + y * width + x), v_tmp); + } + } + } + else { + // Just copy the integer samples + for (int_fast8_t x = 0; x < width; x+=4) { + *(int32_t*)(&dst[y * width + x]) = *(int32_t*)(&ref_main[x + delta_int]); + } + } + } + } + else { + // Mode is horizontal or vertical, just copy the pixels. + + for (int_fast8_t y = 0; y < width; ++y) { + for (int_fast8_t x = 0; x < width; x+=4) { + *(int32_t*)&(dst[y * width + x]) = *(int32_t*)&(ref_main[x]); + } + } + } + + // Flip the block if this is was a horizontal mode. + if (!vertical_mode) { + for (int_fast8_t y = 0; y < width - 1; ++y) { + for (int_fast8_t x = y + 1; x < width; ++x) { + SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel); + } + } + } +} + +#endif //COMPILE_INTEL_AVX2 + +int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth) +{ + bool success = true; +#if COMPILE_INTEL_AVX2 + if (bitdepth == 8) { + success &= kvz_strategyselector_register(opaque, "angular_pred", "avx2", 40, &kvz_angular_pred_avx2); + } +#endif //COMPILE_INTEL_AVX2 + return success; +}
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/intra-avx2.h
Added
@@ -0,0 +1,27 @@ +#ifndef STRATEGIES_INTRA_AVX2_H_ +#define STRATEGIES_INTRA_AVX2_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_INTRA_AVX2_H_
View file
kvazaar-0.7.0.tar.gz/src/strategies/avx2/picture-avx2.c -> kvazaar-0.7.1.tar.gz/src/strategies/avx2/picture-avx2.c
Changed
@@ -26,6 +26,7 @@ #if COMPILE_INTEL_AVX2 # include "image.h" +# include "strategies/strategies-common.h" # include <immintrin.h> @@ -136,6 +137,186 @@ return m256i_horizontal_sum(sum0); } +static unsigned satd_8bit_4x4_avx2(const kvz_pixel *org, const kvz_pixel *cur) +{ + + __m128i original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)org)); + __m128i current = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)cur)); + + __m128i diff_lo = _mm_sub_epi16(current, original); + + original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)(org + 8))); + current = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)(cur + 8))); + + __m128i diff_hi = _mm_sub_epi16(current, original); + + + //Hor + __m128i row0 = _mm_hadd_epi16(diff_lo, diff_hi); + __m128i row1 = _mm_hsub_epi16(diff_lo, diff_hi); + + __m128i row2 = _mm_hadd_epi16(row0, row1); + __m128i row3 = _mm_hsub_epi16(row0, row1); + + //Ver + row0 = _mm_hadd_epi16(row2, row3); + row1 = _mm_hsub_epi16(row2, row3); + + row2 = _mm_hadd_epi16(row0, row1); + row3 = _mm_hsub_epi16(row0, row1); + + //Abs and sum + row2 = _mm_abs_epi16(row2); + row3 = _mm_abs_epi16(row3); + + row3 = _mm_add_epi16(row2, row3); + + row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, KVZ_PERMUTE(2, 3, 0, 1) )); + row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, KVZ_PERMUTE(1, 0, 1, 0) )); + row3 = _mm_add_epi16(row3, _mm_shufflelo_epi16(row3, KVZ_PERMUTE(1, 0, 1, 0) )); + + unsigned sum = _mm_extract_epi16(row3, 0); + unsigned satd = (sum + 1) >> 1; + + return satd; +} + +static void hor_add_sub_avx2(__m128i *row0, __m128i *row1){ + + __m128i a = _mm_hadd_epi16(*row0, *row1); + __m128i b = _mm_hsub_epi16(*row0, *row1); + + __m128i c = _mm_hadd_epi16(a, b); + __m128i d = _mm_hsub_epi16(a, b); + + *row0 = _mm_hadd_epi16(c, d); + *row1 = _mm_hsub_epi16(c, d); +} + +static INLINE void ver_add_sub_avx2(__m128i temp_hor[8], __m128i temp_ver[8]){ + + // First stage + for (int i = 0; i < 8; i += 2){ + temp_ver[i+0] = _mm_hadd_epi16(temp_hor[i + 0], temp_hor[i + 1]); + temp_ver[i+1] = _mm_hsub_epi16(temp_hor[i + 0], temp_hor[i + 1]); + } + + // Second stage + for (int i = 0; i < 8; i += 4){ + temp_hor[i + 0] = _mm_add_epi16(temp_ver[i + 0], temp_ver[i + 2]); + temp_hor[i + 1] = _mm_add_epi16(temp_ver[i + 1], temp_ver[i + 3]); + temp_hor[i + 2] = _mm_sub_epi16(temp_ver[i + 0], temp_ver[i + 2]); + temp_hor[i + 3] = _mm_sub_epi16(temp_ver[i + 1], temp_ver[i + 3]); + } + + // Third stage + for (int i = 0; i < 4; ++i){ + temp_ver[i + 0] = _mm_add_epi16(temp_hor[0 + i], temp_hor[4 + i]); + temp_ver[i + 4] = _mm_sub_epi16(temp_hor[0 + i], temp_hor[4 + i]); + } +} + +INLINE static void haddwd_accumulate_avx2(__m128i *accumulate, __m128i *ver_row) +{ + __m128i abs_value = _mm_abs_epi16(*ver_row); + *accumulate = _mm_add_epi32(*accumulate, _mm_madd_epi16(abs_value, _mm_set1_epi16(1))); +} + +INLINE static unsigned sum_block_avx2(__m128i *ver_row) +{ + __m128i sad = _mm_setzero_si128(); + haddwd_accumulate_avx2(&sad, ver_row + 0); + haddwd_accumulate_avx2(&sad, ver_row + 1); + haddwd_accumulate_avx2(&sad, ver_row + 2); + haddwd_accumulate_avx2(&sad, ver_row + 3); + haddwd_accumulate_avx2(&sad, ver_row + 4); + haddwd_accumulate_avx2(&sad, ver_row + 5); + haddwd_accumulate_avx2(&sad, ver_row + 6); + haddwd_accumulate_avx2(&sad, ver_row + 7); + + sad = _mm_add_epi32(sad, _mm_shuffle_epi32(sad, KVZ_PERMUTE(2, 3, 0, 1))); + sad = _mm_add_epi32(sad, _mm_shuffle_epi32(sad, KVZ_PERMUTE(1, 0, 1, 0))); + + return _mm_cvtsi128_si32(sad); +} + +INLINE static __m128i diff_row_avx2(const kvz_pixel *buf1, const 
kvz_pixel *buf2) +{ + __m128i buf1_row = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)buf1)); + __m128i buf2_row = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)buf2)); + return _mm_sub_epi16(buf1_row, buf2_row); +} + +INLINE static void diff_blocks_and_hor_transform_avx2(__m128i row_diff[8], const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) +{ + row_diff[0] = diff_row_avx2(buf1 + 0 * stride1, buf2 + 0 * stride2); + row_diff[1] = diff_row_avx2(buf1 + 1 * stride1, buf2 + 1 * stride2); + hor_add_sub_avx2(row_diff + 0, row_diff + 1); + + row_diff[2] = diff_row_avx2(buf1 + 2 * stride1, buf2 + 2 * stride2); + row_diff[3] = diff_row_avx2(buf1 + 3 * stride1, buf2 + 3 * stride2); + hor_add_sub_avx2(row_diff + 2, row_diff + 3); + + row_diff[4] = diff_row_avx2(buf1 + 4 * stride1, buf2 + 4 * stride2); + row_diff[5] = diff_row_avx2(buf1 + 5 * stride1, buf2 + 5 * stride2); + hor_add_sub_avx2(row_diff + 4, row_diff + 5); + + row_diff[6] = diff_row_avx2(buf1 + 6 * stride1, buf2 + 6 * stride2); + row_diff[7] = diff_row_avx2(buf1 + 7 * stride1, buf2 + 7 * stride2); + hor_add_sub_avx2(row_diff + 6, row_diff + 7); +} + +static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) +{ + __m128i temp_hor[8]; + __m128i temp_ver[8]; + + diff_blocks_and_hor_transform_avx2(temp_hor, buf1, stride1, buf2, stride2); + + ver_add_sub_avx2(temp_hor, temp_ver); + + unsigned sad = sum_block_avx2(temp_ver); + + unsigned result = (sad + 2) >> 2; + return result; +} + +// Function macro for defining hadamard calculating functions +// for fixed size blocks. They calculate hadamard for integer +// multiples of 8x8 with the 8x8 hadamard function. +#define SATD_NXN_AVX2(n) \ +static unsigned satd_8bit_ ## n ## x ## n ## _avx2( \ + const kvz_pixel * const block1, const kvz_pixel * const block2) \ +{ \ + unsigned x, y; \ + unsigned sum = 0; \ + for (y = 0; y < (n); y += 8) { \ + unsigned row = y * (n); \ + for (x = 0; x < (n); x += 8) { \ + sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (n), &block2[row + x], (n)); \ + } \ + } \ + return sum>>(KVZ_BIT_DEPTH-8); \ +} + +static unsigned satd_8bit_8x8_avx2( + const kvz_pixel * const block1, const kvz_pixel * const block2) +{ + unsigned x, y; + unsigned sum = 0; + for (y = 0; y < (8); y += 8) { + unsigned row = y * (8); + for (x = 0; x < (8); x += 8) { + sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (8), &block2[row + x], (8)); + } + } + return sum>>(KVZ_BIT_DEPTH-8); \ +} + +//SATD_NXN_AVX2(8) //Use the non-macro version +SATD_NXN_AVX2(16) +SATD_NXN_AVX2(32) +SATD_NXN_AVX2(64) #endif //COMPILE_INTEL_AVX2 @@ -153,6 +334,12 @@ success &= kvz_strategyselector_register(opaque, "sad_16x16", "avx2", 40, &sad_8bit_16x16_avx2); success &= kvz_strategyselector_register(opaque, "sad_32x32", "avx2", 40, &sad_8bit_32x32_avx2); success &= kvz_strategyselector_register(opaque, "sad_64x64", "avx2", 40, &sad_8bit_64x64_avx2); + + success &= kvz_strategyselector_register(opaque, "satd_4x4", "avx2", 40, &satd_8bit_4x4_avx2); + success &= kvz_strategyselector_register(opaque, "satd_8x8", "avx2", 40, &satd_8bit_8x8_avx2); + success &= kvz_strategyselector_register(opaque, "satd_16x16", "avx2", 40, &satd_8bit_16x16_avx2); + success &= kvz_strategyselector_register(opaque, "satd_32x32", "avx2", 40, &satd_8bit_32x32_avx2); + success &= kvz_strategyselector_register(opaque, "satd_64x64", "avx2", 40, &satd_8bit_64x64_avx2); } #endif return success;
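The AVX2 SATD kernels above are registered under the same "satd_NxN" strategy names as the existing generic versions, so callers pick them up automatically through the function-pointer lookup already used in search_intra.c. A small sketch of that consumer side, with the header location assumed, not confirmed by the patch:

/* Sketch of how the registered SATD strategies are consumed; pred and orig
 * are assumed to be width*width blocks with no stride. */
#include "strategies/strategies-picture.h"  /* assumed header for the lookup */

static unsigned block_satd(const kvz_pixel *pred, const kvz_pixel *orig, int width)
{
  // The lookup returns the highest-priority registered implementation,
  // e.g. satd_8bit_16x16_avx2 above when AVX2 is available at runtime.
  cost_pixel_nxn_func *satd = kvz_pixels_get_satd_func(width);
  return satd(pred, orig);
}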
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/quant-avx2.c
Added
@@ -0,0 +1,220 @@ +/***************************************************************************** +* This file is part of Kvazaar HEVC encoder. +* +* Copyright (C) 2013-2015 Tampere University of Technology and others (see +* COPYING file). +* +* Kvazaar is free software: you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the +* Free Software Foundation; either version 2.1 of the License, or (at your +* option) any later version. +* +* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY +* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for +* more details. +* +* You should have received a copy of the GNU General Public License along +* with Kvazaar. If not, see <http://www.gnu.org/licenses/>. +****************************************************************************/ + +/* +* \file +*/ + +#include <stdlib.h> + +#include "quant-avx2.h" +#include "../generic/quant-generic.h" +#include "../strategies-common.h" +#include "strategyselector.h" +#include "encoder.h" +#include "transform.h" + +#if COMPILE_INTEL_AVX2 +#include <immintrin.h> + +/** +* \brief quantize transformed coefficents +* +*/ + +void kvz_quant_flat_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{ + const encoder_control_t * const encoder = state->encoder_control; + const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; + const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; + + int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth - 8) * 6); + const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2; + const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); + const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6]; + const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform + const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + transform_shift; + const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 
171 : 85) << (q_bits - 9); + const int32_t q_bits8 = q_bits - 8; + + assert(quant_coeff[0] <= (1 << 15) - 1 && quant_coeff[0] >= -(1 << 15)); //Assuming flat values to fit int16_t + + uint32_t ac_sum = 0; + + __m256i v_ac_sum = _mm256_setzero_si256(); + __m256i v_quant_coeff = _mm256_set1_epi16(quant_coeff[0]); + + for (int32_t n = 0; n < width * height; n += 16) { + + __m256i v_level = _mm256_loadu_si256((__m256i*)&(coef[n])); + __m256i v_sign = _mm256_cmpgt_epi16(_mm256_setzero_si256(), v_level); + v_sign = _mm256_or_si256(v_sign, _mm256_set1_epi16(1)); + + v_level = _mm256_abs_epi16(v_level); + __m256i low_a = _mm256_unpacklo_epi16(v_level, _mm256_set1_epi16(0)); + __m256i high_a = _mm256_unpackhi_epi16(v_level, _mm256_set1_epi16(0)); + + __m256i low_b = _mm256_unpacklo_epi16(v_quant_coeff, _mm256_set1_epi16(0)); + __m256i high_b = _mm256_unpackhi_epi16(v_quant_coeff, _mm256_set1_epi16(0)); + + __m256i v_level32_a = _mm256_madd_epi16(low_a, low_b); + __m256i v_level32_b = _mm256_madd_epi16(high_a, high_b); + + v_level32_a = _mm256_add_epi32(v_level32_a, _mm256_set1_epi32(add)); + v_level32_b = _mm256_add_epi32(v_level32_b, _mm256_set1_epi32(add)); + + v_level32_a = _mm256_srai_epi32(v_level32_a, q_bits); + v_level32_b = _mm256_srai_epi32(v_level32_b, q_bits); + + v_level = _mm256_packs_epi32(v_level32_a, v_level32_b); + v_level = _mm256_sign_epi16(v_level, v_sign); + + _mm256_storeu_si256((__m256i*)&(q_coef[n]), v_level); + + v_ac_sum = _mm256_add_epi32(v_ac_sum, v_level32_a); + v_ac_sum = _mm256_add_epi32(v_ac_sum, v_level32_b); + } + + __m128i temp = _mm_add_epi32(_mm256_castsi256_si128(v_ac_sum), _mm256_extracti128_si256(v_ac_sum, 1)); + temp = _mm_add_epi32(temp, _mm_shuffle_epi32(temp, KVZ_PERMUTE(2, 3, 0, 1))); + temp = _mm_add_epi32(temp, _mm_shuffle_epi32(temp, KVZ_PERMUTE(1, 0, 1, 0))); + ac_sum += _mm_cvtsi128_si32(temp); + + if (!(encoder->sign_hiding && ac_sum >= 2)) return; + + int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; + + for (int32_t n = 0; n < width * height; n++) { + int32_t level; + level = coef[n]; + level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; + delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); + } + + if (ac_sum >= 2) { +#define SCAN_SET_SIZE 16 +#define LOG2_SCAN_SET_SIZE 4 + int32_t n, last_cg = -1, abssum = 0, subset, subpos; + for (subset = (width*height - 1) >> LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { + int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg = -1; + subpos = subset << LOG2_SCAN_SET_SIZE; + abssum = 0; + + // Find last coeff pos + for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { + if (q_coef[scan[n + subpos]]) { + last_nz_pos_in_cg = n; + break; + } + } + + // First coeff pos + for (n = 0; n <SCAN_SET_SIZE; n++) { + if (q_coef[scan[n + subpos]]) { + first_nz_pos_in_cg = n; + break; + } + } + + // Sum all kvz_quant coeffs between first and last + for (n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { + abssum += q_coef[scan[n + subpos]]; + } + + if (last_nz_pos_in_cg >= 0 && last_cg == -1) { + last_cg = 1; + } + + if (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { + int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1); + if (signbit != (abssum & 0x1)) { // compare signbit with sum_parity + int32_t min_cost_inc = 0x7fffffff, min_pos = -1, cur_cost = 0x7fffffff; + int16_t final_change = 0, cur_change = 0; + for (n = (last_cg == 1 ? 
last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { + uint32_t blkPos = scan[n + subpos]; + if (q_coef[blkPos] != 0) { + if (delta_u[blkPos] > 0) { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + else if (n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = delta_u[blkPos]; + cur_change = -1; + } + } + else if (n < first_nz_pos_in_cg && ((coef[blkPos] >= 0) ? 0 : 1) != signbit) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + + if (cur_cost < min_cost_inc) { + min_cost_inc = cur_cost; + final_change = cur_change; + min_pos = blkPos; + } + } // CG loop + + if (q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { + final_change = -1; + } + + if (coef[min_pos] >= 0) q_coef[min_pos] += final_change; + else q_coef[min_pos] -= final_change; + } // Hide + } + if (last_cg == 1) last_cg = 0; + } + +#undef SCAN_SET_SIZE +#undef LOG2_SCAN_SET_SIZE + } +} + +void kvz_quant_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{ + if (state->encoder_control->scaling_list.enable){ + kvz_quant_generic(state, coef, q_coef, width, height, type, scan_idx, block_type); + } + else { + kvz_quant_flat_avx2(state, coef, q_coef, width, height, type, scan_idx, block_type); + } +} + +#endif //COMPILE_INTEL_AVX2 + + +int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth) +{ + bool success = true; + +#if COMPILE_INTEL_AVX2 + success &= kvz_strategyselector_register(opaque, "quant", "avx2", 40, &kvz_quant_avx2); +#endif //COMPILE_INTEL_AVX2 + + return success; +}
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/quant-avx2.h
Added
@@ -0,0 +1,26 @@ +#ifndef STRATEGIES_QUANT_AVX2_H_ +#define STRATEGIES_QUANT_AVX2_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> + +int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_QUANT_AVX2_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/intra-generic.c
Added
@@ -0,0 +1,154 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +/* + * \file + */ + +#include <stdlib.h> + +#include "intra-generic.h" +#include "strategyselector.h" + + + /** + * \brief Generage angular predictions. + * \param log2_width Log2 of width, range 2..5. + * \param intra_mode Angular mode in range 2..34. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. + */ +static void kvz_angular_pred_generic( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst) +{ + assert(log2_width >= 2 && log2_width <= 5); + assert(intra_mode >= 2 && intra_mode <= 34); + + static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 }; + static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp + + // Temporary buffer for modes 11-25. + // It only needs to be big enough to hold indices from -width to width-1. + kvz_pixel tmp_ref[2 * 32]; + const int_fast8_t width = 1 << log2_width; + + // Whether to swap references to always project on the left reference row. + const bool vertical_mode = intra_mode >= 18; + // Modes distance to horizontal or vertical mode. + const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode; + // Sample displacement per column in fractions of 32. + const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; + + // Pointer for the reference we are interpolating from. + const kvz_pixel *ref_main; + // Pointer for the other reference. + const kvz_pixel *ref_side; + + // Set ref_main and ref_side such that, when indexed with 0, they point to + // index 0 in block coordinates. + if (sample_disp < 0) { + // Negative sample_disp means, we need to use both references. + + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + + // Move the reference pixels to start from the middle to the later half of + // the tmp_ref, so there is room for negative indices. + for (int_fast8_t x = -1; x < width; ++x) { + tmp_ref[x + width] = ref_main[x]; + } + // Get a pointer to block index 0 in tmp_ref. + ref_main = &tmp_ref[width]; + + // Extend the side reference to the negative indices of main reference. 
+ int_fast32_t col_sample_disp = 128; // rounding for the ">> 8" + int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)]; + int_fast8_t most_negative_index = (width * sample_disp) >> 5; + for (int_fast8_t x = -2; x >= most_negative_index; --x) { + col_sample_disp += inv_abs_sample_disp; + int_fast8_t side_index = col_sample_disp >> 8; + tmp_ref[x + width] = ref_side[side_index - 1]; + } + } + else { + // sample_disp >= 0 means we don't need to refer to negative indices, + // which means we can just use the references as is. + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + } + + if (sample_disp != 0) { + // The mode is not horizontal or vertical, we have to do interpolation. + + int_fast16_t delta_pos = 0; + for (int_fast8_t y = 0; y < width; ++y) { + delta_pos += sample_disp; + int_fast8_t delta_int = delta_pos >> 5; + int_fast8_t delta_fract = delta_pos & (32 - 1); + + if (delta_fract) { + // Do linear filtering + for (int_fast8_t x = 0; x < width; ++x) { + kvz_pixel ref1 = ref_main[x + delta_int]; + kvz_pixel ref2 = ref_main[x + delta_int + 1]; + dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5; + } + } + else { + // Just copy the integer samples + for (int_fast8_t x = 0; x < width; x++) { + dst[y * width + x] = ref_main[x + delta_int]; + } + } + } + } + else { + // Mode is horizontal or vertical, just copy the pixels. + + for (int_fast8_t y = 0; y < width; ++y) { + for (int_fast8_t x = 0; x < width; ++x) { + dst[y * width + x] = ref_main[x]; + } + } + } + + // Flip the block if this is was a horizontal mode. + if (!vertical_mode) { + for (int_fast8_t y = 0; y < width - 1; ++y) { + for (int_fast8_t x = y + 1; x < width; ++x) { + SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel); + } + } + } +} + +int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth) +{ + bool success = true; + + success &= kvz_strategyselector_register(opaque, "angular_pred", "generic", 0, &kvz_angular_pred_generic); + + return success; +}
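To make the projection arithmetic in kvz_angular_pred_generic() concrete, consider intra mode 30: it is a vertical-family mode, so mode_disp = 30 - 26 = 4 and sample_disp = modedisp2sampledisp[4] = 13. On the first row delta_pos becomes 13, so delta_int = 0 and delta_fract = 13, and every output pixel is a 19/13 blend of two neighbouring reference samples. The snippet below illustrates only that one row, using plain uint8_t in place of kvz_pixel; the function name is hypothetical.

/* Worked example of a single row of the angular interpolation above. */
#include <stdint.h>

static void predict_first_row_mode30(const uint8_t *ref_main, uint8_t *dst, int width)
{
  const int sample_disp = 13;        // modedisp2sampledisp[abs(30 - 26)]
  int delta_pos = sample_disp;       // accumulated displacement for row y = 0
  int delta_int = delta_pos >> 5;    // 0: start from the same reference sample
  int delta_fract = delta_pos & 31;  // 13: a 13/32-pel fractional offset

  for (int x = 0; x < width; ++x) {
    int ref1 = ref_main[x + delta_int];
    int ref2 = ref_main[x + delta_int + 1];
    // Same rounding as the generic code: ((32-13)*ref1 + 13*ref2 + 16) >> 5.
    dst[x] = (uint8_t)(((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5);
  }
}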
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/intra-generic.h
Added
@@ -0,0 +1,27 @@ +#ifndef STRATEGIES_INTRA_GENERIC_H_ +#define STRATEGIES_INTRA_GENERIC_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_INTRA_GENERIC_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/quant-generic.c
Added
@@ -0,0 +1,173 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +/* + * \file + */ + +#include <stdlib.h> + +#include "quant-generic.h" +#include "strategyselector.h" +#include "encoder.h" +#include "transform.h" + +#define QUANT_SHIFT 14 +/** +* \brief quantize transformed coefficents +* +*/ +void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{ + const encoder_control_t * const encoder = state->encoder_control; + const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; + const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; + + int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth - 8) * 6); + const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2; + const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); + const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6]; + const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform + const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + transform_shift; + const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9); + const int32_t q_bits8 = q_bits - 8; + + uint32_t ac_sum = 0; + + for (int32_t n = 0; n < width * height; n++) { + int32_t level; + int32_t sign; + + level = coef[n]; + sign = (level < 0 ? -1 : 1); + + level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; + ac_sum += level; + + level *= sign; + q_coef[n] = (coeff_t)(CLIP(-32768, 32767, level)); + } + + if (!(encoder->sign_hiding && ac_sum >= 2)) return; + + int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; + + for (int32_t n = 0; n < width * height; n++) { + int32_t level; + level = coef[n]; + level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; + delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); + } + + if (ac_sum >= 2) { +#define SCAN_SET_SIZE 16 +#define LOG2_SCAN_SET_SIZE 4 + int32_t n, last_cg = -1, abssum = 0, subset, subpos; + for (subset = (width*height - 1) >> LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { + int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg = -1; + subpos = subset << LOG2_SCAN_SET_SIZE; + abssum = 0; + + // Find last coeff pos + for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { + if (q_coef[scan[n + subpos]]) { + last_nz_pos_in_cg = n; + break; + } + } + + // First coeff pos + for (n = 0; n <SCAN_SET_SIZE; n++) { + if (q_coef[scan[n + subpos]]) { + first_nz_pos_in_cg = n; + break; + } + } + + // Sum all kvz_quant coeffs between first and last + for (n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { + abssum += q_coef[scan[n + subpos]]; + } + + if (last_nz_pos_in_cg >= 0 && last_cg == -1) { + last_cg = 1; + } + + if (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { + int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1); + if (signbit != (abssum & 0x1)) { // compare signbit with sum_parity + int32_t min_cost_inc = 0x7fffffff, min_pos = -1, cur_cost = 0x7fffffff; + int16_t final_change = 0, cur_change = 0; + for (n = (last_cg == 1 ? last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { + uint32_t blkPos = scan[n + subpos]; + if (q_coef[blkPos] != 0) { + if (delta_u[blkPos] > 0) { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + else if (n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = delta_u[blkPos]; + cur_change = -1; + } + } + else if (n < first_nz_pos_in_cg && ((coef[blkPos] >= 0) ? 0 : 1) != signbit) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + + if (cur_cost < min_cost_inc) { + min_cost_inc = cur_cost; + final_change = cur_change; + min_pos = blkPos; + } + } // CG loop + + if (q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { + final_change = -1; + } + + if (coef[min_pos] >= 0) q_coef[min_pos] += final_change; + else q_coef[min_pos] -= final_change; + } // Hide + } + if (last_cg == 1) last_cg = 0; + } + +#undef SCAN_SET_SIZE +#undef LOG2_SCAN_SET_SIZE + } +} + + +int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth) +{ + bool success = true; + + success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic); + + return success; +}
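As a reading aid for the new file above (this snippet is not part of the patch), the per-coefficient forward quantization in kvz_quant_generic reduces to the step below; quant_coeff, add and q_bits stand for the values computed at the top of the function, and the helper name is made up for illustration.

#include <stdint.h>
#include <stdlib.h>

/* One iteration of the main loop in kvz_quant_generic: scale by the
 * scaling-list factor, add the rounding offset, shift down by q_bits,
 * restore the sign and clip to the 16-bit coefficient range. */
static int16_t quantize_one(int32_t coef, int32_t quant_coeff,
                            int32_t add, int32_t q_bits)
{
  int32_t sign  = (coef < 0) ? -1 : 1;
  int32_t level = (int32_t)(((int64_t)abs(coef) * quant_coeff + add) >> q_bits);
  level *= sign;
  if (level < -32768) level = -32768;
  if (level >  32767) level =  32767;
  return (int16_t)level;
}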
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/quant-generic.h
Added
@@ -0,0 +1,31 @@ +#ifndef STRATEGIES_QUANT_GENERIC_H_ +#define STRATEGIES_QUANT_GENERIC_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +#define QUANT_SHIFT 14 + +int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth); +void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); + +#endif //STRATEGIES_QUANT_GENERIC_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-common.h
Added
@@ -0,0 +1,8 @@ +#ifndef STRATEGIES_COMMON_H_ +#define STRATEGIES_COMMON_H_ + +//Use with shuffle and permutation intrinsics. +//Parameters are indices to packed elements. Each must be 0, 1, 2 or 3. +#define KVZ_PERMUTE(a, b, c, d) ( (a << 0) | (b << 2) | (c << 4) | (d << 6) ) + +#endif //STRATEGIES_COMMON_H_ \ No newline at end of file
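For illustration only (not in the patch): KVZ_PERMUTE packs four 2-bit lane indices into the 8-bit immediate expected by the SSE/AVX shuffle intrinsics, with the i-th argument choosing the source element for result lane i. Reversing the four 32-bit lanes of a register would look like the sketch below; the include path for the header above is assumed.

#include <immintrin.h>
#include "strategies/strategies-common.h"  /* KVZ_PERMUTE; exact path may differ */

/* Result lane 0 takes element 3, lane 1 element 2, and so on. */
static __m128i reverse_lanes(__m128i v)
{
  return _mm_shuffle_epi32(v, KVZ_PERMUTE(3, 2, 1, 0));
}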
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-intra.c
Added
@@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "strategies-intra.h" +#include "strategyselector.h" + +// Define function pointers. +angular_pred_func *kvz_angular_pred; + +// Headers for platform optimizations. +#include "generic/intra-generic.h" +#include "avx2/intra-avx2.h" + + +int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_intra_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + success &= kvz_strategy_register_intra_avx2(opaque, bitdepth); + } + return success; +} \ No newline at end of file
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-intra.h
Added
@@ -0,0 +1,43 @@ +#ifndef STRATEGIES_INTRA_H_ +#define STRATEGIES_INTRA_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "encoderstate.h" + +typedef void (angular_pred_func)( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst); + +// Declare function pointers. +extern angular_pred_func * kvz_angular_pred; + +int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_INTRA_EXPORTS \ + {"angular_pred", (void**) &kvz_angular_pred}, \ + + + +#endif //STRATEGIES_INTRA_H_
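For orientation (not from the patch): callers never invoke the generic or AVX2 routines directly; once the strategy selector has assigned kvz_angular_pred, prediction goes through the pointer with the signature declared above. The wrapper below is purely illustrative.

#include "strategies/strategies-intra.h"  /* kvz_angular_pred; exact path may differ */

/* Illustrative caller; assumes the strategy selector has already run
 * and filled in the function pointer. */
static void predict_angular(int_fast8_t log2_width, int_fast8_t intra_mode,
                            const kvz_pixel *ref_above, const kvz_pixel *ref_left,
                            kvz_pixel *dst)
{
  kvz_angular_pred(log2_width, intra_mode, ref_above, ref_left, dst);
}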
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-quant.c
Added
@@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "strategies-quant.h" +#include "strategyselector.h" + +// Define function pointers. +quant_func *kvz_quant; + +// Headers for platform optimizations. +#include "generic/quant-generic.h" +#include "avx2/quant-avx2.h" + + +int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_quant_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + success &= kvz_strategy_register_quant_avx2(opaque, bitdepth); + } + return success; +} \ No newline at end of file
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-quant.h
Added
@@ -0,0 +1,40 @@ +#ifndef STRATEGIES_QUANT_H_ +#define STRATEGIES_QUANT_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "encoderstate.h" + +// Declare function pointers. +typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); + +// Declare function pointers. +extern quant_func * kvz_quant; + +int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_QUANT_EXPORTS \ + {"quant", (void**) &kvz_quant}, \ + + + +#endif //STRATEGIES_QUANT_H_
View file
kvazaar-0.7.0.tar.gz/src/strategyselector.c -> kvazaar-0.7.1.tar.gz/src/strategyselector.c
Changed
@@ -69,6 +69,16 @@ fprintf(stderr, "kvz_strategy_register_ipol failed!\n"); return 0; } + + if (!kvz_strategy_register_quant(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_quant failed!\n"); + return 0; + } + + if (!kvz_strategy_register_intra(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_intra failed!\n"); + return 0; + } while(cur_strategy_to_select->fptr) { *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
View file
kvazaar-0.7.0.tar.gz/src/strategyselector.h -> kvazaar-0.7.1.tar.gz/src/strategyselector.h
Changed
@@ -148,12 +148,16 @@ #include "strategies/strategies-picture.h" #include "strategies/strategies-dct.h" #include "strategies/strategies-ipol.h" +#include "strategies/strategies-quant.h" +#include "strategies/strategies-intra.h" static const strategy_to_select_t strategies_to_select[] = { STRATEGIES_NAL_EXPORTS STRATEGIES_PICTURE_EXPORTS STRATEGIES_DCT_EXPORTS STRATEGIES_IPOL_EXPORTS + STRATEGIES_QUANT_EXPORTS + STRATEGIES_INTRA_EXPORTS { NULL, NULL }, };
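To see why one line per strategy group suffices here (a sketch, not code from the patch): each *_EXPORTS macro expands to a single initializer, so after preprocessing the table ends in entries like the fragment below, and the selection loop in strategyselector.c then assigns the best registered implementation to every listed pointer. The array name is invented for the example; strategy_to_select_t comes from strategyselector.h.

/* Sketch of the expanded tail of the selection table (other groups omitted). */
static const strategy_to_select_t strategies_to_select_expanded[] = {
  { "quant",        (void**) &kvz_quant },
  { "angular_pred", (void**) &kvz_angular_pred },
  { NULL, NULL },
};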
View file
kvazaar-0.7.0.tar.gz/src/transform.c -> kvazaar-0.7.1.tar.gz/src/transform.c
Changed
@@ -33,6 +33,8 @@ #include "nal.h" #include "rdo.h" #include "strategies/strategies-dct.h" +#include "strategies/strategies-quant.h" +#include "strategies/generic/quant-generic.h" ////////////////////////////////////////////////////////////////////////// // INITIALIZATIONS @@ -127,137 +129,6 @@ idct_func(encoder->bitdepth, coeff, block); } - -#define QUANT_SHIFT 14 -/** - * \brief quantize transformed coefficents - * - */ -void kvz_quant(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type ) -{ - const encoder_control_t * const encoder = state->encoder_control; - const uint32_t log2_block_size = kvz_g_convert_to_bit[ width ] + 2; - const uint32_t * const scan = kvz_g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ]; - - int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth-8)*6); - - const uint32_t log2_tr_size = kvz_g_convert_to_bit[ width ] + 2; - const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); - const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size-2][scalinglist_type][qp_scaled%6]; - const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform - const int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; - const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9); - const int32_t q_bits8 = q_bits - 8; - - uint32_t ac_sum = 0; - - for (int32_t n = 0; n < width * height; n++) { - int32_t level; - int32_t sign; - - level = coef[n]; - sign = (level < 0 ? -1: 1); - - level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; - ac_sum += level; - - level *= sign; - q_coef[n] = (coeff_t)(CLIP( -32768, 32767, level)); - } - - if (!(encoder->sign_hiding && ac_sum >= 2)) return; - - int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; - - for (int32_t n = 0; n < width * height; n++) { - int32_t level; - level = coef[n]; - level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; - delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); - } - - if(ac_sum >= 2) { - #define SCAN_SET_SIZE 16 - #define LOG2_SCAN_SET_SIZE 4 - int32_t n,last_cg = -1, abssum = 0, subset, subpos; - for(subset = (width*height - 1)>>LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { - int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg=-1; - subpos = subset<<LOG2_SCAN_SET_SIZE; - abssum = 0; - - // Find last coeff pos - for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { - if (q_coef[scan[n + subpos]]) { - last_nz_pos_in_cg = n; - break; - } - } - - // First coeff pos - for (n = 0; n <SCAN_SET_SIZE; n++) { - if (q_coef[scan[n + subpos]]) { - first_nz_pos_in_cg = n; - break; - } - } - - // Sum all kvz_quant coeffs between first and last - for(n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { - abssum += q_coef[scan[n + subpos]]; - } - - if(last_nz_pos_in_cg >= 0 && last_cg == -1) { - last_cg = 1; - } - - if(last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { - int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1) ; - if(signbit != (abssum&0x1)) { // compare signbit with sum_parity - int32_t min_cost_inc = 0x7fffffff, min_pos =-1, cur_cost=0x7fffffff; - int16_t final_change = 0, cur_change=0; - for(n = (last_cg == 1 ? last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { - uint32_t blkPos = scan[n + subpos]; - if(q_coef[blkPos] != 0) { - if(delta_u[blkPos] > 0) { - cur_cost = -delta_u[blkPos]; - cur_change=1; - } else if(n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { - cur_cost=0x7fffffff; - } else { - cur_cost = delta_u[blkPos]; - cur_change =-1; - } - } else if(n < first_nz_pos_in_cg && ((coef[blkPos] >= 0)?0:1) != signbit) { - cur_cost = 0x7fffffff; - } else { - cur_cost = -delta_u[blkPos]; - cur_change = 1; - } - - if(cur_cost < min_cost_inc) { - min_cost_inc = cur_cost; - final_change = cur_change; - min_pos = blkPos; - } - } // CG loop - - if(q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { - final_change = -1; - } - - if(coef[min_pos] >= 0) q_coef[min_pos] += final_change; - else q_coef[min_pos] -= final_change; - } // Hide - } - if (last_cg == 1) last_cg=0; - } - - #undef SCAN_SET_SIZE - #undef LOG2_SCAN_SET_SIZE - } -} - /** * \brief inverse quantize transformed and quantized coefficents *
View file
kvazaar-0.7.0.tar.gz/src/transform.h -> kvazaar-0.7.1.tar.gz/src/transform.h
Changed
@@ -35,8 +35,6 @@ -void kvz_quant(const encoder_state_t *state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); void kvz_dequant(const encoder_state_t *state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type); void kvz_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
View file
kvazaar-0.7.0.tar.gz/tests/test_strategies.c -> kvazaar-0.7.1.tar.gz/tests/test_strategies.c
Changed
@@ -41,7 +41,7 @@ } if (!kvz_strategy_register_dct(&strategies, KVZ_BIT_DEPTH)) { - fprintf(stderr, "strategy_register_partial_butterfly failed!\n"); + fprintf(stderr, "strategy_register_dct failed!\n"); return; } }