kvazaar
Changes of Revision 4
kvazaar.changes
Changed
@@ -1,4 +1,10 @@
 -------------------------------------------------------------------
+Sun Oct 25 06:12:37 UTC 2015 - aloisio@gmx.com
+
+- Update to version 0.7.1
+  * bumped library version to 2
+
+-------------------------------------------------------------------
 Sat Oct 10 06:21:46 UTC 2015 - aloisio@gmx.com
 
 - Update to version 0.7.0
kvazaar.spec
Changed
@@ -16,9 +16,9 @@
 #
 %define libname libkvazaar
-%define libmver 1
+%define libmver 2
 
 Name: kvazaar
-Version: 0.7.0
+Version: 0.7.1
 Release: 0
 Summary: HEVC encoder
 License: LGPL-2.1
kvazaar-0.7.0.tar.gz/README.md -> kvazaar-0.7.1.tar.gz/README.md
Changed
@@ -55,7 +55,7 @@
     --subme <integer>            : Set fractional pixel motion estimation level [1].
                                      0: only integer motion estimation
                                      1: fractional pixel motion estimation enabled
-    --source-scan-type <string>  : Set source scan type [\"progressive\"].
+    --source-scan-type <string>  : Set source scan type ["progressive"].
                                      "progressive": progressive scan
                                      "tff": top field first
                                      "bff": bottom field first
kvazaar-0.7.0.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj -> kvazaar-0.7.1.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj
Changed
@@ -153,6 +153,22 @@
     <ClCompile Include="..\..\src\search.c" />
     <ClCompile Include="..\..\src\search_inter.c" />
     <ClCompile Include="..\..\src\search_intra.c" />
+    <ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c">
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\generic\intra-generic.c" />
+    <ClCompile Include="..\..\src\strategies\generic\quant-generic.c" />
+    <ClCompile Include="..\..\src\strategies\strategies-intra.c" />
+    <ClCompile Include="..\..\src\strategies\strategies-quant.c" />
     <ClCompile Include="..\..\src\yuv_io.c" />
     <ClInclude Include="..\..\src\checkpoint.h" />
     <ClInclude Include="..\..\src\cli.h" />
@@ -201,6 +217,13 @@
     <ClInclude Include="..\..\src\kvazaar_internal.h" />
     <ClInclude Include="..\..\src\search_inter.h" />
     <ClInclude Include="..\..\src\search_intra.h" />
+    <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" />
+    <ClInclude Include="..\..\src\strategies\generic\intra-generic.h" />
+    <ClInclude Include="..\..\src\strategies\strategies-common.h" />
+    <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" />
+    <ClInclude Include="..\..\src\strategies\generic\quant-generic.h" />
+    <ClInclude Include="..\..\src\strategies\strategies-intra.h" />
+    <ClInclude Include="..\..\src\strategies\strategies-quant.h" />
     <ClInclude Include="..\..\src\yuv_io.h" />
   </ItemGroup>
   <ItemGroup>
kvazaar-0.7.0.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj.filters -> kvazaar-0.7.1.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj.filters
Changed
@@ -207,6 +207,24 @@
     <ClCompile Include="..\..\src\input_frame_buffer.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\strategies\strategies-quant.c">
+      <Filter>Source Files\strategies</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\generic\quant-generic.c">
+      <Filter>Source Files\strategies\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
+      <Filter>Source Files\strategies\avx2</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\strategies-intra.c">
+      <Filter>Source Files\strategies</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\generic\intra-generic.c">
+      <Filter>Source Files\strategies\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c">
+      <Filter>Source Files\strategies\avx2</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\src\global.h">
@@ -374,6 +392,27 @@
     <ClInclude Include="..\..\src\input_frame_buffer.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\src\strategies\strategies-common.h">
+      <Filter>Header Files\strategies</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\strategies-quant.h">
+      <Filter>Header Files\strategies</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\generic\quant-generic.h">
+      <Filter>Header Files\strategies\generic</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h">
+      <Filter>Header Files\strategies\avx2</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\strategies-intra.h">
+      <Filter>Header Files\strategies</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\generic\intra-generic.h">
+      <Filter>Header Files\strategies\generic</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h">
+      <Filter>Header Files\strategies\avx2</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <YASM Include="..\..\src\extras\x86inc.asm">
kvazaar-0.7.0.tar.gz/src/Makefile -> kvazaar-0.7.1.tar.gz/src/Makefile
Changed
@@ -12,7 +12,7 @@
 DLLDIR = $(BINDIR)
 
 # Library version number
-VER_MAJOR = 1
+VER_MAJOR = 2
 VER_MINOR = 0
 VER_RELEASE = 0
 
@@ -152,18 +152,21 @@
 # directories. If the instruction set is supported by the
 # architecture, compile the files in these directories with the
 # apropriate flags to cause the intrinsics to work.
+# Note: Using LTO on strategies caused issues on some older
+# compilers, -fno-lto on these files seemed to fix the issue.
+# LTO is no longer used and -fno-lto shouldn't be needed.
 ifeq ($(TARGET_CPU_ARCH), ppc)
-  strategies/altivec/%.o: EXTRA_FLAGS += -maltivec -fno-lto
-  strategies/altivec/%.lo: EXTRA_FLAGS += -maltivec -fno-lto
+  strategies/altivec/%.o: EXTRA_FLAGS += -maltivec
+  strategies/altivec/%.lo: EXTRA_FLAGS += -maltivec
 else ifeq ($(TARGET_CPU_ARCH), x86)
-  strategies/sse2/%.o: EXTRA_FLAGS += -msse2 -fno-lto
-  strategies/sse41/%.o: EXTRA_FLAGS += -msse4.1 -fno-lto
-  strategies/sse2/%.lo: EXTRA_FLAGS += -msse2 -fno-lto
-  strategies/sse41/%.lo: EXTRA_FLAGS += -msse4.1 -fno-lto
+  strategies/sse2/%.o: EXTRA_FLAGS += -msse2
+  strategies/sse41/%.o: EXTRA_FLAGS += -msse4.1
+  strategies/sse2/%.lo: EXTRA_FLAGS += -msse2
+  strategies/sse41/%.lo: EXTRA_FLAGS += -msse4.1
   # To disable avx2 on old compilers that don't support it.
   ifndef KVZ_DISABLE_AVX2
-    strategies/avx2/%.o: EXTRA_FLAGS += -mavx2 -fno-lto
-    strategies/avx2/%.lo: EXTRA_FLAGS += -mavx2 -fno-lto
+    strategies/avx2/%.o: EXTRA_FLAGS += -mavx2
+    strategies/avx2/%.lo: EXTRA_FLAGS += -mavx2
   endif
 endif
 
@@ -203,6 +206,8 @@
   strategies/strategies-nal.o \
   strategies/strategies-dct.o \
   strategies/strategies-ipol.o \
+  strategies/strategies-quant.o \
+  strategies/strategies-intra.o \
   strategies/generic/nal-generic.o \
   strategies/generic/picture-generic.o \
   strategies/sse2/picture-sse2.o \
@@ -213,11 +218,15 @@
   strategies/generic/dct-generic.o \
   strategies/avx2/dct-avx2.o \
   strategies/generic/ipol-generic.o \
-  strategies/avx2/ipol-avx2.o
+  strategies/avx2/ipol-avx2.o \
+  strategies/generic/quant-generic.o \
+  strategies/avx2/quant-avx2.o \
+  strategies/generic/intra-generic.o \
+  strategies/avx2/intra-avx2.o
 
 ifndef KVZ_DISABLE_ASM
   # Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause
-  # the registration function to register the function pointers in the ASM 
+  # the registration function to register the function pointers in the ASM
   # files.
   strategies/x86_asm/%.o: EXTRA_FLAGS += -DKVZ_COMPILE_ASM
 
@@ -321,7 +330,7 @@
 .PHONY: kvazaar.pc init_submodules install install-pc install-prog install-lib
 .PHONY: install-dylib install-dll clean
 
-kvazaar.pc: KVZ_VERSION = $(shell sed -n 's/^#define\s\+KVZ_VERSION\s\+\(.*\)/\1/ p' global.h)
+kvazaar.pc: KVZ_VERSION = $(shell awk '/#define KVZ_VERSION/ { print $$3 }' global.h)
 kvazaar.pc: kvazaar.pc.in Makefile
 	sed -e "s;@prefix@;$(PREFIX);" -e "s;@libdir@;$(LIBDIR);" \
 	    -e "s;@VERSION@;$(KVZ_VERSION);" \
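The last hunk above swaps a sed expression for awk when extracting KVZ_VERSION for kvazaar.pc, presumably for portability, since \s and \+ are GNU sed extensions. For reference, the line being matched in src/global.h (also bumped in this revision, see the global.h hunk further down) looks like this, and the awk one-liner simply prints its third whitespace-separated field:

/* src/global.h -- the definition the kvazaar.pc rule reads; field 3 is "0.7.1". */
#define KVZ_VERSION 0.7.1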
kvazaar-0.7.0.tar.gz/src/cli.c -> kvazaar-0.7.1.tar.gz/src/cli.c
Changed
@@ -198,6 +198,14 @@
     goto done;
   }
 
+  if (opts->config->vps_period < 0) {
+    // Disabling parameter sets is only possible when using Kvazaar as
+    // a library.
+    fprintf(stderr, "Input error: vps_period must be non-negative\n");
+    ok = 0;
+    goto done;
+  }
+
   // Set resolution automatically if necessary
   if (opts->config->width == 0 && opts->config->width == 0){
     ok = select_input_res_auto(opts->input, &opts->config->width, &opts->config->height);
kvazaar-0.7.0.tar.gz/src/config.c -> kvazaar-0.7.1.tar.gz/src/config.c
Changed
@@ -157,7 +157,7 @@
   //If the arg starts with u, we want an uniform split
   if (arg[0]=='u') {
     *ntiles = atoi(arg+1)-1;
-    if (MAX_TILES_PER_DIM <= *ntiles || 0 >= *ntiles) {
+    if (MAX_TILES_PER_DIM <= *ntiles || 0 > *ntiles) {
       fprintf(stderr, "Invalid number of tiles (0 < %d <= %d = MAX_TILES_PER_DIM)!\n", *ntiles + 1, MAX_TILES_PER_DIM);
       return 0;
     }
@@ -504,8 +504,8 @@
     error = 1;
   }
 
-  if (cfg->rdo < 0 || cfg->rdo > 2) {
-    fprintf(stderr, "Input error: --rd parameter out of range [0..2]\n");
+  if (cfg->rdo < 0 || cfg->rdo > 3) {
+    fprintf(stderr, "Input error: --rd parameter out of range [0..3]\n");
     error = 1;
   }
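A quick worked example of the uniform-tile check fixed in the first hunk: "u4" gives *ntiles = atoi("4") - 1 = 3 split positions, i.e. four uniform tiles, while "u1" gives *ntiles = 0; the old "0 >= *ntiles" test wrongly rejected that single-tile case and the new "0 > *ntiles" test accepts it. A self-contained sketch of the same logic, not code from the patch (MAX_TILES_PER_DIM's real value lives in kvazaar's headers and is only assumed here):

#include <assert.h>
#include <stdlib.h>

#define MAX_TILES_PER_DIM 16  /* assumed value, for this sketch only */

/* Mirrors the fixed check for a "uN" tile-split argument. */
static int uniform_tiles_ok(const char *arg)
{
  int ntiles = atoi(arg + 1) - 1;  /* number of split positions */
  return !(MAX_TILES_PER_DIM <= ntiles || 0 > ntiles);
}

int main(void)
{
  assert(uniform_tiles_ok("u1"));    /* single tile: rejected before the fix, accepted now */
  assert(uniform_tiles_ok("u4"));    /* four uniform tiles */
  assert(!uniform_tiles_ok("u99"));  /* too many tiles */
  return 0;
}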
kvazaar-0.7.0.tar.gz/src/encoder.c -> kvazaar-0.7.1.tar.gz/src/encoder.c
Changed
@@ -433,7 +433,11 @@
   // AUD
   encoder->aud_enable = (int8_t)encoder->cfg->aud_enable;
 
-  encoder->vps_period = encoder->cfg->vps_period * encoder->cfg->intra_period;
+  if (encoder->cfg->vps_period >= 0) {
+    encoder->vps_period = encoder->cfg->vps_period * encoder->cfg->intra_period;
+  } else {
+    encoder->vps_period = -1;
+  }
 
   return encoder;
 
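Taken together with the cli.c and encoder.h hunks, vps_period is now signed and a negative configured value means parameter sets are never emitted in-band, which is only meaningful when Kvazaar is used as a library. A minimal sketch of the resulting semantics, not code from the patch:

#include <stdbool.h>
#include <stdint.h>

/* Sketch: map the configured value to the encoder's effective vps_period. */
static int32_t effective_vps_period(int32_t cfg_vps_period, int32_t intra_period)
{
  if (cfg_vps_period < 0) return -1;      /* never write VPS/SPS/PPS in-band */
  return cfg_vps_period * intra_period;   /* 0: only with the first frame */
}

/* Sketch: which frames get VPS/SPS/PPS, matching the condition used in the
 * encoder_state-bitstream.c hunk later in this revision. */
static bool frame_gets_parameter_sets(int32_t vps_period, int32_t frame)
{
  if (vps_period > 0) return frame % vps_period == 0;
  return vps_period == 0 && frame == 0;
}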
kvazaar-0.7.0.tar.gz/src/encoder.h -> kvazaar-0.7.1.tar.gz/src/encoder.h
Changed
@@ -140,7 +140,7 @@
   } pu_depth_inter, pu_depth_intra;
 
   // How often Video Parameter Set is re-sent.
-  uint32_t vps_period;
+  int32_t vps_period;
 
   bool sign_hiding;
 
kvazaar-0.7.0.tar.gz/src/encoder_state-bitstream.c -> kvazaar-0.7.1.tar.gz/src/encoder_state-bitstream.c
Changed
@@ -41,10 +41,9 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_PTL(encoder_state_t * const state) +static void encoder_state_write_bitstream_PTL(bitstream_t *stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; - int i; // PTL // Profile Tier WRITE_U(stream, 0, 2, "general_profile_space"); @@ -74,17 +73,16 @@ WRITE_U(stream, 0, 1, "sub_layer_profile_present_flag"); WRITE_U(stream, 0, 1, "sub_layer_level_present_flag"); - for (i = 1; i < 8; i++) { + for (int i = 1; i < 8; i++) { WRITE_U(stream, 0, 2, "reserved_zero_2bits"); } // end PTL } -static void encoder_state_write_bitstream_vid_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_vid_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; - int i; #ifdef KVZ_DEBUG printf("=========== Video Parameter Set ID: 0 ===========\n"); #endif @@ -96,12 +94,12 @@ WRITE_U(stream, 0, 1, "vps_temporal_id_nesting_flag"); WRITE_U(stream, 0xffff, 16, "vps_reserved_ffff_16bits"); - encoder_state_write_bitstream_PTL(state); + encoder_state_write_bitstream_PTL(stream, state); WRITE_U(stream, 0, 1, "vps_sub_layer_ordering_info_present_flag"); //for each layer - for (i = 0; i < 1; i++) { + for (int i = 0; i < 1; i++) { WRITE_UE(stream, 1, "vps_max_dec_pic_buffering"); WRITE_UE(stream, 0, "vps_num_reorder_pics"); WRITE_UE(stream, 0, "vps_max_latency_increase"); @@ -119,10 +117,10 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_scaling_list(encoder_state_t * const state) +static void encoder_state_write_bitstream_scaling_list(bitstream_t *stream, + encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; - bitstream_t * const stream = &state->stream; uint32_t size_id; for (size_id = 0; size_id < SCALING_LIST_SIZE_NUM; size_id++) { int32_t list_id; @@ -177,9 +175,9 @@ } -static void encoder_state_write_bitstream_VUI(encoder_state_t * const state) +static void encoder_state_write_bitstream_VUI(bitstream_t *stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; const encoder_control_t * const encoder = state->encoder_control; #ifdef KVZ_DEBUG printf("=========== VUI Set ID: 0 ===========\n"); @@ -260,8 +258,8 @@ //ENDIF WRITE_U(stream, 0, 1, "neutral_chroma_indication_flag"); - WRITE_U(stream, state->encoder_control->vui.field_seq_flag, 1, "field_seq_flag"); // 0: frames, 1: fields - WRITE_U(stream, state->encoder_control->vui.frame_field_info_present_flag, 1, "frame_field_info_present_flag"); + WRITE_U(stream, encoder->vui.field_seq_flag, 1, "field_seq_flag"); // 0: frames, 1: fields + WRITE_U(stream, encoder->vui.frame_field_info_present_flag, 1, "frame_field_info_present_flag"); WRITE_U(stream, 0, 1, "default_display_window_flag"); //IF default display window @@ -278,9 +276,9 @@ //ENDIF } -static void encoder_state_write_bitstream_seq_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; const encoder_control_t * encoder = state->encoder_control; #ifdef KVZ_DEBUG @@ -292,13 +290,13 @@ WRITE_U(stream, 1, 3, "sps_max_sub_layers_minus1"); WRITE_U(stream, 0, 1, "sps_temporal_id_nesting_flag"); - encoder_state_write_bitstream_PTL(state); + encoder_state_write_bitstream_PTL(stream, state); WRITE_UE(stream, 0, 
"sps_seq_parameter_set_id"); - WRITE_UE(stream, state->encoder_control->in.video_format, + WRITE_UE(stream, encoder->in.video_format, "chroma_format_idc"); - if (state->encoder_control->in.video_format == 3) { + if (encoder->in.video_format == 3) { WRITE_U(stream, 0, 1, "separate_colour_plane_flag"); } @@ -331,7 +329,7 @@ WRITE_U(stream, 0, 1, "sps_sub_layer_ordering_info_present_flag"); //for each layer - WRITE_UE(stream, state->encoder_control->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering"); + WRITE_UE(stream, encoder->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering"); WRITE_UE(stream, encoder->cfg->gop_len, "sps_num_reorder_pics"); WRITE_UE(stream, 0, "sps_max_latency_increase"); //end for @@ -344,14 +342,14 @@ WRITE_UE(stream, encoder->tr_depth_intra, "max_transform_hierarchy_depth_intra"); // scaling list - WRITE_U(stream, state->encoder_control->scaling_list.enable, 1, "scaling_list_enable_flag"); - if (state->encoder_control->scaling_list.enable) { + WRITE_U(stream, encoder->scaling_list.enable, 1, "scaling_list_enable_flag"); + if (encoder->scaling_list.enable) { WRITE_U(stream, 1, 1, "sps_scaling_list_data_present_flag"); - encoder_state_write_bitstream_scaling_list(state); + encoder_state_write_bitstream_scaling_list(stream, state); } WRITE_U(stream, 0, 1, "amp_enabled_flag"); - WRITE_U(stream, state->encoder_control->sao_enable ? 1 : 0, 1, + WRITE_U(stream, encoder->sao_enable ? 1 : 0, 1, "sample_adaptive_offset_enabled_flag"); WRITE_U(stream, ENABLE_PCM, 1, "pcm_enabled_flag"); #if ENABLE_PCM == 1 @@ -377,17 +375,17 @@ WRITE_U(stream, 0, 1, "sps_strong_intra_smoothing_enable_flag"); WRITE_U(stream, 1, 1, "vui_parameters_present_flag"); - encoder_state_write_bitstream_VUI(state); + encoder_state_write_bitstream_VUI(stream, state); WRITE_U(stream, 0, 1, "sps_extension_flag"); kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_pic_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; - bitstream_t * const stream = &state->stream; #ifdef KVZ_DEBUG printf("=========== Picture Parameter Set ID: 0 ===========\n"); #endif @@ -403,7 +401,7 @@ WRITE_UE(stream, 0, "num_ref_idx_l1_default_active_minus1"); WRITE_SE(stream, ((int8_t)encoder->cfg->qp) - 26, "pic_init_qp_minus26"); WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); - WRITE_U(stream, state->encoder_control->trskip_enable, 1, "transform_skip_enabled_flag"); + WRITE_U(stream, encoder->trskip_enable, 1, "transform_skip_enabled_flag"); WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); //if cu_qp_delta_enabled_flag //WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); @@ -445,13 +443,13 @@ //IF deblocking_filter WRITE_U(stream, 0, 1, "deblocking_filter_override_enabled_flag"); - WRITE_U(stream, state->encoder_control->deblock_enable ? 0 : 1, 1, + WRITE_U(stream, encoder->deblock_enable ? 
0 : 1, 1, "pps_disable_deblocking_filter_flag"); //IF !disabled - if (state->encoder_control->deblock_enable) { - WRITE_SE(stream, state->encoder_control->beta_offset_div2, "beta_offset_div2"); - WRITE_SE(stream, state->encoder_control->tc_offset_div2, "tc_offset_div2"); + if (encoder->deblock_enable) { + WRITE_SE(stream, encoder->beta_offset_div2, "beta_offset_div2"); + WRITE_SE(stream, encoder->tc_offset_div2, "tc_offset_div2"); } //ENDIF @@ -639,7 +637,7 @@ int j; int ref_negative = 0; int ref_positive = 0; - if (state->encoder_control->cfg->gop_len) { + if (encoder->cfg->gop_len) { for (j = 0; j < state->global->ref->used_size; j++) { if (state->global->ref->pocs[j] < state->global->poc) { ref_negative++; @@ -686,10 +684,10 @@ for (j = 0; j < ref_negative; j++) { int8_t delta_poc = 0; - if (state->encoder_control->cfg->gop_len) { + if (encoder->cfg->gop_len) { int8_t found = 0; do { - delta_poc = state->encoder_control->cfg->gop[state->global->gop_offset].ref_neg[j + poc_shift]; + delta_poc = encoder->cfg->gop[state->global->gop_offset].ref_neg[j + poc_shift]; for (int i = 0; i < state->global->ref->used_size; i++) { if (state->global->ref->pocs[i] == state->global->poc - delta_poc) { found = 1; @@ -704,7 +702,7 @@ } while (!found); } - WRITE_UE(stream, state->encoder_control->cfg->gop_len?delta_poc - last_poc - 1:0, "delta_poc_s0_minus1"); + WRITE_UE(stream, encoder->cfg->gop_len?delta_poc - last_poc - 1:0, "delta_poc_s0_minus1"); last_poc = delta_poc; WRITE_U(stream,1,1, "used_by_curr_pic_s0_flag"); } @@ -713,10 +711,10 @@ for (j = 0; j < ref_positive; j++) { int8_t delta_poc = 0; - if (state->encoder_control->cfg->gop_len) { + if (encoder->cfg->gop_len) { int8_t found = 0; do { - delta_poc = state->encoder_control->cfg->gop[state->global->gop_offset].ref_pos[j + poc_shift]; + delta_poc = encoder->cfg->gop[state->global->gop_offset].ref_pos[j + poc_shift]; for (int i = 0; i < state->global->ref->used_size; i++) { if (state->global->ref->pocs[i] == state->global->poc + delta_poc) { found = 1; @@ -731,7 +729,7 @@ } while (!found); } - WRITE_UE(stream, state->encoder_control->cfg->gop_len ? delta_poc - last_poc - 1 : 0, "delta_poc_s1_minus1"); + WRITE_UE(stream, encoder->cfg->gop_len ? delta_poc - last_poc - 1 : 0, "delta_poc_s1_minus1"); last_poc = delta_poc; WRITE_U(stream, 1, 1, "used_by_curr_pic_s1_flag"); } @@ -756,7 +754,7 @@ } { - int slice_qp_delta = state->global->QP - state->encoder_control->cfg->qp; + int slice_qp_delta = state->global->QP - encoder->cfg->qp; WRITE_SE(stream, slice_qp_delta, "slice_qp_delta"); } @@ -840,25 +838,14 @@ } if ((encoder->vps_period > 0 && state->global->frame % encoder->vps_period == 0) - || state->global->frame == 0) + || (state->global->frame == 0 && encoder->vps_period >= 0)) { first_nal_in_au = false; - - // Video Parameter Set (VPS) - kvz_nal_write(stream, KVZ_NAL_VPS_NUT, 0, 1); - encoder_state_write_bitstream_vid_parameter_set(state); - - // Sequence Parameter Set (SPS) - kvz_nal_write(stream, KVZ_NAL_SPS_NUT, 0, 1); - encoder_state_write_bitstream_seq_parameter_set(state); - - // Picture Parameter Set (PPS) - kvz_nal_write(stream, KVZ_NAL_PPS_NUT, 0, 1); - encoder_state_write_bitstream_pic_parameter_set(state); + kvz_encoder_state_write_parameter_sets(&state->stream, state); } // Send Kvazaar version information only in the first frame. 
- if (state->global->frame == 0 && state->encoder_control->cfg->add_encoder_info) { + if (state->global->frame == 0 && encoder->cfg->add_encoder_info) { kvz_nal_write(stream, KVZ_NAL_PREFIX_SEI_NUT, 0, first_nal_in_au); encoder_state_write_bitstream_prefix_sei_version(state); @@ -867,7 +854,7 @@ } //SEI messages for interlacing - if (state->encoder_control->vui.frame_field_info_present_flag){ + if (encoder->vui.frame_field_info_present_flag){ // These should be optional, needed for earlier versions // of HM decoder to accept bitstream //kvz_nal_write(stream, KVZ_NAL_PREFIX_SEI_NUT, 0, 0); @@ -889,14 +876,14 @@ { PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); encoder_state_write_bitstream_children(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type); + PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type); } { PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); // Calculate checksum add_checksum(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type); + PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type); } //Get bitstream length for stats @@ -982,3 +969,19 @@ { kvz_encoder_state_write_bitstream((encoder_state_t *) opaque); } + +void kvz_encoder_state_write_parameter_sets(bitstream_t *stream, + encoder_state_t * const state) +{ + // Video Parameter Set (VPS) + kvz_nal_write(stream, KVZ_NAL_VPS_NUT, 0, 1); + encoder_state_write_bitstream_vid_parameter_set(stream, state); + + // Sequence Parameter Set (SPS) + kvz_nal_write(stream, KVZ_NAL_SPS_NUT, 0, 1); + encoder_state_write_bitstream_seq_parameter_set(stream, state); + + // Picture Parameter Set (PPS) + kvz_nal_write(stream, KVZ_NAL_PPS_NUT, 0, 1); + encoder_state_write_bitstream_pic_parameter_set(stream, state); +}
kvazaar-0.7.0.tar.gz/src/encoder_state-bitstream.h -> kvazaar-0.7.1.tar.gz/src/encoder_state-bitstream.h
Changed
@@ -26,17 +26,18 @@
 
 #include "global.h"
 
-
 // Forward declare because including the header would lead to a cyclic
 // dependency.
 struct encoder_state_t;
+struct bitstream_t;
 
 void kvz_encoder_state_write_bitstream_slice_header(struct encoder_state_t * const state);
 void kvz_encoder_state_write_bitstream(struct encoder_state_t * const state);
 void kvz_encoder_state_write_bitstream_leaf(struct encoder_state_t * const state);
 
 void kvz_encoder_state_worker_write_bitstream_leaf(void * opaque);
 void kvz_encoder_state_worker_write_bitstream(void * opaque);
-
+void kvz_encoder_state_write_parameter_sets(struct bitstream_t *stream,
+                                            struct encoder_state_t * const state);
 
 #endif // ENCODER_STATE_BITSTREAM_H_
kvazaar-0.7.0.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-0.7.1.tar.gz/src/encoder_state-ctors_dtors.c
Changed
@@ -113,15 +113,14 @@
 
 static int encoder_state_config_slice_init(encoder_state_t * const state,
                                            const int start_address_in_ts, const int end_address_in_ts) {
-  int i = 0, slice_found=0;
-  for (i = 0; i < state->encoder_control->slice_count; ++i) {
+  state->slice->id = -1;
+  for (int i = 0; i < state->encoder_control->slice_count; ++i) {
     if (state->encoder_control->slice_addresses_in_ts[i] == start_address_in_ts) {
       state->slice->id = i;
-      slice_found = 1;
       break;
     }
   }
-  assert(slice_found);
+  assert(state->slice->id != -1);
 
   state->slice->start_in_ts = start_address_in_ts;
   state->slice->end_in_ts = end_address_in_ts;
kvazaar-0.7.0.tar.gz/src/global.h -> kvazaar-0.7.1.tar.gz/src/global.h
Changed
@@ -124,6 +124,7 @@
 #define MAX(a,b) (((a)>(b))?(a):(b))
 #define MIN(a,b) (((a)<(b))?(a):(b))
 #define CLIP(low,high,value) MAX((low),MIN((high),(value)))
+#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))
 #define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; }
 #define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth)
 #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
@@ -143,7 +144,7 @@
 // NOTE: When making a release, remember to also bump library version in
 // Makefile, if necessary.
-#define KVZ_VERSION 0.7.0
+#define KVZ_VERSION 0.7.1
 
 #define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION)
 
 //#define VERBOSE 1
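The new CLIP_TO_PIXEL macro is a small convenience wrapper around the existing CLIP; the sketch below shows its behaviour under the assumption of 8-bit content, where PIXEL_MAX would be 255 (the macro definitions are copied from the hunk, PIXEL_MAX's value is the assumed part):

#include <assert.h>

#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
#define CLIP(low,high,value) MAX((low),MIN((high),(value)))
#define PIXEL_MAX 255                                    /* assumed: 8-bit depth */
#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))

int main(void)
{
  assert(CLIP_TO_PIXEL(-3) == 0);     /* clamped up to 0 */
  assert(CLIP_TO_PIXEL(100) == 100);  /* in range, unchanged */
  assert(CLIP_TO_PIXEL(300) == 255);  /* clamped down to PIXEL_MAX */
  return 0;
}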
kvazaar-0.7.0.tar.gz/src/image.c -> kvazaar-0.7.1.tar.gz/src/image.c
Changed
@@ -114,10 +114,9 @@
  */
 kvz_picture *kvz_image_copy_ref(kvz_picture *im)
 {
-  int32_t new_refcount = ATOMIC_INC(&(im->refcount));
-  // The caller should have had another reference.
-  assert(new_refcount > 1);
+  assert(im->refcount > 0);
+  ATOMIC_INC(&(im->refcount));
 
   return im;
 }
 
kvazaar-0.7.0.tar.gz/src/intra.c -> kvazaar-0.7.1.tar.gz/src/intra.c
Changed
@@ -28,91 +28,19 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> -#include "config.h" #include "encoder.h" #include "transform.h" -#include "rdo.h" +#include "strategies/strategies-intra.h" -const uint8_t kvz_intra_hor_ver_dist_thres[5] = {0,7,1,0,0}; - - -/** - * \brief Set intrablock mode (and init typedata) - * \param pic picture to use - * \param xCtb x CU position (smallest CU) - * \param yCtb y CU position (smallest CU) - * \param depth current CU depth - * \param mode mode to set - * \returns Void - */ -void kvz_intra_set_block_mode(videoframe_t *frame,uint32_t x_cu, uint32_t y_cu, uint8_t depth, uint8_t mode, uint8_t part_mode) -{ - uint32_t x, y; - int block_scu_width = (LCU_WIDTH>>depth)/(LCU_WIDTH>>MAX_DEPTH); - - if (part_mode == SIZE_NxN) { - cu_info_t *cur_cu = kvz_videoframe_get_cu(frame, x_cu, y_cu); - // Modes are already set. - cur_cu->depth = depth; - cur_cu->type = CU_INTRA; - cur_cu->tr_depth = depth + 1; - return; - } - - // Loop through all the blocks in the area of cur_cu - for (y = y_cu; y < y_cu + block_scu_width; y++) { - for (x = x_cu; x < x_cu + block_scu_width; x++) { - cu_info_t *cur_cu = kvz_videoframe_get_cu(frame, x_cu, y_cu); - cur_cu->depth = depth; - cur_cu->type = CU_INTRA; - cur_cu->intra[0].mode = mode; - cur_cu->intra[1].mode = mode; - cur_cu->intra[2].mode = mode; - cur_cu->intra[3].mode = mode; - cur_cu->part_size = part_mode; - cur_cu->tr_depth = depth; - } - } -} - -/** - * \brief get intrablock mode - * \param pic picture data to use - * \param picwidth width of the picture data - * \param xpos x-position - * \param ypos y-position - * \param width block width - * \returns DC prediction -*/ -kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel *pic, uint16_t picwidth, uint8_t width) -{ - int32_t i, sum = 0; - - // pixels on top and left - for (i = -picwidth; i < width - picwidth; i++) { - sum += pic[i]; - } - for (i = -1; i < width * picwidth - 1; i += picwidth) { - sum += pic[i]; - } - - // return the average - return (kvz_pixel)((sum + width) / (width + width)); -} - -/** - * \brief Function for deriving intra luma predictions - * \param pic picture to use - * \param x_cu x CU position (smallest CU) - * \param y_cu y CU position (smallest CU) - * \param preds output buffer for 3 predictions - * \returns (predictions are found)?1:0 - */ -int8_t kvz_intra_get_dir_luma_predictor(const uint32_t x, const uint32_t y, int8_t* preds, - const cu_info_t * const cur_cu, const cu_info_t * const left_cu, const cu_info_t * const above_cu) +int8_t kvz_intra_get_dir_luma_predictor( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_cu, + const cu_info_t *const left_cu, + const cu_info_t *const above_cu) { int y_cu = y>>3; @@ -166,158 +94,235 @@ return 1; } + +static void intra_filter_reference( + int_fast8_t log2_width, + kvz_intra_references *refs) +{ + if (refs->filtered_initialized) { + return; + } else { + refs->filtered_initialized = true; + } + + const int_fast8_t ref_width = 2 * (1 << log2_width) + 1; + kvz_intra_ref *ref = &refs->ref; + kvz_intra_ref *filtered_ref = &refs->filtered_ref; + + filtered_ref->left[0] = (ref->left[1] + 2 * ref->left[0] + ref->top[1] + 2) / 4; + filtered_ref->top[0] = filtered_ref->left[0]; + + for (int_fast8_t y = 1; y < ref_width - 1; ++y) { + kvz_pixel *p = &ref->left[y]; + filtered_ref->left[y] = (p[-1] + 2 * p[0] + p[1] + 2) / 4; + } + filtered_ref->left[ref_width - 1] = ref->left[ref_width - 1]; + + for (int_fast8_t x = 1; x < ref_width - 1; ++x) { + 
kvz_pixel *p = &ref->top[x]; + filtered_ref->top[x] = (p[-1] + 2 * p[0] + p[1] + 2) / 4; + } + filtered_ref->top[ref_width - 1] = ref->top[ref_width - 1]; +} + + +static void intra_post_process_angular( + unsigned width, + unsigned stride, + const kvz_pixel *ref, + kvz_pixel *block) +{ + kvz_pixel ref2 = ref[0]; + for (unsigned i = 0; i < width; i++) { + kvz_pixel val = block[i * stride]; + kvz_pixel ref1 = ref[i + 1]; + block[i * stride] = CLIP_TO_PIXEL(val + ((ref1 - ref2) >> 1)); + } +} + + /** - * \brief Intra filtering of the border samples - * \param ref reference picture data - * \param x_cu x CU position (smallest CU) - * \param y_cu y CU position (smallest CU) - * \param depth current CU depth - * \param preds output buffer for 3 predictions - * \returns (predictions are found)?1:0 + * \brief Generage planar prediction. + * \param log2_width Log2 of width, range 2..5. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. */ -void kvz_intra_filter(kvz_pixel *ref, int32_t stride,int32_t width, int8_t mode) +static void intra_pred_planar( + const int_fast8_t log2_width, + const kvz_pixel *const ref_top, + const kvz_pixel *const ref_left, + kvz_pixel *const dst) { - #define FWIDTH (LCU_WIDTH*2+1) - kvz_pixel filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples - kvz_pixel *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1) - int x,y; - - if (!mode) { - // pF[ -1 ][ -1 ] = ( p[ -1 ][ 0 ] + 2*p[ -1 ][ -1 ] + p[ 0 ][ -1 ] + 2 ) >> 2 (8 35) - filteredShift[-FWIDTH-1] = (ref[-1] + 2*ref[-(int32_t)stride-1] + ref[-(int32_t)stride] + 2) >> 2; - - // pF[ -1 ][ y ] = ( p[ -1 ][ y + 1 ] + 2*p[ -1 ][ y ] + p[ -1 ][ y - 1 ] + 2 ) >> 2 for y = 0..nTbS * 2 - 2 (8 36) - for (y = 0; y < (int32_t)width * 2 - 1; y++) { - filteredShift[y*FWIDTH-1] = (ref[(y + 1) * stride - 1] + 2*ref[y * stride - 1] + ref[(y - 1) * stride - 1] + 2) >> 2; + assert(log2_width >= 2 && log2_width <= 5); + + const int_fast8_t width = 1 << log2_width; + const kvz_pixel top_right = ref_top[width + 1]; + const kvz_pixel bottom_left = ref_left[width + 1]; + +#if 0 + // Unoptimized version for reference. 
+ for (int y = 0; y < width; ++y) { + for (int x = 0; x < width; ++x) { + int_fast16_t hor = (width - 1 - x) * ref_left[y + 1] + (x + 1) * top_right; + int_fast16_t ver = (width - 1 - y) * ref_top[x + 1] + (y + 1) * bottom_left; + dst[y * width + x] = (ver + hor + width) >> (log2_width + 1); } + } +#else + int_fast16_t top[32]; + for (int i = 0; i < width; ++i) { + top[i] = ref_top[i + 1] << log2_width; + } - // pF[ -1 ][ nTbS * 2 - 1 ] = p[ -1 ][ nTbS * 2 - 1 ] (8 37) - filteredShift[(width * 2 - 1) * FWIDTH - 1] = ref[(width * 2 - 1) * stride - 1]; - - // pF[ x ][ -1 ] = ( p[ x - 1 ][ -1 ] + 2*p[ x ][ -1 ] + p[ x + 1 ][ -1 ] + 2 ) >> 2 for x = 0..nTbS * 2 - 2 (8 38) - for(x = 0; x < (int32_t)width*2-1; x++) { - filteredShift[x - FWIDTH] = (ref[x - 1 - stride] + 2*ref[x - stride] + ref[x + 1 - stride] + 2) >> 2; + for (int y = 0; y < width; ++y) { + int_fast16_t hor = (ref_left[y + 1] << log2_width) + width; + for (int x = 0; x < width; ++x) { + hor += top_right - ref_left[y + 1]; + top[x] += bottom_left - ref_top[x + 1]; + dst[y * width + x] = (hor + top[x]) >> (log2_width + 1); } + } +#endif +} - // pF[ nTbS * 2 - 1 ][ -1 ] = p[ nTbS * 2 - 1 ][ -1 ] - filteredShift[(width * 2 - 1) - FWIDTH] = ref[(width * 2 - 1) - stride]; - // Copy filtered samples to the input array - for (x = -1; x < (int32_t)width * 2; x++) { - ref[x - stride] = filtered[x + 1]; - } - for(y = 0; y < (int32_t)width * 2; y++) { - ref[y * stride - 1] = filtered[(y + 1) * FWIDTH]; - } - } else { - printf("UNHANDLED: %s: %d\r\n", __FILE__, __LINE__); - exit(1); +/** +* \brief Generage planar prediction. +* \param log2_width Log2 of width, range 2..5. +* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. +* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. +* \param dst Buffer of size width*width. +*/ +static void intra_pred_dc( + const int_fast8_t log2_width, + const kvz_pixel *const ref_top, + const kvz_pixel *const ref_left, + kvz_pixel *const out_block) +{ + int_fast8_t width = 1 << log2_width; + + int_fast16_t sum = 0; + for (int_fast8_t i = 0; i < width; ++i) { + sum += ref_top[i + 1]; + sum += ref_left[i + 1]; + } + + const kvz_pixel dc_val = (sum + width) >> (log2_width + 1); + const int_fast16_t block_size = 1 << (log2_width * 2); + + for (int_fast16_t i = 0; i < block_size; ++i) { + out_block[i] = dc_val; } - #undef FWIDTH } /** - * \param rec Reference pixel. 0 points to unfiltered and 1 to filtered. - * \param recstride Stride for rec pixel arrays. - * \param dst - */ -void kvz_intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma) +* \brief Generage intra DC prediction with post filtering applied. +* \param log2_width Log2 of width, range 2..5. +* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. +* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. +* \param dst Buffer of size width*width. 
+*/ +static void intra_pred_filtered_dc( + const int_fast8_t log2_width, + const kvz_pixel *const ref_top, + const kvz_pixel *const ref_left, + kvz_pixel *const out_block) { - const kvz_pixel *ref_pixels = rec; - if (is_chroma || mode == 1 || width == 4) { + assert(log2_width >= 2 && log2_width <= 5); + + const int_fast8_t width = 1 << log2_width; + + int_fast16_t sum = 0; + for (int_fast8_t i = 0; i < width; ++i) { + sum += ref_top[i + 1]; + sum += ref_left[i + 1]; + } + + const kvz_pixel dc_val = (sum + width) >> (log2_width + 1); + + // Filter top-left with ([1 2 1] / 4) + out_block[0] = (ref_left[1] + 2 * dc_val + ref_top[1] + 2) / 4; + + // Filter rest of the boundary with ([1 3] / 4) + for (int_fast8_t x = 1; x < width; ++x) { + out_block[x] = (ref_top[x + 1] + 3 * dc_val + 2) / 4; + } + for (int_fast8_t y = 1; y < width; ++y) { + out_block[y * width] = (ref_left[y + 1] + 3 * dc_val + 2) / 4; + for (int_fast8_t x = 1; x < width; ++x) { + out_block[y * width + x] = dc_val; + } + } +} + + +void kvz_intra_predict( + kvz_intra_references *refs, + int_fast8_t log2_width, + int_fast8_t mode, + color_t color, + kvz_pixel *dst) +{ + const int_fast8_t width = 1 << log2_width; + + const kvz_intra_ref *used_ref = &refs->ref; + if (color != COLOR_Y || mode == 1 || width == 4) { // For chroma, DC and 4x4 blocks, always use unfiltered reference. } else if (mode == 0) { // Otherwise, use filtered for planar. - ref_pixels = rec_filtered; + used_ref = &refs->filtered_ref; } else { // Angular modes use smoothed reference pixels, unless the mode is close // to being either vertical or horizontal. + static const int kvz_intra_hor_ver_dist_thres[5] = { 0, 7, 1, 0, 0 }; int filter_threshold = kvz_intra_hor_ver_dist_thres[g_to_bits[width]]; int dist_from_vert_or_hor = MIN(abs(mode - 26), abs(mode - 10)); if (dist_from_vert_or_hor > filter_threshold) { - ref_pixels = rec_filtered; + used_ref = &refs->filtered_ref; } } + if (used_ref == &refs->filtered_ref && !refs->filtered_initialized) { + intra_filter_reference(log2_width, refs); + } + if (mode == 0) { - kvz_intra_get_planar_pred(ref_pixels, recstride, width, dst, width); + intra_pred_planar(log2_width, used_ref->top, used_ref->left, dst); } else if (mode == 1) { - int i; - kvz_pixel val = kvz_intra_get_dc_pred(ref_pixels, recstride, width); - for (i = 0; i < width * width; i++) { - dst[i] = val; - } // Do extra post filtering for edge pixels of luma DC mode. 
- if (!is_chroma && width < 32) { - kvz_intra_dc_pred_filtering(ref_pixels, recstride, dst, width, width, width); + if (color == COLOR_Y && width < 32) { + intra_pred_filtered_dc(log2_width, used_ref->top, used_ref->left, dst); + } else { + intra_pred_dc(log2_width, used_ref->top, used_ref->left, dst); } } else { - int filter = !is_chroma && width < 32; - kvz_intra_get_angular_pred(encoder, ref_pixels, recstride, dst, width, width, mode, filter); + kvz_angular_pred(log2_width, mode, used_ref->top, used_ref->left, dst); + if (color == COLOR_Y && width < 32) { + if (mode == 10) { + intra_post_process_angular(width, 1, used_ref->top, dst); + } else if (mode == 26) { + intra_post_process_angular(width, width, used_ref->left, dst); + } + } } } - -/** - * \brief Reconstruct intra block according to prediction - * \param rec reconstructed picture data - * \param recstride reconstructed picture stride - * \param width block size to predict - * \param dst destination buffer for best prediction - * \param dststride destination width - * \param mode intra mode to use - * \param chroma chroma-block flag - -*/ -void kvz_intra_recon(const encoder_control_t * const encoder, kvz_pixel* rec, int32_t recstride, uint32_t width, kvz_pixel* dst, int32_t dststride, int8_t mode, int8_t chroma) +void kvz_intra_build_reference( + const int_fast8_t log2_width, + const color_t color, + const vector2d_t *const luma_px, + const vector2d_t *const pic_px, + const lcu_t *const lcu, + kvz_intra_references *const refs) { - kvz_pixel pred[LCU_WIDTH * LCU_WIDTH]; - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - // Generate filtered reference pixels. - { - int x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } - - kvz_intra_get_pred(encoder, rec, recf, recstride, pred, width, mode, chroma); - - kvz_pixels_blit(pred, dst, width, width, width, dststride); -} + assert(log2_width >= 2 && log2_width <= 5); -/** - * \brief Build top and left borders for a reference block. - * \param pic picture to use as a source - * \param outwidth width of the prediction block - * \param chroma signaling if chroma is used, 0 = luma, 1 = U and 2 = V - * - * The end result is 2*width+8 x 2*width+8 array, with only the top and left - * edge pixels filled with the reconstructed pixels. - */ -void kvz_intra_build_reference_border(const encoder_control_t * const encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, - kvz_pixel *dst, int32_t dst_stride, int8_t chroma, - int32_t pic_width, int32_t pic_height, - lcu_t *lcu) -{ - // Some other function might make use of the arrays num_ref_pixels_top and - // num_ref_pixels_left in the future, but until that happens lets leave - // them here. - - /** - * \brief Table for looking up the number of intra reference pixels based on - * prediction units coordinate within an LCU. - * - * This table was generated by "tools/generate_ref_pixel_tables.py". - */ + // Tables for looking up the number of intra reference pixels based on + // prediction units coordinate within an LCU. + // generated by "tools/generate_ref_pixel_tables.py". 
static const uint8_t num_ref_pixels_top[16][16] = { { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, { 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, @@ -336,329 +341,149 @@ { 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 }, { 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 } }; - - /** - * \brief Table for looking up the number of intra reference pixels based on - * prediction units coordinate within an LCU. - * - * This table was generated by "tools/generate_ref_pixel_tables.py". - */ static const uint8_t num_ref_pixels_left[16][16] = { { 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 }, - { 64, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, - { 64, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, - { 64, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, - { 64, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, - { 64, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, - { 64, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, - { 64, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, - { 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 }, - { 64, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, - { 64, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, - { 64, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, - { 64, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, - { 64, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, - { 64, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, - { 64, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } + { 60, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, + { 56, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, + { 52, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, + { 48, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, + { 44, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, + { 40, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, + { 36, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, + { 32, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 }, + { 28, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, + { 24, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, + { 20, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, + { 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, + { 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, + { 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, + { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }; - const kvz_pixel dc_val = 1 << (encoder->bitdepth - 1); - const int is_chroma = chroma ? 1 : 0; + refs->filtered_initialized = false; + kvz_pixel *out_left_ref = &refs->ref.left[0]; + kvz_pixel *out_top_ref = &refs->ref.top[0]; - // input picture pointer - //const pixel * const src = (!chroma) ? pic->y_recdata : ((chroma == 1) ? pic->u_recdata : pic->v_recdata); + const kvz_pixel dc_val = 1 << (KVZ_BIT_DEPTH - 1); + const int is_chroma = color != COLOR_Y ? 1 : 0; + const int_fast8_t width = 1 << log2_width; // Convert luma coordinates to chroma coordinates for chroma. - const int x = chroma ? x_luma / 2 : x_luma; - const int y = chroma ? y_luma / 2 : y_luma; - - const int y_in_lcu = y_luma % LCU_WIDTH; - const int x_in_lcu = x_luma % LCU_WIDTH; - - int x_local = (x_luma&0x3f)>>is_chroma, y_local = (y_luma&0x3f)>>is_chroma; - - kvz_pixel *left_ref = !chroma ? &lcu->left_ref.y[1] : (chroma == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1]; - kvz_pixel *top_ref = !chroma ? &lcu->top_ref.y[1] : (chroma == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1]; - kvz_pixel *rec_ref = !chroma ? lcu->rec.y : (chroma == 1) ? 
lcu->rec.u : lcu->rec.v; - - kvz_pixel *left_border = &left_ref[y_local]; - kvz_pixel *top_border = &top_ref[x_local]; - uint32_t left_stride = 1; + const vector2d_t lcu_px = { + luma_px->x % LCU_WIDTH, + luma_px->y % LCU_WIDTH + }; + const vector2d_t px = { + lcu_px.x >> is_chroma, + lcu_px.y >> is_chroma, + }; - if(x_local) { - left_border = &rec_ref[x_local - 1 + y_local * (LCU_WIDTH>>is_chroma)]; - left_stride = LCU_WIDTH>>is_chroma; - } + // Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0. + const kvz_pixel *left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1]; + const kvz_pixel *top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1]; + const kvz_pixel *rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v; - if(y_local) { - top_border = &rec_ref[x_local + (y_local - 1) * (LCU_WIDTH>>is_chroma)]; + // Init top borders pointer to point to the correct place in the correct reference array. + const kvz_pixel *top_border; + if (px.y) { + top_border = &rec_ref[px.x + (px.y - 1) * (LCU_WIDTH >> is_chroma)]; + } else { + top_border = &top_ref[px.x]; } - // Copy pixels for left edge. - if (x > 0) { - // Get the number of reference pixels based on the PU coordinate within the LCU. - int num_ref_pixels = num_ref_pixels_left[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma; - int i; - kvz_pixel nearest_pixel; - - // Max pixel we can copy from src is yy + outwidth - 1 because the dst - // extends one pixel to the left. - num_ref_pixels = MIN(num_ref_pixels, out_width - 1); - // There are no coded pixels below the frame. - num_ref_pixels = MIN(num_ref_pixels, pic_height - y); - // There are no coded pixels below the bottom of the LCU due to raster - // scan order. - num_ref_pixels = MIN(num_ref_pixels, (LCU_WIDTH - y_in_lcu) >> is_chroma); - - // Copy pixels from coded CUs. - for (i = 0; i < num_ref_pixels; ++i) { - dst[(i + 1) * dst_stride] = left_border[i*left_stride]; - } - // Extend the last pixel for the rest of the reference values. - nearest_pixel = dst[i * dst_stride]; - for (i = num_ref_pixels; i < out_width - 1; ++i) { - dst[i * dst_stride] = nearest_pixel; - } + // Init left borders pointer to point to the correct place in the correct reference array. + const kvz_pixel *left_border; + int left_stride; // Distance between reference samples. + if (px.x) { + left_border = &rec_ref[px.x - 1 + px.y * (LCU_WIDTH >> is_chroma)]; + left_stride = LCU_WIDTH >> is_chroma; } else { - // If we are on the left edge, extend the first pixel of the top row. - kvz_pixel nearest_pixel = y > 0 ? top_border[0] : dc_val; - int i; - for (i = 1; i < out_width - 1; i++) { - dst[i * dst_stride] = nearest_pixel; - } + left_border = &left_ref[px.y]; + left_stride = 1; } - // Copy pixels for top edge. - if (y > 0) { + // Generate left reference. + if (luma_px->x > 0) { // Get the number of reference pixels based on the PU coordinate within the LCU. - int num_ref_pixels = num_ref_pixels_top[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma; - int i; - kvz_pixel nearest_pixel; + int px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma; - // Max pixel we can copy from src is yy + outwidth - 1 because the dst - // extends one pixel to the left. - num_ref_pixels = MIN(num_ref_pixels, out_width - 1); - // All LCUs in the row above have been coded. 
- num_ref_pixels = MIN(num_ref_pixels, pic_width - x); + // Limit the number of available pixels based on block size and dimensions + // of the picture. + px_available_left = MIN(px_available_left, width * 2); + px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma); // Copy pixels from coded CUs. - for (i = 0; i < num_ref_pixels; ++i) { - dst[i + 1] = top_border[i]; + for (int i = 0; i < px_available_left; ++i) { + out_left_ref[i + 1] = left_border[i * left_stride]; } // Extend the last pixel for the rest of the reference values. - nearest_pixel = top_border[num_ref_pixels - 1]; - for (; i < out_width - 1; ++i) { - dst[i + 1] = nearest_pixel; + kvz_pixel nearest_pixel = out_left_ref[px_available_left]; + for (int i = px_available_left; i < width * 2; ++i) { + out_left_ref[i + 1] = nearest_pixel; } } else { - // Extend nearest pixel. - kvz_pixel nearest_pixel = x > 0 ? left_border[0] : dc_val; - int i; - for(i = 1; i < out_width; i++) - { - dst[i] = nearest_pixel; + // If we are on the left edge, extend the first pixel of the top row. + kvz_pixel nearest_pixel = luma_px->y > 0 ? top_border[0] : dc_val; + for (int i = 0; i < width * 2; i++) { + out_left_ref[i + 1] = nearest_pixel; } } - // If top-left corner sample doesn't exist, use the sample from below. - // Unavailable samples on the left boundary are copied from below if - // available. This is the only place they are available because we don't - // support constrained intra prediction. - if (x > 0 && y > 0) { - // Make sure we always take the top-left pixel from the LCU reference - // pixel arrays if they are available. - if (x_local == 0) { - dst[0] = left_border[-1]; + // Generate top-left reference. + if (luma_px->x > 0 && luma_px->y > 0) { + // If the block is at an LCU border, the top-left must be copied from + // the border that points to the LCUs 1D reference buffer. + if (px.x == 0) { + out_left_ref[0] = left_border[-1 * left_stride]; + out_top_ref[0] = left_border[-1 * left_stride]; } else { - dst[0] = top_border[-1]; + out_left_ref[0] = top_border[-1]; + out_top_ref[0] = top_border[-1]; } } else { - dst[0] = dst[dst_stride]; + // Copy reference clockwise. + out_left_ref[0] = out_left_ref[1]; + out_top_ref[0] = out_left_ref[1]; } -} - -const int32_t kvz_ang_table[9] = {0, 2, 5, 9, 13, 17, 21, 26, 32}; -const int32_t kvz_inv_ang_table[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / Angle -/** - * \brief this functions constructs the angular intra prediction from border samples - * - */ -void kvz_intra_get_angular_pred(const encoder_control_t * const encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter) -{ - int32_t k,l; - int32_t blk_size = width; - - // Map the mode index to main prediction direction and angle - bool mode_ver = dir_mode >= 18; - int32_t intra_pred_angle = mode_ver ? dir_mode - 26 : 10 - dir_mode; - int32_t abs_ang = abs(intra_pred_angle); - int32_t sign_ang = intra_pred_angle < 0 ? -1 : 1; - - // Set bitshifts and scale the angle parameter to block size - int32_t inv_angle = kvz_inv_ang_table[abs_ang]; - - // Do angular predictions - kvz_pixel *ref_main; - kvz_pixel *ref_side; - kvz_pixel ref_above[2 * LCU_WIDTH + 1]; - kvz_pixel ref_left[2 * LCU_WIDTH + 1]; - - // Tell clang-analyzer that everything is ok. 
- assert(width == 4 || width == 8 || width == 16 || width == 32); - - abs_ang = kvz_ang_table[abs_ang]; - intra_pred_angle = sign_ang * abs_ang; - - // Initialise the Main and Left reference array. - if (intra_pred_angle < 0) { - int32_t invAngleSum = 128; // rounding for (shift by 8) - for (k = 0; k < blk_size + 1; k++) { - ref_above[k + blk_size - 1] = src[k - src_stride - 1]; - ref_left[k + blk_size - 1] = src[(k - 1) * src_stride - 1]; - } - - ref_main = (mode_ver ? ref_above : ref_left) + (blk_size - 1); - ref_side = (mode_ver ? ref_left : ref_above) + (blk_size - 1); + // Generate top reference. + if (luma_px->y > 0) { + // Get the number of reference pixels based on the PU coordinate within the LCU. + int px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma; - // Extend the Main reference to the left. - for (k = -1; k > blk_size * intra_pred_angle>>5; k--) { - invAngleSum += inv_angle; - ref_main[k] = ref_side[invAngleSum>>8]; - } - } else { - for (k = 0; k < 2 * blk_size + 1; k++) { - ref_above[k] = src[k - src_stride - 1]; - ref_left[k] = src[(k - 1) * src_stride - 1]; - } - ref_main = mode_ver ? ref_above : ref_left; - ref_side = mode_ver ? ref_left : ref_above; - } + // Limit the number of available pixels based on block size and dimensions + // of the picture. + px_available_top = MIN(px_available_top, width * 2); + px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma); - if (intra_pred_angle == 0) { - for (k = 0; k < blk_size; k++) { - for (l = 0; l < blk_size; l++) { - dst[k * dst_stride + l] = ref_main[l + 1]; - } + // Copy all the pixels we can. + for (int i = 0; i < px_available_top; ++i) { + out_top_ref[i + 1] = top_border[i]; } - - if (filter) { - for (k=0;k<blk_size;k++) { - dst[k * dst_stride] = CLIP(0, (1<<encoder->bitdepth) - 1, dst[k * dst_stride] + (( ref_side[k + 1] - ref_side[0]) >> 1)); - } + // Extend the last pixel for the rest of the reference values. + kvz_pixel nearest_pixel = top_border[px_available_top - 1]; + for (int i = px_available_top; i < width * 2; ++i) { + out_top_ref[i + 1] = nearest_pixel; } } else { - int32_t delta_pos=0; - int32_t delta_int; - int32_t delta_fract; - int32_t minus_delta_fract; - int32_t ref_main_index; - for (k = 0; k < blk_size; k++) { - delta_pos += intra_pred_angle; - delta_int = delta_pos >> 5; - delta_fract = delta_pos & (32 - 1); - - - if (delta_fract) { - minus_delta_fract = (32 - delta_fract); - // Do linear filtering - for (l = 0; l < blk_size; l++) { - ref_main_index = l + delta_int + 1; - dst[k * dst_stride + l] = (kvz_pixel) ( (minus_delta_fract * ref_main[ref_main_index] - + delta_fract * ref_main[ref_main_index + 1] + 16) >> 5); - } - } else { - // Just copy the integer samples - for (l = 0; l < blk_size; l++) { - dst[k * dst_stride + l] = ref_main[l + delta_int + 1]; - } - } - } - } - - // Flip the block if this is the horizontal mode - if (!mode_ver) { - kvz_pixel tmp; - for (k=0;k<blk_size-1;k++) { - for (l=k+1;l<blk_size;l++) { - tmp = dst[k * dst_stride + l]; - dst[k * dst_stride + l] = dst[l * dst_stride + k]; - dst[l * dst_stride + k] = tmp; - } + // Extend nearest pixel. + kvz_pixel nearest_pixel = luma_px->x > 0 ? 
left_border[0] : dc_val; + for (int i = 0; i < width * 2; i++) { + out_top_ref[i + 1] = nearest_pixel; } } } - - -void kvz_intra_dc_pred_filtering(const kvz_pixel *src, int32_t src_stride, kvz_pixel *dst, int32_t dst_stride, int32_t width, int32_t height ) +void kvz_intra_recon_lcu_luma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu) { - int32_t x, y, dst_stride2, src_stride2; - - // boundary pixels processing - dst[0] = ((src[-src_stride] + src[-1] + 2 * dst[0] + 2) >> 2); - - for (x = 1; x < width; x++) { - dst[x] = ((src[x - src_stride] + 3 * dst[x] + 2) >> 2); - } - for ( y = 1, dst_stride2 = dst_stride, src_stride2 = src_stride-1; - y < height; y++, dst_stride2+=dst_stride, src_stride2+=src_stride ) { - dst[dst_stride2] = ((src[src_stride2] + 3 * dst[dst_stride2] + 2) >> 2); - } - return; -} - -/** - * \brief Function for deriving planar intra prediction. - * \param src source pixel array - * \param srcstride source width - * \param width block size to predict - * \param dst destination buffer for prediction - * \param dststride destination width - - This function derives the prediction samples for planar mode (intra coding). -*/ -void kvz_intra_get_planar_pred(const kvz_pixel* src, int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride) -{ - int32_t k, l, bottom_left, top_right; - int32_t hor_pred; - int32_t left_column[LCU_WIDTH+1], top_row[LCU_WIDTH+1], bottom_row[LCU_WIDTH+1], right_column[LCU_WIDTH+1]; - uint32_t blk_size = width; - uint32_t offset_2d = width; - uint32_t shift_1d = kvz_g_convert_to_bit[ width ] + 2; - uint32_t shift_2d = shift_1d + 1; - - // Get left and above reference column and row - for (k = 0; k < (int32_t)blk_size + 1; k++) { - top_row[k] = src[k - srcstride]; - left_column[k] = src[k * srcstride - 1]; - } - - // Prepare intermediate variables used in interpolation - bottom_left = left_column[blk_size]; - top_right = top_row[blk_size]; - for (k = 0; k < (int32_t)blk_size; k++) { - bottom_row[k] = bottom_left - top_row[k]; - right_column[k] = top_right - left_column[k]; - top_row[k] <<= shift_1d; - left_column[k] <<= shift_1d; - } - - // Generate prediction signal - for (k = 0; k < (int32_t)blk_size; k++) { - hor_pred = left_column[k] + offset_2d; - for (l = 0; l < (int32_t)blk_size; l++) { - hor_pred += right_column[k]; - top_row[l] += bottom_row[l]; - dst[k * dststride + l] = (kvz_pixel)((hor_pred + top_row[l]) >> shift_2d); - } - } -} - -void kvz_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu) -{ - const encoder_control_t * const encoder = state->encoder_control; const vector2d_t lcu_px = { x & 0x3f, y & 0x3f }; if (cur_cu == NULL) { cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH]; @@ -684,30 +509,33 @@ return; } - { - const uint32_t pic_width = state->tile->frame->width; - const uint32_t pic_height = state->tile->frame->height; - - // Pointers to reconstruction arrays - kvz_pixel *recbase_y = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; - kvz_pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - kvz_pixel *rec_shift = &rec[width * 2 + 8 + 1]; + // Perform intra prediction and put the result in correct place lcu. 
+ vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; + vector2d_t luma_px = { x, y }; + kvz_intra_references refs; + const int_fast8_t log2_width = kvz_g_convert_to_bit[width] + 2; + kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs); - int32_t rec_stride = LCU_WIDTH; + kvz_pixel pred[32 * 32]; + kvz_intra_predict(&refs, log2_width, intra_mode, COLOR_Y, pred); + + kvz_pixel *block_in_lcu = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; + kvz_pixels_blit(pred, block_in_lcu, width, width, width, LCU_WIDTH); - kvz_intra_build_reference_border(encoder, x, y,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0, - pic_width, pic_height, lcu); - kvz_intra_recon(encoder, rec_shift, width * 2 + 8, - width, recbase_y, rec_stride, intra_mode, 0); - - kvz_quantize_lcu_luma_residual(state, x, y, depth, cur_cu, lcu); - } + kvz_quantize_lcu_luma_residual(state, x, y, depth, cur_cu, lcu); } -void kvz_intra_recon_lcu_chroma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu) + +void kvz_intra_recon_lcu_chroma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu) { - const encoder_control_t * const encoder = state->encoder_control; const vector2d_t lcu_px = { x & 0x3f, y & 0x3f }; const int8_t width = LCU_WIDTH >> depth; const int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); @@ -739,44 +567,35 @@ return; } - { - const uint32_t pic_width = state->tile->frame->width; - const uint32_t pic_height = state->tile->frame->height; - - // Pointers to reconstruction arrays - kvz_pixel *recbase_u = &lcu->rec.u[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4]; - kvz_pixel *recbase_v = &lcu->rec.v[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4]; - - kvz_pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - - int32_t rec_stride = LCU_WIDTH; - - // Reconstruct chroma. - if (!(x & 4 || y & 4)) { - kvz_pixel *rec_shift_c = &rec[width_c * 2 + 8 + 1]; - kvz_intra_build_reference_border(encoder, x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1, - pic_width/2, pic_height/2, lcu); - kvz_intra_recon(encoder, - rec_shift_c, - width_c * 2 + 8, - width_c, - recbase_u, - rec_stride >> 1, - intra_mode, - 1); - - kvz_intra_build_reference_border(encoder, x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 2, - pic_width/2, pic_height/2, lcu); - kvz_intra_recon(encoder, - rec_shift_c, - width_c * 2 + 8, - width_c, - recbase_v, - rec_stride >> 1, - intra_mode, - 2); - - kvz_quantize_lcu_chroma_residual(state, x, y, depth, cur_cu, lcu); + if (!(x & 4 || y & 4)) { + const int_fast8_t log2_width_c = kvz_g_convert_to_bit[width_c] + 2; + const vector2d_t luma_px = { x, y }; + const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; + + // Intra predict U-plane and put the result in lcu buffer. + { + kvz_intra_references refs; + kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs); + + kvz_pixel pred[32 * 32]; + kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_U, pred); + + kvz_pixel *pu_in_lcu = &lcu->rec.u[lcu_px.x / 2 + (lcu_px.y * LCU_WIDTH) / 4]; + kvz_pixels_blit(pred, pu_in_lcu, width_c, width_c, width_c, LCU_WIDTH_C); } + + // Intra predict V-plane and put the result in lcu buffer. 
+ { + kvz_intra_references refs; + kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs); + + kvz_pixel pred[32 * 32]; + kvz_intra_predict(&refs, log2_width_c, intra_mode, COLOR_V, pred); + + kvz_pixel *pu_in_lcu = &lcu->rec.v[lcu_px.x / 2 + (lcu_px.y * LCU_WIDTH) / 4]; + kvz_pixels_blit(pred, pu_in_lcu, width_c, width_c, width_c, LCU_WIDTH_C); + } + + kvz_quantize_lcu_chroma_residual(state, x, y, depth, cur_cu, lcu); } }
View file
kvazaar-0.7.0.tar.gz/src/intra.h -> kvazaar-0.7.1.tar.gz/src/intra.h
Changed
@@ -27,29 +27,91 @@ #include "global.h" -#include "image.h" -#include "encoder.h" #include "encoderstate.h" -//void kvz_intra_set_block_mode(image* im,uint32_t x_ctb, uint32_t y_ctb, uint8_t depth, uint8_t mode, uint8_t part_mode); +typedef struct { + kvz_pixel left[2 * 32 + 1]; + kvz_pixel top[2 * 32 + 1]; +} kvz_intra_ref; +typedef struct +{ + kvz_intra_ref ref; + kvz_intra_ref filtered_ref; + bool filtered_initialized; +} kvz_intra_references; -int8_t kvz_intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds, - const cu_info_t* cur_cu, const cu_info_t* left_cu, const cu_info_t* above_cu); -void kvz_intra_dc_pred_filtering(const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t height ); -void kvz_intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, kvz_pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu); -void kvz_intra_filter(kvz_pixel* ref, int32_t stride, int32_t width, int8_t mode); +/** +* \brief Function for deriving intra luma predictions +* \param pic picture to use +* \param x_cu x CU position (smallest CU) +* \param y_cu y CU position (smallest CU) +* \param preds output buffer for 3 predictions +* \returns (predictions are found)?1:0 +*/ +int8_t kvz_intra_get_dir_luma_predictor( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_cu, + const cu_info_t *const left_cu, + const cu_info_t *const above_cu); -/* Predictions */ -void kvz_intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma); +/** +* \brief Generage angular predictions. +* \param width Width in pixels, range 4..32. +* \param color What color pixels to use. +* \param luma_px Luma coordinates of the prediction block. +* \param pic_px Picture dimensions in luma pixels. +* \param lcu LCU struct. +* \param out_left_ref Left reference pixels, index 0 is the top-left. +* \param out_top_ref Top reference pixels, index 0 is the top-left. +*/ +void kvz_intra_build_reference( + const int_fast8_t log2_width, + const color_t color, + const vector2d_t *const luma_px, + const vector2d_t *const pic_px, + const lcu_t *const lcu, + kvz_intra_references *const refs); -kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel* pic, uint16_t pic_width, uint8_t width); -void kvz_intra_get_planar_pred(const kvz_pixel* src,int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride); -void kvz_intra_get_angular_pred(const encoder_control_t *encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter); +/** + * \brief Generate intra predictions. + * \param refs Reference pixels used for the prediction. + * \param log2_width Width of the predicted block. + * \param mode Intra mode used for the prediction. + * \param color Color of the prediction. + * \param dst Buffer for the predicted pixels. + */ +void kvz_intra_predict( + kvz_intra_references *refs, + int_fast8_t log2_width, + int_fast8_t mode, + color_t color, + kvz_pixel *dst); -void kvz_intra_recon(const encoder_control_t *encoder, kvz_pixel* rec, int32_t rec_stride, uint32_t width, kvz_pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); +/** + * \brief Do a full intra prediction cycle on a CU in lcu for luma. 
+ */ +void kvz_intra_recon_lcu_luma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu); -void kvz_intra_recon_lcu_luma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); -void kvz_intra_recon_lcu_chroma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); +/** +* \brief Do a full intra prediction cycle on a CU in lcu for chroma. +*/ +void kvz_intra_recon_lcu_chroma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu); #endif
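The reworked header above replaces the old border-buffer interface with a two-step API: kvz_intra_build_reference() gathers the left and top reference rows into a kvz_intra_references struct, and kvz_intra_predict() turns them into a prediction block. A minimal caller, modelled on kvz_intra_recon_lcu_luma() in the intra.c hunk above, might look like the sketch below; the helper name is hypothetical, and kvz_g_convert_to_bit and kvz_pixels_blit are assumed to be visible through the encoder's other headers.

/* Hypothetical helper around the 0.7.1 intra API; only intra.h is certain,
 * the other declarations are assumed to come from the encoder's headers. */
#include "intra.h"

static void predict_luma_block(encoder_state_t *state, lcu_t *lcu,
                               int x, int y, int width, int8_t mode,
                               kvz_pixel *dst, int dst_stride)
{
  const vector2d_t pic_px = { state->tile->frame->width,
                              state->tile->frame->height };
  const vector2d_t luma_px = { x, y };
  const int_fast8_t log2_width = kvz_g_convert_to_bit[width] + 2;

  // Step 1: collect the left and top reference pixels for this block.
  kvz_intra_references refs;
  kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs);

  // Step 2: predict into a temporary buffer (32x32 is the largest PU size).
  kvz_pixel pred[32 * 32];
  kvz_intra_predict(&refs, log2_width, mode, COLOR_Y, pred);

  // Copy the prediction to the destination with the caller's stride.
  kvz_pixels_blit(pred, dst, width, width, width, dst_stride);
}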
View file
kvazaar-0.7.0.tar.gz/src/kvazaar.c -> kvazaar-0.7.1.tar.gz/src/kvazaar.c
Changed
@@ -123,6 +123,27 @@ } +static int kvazaar_headers(kvz_encoder *enc, + kvz_data_chunk **data_out, + uint32_t *len_out) +{ + if (data_out) *data_out = NULL; + if (len_out) *len_out = 0; + + bitstream_t stream; + kvz_bitstream_init(&stream); + + kvz_encoder_state_write_parameter_sets(&stream, &enc->states[enc->cur_state_num]); + + // Get stream length before taking chunks since that clears the stream. + if (len_out) *len_out = kvz_bitstream_tell(&stream) / 8; + if (data_out) *data_out = kvz_bitstream_take_chunks(&stream); + + kvz_bitstream_finalize(&stream); + return 1; +} + + static int kvazaar_encode(kvz_encoder *enc, kvz_picture *pic_in, kvz_data_chunk **data_out, @@ -204,6 +225,7 @@ .encoder_open = kvazaar_open, .encoder_close = kvazaar_close, + .encoder_headers = kvazaar_headers, .encoder_encode = kvazaar_encode, };
View file
kvazaar-0.7.0.tar.gz/src/kvazaar.h -> kvazaar-0.7.1.tar.gz/src/kvazaar.h
Changed
@@ -106,7 +106,18 @@ { int32_t qp; /*!< \brief Quantization parameter */ int32_t intra_period; /*!< \brief the period of intra frames in stream */ - int32_t vps_period; /*!< \brief how often the vps is re-sent */ + + /** \brief How often the VPS, SPS and PPS are re-sent + * + * -1: never + * 0: first frame only + * 1: every intra frame + * 2: every other intra frame + * 3: every third intra frame + * and so on + */ + int32_t vps_period; + int32_t width; /*!< \brief frame width, must be a multiple of 8 */ int32_t height; /*!< \brief frame height, must be a multiple of 8 */ double framerate; /*!< \brief Input framerate */ @@ -410,6 +421,26 @@ void (*encoder_close)(kvz_encoder *encoder); /** + * \brief Get parameter sets. + * + * Encode the VPS, SPS and PPS. + * + * If data_out is set to non-NULL values, the caller is responsible for + * calling chunk_free on it. + * + * A null pointer may be passed in place of the parameter data_out or len_out + * to skip returning the corresponding value. + * + * \param encoder encoder + * \param data_out Returns the encoded parameter sets. + * \param len_out Returns number of bytes in the encoded data. + * \return 1 on success, 0 on error. + */ + int (*encoder_headers)(kvz_encoder *encoder, + kvz_data_chunk **data_out, + uint32_t *len_out); + + /** * \brief Encode one frame. * * Add pic_in to the encoding pipeline. If an encoded frame is ready, return
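The encoder_headers entry added above makes it possible to emit the VPS, SPS and PPS without feeding in a picture, which pairs naturally with vps_period = -1 (never re-send) when the parameter sets are delivered out of band. A rough usage sketch, assuming the encoder was opened through the same kvz_api table and that kvz_data_chunk is the usual linked list of byte buffers with data, len and next fields, could be:

/* Illustrative only; error handling beyond the return value is omitted. */
#include <stdio.h>
#include <stdint.h>
#include "kvazaar.h"

static int write_parameter_sets(const kvz_api *api, kvz_encoder *enc, FILE *out)
{
  kvz_data_chunk *chunks = NULL;
  uint32_t len = 0;

  // Encode the VPS, SPS and PPS into a chunk list.
  if (!api->encoder_headers(enc, &chunks, &len)) return 0;

  // Write the chunks out in order; len is the total size in bytes.
  for (kvz_data_chunk *chunk = chunks; chunk != NULL; chunk = chunk->next) {
    fwrite(chunk->data, 1, chunk->len, out);
  }

  // Per the documentation above, the caller must free the chunk list.
  api->chunk_free(chunks);
  return 1;
}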
View file
kvazaar-0.7.0.tar.gz/src/kvazaar_version.h -> kvazaar-0.7.1.tar.gz/src/kvazaar_version.h
Changed
@@ -21,6 +21,6 @@ ****************************************************************************/ // KVZ_API_VERSION is incremented every time the public api changes. -#define KVZ_API_VERSION 7 +#define KVZ_API_VERSION 8 #endif // KVAZAAR_VERSION_H_
View file
kvazaar-0.7.0.tar.gz/src/rdo.c -> kvazaar-0.7.1.tar.gz/src/rdo.c
Changed
@@ -31,6 +31,7 @@ #include "context.h" #include "cabac.h" #include "transform.h" +#include "strategies/strategies-quant.h" #define QUANT_SHIFT 14
View file
kvazaar-0.7.0.tar.gz/src/search_intra.c -> kvazaar-0.7.1.tar.gz/src/search_intra.c
Changed
@@ -271,14 +271,14 @@ static void search_intra_chroma_rough(encoder_state_t * const state, int x_px, int y_px, int depth, const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride, - const kvz_pixel *rec_u, const kvz_pixel *rec_v, int16_t recstride, + kvz_intra_references *refs_u, kvz_intra_references *refs_v, int8_t luma_mode, int8_t modes[5], double costs[5]) { - const bool reconstruct_chroma = !(x_px & 4 || y_px & 4); - if (!reconstruct_chroma) return; + assert(!(x_px & 4 || y_px & 4)); const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); + const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2); for (int i = 0; i < 5; ++i) { costs[i] = 0; @@ -287,16 +287,16 @@ cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); - kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_get_pred(state->encoder_control, rec_u, NULL, recstride, pred, width, modes[i], 1); + kvz_intra_predict(refs_u, log2_width_c, modes[i], COLOR_U, pred); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -304,7 +304,7 @@ kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_get_pred(state->encoder_control, rec_v, NULL, recstride, pred, width, modes[i], 2); + kvz_intra_predict(refs_v, log2_width_c, modes[i], COLOR_V, pred); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -343,41 +343,25 @@ */ static int8_t search_intra_rough(encoder_state_t * const state, kvz_pixel *orig, int32_t origstride, - kvz_pixel *rec, int16_t recstride, - int width, int8_t *intra_preds, + kvz_intra_references *refs, + int log2_width, int8_t *intra_preds, int8_t modes[35], double costs[35]) { + assert(log2_width >= 2 && log2_width <= 5); + int_fast8_t width = 1 << log2_width; cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width); cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width); // Temporary block arrays - kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); - - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - assert(width == 4 || width == 8 || width == 16 || width == 32); // Store original block for SAD computation kvz_pixels_blit(orig, orig_block, width, width, origstride, width); - // Generate filtered reference pixels. 
- { - int16_t x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } - int8_t modes_selected = 0; unsigned min_cost = UINT_MAX; unsigned max_cost = 0; @@ -387,18 +371,15 @@ int offset; if (state->encoder_control->full_intra_search) { offset = 1; - } else if (width == 4) { - offset = 2; - } else if (width == 8) { - offset = 4; } else { - offset = 8; + static const int8_t offsets[4] = { 2, 4, 8, 8 }; + offset = offsets[log2_width - 2]; } // Calculate SAD for evenly spaced modes to select the starting point for // the recursive search. for (int mode = 2; mode <= 34; mode += offset) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; @@ -421,7 +402,7 @@ int8_t center_node = best_mode; int8_t mode = center_node - offset; if (mode >= 2) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; if (costs[modes_selected] < best_cost) { @@ -433,7 +414,7 @@ mode = center_node + offset; if (mode <= 34) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; if (costs[modes_selected] < best_cost) { @@ -460,7 +441,7 @@ } if (!has_mode) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; ++modes_selected; @@ -507,7 +488,6 @@ static int8_t search_intra_rdo(encoder_state_t * const state, int x_px, int y_px, int depth, kvz_pixel *orig, int32_t origstride, - kvz_pixel *rec, int16_t recstride, int8_t *intra_preds, int modes_to_check, int8_t modes[35], double costs[35], @@ -517,31 +497,14 @@ const int width = LCU_WIDTH >> depth; kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; - int rdo_mode; - int pred_mode; - - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - // Generate filtered reference pixels. 
- { - int x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } kvz_pixels_blit(orig, orig_block, width, width, origstride, width); // Check that the predicted modes are in the RDO mode list if (modes_to_check < 35) { - for (pred_mode = 0; pred_mode < 3; pred_mode++) { + for (int pred_mode = 0; pred_mode < 3; pred_mode++) { int mode_found = 0; - for (rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { + for (int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { if (intra_preds[pred_mode] == modes[rdo_mode]) { mode_found = 1; break; @@ -555,42 +518,27 @@ } } - for(rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { + for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds); costs[rdo_mode] = rdo_bitcost * (int)(state->global->cur_lambda_cost + 0.5); -#if 0 - if (width != 4 && tr_depth == depth) { - // This code path has been disabled for now because it increases bdrate - // by 1-2 %. Possibly due to not taking chroma into account during luma - // mode search. Enabling separate chroma search compensates a little, - // but not enough. - - // The idea for this code path is, that it would do the same thing as - // the more general search_intra_trdepth, but would only handle cases - // where transform split or transform skip don't need to be handled. - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, modes[rdo_mode], 0); - costs[rdo_mode] += kvz_rdo_cost_intra(state, pred, orig_block, width, modes[rdo_mode], width == 4 ? 1 : 0); - } else -#endif - { - // Perform transform split search and save mode RD cost for the best one. - cu_info_t pred_cu; - pred_cu.depth = depth; - pred_cu.type = CU_INTRA; - pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N); - pred_cu.intra[0].mode = modes[rdo_mode]; - pred_cu.intra[1].mode = modes[rdo_mode]; - pred_cu.intra[2].mode = modes[rdo_mode]; - pred_cu.intra[3].mode = modes[rdo_mode]; - pred_cu.intra[0].mode_chroma = modes[rdo_mode]; - FILL(pred_cu.cbf, 0); - - // Reset transform split data in lcu.cu for this area. - kvz_lcu_set_trdepth(lcu, x_px, y_px, depth, depth); - - double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu); - costs[rdo_mode] += mode_cost; - } + + // Perform transform split search and save mode RD cost for the best one. + cu_info_t pred_cu; + pred_cu.depth = depth; + pred_cu.type = CU_INTRA; + pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N); + pred_cu.intra[0].mode = modes[rdo_mode]; + pred_cu.intra[1].mode = modes[rdo_mode]; + pred_cu.intra[2].mode = modes[rdo_mode]; + pred_cu.intra[3].mode = modes[rdo_mode]; + pred_cu.intra[0].mode_chroma = modes[rdo_mode]; + FILL(pred_cu.cbf, 0); + + // Reset transform split data in lcu.cu for this area. 
+ kvz_lcu_set_trdepth(lcu, x_px, y_px, depth, depth); + + double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu); + costs[rdo_mode] += mode_cost; } // The best transform split hierarchy is not saved anywhere, so to get the @@ -697,7 +645,6 @@ const int x_px, const int y_px, const int depth, lcu_t *lcu) { - const videoframe_t * const frame = state->tile->frame; const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH; @@ -726,23 +673,15 @@ // FIXME: It might make more sense to only disable rough search if // num_modes is 0.is 0. if (num_modes != 1 && num_modes != 5) { - kvz_pixel rec_u[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; - kvz_pixel rec_v[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; - - const int16_t width_c = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); - const int16_t rec_stride = width_c * 2 + 8; - const int16_t out_stride = rec_stride; - - kvz_intra_build_reference_border(state->encoder_control, - x_px, y_px, out_stride, - rec_u, rec_stride, COLOR_U, - frame->width / 2, frame->height / 2, - lcu); - kvz_intra_build_reference_border(state->encoder_control, - x_px, y_px, out_stride, - rec_v, rec_stride, COLOR_V, - frame->width / 2, frame->height / 2, - lcu); + const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2); + const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; + const vector2d_t luma_px = { x_px, y_px }; + + kvz_intra_references refs_u; + kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs_u); + + kvz_intra_references refs_v; + kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs_v); vector2d_t lcu_cpx = { lcu_px.x / 2, lcu_px.y / 2 }; kvz_pixel *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; @@ -750,7 +689,7 @@ search_intra_chroma_rough(state, x_px, y_px, depth, ref_u, ref_v, LCU_WIDTH_C, - &rec_u[rec_stride + 1], &rec_v[rec_stride + 1], rec_stride, + &refs_u, &refs_v, intra_mode, modes, costs); } @@ -771,16 +710,15 @@ const int x_px, const int y_px, const int depth, lcu_t *lcu) { - const videoframe_t * const frame = state->tile->frame; const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const int8_t cu_width = (LCU_WIDTH >> (depth)); const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH; + const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth; cu_info_t *cur_cu = &lcu->cu[cu_index]; - kvz_pixel rec_buffer[(LCU_WIDTH * 2 + 1) * (LCU_WIDTH * 2 + 1)]; - kvz_pixel *cu_in_rec_buffer = &rec_buffer[cu_width * 2 + 8 + 1]; + kvz_intra_references refs; int8_t candidate_modes[3]; @@ -798,12 +736,9 @@ kvz_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu); if (depth > 0) { - // Build reconstructed block to use in prediction with extrapolated borders - kvz_intra_build_reference_border(state->encoder_control, x_px, y_px, cu_width * 2 + 8, - rec_buffer, cu_width * 2 + 8, 0, - frame->width, - frame->height, - lcu); + const vector2d_t luma_px = { x_px, y_px }; + const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; + kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs); } int8_t modes[35]; @@ -817,10 +752,10 @@ bool skip_rough_search = (depth == 0 || state->encoder_control->rdo >= 3); if (!skip_rough_search) { 
number_of_modes = search_intra_rough(state, - ref_pixels, LCU_WIDTH, - cu_in_rec_buffer, cu_width * 2 + 8, - cu_width, candidate_modes, - modes, costs); + ref_pixels, LCU_WIDTH, + &refs, + log2_width, candidate_modes, + modes, costs); } else { number_of_modes = 35; for (int i = 0; i < number_of_modes; ++i) { @@ -849,7 +784,6 @@ number_of_modes = search_intra_rdo(state, x_px, y_px, depth, ref_pixels, LCU_WIDTH, - cu_in_rec_buffer, cu_width * 2 + 8, candidate_modes, num_modes_to_check, modes, costs, lcu);
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/intra-avx2.c
Added
@@ -0,0 +1,176 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +/* + * \file + */ + +#include <stdlib.h> + +#include "intra-avx2.h" +#include "strategyselector.h" + +#if COMPILE_INTEL_AVX2 +#include <immintrin.h> + + /** + * \brief Generage angular predictions. + * \param log2_width Log2 of width, range 2..5. + * \param intra_mode Angular mode in range 2..34. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. + */ +static void kvz_angular_pred_avx2( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst) +{ + assert(log2_width >= 2 && log2_width <= 5); + assert(intra_mode >= 2 && intra_mode <= 34); + + static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 }; + static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp + + // Temporary buffer for modes 11-25. + // It only needs to be big enough to hold indices from -width to width-1. + kvz_pixel tmp_ref[2 * 32]; + const int_fast8_t width = 1 << log2_width; + + // Whether to swap references to always project on the left reference row. + const bool vertical_mode = intra_mode >= 18; + // Modes distance to horizontal or vertical mode. + const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode; + // Sample displacement per column in fractions of 32. + const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; + + // Pointer for the reference we are interpolating from. + const kvz_pixel *ref_main; + // Pointer for the other reference. + const kvz_pixel *ref_side; + + // Set ref_main and ref_side such that, when indexed with 0, they point to + // index 0 in block coordinates. + if (sample_disp < 0) { + // Negative sample_disp means, we need to use both references. + + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + + // Move the reference pixels to start from the middle to the later half of + // the tmp_ref, so there is room for negative indices. + for (int_fast8_t x = -1; x < width; ++x) { + tmp_ref[x + width] = ref_main[x]; + } + // Get a pointer to block index 0 in tmp_ref. + ref_main = &tmp_ref[width]; + + // Extend the side reference to the negative indices of main reference. 
+ int_fast32_t col_sample_disp = 128; // rounding for the ">> 8" + int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)]; + int_fast8_t most_negative_index = (width * sample_disp) >> 5; + for (int_fast8_t x = -2; x >= most_negative_index; --x) { + col_sample_disp += inv_abs_sample_disp; + int_fast8_t side_index = col_sample_disp >> 8; + tmp_ref[x + width] = ref_side[side_index - 1]; + } + } + else { + // sample_disp >= 0 means we don't need to refer to negative indices, + // which means we can just use the references as is. + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + } + + if (sample_disp != 0) { + // The mode is not horizontal or vertical, we have to do interpolation. + + int_fast16_t delta_pos = 0; + for (int_fast8_t y = 0; y < width; ++y) { + delta_pos += sample_disp; + int_fast8_t delta_int = delta_pos >> 5; + int_fast8_t delta_fract = delta_pos & (32 - 1); + + if (delta_fract) { + // Do linear filtering + if (width < 8) { + for (int_fast8_t x = 0; x < width; ++x) { + kvz_pixel ref1 = ref_main[x + delta_int]; + kvz_pixel ref2 = ref_main[x + delta_int + 1]; + dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5; + } + } else { + struct { uint8_t w1; uint8_t w2; } packed_weights = { 32 - delta_fract, delta_fract }; + __m128i v_weights = _mm_set1_epi16(*(int16_t*)&packed_weights); + + for (int_fast8_t x = 0; x < width; x += 8) { + __m128i v_ref1 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int])); + __m128i v_ref2 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int + 1])); + __m128i v_refs = _mm_unpacklo_epi8(v_ref1, v_ref2); + __m128i v_tmp = _mm_maddubs_epi16(v_refs, v_weights); + v_tmp = _mm_add_epi16(v_tmp, _mm_set1_epi16(16)); + v_tmp = _mm_srli_epi16(v_tmp, 5); + v_tmp = _mm_packus_epi16(v_tmp, v_tmp); + _mm_storel_epi64((__m128i*)(dst + y * width + x), v_tmp); + } + } + } + else { + // Just copy the integer samples + for (int_fast8_t x = 0; x < width; x+=4) { + *(int32_t*)(&dst[y * width + x]) = *(int32_t*)(&ref_main[x + delta_int]); + } + } + } + } + else { + // Mode is horizontal or vertical, just copy the pixels. + + for (int_fast8_t y = 0; y < width; ++y) { + for (int_fast8_t x = 0; x < width; x+=4) { + *(int32_t*)&(dst[y * width + x]) = *(int32_t*)&(ref_main[x]); + } + } + } + + // Flip the block if this is was a horizontal mode. + if (!vertical_mode) { + for (int_fast8_t y = 0; y < width - 1; ++y) { + for (int_fast8_t x = y + 1; x < width; ++x) { + SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel); + } + } + } +} + +#endif //COMPILE_INTEL_AVX2 + +int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth) +{ + bool success = true; +#if COMPILE_INTEL_AVX2 + if (bitdepth == 8) { + success &= kvz_strategyselector_register(opaque, "angular_pred", "avx2", 40, &kvz_angular_pred_avx2); + } +#endif //COMPILE_INTEL_AVX2 + return success; +}
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/intra-avx2.h
Added
@@ -0,0 +1,27 @@ +#ifndef STRATEGIES_INTRA_AVX2_H_ +#define STRATEGIES_INTRA_AVX2_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_INTRA_AVX2_H_
View file
kvazaar-0.7.0.tar.gz/src/strategies/avx2/picture-avx2.c -> kvazaar-0.7.1.tar.gz/src/strategies/avx2/picture-avx2.c
Changed
@@ -26,6 +26,7 @@ #if COMPILE_INTEL_AVX2 # include "image.h" +# include "strategies/strategies-common.h" # include <immintrin.h> @@ -136,6 +137,186 @@ return m256i_horizontal_sum(sum0); } +static unsigned satd_8bit_4x4_avx2(const kvz_pixel *org, const kvz_pixel *cur) +{ + + __m128i original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)org)); + __m128i current = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)cur)); + + __m128i diff_lo = _mm_sub_epi16(current, original); + + original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)(org + 8))); + current = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)(cur + 8))); + + __m128i diff_hi = _mm_sub_epi16(current, original); + + + //Hor + __m128i row0 = _mm_hadd_epi16(diff_lo, diff_hi); + __m128i row1 = _mm_hsub_epi16(diff_lo, diff_hi); + + __m128i row2 = _mm_hadd_epi16(row0, row1); + __m128i row3 = _mm_hsub_epi16(row0, row1); + + //Ver + row0 = _mm_hadd_epi16(row2, row3); + row1 = _mm_hsub_epi16(row2, row3); + + row2 = _mm_hadd_epi16(row0, row1); + row3 = _mm_hsub_epi16(row0, row1); + + //Abs and sum + row2 = _mm_abs_epi16(row2); + row3 = _mm_abs_epi16(row3); + + row3 = _mm_add_epi16(row2, row3); + + row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, KVZ_PERMUTE(2, 3, 0, 1) )); + row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, KVZ_PERMUTE(1, 0, 1, 0) )); + row3 = _mm_add_epi16(row3, _mm_shufflelo_epi16(row3, KVZ_PERMUTE(1, 0, 1, 0) )); + + unsigned sum = _mm_extract_epi16(row3, 0); + unsigned satd = (sum + 1) >> 1; + + return satd; +} + +static void hor_add_sub_avx2(__m128i *row0, __m128i *row1){ + + __m128i a = _mm_hadd_epi16(*row0, *row1); + __m128i b = _mm_hsub_epi16(*row0, *row1); + + __m128i c = _mm_hadd_epi16(a, b); + __m128i d = _mm_hsub_epi16(a, b); + + *row0 = _mm_hadd_epi16(c, d); + *row1 = _mm_hsub_epi16(c, d); +} + +static INLINE void ver_add_sub_avx2(__m128i temp_hor[8], __m128i temp_ver[8]){ + + // First stage + for (int i = 0; i < 8; i += 2){ + temp_ver[i+0] = _mm_hadd_epi16(temp_hor[i + 0], temp_hor[i + 1]); + temp_ver[i+1] = _mm_hsub_epi16(temp_hor[i + 0], temp_hor[i + 1]); + } + + // Second stage + for (int i = 0; i < 8; i += 4){ + temp_hor[i + 0] = _mm_add_epi16(temp_ver[i + 0], temp_ver[i + 2]); + temp_hor[i + 1] = _mm_add_epi16(temp_ver[i + 1], temp_ver[i + 3]); + temp_hor[i + 2] = _mm_sub_epi16(temp_ver[i + 0], temp_ver[i + 2]); + temp_hor[i + 3] = _mm_sub_epi16(temp_ver[i + 1], temp_ver[i + 3]); + } + + // Third stage + for (int i = 0; i < 4; ++i){ + temp_ver[i + 0] = _mm_add_epi16(temp_hor[0 + i], temp_hor[4 + i]); + temp_ver[i + 4] = _mm_sub_epi16(temp_hor[0 + i], temp_hor[4 + i]); + } +} + +INLINE static void haddwd_accumulate_avx2(__m128i *accumulate, __m128i *ver_row) +{ + __m128i abs_value = _mm_abs_epi16(*ver_row); + *accumulate = _mm_add_epi32(*accumulate, _mm_madd_epi16(abs_value, _mm_set1_epi16(1))); +} + +INLINE static unsigned sum_block_avx2(__m128i *ver_row) +{ + __m128i sad = _mm_setzero_si128(); + haddwd_accumulate_avx2(&sad, ver_row + 0); + haddwd_accumulate_avx2(&sad, ver_row + 1); + haddwd_accumulate_avx2(&sad, ver_row + 2); + haddwd_accumulate_avx2(&sad, ver_row + 3); + haddwd_accumulate_avx2(&sad, ver_row + 4); + haddwd_accumulate_avx2(&sad, ver_row + 5); + haddwd_accumulate_avx2(&sad, ver_row + 6); + haddwd_accumulate_avx2(&sad, ver_row + 7); + + sad = _mm_add_epi32(sad, _mm_shuffle_epi32(sad, KVZ_PERMUTE(2, 3, 0, 1))); + sad = _mm_add_epi32(sad, _mm_shuffle_epi32(sad, KVZ_PERMUTE(1, 0, 1, 0))); + + return _mm_cvtsi128_si32(sad); +} + +INLINE static __m128i diff_row_avx2(const kvz_pixel *buf1, const 
kvz_pixel *buf2) +{ + __m128i buf1_row = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)buf1)); + __m128i buf2_row = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)buf2)); + return _mm_sub_epi16(buf1_row, buf2_row); +} + +INLINE static void diff_blocks_and_hor_transform_avx2(__m128i row_diff[8], const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) +{ + row_diff[0] = diff_row_avx2(buf1 + 0 * stride1, buf2 + 0 * stride2); + row_diff[1] = diff_row_avx2(buf1 + 1 * stride1, buf2 + 1 * stride2); + hor_add_sub_avx2(row_diff + 0, row_diff + 1); + + row_diff[2] = diff_row_avx2(buf1 + 2 * stride1, buf2 + 2 * stride2); + row_diff[3] = diff_row_avx2(buf1 + 3 * stride1, buf2 + 3 * stride2); + hor_add_sub_avx2(row_diff + 2, row_diff + 3); + + row_diff[4] = diff_row_avx2(buf1 + 4 * stride1, buf2 + 4 * stride2); + row_diff[5] = diff_row_avx2(buf1 + 5 * stride1, buf2 + 5 * stride2); + hor_add_sub_avx2(row_diff + 4, row_diff + 5); + + row_diff[6] = diff_row_avx2(buf1 + 6 * stride1, buf2 + 6 * stride2); + row_diff[7] = diff_row_avx2(buf1 + 7 * stride1, buf2 + 7 * stride2); + hor_add_sub_avx2(row_diff + 6, row_diff + 7); +} + +static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) +{ + __m128i temp_hor[8]; + __m128i temp_ver[8]; + + diff_blocks_and_hor_transform_avx2(temp_hor, buf1, stride1, buf2, stride2); + + ver_add_sub_avx2(temp_hor, temp_ver); + + unsigned sad = sum_block_avx2(temp_ver); + + unsigned result = (sad + 2) >> 2; + return result; +} + +// Function macro for defining hadamard calculating functions +// for fixed size blocks. They calculate hadamard for integer +// multiples of 8x8 with the 8x8 hadamard function. +#define SATD_NXN_AVX2(n) \ +static unsigned satd_8bit_ ## n ## x ## n ## _avx2( \ + const kvz_pixel * const block1, const kvz_pixel * const block2) \ +{ \ + unsigned x, y; \ + unsigned sum = 0; \ + for (y = 0; y < (n); y += 8) { \ + unsigned row = y * (n); \ + for (x = 0; x < (n); x += 8) { \ + sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (n), &block2[row + x], (n)); \ + } \ + } \ + return sum>>(KVZ_BIT_DEPTH-8); \ +} + +static unsigned satd_8bit_8x8_avx2( + const kvz_pixel * const block1, const kvz_pixel * const block2) +{ + unsigned x, y; + unsigned sum = 0; + for (y = 0; y < (8); y += 8) { + unsigned row = y * (8); + for (x = 0; x < (8); x += 8) { + sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (8), &block2[row + x], (8)); + } + } + return sum>>(KVZ_BIT_DEPTH-8); \ +} + +//SATD_NXN_AVX2(8) //Use the non-macro version +SATD_NXN_AVX2(16) +SATD_NXN_AVX2(32) +SATD_NXN_AVX2(64) #endif //COMPILE_INTEL_AVX2 @@ -153,6 +334,12 @@ success &= kvz_strategyselector_register(opaque, "sad_16x16", "avx2", 40, &sad_8bit_16x16_avx2); success &= kvz_strategyselector_register(opaque, "sad_32x32", "avx2", 40, &sad_8bit_32x32_avx2); success &= kvz_strategyselector_register(opaque, "sad_64x64", "avx2", 40, &sad_8bit_64x64_avx2); + + success &= kvz_strategyselector_register(opaque, "satd_4x4", "avx2", 40, &satd_8bit_4x4_avx2); + success &= kvz_strategyselector_register(opaque, "satd_8x8", "avx2", 40, &satd_8bit_8x8_avx2); + success &= kvz_strategyselector_register(opaque, "satd_16x16", "avx2", 40, &satd_8bit_16x16_avx2); + success &= kvz_strategyselector_register(opaque, "satd_32x32", "avx2", 40, &satd_8bit_32x32_avx2); + success &= kvz_strategyselector_register(opaque, "satd_64x64", "avx2", 40, &satd_8bit_64x64_avx2); } #endif return success;
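The AVX2 SATD kernels above are registered under the same "satd_NxN" strategy names as the existing generic versions, so callers pick them up automatically through the function-pointer lookup already used in search_intra.c. A small sketch of that consumer side, with the header location assumed, not confirmed by the patch:

/* Sketch of how the registered SATD strategies are consumed; pred and orig
 * are assumed to be width*width blocks with no stride. */
#include "strategies/strategies-picture.h"  /* assumed header for the lookup */

static unsigned block_satd(const kvz_pixel *pred, const kvz_pixel *orig, int width)
{
  // The lookup returns the highest-priority registered implementation,
  // e.g. satd_8bit_16x16_avx2 above when AVX2 is available at runtime.
  cost_pixel_nxn_func *satd = kvz_pixels_get_satd_func(width);
  return satd(pred, orig);
}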
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/quant-avx2.c
Added
@@ -0,0 +1,220 @@ +/***************************************************************************** +* This file is part of Kvazaar HEVC encoder. +* +* Copyright (C) 2013-2015 Tampere University of Technology and others (see +* COPYING file). +* +* Kvazaar is free software: you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the +* Free Software Foundation; either version 2.1 of the License, or (at your +* option) any later version. +* +* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY +* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for +* more details. +* +* You should have received a copy of the GNU General Public License along +* with Kvazaar. If not, see <http://www.gnu.org/licenses/>. +****************************************************************************/ + +/* +* \file +*/ + +#include <stdlib.h> + +#include "quant-avx2.h" +#include "../generic/quant-generic.h" +#include "../strategies-common.h" +#include "strategyselector.h" +#include "encoder.h" +#include "transform.h" + +#if COMPILE_INTEL_AVX2 +#include <immintrin.h> + +/** +* \brief quantize transformed coefficents +* +*/ + +void kvz_quant_flat_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{ + const encoder_control_t * const encoder = state->encoder_control; + const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; + const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; + + int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth - 8) * 6); + const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2; + const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); + const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6]; + const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform + const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + transform_shift; + const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 
171 : 85) << (q_bits - 9); + const int32_t q_bits8 = q_bits - 8; + + assert(quant_coeff[0] <= (1 << 15) - 1 && quant_coeff[0] >= -(1 << 15)); //Assuming flat values to fit int16_t + + uint32_t ac_sum = 0; + + __m256i v_ac_sum = _mm256_setzero_si256(); + __m256i v_quant_coeff = _mm256_set1_epi16(quant_coeff[0]); + + for (int32_t n = 0; n < width * height; n += 16) { + + __m256i v_level = _mm256_loadu_si256((__m256i*)&(coef[n])); + __m256i v_sign = _mm256_cmpgt_epi16(_mm256_setzero_si256(), v_level); + v_sign = _mm256_or_si256(v_sign, _mm256_set1_epi16(1)); + + v_level = _mm256_abs_epi16(v_level); + __m256i low_a = _mm256_unpacklo_epi16(v_level, _mm256_set1_epi16(0)); + __m256i high_a = _mm256_unpackhi_epi16(v_level, _mm256_set1_epi16(0)); + + __m256i low_b = _mm256_unpacklo_epi16(v_quant_coeff, _mm256_set1_epi16(0)); + __m256i high_b = _mm256_unpackhi_epi16(v_quant_coeff, _mm256_set1_epi16(0)); + + __m256i v_level32_a = _mm256_madd_epi16(low_a, low_b); + __m256i v_level32_b = _mm256_madd_epi16(high_a, high_b); + + v_level32_a = _mm256_add_epi32(v_level32_a, _mm256_set1_epi32(add)); + v_level32_b = _mm256_add_epi32(v_level32_b, _mm256_set1_epi32(add)); + + v_level32_a = _mm256_srai_epi32(v_level32_a, q_bits); + v_level32_b = _mm256_srai_epi32(v_level32_b, q_bits); + + v_level = _mm256_packs_epi32(v_level32_a, v_level32_b); + v_level = _mm256_sign_epi16(v_level, v_sign); + + _mm256_storeu_si256((__m256i*)&(q_coef[n]), v_level); + + v_ac_sum = _mm256_add_epi32(v_ac_sum, v_level32_a); + v_ac_sum = _mm256_add_epi32(v_ac_sum, v_level32_b); + } + + __m128i temp = _mm_add_epi32(_mm256_castsi256_si128(v_ac_sum), _mm256_extracti128_si256(v_ac_sum, 1)); + temp = _mm_add_epi32(temp, _mm_shuffle_epi32(temp, KVZ_PERMUTE(2, 3, 0, 1))); + temp = _mm_add_epi32(temp, _mm_shuffle_epi32(temp, KVZ_PERMUTE(1, 0, 1, 0))); + ac_sum += _mm_cvtsi128_si32(temp); + + if (!(encoder->sign_hiding && ac_sum >= 2)) return; + + int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; + + for (int32_t n = 0; n < width * height; n++) { + int32_t level; + level = coef[n]; + level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; + delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); + } + + if (ac_sum >= 2) { +#define SCAN_SET_SIZE 16 +#define LOG2_SCAN_SET_SIZE 4 + int32_t n, last_cg = -1, abssum = 0, subset, subpos; + for (subset = (width*height - 1) >> LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { + int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg = -1; + subpos = subset << LOG2_SCAN_SET_SIZE; + abssum = 0; + + // Find last coeff pos + for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { + if (q_coef[scan[n + subpos]]) { + last_nz_pos_in_cg = n; + break; + } + } + + // First coeff pos + for (n = 0; n <SCAN_SET_SIZE; n++) { + if (q_coef[scan[n + subpos]]) { + first_nz_pos_in_cg = n; + break; + } + } + + // Sum all kvz_quant coeffs between first and last + for (n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { + abssum += q_coef[scan[n + subpos]]; + } + + if (last_nz_pos_in_cg >= 0 && last_cg == -1) { + last_cg = 1; + } + + if (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { + int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1); + if (signbit != (abssum & 0x1)) { // compare signbit with sum_parity + int32_t min_cost_inc = 0x7fffffff, min_pos = -1, cur_cost = 0x7fffffff; + int16_t final_change = 0, cur_change = 0; + for (n = (last_cg == 1 ? 
last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { + uint32_t blkPos = scan[n + subpos]; + if (q_coef[blkPos] != 0) { + if (delta_u[blkPos] > 0) { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + else if (n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = delta_u[blkPos]; + cur_change = -1; + } + } + else if (n < first_nz_pos_in_cg && ((coef[blkPos] >= 0) ? 0 : 1) != signbit) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + + if (cur_cost < min_cost_inc) { + min_cost_inc = cur_cost; + final_change = cur_change; + min_pos = blkPos; + } + } // CG loop + + if (q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { + final_change = -1; + } + + if (coef[min_pos] >= 0) q_coef[min_pos] += final_change; + else q_coef[min_pos] -= final_change; + } // Hide + } + if (last_cg == 1) last_cg = 0; + } + +#undef SCAN_SET_SIZE +#undef LOG2_SCAN_SET_SIZE + } +} + +void kvz_quant_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{ + if (state->encoder_control->scaling_list.enable){ + kvz_quant_generic(state, coef, q_coef, width, height, type, scan_idx, block_type); + } + else { + kvz_quant_flat_avx2(state, coef, q_coef, width, height, type, scan_idx, block_type); + } +} + +#endif //COMPILE_INTEL_AVX2 + + +int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth) +{ + bool success = true; + +#if COMPILE_INTEL_AVX2 + success &= kvz_strategyselector_register(opaque, "quant", "avx2", 40, &kvz_quant_avx2); +#endif //COMPILE_INTEL_AVX2 + + return success; +}
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/quant-avx2.h
Added
@@ -0,0 +1,26 @@ +#ifndef STRATEGIES_QUANT_AVX2_H_ +#define STRATEGIES_QUANT_AVX2_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> + +int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_QUANT_AVX2_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/intra-generic.c
Added
@@ -0,0 +1,154 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +/* + * \file + */ + +#include <stdlib.h> + +#include "intra-generic.h" +#include "strategyselector.h" + + + /** + * \brief Generage angular predictions. + * \param log2_width Log2 of width, range 2..5. + * \param intra_mode Angular mode in range 2..34. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. + */ +static void kvz_angular_pred_generic( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst) +{ + assert(log2_width >= 2 && log2_width <= 5); + assert(intra_mode >= 2 && intra_mode <= 34); + + static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 }; + static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp + + // Temporary buffer for modes 11-25. + // It only needs to be big enough to hold indices from -width to width-1. + kvz_pixel tmp_ref[2 * 32]; + const int_fast8_t width = 1 << log2_width; + + // Whether to swap references to always project on the left reference row. + const bool vertical_mode = intra_mode >= 18; + // Modes distance to horizontal or vertical mode. + const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode; + // Sample displacement per column in fractions of 32. + const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; + + // Pointer for the reference we are interpolating from. + const kvz_pixel *ref_main; + // Pointer for the other reference. + const kvz_pixel *ref_side; + + // Set ref_main and ref_side such that, when indexed with 0, they point to + // index 0 in block coordinates. + if (sample_disp < 0) { + // Negative sample_disp means, we need to use both references. + + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + + // Move the reference pixels to start from the middle to the later half of + // the tmp_ref, so there is room for negative indices. + for (int_fast8_t x = -1; x < width; ++x) { + tmp_ref[x + width] = ref_main[x]; + } + // Get a pointer to block index 0 in tmp_ref. + ref_main = &tmp_ref[width]; + + // Extend the side reference to the negative indices of main reference. 
+ int_fast32_t col_sample_disp = 128; // rounding for the ">> 8" + int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)]; + int_fast8_t most_negative_index = (width * sample_disp) >> 5; + for (int_fast8_t x = -2; x >= most_negative_index; --x) { + col_sample_disp += inv_abs_sample_disp; + int_fast8_t side_index = col_sample_disp >> 8; + tmp_ref[x + width] = ref_side[side_index - 1]; + } + } + else { + // sample_disp >= 0 means we don't need to refer to negative indices, + // which means we can just use the references as is. + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + } + + if (sample_disp != 0) { + // The mode is not horizontal or vertical, we have to do interpolation. + + int_fast16_t delta_pos = 0; + for (int_fast8_t y = 0; y < width; ++y) { + delta_pos += sample_disp; + int_fast8_t delta_int = delta_pos >> 5; + int_fast8_t delta_fract = delta_pos & (32 - 1); + + if (delta_fract) { + // Do linear filtering + for (int_fast8_t x = 0; x < width; ++x) { + kvz_pixel ref1 = ref_main[x + delta_int]; + kvz_pixel ref2 = ref_main[x + delta_int + 1]; + dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5; + } + } + else { + // Just copy the integer samples + for (int_fast8_t x = 0; x < width; x++) { + dst[y * width + x] = ref_main[x + delta_int]; + } + } + } + } + else { + // Mode is horizontal or vertical, just copy the pixels. + + for (int_fast8_t y = 0; y < width; ++y) { + for (int_fast8_t x = 0; x < width; ++x) { + dst[y * width + x] = ref_main[x]; + } + } + } + + // Flip the block if this is was a horizontal mode. + if (!vertical_mode) { + for (int_fast8_t y = 0; y < width - 1; ++y) { + for (int_fast8_t x = y + 1; x < width; ++x) { + SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel); + } + } + } +} + +int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth) +{ + bool success = true; + + success &= kvz_strategyselector_register(opaque, "angular_pred", "generic", 0, &kvz_angular_pred_generic); + + return success; +}
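To make the projection arithmetic in kvz_angular_pred_generic() concrete, consider intra mode 30: it is a vertical-family mode, so mode_disp = 30 - 26 = 4 and sample_disp = modedisp2sampledisp[4] = 13. On the first row delta_pos becomes 13, so delta_int = 0 and delta_fract = 13, and every output pixel is a 19/13 blend of two neighbouring reference samples. The snippet below illustrates only that one row, using plain uint8_t in place of kvz_pixel; the function name is hypothetical.

/* Worked example of a single row of the angular interpolation above. */
#include <stdint.h>

static void predict_first_row_mode30(const uint8_t *ref_main, uint8_t *dst, int width)
{
  const int sample_disp = 13;        // modedisp2sampledisp[abs(30 - 26)]
  int delta_pos = sample_disp;       // accumulated displacement for row y = 0
  int delta_int = delta_pos >> 5;    // 0: start from the same reference sample
  int delta_fract = delta_pos & 31;  // 13: a 13/32-pel fractional offset

  for (int x = 0; x < width; ++x) {
    int ref1 = ref_main[x + delta_int];
    int ref2 = ref_main[x + delta_int + 1];
    // Same rounding as the generic code: ((32-13)*ref1 + 13*ref2 + 16) >> 5.
    dst[x] = (uint8_t)(((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5);
  }
}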
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/intra-generic.h
Added
@@ -0,0 +1,27 @@ +#ifndef STRATEGIES_INTRA_GENERIC_H_ +#define STRATEGIES_INTRA_GENERIC_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_INTRA_GENERIC_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/quant-generic.c
Added
@@ -0,0 +1,173 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +/* + * \file + */ + +#include <stdlib.h> + +#include "quant-generic.h" +#include "strategyselector.h" +#include "encoder.h" +#include "transform.h" + +#define QUANT_SHIFT 14 +/** +* \brief quantize transformed coefficents +* +*/ +void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{ + const encoder_control_t * const encoder = state->encoder_control; + const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; + const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; + + int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth - 8) * 6); + const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2; + const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); + const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6]; + const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform + const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + transform_shift; + const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9); + const int32_t q_bits8 = q_bits - 8; + + uint32_t ac_sum = 0; + + for (int32_t n = 0; n < width * height; n++) { + int32_t level; + int32_t sign; + + level = coef[n]; + sign = (level < 0 ? -1 : 1); + + level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; + ac_sum += level; + + level *= sign; + q_coef[n] = (coeff_t)(CLIP(-32768, 32767, level)); + } + + if (!(encoder->sign_hiding && ac_sum >= 2)) return; + + int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; + + for (int32_t n = 0; n < width * height; n++) { + int32_t level; + level = coef[n]; + level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; + delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); + } + + if (ac_sum >= 2) { +#define SCAN_SET_SIZE 16 +#define LOG2_SCAN_SET_SIZE 4 + int32_t n, last_cg = -1, abssum = 0, subset, subpos; + for (subset = (width*height - 1) >> LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { + int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg = -1; + subpos = subset << LOG2_SCAN_SET_SIZE; + abssum = 0; + + // Find last coeff pos + for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { + if (q_coef[scan[n + subpos]]) { + last_nz_pos_in_cg = n; + break; + } + } + + // First coeff pos + for (n = 0; n <SCAN_SET_SIZE; n++) { + if (q_coef[scan[n + subpos]]) { + first_nz_pos_in_cg = n; + break; + } + } + + // Sum all kvz_quant coeffs between first and last + for (n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { + abssum += q_coef[scan[n + subpos]]; + } + + if (last_nz_pos_in_cg >= 0 && last_cg == -1) { + last_cg = 1; + } + + if (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { + int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1); + if (signbit != (abssum & 0x1)) { // compare signbit with sum_parity + int32_t min_cost_inc = 0x7fffffff, min_pos = -1, cur_cost = 0x7fffffff; + int16_t final_change = 0, cur_change = 0; + for (n = (last_cg == 1 ? last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { + uint32_t blkPos = scan[n + subpos]; + if (q_coef[blkPos] != 0) { + if (delta_u[blkPos] > 0) { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + else if (n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = delta_u[blkPos]; + cur_change = -1; + } + } + else if (n < first_nz_pos_in_cg && ((coef[blkPos] >= 0) ? 0 : 1) != signbit) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + + if (cur_cost < min_cost_inc) { + min_cost_inc = cur_cost; + final_change = cur_change; + min_pos = blkPos; + } + } // CG loop + + if (q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { + final_change = -1; + } + + if (coef[min_pos] >= 0) q_coef[min_pos] += final_change; + else q_coef[min_pos] -= final_change; + } // Hide + } + if (last_cg == 1) last_cg = 0; + } + +#undef SCAN_SET_SIZE +#undef LOG2_SCAN_SET_SIZE + } +} + + +int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth) +{ + bool success = true; + + success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic); + + return success; +}
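As a reading aid for the new file above (this snippet is not part of the patch), the per-coefficient forward quantization in kvz_quant_generic reduces to the step below; quant_coeff, add and q_bits stand for the values computed at the top of the function, and the helper name is made up for illustration.

#include <stdint.h>
#include <stdlib.h>

/* One iteration of the main loop in kvz_quant_generic: scale by the
 * scaling-list factor, add the rounding offset, shift down by q_bits,
 * restore the sign and clip to the 16-bit coefficient range. */
static int16_t quantize_one(int32_t coef, int32_t quant_coeff,
                            int32_t add, int32_t q_bits)
{
  int32_t sign  = (coef < 0) ? -1 : 1;
  int32_t level = (int32_t)(((int64_t)abs(coef) * quant_coeff + add) >> q_bits);
  level *= sign;
  if (level < -32768) level = -32768;
  if (level >  32767) level =  32767;
  return (int16_t)level;
}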
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/quant-generic.h
Added
@@ -0,0 +1,31 @@ +#ifndef STRATEGIES_QUANT_GENERIC_H_ +#define STRATEGIES_QUANT_GENERIC_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +#define QUANT_SHIFT 14 + +int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth); +void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); + +#endif //STRATEGIES_QUANT_GENERIC_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-common.h
Added
@@ -0,0 +1,8 @@ +#ifndef STRATEGIES_COMMON_H_ +#define STRATEGIES_COMMON_H_ + +//Use with shuffle and permutation intrinsics. +//Parameters are indices to packed elements. Each must be 0, 1, 2 or 3. +#define KVZ_PERMUTE(a, b, c, d) ( (a << 0) | (b << 2) | (c << 4) | (d << 6) ) + +#endif //STRATEGIES_COMMON_H_ \ No newline at end of file
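For illustration only (not in the patch): KVZ_PERMUTE packs four 2-bit lane indices into the 8-bit immediate expected by the SSE/AVX shuffle intrinsics, with the i-th argument choosing the source element for result lane i. Reversing the four 32-bit lanes of a register would look like the sketch below; the include path for the header above is assumed.

#include <immintrin.h>
#include "strategies/strategies-common.h"  /* KVZ_PERMUTE; exact path may differ */

/* Result lane 0 takes element 3, lane 1 element 2, and so on. */
static __m128i reverse_lanes(__m128i v)
{
  return _mm_shuffle_epi32(v, KVZ_PERMUTE(3, 2, 1, 0));
}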
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-intra.c
Added
@@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "strategies-intra.h" +#include "strategyselector.h" + +// Define function pointers. +angular_pred_func *kvz_angular_pred; + +// Headers for platform optimizations. +#include "generic/intra-generic.h" +#include "avx2/intra-avx2.h" + + +int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_intra_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + success &= kvz_strategy_register_intra_avx2(opaque, bitdepth); + } + return success; +} \ No newline at end of file
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-intra.h
Added
@@ -0,0 +1,43 @@ +#ifndef STRATEGIES_INTRA_H_ +#define STRATEGIES_INTRA_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "encoderstate.h" + +typedef void (angular_pred_func)( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst); + +// Declare function pointers. +extern angular_pred_func * kvz_angular_pred; + +int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_INTRA_EXPORTS \ + {"angular_pred", (void**) &kvz_angular_pred}, \ + + + +#endif //STRATEGIES_INTRA_H_
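For orientation (not from the patch): callers never invoke the generic or AVX2 routines directly; once the strategy selector has assigned kvz_angular_pred, prediction goes through the pointer with the signature declared above. The wrapper below is purely illustrative.

#include "strategies/strategies-intra.h"  /* kvz_angular_pred; exact path may differ */

/* Illustrative caller; assumes the strategy selector has already run
 * and filled in the function pointer. */
static void predict_angular(int_fast8_t log2_width, int_fast8_t intra_mode,
                            const kvz_pixel *ref_above, const kvz_pixel *ref_left,
                            kvz_pixel *dst)
{
  kvz_angular_pred(log2_width, intra_mode, ref_above, ref_left, dst);
}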
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-quant.c
Added
@@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "strategies-quant.h" +#include "strategyselector.h" + +// Define function pointers. +quant_func *kvz_quant; + +// Headers for platform optimizations. +#include "generic/quant-generic.h" +#include "avx2/quant-avx2.h" + + +int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_quant_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + success &= kvz_strategy_register_quant_avx2(opaque, bitdepth); + } + return success; +} \ No newline at end of file
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-quant.h
Added
@@ -0,0 +1,40 @@ +#ifndef STRATEGIES_QUANT_H_ +#define STRATEGIES_QUANT_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "encoderstate.h" + +// Declare function pointers. +typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); + +// Declare function pointers. +extern quant_func * kvz_quant; + +int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_QUANT_EXPORTS \ + {"quant", (void**) &kvz_quant}, \ + + + +#endif //STRATEGIES_QUANT_H_
View file
kvazaar-0.7.0.tar.gz/src/strategyselector.c -> kvazaar-0.7.1.tar.gz/src/strategyselector.c
Changed
@@ -69,6 +69,16 @@ fprintf(stderr, "kvz_strategy_register_ipol failed!\n"); return 0; } + + if (!kvz_strategy_register_quant(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_quant failed!\n"); + return 0; + } + + if (!kvz_strategy_register_intra(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_intra failed!\n"); + return 0; + } while(cur_strategy_to_select->fptr) { *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
View file
kvazaar-0.7.0.tar.gz/src/strategyselector.h -> kvazaar-0.7.1.tar.gz/src/strategyselector.h
Changed
@@ -148,12 +148,16 @@ #include "strategies/strategies-picture.h" #include "strategies/strategies-dct.h" #include "strategies/strategies-ipol.h" +#include "strategies/strategies-quant.h" +#include "strategies/strategies-intra.h" static const strategy_to_select_t strategies_to_select[] = { STRATEGIES_NAL_EXPORTS STRATEGIES_PICTURE_EXPORTS STRATEGIES_DCT_EXPORTS STRATEGIES_IPOL_EXPORTS + STRATEGIES_QUANT_EXPORTS + STRATEGIES_INTRA_EXPORTS { NULL, NULL }, };
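To see why one line per strategy group suffices here (a sketch, not code from the patch): each *_EXPORTS macro expands to a single initializer, so after preprocessing the table ends in entries like the fragment below, and the selection loop in strategyselector.c then assigns the best registered implementation to every listed pointer. The array name is invented for the example; strategy_to_select_t comes from strategyselector.h.

/* Sketch of the expanded tail of the selection table (other groups omitted). */
static const strategy_to_select_t strategies_to_select_expanded[] = {
  { "quant",        (void**) &kvz_quant },
  { "angular_pred", (void**) &kvz_angular_pred },
  { NULL, NULL },
};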
View file
kvazaar-0.7.0.tar.gz/src/transform.c -> kvazaar-0.7.1.tar.gz/src/transform.c
Changed
@@ -33,6 +33,8 @@ #include "nal.h" #include "rdo.h" #include "strategies/strategies-dct.h" +#include "strategies/strategies-quant.h" +#include "strategies/generic/quant-generic.h" ////////////////////////////////////////////////////////////////////////// // INITIALIZATIONS @@ -127,137 +129,6 @@ idct_func(encoder->bitdepth, coeff, block); } - -#define QUANT_SHIFT 14 -/** - * \brief quantize transformed coefficents - * - */ -void kvz_quant(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type ) -{ - const encoder_control_t * const encoder = state->encoder_control; - const uint32_t log2_block_size = kvz_g_convert_to_bit[ width ] + 2; - const uint32_t * const scan = kvz_g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ]; - - int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth-8)*6); - - const uint32_t log2_tr_size = kvz_g_convert_to_bit[ width ] + 2; - const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); - const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size-2][scalinglist_type][qp_scaled%6]; - const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform - const int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; - const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9); - const int32_t q_bits8 = q_bits - 8; - - uint32_t ac_sum = 0; - - for (int32_t n = 0; n < width * height; n++) { - int32_t level; - int32_t sign; - - level = coef[n]; - sign = (level < 0 ? -1: 1); - - level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; - ac_sum += level; - - level *= sign; - q_coef[n] = (coeff_t)(CLIP( -32768, 32767, level)); - } - - if (!(encoder->sign_hiding && ac_sum >= 2)) return; - - int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; - - for (int32_t n = 0; n < width * height; n++) { - int32_t level; - level = coef[n]; - level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; - delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); - } - - if(ac_sum >= 2) { - #define SCAN_SET_SIZE 16 - #define LOG2_SCAN_SET_SIZE 4 - int32_t n,last_cg = -1, abssum = 0, subset, subpos; - for(subset = (width*height - 1)>>LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { - int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg=-1; - subpos = subset<<LOG2_SCAN_SET_SIZE; - abssum = 0; - - // Find last coeff pos - for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { - if (q_coef[scan[n + subpos]]) { - last_nz_pos_in_cg = n; - break; - } - } - - // First coeff pos - for (n = 0; n <SCAN_SET_SIZE; n++) { - if (q_coef[scan[n + subpos]]) { - first_nz_pos_in_cg = n; - break; - } - } - - // Sum all kvz_quant coeffs between first and last - for(n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { - abssum += q_coef[scan[n + subpos]]; - } - - if(last_nz_pos_in_cg >= 0 && last_cg == -1) { - last_cg = 1; - } - - if(last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { - int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1) ; - if(signbit != (abssum&0x1)) { // compare signbit with sum_parity - int32_t min_cost_inc = 0x7fffffff, min_pos =-1, cur_cost=0x7fffffff; - int16_t final_change = 0, cur_change=0; - for(n = (last_cg == 1 ? last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { - uint32_t blkPos = scan[n + subpos]; - if(q_coef[blkPos] != 0) { - if(delta_u[blkPos] > 0) { - cur_cost = -delta_u[blkPos]; - cur_change=1; - } else if(n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { - cur_cost=0x7fffffff; - } else { - cur_cost = delta_u[blkPos]; - cur_change =-1; - } - } else if(n < first_nz_pos_in_cg && ((coef[blkPos] >= 0)?0:1) != signbit) { - cur_cost = 0x7fffffff; - } else { - cur_cost = -delta_u[blkPos]; - cur_change = 1; - } - - if(cur_cost < min_cost_inc) { - min_cost_inc = cur_cost; - final_change = cur_change; - min_pos = blkPos; - } - } // CG loop - - if(q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { - final_change = -1; - } - - if(coef[min_pos] >= 0) q_coef[min_pos] += final_change; - else q_coef[min_pos] -= final_change; - } // Hide - } - if (last_cg == 1) last_cg=0; - } - - #undef SCAN_SET_SIZE - #undef LOG2_SCAN_SET_SIZE - } -} - /** * \brief inverse quantize transformed and quantized coefficents *
View file
kvazaar-0.7.0.tar.gz/src/transform.h -> kvazaar-0.7.1.tar.gz/src/transform.h
Changed
@@ -35,8 +35,6 @@ -void kvz_quant(const encoder_state_t *state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); void kvz_dequant(const encoder_state_t *state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type); void kvz_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
View file
kvazaar-0.7.0.tar.gz/tests/test_strategies.c -> kvazaar-0.7.1.tar.gz/tests/test_strategies.c
Changed
@@ -41,7 +41,7 @@ } if (!kvz_strategy_register_dct(&strategies, KVZ_BIT_DEPTH)) { - fprintf(stderr, "strategy_register_partial_butterfly failed!\n"); + fprintf(stderr, "strategy_register_dct failed!\n"); return; } }