Projects
Essentials
kvazaar
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 4
View file
kvazaar.changes
Changed
@@ -1,4 +1,10 @@ ------------------------------------------------------------------- +Sun Oct 25 06:12:37 UTC 2015 - aloisio@gmx.com + +- Update to version 0.7.1 + * bumped library version to 2 + +------------------------------------------------------------------- Sat Oct 10 06:21:46 UTC 2015 - aloisio@gmx.com - Update to version 0.7.0
View file
kvazaar.spec
Changed
@@ -16,9 +16,9 @@ # %define libname libkvazaar -%define libmver 1 +%define libmver 2 Name: kvazaar -Version: 0.7.0 +Version: 0.7.1 Release: 0 Summary: HEVC encoder License: LGPL-2.1
View file
kvazaar-0.7.0.tar.gz/README.md -> kvazaar-0.7.1.tar.gz/README.md
Changed
@@ -55,7 +55,7 @@ --subme <integer> : Set fractional pixel motion estimation level [1]. 0: only integer motion estimation 1: fractional pixel motion estimation enabled - --source-scan-type <string> : Set source scan type [\"progressive\"]. + --source-scan-type <string> : Set source scan type ["progressive"]. "progressive": progressive scan "tff": top field first "bff": bottom field first
View file
kvazaar-0.7.0.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj -> kvazaar-0.7.1.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj
Changed
@@ -153,6 +153,22 @@ <ClCompile Include="..\..\src\search.c" /> <ClCompile Include="..\..\src\search_inter.c" /> <ClCompile Include="..\..\src\search_intra.c" /> + <ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c"> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + </ClCompile> + <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c"> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> + </ClCompile> + <ClCompile Include="..\..\src\strategies\generic\intra-generic.c" /> + <ClCompile Include="..\..\src\strategies\generic\quant-generic.c" /> + <ClCompile Include="..\..\src\strategies\strategies-intra.c" /> + <ClCompile Include="..\..\src\strategies\strategies-quant.c" /> <ClCompile Include="..\..\src\yuv_io.c" /> <ClInclude Include="..\..\src\checkpoint.h" /> <ClInclude Include="..\..\src\cli.h" /> @@ -201,6 +217,13 @@ <ClInclude Include="..\..\src\kvazaar_internal.h" /> <ClInclude 
Include="..\..\src\search_inter.h" /> <ClInclude Include="..\..\src\search_intra.h" /> + <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" /> + <ClInclude Include="..\..\src\strategies\generic\intra-generic.h" /> + <ClInclude Include="..\..\src\strategies\strategies-common.h" /> + <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" /> + <ClInclude Include="..\..\src\strategies\generic\quant-generic.h" /> + <ClInclude Include="..\..\src\strategies\strategies-intra.h" /> + <ClInclude Include="..\..\src\strategies\strategies-quant.h" /> <ClInclude Include="..\..\src\yuv_io.h" /> </ItemGroup> <ItemGroup>
View file
kvazaar-0.7.0.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj.filters -> kvazaar-0.7.1.tar.gz/build/kvazaar_lib/kvazaar_lib.vcxproj.filters
Changed
@@ -207,6 +207,24 @@ <ClCompile Include="..\..\src\input_frame_buffer.c"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="..\..\src\strategies\strategies-quant.c"> + <Filter>Source Files\strategies</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\generic\quant-generic.c"> + <Filter>Source Files\strategies\generic</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c"> + <Filter>Source Files\strategies\avx2</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\strategies-intra.c"> + <Filter>Source Files\strategies</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\generic\intra-generic.c"> + <Filter>Source Files\strategies\generic</Filter> + </ClCompile> + <ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c"> + <Filter>Source Files\strategies\avx2</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="..\..\src\global.h"> @@ -374,6 +392,27 @@ <ClInclude Include="..\..\src\input_frame_buffer.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="..\..\src\strategies\strategies-common.h"> + <Filter>Header Files\strategies</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\strategies-quant.h"> + <Filter>Header Files\strategies</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\generic\quant-generic.h"> + <Filter>Header Files\strategies\generic</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h"> + <Filter>Header Files\strategies\avx2</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\strategies-intra.h"> + <Filter>Header Files\strategies</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\generic\intra-generic.h"> + <Filter>Header Files\strategies\generic</Filter> + </ClInclude> + <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h"> + <Filter>Header Files\strategies\avx2</Filter> + </ClInclude> </ItemGroup> <ItemGroup> <YASM 
Include="..\..\src\extras\x86inc.asm">
View file
kvazaar-0.7.0.tar.gz/src/Makefile -> kvazaar-0.7.1.tar.gz/src/Makefile
Changed
@@ -12,7 +12,7 @@ DLLDIR = $(BINDIR) # Library version number -VER_MAJOR = 1 +VER_MAJOR = 2 VER_MINOR = 0 VER_RELEASE = 0 @@ -152,18 +152,21 @@ # directories. If the instruction set is supported by the # architecture, compile the files in these directories with the # apropriate flags to cause the intrinsics to work. +# Note: Using LTO on strategies caused issues on some older +# compilers, -fno-lto on these files seemed to fix the issue. +# LTO is no longer used and -fno-lto shouldn't be needed. ifeq ($(TARGET_CPU_ARCH), ppc) - strategies/altivec/%.o: EXTRA_FLAGS += -maltivec -fno-lto - strategies/altivec/%.lo: EXTRA_FLAGS += -maltivec -fno-lto + strategies/altivec/%.o: EXTRA_FLAGS += -maltivec + strategies/altivec/%.lo: EXTRA_FLAGS += -maltivec else ifeq ($(TARGET_CPU_ARCH), x86) - strategies/sse2/%.o: EXTRA_FLAGS += -msse2 -fno-lto - strategies/sse41/%.o: EXTRA_FLAGS += -msse4.1 -fno-lto - strategies/sse2/%.lo: EXTRA_FLAGS += -msse2 -fno-lto - strategies/sse41/%.lo: EXTRA_FLAGS += -msse4.1 -fno-lto + strategies/sse2/%.o: EXTRA_FLAGS += -msse2 + strategies/sse41/%.o: EXTRA_FLAGS += -msse4.1 + strategies/sse2/%.lo: EXTRA_FLAGS += -msse2 + strategies/sse41/%.lo: EXTRA_FLAGS += -msse4.1 # To disable avx2 on old compilers that don't support it. 
ifndef KVZ_DISABLE_AVX2 - strategies/avx2/%.o: EXTRA_FLAGS += -mavx2 -fno-lto - strategies/avx2/%.lo: EXTRA_FLAGS += -mavx2 -fno-lto + strategies/avx2/%.o: EXTRA_FLAGS += -mavx2 + strategies/avx2/%.lo: EXTRA_FLAGS += -mavx2 endif endif @@ -203,6 +206,8 @@ strategies/strategies-nal.o \ strategies/strategies-dct.o \ strategies/strategies-ipol.o \ + strategies/strategies-quant.o \ + strategies/strategies-intra.o \ strategies/generic/nal-generic.o \ strategies/generic/picture-generic.o \ strategies/sse2/picture-sse2.o \ @@ -213,11 +218,15 @@ strategies/generic/dct-generic.o \ strategies/avx2/dct-avx2.o \ strategies/generic/ipol-generic.o \ - strategies/avx2/ipol-avx2.o + strategies/avx2/ipol-avx2.o \ + strategies/generic/quant-generic.o \ + strategies/avx2/quant-avx2.o \ + strategies/generic/intra-generic.o \ + strategies/avx2/intra-avx2.o ifndef KVZ_DISABLE_ASM # Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause - # the registration function to register the function pointers in the ASM + # the registration function to register the function pointers in the ASM # files. strategies/x86_asm/%.o: EXTRA_FLAGS += -DKVZ_COMPILE_ASM @@ -321,7 +330,7 @@ .PHONY: kvazaar.pc init_submodules install install-pc install-prog install-lib .PHONY: install-dylib install-dll clean -kvazaar.pc: KVZ_VERSION = $(shell sed -n 's/^#define\s\+KVZ_VERSION\s\+\(.*\)/\1/ p' global.h) +kvazaar.pc: KVZ_VERSION = $(shell awk '/#define KVZ_VERSION/ { print $$3 }' global.h) kvazaar.pc: kvazaar.pc.in Makefile sed -e "s;@prefix@;$(PREFIX);" -e "s;@libdir@;$(LIBDIR);" \ -e "s;@VERSION@;$(KVZ_VERSION);" \
View file
kvazaar-0.7.0.tar.gz/src/cli.c -> kvazaar-0.7.1.tar.gz/src/cli.c
Changed
@@ -198,6 +198,14 @@ goto done; } + if (opts->config->vps_period < 0) { + // Disabling parameter sets is only possible when using Kvazaar as + // a library. + fprintf(stderr, "Input error: vps_period must be non-negative\n"); + ok = 0; + goto done; + } + // Set resolution automatically if necessary if (opts->config->width == 0 && opts->config->width == 0){ ok = select_input_res_auto(opts->input, &opts->config->width, &opts->config->height);
View file
kvazaar-0.7.0.tar.gz/src/config.c -> kvazaar-0.7.1.tar.gz/src/config.c
Changed
@@ -157,7 +157,7 @@ //If the arg starts with u, we want an uniform split if (arg[0]=='u') { *ntiles = atoi(arg+1)-1; - if (MAX_TILES_PER_DIM <= *ntiles || 0 >= *ntiles) { + if (MAX_TILES_PER_DIM <= *ntiles || 0 > *ntiles) { fprintf(stderr, "Invalid number of tiles (0 < %d <= %d = MAX_TILES_PER_DIM)!\n", *ntiles + 1, MAX_TILES_PER_DIM); return 0; } @@ -504,8 +504,8 @@ error = 1; } - if (cfg->rdo < 0 || cfg->rdo > 2) { - fprintf(stderr, "Input error: --rd parameter out of range [0..2]\n"); + if (cfg->rdo < 0 || cfg->rdo > 3) { + fprintf(stderr, "Input error: --rd parameter out of range [0..3]\n"); error = 1; }
View file
kvazaar-0.7.0.tar.gz/src/encoder.c -> kvazaar-0.7.1.tar.gz/src/encoder.c
Changed
@@ -433,7 +433,11 @@ // AUD encoder->aud_enable = (int8_t)encoder->cfg->aud_enable; - encoder->vps_period = encoder->cfg->vps_period * encoder->cfg->intra_period; + if (encoder->cfg->vps_period >= 0) { + encoder->vps_period = encoder->cfg->vps_period * encoder->cfg->intra_period; + } else { + encoder->vps_period = -1; + } return encoder;
View file
kvazaar-0.7.0.tar.gz/src/encoder.h -> kvazaar-0.7.1.tar.gz/src/encoder.h
Changed
@@ -140,7 +140,7 @@ } pu_depth_inter, pu_depth_intra; // How often Video Parameter Set is re-sent. - uint32_t vps_period; + int32_t vps_period; bool sign_hiding;
View file
kvazaar-0.7.0.tar.gz/src/encoder_state-bitstream.c -> kvazaar-0.7.1.tar.gz/src/encoder_state-bitstream.c
Changed
@@ -41,10 +41,9 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_PTL(encoder_state_t * const state) +static void encoder_state_write_bitstream_PTL(bitstream_t *stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; - int i; // PTL // Profile Tier WRITE_U(stream, 0, 2, "general_profile_space"); @@ -74,17 +73,16 @@ WRITE_U(stream, 0, 1, "sub_layer_profile_present_flag"); WRITE_U(stream, 0, 1, "sub_layer_level_present_flag"); - for (i = 1; i < 8; i++) { + for (int i = 1; i < 8; i++) { WRITE_U(stream, 0, 2, "reserved_zero_2bits"); } // end PTL } -static void encoder_state_write_bitstream_vid_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_vid_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; - int i; #ifdef KVZ_DEBUG printf("=========== Video Parameter Set ID: 0 ===========\n"); #endif @@ -96,12 +94,12 @@ WRITE_U(stream, 0, 1, "vps_temporal_id_nesting_flag"); WRITE_U(stream, 0xffff, 16, "vps_reserved_ffff_16bits"); - encoder_state_write_bitstream_PTL(state); + encoder_state_write_bitstream_PTL(stream, state); WRITE_U(stream, 0, 1, "vps_sub_layer_ordering_info_present_flag"); //for each layer - for (i = 0; i < 1; i++) { + for (int i = 0; i < 1; i++) { WRITE_UE(stream, 1, "vps_max_dec_pic_buffering"); WRITE_UE(stream, 0, "vps_num_reorder_pics"); WRITE_UE(stream, 0, "vps_max_latency_increase"); @@ -119,10 +117,10 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_scaling_list(encoder_state_t * const state) +static void encoder_state_write_bitstream_scaling_list(bitstream_t *stream, + encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; - bitstream_t * const stream = &state->stream; uint32_t size_id; for (size_id = 0; size_id < SCALING_LIST_SIZE_NUM; size_id++) { int32_t list_id; @@ -177,9 +175,9 @@ } 
-static void encoder_state_write_bitstream_VUI(encoder_state_t * const state) +static void encoder_state_write_bitstream_VUI(bitstream_t *stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; const encoder_control_t * const encoder = state->encoder_control; #ifdef KVZ_DEBUG printf("=========== VUI Set ID: 0 ===========\n"); @@ -260,8 +258,8 @@ //ENDIF WRITE_U(stream, 0, 1, "neutral_chroma_indication_flag"); - WRITE_U(stream, state->encoder_control->vui.field_seq_flag, 1, "field_seq_flag"); // 0: frames, 1: fields - WRITE_U(stream, state->encoder_control->vui.frame_field_info_present_flag, 1, "frame_field_info_present_flag"); + WRITE_U(stream, encoder->vui.field_seq_flag, 1, "field_seq_flag"); // 0: frames, 1: fields + WRITE_U(stream, encoder->vui.frame_field_info_present_flag, 1, "frame_field_info_present_flag"); WRITE_U(stream, 0, 1, "default_display_window_flag"); //IF default display window @@ -278,9 +276,9 @@ //ENDIF } -static void encoder_state_write_bitstream_seq_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { - bitstream_t * const stream = &state->stream; const encoder_control_t * encoder = state->encoder_control; #ifdef KVZ_DEBUG @@ -292,13 +290,13 @@ WRITE_U(stream, 1, 3, "sps_max_sub_layers_minus1"); WRITE_U(stream, 0, 1, "sps_temporal_id_nesting_flag"); - encoder_state_write_bitstream_PTL(state); + encoder_state_write_bitstream_PTL(stream, state); WRITE_UE(stream, 0, "sps_seq_parameter_set_id"); - WRITE_UE(stream, state->encoder_control->in.video_format, + WRITE_UE(stream, encoder->in.video_format, "chroma_format_idc"); - if (state->encoder_control->in.video_format == 3) { + if (encoder->in.video_format == 3) { WRITE_U(stream, 0, 1, "separate_colour_plane_flag"); } @@ -331,7 +329,7 @@ WRITE_U(stream, 0, 1, "sps_sub_layer_ordering_info_present_flag"); //for each layer - WRITE_UE(stream, 
state->encoder_control->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering"); + WRITE_UE(stream, encoder->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering"); WRITE_UE(stream, encoder->cfg->gop_len, "sps_num_reorder_pics"); WRITE_UE(stream, 0, "sps_max_latency_increase"); //end for @@ -344,14 +342,14 @@ WRITE_UE(stream, encoder->tr_depth_intra, "max_transform_hierarchy_depth_intra"); // scaling list - WRITE_U(stream, state->encoder_control->scaling_list.enable, 1, "scaling_list_enable_flag"); - if (state->encoder_control->scaling_list.enable) { + WRITE_U(stream, encoder->scaling_list.enable, 1, "scaling_list_enable_flag"); + if (encoder->scaling_list.enable) { WRITE_U(stream, 1, 1, "sps_scaling_list_data_present_flag"); - encoder_state_write_bitstream_scaling_list(state); + encoder_state_write_bitstream_scaling_list(stream, state); } WRITE_U(stream, 0, 1, "amp_enabled_flag"); - WRITE_U(stream, state->encoder_control->sao_enable ? 1 : 0, 1, + WRITE_U(stream, encoder->sao_enable ? 
1 : 0, 1, "sample_adaptive_offset_enabled_flag"); WRITE_U(stream, ENABLE_PCM, 1, "pcm_enabled_flag"); #if ENABLE_PCM == 1 @@ -377,17 +375,17 @@ WRITE_U(stream, 0, 1, "sps_strong_intra_smoothing_enable_flag"); WRITE_U(stream, 1, 1, "vui_parameters_present_flag"); - encoder_state_write_bitstream_VUI(state); + encoder_state_write_bitstream_VUI(stream, state); WRITE_U(stream, 0, 1, "sps_extension_flag"); kvz_bitstream_add_rbsp_trailing_bits(stream); } -static void encoder_state_write_bitstream_pic_parameter_set(encoder_state_t * const state) +static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream, + encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; - bitstream_t * const stream = &state->stream; #ifdef KVZ_DEBUG printf("=========== Picture Parameter Set ID: 0 ===========\n"); #endif @@ -403,7 +401,7 @@ WRITE_UE(stream, 0, "num_ref_idx_l1_default_active_minus1"); WRITE_SE(stream, ((int8_t)encoder->cfg->qp) - 26, "pic_init_qp_minus26"); WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); - WRITE_U(stream, state->encoder_control->trskip_enable, 1, "transform_skip_enabled_flag"); + WRITE_U(stream, encoder->trskip_enable, 1, "transform_skip_enabled_flag"); WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); //if cu_qp_delta_enabled_flag //WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); @@ -445,13 +443,13 @@ //IF deblocking_filter WRITE_U(stream, 0, 1, "deblocking_filter_override_enabled_flag"); - WRITE_U(stream, state->encoder_control->deblock_enable ? 0 : 1, 1, + WRITE_U(stream, encoder->deblock_enable ? 
0 : 1, 1, "pps_disable_deblocking_filter_flag"); //IF !disabled - if (state->encoder_control->deblock_enable) { - WRITE_SE(stream, state->encoder_control->beta_offset_div2, "beta_offset_div2"); - WRITE_SE(stream, state->encoder_control->tc_offset_div2, "tc_offset_div2"); + if (encoder->deblock_enable) { + WRITE_SE(stream, encoder->beta_offset_div2, "beta_offset_div2"); + WRITE_SE(stream, encoder->tc_offset_div2, "tc_offset_div2"); } //ENDIF @@ -639,7 +637,7 @@ int j; int ref_negative = 0; int ref_positive = 0; - if (state->encoder_control->cfg->gop_len) { + if (encoder->cfg->gop_len) { for (j = 0; j < state->global->ref->used_size; j++) { if (state->global->ref->pocs[j] < state->global->poc) { ref_negative++; @@ -686,10 +684,10 @@
View file
kvazaar-0.7.0.tar.gz/src/encoder_state-bitstream.h -> kvazaar-0.7.1.tar.gz/src/encoder_state-bitstream.h
Changed
@@ -26,17 +26,18 @@ #include "global.h" - // Forward declare because including the header would lead to a cyclic // dependency. struct encoder_state_t; +struct bitstream_t; void kvz_encoder_state_write_bitstream_slice_header(struct encoder_state_t * const state); void kvz_encoder_state_write_bitstream(struct encoder_state_t * const state); void kvz_encoder_state_write_bitstream_leaf(struct encoder_state_t * const state); void kvz_encoder_state_worker_write_bitstream_leaf(void * opaque); void kvz_encoder_state_worker_write_bitstream(void * opaque); - +void kvz_encoder_state_write_parameter_sets(struct bitstream_t *stream, + struct encoder_state_t * const state); #endif // ENCODER_STATE_BITSTREAM_H_
View file
kvazaar-0.7.0.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-0.7.1.tar.gz/src/encoder_state-ctors_dtors.c
Changed
@@ -113,15 +113,14 @@ static int encoder_state_config_slice_init(encoder_state_t * const state, const int start_address_in_ts, const int end_address_in_ts) { - int i = 0, slice_found=0; - for (i = 0; i < state->encoder_control->slice_count; ++i) { + state->slice->id = -1; + for (int i = 0; i < state->encoder_control->slice_count; ++i) { if (state->encoder_control->slice_addresses_in_ts[i] == start_address_in_ts) { state->slice->id = i; - slice_found = 1; break; } } - assert(slice_found); + assert(state->slice->id != -1); state->slice->start_in_ts = start_address_in_ts; state->slice->end_in_ts = end_address_in_ts;
View file
kvazaar-0.7.0.tar.gz/src/global.h -> kvazaar-0.7.1.tar.gz/src/global.h
Changed
@@ -124,6 +124,7 @@ #define MAX(a,b) (((a)>(b))?(a):(b)) #define MIN(a,b) (((a)<(b))?(a):(b)) #define CLIP(low,high,value) MAX((low),MIN((high),(value))) +#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value)) #define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; } #define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth) #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val)) @@ -143,7 +144,7 @@ // NOTE: When making a release, remember to also bump library version in // Makefile, if necessary. -#define KVZ_VERSION 0.7.0 +#define KVZ_VERSION 0.7.1 #define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION) //#define VERBOSE 1
View file
kvazaar-0.7.0.tar.gz/src/image.c -> kvazaar-0.7.1.tar.gz/src/image.c
Changed
@@ -114,10 +114,9 @@ */ kvz_picture *kvz_image_copy_ref(kvz_picture *im) { - int32_t new_refcount = ATOMIC_INC(&(im->refcount)); - // The caller should have had another reference. - assert(new_refcount > 1); + assert(im->refcount > 0); + ATOMIC_INC(&(im->refcount)); return im; }
View file
kvazaar-0.7.0.tar.gz/src/intra.c -> kvazaar-0.7.1.tar.gz/src/intra.c
Changed
@@ -28,91 +28,19 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> -#include "config.h" #include "encoder.h" #include "transform.h" -#include "rdo.h" +#include "strategies/strategies-intra.h" -const uint8_t kvz_intra_hor_ver_dist_thres[5] = {0,7,1,0,0}; - - -/** - * \brief Set intrablock mode (and init typedata) - * \param pic picture to use - * \param xCtb x CU position (smallest CU) - * \param yCtb y CU position (smallest CU) - * \param depth current CU depth - * \param mode mode to set - * \returns Void - */ -void kvz_intra_set_block_mode(videoframe_t *frame,uint32_t x_cu, uint32_t y_cu, uint8_t depth, uint8_t mode, uint8_t part_mode) -{ - uint32_t x, y; - int block_scu_width = (LCU_WIDTH>>depth)/(LCU_WIDTH>>MAX_DEPTH); - - if (part_mode == SIZE_NxN) { - cu_info_t *cur_cu = kvz_videoframe_get_cu(frame, x_cu, y_cu); - // Modes are already set. - cur_cu->depth = depth; - cur_cu->type = CU_INTRA; - cur_cu->tr_depth = depth + 1; - return; - } - - // Loop through all the blocks in the area of cur_cu - for (y = y_cu; y < y_cu + block_scu_width; y++) { - for (x = x_cu; x < x_cu + block_scu_width; x++) { - cu_info_t *cur_cu = kvz_videoframe_get_cu(frame, x_cu, y_cu); - cur_cu->depth = depth; - cur_cu->type = CU_INTRA; - cur_cu->intra[0].mode = mode; - cur_cu->intra[1].mode = mode; - cur_cu->intra[2].mode = mode; - cur_cu->intra[3].mode = mode; - cur_cu->part_size = part_mode; - cur_cu->tr_depth = depth; - } - } -} - -/** - * \brief get intrablock mode - * \param pic picture data to use - * \param picwidth width of the picture data - * \param xpos x-position - * \param ypos y-position - * \param width block width - * \returns DC prediction -*/ -kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel *pic, uint16_t picwidth, uint8_t width) -{ - int32_t i, sum = 0; - - // pixels on top and left - for (i = -picwidth; i < width - picwidth; i++) { - sum += pic[i]; - } - for (i = -1; i < width * picwidth - 1; i += picwidth) { - sum += pic[i]; - } - - 
// return the average - return (kvz_pixel)((sum + width) / (width + width)); -} - -/** - * \brief Function for deriving intra luma predictions - * \param pic picture to use - * \param x_cu x CU position (smallest CU) - * \param y_cu y CU position (smallest CU) - * \param preds output buffer for 3 predictions - * \returns (predictions are found)?1:0 - */ -int8_t kvz_intra_get_dir_luma_predictor(const uint32_t x, const uint32_t y, int8_t* preds, - const cu_info_t * const cur_cu, const cu_info_t * const left_cu, const cu_info_t * const above_cu) +int8_t kvz_intra_get_dir_luma_predictor( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_cu, + const cu_info_t *const left_cu, + const cu_info_t *const above_cu) { int y_cu = y>>3; @@ -166,158 +94,235 @@ return 1; } + +static void intra_filter_reference( + int_fast8_t log2_width, + kvz_intra_references *refs) +{ + if (refs->filtered_initialized) { + return; + } else { + refs->filtered_initialized = true; + } + + const int_fast8_t ref_width = 2 * (1 << log2_width) + 1; + kvz_intra_ref *ref = &refs->ref; + kvz_intra_ref *filtered_ref = &refs->filtered_ref; + + filtered_ref->left[0] = (ref->left[1] + 2 * ref->left[0] + ref->top[1] + 2) / 4; + filtered_ref->top[0] = filtered_ref->left[0]; + + for (int_fast8_t y = 1; y < ref_width - 1; ++y) { + kvz_pixel *p = &ref->left[y]; + filtered_ref->left[y] = (p[-1] + 2 * p[0] + p[1] + 2) / 4; + } + filtered_ref->left[ref_width - 1] = ref->left[ref_width - 1]; + + for (int_fast8_t x = 1; x < ref_width - 1; ++x) { + kvz_pixel *p = &ref->top[x]; + filtered_ref->top[x] = (p[-1] + 2 * p[0] + p[1] + 2) / 4; + } + filtered_ref->top[ref_width - 1] = ref->top[ref_width - 1]; +} + + +static void intra_post_process_angular( + unsigned width, + unsigned stride, + const kvz_pixel *ref, + kvz_pixel *block) +{ + kvz_pixel ref2 = ref[0]; + for (unsigned i = 0; i < width; i++) { + kvz_pixel val = block[i * stride]; + kvz_pixel ref1 = ref[i + 1]; + block[i * stride] = 
CLIP_TO_PIXEL(val + ((ref1 - ref2) >> 1)); + } +} + + /** - * \brief Intra filtering of the border samples - * \param ref reference picture data - * \param x_cu x CU position (smallest CU) - * \param y_cu y CU position (smallest CU) - * \param depth current CU depth - * \param preds output buffer for 3 predictions - * \returns (predictions are found)?1:0 + * \brief Generage planar prediction. + * \param log2_width Log2 of width, range 2..5. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. */ -void kvz_intra_filter(kvz_pixel *ref, int32_t stride,int32_t width, int8_t mode) +static void intra_pred_planar( + const int_fast8_t log2_width, + const kvz_pixel *const ref_top, + const kvz_pixel *const ref_left, + kvz_pixel *const dst) { - #define FWIDTH (LCU_WIDTH*2+1) - kvz_pixel filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples - kvz_pixel *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1) - int x,y; - - if (!mode) { - // pF[ -1 ][ -1 ] = ( p[ -1 ][ 0 ] + 2*p[ -1 ][ -1 ] + p[ 0 ][ -1 ] + 2 ) >> 2 (8 35) - filteredShift[-FWIDTH-1] = (ref[-1] + 2*ref[-(int32_t)stride-1] + ref[-(int32_t)stride] + 2) >> 2; - - // pF[ -1 ][ y ] = ( p[ -1 ][ y + 1 ] + 2*p[ -1 ][ y ] + p[ -1 ][ y - 1 ] + 2 ) >> 2 for y = 0..nTbS * 2 - 2 (8 36) - for (y = 0; y < (int32_t)width * 2 - 1; y++) { - filteredShift[y*FWIDTH-1] = (ref[(y + 1) * stride - 1] + 2*ref[y * stride - 1] + ref[(y - 1) * stride - 1] + 2) >> 2; + assert(log2_width >= 2 && log2_width <= 5); + + const int_fast8_t width = 1 << log2_width; + const kvz_pixel top_right = ref_top[width + 1]; + const kvz_pixel bottom_left = ref_left[width + 1]; + +#if 0 + // Unoptimized version for reference. 
+ for (int y = 0; y < width; ++y) { + for (int x = 0; x < width; ++x) { + int_fast16_t hor = (width - 1 - x) * ref_left[y + 1] + (x + 1) * top_right; + int_fast16_t ver = (width - 1 - y) * ref_top[x + 1] + (y + 1) * bottom_left; + dst[y * width + x] = (ver + hor + width) >> (log2_width + 1); } + } +#else
View file
kvazaar-0.7.0.tar.gz/src/intra.h -> kvazaar-0.7.1.tar.gz/src/intra.h
Changed
@@ -27,29 +27,91 @@ #include "global.h" -#include "image.h" -#include "encoder.h" #include "encoderstate.h" -//void kvz_intra_set_block_mode(image* im,uint32_t x_ctb, uint32_t y_ctb, uint8_t depth, uint8_t mode, uint8_t part_mode); +typedef struct { + kvz_pixel left[2 * 32 + 1]; + kvz_pixel top[2 * 32 + 1]; +} kvz_intra_ref; +typedef struct +{ + kvz_intra_ref ref; + kvz_intra_ref filtered_ref; + bool filtered_initialized; +} kvz_intra_references; -int8_t kvz_intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds, - const cu_info_t* cur_cu, const cu_info_t* left_cu, const cu_info_t* above_cu); -void kvz_intra_dc_pred_filtering(const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t height ); -void kvz_intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, kvz_pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu); -void kvz_intra_filter(kvz_pixel* ref, int32_t stride, int32_t width, int8_t mode); +/** +* \brief Function for deriving intra luma predictions +* \param pic picture to use +* \param x_cu x CU position (smallest CU) +* \param y_cu y CU position (smallest CU) +* \param preds output buffer for 3 predictions +* \returns (predictions are found)?1:0 +*/ +int8_t kvz_intra_get_dir_luma_predictor( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_cu, + const cu_info_t *const left_cu, + const cu_info_t *const above_cu); -/* Predictions */ -void kvz_intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma); +/** +* \brief Generage angular predictions. +* \param width Width in pixels, range 4..32. +* \param color What color pixels to use. +* \param luma_px Luma coordinates of the prediction block. +* \param pic_px Picture dimensions in luma pixels. 
+* \param lcu LCU struct. +* \param out_left_ref Left reference pixels, index 0 is the top-left. +* \param out_top_ref Top reference pixels, index 0 is the top-left. +*/ +void kvz_intra_build_reference( + const int_fast8_t log2_width, + const color_t color, + const vector2d_t *const luma_px, + const vector2d_t *const pic_px, + const lcu_t *const lcu, + kvz_intra_references *const refs); -kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel* pic, uint16_t pic_width, uint8_t width); -void kvz_intra_get_planar_pred(const kvz_pixel* src,int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride); -void kvz_intra_get_angular_pred(const encoder_control_t *encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter); +/** + * \brief Generate intra predictions. + * \param refs Reference pixels used for the prediction. + * \param log2_width Width of the predicted block. + * \param mode Intra mode used for the prediction. + * \param color Color of the prediction. + * \param dst Buffer for the predicted pixels. + */ +void kvz_intra_predict( + kvz_intra_references *refs, + int_fast8_t log2_width, + int_fast8_t mode, + color_t color, + kvz_pixel *dst); -void kvz_intra_recon(const encoder_control_t *encoder, kvz_pixel* rec, int32_t rec_stride, uint32_t width, kvz_pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); +/** + * \brief Do a full intra prediction cycle on a CU in lcu for luma. + */ +void kvz_intra_recon_lcu_luma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu); -void kvz_intra_recon_lcu_luma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); -void kvz_intra_recon_lcu_chroma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); +/** +* \brief Do a full intra prediction cycle on a CU in lcu for chroma. 
+*/ +void kvz_intra_recon_lcu_chroma( + encoder_state_t *const state, + int x, + int y, + int depth, + int8_t intra_mode, + cu_info_t *cur_cu, + lcu_t *lcu); #endif
View file
kvazaar-0.7.0.tar.gz/src/kvazaar.c -> kvazaar-0.7.1.tar.gz/src/kvazaar.c
Changed
@@ -123,6 +123,27 @@ } +static int kvazaar_headers(kvz_encoder *enc, + kvz_data_chunk **data_out, + uint32_t *len_out) +{ + if (data_out) *data_out = NULL; + if (len_out) *len_out = 0; + + bitstream_t stream; + kvz_bitstream_init(&stream); + + kvz_encoder_state_write_parameter_sets(&stream, &enc->states[enc->cur_state_num]); + + // Get stream length before taking chunks since that clears the stream. + if (len_out) *len_out = kvz_bitstream_tell(&stream) / 8; + if (data_out) *data_out = kvz_bitstream_take_chunks(&stream); + + kvz_bitstream_finalize(&stream); + return 1; +} + + static int kvazaar_encode(kvz_encoder *enc, kvz_picture *pic_in, kvz_data_chunk **data_out, @@ -204,6 +225,7 @@ .encoder_open = kvazaar_open, .encoder_close = kvazaar_close, + .encoder_headers = kvazaar_headers, .encoder_encode = kvazaar_encode, };
View file
kvazaar-0.7.0.tar.gz/src/kvazaar.h -> kvazaar-0.7.1.tar.gz/src/kvazaar.h
Changed
@@ -106,7 +106,18 @@ { int32_t qp; /*!< \brief Quantization parameter */ int32_t intra_period; /*!< \brief the period of intra frames in stream */ - int32_t vps_period; /*!< \brief how often the vps is re-sent */ + + /** \brief How often the VPS, SPS and PPS are re-sent + * + * -1: never + * 0: first frame only + * 1: every intra frame + * 2: every other intra frame + * 3: every third intra frame + * and so on + */ + int32_t vps_period; + int32_t width; /*!< \brief frame width, must be a multiple of 8 */ int32_t height; /*!< \brief frame height, must be a multiple of 8 */ double framerate; /*!< \brief Input framerate */ @@ -410,6 +421,26 @@ void (*encoder_close)(kvz_encoder *encoder); /** + * \brief Get parameter sets. + * + * Encode the VPS, SPS and PPS. + * + * If data_out is set to non-NULL values, the caller is responsible for + * calling chunk_free on it. + * + * A null pointer may be passed in place of the parameter data_out or len_out + * to skip returning the corresponding value. + * + * \param encoder encoder + * \param data_out Returns the encoded parameter sets. + * \param len_out Returns number of bytes in the encoded data. + * \return 1 on success, 0 on error. + */ + int (*encoder_headers)(kvz_encoder *encoder, + kvz_data_chunk **data_out, + uint32_t *len_out); + + /** * \brief Encode one frame. * * Add pic_in to the encoding pipeline. If an encoded frame is ready, return
View file
kvazaar-0.7.0.tar.gz/src/kvazaar_version.h -> kvazaar-0.7.1.tar.gz/src/kvazaar_version.h
Changed
@@ -21,6 +21,6 @@ ****************************************************************************/ // KVZ_API_VERSION is incremented every time the public api changes. -#define KVZ_API_VERSION 7 +#define KVZ_API_VERSION 8 #endif // KVAZAAR_VERSION_H_
View file
kvazaar-0.7.0.tar.gz/src/rdo.c -> kvazaar-0.7.1.tar.gz/src/rdo.c
Changed
@@ -31,6 +31,7 @@ #include "context.h" #include "cabac.h" #include "transform.h" +#include "strategies/strategies-quant.h" #define QUANT_SHIFT 14
View file
kvazaar-0.7.0.tar.gz/src/search_intra.c -> kvazaar-0.7.1.tar.gz/src/search_intra.c
Changed
@@ -271,14 +271,14 @@ static void search_intra_chroma_rough(encoder_state_t * const state, int x_px, int y_px, int depth, const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride, - const kvz_pixel *rec_u, const kvz_pixel *rec_v, int16_t recstride, + kvz_intra_references *refs_u, kvz_intra_references *refs_v, int8_t luma_mode, int8_t modes[5], double costs[5]) { - const bool reconstruct_chroma = !(x_px & 4 || y_px & 4); - if (!reconstruct_chroma) return; + assert(!(x_px & 4 || y_px & 4)); const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); + const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2); for (int i = 0; i < 5; ++i) { costs[i] = 0; @@ -287,16 +287,16 @@ cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); - kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_get_pred(state->encoder_control, rec_u, NULL, recstride, pred, width, modes[i], 1); + kvz_intra_predict(refs_u, log2_width_c, modes[i], COLOR_U, pred); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -304,7 +304,7 @@ kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { if (modes[i] == luma_mode) continue; - kvz_intra_get_pred(state->encoder_control, rec_v, NULL, recstride, pred, width, modes[i], 2); + kvz_intra_predict(refs_v, log2_width_c, modes[i], COLOR_V, pred); //costs[i] += get_cost(encoder_state, 
pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } @@ -343,41 +343,25 @@ */ static int8_t search_intra_rough(encoder_state_t * const state, kvz_pixel *orig, int32_t origstride, - kvz_pixel *rec, int16_t recstride, - int width, int8_t *intra_preds, + kvz_intra_references *refs, + int log2_width, int8_t *intra_preds, int8_t modes[35], double costs[35]) { + assert(log2_width >= 2 && log2_width <= 5); + int_fast8_t width = 1 << log2_width; cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width); cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width); // Temporary block arrays - kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); - - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - assert(width == 4 || width == 8 || width == 16 || width == 32); // Store original block for SAD computation kvz_pixels_blit(orig, orig_block, width, width, origstride, width); - // Generate filtered reference pixels. 
- { - int16_t x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } - int8_t modes_selected = 0; unsigned min_cost = UINT_MAX; unsigned max_cost = 0; @@ -387,18 +371,15 @@ int offset; if (state->encoder_control->full_intra_search) { offset = 1; - } else if (width == 4) { - offset = 2; - } else if (width == 8) { - offset = 4; } else { - offset = 8; + static const int8_t offsets[4] = { 2, 4, 8, 8 }; + offset = offsets[log2_width - 2]; } // Calculate SAD for evenly spaced modes to select the starting point for // the recursive search. for (int mode = 2; mode <= 34; mode += offset) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; @@ -421,7 +402,7 @@ int8_t center_node = best_mode; int8_t mode = center_node - offset; if (mode >= 2) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; if (costs[modes_selected] < best_cost) { @@ -433,7 +414,7 @@ mode = center_node + offset; if (mode <= 34) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; if (costs[modes_selected] < best_cost) { @@ -460,7 +441,7 @@ } if (!has_mode) { - kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); + kvz_intra_predict(refs, log2_width, mode, COLOR_Y, 
pred); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); modes[modes_selected] = mode; ++modes_selected; @@ -507,7 +488,6 @@ static int8_t search_intra_rdo(encoder_state_t * const state, int x_px, int y_px, int depth, kvz_pixel *orig, int32_t origstride, - kvz_pixel *rec, int16_t recstride, int8_t *intra_preds, int modes_to_check, int8_t modes[35], double costs[35], @@ -517,31 +497,14 @@ const int width = LCU_WIDTH >> depth; kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; - int rdo_mode; - int pred_mode; - - kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; - - // Generate filtered reference pixels. - { - int x, y; - for (y = -1; y < recstride; y++) { - recf[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < recstride; x++) { - recf[x - recstride] = rec[x - recstride]; - } - kvz_intra_filter(recf, recstride, width, 0); - } kvz_pixels_blit(orig, orig_block, width, width, origstride, width); // Check that the predicted modes are in the RDO mode list if (modes_to_check < 35) { - for (pred_mode = 0; pred_mode < 3; pred_mode++) { + for (int pred_mode = 0; pred_mode < 3; pred_mode++) { int mode_found = 0; - for (rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { + for (int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { if (intra_preds[pred_mode] == modes[rdo_mode]) { mode_found = 1; break; @@ -555,42 +518,27 @@ } } - for(rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { + for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds); costs[rdo_mode] = rdo_bitcost * (int)(state->global->cur_lambda_cost + 0.5); -#if 0 - if (width != 4 && tr_depth == depth) { - // This code path has been disabled for now because it increases bdrate - // by 1-2 %. Possibly due to not taking chroma into account during luma - // mode search. 
Enabling separate chroma search compensates a little, - // but not enough.
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/intra-avx2.c
Added
@@ -0,0 +1,176 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +/* + * \file + */ + +#include <stdlib.h> + +#include "intra-avx2.h" +#include "strategyselector.h" + +#if COMPILE_INTEL_AVX2 +#include <immintrin.h> + + /** + * \brief Generate angular predictions. + * \param log2_width Log2 of width, range 2..5. + * \param intra_mode Angular mode in range 2..34. + * \param in_ref_above Pointer to -1 index of above reference, length=width*2+1. + * \param in_ref_left Pointer to -1 index of left reference, length=width*2+1. + * \param dst Buffer of size width*width. 
+ */ +static void kvz_angular_pred_avx2( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst) +{ + assert(log2_width >= 2 && log2_width <= 5); + assert(intra_mode >= 2 && intra_mode <= 34); + + static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 }; + static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp + + // Temporary buffer for modes 11-25. + // It only needs to be big enough to hold indices from -width to width-1. + kvz_pixel tmp_ref[2 * 32]; + const int_fast8_t width = 1 << log2_width; + + // Whether to swap references to always project on the left reference row. + const bool vertical_mode = intra_mode >= 18; + // Modes distance to horizontal or vertical mode. + const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode; + // Sample displacement per column in fractions of 32. + const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; + + // Pointer for the reference we are interpolating from. + const kvz_pixel *ref_main; + // Pointer for the other reference. + const kvz_pixel *ref_side; + + // Set ref_main and ref_side such that, when indexed with 0, they point to + // index 0 in block coordinates. + if (sample_disp < 0) { + // Negative sample_disp means, we need to use both references. + + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + + // Move the reference pixels to start from the middle to the later half of + // the tmp_ref, so there is room for negative indices. + for (int_fast8_t x = -1; x < width; ++x) { + tmp_ref[x + width] = ref_main[x]; + } + // Get a pointer to block index 0 in tmp_ref. + ref_main = &tmp_ref[width]; + + // Extend the side reference to the negative indices of main reference. 
+ int_fast32_t col_sample_disp = 128; // rounding for the ">> 8" + int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)]; + int_fast8_t most_negative_index = (width * sample_disp) >> 5; + for (int_fast8_t x = -2; x >= most_negative_index; --x) { + col_sample_disp += inv_abs_sample_disp; + int_fast8_t side_index = col_sample_disp >> 8; + tmp_ref[x + width] = ref_side[side_index - 1]; + } + } + else { + // sample_disp >= 0 means we don't need to refer to negative indices, + // which means we can just use the references as is. + ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1; + ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1; + } + + if (sample_disp != 0) { + // The mode is not horizontal or vertical, we have to do interpolation. + + int_fast16_t delta_pos = 0; + for (int_fast8_t y = 0; y < width; ++y) { + delta_pos += sample_disp; + int_fast8_t delta_int = delta_pos >> 5; + int_fast8_t delta_fract = delta_pos & (32 - 1); + + if (delta_fract) { + // Do linear filtering + if (width < 8) { + for (int_fast8_t x = 0; x < width; ++x) { + kvz_pixel ref1 = ref_main[x + delta_int]; + kvz_pixel ref2 = ref_main[x + delta_int + 1]; + dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5; + } + } else { + struct { uint8_t w1; uint8_t w2; } packed_weights = { 32 - delta_fract, delta_fract }; + __m128i v_weights = _mm_set1_epi16(*(int16_t*)&packed_weights); + + for (int_fast8_t x = 0; x < width; x += 8) { + __m128i v_ref1 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int])); + __m128i v_ref2 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int + 1])); + __m128i v_refs = _mm_unpacklo_epi8(v_ref1, v_ref2); + __m128i v_tmp = _mm_maddubs_epi16(v_refs, v_weights); + v_tmp = _mm_add_epi16(v_tmp, _mm_set1_epi16(16)); + v_tmp = _mm_srli_epi16(v_tmp, 5); + v_tmp = _mm_packus_epi16(v_tmp, v_tmp); + _mm_storel_epi64((__m128i*)(dst + y * width + x), v_tmp); + } + } + } + else { + // Just copy the integer samples + for 
(int_fast8_t x = 0; x < width; x+=4) { + *(int32_t*)(&dst[y * width + x]) = *(int32_t*)(&ref_main[x + delta_int]); + } + } + } + } + else { + // Mode is horizontal or vertical, just copy the pixels. + + for (int_fast8_t y = 0; y < width; ++y) { + for (int_fast8_t x = 0; x < width; x+=4) { + *(int32_t*)&(dst[y * width + x]) = *(int32_t*)&(ref_main[x]); + } + } + } + + // Flip the block if this is was a horizontal mode. + if (!vertical_mode) { + for (int_fast8_t y = 0; y < width - 1; ++y) { + for (int_fast8_t x = y + 1; x < width; ++x) { + SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel); + } + } + } +} + +#endif //COMPILE_INTEL_AVX2 + +int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth) +{ + bool success = true; +#if COMPILE_INTEL_AVX2 + if (bitdepth == 8) { + success &= kvz_strategyselector_register(opaque, "angular_pred", "avx2", 40, &kvz_angular_pred_avx2); + } +#endif //COMPILE_INTEL_AVX2 + return success; +}
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/intra-avx2.h
Added
@@ -0,0 +1,27 @@ +#ifndef STRATEGIES_INTRA_AVX2_H_ +#define STRATEGIES_INTRA_AVX2_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_INTRA_AVX2_H_
View file
kvazaar-0.7.0.tar.gz/src/strategies/avx2/picture-avx2.c -> kvazaar-0.7.1.tar.gz/src/strategies/avx2/picture-avx2.c
Changed
@@ -26,6 +26,7 @@ #if COMPILE_INTEL_AVX2 # include "image.h" +# include "strategies/strategies-common.h" # include <immintrin.h> @@ -136,6 +137,186 @@ return m256i_horizontal_sum(sum0); } +static unsigned satd_8bit_4x4_avx2(const kvz_pixel *org, const kvz_pixel *cur) +{ + + __m128i original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)org)); + __m128i current = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)cur)); + + __m128i diff_lo = _mm_sub_epi16(current, original); + + original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)(org + 8))); + current = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)(cur + 8))); + + __m128i diff_hi = _mm_sub_epi16(current, original); + + + //Hor + __m128i row0 = _mm_hadd_epi16(diff_lo, diff_hi); + __m128i row1 = _mm_hsub_epi16(diff_lo, diff_hi); + + __m128i row2 = _mm_hadd_epi16(row0, row1); + __m128i row3 = _mm_hsub_epi16(row0, row1); + + //Ver + row0 = _mm_hadd_epi16(row2, row3); + row1 = _mm_hsub_epi16(row2, row3); + + row2 = _mm_hadd_epi16(row0, row1); + row3 = _mm_hsub_epi16(row0, row1); + + //Abs and sum + row2 = _mm_abs_epi16(row2); + row3 = _mm_abs_epi16(row3); + + row3 = _mm_add_epi16(row2, row3); + + row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, KVZ_PERMUTE(2, 3, 0, 1) )); + row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, KVZ_PERMUTE(1, 0, 1, 0) )); + row3 = _mm_add_epi16(row3, _mm_shufflelo_epi16(row3, KVZ_PERMUTE(1, 0, 1, 0) )); + + unsigned sum = _mm_extract_epi16(row3, 0); + unsigned satd = (sum + 1) >> 1; + + return satd; +} + +static void hor_add_sub_avx2(__m128i *row0, __m128i *row1){ + + __m128i a = _mm_hadd_epi16(*row0, *row1); + __m128i b = _mm_hsub_epi16(*row0, *row1); + + __m128i c = _mm_hadd_epi16(a, b); + __m128i d = _mm_hsub_epi16(a, b); + + *row0 = _mm_hadd_epi16(c, d); + *row1 = _mm_hsub_epi16(c, d); +} + +static INLINE void ver_add_sub_avx2(__m128i temp_hor[8], __m128i temp_ver[8]){ + + // First stage + for (int i = 0; i < 8; i += 2){ + temp_ver[i+0] = _mm_hadd_epi16(temp_hor[i + 0], temp_hor[i + 1]); + 
temp_ver[i+1] = _mm_hsub_epi16(temp_hor[i + 0], temp_hor[i + 1]); + } + + // Second stage + for (int i = 0; i < 8; i += 4){ + temp_hor[i + 0] = _mm_add_epi16(temp_ver[i + 0], temp_ver[i + 2]); + temp_hor[i + 1] = _mm_add_epi16(temp_ver[i + 1], temp_ver[i + 3]); + temp_hor[i + 2] = _mm_sub_epi16(temp_ver[i + 0], temp_ver[i + 2]); + temp_hor[i + 3] = _mm_sub_epi16(temp_ver[i + 1], temp_ver[i + 3]); + } + + // Third stage + for (int i = 0; i < 4; ++i){ + temp_ver[i + 0] = _mm_add_epi16(temp_hor[0 + i], temp_hor[4 + i]); + temp_ver[i + 4] = _mm_sub_epi16(temp_hor[0 + i], temp_hor[4 + i]); + } +} + +INLINE static void haddwd_accumulate_avx2(__m128i *accumulate, __m128i *ver_row) +{ + __m128i abs_value = _mm_abs_epi16(*ver_row); + *accumulate = _mm_add_epi32(*accumulate, _mm_madd_epi16(abs_value, _mm_set1_epi16(1))); +} + +INLINE static unsigned sum_block_avx2(__m128i *ver_row) +{ + __m128i sad = _mm_setzero_si128(); + haddwd_accumulate_avx2(&sad, ver_row + 0); + haddwd_accumulate_avx2(&sad, ver_row + 1); + haddwd_accumulate_avx2(&sad, ver_row + 2); + haddwd_accumulate_avx2(&sad, ver_row + 3); + haddwd_accumulate_avx2(&sad, ver_row + 4); + haddwd_accumulate_avx2(&sad, ver_row + 5); + haddwd_accumulate_avx2(&sad, ver_row + 6); + haddwd_accumulate_avx2(&sad, ver_row + 7); + + sad = _mm_add_epi32(sad, _mm_shuffle_epi32(sad, KVZ_PERMUTE(2, 3, 0, 1))); + sad = _mm_add_epi32(sad, _mm_shuffle_epi32(sad, KVZ_PERMUTE(1, 0, 1, 0))); + + return _mm_cvtsi128_si32(sad); +} + +INLINE static __m128i diff_row_avx2(const kvz_pixel *buf1, const kvz_pixel *buf2) +{ + __m128i buf1_row = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)buf1)); + __m128i buf2_row = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)buf2)); + return _mm_sub_epi16(buf1_row, buf2_row); +} + +INLINE static void diff_blocks_and_hor_transform_avx2(__m128i row_diff[8], const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) +{ + row_diff[0] = diff_row_avx2(buf1 + 0 * stride1, buf2 + 0 * stride2); + 
row_diff[1] = diff_row_avx2(buf1 + 1 * stride1, buf2 + 1 * stride2); + hor_add_sub_avx2(row_diff + 0, row_diff + 1); + + row_diff[2] = diff_row_avx2(buf1 + 2 * stride1, buf2 + 2 * stride2); + row_diff[3] = diff_row_avx2(buf1 + 3 * stride1, buf2 + 3 * stride2); + hor_add_sub_avx2(row_diff + 2, row_diff + 3); + + row_diff[4] = diff_row_avx2(buf1 + 4 * stride1, buf2 + 4 * stride2); + row_diff[5] = diff_row_avx2(buf1 + 5 * stride1, buf2 + 5 * stride2); + hor_add_sub_avx2(row_diff + 4, row_diff + 5); + + row_diff[6] = diff_row_avx2(buf1 + 6 * stride1, buf2 + 6 * stride2); + row_diff[7] = diff_row_avx2(buf1 + 7 * stride1, buf2 + 7 * stride2); + hor_add_sub_avx2(row_diff + 6, row_diff + 7); +} + +static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) +{ + __m128i temp_hor[8]; + __m128i temp_ver[8]; + + diff_blocks_and_hor_transform_avx2(temp_hor, buf1, stride1, buf2, stride2); + + ver_add_sub_avx2(temp_hor, temp_ver); + + unsigned sad = sum_block_avx2(temp_ver); + + unsigned result = (sad + 2) >> 2; + return result; +} + +// Function macro for defining hadamard calculating functions +// for fixed size blocks. They calculate hadamard for integer +// multiples of 8x8 with the 8x8 hadamard function. 
+#define SATD_NXN_AVX2(n) \ +static unsigned satd_8bit_ ## n ## x ## n ## _avx2( \ + const kvz_pixel * const block1, const kvz_pixel * const block2) \ +{ \ + unsigned x, y; \ + unsigned sum = 0; \ + for (y = 0; y < (n); y += 8) { \ + unsigned row = y * (n); \ + for (x = 0; x < (n); x += 8) { \ + sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (n), &block2[row + x], (n)); \ + } \ + } \ + return sum>>(KVZ_BIT_DEPTH-8); \ +} + +static unsigned satd_8bit_8x8_avx2( + const kvz_pixel * const block1, const kvz_pixel * const block2) +{ + unsigned x, y; + unsigned sum = 0; + for (y = 0; y < (8); y += 8) { + unsigned row = y * (8); + for (x = 0; x < (8); x += 8) { + sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (8), &block2[row + x], (8)); + } + } + return sum>>(KVZ_BIT_DEPTH-8); \ +} + +//SATD_NXN_AVX2(8) //Use the non-macro version +SATD_NXN_AVX2(16) +SATD_NXN_AVX2(32) +SATD_NXN_AVX2(64) #endif //COMPILE_INTEL_AVX2 @@ -153,6 +334,12 @@ success &= kvz_strategyselector_register(opaque, "sad_16x16", "avx2", 40, &sad_8bit_16x16_avx2); success &= kvz_strategyselector_register(opaque, "sad_32x32", "avx2", 40, &sad_8bit_32x32_avx2); success &= kvz_strategyselector_register(opaque, "sad_64x64", "avx2", 40, &sad_8bit_64x64_avx2); +
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/quant-avx2.c
Added
@@ -0,0 +1,220 @@ +/***************************************************************************** +* This file is part of Kvazaar HEVC encoder. +* +* Copyright (C) 2013-2015 Tampere University of Technology and others (see +* COPYING file). +* +* Kvazaar is free software: you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the +* Free Software Foundation; either version 2.1 of the License, or (at your +* option) any later version. +* +* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY +* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for +* more details. +* +* You should have received a copy of the GNU General Public License along +* with Kvazaar. If not, see <http://www.gnu.org/licenses/>. +****************************************************************************/ + +/* +* \file +*/ + +#include <stdlib.h> + +#include "quant-avx2.h" +#include "../generic/quant-generic.h" +#include "../strategies-common.h" +#include "strategyselector.h" +#include "encoder.h" +#include "transform.h" + +#if COMPILE_INTEL_AVX2 +#include <immintrin.h> + +/** +* \brief quantize transformed coefficents +* +*/ + +void kvz_quant_flat_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{ + const encoder_control_t * const encoder = state->encoder_control; + const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; + const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; + + int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth - 8) * 6); + const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2; + const int32_t scalinglist_type = (block_type == CU_INTRA ? 
0 : 3) + (int8_t)("\0\3\1\2"[type]); + const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6]; + const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform + const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + transform_shift; + const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9); + const int32_t q_bits8 = q_bits - 8; + + assert(quant_coeff[0] <= (1 << 15) - 1 && quant_coeff[0] >= -(1 << 15)); //Assuming flat values to fit int16_t + + uint32_t ac_sum = 0; + + __m256i v_ac_sum = _mm256_setzero_si256(); + __m256i v_quant_coeff = _mm256_set1_epi16(quant_coeff[0]); + + for (int32_t n = 0; n < width * height; n += 16) { + + __m256i v_level = _mm256_loadu_si256((__m256i*)&(coef[n])); + __m256i v_sign = _mm256_cmpgt_epi16(_mm256_setzero_si256(), v_level); + v_sign = _mm256_or_si256(v_sign, _mm256_set1_epi16(1)); + + v_level = _mm256_abs_epi16(v_level); + __m256i low_a = _mm256_unpacklo_epi16(v_level, _mm256_set1_epi16(0)); + __m256i high_a = _mm256_unpackhi_epi16(v_level, _mm256_set1_epi16(0)); + + __m256i low_b = _mm256_unpacklo_epi16(v_quant_coeff, _mm256_set1_epi16(0)); + __m256i high_b = _mm256_unpackhi_epi16(v_quant_coeff, _mm256_set1_epi16(0)); + + __m256i v_level32_a = _mm256_madd_epi16(low_a, low_b); + __m256i v_level32_b = _mm256_madd_epi16(high_a, high_b); + + v_level32_a = _mm256_add_epi32(v_level32_a, _mm256_set1_epi32(add)); + v_level32_b = _mm256_add_epi32(v_level32_b, _mm256_set1_epi32(add)); + + v_level32_a = _mm256_srai_epi32(v_level32_a, q_bits); + v_level32_b = _mm256_srai_epi32(v_level32_b, q_bits); + + v_level = _mm256_packs_epi32(v_level32_a, v_level32_b); + v_level = _mm256_sign_epi16(v_level, v_sign); + + _mm256_storeu_si256((__m256i*)&(q_coef[n]), v_level); + + v_ac_sum = _mm256_add_epi32(v_ac_sum, v_level32_a); + v_ac_sum = _mm256_add_epi32(v_ac_sum, v_level32_b); + } + 
+ __m128i temp = _mm_add_epi32(_mm256_castsi256_si128(v_ac_sum), _mm256_extracti128_si256(v_ac_sum, 1)); + temp = _mm_add_epi32(temp, _mm_shuffle_epi32(temp, KVZ_PERMUTE(2, 3, 0, 1))); + temp = _mm_add_epi32(temp, _mm_shuffle_epi32(temp, KVZ_PERMUTE(1, 0, 1, 0))); + ac_sum += _mm_cvtsi128_si32(temp); + + if (!(encoder->sign_hiding && ac_sum >= 2)) return; + + int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; + + for (int32_t n = 0; n < width * height; n++) { + int32_t level; + level = coef[n]; + level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; + delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); + } + + if (ac_sum >= 2) { +#define SCAN_SET_SIZE 16 +#define LOG2_SCAN_SET_SIZE 4 + int32_t n, last_cg = -1, abssum = 0, subset, subpos; + for (subset = (width*height - 1) >> LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { + int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg = -1; + subpos = subset << LOG2_SCAN_SET_SIZE; + abssum = 0; + + // Find last coeff pos + for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { + if (q_coef[scan[n + subpos]]) { + last_nz_pos_in_cg = n; + break; + } + } + + // First coeff pos + for (n = 0; n <SCAN_SET_SIZE; n++) { + if (q_coef[scan[n + subpos]]) { + first_nz_pos_in_cg = n; + break; + } + } + + // Sum all kvz_quant coeffs between first and last + for (n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { + abssum += q_coef[scan[n + subpos]]; + } + + if (last_nz_pos_in_cg >= 0 && last_cg == -1) { + last_cg = 1; + } + + if (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { + int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1); + if (signbit != (abssum & 0x1)) { // compare signbit with sum_parity + int32_t min_cost_inc = 0x7fffffff, min_pos = -1, cur_cost = 0x7fffffff; + int16_t final_change = 0, cur_change = 0; + for (n = (last_cg == 1 ? 
last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { + uint32_t blkPos = scan[n + subpos]; + if (q_coef[blkPos] != 0) { + if (delta_u[blkPos] > 0) { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + else if (n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = delta_u[blkPos]; + cur_change = -1; + } + } + else if (n < first_nz_pos_in_cg && ((coef[blkPos] >= 0) ? 0 : 1) != signbit) { + cur_cost = 0x7fffffff; + } + else { + cur_cost = -delta_u[blkPos]; + cur_change = 1; + } + + if (cur_cost < min_cost_inc) { + min_cost_inc = cur_cost; + final_change = cur_change; + min_pos = blkPos; + } + } // CG loop + + if (q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { + final_change = -1; + } + + if (coef[min_pos] >= 0) q_coef[min_pos] += final_change; + else q_coef[min_pos] -= final_change; + } // Hide + } + if (last_cg == 1) last_cg = 0; + } + +#undef SCAN_SET_SIZE +#undef LOG2_SCAN_SET_SIZE + } +} + +void kvz_quant_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type) +{
View file
kvazaar-0.7.1.tar.gz/src/strategies/avx2/quant-avx2.h
Added
#ifndef STRATEGIES_QUANT_AVX2_H_
#define STRATEGIES_QUANT_AVX2_H_
/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

#include <stdint.h>

/**
 * \brief Register the AVX2 quantization implementations with the strategy
 *        selector.
 * \param opaque   Opaque pointer to the strategy list (strategyselector).
 * \param bitdepth Encoder bit depth; implementations may decline depths
 *                 they do not support.
 * \return Nonzero on success, 0 on failure.
 */
int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth);

#endif // STRATEGIES_QUANT_AVX2_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/intra-generic.c
Added
/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

/*
 * \file
 * Generic (portable C) implementation of the intra-prediction strategies.
 */

#include <stdlib.h>

#include "intra-generic.h"
#include "strategyselector.h"


/**
 * \brief Generate angular predictions.
 * \param log2_width    Log2 of width, range 2..5.
 * \param intra_mode    Angular mode in range 2..34.
 * \param in_ref_above  Pointer to -1 index of above reference, length=width*2+1.
 * \param in_ref_left   Pointer to -1 index of left reference, length=width*2+1.
 * \param dst           Buffer of size width*width.
 */
static void kvz_angular_pred_generic(
  const int_fast8_t log2_width,
  const int_fast8_t intra_mode,
  const kvz_pixel *const in_ref_above,
  const kvz_pixel *const in_ref_left,
  kvz_pixel *const dst)
{
  assert(log2_width >= 2 && log2_width <= 5);
  assert(intra_mode >= 2 && intra_mode <= 34);

  // Displacement tables indexed by the mode's distance from pure
  // horizontal/vertical (0..8).
  static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
  static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp

  // Temporary buffer for modes 11-25.
  // It only needs to be big enough to hold indices from -width to width-1.
  kvz_pixel tmp_ref[2 * 32];
  const int_fast8_t width = 1 << log2_width;

  // Whether to swap references to always project on the left reference row.
  const bool vertical_mode = intra_mode >= 18;
  // Modes distance to horizontal or vertical mode.
  const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
  // Sample displacement per column in fractions of 32.
  const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];

  // Pointer for the reference we are interpolating from.
  const kvz_pixel *ref_main;
  // Pointer for the other reference.
  const kvz_pixel *ref_side;

  // Set ref_main and ref_side such that, when indexed with 0, they point to
  // index 0 in block coordinates.
  if (sample_disp < 0) {
    // Negative sample_disp means, we need to use both references.

    ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
    ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;

    // Move the reference pixels to start from the middle to the later half of
    // the tmp_ref, so there is room for negative indices.
    for (int_fast8_t x = -1; x < width; ++x) {
      tmp_ref[x + width] = ref_main[x];
    }
    // Get a pointer to block index 0 in tmp_ref.
    ref_main = &tmp_ref[width];

    // Extend the side reference to the negative indices of main reference
    // by projecting side samples with the inverse displacement.
    int_fast32_t col_sample_disp = 128; // rounding for the ">> 8"
    int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)];
    int_fast8_t most_negative_index = (width * sample_disp) >> 5;
    for (int_fast8_t x = -2; x >= most_negative_index; --x) {
      col_sample_disp += inv_abs_sample_disp;
      int_fast8_t side_index = col_sample_disp >> 8;
      tmp_ref[x + width] = ref_side[side_index - 1];
    }
  }
  else {
    // sample_disp >= 0 means we don't need to refer to negative indices,
    // which means we can just use the references as is.
    ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
    ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
  }

  if (sample_disp != 0) {
    // The mode is not horizontal or vertical, we have to do interpolation.

    // delta_pos accumulates the projected position in 1/32 sample units;
    // the high bits give the integer offset, the low 5 bits the fraction.
    int_fast16_t delta_pos = 0;
    for (int_fast8_t y = 0; y < width; ++y) {
      delta_pos += sample_disp;
      int_fast8_t delta_int = delta_pos >> 5;
      int_fast8_t delta_fract = delta_pos & (32 - 1);

      if (delta_fract) {
        // Do linear filtering
        for (int_fast8_t x = 0; x < width; ++x) {
          kvz_pixel ref1 = ref_main[x + delta_int];
          kvz_pixel ref2 = ref_main[x + delta_int + 1];
          dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5;
        }
      }
      else {
        // Just copy the integer samples
        for (int_fast8_t x = 0; x < width; x++) {
          dst[y * width + x] = ref_main[x + delta_int];
        }
      }
    }
  }
  else {
    // Mode is horizontal or vertical, just copy the pixels.

    for (int_fast8_t y = 0; y < width; ++y) {
      for (int_fast8_t x = 0; x < width; ++x) {
        dst[y * width + x] = ref_main[x];
      }
    }
  }

  // Flip the block if this was a horizontal mode (the prediction above was
  // computed as if it were vertical, so transpose in place).
  if (!vertical_mode) {
    for (int_fast8_t y = 0; y < width - 1; ++y) {
      for (int_fast8_t x = y + 1; x < width; ++x) {
        SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel);
      }
    }
  }
}

/**
 * \brief Register the generic intra-prediction implementation.
 * \return Nonzero on success, 0 on failure.
 */
int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth)
{
  bool success = true;

  // Priority 0: always available, lowest preference.
  success &= kvz_strategyselector_register(opaque, "angular_pred", "generic", 0, &kvz_angular_pred_generic);

  return success;
}
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/intra-generic.h
Added
@@ -0,0 +1,27 @@ +#ifndef STRATEGIES_INTRA_GENERIC_H_ +#define STRATEGIES_INTRA_GENERIC_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth); + +#endif //STRATEGIES_INTRA_GENERIC_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/quant-generic.c
Added
/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

/*
 * \file
 * Generic (portable C) implementation of the quantization strategy.
 */

#include <stdlib.h>

#include "quant-generic.h"
#include "strategyselector.h"
#include "encoder.h"
#include "transform.h"

#define QUANT_SHIFT 14
/**
 * \brief Quantize transformed coefficients.
 *
 * \param state      Encoder state (QP, slice type, encoder control).
 * \param coef       Input transform coefficients, width*height entries.
 * \param q_coef     Output quantized coefficients, width*height entries.
 * \param width      Block width.
 * \param height     Block height.
 * \param type       Component type; selects QP scaling and scaling list row.
 * \param scan_idx   Coefficient scan-order index.
 * \param block_type CU_INTRA selects the intra scaling lists.
 *
 * When sign-bit hiding is enabled, the second pass below may adjust one
 * coefficient per 4x4 coefficient group by +-1 so that the parity of the
 * group's coefficient sum encodes the sign of its first nonzero coefficient.
 */
void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
  int32_t height, int8_t type, int8_t scan_idx, int8_t block_type)
{
  const encoder_control_t * const encoder = state->encoder_control;
  const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
  const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];

  int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth - 8) * 6);
  const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2;
  // Scaling list row: intra lists are 0..2, inter lists 3..5; the string
  // literal maps component type to the list column.
  const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
  const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6];
  const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform
  const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + transform_shift;
  // Rounding offset: 171/512 for I-slices, 85/512 otherwise (in q_bits scale).
  const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9);
  const int32_t q_bits8 = q_bits - 8;

  uint32_t ac_sum = 0;

  // First pass: quantize every coefficient and accumulate the sum of
  // absolute quantized levels.
  for (int32_t n = 0; n < width * height; n++) {
    int32_t level;
    int32_t sign;

    level = coef[n];
    sign = (level < 0 ? -1 : 1);

    // 64-bit intermediate avoids overflow of abs(level) * quant_coeff[n].
    level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits;
    ac_sum += level;

    level *= sign;
    q_coef[n] = (coeff_t)(CLIP(-32768, 32767, level));
  }

  // Sign-bit hiding is only worthwhile when there is at least one
  // nontrivial coefficient.
  if (!(encoder->sign_hiding && ac_sum >= 2)) return;

  // Rounding residue of each coefficient, used as the cost of nudging it.
  int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2];

  for (int32_t n = 0; n < width * height; n++) {
    int32_t level;
    level = coef[n];
    level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits;
    delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8);
  }

  if (ac_sum >= 2) { // always true here (guarded by the early return above)
#define SCAN_SET_SIZE 16
#define LOG2_SCAN_SET_SIZE 4
    int32_t n, last_cg = -1, abssum = 0, subset, subpos;
    // Walk the 4x4 coefficient groups in reverse scan order.
    for (subset = (width*height - 1) >> LOG2_SCAN_SET_SIZE; subset >= 0; subset--) {
      int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg = -1;
      subpos = subset << LOG2_SCAN_SET_SIZE;
      abssum = 0;

      // Find last coeff pos
      for (n = SCAN_SET_SIZE - 1; n >= 0; n--) {
        if (q_coef[scan[n + subpos]]) {
          last_nz_pos_in_cg = n;
          break;
        }
      }

      // First coeff pos
      for (n = 0; n < SCAN_SET_SIZE; n++) {
        if (q_coef[scan[n + subpos]]) {
          first_nz_pos_in_cg = n;
          break;
        }
      }

      // Sum all kvz_quant coeffs between first and last
      for (n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) {
        abssum += q_coef[scan[n + subpos]];
      }

      if (last_nz_pos_in_cg >= 0 && last_cg == -1) {
        last_cg = 1;
      }

      // Only hide the sign if the nonzero span is wide enough.
      if (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) {
        int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1);
        if (signbit != (abssum & 0x1)) { // compare signbit with sum_parity
          // Find the coefficient whose +-1 adjustment costs the least.
          int32_t min_cost_inc = 0x7fffffff, min_pos = -1, cur_cost = 0x7fffffff;
          int16_t final_change = 0, cur_change = 0;
          for (n = (last_cg == 1 ? last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) {
            uint32_t blkPos = scan[n + subpos];
            if (q_coef[blkPos] != 0) {
              if (delta_u[blkPos] > 0) {
                cur_cost = -delta_u[blkPos];
                cur_change = 1;
              }
              else if (n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) {
                // Changing the first nonzero +-1 coefficient would flip the
                // hidden sign itself; forbid it.
                cur_cost = 0x7fffffff;
              }
              else {
                cur_cost = delta_u[blkPos];
                cur_change = -1;
              }
            }
            else if (n < first_nz_pos_in_cg && ((coef[blkPos] >= 0) ? 0 : 1) != signbit) {
              // A new nonzero before the first would take over the hidden
              // sign with the wrong polarity; forbid it.
              cur_cost = 0x7fffffff;
            }
            else {
              cur_cost = -delta_u[blkPos];
              cur_change = 1;
            }

            if (cur_cost < min_cost_inc) {
              min_cost_inc = cur_cost;
              final_change = cur_change;
              min_pos = blkPos;
            }
          } // CG loop

          // Don't push a level past the coeff_t range.
          if (q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) {
            final_change = -1;
          }

          if (coef[min_pos] >= 0) q_coef[min_pos] += final_change;
          else q_coef[min_pos] -= final_change;
        } // Hide
      }
      if (last_cg == 1) last_cg = 0;
    }

#undef SCAN_SET_SIZE
#undef LOG2_SCAN_SET_SIZE
  }
}


/**
 * \brief Register the generic quantization implementation.
 * \return Nonzero on success, 0 on failure.
 */
int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth)
{
  bool success = true;

  // Priority 0: always available, lowest preference.
  success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic);

  return success;
}
View file
kvazaar-0.7.1.tar.gz/src/strategies/generic/quant-generic.h
Added
@@ -0,0 +1,31 @@ +#ifndef STRATEGIES_QUANT_GENERIC_H_ +#define STRATEGIES_QUANT_GENERIC_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ +#include <stdint.h> +#include "encoderstate.h" + +#define QUANT_SHIFT 14 + +int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth); +void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); + +#endif //STRATEGIES_QUANT_GENERIC_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-common.h
Added
#ifndef STRATEGIES_COMMON_H_
#define STRATEGIES_COMMON_H_

/**
 * Build the 8-bit immediate selector used by x86 shuffle and permutation
 * intrinsics (same layout as _MM_SHUFFLE, but in argument order a..d for
 * element slots 0..3).
 *
 * Parameters are indices to packed elements; each must evaluate to 0, 1, 2
 * or 3. Arguments are fully parenthesized so that expression arguments
 * (e.g. KVZ_PERMUTE(x + 1, 0, 0, 0) or ternaries) expand correctly.
 */
#define KVZ_PERMUTE(a, b, c, d) ( ((a) << 0) | ((b) << 2) | ((c) << 4) | ((d) << 6) )

#endif // STRATEGIES_COMMON_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-intra.c
Added
@@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "strategies-intra.h" +#include "strategyselector.h" + +// Define function pointers. +angular_pred_func *kvz_angular_pred; + +// Headers for platform optimizations. +#include "generic/intra-generic.h" +#include "avx2/intra-avx2.h" + + +int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_intra_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + success &= kvz_strategy_register_intra_avx2(opaque, bitdepth); + } + return success; +} \ No newline at end of file
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-intra.h
Added
@@ -0,0 +1,43 @@ +#ifndef STRATEGIES_INTRA_H_ +#define STRATEGIES_INTRA_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "encoderstate.h" + +typedef void (angular_pred_func)( + const int_fast8_t log2_width, + const int_fast8_t intra_mode, + const kvz_pixel *const in_ref_above, + const kvz_pixel *const in_ref_left, + kvz_pixel *const dst); + +// Declare function pointers. +extern angular_pred_func * kvz_angular_pred; + +int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_INTRA_EXPORTS \ + {"angular_pred", (void**) &kvz_angular_pred}, \ + + + +#endif //STRATEGIES_INTRA_H_
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-quant.c
Added
@@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "strategies-quant.h" +#include "strategyselector.h" + +// Define function pointers. +quant_func *kvz_quant; + +// Headers for platform optimizations. +#include "generic/quant-generic.h" +#include "avx2/quant-avx2.h" + + +int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_quant_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + success &= kvz_strategy_register_quant_avx2(opaque, bitdepth); + } + return success; +} \ No newline at end of file
View file
kvazaar-0.7.1.tar.gz/src/strategies/strategies-quant.h
Added
@@ -0,0 +1,40 @@ +#ifndef STRATEGIES_QUANT_H_ +#define STRATEGIES_QUANT_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "encoderstate.h" + +// Declare function pointers. +typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, + int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); + +// Declare function pointers. +extern quant_func * kvz_quant; + +int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_QUANT_EXPORTS \ + {"quant", (void**) &kvz_quant}, \ + + + +#endif //STRATEGIES_QUANT_H_
View file
kvazaar-0.7.0.tar.gz/src/strategyselector.c -> kvazaar-0.7.1.tar.gz/src/strategyselector.c
Changed
@@ -69,6 +69,16 @@ fprintf(stderr, "kvz_strategy_register_ipol failed!\n"); return 0; } + + if (!kvz_strategy_register_quant(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_quant failed!\n"); + return 0; + } + + if (!kvz_strategy_register_intra(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_intra failed!\n"); + return 0; + } while(cur_strategy_to_select->fptr) { *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
View file
kvazaar-0.7.0.tar.gz/src/strategyselector.h -> kvazaar-0.7.1.tar.gz/src/strategyselector.h
Changed
@@ -148,12 +148,16 @@ #include "strategies/strategies-picture.h" #include "strategies/strategies-dct.h" #include "strategies/strategies-ipol.h" +#include "strategies/strategies-quant.h" +#include "strategies/strategies-intra.h" static const strategy_to_select_t strategies_to_select[] = { STRATEGIES_NAL_EXPORTS STRATEGIES_PICTURE_EXPORTS STRATEGIES_DCT_EXPORTS STRATEGIES_IPOL_EXPORTS + STRATEGIES_QUANT_EXPORTS + STRATEGIES_INTRA_EXPORTS { NULL, NULL }, };
View file
kvazaar-0.7.0.tar.gz/src/transform.c -> kvazaar-0.7.1.tar.gz/src/transform.c
Changed
@@ -33,6 +33,8 @@ #include "nal.h" #include "rdo.h" #include "strategies/strategies-dct.h" +#include "strategies/strategies-quant.h" +#include "strategies/generic/quant-generic.h" ////////////////////////////////////////////////////////////////////////// // INITIALIZATIONS @@ -127,137 +129,6 @@ idct_func(encoder->bitdepth, coeff, block); } - -#define QUANT_SHIFT 14 -/** - * \brief quantize transformed coefficents - * - */ -void kvz_quant(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type ) -{ - const encoder_control_t * const encoder = state->encoder_control; - const uint32_t log2_block_size = kvz_g_convert_to_bit[ width ] + 2; - const uint32_t * const scan = kvz_g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ]; - - int32_t qp_scaled = kvz_get_scaled_qp(type, state->global->QP, (encoder->bitdepth-8)*6); - - const uint32_t log2_tr_size = kvz_g_convert_to_bit[ width ] + 2; - const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); - const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size-2][scalinglist_type][qp_scaled%6]; - const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform - const int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; - const int32_t add = ((state->global->slicetype == KVZ_SLICE_I) ? 171 : 85) << (q_bits - 9); - const int32_t q_bits8 = q_bits - 8; - - uint32_t ac_sum = 0; - - for (int32_t n = 0; n < width * height; n++) { - int32_t level; - int32_t sign; - - level = coef[n]; - sign = (level < 0 ? 
-1: 1); - - level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; - ac_sum += level; - - level *= sign; - q_coef[n] = (coeff_t)(CLIP( -32768, 32767, level)); - } - - if (!(encoder->sign_hiding && ac_sum >= 2)) return; - - int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2]; - - for (int32_t n = 0; n < width * height; n++) { - int32_t level; - level = coef[n]; - level = ((int64_t)abs(level) * quant_coeff[n] + add) >> q_bits; - delta_u[n] = (int32_t)(((int64_t)abs(coef[n]) * quant_coeff[n] - (level << q_bits)) >> q_bits8); - } - - if(ac_sum >= 2) { - #define SCAN_SET_SIZE 16 - #define LOG2_SCAN_SET_SIZE 4 - int32_t n,last_cg = -1, abssum = 0, subset, subpos; - for(subset = (width*height - 1)>>LOG2_SCAN_SET_SIZE; subset >= 0; subset--) { - int32_t first_nz_pos_in_cg = SCAN_SET_SIZE, last_nz_pos_in_cg=-1; - subpos = subset<<LOG2_SCAN_SET_SIZE; - abssum = 0; - - // Find last coeff pos - for (n = SCAN_SET_SIZE - 1; n >= 0; n--) { - if (q_coef[scan[n + subpos]]) { - last_nz_pos_in_cg = n; - break; - } - } - - // First coeff pos - for (n = 0; n <SCAN_SET_SIZE; n++) { - if (q_coef[scan[n + subpos]]) { - first_nz_pos_in_cg = n; - break; - } - } - - // Sum all kvz_quant coeffs between first and last - for(n = first_nz_pos_in_cg; n <= last_nz_pos_in_cg; n++) { - abssum += q_coef[scan[n + subpos]]; - } - - if(last_nz_pos_in_cg >= 0 && last_cg == -1) { - last_cg = 1; - } - - if(last_nz_pos_in_cg - first_nz_pos_in_cg >= 4) { - int32_t signbit = (q_coef[scan[subpos + first_nz_pos_in_cg]] > 0 ? 0 : 1) ; - if(signbit != (abssum&0x1)) { // compare signbit with sum_parity - int32_t min_cost_inc = 0x7fffffff, min_pos =-1, cur_cost=0x7fffffff; - int16_t final_change = 0, cur_change=0; - for(n = (last_cg == 1 ? 
last_nz_pos_in_cg : SCAN_SET_SIZE - 1); n >= 0; n--) { - uint32_t blkPos = scan[n + subpos]; - if(q_coef[blkPos] != 0) { - if(delta_u[blkPos] > 0) { - cur_cost = -delta_u[blkPos]; - cur_change=1; - } else if(n == first_nz_pos_in_cg && abs(q_coef[blkPos]) == 1) { - cur_cost=0x7fffffff; - } else { - cur_cost = delta_u[blkPos]; - cur_change =-1; - } - } else if(n < first_nz_pos_in_cg && ((coef[blkPos] >= 0)?0:1) != signbit) { - cur_cost = 0x7fffffff; - } else { - cur_cost = -delta_u[blkPos]; - cur_change = 1; - } - - if(cur_cost < min_cost_inc) { - min_cost_inc = cur_cost; - final_change = cur_change; - min_pos = blkPos; - } - } // CG loop - - if(q_coef[min_pos] == 32767 || q_coef[min_pos] == -32768) { - final_change = -1; - } - - if(coef[min_pos] >= 0) q_coef[min_pos] += final_change; - else q_coef[min_pos] -= final_change; - } // Hide - } - if (last_cg == 1) last_cg=0; - } - - #undef SCAN_SET_SIZE - #undef LOG2_SCAN_SET_SIZE - } -} - /** * \brief inverse quantize transformed and quantized coefficents *
View file
kvazaar-0.7.0.tar.gz/src/transform.h -> kvazaar-0.7.1.tar.gz/src/transform.h
Changed
@@ -35,8 +35,6 @@ -void kvz_quant(const encoder_state_t *state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type); void kvz_dequant(const encoder_state_t *state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type); void kvz_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
View file
kvazaar-0.7.0.tar.gz/tests/test_strategies.c -> kvazaar-0.7.1.tar.gz/tests/test_strategies.c
Changed
@@ -41,7 +41,7 @@ } if (!kvz_strategy_register_dct(&strategies, KVZ_BIT_DEPTH)) { - fprintf(stderr, "strategy_register_partial_butterfly failed!\n"); + fprintf(stderr, "strategy_register_dct failed!\n"); return; } }
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.