# CfL 预测模式

## 原理

- 从亮度到色度（Chroma from Luma，CfL）是一种色度帧内预测模式，通过建立共位重建亮度采样的线性函数来模拟色度采样。
- 对于不同的色度采样格式（例如 4:2:0 和 4:2:2），可能需要对重建的亮度像素进行子采样，以匹配色度样本的分辨率。
- 在 CfL 模式中，从亮度样本中移除直流分量以形成交流（AC）贡献。预测块是色度直流（DC）分量和缩放的亮度交流（AC）分量之和，即 $\mathrm{pred}_C = \alpha \cdot L^{AC} + DC$。
- 在 CfL 模式中，指定两个颜色分量之间线性函数的模型参数（如缩放因子 α）由编码器优化，并在编码过程中写入（信号化到）比特流中。
- DC 分量是预测块内所有样本的平均值，代表块的直流或平均亮度水平；AC 分量是亮度样本与 DC 分量的差值，代表块内的交流或细节信息。

## libaom 相关源码分析

色度的预测模式一共有 14 种，而 CfL 模式是序号 13，在 enums.h 中定义。

### 对外编码参数控制开关

`unsigned int disable_cfl`：在 aom_encoder.h 文件中的 `cfg_options` 结构体中声明的变量，用于控制是否禁用 CfL。

### 函数关系

（原文此处应为函数调用关系图，提取时已丢失。下文按 is_cfl_allowed、av1_get_tx_size、cfl_rd_pick_alpha、cfl_pick_plane_rd、cfl_compute_rd、intra_model_rd 的顺序逐一说明。）

### is_cfl_allowed 函数

该函数判断当前块是否可以使用 CfL 模式：

- 在无损编码模式下，CfL 只允许用于 4x4 的色度块。
- 在有损编码模式下，CfL 允许用于亮度分区大小不超过 32x32 的块。

```c
// Can we use CfL for the current block?
static INLINE CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->bsize;
  assert(bsize < BLOCK_SIZES_ALL);
  if (xd->lossless[mbmi->segment_id]) {
    // In lossless, CfL is available when the partition size is equal to the
    // transform size.
    const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
    const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
    const int plane_bsize = get_plane_block_size(bsize, ssx, ssy);
    return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4);
  }
  // Spec: CfL is available to luma partitions lesser than or equal to 32x32
  return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
                            block_size_high[bsize] <= 32);
}
```

### av1_get_tx_size 函数

该函数根据平面编号和宏块描述符来确定变换大小：

- 在无损编码模式下，它返回 `TX_4X4`。
- 对于亮度平面，它返回宏块模式信息中的变换大小。
- 对于色度平面，它基于块大小和子采样因子来确定最大变换大小。

```c
static INLINE TX_SIZE av1_get_adjusted_tx_size(TX_SIZE tx_size) {
  switch (tx_size) {
    case TX_64X64:
    case TX_64X32:
    case TX_32X64: return TX_32X32;
    case TX_64X16: return TX_32X16;
    case TX_16X64: return TX_16X32;
    default: return tx_size;
  }
}

static INLINE TX_SIZE av1_get_max_uv_txsize(BLOCK_SIZE bsize, int subsampling_x,
                                            int subsampling_y) {
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
  assert(plane_bsize < BLOCK_SIZES_ALL);
  const TX_SIZE uv_tx = max_txsize_rect_lookup[plane_bsize];
  return av1_get_adjusted_tx_size(uv_tx);
}

static INLINE TX_SIZE av1_get_tx_size(int plane, const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];
  if (xd->lossless[mbmi->segment_id]) return TX_4X4;
  if (plane == 0) return mbmi->tx_size;
  const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
  return av1_get_max_uv_txsize(mbmi->bsize, pd->subsampling_x,
                               pd->subsampling_y);
}
```

### cfl_rd_pick_alpha 函数

这个函数是 AV1 编码过程中 CfL 参数选择的核心，它通过估计和比较不同 CfL 参数的 RD 成本来找到最佳参数。

1. 参数验证和初始化：验证 `cfl_search_range` 是否在有效范围内；初始化 RD 统计为无效。
2. 启用 DC 预测数据缓存：为了提高效率，启用 CfL 的 DC 预测数据缓存。
3. 估计最佳 CfL 参数：使用 `cfl_pick_plane_parameter` 函数为 U 和 V 平面选择最佳 CfL 参数。
4. 处理 CfL 搜索范围为 1 的特殊情况：如果 `cfl_search_range` 为 1，不进行进一步的 alpha 细化；如果两个色度平面的 CfL 索引都为 0，则设置无效的 CfL 参数并返回。
5. 计算 CfL 模式的 RD 成本（同样只在搜索范围为 1 时执行）：仅用信号化 CfL 模式和 alpha 参数所需的码率计算 RD 成本，如果该成本超过参考最佳 RD 值，则设置无效的 CfL 参数并返回。
6. 计算每个色度平面的 RD 成本：使用 `cfl_pick_plane_rd` 函数计算每个色度平面的 RD 成本。
7. 清除 DC 预测数据缓存标志：清除 CfL 的 DC 预测数据缓存标志，以避免意外使用缓存的 DC 预测数据。
8. 遍历所有可能的 CfL 参数组合：计算联合 RD 成本，并更新最佳 RD 统计和最佳 CfL 参数。
9. 返回结果：如果最佳 RD 统计的 RD 成本不小于参考最佳 RD 值，则设置无效的 CfL 参数并返回 0，否则返回 1。

```c
/*!\brief Pick the optimal parameters for Chroma to Luma (CFL) component
 *
 * \ingroup intra_mode_search
 * \callergraph
 *
 * This function will use DCT_DCT followed by computing SATD (sum of absolute
 * transformed differences) to estimate the RD score and find the best possible
 * CFL parameter.
 *
 * Then the function will apply a full RD search near the best possible CFL
 * parameter to find the best actual CFL parameter.
 *
 * Side effect:
 * We use the buffers in x->plane[] and xd->plane[] as throw-away buffers for RD
 * search.
 *
 * \param[in] x                Encoder prediction block structure.
 * \param[in] cpi              Top-level encoder instance structure.
 * \param[in] tx_size          Transform size.
 * \param[in] ref_best_rd      Reference best RD.
 * \param[in] cfl_search_range The search range of full RD search near the
 *                             estimated best CFL parameter.
 *
 * \param[out] best_rd_stats        RD stats of the best CFL parameter
 * \param[out] best_cfl_alpha_idx   Best CFL alpha index
 * \param[out] best_cfl_alpha_signs Best CFL joint signs
 *
 */
static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
                             TX_SIZE tx_size, int64_t ref_best_rd,
                             int cfl_search_range, RD_STATS *best_rd_stats,
                             uint8_t *best_cfl_alpha_idx,
                             int8_t *best_cfl_alpha_signs) {
  assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
  const ModeCosts *mode_costs = &x->mode_costs;
  RD_STATS cfl_rd_arr_u[CFL_MAGS_SIZE];
  RD_STATS cfl_rd_arr_v[CFL_MAGS_SIZE];
  MACROBLOCKD *const xd = &x->e_mbd;
  int est_best_cfl_idx_u, est_best_cfl_idx_v;

  av1_invalid_rd_stats(best_rd_stats);

  // As the dc pred data is same for different values of alpha, enable the
  // caching of dc pred data. Call clear_cfl_dc_pred_cache_flags() before
  // returning to avoid the unintentional usage of cached dc pred data.
  xd->cfl.use_dc_pred_cache = true;
  // Evaluate alpha parameter of each chroma plane.
  est_best_cfl_idx_u =
      cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range);
  est_best_cfl_idx_v =
      cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range);

  if (cfl_search_range == 1) {
    // For cfl_search_range=1, further refinement of alpha is not enabled. Hence
    // CfL index=0 for both the chroma planes implies invalid CfL mode.
    if (est_best_cfl_idx_u == CFL_INDEX_ZERO &&
        est_best_cfl_idx_v == CFL_INDEX_ZERO) {
      set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs);
      clear_cfl_dc_pred_cache_flags(&xd->cfl);
      return 0;
    }

    int cfl_alpha_u, cfl_alpha_v;
    CFL_SIGN_TYPE cfl_sign_u, cfl_sign_v;
    const MB_MODE_INFO *mbmi = xd->mi[0];
    cfl_idx_to_sign_and_alpha(est_best_cfl_idx_u, &cfl_sign_u, &cfl_alpha_u);
    cfl_idx_to_sign_and_alpha(est_best_cfl_idx_v, &cfl_sign_v, &cfl_alpha_v);
    const int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1;
    // Compute alpha and mode signaling rate.
    const int rate_overhead =
        mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u] +
        mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v] +
        mode_costs
            ->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_CFL_PRED];
    // Skip the CfL mode evaluation if the RD cost derived using the rate needed
    // to signal the CfL mode and alpha parameter exceeds the ref_best_rd.
    if (RDCOST(x->rdmult, rate_overhead, 0) > ref_best_rd) {
      set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs);
      clear_cfl_dc_pred_cache_flags(&xd->cfl);
      return 0;
    }
  }

  // Compute the rd cost of each chroma plane using the alpha parameters which
  // were already evaluated.
  cfl_pick_plane_rd(cpi, x, 1, tx_size, cfl_search_range, cfl_rd_arr_u,
                    est_best_cfl_idx_u);
  cfl_pick_plane_rd(cpi, x, 2, tx_size, cfl_search_range, cfl_rd_arr_v,
                    est_best_cfl_idx_v);

  clear_cfl_dc_pred_cache_flags(&xd->cfl);

  for (int ui = 0; ui < CFL_MAGS_SIZE; ++ui) {
    if (cfl_rd_arr_u[ui].rate == INT_MAX) continue;
    int cfl_alpha_u;
    CFL_SIGN_TYPE cfl_sign_u;
    cfl_idx_to_sign_and_alpha(ui, &cfl_sign_u, &cfl_alpha_u);
    for (int vi = 0; vi < CFL_MAGS_SIZE; ++vi) {
      if (cfl_rd_arr_v[vi].rate == INT_MAX) continue;
      int cfl_alpha_v;
      CFL_SIGN_TYPE cfl_sign_v;
      cfl_idx_to_sign_and_alpha(vi, &cfl_sign_v, &cfl_alpha_v);
      // cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO is not a
      // valid parameter for CFL
      if (cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO) continue;
      int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1;
      RD_STATS rd_stats = cfl_rd_arr_u[ui];
      av1_merge_rd_stats(&rd_stats, &cfl_rd_arr_v[vi]);
      if (rd_stats.rate != INT_MAX) {
        rd_stats.rate +=
            mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u];
        rd_stats.rate +=
            mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v];
      }
      av1_rd_cost_update(x->rdmult, &rd_stats);
      if (rd_stats.rdcost < best_rd_stats->rdcost) {
        *best_rd_stats = rd_stats;
        *best_cfl_alpha_idx =
            (cfl_alpha_u << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha_v;
        *best_cfl_alpha_signs = joint_sign;
      }
    }
  }
  if (best_rd_stats->rdcost >= ref_best_rd) {
    av1_invalid_rd_stats(best_rd_stats);
    // Set invalid CFL parameters here since the rdcost is not better than
    // ref_best_rd.
    set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs);
    return 0;
  }
  return 1;
}
```

### cfl_pick_plane_rd 函数

该函数通过在给定的搜索范围内遍历不同的 CfL 参数，并计算每个参数对应的 RD 统计，来找到最佳的 CfL 参数。这个过程涉及到生成预测块（基于亮度信息和 CfL 参数）、计算失真（通常使用 SATD），以及计算编码这些预测残差所需的速率。最终，函数会选择使 RD 成本最小的 CfL 参数，以优化编码效率和图像质量。

注：从下面的代码看，该函数以 `fast_mode = 0` 调用 `cfl_compute_rd`，即走完整的变换/量化 RD 路径（而非 SATD 估计）；并且它本身返回 `void`，只负责以估计出的最佳索引为中心、向两个方向填充 `cfl_rd_arr`，最终最优组合的选择是在 `cfl_rd_pick_alpha` 中完成的。

```c
static void cfl_pick_plane_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                              int plane, TX_SIZE tx_size, int cfl_search_range,
                              RD_STATS cfl_rd_arr[CFL_MAGS_SIZE],
                              int est_best_cfl_idx) {
  assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(mbmi->uv_mode == UV_CFL_PRED);
  const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);

  for (int cfl_idx = 0; cfl_idx < CFL_MAGS_SIZE; ++cfl_idx) {
    av1_invalid_rd_stats(&cfl_rd_arr[cfl_idx]);
  }

  int fast_mode = 0;
  int start_cfl_idx = est_best_cfl_idx;
  cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, start_cfl_idx, fast_mode,
                 &cfl_rd_arr[start_cfl_idx]);

  if (cfl_search_range == 1) return;

  for (int si = 0; si < 2; ++si) {
    const int dir = cfl_dir_ls[si];
    for (int i = 1; i < cfl_search_range; ++i) {
      int cfl_idx = start_cfl_idx + dir * i;
      if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
      cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, cfl_idx, fast_mode,
                     &cfl_rd_arr[cfl_idx]);
    }
  }
}
```

### cfl_compute_rd 函数

- `cfl_idx_to_sign_and_alpha` 函数用于从 CfL 索引中提取 CfL 符号和 alpha 值。
- `mbmi->cfl_alpha_signs` 和 `mbmi->cfl_alpha_idx` 被设置为新的 CfL 符号和 alpha 值，以便生成预测块。
- 在快速模式下，使用 `intra_model_rd` 函数计算 CfL 成本，该函数可能使用简化的失真度量。
- 在非快速模式下，使用 `av1_txfm_rd_in_plane` 函数执行完整的变换、量化和失真度量，然后使用 `av1_rd_cost_update` 函数更新 RD 统计。
- 最后，函数恢复 `mbmi` 中原有的 CfL 参数，并返回计算得到的 CfL 成本。

```c
static int64_t cfl_compute_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                              int plane, TX_SIZE tx_size,
                              BLOCK_SIZE plane_bsize, int cfl_idx,
                              int fast_mode, RD_STATS *rd_stats) {
  assert(IMPLIES(fast_mode, rd_stats == NULL));
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  int cfl_plane = get_cfl_pred_type(plane);
  CFL_SIGN_TYPE cfl_sign;
  int cfl_alpha;
  cfl_idx_to_sign_and_alpha(cfl_idx, &cfl_sign, &cfl_alpha);
  // We only build CFL for a given plane, the other plane's sign is dummy
  int dummy_sign = CFL_SIGN_NEG;
  const int8_t orig_cfl_alpha_signs = mbmi->cfl_alpha_signs;
  const uint8_t orig_cfl_alpha_idx = mbmi->cfl_alpha_idx;
  mbmi->cfl_alpha_signs =
      PLANE_SIGN_TO_JOINT_SIGN(cfl_plane, cfl_sign, dummy_sign);
  mbmi->cfl_alpha_idx = (cfl_alpha << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha;
  int64_t cfl_cost;
  if (fast_mode) {
    cfl_cost =
        intra_model_rd(cm, x, plane, plane_bsize, tx_size, /*use_hadamard=*/0);
  } else {
    av1_init_rd_stats(rd_stats);
    av1_txfm_rd_in_plane(x, cpi, rd_stats, INT64_MAX, 0, plane, plane_bsize,
                         tx_size, FTXS_NONE, 0);
    av1_rd_cost_update(x->rdmult, rd_stats);
    cfl_cost = rd_stats->rdcost;
  }
  mbmi->cfl_alpha_signs = orig_cfl_alpha_signs;
  mbmi->cfl_alpha_idx = orig_cfl_alpha_idx;
  return cfl_cost;
}
```

### intra_model_rd 函数

1. 获取宏块描述符和比特深度信息：从宏块结构中获取宏块描述符 `xd`，并获取比特深度信息 `bd_info`。
2. 参数验证和初始化：确保当前块不是帧间（inter）块；初始化行和列的步长，这些步长基于变换大小 `tx_size`；计算最大块宽和高。
3. 帧内预测：对于每个块，执行帧内预测，生成预测块。使用 `av1_predict_intra_block_facade` 函数进行帧内预测。
4. 计算预测残差：使用 `av1_subtract_block` 函数计算预测残差，即原始块和预测块之间的差异。
5. 快速变换：使用 `av1_quick_txfm` 函数执行快速变换，这个函数简化了变换过程，不需要完整的量化和逆变换步骤。
6. 计算失真：使用 `aom_satd` 函数计算变换后的残差的绝对变换差之和（Sum of Absolute Transformed Differences，SATD），作为失真的度量。
7. 返回 RD 成本：将所有块的失真相加，得到总的失真成本并返回。

```c
/*!\cond */
// Makes a quick intra prediction and estimate the rdcost with a model without
// going through the whole txfm/quantize/itxfm process.
static int64_t intra_model_rd(const AV1_COMMON *cm, MACROBLOCK *const x,
                              int plane, BLOCK_SIZE plane_bsize,
                              TX_SIZE tx_size, int use_hadamard) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const BitDepthInfo bd_info = get_bit_depth_info(xd);
  int row, col;
  assert(!is_inter_block(xd->mi[0]));
  const int stepr = tx_size_high_unit[tx_size];
  const int stepc = tx_size_wide_unit[tx_size];
  const int txbw = tx_size_wide[tx_size];
  const int txbh = tx_size_high[tx_size];
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  int64_t satd_cost = 0;
  struct macroblock_plane *p = &x->plane[plane];
  struct macroblockd_plane *pd = &xd->plane[plane];
  // Prediction.
  for (row = 0; row < max_blocks_high; row += stepr) {
    for (col = 0; col < max_blocks_wide; col += stepc) {
      av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
      // Here we use p->src_diff and p->coeff as temporary buffers for
      // prediction residue and transform coefficients. The buffers are only
      // used in this for loop, therefore we don't need to properly add offset
      // to the buffers.
      av1_subtract_block(
          bd_info, txbh, txbw, p->src_diff, block_size_wide[plane_bsize],
          p->src.buf + (((row * p->src.stride) + col) << 2), p->src.stride,
          pd->dst.buf + (((row * pd->dst.stride) + col) << 2), pd->dst.stride);
      av1_quick_txfm(use_hadamard, tx_size, bd_info, p->src_diff,
                     block_size_wide[plane_bsize], p->coeff);
      satd_cost += aom_satd(p->coeff, tx_size_2d[tx_size]);
    }
  }
  return satd_cost;
}
```