Caffe源码解读（一）------loss层之softmax_loss_layer.cpp

作者：weixin_40725706 | 2024-02-23 03:27:18

踩

softmax_loss_layer

这几天需要自己写个loss层，因此把caffe源码研读了下，在此记录下经验，方便后人以及自己日后复习。

首先看看caffe前向传播的工作流程，即net.cpp中的ForwardFromTo函数：

可以看到，该函数通过一个for循环来对每一个layer执行前向传播。具体的传播函数在layer.hpp中可以找到：

可以看到在这个函数中，最主要的就是执行了Reshape和Forward_cpu函数，剩下的就是对数据的一些处理，我们暂时先不用管它。由于Reshape和Forward_cpu都是虚函数，需要由具体的子类来实现，在这里对应的实现就在softmax_loss_layer.cpp中。接下来我们仔细看看softmax_loss_layer.cpp:

namespace caffe { template <typename Dtype> void SoftmaxWithLossLayer<Dtype>::LayerSetUp( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { LossLayer<Dtype>::LayerSetUp(bottom, top);

LayerParameter softmax_param(this->layer_param_); softmax_param.set_type("Softmax");

//softmax loss涉及到两层计算，第一层是通过softmax函数计算出每一个分类的概率，第二层

//则是通过概率计算出最终的损失，在caffe中是将这两步操作分开在了两层中，分别是softmax_layer

//和softmax_loss_layer。下面就是创建softmax_layer来进行概率的计算

softmax_layer_ = LayerRegistry<Dtype>::CreateLayer(softmax_param);

softmax_bottom_vec_.clear(); softmax_bottom_vec_.push_back(bottom[0]); softmax_top_vec_.clear(); softmax_top_vec_.push_back(&prob_); softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_); has_ignore_label_ = this->layer_param_.loss_param().has_ignore_label(); if (has_ignore_label_) { ignore_label_ = this->layer_param_.loss_param().ignore_label(); } if (!this->layer_param_.loss_param().has_normalization() && this->layer_param_.loss_param().has_normalize()) { normalization_ = this->layer_param_.loss_param().normalize() ? LossParameter_NormalizationMode_VALID : LossParameter_NormalizationMode_BATCH_SIZE; } else { normalization_ = this->layer_param_.loss_param().normalization(); } } template <typename Dtype> void SoftmaxWithLossLayer<Dtype>::Reshape( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { LossLayer<Dtype>::Reshape(bottom, top); softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_); softmax_axis_ = bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis()); outer_num_ = bottom[0]->count(0, softmax_axis_); inner_num_ = bottom[0]->count(softmax_axis_ + 1); CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count()) << "Number of labels must match number of predictions; " << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), " << "label count (number of labels) must be N*H*W, " << "with integer values in {0, 1, ..., C-1}."; if (top.size() >= 2) { // softmax output top[1]->ReshapeLike(*bottom[0]); } } template <typename Dtype> Dtype SoftmaxWithLossLayer<Dtype>::get_normalizer( LossParameter_NormalizationMode normalization_mode, int valid_count) { Dtype normalizer; switch (normalization_mode) { case LossParameter_NormalizationMode_FULL: normalizer = Dtype(outer_num_ * inner_num_); break; case LossParameter_NormalizationMode_VALID: if (valid_count == -1) { normalizer = Dtype(outer_num_ * inner_num_); } else { normalizer = Dtype(valid_count); } break; case LossParameter_NormalizationMode_BATCH_SIZE: 
normalizer = Dtype(outer_num_); break; case LossParameter_NormalizationMode_NONE: normalizer = Dtype(1); break; default: LOG(FATAL) << "Unknown normalization mode: " << LossParameter_NormalizationMode_Name(normalization_mode); } // Some users will have no labels for some examples in order to 'turn off' a // particular loss in a multi-task setup. The max prevents NaNs in that case. return std::max(Dtype(1.0), normalizer); } template <typename Dtype> void SoftmaxWithLossLayer<Dtype>::Forward_cpu( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { // The forward pass computes the softmax prob values. softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); const Dtype* prob_data = prob_.cpu_data(); const Dtype* label = bottom[1]->cpu_data(); int dim = prob_.count() / outer_num_; int count = 0; Dtype loss = 0; for (int i = 0; i < outer_num_; ++i) { for (int j = 0; j < inner_num_; j++) { const int label_value = static_cast<int>(label[i * inner_num_ + j]); if (has_ignore_label_ && label_value == ignore_label_) { continue; } DCHECK_GE(label_value, 0); DCHECK_LT(label_value, prob_.shape(softmax_axis_)); loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j], Dtype(FLT_MIN))); ++count; } } top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count); if (top.size() == 2) { top[1]->ShareData(prob_); } } template <typename Dtype> void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { if (propagate_down[1]) { LOG(FATAL) << this->type() << " Layer cannot backpropagate to label inputs."; } if (propagate_down[0]) { Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); const Dtype* prob_data = prob_.cpu_data(); caffe_copy(prob_.count(), prob_data, bottom_diff); const Dtype* label = bottom[1]->cpu_data(); int dim = prob_.count() / outer_num_; int count = 0; for (int i = 0; i < outer_num_; ++i) { for (int j = 0; j < inner_num_; 
++j) { const int label_value = static_cast<int>(label[i * inner_num_ + j]); if (has_ignore_label_ && label_value == ignore_label_) { for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) { bottom_diff[i * dim + c * inner_num_ + j] = 0; } } else { bottom_diff[i * dim + label_value * inner_num_ + j] -= 1; ++count; } } } // Scale gradient Dtype loss_weight = top[0]->cpu_diff()[0] / get_normalizer(normalization_, count); caffe_scal(prob_.count(), loss_weight, bottom_diff); } } #ifdef CPU_ONLY STUB_GPU(SoftmaxWithLossLayer); #endif INSTANTIATE_CLASS(SoftmaxWithLossLayer); REGISTER_LAYER_CLASS(SoftmaxWithLoss); }

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/weixin_40725706/article/detail/133342