LMDB 是 Lightning Memory-Mapped Database 的缩写,意为"闪电般的内存映射数据库"。它的文件结构简单:一个文件夹,里面包含一个数据文件和一个锁文件。数据可以随意复制、随意传输。它的访问也很简单,不需要运行单独的数据库管理进程,只要在访问数据的代码里引用 LMDB 库,访问时给出文件路径即可。
syntax = "proto2";
package caffe;
// A single image sample: its dimensions, its payload and its label.
message Datum {
  // Number of image channels.
  optional int32 channels = 1;
  // Image height (rows).
  optional int32 height = 2;
  // Image width (columns).
  optional int32 width = 3;
  // The actual image data, in bytes.
  optional bytes data = 4;
  // Label attached to this sample.
  optional int32 label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true, data contains an encoded image that needs to be decoded.
  optional bool encoded = 7 [default = false];
}
// The label (display) name and label id.
message LabelMapItem {
  // Both name and label are required by convention, although the schema
  // declares them optional.
  optional string name = 1;
  optional int32 label = 2;
  // display_name is optional.
  optional string display_name = 3;
}
// The full list of label map entries.
message LabelMap {
  repeated LabelMapItem item = 1;
}
// The normalized bounding box [0, 1] w.r.t. the input image size.
message NormalizedBBox {
  // Box corners.
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
  // Label of the object enclosed by the box.
  optional int32 label = 5;
  // NOTE(review): presumably the "difficult" flag of the source annotation;
  // confirm against the code that fills this message.
  optional bool difficult = 6;
  optional float score = 7;
  optional float size = 8;
}
// Annotation for each object instance.
message Annotation {
  // Bounding box of the instance. Field number 1 is not used by this message.
  optional NormalizedBBox bbox = 2;
}
// Group of annotations for a particular label.
message AnnotationGroup {
  // Label shared by every annotation in this group.
  optional int32 group_label = 1;
  // The annotations that belong to this group.
  repeated Annotation annotation = 2;
}
// An extension of Datum which contains "rich" annotations.
message AnnotatedDatum {
  // Kind of annotation carried by this datum. The C++ reader code refers to
  // the generated names AnnotatedDatum_AnnotationType and
  // AnnotatedDatum_AnnotationType_BBOX, so this nested enum must exist.
  enum AnnotationType {
    BBOX = 0;
  }
  // The plain image sample.
  optional Datum datum = 1;
  // NOTE(review): number 2 fills the gap in this message's field numbering;
  // confirm against the upstream schema before relying on it.
  optional AnnotationType type = 2;
  // Each group contains annotation for a particular class.
  repeated AnnotationGroup annotation_group = 3;
}

# Compile caffe.proto from the current directory and write the generated C++
# sources (caffe.pb.h / caffe.pb.cc) next to it.
protoc -I=. --cpp_out=./ caffe.proto
/output/oldFile/1000015_10/wKgB5Fr6WwWAJb7iAAABKohu5Nw109.png 0
/output/oldFile/1000015_10/wKgB5Fr6WwWAEbg6AAABC_mxdD8880.png 0
/output/oldFile/1000015_10/wKgB5Fr6WwWAUGTdAAAA8wVERrQ677.png 0
/output/oldFile/1000015_10/wKgB5Fr6WwWAPJ-lAAABPYAoeuY242.png 0
/output/oldFile/1000015_10/wKgB5Fr6WwWARVIWAAABCK2alGs331.png 0
/output/oldFile/1000015_10/wKgB5Fr6WwWAV3R5AAAA5573dko147.png 0
/output/oldFile/1000015_10/wKgB5Fr6WwaAUjQRAAABIkYxqoY008.png 0
/output/oldFile/1000015_10/wKgB5Vr6YF-AALG-AAAA-qStI_Q208.png 1
/output/oldFile/1000015_10/wKgB5Vr6YGCAe1VYAAABN5fz53Y240.png 1
/output/oldFile/1000015_10/wKgB5Vr6YGCAQo7fAAABVFasXJ4223.png 1
/output/oldFile/1000015_10/wKgB5Vr6YGCAL00yAAABJdrU4U0508.png 1
/output/oldFile/1000015_10/wKgB5Vr6YGCAFjTyAAABJVgoCrU242.png 1
/output/oldFile/1000015_10/wKgB5Vr6YGCAKmMMAAABMd1_pJg240.png 1
/output/oldFile/1000015_10/wKgB5Vr6YGCAR2FqAAABFCQ7LRY651.png 1
- /home/test/data/VOC2007/JPEGImages/004379.jpg /home/test/data/VOC2007/Annotations/004379.xml
- /home/test/data/VOC2007/JPEGImages/001488.jpg /home/test/data/VOC2007/Annotations/001488.xml
- /home/test/data/VOC2007/JPEGImages/004105.jpg /home/test/data/VOC2007/Annotations/004105.xml
- /home/test/data/VOC2007/JPEGImages/006146.jpg /home/test/data/VOC2007/Annotations/006146.xml
- /home/test/data/VOC2007/JPEGImages/004295.jpg /home/test/data/VOC2007/Annotations/004295.xml
- /home/test/data/VOC2007/JPEGImages/001360.jpg /home/test/data/VOC2007/Annotations/001360.xml
- /home/test/data/VOC2007/JPEGImages/003468.jpg /home/test/data/VOC2007/Annotations/003468.xml
- ...
- #include <string>
- #include "caffe.pb.h"
- namespace caffe {
- namespace db {
- using namespace std;
- enum Mode { READ, WRITE, NEW };
- class Cursor {
- public:
- Cursor() {}
- virtual ~Cursor() {}
- virtual void SeekToFirst() = 0;
- virtual void Next() = 0;
- virtual string key() = 0;
- virtual string value() = 0;
- virtual bool valid() = 0;
- };
- class Transaction {
- public:
- Transaction() {}
- virtual ~Transaction() {}
- virtual void Put(const string& key, const string& value) = 0;
- virtual void Commit() = 0;
- };
- class DB {
- public:
- DB() {}
- virtual ~DB() {}
- virtual void Open(const string& source, Mode mode) = 0;
- virtual void Close() = 0;
- virtual Cursor* NewCursor() = 0;
- virtual Transaction* NewTransaction() = 0;
- };
- DB* GetDB();
- } // namespace db
- } // namespace caffe
- #endif // CAFFE_UTIL_DB_HPP

- #include "db.hpp"
- #include "db_lmdb.hpp"
- #include "glog/logging.h"
- #include <string>
- namespace caffe {
- namespace db {
- DB* GetDB() { return new LMDB(); }
- } // namespace db
- } // namespace caffe
#ifdef USE_LMDB
#ifndef CAFFE_UTIL_DB_LMDB_HPP
#define CAFFE_UTIL_DB_LMDB_HPP

#include <string>
#include <vector>
#include "lmdb.h"
#include "db.hpp"
#include "glog/logging.h"

namespace caffe {
namespace db {

// Fails a glog CHECK, with the LMDB error text attached, unless the status is
// MDB_SUCCESS.
inline void MDB_CHECK(int mdb_status) {
  CHECK_EQ(mdb_status, MDB_SUCCESS) << mdb_strerror(mdb_status);
}

// Cursor over an LMDB database. It is positioned on the first record at
// construction; the destructor closes the cursor and aborts the transaction
// it was created with.
class LMDBCursor : public Cursor {
 public:
  explicit LMDBCursor(MDB_txn* mdb_txn, MDB_cursor* mdb_cursor)
      : mdb_txn_(mdb_txn), mdb_cursor_(mdb_cursor), valid_(false) {
    SeekToFirst();
  }
  virtual ~LMDBCursor() {
    mdb_cursor_close(mdb_cursor_);
    mdb_txn_abort(mdb_txn_);
  }
  virtual void SeekToFirst() { Seek(MDB_FIRST); }
  virtual void Next() { Seek(MDB_NEXT); }
  // Copies the current key out of the LMDB buffer.
  virtual string key() {
    return string(static_cast<const char*>(mdb_key_.mv_data), mdb_key_.mv_size);
  }
  // Copies the current value out of the LMDB buffer.
  virtual string value() {
    return string(static_cast<const char*>(mdb_value_.mv_data),
                  mdb_value_.mv_size);
  }
  virtual bool valid() { return valid_; }

 private:
  // Moves the cursor with the given LMDB operation. MDB_NOTFOUND marks the
  // cursor invalid; any other non-success status trips MDB_CHECK.
  void Seek(MDB_cursor_op op) {
    int mdb_status = mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, op);
    if (mdb_status == MDB_NOTFOUND) {
      valid_ = false;
    } else {
      MDB_CHECK(mdb_status);
      valid_ = true;
    }
  }

  MDB_txn* mdb_txn_;
  MDB_cursor* mdb_cursor_;
  MDB_val mdb_key_, mdb_value_;
  bool valid_;
};

// Write transaction bound to an LMDB environment. Put() and Commit() are
// defined out of line.
class LMDBTransaction : public Transaction {
 public:
  explicit LMDBTransaction(MDB_env* mdb_env) : mdb_env_(mdb_env) {}
  virtual void Put(const string& key, const string& value);
  virtual void Commit();

 private:
  MDB_env* mdb_env_;
  // NOTE(review): presumably the records buffered by Put() until Commit();
  // confirm in the implementation file.
  vector<string> keys, values;
  void DoubleMapSize();
};

// LMDB-backed implementation of DB.
class LMDB : public DB {
 public:
  LMDB() : mdb_env_(NULL) {}
  virtual ~LMDB() { Close(); }
  virtual void Open(const string& source, Mode mode);
  // Closes the database handle and the environment; a no-op when no
  // environment is open, so it is safe to call more than once.
  virtual void Close() {
    if (mdb_env_ != NULL) {
      mdb_dbi_close(mdb_env_, mdb_dbi_);
      mdb_env_close(mdb_env_);
      mdb_env_ = NULL;
    }
  }
  virtual LMDBCursor* NewCursor();
  virtual LMDBTransaction* NewTransaction();

 private:
  MDB_env* mdb_env_;
  MDB_dbi mdb_dbi_;
};

}  // namespace db
}  // namespace caffe

#endif  // CAFFE_UTIL_DB_LMDB_HPP
#endif  // USE_LMDB

- bool ReadImageToDatum(const string& filename, const int label, const int height,
- const int width, const int min_dim, const int max_dim,
- const bool is_color, const std::string& encoding,
- Datum* datum) {
- cv::Mat cv_img =
- ReadImageToCVMat(filename, height, width, min_dim, max_dim, is_color);
- if (cv_img.data) {
- if (encoding.size()) {
- if ((cv_img.channels() == 3) == is_color && !height && !width &&
- !min_dim && !max_dim && matchExt(filename, encoding)) {
- datum->set_channels(cv_img.channels());
- datum->set_height(cv_img.rows);
- datum->set_width(cv_img.cols);
- return ReadFileToDatum(filename, label, datum);
- }
- EncodeCVMatToDatum(cv_img, encoding, datum);
- datum->set_label(label);
- return true;
- }
- CVMatToDatum(cv_img, datum);
- datum->set_label(label);
- return true;
- } else {
- return false;
- }
- }

item {
  name: "aeroplane"
  label: 0
  display_name: "aeroplane"
}
item {
  name: "bicycle"
  label: 1
  display_name: "bicycle"
}
item {
  name: "bird"
  label: 2
  display_name: "bird"
}
item {
  name: "boat"
  label: 3
  display_name: "boat"
}

- bool ReadProtoFromTextFile(const char* filename, Message* proto) {
- int fd = open(filename, O_RDONLY);
- CHECK_NE(fd, -1) << "File not found: " << filename;
- FileInputStream* input = new FileInputStream(fd);
- bool success = google::protobuf::TextFormat::Parse(input, proto);
- delete input;
- close(fd);
- return success;
- }
MapNameToLabel函数将ReadProtoFromTextFile读取出来的LabelMap转换为一个std::map<string, int>* name_to_label的形式,便于后面的操作:
- bool MapNameToLabel(const LabelMap& map, const bool strict_check,
- std::map<string, int>* name_to_label) {
- // cleanup
- name_to_label->clear();
- for (int i = 0; i < map.item_size(); ++i) {
- const string& name = map.item(i).name();
- const int label = map.item(i).label();
- if (strict_check) {
- if (!name_to_label->insert(std::make_pair(name, label)).second) {
- LOG(FATAL) << "There are many duplicates of name: " << name;
- return false;
- }
- } else {
- (*name_to_label)[name] = label;
- }
- }
- return true;
- }

- bool ReadRichImageToAnnotatedDatum(
- const string& filename, const string& labelfile, const int height,
- const int width, const int min_dim, const int max_dim, const bool is_color,
- const string& encoding, const AnnotatedDatum_AnnotationType type,
- const string& labeltype, const std::map<string, int>& name_to_label,
- AnnotatedDatum* anno_datum) {
- // Read image to datum.
- bool status =
- ReadImageToDatum(filename, -1, height, width, min_dim, max_dim, is_color,
- encoding, anno_datum->mutable_datum());
- if (status == false) {
- return status;
- }
- anno_datum->clear_annotation_group();
- switch (type) {
- case AnnotatedDatum_AnnotationType_BBOX:
- int ori_height, ori_width;
- GetImageSize(filename, &ori_height, &ori_width);
- if (labeltype == "xml") {
- return ReadXMLToAnnotatedDatum(labelfile, ori_height, ori_width,
- name_to_label, anno_datum);
- } else {
- LOG(FATAL) << "Unknown label file type.";
- return false;
- }
- break;
- default:
- LOG(FATAL) << "Unknown annotation type.";
- return false;
- }
- }

#ifndef CAFFE_UTIL_FORMAT_H_
#define CAFFE_UTIL_FORMAT_H_

#include <iomanip>  // NOLINT(readability/streams)
#include <sstream>  // NOLINT(readability/streams)
#include <string>

namespace caffe {

// Formats `n` as decimal text, left-padded with '0' to a minimum width of
// `numberOfLeadingZeros` characters. Longer numbers are never truncated.
//
// std::internal places the padding between the sign and the digits, so a
// negative value comes out as "-05" rather than "0-5". Output for
// non-negative values is unchanged.
inline std::string format_int(int n, int numberOfLeadingZeros = 0) {
  std::ostringstream s;
  s << std::internal << std::setw(numberOfLeadingZeros) << std::setfill('0')
    << n;
  return s.str();
}

}  // namespace caffe

#endif  // CAFFE_UTIL_FORMAT_H_

如果还需要考虑对文件列表进行打乱,则还需要增加rng.hpp文件,而该文件又涉及到common.cpp和common.hpp。当然,你也可以将convert_imageset.cpp中的Shuffle代码注释掉,那么只需要以下几个文件就可以编译一个独立的lmdb转换代码:caffe.proto、convert_imageset.cpp、db.cpp、db.hpp、db_lmdb.cpp、db_lmdb.hpp、format.hpp、io.cpp、io.hpp。
- #include <algorithm>
- #include <fstream> // NOLINT(readability/streams)
- #include <string>
- #include <utility>
- #include <vector>
- #include "boost/scoped_ptr.hpp"
- #include "gflags/gflags.h"
- #include "glog/logging.h"
- #include "caffe.pb.h"
- #include "db.hpp"
- #include "format.hpp"
- #include "io.hpp"
- using namespace caffe; // NOLINT(build/namespaces)
- using std::pair;
- using boost::scoped_ptr;

- #include <algorithm>
- #include <fstream> // NOLINT(readability/streams)
- #include <map>
- #include <string>
- #include <utility>
- #include <vector>
- #include "boost/scoped_ptr.hpp"
- #include "boost/variant.hpp"
- #include "gflags/gflags.h"
- #include "glog/logging.h"
- #include <opencv2/highgui/highgui_c.h>
- #include <opencv2/core/core.hpp>
- #include <opencv2/highgui/highgui.hpp>
- #include <opencv2/imgproc/imgproc.hpp>
- #include "caffe.pb.h"
- #include "db.hpp"
- #include "format.hpp"
- #include "io.hpp"
- using namespace caffe; // NOLINT(build/namespaces)
- using std::pair;
- using boost::scoped_ptr;

# Drop the previously generated protobuf sources and regenerate them.
rm -rf caffe.pb*
/usr/local/protobuf/bin/protoc -I=. --cpp_out=./ caffe.proto

# Build both converters. format.hpp is a header that the sources pull in with
# #include (found through -I.), so it is not listed as an input file.
g++ -g --std=c++11 -DUSE_LMDB -DUSE_OPENCV -o convert_annoset convert_annoset.cpp caffe.pb.cc db.cpp db_lmdb.cpp io.cpp -I/usr/local/opencv2/include -L/usr/local/opencv2/lib -L. -lopencv_core -lopencv_highgui -lopencv_imgproc -I. -I/usr/local/protobuf/include/ -L/usr/local/protobuf/lib -lprotobuf -lglog -lgflags -lpthread -lboost_system -lboost_filesystem -llmdb
g++ -g --std=c++11 -DUSE_LMDB -DUSE_OPENCV -o convert_imageset convert_imageset.cpp caffe.pb.cc db.cpp db_lmdb.cpp io.cpp -I/usr/local/opencv2/include -L/usr/local/opencv2/lib -L. -lopencv_core -lopencv_highgui -lopencv_imgproc -I. -I/usr/local/protobuf/include/ -L/usr/local/protobuf/lib -lprotobuf -lglog -lgflags -lpthread -lboost_system -lboost_filesystem -llmdb
