This document is excerpted and translated from here. The original is in Korean and was run through Google Translate without human review, so read it critically. The following is the original author's explanation of the dataset.proto file.
Every piece of data is contained in a Frame message.
Since the messages inside a Frame depend on one another, everything from the basic messages up to Frame itself is explained below. Note, however, that the code is reproduced here for explanation, so the example values discussed do not necessarily match the actual data types.
message Label {
  // Upright box, zero pitch and roll.
  message Box {
    // Box coordinates in vehicle frame.
    optional double center_x = 1;
    optional double center_y = 2;
    optional double center_z = 3;

    // Dimensions of the box. length: dim x. width: dim y. height: dim z.
    optional double length = 5;
    optional double width = 4;
    optional double height = 6;

    // The heading of the bounding box (in radians). The heading is the angle
    // required to rotate +x to the surface normal of the box front face. It is
    // normalized to [-pi, pi).
    optional double heading = 7;

    enum Type {
      TYPE_UNKNOWN = 0;
      // 7-DOF 3D (a.k.a upright 3D box).
      TYPE_3D = 1;
      // 5-DOF 2D. Mostly used for laser top down representation.
      TYPE_2D = 2;
      // Axis aligned 2D. Mostly used for image.
      TYPE_AA_2D = 3;
    }
  }

  optional Box box = 1;

  message Metadata {
    optional double speed_x = 1;
    optional double speed_y = 2;
    optional double accel_x = 3;
    optional double accel_y = 4;
  }
  optional Metadata metadata = 2;

  enum Type {
    TYPE_UNKNOWN = 0;
    TYPE_VEHICLE = 1;
    TYPE_PEDESTRIAN = 2;
    TYPE_SIGN = 3;
    TYPE_CYCLIST = 4;
  }
  optional Type type = 3;

  // Object ID.
  optional string id = 4;

  // The difficulty level of this label. The higher the level, the harder it is.
  enum DifficultyLevel {
    UNKNOWN = 0;
    LEVEL_1 = 1;
    LEVEL_2 = 2;
  }

  // Difficulty level for detection problem.
  optional DifficultyLevel detection_difficulty_level = 5;
  // Difficulty level for tracking problem.
  optional DifficultyLevel tracking_difficulty_level = 6;

  // The total number of lidar points in this box.
  optional int32 num_lidar_points_in_box = 7;
}
Label is the message type that represents an annotated object. It contains the position and dimensions of the box, the box type, and the object type, as well as the number of lidar points inside the box and the detection/tracking difficulty levels.
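As a sketch of how the box fields fit together (numpy is assumed; the helper name and the example numbers are made up), the top-down footprint corners of a box can be computed from its center, length, width and heading:

import numpy as np

def box_footprint_corners(center_x, center_y, length, width, heading):
    """Return the 4 (x, y) corners of an upright box's top-down footprint.

    length is the extent along the box's +x axis, width along its +y axis,
    and heading is the rotation (radians) of the box's +x axis with respect
    to the vehicle frame's +x axis, as described in the Box comments.
    """
    # Corners in the box's own frame, centered at the origin.
    corners = np.array([[ length / 2,  width / 2],
                        [ length / 2, -width / 2],
                        [-length / 2, -width / 2],
                        [-length / 2,  width / 2]])
    # Rotate by heading, then translate to the box center.
    c, s = np.cos(heading), np.sin(heading)
    rotation = np.array([[c, -s],
                         [s,  c]])
    return corners @ rotation.T + np.array([center_x, center_y])

# Example values (made up for illustration).
print(box_footprint_corners(10.0, -2.0, 4.5, 1.8, np.pi / 6))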
message MatrixShape {
  // Dimensions for the Matrix messages defined below. Must not be empty.
  //
  // The order of entries in 'dims' matters, as it indicates the layout of the
  // values in the tensor in-memory representation.
  //
  // The first entry in 'dims' is the outermost dimension used to lay out the
  // values; the last entry is the innermost dimension. This matches the
  // in-memory layout of row-major matrices.
  repeated int32 dims = 1;
}
MatrixShape declares the shape (dimensions) of a matrix. Because the number of entries required grows with the number of dimensions, dims is a repeated field, and its order defines the row-major memory layout.
// Row-major matrix.
// Requires: data.size() = product(shape.dims()).
message MatrixFloat {
  repeated float data = 1 [packed = true];
  optional MatrixShape shape = 2;
}
MatrixFloat stores the matrix values as floats, together with a MatrixShape describing its dimensions.
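A minimal sketch, assuming the waymo-open-dataset pip package for the generated dataset_pb2 bindings, of the row-major invariant data.size() = product(shape.dims()):

import numpy as np
from waymo_open_dataset import dataset_pb2  # generated bindings; assumed installed

def matrix_float_to_numpy(matrix):
    """Convert a MatrixFloat proto to a numpy array using its row-major shape."""
    dims = list(matrix.shape.dims)
    data = np.array(matrix.data, dtype=np.float32)
    assert data.size == np.prod(dims), "data.size() must equal product(shape.dims())"
    return data.reshape(dims)

# Build a small 2 x 3 example by hand.
m = dataset_pb2.MatrixFloat()
m.shape.dims.extend([2, 3])
m.data.extend([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
print(matrix_float_to_numpy(m))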
MatrixInt32, shown below, stores the matrix values as 32-bit integers, again with a MatrixShape describing its dimensions.
// Row-major matrix.
// Requires: data.size() = product(shape.dims()).
message MatrixInt32 {
  repeated int32 data = 1 [packed = true];
  optional MatrixShape shape = 2;
}
CameraName, shown below, identifies the cameras mounted on the vehicle.
message CameraName {
  enum Name {
    UNKNOWN = 0;
    FRONT = 1;
    FRONT_LEFT = 2;
    FRONT_RIGHT = 3;
    SIDE_LEFT = 4;
    SIDE_RIGHT = 5;
  }
}
LaserName, shown below, identifies the lidars mounted on the vehicle.
// 'Laser' is used interchangeably with 'Lidar' in this file.
message LaserName {
  enum Name {
    UNKNOWN = 0;
    TOP = 1;
    FRONT = 2;
    SIDE_LEFT = 3;
    SIDE_RIGHT = 4;
    REAR = 5;
  }
}
Transform is a transformation matrix used to transform 3D points from one frame to another (for example, from the camera or lidar frame to the vehicle frame, or from the vehicle frame to the global frame).
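The Transform message definition itself is not reproduced in this excerpt; it simply stores a 4x4 row-major matrix flattened into 16 doubles. A sketch of applying such a transform to 3D points (numpy assumed; the helper name and values are made up):

import numpy as np

def apply_transform(transform_16, points_xyz):
    """Apply a 4x4 row-major transform (flattened to 16 doubles, as stored by
    the Transform message) to an [N, 3] array of points."""
    matrix = np.array(transform_16, dtype=np.float64).reshape(4, 4)
    # Homogeneous coordinates: append a column of ones.
    homogeneous = np.concatenate(
        [points_xyz, np.ones((points_xyz.shape[0], 1))], axis=1)
    return (homogeneous @ matrix.T)[:, :3]

# Identity rotation with a translation of (1, 2, 3), made up for illustration.
t = [1, 0, 0, 1,
     0, 1, 0, 2,
     0, 0, 1, 3,
     0, 0, 0, 1]
print(apply_transform(t, np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])))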
message Velocity {
  // Velocity in m/s.
  optional float v_x = 1;
  optional float v_y = 2;
  optional float v_z = 3;

  // Angular velocity in rad/s.
  optional double w_x = 4;
  optional double w_y = 5;
  optional double w_z = 6;
}
Velocity represents the velocity of an object: linear velocity in m/s and angular velocity in rad/s.
message CameraCalibration {
  optional CameraName.Name name = 1;
  // 1d Array of [f_u, f_v, c_u, c_v, k{1, 2}, p{1, 2}, k{3}].
  // Note that this intrinsic corresponds to the images after scaling.
  // Camera model: pinhole camera.
  // Lens distortion:
  //   Radial distortion coefficients: k1, k2, k3.
  //   Tangential distortion coefficients: p1, p2.
  // k_{1, 2, 3}, p_{1, 2} follows the same definition as OpenCV.
  // https://en.wikipedia.org/wiki/Distortion_(optics)
  // https://docs.opencv.org/2.4/doc/tutorials/calib3d/camera_calibration/camera_calibration.html
  repeated double intrinsic = 2;
  // Camera frame to vehicle frame.
  optional Transform extrinsic = 3;
  // Camera image size.
  optional int32 width = 4;
  optional int32 height = 5;

  enum RollingShutterReadOutDirection {
    UNKNOWN = 0;
    TOP_TO_BOTTOM = 1;
    LEFT_TO_RIGHT = 2;
    BOTTOM_TO_TOP = 3;
    RIGHT_TO_LEFT = 4;
    GLOBAL_SHUTTER = 5;
  }
  optional RollingShutterReadOutDirection rolling_shutter_direction = 6;
}
CameraCalibration holds the calibration of one camera: the intrinsic parameters, the extrinsic transform from the camera frame to the vehicle frame, the image size, and the rolling-shutter readout direction.
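As a sketch of how the intrinsic vector is laid out (the helper name and the example numbers are made up; numpy is assumed), the 1d array [f_u, f_v, c_u, c_v, k1, k2, p1, p2, k3] can be split into a 3x3 pinhole camera matrix and OpenCV-style distortion coefficients:

import numpy as np

def unpack_intrinsic(intrinsic):
    """Split CameraCalibration.intrinsic into a 3x3 pinhole camera matrix and
    OpenCV-style distortion coefficients, following the layout documented in
    the proto comment: [f_u, f_v, c_u, c_v, k1, k2, p1, p2, k3]."""
    f_u, f_v, c_u, c_v, k1, k2, p1, p2, k3 = intrinsic
    camera_matrix = np.array([[f_u, 0.0, c_u],
                              [0.0, f_v, c_v],
                              [0.0, 0.0, 1.0]])
    # OpenCV expects distortion coefficients in the order (k1, k2, p1, p2, k3).
    dist_coeffs = np.array([k1, k2, p1, p2, k3])
    return camera_matrix, dist_coeffs

# Example with made-up values.
K, dist = unpack_intrinsic([2055.5, 2055.5, 939.6, 641.0, 0.03, -0.3, 0.0, 0.0, 0.0])
print(K)
print(dist)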
message LaserCalibration {
  optional LaserName.Name name = 1;
  // If non-empty, the beam pitch (in radians) is non-uniform. When constructing
  // a range image, this mapping is used to map from beam pitch to range image
  // row. If this is empty, we assume a uniform distribution.
  repeated double beam_inclinations = 2;
  // beam_inclination_{min,max} (in radians) are used to determine the mapping.
  optional double beam_inclination_min = 3;
  optional double beam_inclination_max = 4;
  // Lidar frame to vehicle frame.
  optional Transform extrinsic = 5;
}
LaserCalibration holds the calibration of one lidar: the beam inclinations and the extrinsic transform from the lidar frame to the vehicle frame.
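A rough sketch of how the beam-inclination fields could be used (the helper is hypothetical, numpy is assumed, and it does not handle how rows are ordered in the actual range image): when beam_inclinations is empty, the proto comment says a uniform distribution between beam_inclination_min and beam_inclination_max is assumed.

import numpy as np

def get_beam_inclinations(calibration, height):
    """Return per-row beam inclinations (radians) for a range image with
    `height` rows. Falls back to uniform spacing between
    beam_inclination_min and beam_inclination_max when the explicit list is
    empty, as described in the proto comment. Note: row ordering (top beam
    first vs. last) may still need to be reversed depending on usage."""
    if calibration.beam_inclinations:
        return np.array(calibration.beam_inclinations)
    return np.linspace(calibration.beam_inclination_min,
                       calibration.beam_inclination_max, height)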
message Context {
  // A unique name that identifies the frame sequence.
  optional string name = 1;
  repeated CameraCalibration camera_calibrations = 2;
  repeated LaserCalibration laser_calibrations = 3;
  // Some stats for the run segment used.
  message Stats {
    message ObjectCount {
      optional Label.Type type = 1;
      // The number of unique objects with the type in the segment.
      optional int32 count = 2;
    }
    repeated ObjectCount laser_object_counts = 1;
    repeated ObjectCount camera_object_counts = 5;
    // Day, Dawn/Dusk, or Night, determined from sun elevation.
    optional string time_of_day = 2;
    // Human readable location (e.g. CHD, SF) of the run segment.
    optional string location = 3;
    // Currently either Sunny or Rain.
    optional string weather = 4;
  }
  optional Stats stats = 4;
}
Context holds the name of the frame sequence, the camera and lidar calibrations in effect at the time, the number of annotated objects, and the weather and location of the run segment.
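A minimal sketch of inspecting these statistics (the helper name is made up; it only uses fields defined in the message above):

def summarize_context(context):
    """Print a quick summary of a Context proto."""
    print("segment:", context.name)
    print("time of day:", context.stats.time_of_day,
          "| location:", context.stats.location,
          "| weather:", context.stats.weather)
    for oc in context.stats.laser_object_counts:
        # oc.type is the Label.Type enum value, oc.count the unique objects.
        print("laser objects of type", oc.type, ":", oc.count)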
message RangeImage {
  // Zlib compressed [H, W, 4] serialized version of MatrixFloat.
  // To decompress:
  //   string val = ZlibDecompress(range_image_compressed);
  //   MatrixFloat range_image;
  //   range_image.ParseFromString(val);
  // Inner dimensions are:
  //   * channel 0: range
  //   * channel 1: intensity
  //   * channel 2: elongation
  //   * channel 3: is in any no label zone.
  optional bytes range_image_compressed = 2;

  // Lidar point to camera image projections. A point can be projected to
  // multiple camera images. We pick the first two at the following order:
  // [FRONT, FRONT_LEFT, FRONT_RIGHT, SIDE_LEFT, SIDE_RIGHT].
  //
  // Zlib compressed [H, W, 6] serialized version of MatrixInt32.
  // To decompress:
  //   string val = ZlibDecompress(camera_projection_compressed);
  //   MatrixInt32 camera_projection;
  //   camera_projection.ParseFromString(val);
  // Inner dimensions are:
  //   * channel 0: CameraName.Name of 1st projection. Set to UNKNOWN if no
  //     projection.
  //   * channel 1: x (axis along image width)
  //   * channel 2: y (axis along image height)
  //   * channel 3: CameraName.Name of 2nd projection. Set to UNKNOWN if no
  //     projection.
  //   * channel 4: x (axis along image width)
  //   * channel 5: y (axis along image height)
  // Note: pixel 0 corresponds to the left edge of the first pixel in the image.
  optional bytes camera_projection_compressed = 3;

  // Zlib compressed [H, W, 6] serialized version of MatrixFloat.
  // To decompress:
  //   string val = ZlibDecompress(range_image_pose_compressed);
  //   MatrixFloat range_image_pose;
  //   range_image_pose.ParseFromString(val);
  // Inner dimensions are [roll, pitch, yaw, x, y, z] represents a transform
  // from vehicle frame to global frame for every range image pixel.
  // This is ONLY populated for the first return. The second return is assumed
  // to have exactly the same range_image_pose_compressed.
  //
  // The roll, pitch and yaw are specified as 3-2-1 Euler angle rotations,
  // meaning that rotating from the navigation to vehicle frame consists of a
  // yaw, then pitch and finally roll rotation about the z, y and x axes
  // respectively. All rotations use the right hand rule and are positive
  // in the counter clockwise direction.
  optional bytes range_image_pose_compressed = 4;

  // Zlib compressed [H, W, 5] serialized version of MatrixFloat.
  // To decompress:
  //   string val = ZlibDecompress(range_image_flow_compressed);
  //   MatrixFloat range_image_flow;
  //   range_image_flow.ParseFromString(val);
  // Inner dimensions are [vx, vy, vz, pointwise class].
  //
  // If the point is not annotated with scene flow information, class is set
  // to -1. A point is not annotated if it is in a no-label zone or if its label
  // bounding box does not have a corresponding match in the previous frame,
  // making it infeasible to estimate the motion of the point.
  // Otherwise, (vx, vy, vz) are velocity along (x, y, z)-axis for this point
  // and class is set to one of the following values:
  //  -1: no-flow-label, the point has no flow information.
  //   0: unlabeled or "background,", i.e., the point is not contained in a
  //      bounding box.
  //   1: vehicle, i.e., the point corresponds to a vehicle label box.
  //   2: pedestrian, i.e., the point corresponds to a pedestrian label box.
  //   3: sign, i.e., the point corresponds to a sign label box.
  //   4: cyclist, i.e., the point corresponds to a cyclist label box.
  optional bytes range_image_flow_compressed = 5;

  // Deprecated, do not use.
  optional MatrixFloat range_image = 1 [deprecated = true];
}
RangeImage stores the lidar return as compressed range images. Each channel holds a different feature (range, intensity, elongation, no-label-zone flag, camera projections, per-pixel pose, scene flow), so by selecting the appropriate channel you can obtain the data you need.
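Following the decompression recipe in the comments above, a minimal Python sketch (assuming the waymo-open-dataset package for the generated dataset_pb2 bindings) of decoding range_image_compressed into an [H, W, 4] array:

import zlib
import numpy as np
from waymo_open_dataset import dataset_pb2  # generated bindings; assumed installed

def decode_range_image(range_image_proto):
    """Decode RangeImage.range_image_compressed into an [H, W, 4] numpy array
    (channels: range, intensity, elongation, no-label-zone flag)."""
    decompressed = zlib.decompress(range_image_proto.range_image_compressed)
    matrix = dataset_pb2.MatrixFloat()
    matrix.ParseFromString(decompressed)
    return np.array(matrix.data, dtype=np.float32).reshape(list(matrix.shape.dims))

The other compressed fields (camera_projection_compressed, range_image_pose_compressed, range_image_flow_compressed) follow the same pattern with MatrixInt32 or MatrixFloat and their own channel layouts.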
// All timestamps in this proto are represented as seconds since Unix epoch.
message CameraImage {
  optional CameraName.Name name = 1;
  // JPEG image.
  optional bytes image = 2;
  // SDC pose.
  optional Transform pose = 3;
  // SDC velocity at 'pose_timestamp' below. The velocity value is represented
  // at *global* frame.
  // With this velocity, the pose can be extrapolated.
  // r(t+dt) = r(t) + dr/dt * dt where dr/dt = v_{x,y,z}.
  // dR(t)/dt = W*R(t) where W = SkewSymmetric(w_{x,y,z})
  // This differential equation solves to: R(t) = exp(Wt)*R(0) if W is constant.
  // When dt is small: R(t+dt) = (I+W*dt)R(t)
  // r(t) = (x(t), y(t), z(t)) is vehicle location at t in the global frame.
  // R(t) = Rotation Matrix (3x3) from the body frame to the global frame at t.
  // SkewSymmetric(x,y,z) is defined as the cross-product matrix in the
  // following:
  // https://en.wikipedia.org/wiki/Cross_product#Conversion_to_matrix_multiplication
  optional Velocity velocity = 4;
  // Timestamp of the `pose` above.
  optional double pose_timestamp = 5;

  // Rolling shutter params.
  // The following explanation assumes left->right rolling shutter.
  //
  // Rolling shutter cameras expose and read the image column by column, offset
  // by the read out time for each column. The desired timestamp for each column
  // is the middle of the exposure of that column as outlined below for an image
  // with 3 columns:
  // ------time------>
  // |---- exposure col 1----| read |
  // -------|---- exposure col 2----| read |
  // --------------|---- exposure col 3----| read |
  // ^trigger time                         ^readout end time
  // ^time for row 1 (= middle of exposure of row 1)
  //        ^time image center (= middle of exposure of middle row)

  // Shutter duration in seconds. Exposure time per column.
  optional double shutter = 6;
  // Time when the sensor was triggered and when last readout finished.
  // The difference between trigger time and readout done time includes
  // the exposure time and the actual sensor readout time.
  optional double camera_trigger_time = 7;
  optional double camera_readout_done_time = 8;
}
CameraImage holds the camera image together with when and in what conditions it was captured: the vehicle pose and velocity at capture time and the rolling-shutter timing parameters.
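As a sketch of the pose extrapolation described in the comments above (a first-order approximation for small dt; numpy is assumed and the example numbers are made up):

import numpy as np

def skew(w):
    """Cross-product (skew-symmetric) matrix of a 3-vector."""
    wx, wy, wz = w
    return np.array([[0.0, -wz,  wy],
                     [ wz, 0.0, -wx],
                     [-wy,  wx, 0.0]])

def extrapolate_pose(R, r, v, w, dt):
    """First-order pose extrapolation from the comments:
    r(t+dt) = r(t) + v*dt and R(t+dt) ~= (I + W*dt) R(t) for small dt,
    where W = SkewSymmetric(w)."""
    r_new = r + v * dt
    R_new = (np.eye(3) + skew(w) * dt) @ R
    return R_new, r_new

# Example: driving along +x at 10 m/s with a slight yaw rate, dt = 50 ms.
R0, r0 = np.eye(3), np.zeros(3)
print(extrapolate_pose(R0, r0, np.array([10.0, 0.0, 0.0]),
                       np.array([0.0, 0.0, 0.1]), 0.05))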
// The camera labels associated with a given camera image. This message
// indicates the ground truth information for the camera image
// recorded by the given camera. If there are no labeled objects in the image,
// then the labels field is empty.
message CameraLabels {
  optional CameraName.Name name = 1;
  repeated Label labels = 2;
}
CameraLabels indicates which labels belong to which camera image.
message Laser {
  optional LaserName.Name name = 1;
  optional RangeImage ri_return1 = 2;
  optional RangeImage ri_return2 = 3;
}
Laser associates each lidar with its range images (first and second returns).
message Frame {
  // The following field numbers are reserved for third-party extensions. Users
  // may declare new fields in that range in their own .proto files without
  // having to edit the original file.
  extensions 1000 to max;

  // This context is the same for all frames belong to the same driving run
  // segment. Use context.name to identify frames belong to the same driving
  // segment. We do not store all frames from one driving segment in one proto
  // to avoid huge protos.
  optional Context context = 1;

  // Frame start time, which is the timestamp of the first top lidar spin
  // within this frame.
  optional int64 timestamp_micros = 2;
  // The vehicle pose.
  optional Transform pose = 3;
  repeated CameraImage images = 4;
  repeated Laser lasers = 5;
  repeated Label laser_labels = 6;
  // Lidar labels (laser_labels) projected to camera images. A projected
  // label is the smallest image axis aligned rectangle that can cover all
  // projected points from the 3d lidar label. The projected label is ignored if
  // the projection is fully outside a camera image. The projected label is
  // clamped to the camera image if it is partially outside.
  repeated CameraLabels projected_lidar_labels = 9;
  // NOTE: if a camera identified by CameraLabels.name has an entry in this
  // field, then it has been labeled, even though it is possible that there are
  // no labeled objects in the corresponding image, which is identified by a
  // zero sized CameraLabels.labels.
  repeated CameraLabels camera_labels = 8;
  // No label zones in the *global* frame.
  repeated Polygon2dProto no_label_zones = 7;
}
Frame combines all of the messages above into a single frame; each value is stored under its own field.
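As a usage sketch, frames are typically read from the downloaded TFRecord segment files with TensorFlow and parsed with the generated bindings (the file path below is a placeholder):

import tensorflow as tf
from waymo_open_dataset import dataset_pb2  # generated bindings; assumed installed

FILENAME = '/path/to/segment.tfrecord'  # placeholder: any downloaded segment file

dataset = tf.data.TFRecordDataset(FILENAME, compression_type='')
for data in dataset:
    frame = dataset_pb2.Frame()
    frame.ParseFromString(bytearray(data.numpy()))
    print(frame.context.name, frame.timestamp_micros,
          len(frame.laser_labels), 'laser labels')
    break  # only inspect the first frame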