赞
踩
Redis没有表的概念。但是可以用key作标识进行区分,比如:user:1000作为key值,表示在user表下id为1000的元素,类似于user表的id=1000的行。
Redis逻辑关系: redisServer–>redisDB–>key-redisObject。所以这里主要介绍redisServer、redisDB、redisObject
结构。
服务端结构体redisServer存储Redis服务器的所有信息,包括但不限于数据库、配置参数、命令表、监听端口与地址、客户端列表、若干统计信息、RDB与AOF持久化相关信息、主从复制相关信息、集群相关信息等。
redis.h/redisServer
结构记录了和服务器相关的所有数据, 这个结构体主要包含以下信息:
struct redisServer { /* General */ //配置文件路径 char *configfile; /* Absolute config file path, or NULL */ //serverCron()调用频率 int hz; /* serverCron() calls frequency in hertz */ //数据库对象数组指针 redisDb *db; //支持的命令列表 dict *commands; /* Command table */ //没有转化的命令 dict *orig_commands; /* Command table before command renaming. */ //事件 aeEventLoop *el; //每分钟增加一次 unsigned lruclock:22; /* Clock incrementing every minute, for LRU */ unsigned lruclock_padding:10; int shutdown_asap; /* SHUTDOWN needed ASAP */ int activerehashing; /* Incremental rehash in serverCron() */ //验证密码 char *requirepass; /* Pass for AUTH command, or NULL */ char *pidfile; /* PID file path */ int arch_bits; /* 32 or 64 depending on sizeof(long) */ int cronloops; /* Number of times the cron function run */ char runid[REDIS_RUN_ID_SIZE+1]; /* ID always different at every exec. */ int sentinel_mode; /* True if this instance is a Sentinel. */ /* Networking */ int port; /* TCP listening port */ int tcp_backlog; /* TCP listen() backlog */ char *bindaddr[REDIS_BINDADDR_MAX]; /* Addresses we should bind to */ int bindaddr_count; /* Number of addresses in server.bindaddr[] */ char *unixsocket; /* UNIX socket path */ mode_t unixsocketperm; /* UNIX socket permission */ int ipfd[REDIS_BINDADDR_MAX]; /* TCP socket file descriptors */ int ipfd_count; /* Used slots in ipfd[] */ int sofd; /* Unix socket file descriptor */ int cfd[REDIS_BINDADDR_MAX];/* Cluster bus listening socket */ int cfd_count; /* Used slots in cfd[] */ // 连接的客户端 list *clients; /* List of active clients */ list *clients_to_close; /* Clients to close asynchronously */ list *slaves, *monitors; /* List of slaves and MONITORs */ redisClient *current_client; /* Current client, only used on crash report */ int clients_paused; /* True if clients are currently paused */ mstime_t clients_pause_end_time; /* Time when we undo clients_paused */ char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */ dict *migrate_cached_sockets;/* MIGRATE cached sockets */ /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ off_t loading_total_bytes; off_t loading_loaded_bytes; time_t loading_start_time; off_t loading_process_events_interval_bytes; /* Fast pointers to often looked up command */ struct redisCommand *delCommand, *multiCommand, *lpushCommand, *lpopCommand, *rpopCommand; /* Fields used only for stats */ time_t stat_starttime; /* Server start time */ long long stat_numcommands; /* Number of processed commands */ long long stat_numconnections; /* Number of connections received */ long long stat_expiredkeys; /* Number of expired keys */ long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */ long long stat_keyspace_hits; /* Number of successful lookups of keys */ long long stat_keyspace_misses; /* Number of failed lookups of keys */ size_t stat_peak_memory; /* Max used memory record */ long long stat_fork_time; /* Time needed to perform latest fork() */ long long stat_rejected_conn; /* Clients rejected because of maxclients */ long long stat_sync_full; /* Number of full resyncs with slaves. */ long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */ long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */ //保存慢日志命令 list *slowlog; /* SLOWLOG list of commands */ long long slowlog_entry_id; /* SLOWLOG current entry ID */ long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */ /* The following two are used to track instantaneous "load" in terms * of operations per second. */ long long ops_sec_last_sample_time; /* Timestamp of last sample (in ms) */ long long ops_sec_last_sample_ops; /* numcommands in last sample */ long long ops_sec_samples[REDIS_OPS_SEC_SAMPLES]; int ops_sec_idx; /* Configuration */ int verbosity; /* Loglevel in redis.conf */ int maxidletime; /* Client timeout in seconds */ int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */ int active_expire_enabled; /* Can be disabled for testing purposes. */ size_t client_max_querybuf_len; /* Limit for client query buffer length */ int dbnum; /* Total number of configured DBs */ int daemonize; /* True if running as a daemon */ clientBufferLimitsConfig client_obuf_limits[REDIS_CLIENT_LIMIT_NUM_CLASSES]; /* AOF persistence */ int aof_state; /* REDIS_AOF_(ON|OFF|WAIT_REWRITE) */ int aof_fsync; /* Kind of fsync() policy */ char *aof_filename; /* Name of the AOF file */ int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */ int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */ off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */ off_t aof_current_size; /* AOF current size. */ int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */ pid_t aof_child_pid; /* PID if rewriting process */ list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */ sds aof_buf; /* AOF buffer, written before entering the event loop */ int aof_fd; /* File descriptor of currently selected AOF file */ int aof_selected_db; /* Currently selected DB in AOF */ time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */ time_t aof_last_fsync; /* UNIX time of last fsync() */ time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */ time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */ int aof_lastbgrewrite_status; /* REDIS_OK or REDIS_ERR */ unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */ int aof_rewrite_incremental_fsync;/* fsync incrementally while rewriting? */ int aof_last_write_status; /* REDIS_OK or REDIS_ERR */ int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */ /* RDB persistence */ long long dirty; /* Changes to DB from the last save */ long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */ pid_t rdb_child_pid; /* PID of RDB saving child */ struct saveparam *saveparams; /* Save points array for RDB */ int saveparamslen; /* Number of saving points */ char *rdb_filename; /* Name of RDB file */ int rdb_compression; /* Use compression in RDB? */ int rdb_checksum; /* Use RDB checksum? */ time_t lastsave; /* Unix time of last successful save */ time_t lastbgsave_try; /* Unix time of last attempted bgsave */ time_t rdb_save_time_last; /* Time used by last RDB save run. */ time_t rdb_save_time_start; /* Current RDB save start time. */ int lastbgsave_status; /* REDIS_OK or REDIS_ERR */ int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */ /* Propagation of commands in AOF / replication */ redisOpArray also_propagate; /* Additional command to propagate. */ /* Logging */ char *logfile; /* Path of log file */ int syslog_enabled; /* Is syslog enabled? */ char *syslog_ident; /* Syslog ident */ int syslog_facility; /* Syslog facility */ /* Replication (master) */ int slaveseldb; /* Last SELECTed DB in replication output */ long long master_repl_offset; /* Global replication offset */ int repl_ping_slave_period; /* Master pings the slave every N seconds */ char *repl_backlog; /* Replication backlog for partial syncs */ long long repl_backlog_size; /* Backlog circular buffer size */ long long repl_backlog_histlen; /* Backlog actual data length */ long long repl_backlog_idx; /* Backlog circular buffer current offset */ long long repl_backlog_off; /* Replication offset of first byte in the backlog buffer. */ time_t repl_backlog_time_limit; /* Time without slaves after the backlog gets released. */ time_t repl_no_slaves_since; /* We have no slaves since that time. Only valid if server.slaves len is 0. */ int repl_min_slaves_to_write; /* Min number of slaves to write. */ int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */ int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */ /* Replication (slave) */ char *masterauth; /* AUTH with this password with master */ char *masterhost; /* Hostname of master */ int masterport; /* Port of master */ int repl_timeout; /* Timeout after N seconds of master idle */ redisClient *master; /* Client that is master for this slave */ redisClient *cached_master; /* Cached master to be reused for PSYNC. */ int repl_syncio_timeout; /* Timeout for synchronous I/O calls */ int repl_state; /* Replication status if the instance is a slave */ off_t repl_transfer_size; /* Size of RDB to read from master during sync. */ off_t repl_transfer_read; /* Amount of RDB read from master during sync. */ off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */ int repl_transfer_s; /* Slave -> Master SYNC socket */ int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */ char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */ time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */ int repl_serve_stale_data; /* Serve stale data when link is down? */ int repl_slave_ro; /* Slave is read only? */ time_t repl_down_since; /* Unix time at which link with master went down */ int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */ int slave_priority; /* Reported in INFO and used by Sentinel. */ char repl_master_runid[REDIS_RUN_ID_SIZE+1]; /* Master run id for PSYNC. */ long long repl_master_initial_offset; /* Master PSYNC offset. */ /* Replication script cache. */ dict *repl_scriptcache_dict; /* SHA1 all slaves are aware of. */ list *repl_scriptcache_fifo; /* First in, first out LRU eviction. */ int repl_scriptcache_size; /* Max number of elements. */ /* Synchronous replication. */ list *clients_waiting_acks; /* Clients waiting in WAIT command. */ int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */ /* Limits */ unsigned int maxclients; /* Max number of simultaneous clients */ unsigned long long maxmemory; /* Max number of memory bytes to use */ int maxmemory_policy; /* Policy for key eviction */ int maxmemory_samples; /* Pricision of random sampling */ /* Blocked clients */ unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */ list *unblocked_clients; /* list of clients to unblock before next loop */ list *ready_keys; /* List of readyList structures for BLPOP & co */ /* Sort parameters - qsort_r() is only available under BSD so we * have to take this state global, in order to pass it to sortCompare() */ int sort_desc; int sort_alpha; int sort_bypattern; int sort_store; /* Zip structure config, see redis.conf for more information */ size_t hash_max_ziplist_entries; size_t hash_max_ziplist_value; size_t list_max_ziplist_entries; size_t list_max_ziplist_value; size_t set_max_intset_entries; size_t zset_max_ziplist_entries; size_t zset_max_ziplist_value; time_t unixtime; /* Unix time sampled every cron cycle. */ long long mstime; /* Like 'unixtime' but with milliseconds resolution. */ /* Pubsub */ dict *pubsub_channels; /* Map channels to list of subscribed clients */ list *pubsub_patterns; /* A list of pubsub_patterns */ int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an xor of REDIS_NOTIFY... flags. */ /* Cluster */ int cluster_enabled; /* Is cluster enabled? */ mstime_t cluster_node_timeout; /* Cluster node timeout. */ char *cluster_configfile; /* Cluster auto-generated config file name. */ struct clusterState *cluster; /* State of the cluster */ int cluster_migration_barrier; /* Cluster replicas migration barrier. */ /* Scripting */ lua_State *lua; /* The Lua interpreter. We use just one for all clients */ redisClient *lua_client; /* The "fake client" to query Redis from Lua */ redisClient *lua_caller; /* The client running EVAL right now, or NULL */ dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */ mstime_t lua_time_limit; /* Script timeout in milliseconds */ mstime_t lua_time_start; /* Start time of script, milliseconds time */ int lua_write_dirty; /* True if a write command was called during the execution of the current script. */ int lua_random_dirty; /* True if a random command was called during the execution of the current script. */ int lua_timedout; /* True if we reached the time limit for script execution. */ int lua_kill; /* Kill the script if true. */ /* Assert & bug reporting */ char *assert_failed; char *assert_file; int assert_line; int bug_report_start; /* True if bug report header was already logged. */ int watchdog_period; /* Software watchdog period in ms. 0 = off */ };
typedef struct redisDb {
int id; /* id是本数据库的序号,为0-15(默认Redis有16个数据库) */
dict *dict; /* 存储数据库所有的key-value */
dict *expires; /* 键的过期时间,字典的键为键,字典的值为过期 UNIX 时间戳 */
dict *blocking_keys; /* blpop 存储阻塞key和客户端对象 */
dict *ready_keys; /* 阻塞后push 响应阻塞客户端 存储阻塞后push的key和客户端对象 */
dict *watched_keys; /* 存储watch监控的的key和客户端对象 */
long long avg_ttl; /* 存储的数据库对象的平均ttl(time to live),用于统计 */
list *defrag_later; /* List of key names to attempt to defrag one by one, gradually. */
} redisDb;
在32位的系统中: short
与int
占两个字节, long
占四个字节, long long
占八个字节;
在64位的系统中: short
占两个字节, int
与 long
占四个字节, long long
占八个字节。
C short/int/long/long long 等数据类型大小
C的标准并没有规定每种类型占多少位,只是说 “sizeof(long)>=sizeof(int)>=sizeof(short)”,所以具体的字节数都是根据编译器来确定的
数据类型 | 字节大小 | 数值范围 |
---|---|---|
char | 1 字节 | -128 到 127 或 0 到 255 |
unsigned char | 1 字节 | 0 到 255 |
short int (短整型) | 2 字节 | -32 768 〜+32 767**(2^15 - 1)** |
unsigned short int (无符号短整型) | 2 字节 | 0 〜+65 535**(2^16 - 1)** |
int (整型) | 2或4 字节 | 4字节:-2 147 483 648 〜+2 147 483 647**(2^31 - 1)** |
unsigned int (无符号整型) | 2或4字节 | 4字节:0 〜4 294 967 295 (2^32 - 1) |
long int (长整型) | 4 字节 | -2 147 483 648 〜+2 147 483 647**(2^31 - 1)** |
unsigned long int (无符号长整型) | 4 字节 | 0 〜4 294 967 295**(2^32 - 1)** |
long long int (超长整型) | 8字节 | -9 223 372 036 854 775 808~9 223 372 036 854 775 807 (2^63 - 1) |
unsigned long long int (无符号超长整型) | 8字节 | 048 446 744 073 709 551 615 (2^64 - 1) |
unsigned、signed、short、long修饰int,int可以省略
typedef struct redisObject {
// 类型 0-string 1-list 2-set 3-zset 4-hash,在宏中定义。
unsigned type:4; // :4 是位域(位段),表示只占用4bit,2^4 ,http://c.biancheng.net/view/2037.html
// 对象编码。某些类型的对象(如字符串和哈希)可以通过多种方式在内部表示。ENCODING表明表示方式。
unsigned encoding:4;
// LRU_BITS = 24,共24位,高16位存储一个分钟数级别的时间戳,低8位存储访问计数(lfu : 最近访问次数)
unsigned lru:LRU_BITS;
// 引用次数
int refcount;
// 指针指向具体数据,void * 类型,从而可以执行那六大数据结构
void *ptr;
} robj;
分别对应着5种基础数据类型(非数据结构类型)
/* The actual Redis Object */
#define OBJ_STRING 0 /* String object. */
#define OBJ_LIST 1 /* List object. */
#define OBJ_SET 2 /* Set object. */
#define OBJ_ZSET 3 /* Sorted set object. */
#define OBJ_HASH 4 /* Hash object. */
对象编码(数据结构类型)。某些类型的对象(如字符串和哈希)可以通过多种方式在内部表示。ENCODING表明表示方式。
#define OBJ_ENCODING_RAW 0 // 编码为字符串 c语言类型
#define OBJ_ENCODING_INT 1 // 编码为整数
#define OBJ_ENCODING_HT 2 // 编码为哈希表
#define OBJ_ENCODING_ZIPMAP 3 // 编码为 zipmap
#define OBJ_ENCODING_LINKEDLIST 4 /* No longer used: old list encoding. */
#define OBJ_ENCODING_ZIPLIST 5 // 编码为压缩列表
#define OBJ_ENCODING_INTSET 6 // 编码为整数集合
#define OBJ_ENCODING_SKIPLIST 7 // 编码为跳跃表
#define OBJ_ENCODING_EMBSTR 8 // 编码为SDS字符串
#define OBJ_ENCODING_QUICKLIST 9 /* 快速列表 压缩列表+链表 */
#define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */
lru 记录的是对象最后一次被命令程序访问的时间,( 4.0 以上版本占 24 位,2.6 版本占 22 位)。高16位存储一个分钟数级别的时间戳,低8位存储访问计数(lfu : 最近访问次数) 。
lru----> 高16位: 最后被访问的时间,时间戳秒级十进制是19位,所以分钟级别的是16位。
lfu-----> 低8位: 最近访问次数
refcount 记录的是该对象被引用的次数,类型为整型。refcount 的作用,主要在于对象的引用计数和内存回收。
当对象的refcount>1时,称为共享对象,Redis 为了节省内存,当有一些对象重复出现时(例如经常返回的"OK"字符串),新的程序不会创建新的对象,而是仍然使用原来的对象。
ptr 指针指向具体的数据,比如:set hello world,ptr 指向包含字符串 world 的 SDS。
当然是为了追求速度,不同数据类型使用不同的数据结构速度才得以提升。每种数据类型都有一种或者多种数据结构来支撑,底层数据结构有 6 种。
type与encoding有固定的搭配,encoding表示的是对应的数据结构。
type | encoding-simple | encoding | 描述 |
---|---|---|---|
string | int | REDIS_ENCODING_INT(整数) | value为纯数字时 |
embstr | REDIS_ENCODING_EMBSTR(embstr 编码的简单动态字符串(SDS)) | value为小字符串,长度 小于 44(OBJ_ENCODING_EMBSTR_SIZE_LIMIT)个字节 | |
raw | REDIS_ENCODING_RAW(简单动态字符串) | value为大字符串,长度 大于 44(OBJ_ENCODING_EMBSTR_SIZE_LIMIT)个字节 | |
list | quicklist | REDIS_ENCODING_QUICKLIST(快速列表) | |
set | intset | REDIS_ENCODING_INTSET(整数集合) | 当Redis集合类型的所有元素均是整数并且都处在64位有符号整数范围内。 |
dict | REDIS_ENCODING_HT(字典) | 当Redis集合类型的元素是非整数或包含处在64位有符号整数范围外元素。 | |
zset | ziplist | REDIS_ENCODING_ZIPLIST(压缩列表) | 当元素的个数比较少,且元素都是小整数或短字符串时。 |
skiplist | REDIS_ENCODING_SKIPLIST(跳表) | 当元素的个数比较多或元素不是小整数或短字符串时。 | |
hash | ziplist | REDIS_ENCODING_ZIPLIST(压缩列表) | 当散列表元素的个数比较少,且元素都是小整数或短字符串时。 |
dict | REDIS_ENCODING_HT(字典) | 当散列表元素的个数比较多或元素不是小整数或短字符串时。 | |
stream | ziplist | OBJ_ENCODING_STREAM(流) | 类似消息队列 |
Redis是典型的客户端/服务器(C/S)结构,客户端通过socket与服务端建立网络连接并发送命令请求,服务端处理命令请求并回复。Redis使用结构体client存储客户端连接的所有信息。包括但不限于客户端的名称、客户端连接的套接字描述符、客户端当前选择的数据库ID、客户端的输入缓冲区与输出缓冲区等。结构体client字段较多,此处只介绍命令处理主流程所需的关键字段。
typedef struct client { uint64_t id; // 客户端唯一ID,通过全局变量server.next_client_id实现。 int fd; // socket的文件描述符。 redisDb *db; // select命令选择的数据库对象 robj *name; // 客户端名称,可以使用命令CLIENT SETNAME设置。 time_t lastinteraction // 客户端上次与服务器交互的时间,以此实现客户端的超时处理。 sds querybuf; //输入缓冲区,recv函数接收的客户端命令请求会暂时缓存在此缓冲区。 int argc; robj **argv; struct redisCommand *cmd; list *reply; unsigned long long reply_bytes; size_t sentlen; char buf[PROTO_REPLY_CHUNK_BYTES]; int bufpos; } client;
id为客户端唯一ID,通过全局变量server.next_client_id实现。
fd为客户端socket的文件描述符。
db为客户端使用select命令选择的数据库对象。
name:客户端名称,可以使用命令CLIENT SETNAME设置。
lastinteraction:客户端上次与服务器交互的时间,以此实现客户端的超时处理。
querybuf:输入缓冲区,recv函数接收的客户端命令请求会暂时缓存在此缓冲区。
argc:输入缓冲区的命令请求是按照Redis协议格式编码字符串,需要解析出命令请求的所有参数,参数个数存储在argc字段,参数内容被解析为robj对象,存储在argv数组。
cmd:待执行的客户端命令;解析命令请求后,会根据命令名称查找该命令对应的命令对象,存储在客户端cmd字段,可以看到其类型为struct redisCommand。
reply:输出链表,存储待返回给客户端的命令回复数据。链表节点存储的值类型为clientReplyBlock,定义如下:
typedef struct clientReplyBlock {
size_t size, used;
char buf[];
} clientReplyBlock;
可以看到链表节点本质上就是一个缓冲区(buffffer),其中size表示缓冲区空间总大小,used表示缓冲区已使用空间大小。
redis支持的所有命令初始都存储在全局变量redisCommandTable中,类型为redisCommand。
struct redisCommand redisCommandTable[] = {
{"module",moduleCommand,-2,"as",0,NULL,0,0,0,0,0},
{"get",getCommand,2,"rF",0,NULL,1,1,1,0,0},
{"set",setCommand,-3,"wm",0,NULL,1,1,1,0,0},
// ……忽略部分代码……
};
redisCommand结构也非常简单:
struct redisCommand {
char *name;
redisCommandProc *proc;
int arity;
char *sflags; /* Flags as string representation, one char per flag. */
int flags; /* The actual flags, obtained from the 'sflags' field. */
/* Use a function to determine keys arguments in a command line.
* Used for Redis Cluster redirect. */
redisGetKeysProc *getkeys_proc;
/* What keys should be loaded in background when calling this command? */
int firstkey; /* The first argument that's a key (0 = no keys) */
int lastkey; /* The last argument that's a key */
int keystep; /* The step between first and last key */
long long microseconds, calls;
};
name:命令名称。
proc:命令处理函数。
arity:命令参数数目,用于校验命令请求格式是否正确;当arity小于0时,表示命令参数数目大于等于arity;当arity大于0时,表示命令参数数目必须为arity;注意命令请求中,命令的名称本身也是一个参数,如get命令的参数数目为2,命令请求格式为get key。
sflags:命令标志,例如标识命令时读命令还是写命令,详情参见表9-2;注意到sflags的类型为字符串,此处只是为了良好的可读性。
flags:命令的二进制标志,服务器启动时解析sflags字段生成。
calls:从服务器启动至今命令执行的次数,用于统计。
microseconds:从服务器启动至今命令总的执行时间,
microseconds/calls即可计算出该命令的平均处理时间,用于统计。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。