赞
踩
这篇笔记来看看TCP对accept()系统调用的实现。
//从注释中也可以看出accept()系统调用要做的事情: //1.建立一个新的套接字供服务器端和客户端通信 //2.创建一个新的fd供应用程序后续读写该套接字 /* * For accept, we attempt to create a new socket, set up the link * with the client, wake up the client, then return the new * connected fd. We collect the address of the connector in kernel * space and move it to user at the very end. This is unclean because * we open the socket then return an error. * * 1003.1g adds the ability to recvmsg() to query connection pending * status to recvmsg. We need to add that support in a way thats * clean when we restucture accept also. */ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) { struct socket *sock, *newsock; struct file *newfile; int err, len, newfd, fput_needed; char address[MAX_SOCK_ADDR]; //根据监听套接字的fd找到监听套接字对应的套接字结构struct scoket sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; //为新的通信套接字分配套接字结构struct socket err = -ENFILE; if (!(newsock = sock_alloc())) goto out_put; //新的套接字类型和操作函数集与监听套接字相同 newsock->type = sock->type; newsock->ops = sock->ops; /* * We don't need try_module_get here, as the listening socket (sock) * has the protocol module (sock->ops->owner) held. 
*/ __module_get(newsock->ops->owner); //为新创建的通信套接字结构分配文件描述符fd newfd = sock_alloc_fd(&newfile); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); goto out_put; } //将fd与socket关联起来 err = sock_attach_fd(newsock, newfile); if (err < 0) goto out_fd_simple; //SELinux相关 err = security_socket_accept(sock, newsock); if (err) goto out_fd; //调用协议族提供的accept()函数完成接收,IPv4协议族中,为inet_accept(),见下文 err = sock->ops->accept(sock, newsock, sock->file->f_flags); if (err < 0) goto out_fd; //如果accept()系统调用参数中指明要获取客户端地址信息, //则调用getname()接口获取客户端信息后将其拷贝到用户空间 if (upeer_sockaddr) { if (newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2) < 0) { err = -ECONNABORTED; goto out_fd; } err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen); if (err < 0) goto out_fd; } //关联文件系统 /* File flags are not inherited via accept() unlike another OSes. */ fd_install(newfd, newfile); err = newfd; security_socket_post_accept(sock, newsock); out_put: fput_light(sock->file, fput_needed); out: return err; out_fd_simple: sock_release(newsock); put_filp(newfile); put_unused_fd(newfd); goto out_put; out_fd: fput(newfile); put_unused_fd(newfd); goto out_put; }
/* * Accept a pending connection. The TCP layer now gives BSD semantics. */ int inet_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk1 = sock->sk; int err = -EINVAL; //直接调用传输层的accept()回调,TCP为inet_csk_accept(),该回调需要返回 //新的通信套接字对应的TCB struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err); if (!sk2) goto do_err; lock_sock(sk2); BUG_TRAP((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); //将新的TCB和新的套接字结构sock关联起来 sock_graft(sk2, newsock); //设置套接字结构中的状态为”已连接“ newsock->state = SS_CONNECTED; err = 0; release_sock(sk2); do_err: return err; } static inline void sock_graft(struct sock *sk, struct socket *parent) { write_lock_bh(&sk->sk_callback_lock); sk->sk_sleep = &parent->wait; parent->sk = sk; sk->sk_socket = parent; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); }
/* * This will accept the next outstanding connection. */ //sk为监听套接字传输控制块 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *newsk; int error; lock_sock(sk); //传入到的套接字的TCB状态必须是TCP_LISTEN /* We need to make sure that this socket is listening, * and that it has something pending. */ error = -EINVAL; if (sk->sk_state != TCP_LISTEN) goto out_err; //如果监听套接字的accept接收队列为空,则需要根据当前套接字是否阻塞进行操作 /* Find already established connection */ if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { //根据是否阻塞决定一个超时值,如果为非阻塞模式,那么timeo将为0 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); //对于非阻塞模式,直接返回重试错误 /* If this is a non blocking socket don't sleep */ error = -EAGAIN; if (!timeo) goto out_err; //休眠等待accept接收队列非空 error = inet_csk_wait_for_connect(sk, timeo); if (error) goto out_err; } //到这里,说明当前accept队列已经有连接可以接收(可能是阻塞后被唤醒的) newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); out: release_sock(sk); return newsk; out_err: newsk = NULL; *err = error; goto out; }
在之前介绍服务器端三次握手过程的笔记中我们已经知道：三次握手完成后，这些等待被accept()取走的套接字已经被放到了监听套接字的accept接收队列中。下面的reqsk_queue_get_child()就是从该队列中取出队首的已完成连接。
static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue, struct sock *parent) { //从accept接收队列上将第一个已完成连接的请求块摘除 struct request_sock *req = reqsk_queue_remove(queue); //记录与该请求块关联的真正的传输控制块(在三次握手完成时创建) struct sock *child = req->sk; BUG_TRAP(child != NULL); //更新监听套接字的accept接收队列中的计数信息(即当前已完成连接的请求数目) sk_acceptq_removed(parent); //释放该连接请求块,它已经完成了它的使命 __reqsk_free(req); return child; } //取出指定队列的第一个节点,典型的链表操作 static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue) { struct request_sock *req = queue->rskq_accept_head; BUG_TRAP(req != NULL); queue->rskq_accept_head = req->dl_next; if (queue->rskq_accept_head == NULL) queue->rskq_accept_tail = NULL; return req; } static inline void sk_acceptq_removed(struct sock *sk) { sk->sk_ack_backlog--; }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。