赞
踩
1、初始化一个napi_struct 结构。此时 NAPI的状态为 NAPI_STATE_SCHED状态。。
void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { INIT_LIST_HEAD(&napi->poll_list); hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); napi->timer.function = napi_watchdog; init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); napi->rx_count = 0; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) netdev_err_once(dev, "%s() called with weight %d\n", __func__, weight); napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif set_bit(NAPI_STATE_SCHED, &napi->state);//初始化为NAPI_STATE_SCHED状态 napi_hash_add(napi); }
2、使能一个napi_struct, napi_enable. 此时NAPI的状态全被清除
static inline void napi_enable(struct napi_struct *n)
{
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
clear_bit(NAPI_STATE_NPSVC, &n->state);
}
3、网卡硬件中断产生, 执行中断处理函数,并调用
bool napi_schedule_prep(struct napi_struct *n);和 napi_schedule。
napi_schedule_prep 检测NAPI状态是否是NAPI_STATE_SCHED或NAPI_STATE_DISABLE状态,是的话就返回,不是则设置NAPI 为NAPI_STATE_SCHED状态,然后执行napi_schedule,将NAPI挂到队列里面,触发一个软件中断。
//网卡接收中断处理函数 irqreturn xxx_intr (int irq, xxx *data) { if (likely(napi_schedule_prep(&data->napi_rx))) { __napi_schedule(&data->napi_rx); } else { //clear irq } return IRQ_HANDLED; } void __napi_schedule(struct napi_struct *n) { unsigned long flags; local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); } static inline void ____napi_schedule(struct softnet_data *sd, struct napi_struct *napi) { //将NAPI加入软件中断的poll_list链表里面 list_add_tail(&napi->poll_list, &sd->poll_list); //发出一个软件中断 __raise_softirq_irqoff(NET_RX_SOFTIRQ); }
4、处理软件中断, 此时状态仍为 NAPI_STATE_SCHED状态
static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + usecs_to_jiffies(netdev_budget_usecs); int budget = netdev_budget; LIST_HEAD(list); LIST_HEAD(repoll); local_irq_disable(); list_splice_init(&sd->poll_list, &list); local_irq_enable(); for (;;) { //轮训NAPI 队列 struct napi_struct *n; if (list_empty(&list)) { if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) goto out; break; } //取出一个NAPI n = list_first_entry(&list, struct napi_struct, poll_list); //执行poll操作 budget -= napi_poll(n, &repoll); /* If softirq window is exhausted then punt. * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. */ if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) { sd->time_squeeze++; break; } } local_irq_disable(); list_splice_tail_init(&sd->poll_list, &list); list_splice_tail(&repoll, &list); list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) __raise_softirq_irqoff(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd); out: __kfree_skb_flush(); } //将NAPI节点从队列里面删除,再执行驱动自己注册的收包回调 static int napi_poll(struct napi_struct *n, struct list_head *repoll) { void *have; int work, weight; //先从队列里面删除 list_del_init(&n->poll_list); have = netpoll_poll_lock(n); weight = n->weight; /* This NAPI_STATE_SCHED test is for avoiding a race * with netpoll's poll_napi(). Only the entity which * obtains the lock and sees NAPI_STATE_SCHED set will * actually make the ->poll() call. Therefore we avoid * accidentally calling ->poll() when NAPI is not scheduled. */ work = 0; if (test_bit(NAPI_STATE_SCHED, &n->state)) { work = n->poll(n, weight);//调用我们驱动里面注册的回调 trace_napi_poll(n, work, weight); } WARN_ON_ONCE(work > weight); if (likely(work < weight)) goto out_unlock; /* Drivers must not modify the NAPI state if they * consume the entire weight. In such cases this code * still "owns" the NAPI instance and therefore can * move the instance around on the list at-will. */ if (unlikely(napi_disable_pending(n))) { napi_complete(n); goto out_unlock; } if (n->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. */ napi_gro_flush(n, HZ >= 1000); } gro_normal_list(n); /* Some drivers may have called napi_schedule * prior to exhausting their budget. */ if (unlikely(!list_empty(&n->poll_list))) { pr_warn_once("%s: Budget exhausted after napi rescheduled\n", n->dev ? n->dev->name : "backlog"); goto out_unlock; } list_add_tail(&n->poll_list, repoll); out_unlock: netpoll_poll_unlock(have); return work; }
4、执行驱动收报回调,最后掉用napi_complete,将NAPI NAPIF_STATE_SCHED 状态清除.
bool napi_complete(struct napi_struct *n, int work_done) { unsigned long flags, val, new; /* * 1) Don't let napi dequeue from the cpu poll list * just in case its running on a different cpu. * 2) If we are busy polling, do nothing here, we have * the guarantee we will be called later. */ if (unlikely(n->state & (NAPIF_STATE_NPSVC | NAPIF_STATE_IN_BUSY_POLL))) return false; if (n->gro_bitmask) { unsigned long timeout = 0; if (work_done) timeout = n->dev->gro_flush_timeout; /* When the NAPI instance uses a timeout and keeps postponing * it, we need to bound somehow the time packets are kept in * the GRO layer */ napi_gro_flush(n, !!timeout); if (timeout) hrtimer_start(&n->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); } gro_normal_list(n); if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ local_irq_save(flags); list_del_init(&n->poll_list); local_irq_restore(flags); } do { val = READ_ONCE(n->state); WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); /* If STATE_MISSED was set, leave STATE_SCHED set, * because we will call napi->poll() one more time. * This C code was suggested by Alexander Duyck to help gcc. */ new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED * NAPIF_STATE_SCHED; } while (cmpxchg(&n->state, val, new) != val); if (unlikely(val & NAPIF_STATE_MISSED)) { __napi_schedule(n); return false; } return true; }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。