In our system there are several kinds of softirqs (they run as the bottom half of hardware interrupts):
enum
{
	HI_SOFTIRQ=0,
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,
	NET_RX_SOFTIRQ,
	BLOCK_SOFTIRQ,
	IRQ_POLL_SOFTIRQ,
	TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ,
	HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
			    numbering. Sigh! */
	RCU_SOFTIRQ,     /* Preferable RCU should always be the last softirq */

	NR_SOFTIRQS
};
Among them, NET_TX_SOFTIRQ and NET_RX_SOFTIRQ are the softirqs used for network transmit and receive.
The system creates one such kernel thread per CPU, see kernel/softirq.c:
static struct smp_hotplug_thread softirq_threads = {
	.store			= &ksoftirqd,
	.thread_should_run	= ksoftirqd_should_run,
	.thread_fn		= run_ksoftirqd,
	.thread_comm		= "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
				  takeover_tasklets);
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

	return 0;
}
early_initcall(spawn_ksoftirqd);

static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();
	if (local_softirq_pending()) {
		__do_softirq();
		local_irq_enable();
		....
	}
}

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	__u32 pending;
	....
	pending = local_softirq_pending();	/* get the pending softirq state of this CPU
						 * (which softirqs need handling) */
	....
	while ((softirq_bit = ffs(pending))) {	/* handle them one by one */
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);		/* run the callback registered via open_softirq() */
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}
}
When __napi_schedule() is called, it raises a NET_RX_SOFTIRQ softirq, which triggers the kernel thread to run.
Note: before calling __napi_schedule(), the driver must call napi_schedule_prep() to check that the napi is not in the NAPI_STATE_DISABLE state and to set the NAPI_STATE_SCHED bit. After processing at most budget packets, the driver must call napi_complete(napi) to clear the NAPI_STATE_SCHED bit.
bool napi_schedule_prep(struct napi_struct *n)
{
	return !napi_disable_pending(n) &&
		!test_and_set_bit(NAPI_STATE_SCHED, &n->state);
}

static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	list_add_tail(&napi->poll_list, &sd->poll_list);	/* add to the per-CPU poll list */
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);			/* raise the softirq */
}

void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);	/* tell the kernel that NET_RX_SOFTIRQ has been raised
					 * (set the pending bit) */
}

void __napi_complete(struct napi_struct *n)
{
	....
	clear_bit(NAPI_STATE_SCHED, &n->state);
}
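To make the calling convention concrete, here is a minimal driver-side sketch of this API; struct my_adapter, my_irq_handler, my_poll and my_clean_rx are hypothetical names, not taken from any real driver:

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct my_adapter {			/* hypothetical driver-private data */
	struct napi_struct napi;
	/* device registers, RX/TX rings, ... */
};

/* Hypothetical RX cleanup: would process up to 'budget' descriptors and
 * return how many packets were handled. */
static int my_clean_rx(struct my_adapter *adapter, int budget)
{
	return 0;
}

static irqreturn_t my_irq_handler(int irq, void *data)
{
	struct my_adapter *adapter = data;

	/* Hardware RX interrupts are masked here (device specific), then: */
	if (napi_schedule_prep(&adapter->napi))		/* sets NAPI_STATE_SCHED */
		__napi_schedule(&adapter->napi);	/* raises NET_RX_SOFTIRQ */

	return IRQ_HANDLED;
}

static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_adapter *adapter = container_of(napi, struct my_adapter, napi);
	int work_done = my_clean_rx(adapter, budget);	/* handles <= budget packets */

	if (work_done < budget) {
		/* All pending work done: clear NAPI_STATE_SCHED and re-enable
		 * the device's RX interrupt (device specific). */
		napi_complete(napi);
	}

	return work_done;
}

The division of labor matches the description above: the hard-IRQ half only marks the napi as scheduled, while the actual packet processing happens later in the poll callback, bounded by budget.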
The kernel thread then calls run_ksoftirqd -> __do_softirq:
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
	unsigned long old_flags = current->flags;
	int max_restart = MAX_SOFTIRQ_RESTART;
	struct softirq_action *h;
	bool in_hardirq;
	__u32 pending;
	int softirq_bit;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
	 * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
	 * again if the socket is related to swap.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();	/* get the softirq pending state */
	account_irq_enter_time(current);

	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
	in_hardirq = lockdep_softirq_start();

restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	while ((softirq_bit = ffs(pending))) {
		unsigned int vec_nr;
		int prev_count;

		h += softirq_bit - 1;

		vec_nr = h - softirq_vec;
		prev_count = preempt_count();

		kstat_incr_softirqs_this_cpu(vec_nr);

		trace_softirq_entry(vec_nr);
		h->action(h);	/* the callback we registered via open_softirq() */
		trace_softirq_exit(vec_nr);
		if (unlikely(prev_count != preempt_count())) {
			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
			       vec_nr, softirq_to_name[vec_nr], h->action,
			       prev_count, preempt_count());
			preempt_count_set(prev_count);
		}
		h++;
		pending >>= softirq_bit;
	}

	rcu_bh_qs();
	local_irq_disable();

	pending = local_softirq_pending();
	if (pending) {
		if (time_before(jiffies, end) && !need_resched() &&
		    --max_restart)
			goto restart;

		wakeup_softirqd();
	}

	lockdep_softirq_end(in_hardirq);
	account_irq_exit_time(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	WARN_ON_ONCE(in_interrupt());
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}
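For completeness, the path by which a pending softirq actually gets serviced: if the bit is raised in hard-IRQ context, __do_softirq() runs on irq_exit(); otherwise ksoftirqd is woken up. This is visible in raise_softirq(), paraphrased here from kernel/softirq.c (details may differ slightly between kernel versions):

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);	/* set the pending bit, as shown above */

	/*
	 * If we're in an interrupt or softirq, we're done: the softirq will
	 * actually run when we return from the irq or softirq. Otherwise wake
	 * up ksoftirqd to make sure the softirq is handled soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}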
In net/core/dev.c, the function net_dev_init() initializes the per-CPU skb queues and registers our softirqs, NET_TX_SOFTIRQ and NET_RX_SOFTIRQ, whose handlers are net_tx_action and net_rx_action:
static int __init net_dev_init(void)
{
....
for_each_possible_cpu(i) {
struct softnet_data *sd = &per_cpu(softnet_data, i);
skb_queue_head_init(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
INIT_LIST_HEAD(&sd->poll_list);
}
open_softirq(NET_TX_SOFTIRQ, net_tx_action);	/* register the softirq callback */
open_softirq(NET_RX_SOFTIRQ, net_rx_action);	/* register the softirq callback */
}
Function prototype:

void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
		    int (*poll)(struct napi_struct *, int), int weight);

What it does:
1. Initializes the napi structure members (napi->poll, napi->weight, napi->state, napi->timer, INIT_LIST_HEAD(&napi->poll_list)) and links the napi onto the device's napi list.
2. Newer kernels additionally create a kernel thread here: napi_kthread_create() spawns a per-napi task whose thread function is napi_threaded_poll:

if (dev->threaded && napi_kthread_create(napi))
	dev->threaded = 0;

static int napi_kthread_create(struct napi_struct *n)
{
	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
				n->dev->name, n->napi_id);	/* the thread function */
}

netif_napi_add() is normally called by the NIC driver itself, which passes in its poll function; a rough usage sketch follows.
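A typical probe-time call looks roughly like this; my_probe and struct my_adapter are hypothetical, my_poll is the poll callback sketched earlier, and NAPI_POLL_WEIGHT (64) is the conventional weight:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

struct my_adapter {			/* hypothetical driver-private data */
	struct napi_struct napi;
	struct net_device *netdev;
};

static int my_probe(/* bus-specific arguments omitted */)
{
	struct net_device *netdev = alloc_etherdev(sizeof(struct my_adapter));
	struct my_adapter *adapter;

	if (!netdev)
		return -ENOMEM;
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;

	/* Hook the driver's poll routine into NAPI; the weight is the
	 * per-poll budget later passed to my_poll(). */
	netif_napi_add(netdev, &adapter->napi, my_poll, NAPI_POLL_WEIGHT);

	/* napi_enable(&adapter->napi) is called later, when the interface
	 * is brought up (ndo_open). Error handling is omitted here. */
	return register_netdev(netdev);
}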
When the NIC raises an interrupt, the interrupt handler does very little: it disables the device's interrupt and calls __napi_schedule().
__napi_schedule() then takes one of two paths:
1. If a napi kernel thread has been created, it is woken up with wake_up_process(thread), and the thread function napi_threaded_poll does the packet processing:
static int napi_threaded_poll(void *data)
{
	struct napi_struct *napi = data;
	bool repoll;

	for (;;) {
		__napi_poll(napi, &repoll);
	}
}

static int __napi_poll(struct napi_struct *n, bool *repoll)
{
	int work, weight = n->weight;

	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
		work = n->poll(n, weight);	/* the poll function registered by the NIC driver */
	}
}
2. Otherwise, a NET_RX_SOFTIRQ softirq is raised via __raise_softirq_irqoff(NET_RX_SOFTIRQ), and the softirq handler net_rx_action is eventually called:
static void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
	LIST_HEAD(list);
	LIST_HEAD(repoll);
	int budget = netdev_budget;
	struct napi_struct *n;

	/* move the per-CPU napi list onto a local list */
	list_splice_init(&sd->poll_list, &list);

	for (;;) {
		if (list_empty(&list))
			break;

		n = list_first_entry(&list, struct napi_struct, poll_list);
		budget -= napi_poll(n, &repoll);
	}
}
napi_poll -> __napi_poll  /* same as above */
static int napi_kthread_create(struct napi_struct *n)
{
	int err = 0;

	n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
				n->dev->name, n->napi_id);
	return err;
}

static int napi_threaded_poll(void *data)
{
	struct napi_struct *napi = data;
	void *have;

	while (!napi_thread_wait(napi)) {
		for (;;) {
			.....
			__napi_poll(napi, &repoll);
			....
		}
	}
}

static int __napi_poll(struct napi_struct *n, bool *repoll)
{
	int work, weight;

	weight = n->weight;

	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
		work = n->poll(n, weight);	/* the driver's poll function */
	}
}
Call chain: napi_threaded_poll -> __napi_poll -> the driver's poll hook.
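Whether a device uses this threaded path at all is an opt-in. On kernels new enough to provide dev_set_threaded() (threaded NAPI, roughly 5.12+), it can be enabled per device, either from user space by writing 1 to /sys/class/net/<dev>/threaded or from the driver; a hedged sketch, with my_enable_threaded_napi as a hypothetical helper:

#include <linux/netdevice.h>

/* Hypothetical helper: opt the device into threaded NAPI so that
 * __napi_schedule() wakes the per-napi kthread instead of raising
 * NET_RX_SOFTIRQ. */
static void my_enable_threaded_napi(struct net_device *netdev)
{
	if (dev_set_threaded(netdev, true))
		netdev_warn(netdev, "could not enable threaded NAPI\n");
}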
In the softirq handler net_rx_action, the napi poll_list is iterated and napi_poll() is called for each entry.
napi_poll() first removes the napi node from the list and then calls __napi_poll(),
which invokes the poll hook registered by the device. Call chain: net_rx_action -> napi_poll -> __napi_poll -> poll hook. A simplified napi_poll is shown below.
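A trimmed view of napi_poll() matching that call chain (paraphrased from net/core/dev.c on kernels where __napi_poll() exists; locking details and budget bookkeeping are omitted):

static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
	bool do_repoll = false;
	void *have;
	int work;

	list_del_init(&n->poll_list);		/* remove the napi from the local list */

	have = netpoll_poll_lock(n);

	work = __napi_poll(n, &do_repoll);	/* ends up in the driver's poll hook */

	if (do_repoll)
		list_add_tail(&n->poll_list, repoll);	/* still has work: poll it again later */

	netpoll_poll_unlock(have);

	return work;
}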