赞
踩
- static inline int rt_prio(int prio)
- {
- if (unlikely(prio < MAX_RT_PRIO))
- return 1;
- return 0;
- }
- #define MAX_USER_RT_PRIO 100
- #define MAX_RT_PRIO MAX_USER_RT_PRIO
- /*
- * Adding/removing a task to/from a priority array:
- */
- static void
- enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
- {
- struct sched_rt_entity *rt_se = &p->rt;
-
- if (flags & ENQUEUE_WAKEUP)
- rt_se->timeout = 0;
-
- enqueue_rt_entity(rt_se, flags); //enqueue到rt调度器正统的queueu当中
-
- if (!task_current(rq, p) && p->nr_cpus_allowed > 1) //可push,从这个条件判断可以看出rt中也是维护了一个可以Push的qeueue,将可以push的task放到这个queue当中去。
- enqueue_pushable_task(rq, p);
- }
- static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
- {
- struct rq *rq = rq_of_rt_se(rt_se);
-
- dequeue_rt_stack(rt_se, flags); //先将原来的task dequeue,如果task已经不在runqueue当中,其实相当于什么都不做。
- for_each_sched_rt_entity(rt_se) //这个地方为什么要用for each的方式呢?其实只对于CONFIG_RT_GROUP_SCHED才需要循环,而对于非CONFIG_RT_GROUP_SCHED来讲,不需要循环的。
- __enqueue_rt_entity(rt_se, flags);//将当前task enqueue到rt 的runqueue当中
- enqueue_top_rt_rq(&rq->rt);
- }
- static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
- {
- struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
- struct rt_prio_array *array = &rt_rq->active;
- struct rt_rq *group_rq = group_rt_rq(rt_se);
- struct list_head *queue = array->queue + rt_se_prio(rt_se);//通过此处可以判断,rt task的管理为每个优先级都维护一个list,每个优先级都对应一个list。
-
- /*
- * Don't enqueue the group if its throttled, or when empty.
- * The latter is a consequence of the former when a child group
- * get throttled and the current group doesn't have any other
- * active members.
- */
- if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
- if (rt_se->on_list)
- __delist_rt_entity(rt_se, array);
- return;
- }//如果task被throttled,则不做enqueue的操作。
-
- if (move_entity(flags)) {
- WARN_ON_ONCE(rt_se->on_list);
- if (flags & ENQUEUE_HEAD) //通过flags可以操控其放在队尾还是对的开始。
- list_add(&rt_se->run_list, queue);
- else
- list_add_tail(&rt_se->run_list, queue);
-
- __set_bit(rt_se_prio(rt_se), array->bitmap); //每个Priority一个标志位,如果被置位表示对应的Priority还有task。提供了一种快速判断某一个priority是否还有task未处理的方式。
- rt_se->on_list = 1;
- }
- rt_se->on_rq = 1;
-
- inc_rt_tasks(rt_se, rt_rq);
- }
根据以上代码,看上去,rt task的管理类似如下图所示:-
- /*
- * This is the priority-queue data structure of the RT scheduling class:
- */
- struct rt_prio_array {
- DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
- struct list_head queue[MAX_RT_PRIO];
- };
-
-
queue相当于是一个二维数组,纵向根据优先级排序,横向则同优先级的task。同时为加速访问,维护一个bitmap,而bitmap的每个bit位的0或者1表示对应的优先级是否有task。即对应的queue是否为空。
pushable task的管理
在enqueue task的时候,会判断这个task是否可以在其他cpu上面run,如果可以则将其添加到pushable list当中。
- static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
- {
- plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);//如果已经存在plist当中,就先删除,此函数会自动判断,如果不在list当中就不会删除。
- plist_node_init(&p->pushable_tasks, p->prio); //重新初始化
- plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks); //重新加入
- //为什么要经历一次重新删除又重新加入呢?主要是在add的时候,可以加入到队尾,所以,原本已经在plist中的话,会重新操作一次,使其到队尾当中。
-
- /* Update the highest prio pushable task */
- if (p->prio < rq->rt.highest_prio.next)
- rq->rt.highest_prio.next = p->prio;
- }
-
- 关于plist:
- struct plist_head {
- struct list_head node_list;
- };
-
- struct plist_node {
- int prio;
- struct list_head prio_list;
- struct list_head node_list;
- };
- 可以看到其实,此处有机会处于两个表当中,一个是priolist一个nodelist。
- 而下面的plist_add则反映了pushable task的管理。
-
- /**
- * plist_add - add @node to @head
- *
- * @node: &struct plist_node pointer
- * @head: &struct plist_head pointer
- */
- void plist_add(struct plist_node *node, struct plist_head *head)
- {
- struct plist_node *first, *iter, *prev = NULL;
- struct list_head *node_next = &head->node_list;
-
- plist_check_head(head);
- WARN_ON(!plist_node_empty(node));
- WARN_ON(!list_empty(&node->prio_list));
-
- if (plist_head_empty(head))
- goto ins_node;
-
- first = iter = plist_first(head);
-
- do {
- if (node->prio < iter->prio) { //在prio list中查找当前node prio应该处于的位置。priolist按照Prio从低到高排序。
- node_next = &iter->node_list;
- break;
- }
-
- prev = iter;
- iter = list_entry(iter->prio_list.next,
- struct plist_node, prio_list);
- } while (iter != first);
-
- if (!prev || prev->prio != node->prio)//没有合适的priolist存在,则将当前node的设置为此prio的第1个节点
- list_add_tail(&node->prio_list, &iter->prio_list);
- ins_node:
- list_add_tail(&node->node_list, node_next);//加入到node list的最后。
-
- plist_check_head(head);
- }
-
其结果如下所示:
如何从一个run queue中选择一个task出来执行
- static struct task_struct *
- pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
- {
- struct task_struct *p;
-
- WARN_ON_ONCE(prev || rf);
-
- if (!sched_rt_runnable(rq)) //如果队列中没有task,肯定选不到task。
- return NULL;
-
- p = _pick_next_task_rt(rq);
- set_next_task_rt(rq, p, true);
- return p;
- }
- static struct task_struct *_pick_next_task_rt(struct rq *rq)
- {
- struct sched_rt_entity *rt_se;
- struct rt_rq *rt_rq = &rq->rt;
-
- do {
- rt_se = pick_next_rt_entity(rq, rt_rq);//从rt的rt_rq中选优先级最高的且最选入队的task出来执行
- BUG_ON(!rt_se);
- rt_rq = group_rt_rq(rt_se);//rt_group相关。这个后续研究
- } while (rt_rq);
-
- return rt_task_of(rt_se);
- }
-
-
- static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
- struct rt_rq *rt_rq)
- {
- struct rt_prio_array *array = &rt_rq->active;//如前所述,active是一个rt_prio_array结构体,这个结构体对象中保存了一个bitmap,一个类似二维数组的list。bitmap用来表示对应的优级的queue是否为空。list存放着每一个优先级对应的queue。
- struct sched_rt_entity *next = NULL;
- struct list_head *queue;
- int idx;
-
- idx = sched_find_first_bit(array->bitmap);//第1个不为0的bit位置。
- BUG_ON(idx >= MAX_RT_PRIO);
-
- queue = array->queue + idx; //取出对应的queue
- next = list_entry(queue->next, struct sched_rt_entity, run_list);//取这个queue的首个元素。
-
- return next;
- }
如上代码所示,根据前面的rt task的管理知道,rt的task采用list管理,list的元素都又是一个优先级相等的rt task list。采用bitmap的方式标记哪些优先级的list还不为空,先找这个list再取出元素。
为task选择CPU
为task选择cpu是调度器的核心工作之一。和其他调度器一样,当task被wakeup的时候,会为这个task重新选择一个合适的cpu。而给RT task选CPU就是通过如下的function进行的。
- static int
- select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
- {
- struct task_struct *curr;
- struct rq *rq;
-
- /* For anything but wake ups, just return the task_cpu */
- if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
- goto out;
-
- rq = cpu_rq(cpu);
-
- rcu_read_lock();
- curr = READ_ONCE(rq->curr); /* unlocked access */
-
- /*
- * If the current task on @p's runqueue is an RT task, then
- * try to see if we can wake this RT task up on another
- * runqueue. Otherwise simply start this RT task
- * on its current runqueue.
- *
- * We want to avoid overloading runqueues. If the woken
- * task is a higher priority, then it will stay on this CPU
- * and the lower prio task should be moved to another CPU.
- * Even though this will probably make the lower prio task
- * lose its cache, we do not want to bounce a higher task
- * around just because it gave up its CPU, perhaps for a
- * lock?
- *
- * For equal prio tasks, we just let the scheduler sort it out.
- *
- * Otherwise, just let it ride on the affined RQ and the
- * post-schedule router will push the preempted task away
- *
- * This test is optimistic, if we get it wrong the load-balancer
- * will have to sort it out.
- */
- if (curr && unlikely(rt_task(curr)) &&
- (curr->nr_cpus_allowed < 2 ||
- curr->prio <= p->prio)) {//当前cpu不是idle,且正在run的是一个rt task且当前cpu只允许run在一个cpu当中或者当前task的优先级更高,则要从别的cpu中选一个出来。
- int target = find_lowest_rq(p);//看名字应该是选一个正在run的优先级比较低task的cpu出来。
-
- /*
- * Don't bother moving it if the destination CPU is
- * not running a lower priority task.
- */
- if (target != -1 &&
- p->prio < cpu_rq(target)->rt.highest_prio.curr) //不抢优先级比当前task高的cpu。
- cpu = target;
- }
- rcu_read_unlock();
-
- out:
- return cpu;
- }
总结选择CPU的标准如下:
1. 优先考虑来选择cpu的task之前在的cpu,这应该是尽量减少对cpu cache的干扰。
2. 满足如下条件则从其他cpu当中选择
a. 之前所在的cpu中有task正在run,而且还是一个rt task
b. 这个正在run的rt task只能跑在一颗cpu上面或者其优先级比来选择cpu的task优先高
c. 其他cpu中rt 优先级最低的cpu
d. 如果其他cpu的task优先级都比这个task优先级高,那就还保留在原来的cpu。
综上可以看出,rt task在选择CPU,将优先级考虑在第1位的。尽量选择那些正在run或者马上就要run的优先级比自己小的cpu去摆。不会去考虑cpu的能力状况,例如大小核之类的。可能是满足优先级高的响应的latency最短吧。
- static int find_lowest_rq(struct task_struct *task)
- {
- struct sched_domain *sd;
- struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
- int this_cpu = smp_processor_id();
- int cpu = task_cpu(task);
-
- /* Make sure the mask is initialized first */
- if (unlikely(!lowest_mask))
- return -1;
-
- if (task->nr_cpus_allowed == 1)
- return -1; /* No other targets possible */
-
- if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) //寻找cpu的优先级比当前task还低的cpu出来
- return -1; /* No targets found */
-
- /*
- * At this point we have built a mask of CPUs representing the
- * lowest priority tasks in the system. Now we want to elect
- * the best one based on our affinity and topology.
- *
- * We prioritize the last CPU that the task executed on since
- * it is most likely cache-hot in that location.
- */
- if (cpumask_test_cpu(cpu, lowest_mask))//如果当前cpu位于其中,则直接返回这个cpu即可。
- return cpu;
-
- /*
- * Otherwise, we consult the sched_domains span maps to figure
- * out which CPU is logically closest to our hot cache data.
- */
- if (!cpumask_test_cpu(this_cpu, lowest_mask))//v如果当前来选核的cpu的优先级比task的要高,直接忽略掉这颗cpu,但是如果比自己低的话,则要看是否一个共同域当中。
- this_cpu = -1; /* Skip this_cpu opt if not among lowest */
-
- rcu_read_lock();
- for_each_domain(cpu, sd) {
- if (sd->flags & SD_WAKE_AFFINE) {
- int best_cpu;
-
- /*
- * "this_cpu" is cheaper to preempt than a
- * remote processor.
- */
- if (this_cpu != -1 && //当前cpu的优先级比这个task的还要低,且处在共同域当中
- cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
- rcu_read_unlock();
- return this_cpu;
- }
-
- best_cpu = cpumask_first_and(lowest_mask,
- sched_domain_span(sd)); //选择lowest_mask与对应domain共域的cpui出来。
- if (best_cpu < nr_cpu_ids) {
- rcu_read_unlock();
- return best_cpu;
- }
- }
- }
- rcu_read_unlock();
-
- /*
- * And finally, if there were no matches within the domains
- * just give the caller *something* to work with from the compatible
- * locations.
- */
- if (this_cpu != -1)
- return this_cpu;
-
- cpu = cpumask_any(lowest_mask);
- if (cpu < nr_cpu_ids)
- return cpu;
-
- return -1;
- }
Summary
RT task是一种对响应延时比较敏感的task。但是cpu里面总是会有多个task在run,为了区分,只能采用优先级的概念。优先级高的优先run,优先级低的后run。RT对task的管理采用类似二维数组的方式,纵向根据优先级排,横向的为相同优先级的task。相同优先级的则按照入队顺序支行。在选择cpu时,则找优先级最低的cpu。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。