1、大家应该都知道,Linux内核中用于进程调度的主要函数就是schedule函数。当然,要进行进程调度,有许多条件需要满足;现在假设所有的条件都已经满足,要进行调度了。
/*
* schedule() is the main scheduler function.*/
asmlinkage void __sched schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;
need_resched:
........
#ifdef CONFIG_SMP
.........
#endif
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
prev->sched_class->put_prev_task(rq, prev);
next = pick_next_task(rq, prev); /* 选择出下一个需要运行的进程;具体过程很复杂,与进程的三种分类和优先级都有关系,这里就不细讲了 */
if (likely(prev != next)) {
sched_info_switch(prev, next);
rq->nr_switches++;
rq->curr = next;
++*switch_count;
context_switch(rq, prev, next); /* unlocks the rq */
我们重点讲context_switch这个函数,先列出它的源码,如下所示:
/*
* context_switch - switch to the new MM and the new
* thread's register state.(看注释即可知道这个函数的主要作用:切换MM(内存管理方面的)和thread(CPU此时的状态)。)
*/
static inline void
context_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next); /* 对ARM来说是空函数 */
trace_sched_switch(rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
* For paravirt, this is coupled with an exit in switch_to to
* combine the page table reload and the switch backend into
* one hypercall.
*/
arch_enter_lazy_cpu_mode();
if (unlikely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
数据结构mm_struct描述着一个地址空间,每个进程控制块(task_struct)中有两个mm_struct结构指针。一个是mm,指向描述着本进程运行空间的mm_struct结构;如果mm为NULL,就表示本进程是个线程(对应上面代码中!mm的分支,此时next->active_mm直接沿用前一个任务的active_mm)。另一个是active_mm,指向进程或线程实际使用的空间,如果是线程就指向其所“挂靠”进程的空间。“内核线程”则使用系统空间,其指针active_mm指向描述着内核空间的mm_struct结构。下面是struct task_struct结构中的一小段:
struct list_head tasks;
struct mm_struct *mm, *active_mm;
/* task state */
struct linux_binfmt *binfmt;
int exit_state;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned int personality;
unsigned did_exec:1;
pid_t pid;
pid_t tgid;
现在列出switch_mm函数的源码,如下所示:
/*
* This is the actual mm switch as far as the scheduler
* is concerned. No registers are touched. We avoid
* calling the CPU specific function when the mm hasn't
* actually changed.
*/
static inline void
switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
#ifdef CONFIG_MMU
unsigned int cpu = smp_processor_id();
#ifdef CONFIG_SMP
/* check for possible thread migration */
if (!cpus_empty(next->cpu_vm_mask) && !cpu_isset(cpu, next->cpu_vm_mask))
__flush_icache_all();
#endif
if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) {
check_context(next);
cpu_switch_mm(next->pgd, next);
切换空间,实际上就是换一套页面映射目录和映射表。看下这个宏的展开:
/*
 * cpu_switch_mm(pgd, mm) passes pgd through virt_to_phys() and hands the
 * result, together with mm, to cpu_do_switch_mm(), which in turn calls
 * through the switch_mm member of the global `processor` object.
 */
#define cpu_switch_mm(pgd, mm) cpu_do_switch_mm(virt_to_phys(pgd), mm)
#define cpu_do_switch_mm(pgd, mm) processor.switch_mm(pgd, mm)
这里出现了一个新的结构体struct processor,其定义如下:
/*
* struct processor - a table of function pointers, one per operation.
*
* Every member below is a function pointer. A single object of this
* type, named `processor`, is declared extern by this declaration.
*
* Don't change this structure - ASM code
* relies on it.
*/
extern struct processor {
/* MISC
* get data abort address/flags
* (called with the pc as its only argument)
*/
void (*_data_abort)(unsigned long pc);
/*
* Retrieve prefetch fault address
* (called with lr; returns an unsigned long)
*/
unsigned long (*_prefetch_abort)(unsigned long lr);
/*
* Set up any processor specifics
*/
void (*_proc_init)(void);
/*
* Disable any processor specifics
*/
void (*_proc_fin)(void);
/*
* Special stuff for a reset
* (takes an address; declared noreturn, so it never returns to the caller)
*/
void (*reset)(unsigned long addr) __attribute__((noreturn));
/*
* Idle the processor
* (returns an int; its meaning is not visible in this declaration)
*/
int (*_do_idle)(void);
/*
* Processor architecture specific
*/
/*
* clean a virtual address range from the
* D-cache without flushing the cache.
* The range is given as a start address and a size.
*/
void (*dcache_clean_area)(void *addr, int size);
/*
* Set the page table
* (takes the physical address of the pgd and the mm_struct it belongs to)
*/
void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm);
/*
* Set a possibly extended PTE. Non-extended PTEs should
* ignore 'ext'.
*/
void (*set_pte_ext)(pte_t *ptep, pte_t pte, unsigned int ext);
} processor;
一看就知道这个结构应该和具体CPU有关,而且成员都是函数指针,那么这些函数指针是在哪里赋值的呢?
举个例子:在文件linux/arch/arm/mm/proc-sa110.S中有如下定义: