ARM Linux的进程调度

1、大家应该都知道，Linux内核中用于进程调度的主要函数就是schedule函数。当然，要进行进程调度，有许多条件需要满足；现在假设所有的条件都已经满足，要进行调度了。
/*
* schedule（） is the main scheduler function.
*/
asmlinkage void __sched schedule（void）
{
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;

need_resched:
........

#ifdef CONFIG_SMP
.........
#endif

if （unlikely（！rq->nr_running））
idle_balance（cpu, rq）;

prev->sched_class->put_prev_task（rq, prev）;
next = pick_next_task（rq, prev）;这一步选出下一个需要运行的进程。具体的选择过程很复杂，与进程的三种分类和优先级都有关系，这里就不细讲了。

if （likely（prev ！= next）） {
sched_info_switch（prev, next）;

rq->nr_switches++;
rq->curr = next;
++*switch_count;

context_switch（rq, prev, next）; /* unlocks the rq */我们重点讲这个函数，先列出它的源码，如下所示：
/*
* context_switch - switch to the new MM and the new
* thread's register state.（看注释即可知道，这个函数的主要作用就是切换MM（内存管理方面的）和thread（CPU此时的状态）。）
*/
static inline void
context_switch（struct rq *rq, struct task_struct *prev,
struct task_struct *next）
{
struct mm_struct *mm, *oldmm;

prepare_task_switch（rq, prev, next）;对ARM来说是空函数
trace_sched_switch（rq, prev, next）;
mm = next->mm;
oldmm = prev->active_mm;
/*
* For paravirt, this is coupled with an exit in switch_to to
* combine the page table reload and the switch backend into
* one hypercall.
*/
arch_enter_lazy_cpu_mode（）;

if （unlikely（！mm）） {
next->active_mm = oldmm;
atomic_inc（&oldmm->mm_count）;
enter_lazy_tlb（oldmm, next）;
} else
switch_mm（oldmm, mm, next）;
数据结构mm_struct描述着一个地址空间，每个进程控制块中有两个mm_struct结构指针。一个是mm，指向描述着本进程运行空间的mm_struct结构，如果mm为NULL，就表示本进程是个线程（即上面代码中 ！mm 的分支：此时next没有自己的地址空间，直接借用oldmm作为active_mm）。而active_mm则指向进程或线程实际使用的空间，如果是线程就指向其所“挂靠”进程的空间。“内核线程”则使用系统空间，其指针active_mm指向描述着内核空间的mm_struct结构。下面是struct task_struct结构中的一小段：
struct list_head tasks;

struct mm_struct *mm, *active_mm;

/* task state */
struct linux_binfmt *binfmt;
int exit_state;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ？？？ */
unsigned int personality;
unsigned did_exec:1;
pid_t pid;
pid_t tgid;
现在列出switch_mm函数的源码，如下所示：/*
* This is the actual mm switch as far as the scheduler
* is concerned. No registers are touched. We avoid
* calling the CPU specific function when the mm hasn't
* actually changed.
*/
static inline void
switch_mm（struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk）
{
#ifdef CONFIG_MMU
unsigned int cpu = smp_processor_id（）;

#ifdef CONFIG_SMP
/* check for possible thread migration */
if （！cpus_empty（next->cpu_vm_mask） && ！cpu_isset（cpu, next->cpu_vm_mask））
__flush_icache_all（）;
#endif
if （！cpu_test_and_set（cpu, next->cpu_vm_mask） || prev ！= next） {
check_context（next）;
cpu_switch_mm（next->pgd, next）;切换空间，实际上就是换一套页面映射目录和映射表。看下这个宏的展开：
#define cpu_switch_mm（pgd,mm） cpu_do_switch_mm（virt_to_phys（pgd）,mm）
#define cpu_do_switch_mm（pgd,mm） processor.switch_mm（pgd,mm）
这里出现了一个新的结构体：
/*
* Don't change this structure - ASM code
* relies on it.
*/
extern struct processor {
/* MISC
* get data abort address/flags
*/
void （*_data_abort）（unsigned long pc）;
/*
* Retrieve prefetch fault address
*/
unsigned long （*_prefetch_abort）（unsigned long lr）;
/*
* Set up any processor specifics
*/
void （*_proc_init）（void）;
/*
* Disable any processor specifics
*/
void （*_proc_fin）（void）;
/*
* Special stuff for a reset
*/
void （*reset）（unsigned long addr） __attribute__（（noreturn））;
/*
* Idle the processor
*/
int （*_do_idle）（void）;
/*
* Processor architecture specific
*/
/*
* clean a virtual address range from the
* D-cache without flushing the cache.
*/
void （*dcache_clean_area）（void *addr, int size）;

/*
* Set the page table
*/
void （*switch_mm）（unsigned long pgd_phys, struct mm_struct *mm）;
/*
* Set a possibly extended PTE. Non-extended PTEs should
* ignore "ext".
*/
void （*set_pte_ext）（pte_t *ptep, pte_t pte, unsigned int ext）;
} processor;
一看就知道这个结构应该和具体CPU有关，而且成员都是函数指针，那么这些函数指针是在哪里赋值的呢？举个例子：在文件linux/arch/arm/mm/proc-sa110.S中有如下定义：
.type sa110_processor_functions, #object
ENTRY（sa110_processor_functions）
.word v4_early_abort
.word pabort_noifar
.word cpu_sa110_proc_init
.word cpu_sa110_proc_fin
.word cpu_sa110_reset
.word cpu_sa110_do_idle
.word cpu_sa110_dcache_clean_area
.word cpu_sa110_switch_mm
.word cpu_sa110_set_pte_ext
对应的有ENTRY（cpu_sa110_switch_mm）
#ifdef CONFIG_MMU
str lr, [sp, #-4]！
bl v4wb_flush_kern_cache_all@ clears IP
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer将指向新进程的首层映射表的指针写入MMU中的寄存器c2，即地址转换表基地址寄存器。
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs使指令和数据TLB全部失效，即丢掉TLB（地址转换高速缓存）中缓存的原有地址映射项。
ldr pc, [sp], #4
#else
mov pc, lr
#endif
这样，CPU的用户空间映射就改变了，但是对当前程序的运行没影响，因为现在CPU运行在系统空间中。注：进程的调度、切换只能在系统空间中进行。

if （cache_is_vivt（））
cpu_clear（cpu, prev->cpu_vm_mask）;
}
#endif
}switch_mm函数源码到此结束。

if （unlikely（！prev->mm）） {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
/*
* Since the runqueue lock will be released by the next
* task （which is an invalid locking op but in the case
* of the scheduler it's an obvious special-case）, so we
* do an early lockdep release here:
*/
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
spin_release（&rq->lock.dep_map, 1, _THIS_IP_）;
#endif

/* Here we just switch the register state and the stack. */
switch_to（prev, next, prev）;

barrier（）;
/*
* this_rq must be evaluated again because prev may have moved
* CPUs since it called schedule（）, thus the "rq" on its stack
* frame will be invalid.
*/
finish_task_switch（this_rq（）, prev）;这一部分，下篇再说。
}context_switch到此结束。
/*
* the context switch might have flipped the stack from under
* us, hence refresh the local variables.
*/
cpu = smp_processor_id（）;
rq = cpu_rq（cpu）;
} else
spin_unlock_irq（&rq->lock）;

..........

preempt_enable_no_resched（）;
if （unlikely（test_thread_flag（TIF_NEED_RESCHED）））
goto need_resched;
}schedule函数到此结束。这篇写到这里也就结束了。我们主要讲述的是：在已经找到下一个要运行的进程之后，如何进行切换；这一篇说的主要是内存管理的切换，即mm_struct结构体的切换。其实，它的切换就是页面目录的切换，当然，不同的处理器会有所不同。