context_switch(rq, prev, next); /* unlocks the rq */我们重点讲这个函数,先列出它的源码,如下所示:/* * context_switch - switch to the new MM and the new * thread's register state.看注释,即可知道这个函数的主要作用,就是切换MM(内存管理方面的)和thread(CPU此时的状态) */ static inline void context_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);对ARM来说是空函数 trace_sched_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* * For paravirt, this is coupled with an exit in switch_to to * combine the page table reload and the switch backend into * one hypercall. */ arch_enter_lazy_cpu_mode();
if (unlikely(!mm)) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else switch_mm(oldmm, mm, next);数据结构mm_struct描述着一个地址空间,每个进程控制块中有两个mm_struct结构指针。一个是mm,指向描述着本进程运行空间的mm_struct结构,如果mm为NULL,就表示本进程是个线程。而active_mm则指向进程或线程实际使用的空间,如果是线程就指向其所“挂靠”进程的空间。“内核线程”则使用系统空间,其指针active_mm指向描述着内核空间的mm_struct结构。下面是struct task_struct结构中的一小段:struct list_head tasks;
struct mm_struct *mm, *active_mm;
/* task state */ struct linux_binfmt *binfmt; int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ /* ??? */ unsigned int personality; unsigned did_exec:1; pid_t pid; pid_t tgid; 现在列出switch_mm函数的源码,如下所示:/* * This is the actual mm switch as far as the scheduler * is concerned. No registers are touched. We avoid * calling the CPU specific function when the mm hasn't * actually changed. */ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { #ifdef CONFIG_MMU unsigned int cpu = smp_processor_id();
#ifdef CONFIG_SMP /* check for possible thread migration */ if (!cpus_empty(next->cpu_vm_mask) && !cpu_isset(cpu, next->cpu_vm_mask)) __flush_icache_all(); #endif if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) { check_context(next); cpu_switch_mm(next->pgd, next);切换空间,实际上就是换一套页面映射目录和映射表。看下这个宏的展开:#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) #define cpu_do_switch_mm(pgd,mm) processor.switch_mm(pgd,mm) 出现了一个新的结构体:/* * Don't change this structure - ASM code * relies on it. */ extern struct processor { /* MISC * get data abort address/flags */ void (*_data_abort)(unsigned long pc); /* * Retrieve prefetch fault address */ unsigned long (*_prefetch_abort)(unsigned long lr); /* * Set up any processor specifics */ void (*_proc_init)(void); /* * Disable any processor specifics */ void (*_proc_fin)(void); /* * Special stuff for a reset */ void (*reset)(unsigned long addr) __attribute__((noreturn)); /* * Idle the processor */ int (*_do_idle)(void); /* * Processor architecture specific */ /* * clean a virtual address range from the * D-cache without flushing the cache. */ void (*dcache_clean_area)(void *addr, int size);
/* * Set the page table */ void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm); /* * Set a possibly extended PTE. Non-extended PTEs should * ignore "ext". */ void (*set_pte_ext)(pte_t *ptep, pte_t pte, unsigned int ext); } processor;一看就知道这个结构应该和具体CPU有关,而且成员都是函数指针,那么这些函数指针是在哪里赋值的呢?举个例子,如下所示:在文件linux/arch/arm/mm/proc-sa110.S有如下定义:.type sa110_processor_functions, #object ENTRY(sa110_processor_functions) .word v4_early_abort .word pabort_noifar .word cpu_sa110_proc_init .word cpu_sa110_proc_fin .word cpu_sa110_reset .word cpu_sa110_do_idle .word cpu_sa110_dcache_clean_area .word cpu_sa110_switch_mm .word cpu_sa110_set_pte_ext 对应的有ENTRY(cpu_sa110_switch_mm) #ifdef CONFIG_MMU str lr, [sp, #-4]! bl v4wb_flush_kern_cache_all @ clears IP mcr p15, 0, r0, c2, c0, 0 @ load page table pointer 将指向新进程的首层映射表的指针写入MMU中的寄存器c2,即地址转换表基地址寄存器。 mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs 丢掉高速缓存中原有的地址映射表 ldr pc, [sp], #4 #else mov pc, lr #endif 这样,CPU的用户空间映射就改变了,但是对当前程序的运行没影响,因为现在CPU运行在系统空间中。注:进程的调度、切换只能在系统空间中进行。
if (cache_is_vivt()) cpu_clear(cpu, prev->cpu_vm_mask); } #endif }switch_mm函数源码到此结束。
if (unlikely(!prev->mm)) { prev->active_mm = NULL; rq->prev_mm = oldmm; } /* * Since the runqueue lock will be released by the next * task (which is an invalid locking op but in the case * of the scheduler it's an obvious special-case), so we * do an early lockdep release here: */ #ifndef __ARCH_WANT_UNLOCKED_CTXSW spin_release(&rq->lock.dep_map, 1, _THIS_IP_); #endif
/* Here we just switch the register state and the stack. */ switch_to(prev, next, prev);
barrier(); /* * this_rq must be evaluated again because prev may have moved * CPUs since it called schedule(), thus the "rq" on its stack * frame will be invalid. */ finish_task_switch(this_rq(), prev);这一部分,下篇再说。 }context_switch到此结束。 /* * the context switch might have flipped the stack from under * us, hence refresh the local variables. */ cpu = smp_processor_id(); rq = cpu_rq(cpu); } else spin_unlock_irq(&rq->lock);
..........
preempt_enable_no_resched(); if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) goto need_resched; }schedule函数到此结束。这篇写到这里,也就结束了。我们主要讲述的是:已经找到下一个要运行的进程,现在进行切换;这一篇说的主要是内存管理的切换,即mm_struct结构体。其实,它的切换就是页面目录的切换,当然,不同的处理器会有所不同。