Operating System (5) : 用户进程管理
May 25, 2016
了解用户态进程创建、执行、切换和结束的动态管理过程,了解在用户态通过系统调用得到内核态的内核服务的过程。
概要
- 建立用户进程的关键信息
- 实现用户进程管理
- 分析进程和内存管理的关系
- 实现系统调用的处理过程
kern/process/proc.c
/* load_icode - load the content of binary program(ELF format) as the new content of current process
* @binary: the memory addr of the content of binary program
* @size: the size of the content of binary program
*/
static int
load_icode(unsigned char *binary, size_t size) {
if (current->mm != NULL) {
panic("load_icode: current->mm must be empty.\n");
}
int ret = -E_NO_MEM;
struct mm_struct *mm;
//(1) create a new mm for current process
if ((mm = mm_create()) == NULL) {
goto bad_mm;
}
//(2) create a new PDT, and mm->pgdir= kernel virtual addr of PDT
if (setup_pgdir(mm) != 0) {
goto bad_pgdir_cleanup_mm;
}
//(3) copy TEXT/DATA section, build BSS parts in binary to memory space of process
struct Page *page;
//(3.1) get the file header of the bianry program (ELF format)
struct elfhdr *elf = (struct elfhdr *)binary;
//(3.2) get the entry of the program section headers of the bianry program (ELF format)
struct proghdr *ph = (struct proghdr *)(binary + elf->e_phoff);
//(3.3) This program is valid?
if (elf->e_magic != ELF_MAGIC) {
ret = -E_INVAL_ELF;
goto bad_elf_cleanup_pgdir;
}
uint32_t vm_flags, perm;
struct proghdr *ph_end = ph + elf->e_phnum;
for (; ph < ph_end; ph ++) {
//(3.4) find every program section headers
if (ph->p_type != ELF_PT_LOAD) {
continue ;
}
if (ph->p_filesz > ph->p_memsz) {
ret = -E_INVAL_ELF;
goto bad_cleanup_mmap;
}
if (ph->p_filesz == 0) {
continue ;
}
//(3.5) call mm_map fun to setup the new vma ( ph->p_va, ph->p_memsz)
vm_flags = 0, perm = PTE_U;
if (ph->p_flags & ELF_PF_X) vm_flags |= VM_EXEC;
if (ph->p_flags & ELF_PF_W) vm_flags |= VM_WRITE;
if (ph->p_flags & ELF_PF_R) vm_flags |= VM_READ;
if (vm_flags & VM_WRITE) perm |= PTE_W;
if ((ret = mm_map(mm, ph->p_va, ph->p_memsz, vm_flags, NULL)) != 0) {
goto bad_cleanup_mmap;
}
unsigned char *from = binary + ph->p_offset;
size_t off, size;
uintptr_t start = ph->p_va, end, la = ROUNDDOWN(start, PGSIZE);
ret = -E_NO_MEM;
//(3.6) alloc memory, and copy the contents of every program section (from, from+end) to process's memory (la, la+end)
end = ph->p_va + ph->p_filesz;
//(3.6.1) copy TEXT/DATA section of bianry program
while (start < end) {
if ((page = pgdir_alloc_page(mm->pgdir, la, perm)) == NULL) {
goto bad_cleanup_mmap;
}
off = start - la, size = PGSIZE - off, la += PGSIZE;
if (end < la) {
size -= la - end;
}
memcpy(page2kva(page) + off, from, size);
start += size, from += size;
}
//(3.6.2) build BSS section of binary program
end = ph->p_va + ph->p_memsz;
if (start < la) {
/* ph->p_memsz == ph->p_filesz */
if (start == end) {
continue ;
}
off = start + PGSIZE - la, size = PGSIZE - off;
if (end < la) {
size -= la - end;
}
memset(page2kva(page) + off, 0, size);
start += size;
assert((end < la && start == end) || (end >= la && start == la));
}
while (start < end) {
if ((page = pgdir_alloc_page(mm->pgdir, la, perm)) == NULL) {
goto bad_cleanup_mmap;
}
off = start - la, size = PGSIZE - off, la += PGSIZE;
if (end < la) {
size -= la - end;
}
memset(page2kva(page) + off, 0, size);
start += size;
}
}
//(4) build user stack memory
vm_flags = VM_READ | VM_WRITE | VM_STACK;
if ((ret = mm_map(mm, USTACKTOP - USTACKSIZE, USTACKSIZE, vm_flags, NULL)) != 0) {
goto bad_cleanup_mmap;
}
assert(pgdir_alloc_page(mm->pgdir, USTACKTOP-PGSIZE , PTE_USER) != NULL);
assert(pgdir_alloc_page(mm->pgdir, USTACKTOP-2*PGSIZE , PTE_USER) != NULL);
assert(pgdir_alloc_page(mm->pgdir, USTACKTOP-3*PGSIZE , PTE_USER) != NULL);
assert(pgdir_alloc_page(mm->pgdir, USTACKTOP-4*PGSIZE , PTE_USER) != NULL);
//(5) set current process's mm, sr3, and set CR3 reg = physical addr of Page Directory
mm_count_inc(mm);
current->mm = mm;
current->cr3 = PADDR(mm->pgdir);
lcr3(PADDR(mm->pgdir));
//(6) setup trapframe for user environment
struct trapframe *tf = current->tf;
memset(tf, 0, sizeof(struct trapframe));
/* LAB5:EXERCISE1
* should set tf_cs,tf_ds,tf_es,tf_ss,tf_esp,tf_eip,tf_eflags
* NOTICE: If we set trapframe correctly, then the user level process can return to USER MODE from kernel. So
* tf_cs should be USER_CS segment (see memlayout.h)
* tf_ds=tf_es=tf_ss should be USER_DS segment
* tf_esp should be the top addr of user stack (USTACKTOP)
* tf_eip should be the entry point of this binary program (elf->e_entry)
* tf_eflags should be set to enable computer to produce Interrupt
*/
tf->tf_cs = USER_CS;
tf->tf_ds = tf->tf_es = tf->tf_ss = USER_DS;
tf->tf_esp = USTACKTOP;
tf->tf_eip = elf->e_entry;
tf->tf_eflags = FL_IF;
ret = 0;
out:
return ret;
bad_cleanup_mmap:
exit_mmap(mm);
bad_elf_cleanup_pgdir:
put_pgdir(mm);
bad_pgdir_cleanup_mm:
mm_destroy(mm);
bad_mm:
goto out;
}
kern/mm/pmm.c
/* copy_range - copy content of memory (start, end) of one process A to another process B
* @to: the addr of process B's Page Directory
* @from: the addr of process A's Page Directory
* @share: flags to indicate to dup OR share. We just use dup method, so it didn't be used.
*
* CALL GRAPH: copy_mm-->dup_mmap-->copy_range
*/
int
copy_range(pde_t *to, pde_t *from, uintptr_t start, uintptr_t end, bool share) {
assert(start % PGSIZE == 0 && end % PGSIZE == 0);
assert(USER_ACCESS(start, end));
// copy content by page unit.
do {
//call get_pte to find process A's pte according to the addr start
pte_t *ptep = get_pte(from, start, 0), *nptep;
if (ptep == NULL) {
start = ROUNDDOWN(start + PTSIZE, PTSIZE);
continue ;
}
//call get_pte to find process B's pte according to the addr start. If pte is NULL, just alloc a PT
if (*ptep & PTE_P) {
if ((nptep = get_pte(to, start, 1)) == NULL) {
return -E_NO_MEM;
}
uint32_t perm = (*ptep & PTE_USER);
//get page from ptep
struct Page *page = pte2page(*ptep);
// alloc a page for process B
struct Page *npage=alloc_page();
assert(page!=NULL);
assert(npage!=NULL);
int ret=0;
/* LAB5:EXERCISE2
* replicate content of page to npage, build the map of phy addr of nage with the linear addr start
*
* Some Useful MACROs and DEFINEs, you can use them in below implementation.
* MACROs or Functions:
* page2kva(struct Page *page): return the kernel vritual addr of memory which page managed (SEE pmm.h)
* page_insert: build the map of phy addr of an Page with the linear addr la
* memcpy: typical memory copy function
*
* (1) find src_kvaddr: the kernel virtual address of page
* (2) find dst_kvaddr: the kernel virtual address of npage
* (3) memory copy from src_kvaddr to dst_kvaddr, size is PGSIZE
* (4) build the map of phy addr of nage with the linear addr start
*/
void *src_kvaddr = page2kva(page);
void *dst_kvaddr = page2kva(npage);
memcpy(dst_kvaddr, src_kvaddr, PGSIZE);
page_insert(to, npage, start, perm);
assert(ret == 0);
}
start += PGSIZE;
} while (start != 0 && start < end);
return 0;
}
kern/trap/trap.c
static void
trap_dispatch(struct trapframe *tf) {
char c;
int ret=0;
switch (tf->tf_trapno) {
case T_PGFLT: //page fault
if ((ret = pgfault_handler(tf)) != 0) {
print_trapframe(tf);
if (current == NULL) {
panic("handle pgfault failed. ret=%d\n", ret);
}
else {
if (trap_in_kernel(tf)) {
panic("handle pgfault failed in kernel mode. ret=%d\n", ret);
}
cprintf("killed by kernel.\n");
panic("handle user mode pgfault failed. ret=%d\n", ret);
do_exit(-E_KILLED);
}
}
break;
case T_SYSCALL:
syscall();
break;
case IRQ_OFFSET + IRQ_TIMER:
#if 0
LAB3 : If some page replacement algorithm(such as CLOCK PRA) need tick to change the priority of pages,
then you can add code here.
#endif
/* LAB1 : STEP 3 */
/* handle the timer interrupt */
/* (1) After a timer interrupt, you should record this event using a global variable (increase it), such as ticks in kern/driver/clock.c
* (2) Every TICK_NUM cycle, you can print some info using a funciton, such as print_ticks().
* (3) Too Simple? Yes, I think so!
*/
ticks = (ticks + 1) % TICK_NUM;
if (ticks == 0) {
print_ticks();
/* LAB5 */
/* you should upate you lab1 code (just add ONE or TWO lines of code):
* Every TICK_NUM cycle, you should set current process's current->need_resched = 1
*/
current->need_resched = 1;
}
break;
case IRQ_OFFSET + IRQ_COM1:
c = cons_getc();
cprintf("serial [%03d] %c\n", c, c);
break;
case IRQ_OFFSET + IRQ_KBD:
c = cons_getc();
cprintf("kbd [%03d] %c\n", c, c);
break;
//LAB1 CHALLENGE 1 :
case T_SWITCH_TOU:
case T_SWITCH_TOK:
panic("T_SWITCH_** ??\n");
break;
case IRQ_OFFSET + IRQ_IDE1:
case IRQ_OFFSET + IRQ_IDE2:
/* do nothing */
break;
default:
print_trapframe(tf);
if (current != NULL) {
cprintf("unhandled trap.\n");
do_exit(-E_KILLED);
}
// in kernel, it must be a mistake
panic("unexpected trap in kernel.\n");
}
}
1: 加载应用程序并执行
设置好tf的各个成员变量
-
描述当创建一个用户态进程并加载了应用程序后,CPU是如何让这个应用程序最终在用户态执行起来的。即这个用户态进程被ucore选择占用CPU执行(RUNNING态)到具体执行应用程序第一条指令的整个经过
设置用户堆栈,建立虚拟地址与物理地址的映射,建立页表。修改trapframe,即这一步中的tf,tf相应的值被设置成了用户态的值,这样在iret指令返回之后就会成为用户态的进程进行执行
2: 父进程复制自己的内存空间给子进程
使用 page2kva 得到需要进行拷贝的地址 src 和 des ,通过 memcpy 对其进行复制,再用 page_insert 建立物理页与虚拟页的映射
-
简要说明如何设计实现 Copy on Write 机制,给出概要设计
在父进程创建子进程时,进行拷贝时直接拷贝页的地址指针,但把空间设置为只读。当进行写操作时会产生一个页异常中断,这时进行拷贝工作即可
3: 阅读分析源代码,理解进程执行 fork/exec/wait/exit 的实现,以及系统调用的实现
fork/exec/wait/exit函数分别对应了系统调用SYSfork、SYSexec、SYSwait、 SYSexit;系统调用实现是通过产生特定中断,在syscall函数中调用该中断trapframe中保存的eax值所对应的syscalls中的函数指针
-
fork/exec/wait/exit在实现中是如何影响进程的执行状态的?
fork可以产生拷贝自己的子进程;exec使用load_icode复制执行进程;wait使当前进程进入子进程或者IO操作的状态;exit可以回收当前进程资源,并进入僵尸模式等待父进程回收
-
ucore中一个用户态进程的执行状态生命周期图
进入就绪队列 -> 就绪 -> 执行 -> 就绪 -> … -> 执行 -> 退出