xv6进程调度细节

第一个用户进程

当xv6被加载到qemu后，执行的第一段代码如下所示，就是为每个cpu分配一个运行栈，即stack0，然后跳转到start函数

    # qemu -kernel loads the kernel at 0x80000000
        # and causes each CPU to jump there.
        # kernel.ld causes the following code to
        # be placed at 0x80000000.
.section .text
_entry:
    # set up a stack for C.
        # stack0 is declared in start.c,
        # with a 4096-byte stack per CPU.
        # sp = stack0 + ((hartid + 1) * 4096)
        # (the +1 below makes sp point at the high end of this
        # hart's 4096-byte slice of stack0 rather than its start)
        la sp, stack0            # sp = address of stack0
        li a0, 1024*4            # a0 = 4096, the per-CPU stack size
    csrr a1, mhartid             # a1 = this hart's id
        addi a1, a1, 1           # a1 = hartid + 1
        mul a0, a0, a1           # a0 = 4096 * (hartid + 1)
        add sp, sp, a0           # sp = stack0 + 4096 * (hartid + 1)
    # jump to start() in start.c
        call start
    # spin forever if start() ever returns here
spin:
        j spin

在start函数内，xv6处于machine mode，在这里会进行一些初始化工作，其中最重要的是把main函数的地址写入mepc寄存器，然后通过mret指令跳转到main函数开始执行。

// entry.S jumps here in machine mode on stack0.
// Excerpt: the remaining machine-mode setup steps are elided; only the
// part that arranges the jump to main() is shown.
void
start()
{
  //    ...    (initialization elided)
  // store main's address in mepc so the mret below continues at main()
  w_mepc((uint64)main);
  //    ...    (initialization elided)
  // switch to supervisor mode and jump to main().
  asm volatile("mret");
}

在main函数里，真正开始进行一系列初始化操作，包括物理内存分配器初始化、内核页表映射、开启分页模式等一系列工作，最后会调用userinit创建第一个user process。

// start() jumps here in supervisor mode on all CPUs.
// The CPU whose cpuid() is 0 runs the one-time setup and creates the
// first user process; every CPU then enters scheduler().
void
main()
{
  if(cpuid() == 0){
    // ... (one-time initialization elided)

    userinit();      // first user process
    // memory barrier so the setup above is visible before started is set
    __sync_synchronize();
    // NOTE(review): presumably the other CPUs wait on `started` in the
    // elided else-branch -- confirm against the full source.
    started = 1;
  } else {
     // ... (initialization for the other CPU cores elided)
  }

  scheduler();     // the scheduler thread of this CPU starts here
}

userinit代码如下：

// Set up first user process.
// Excerpt: only the allocation and the final state change are shown.
// Nothing is executed on behalf of the process here; it merely becomes
// eligible for scheduling.
void
userinit(void)
{
  struct proc *p;

  p = allocproc(); // obtain a proc that was in the UNUSED state
  initproc = p;    // keep a pointer to this first process in initproc

  // ... (further initialization elided)
  p->state = RUNNABLE; // must be RUNNABLE for the scheduler thread to pick it
}

在userinit里会调用allocproc，allocproc里有非常关键的几行代码

// Excerpt of allocproc(): the search for a free slot and most of the
// per-process setup are elided. What remains is the initialization of
// the saved kernel context, which decides where the new process starts
// executing the first time it is switched to.
static struct proc*
allocproc(void)
{
  struct proc *p;

 // ... (scan of the proc table for an UNUSED proc elided)

found:
  // ... (further initialization elided)

  // Set up new context to start executing at forkret,
  // which returns to user space.
  memset(&p->context, 0, sizeof(p->context));
  // ra: the address swtch's final `ret` jumps to when this context is loaded
  p->context.ra = (uint64)forkret;
  // sp: initial kernel stack pointer, PGSIZE bytes above p->kstack
  // (assumes p->kstack is the base of a one-page stack -- TODO confirm)
  p->context.sp = p->kstack + PGSIZE;

  return p;
}

可以看到最后会设置p->context的ra字段和sp字段，前者将返回地址指向了forkret函数，后者保存了这个进程的内核栈位置。

// A fork child's very first scheduling by scheduler()
// will swtch to forkret.
void
forkret(void)
{
  static int first = 1;

  // Still holding p->lock from scheduler.
  release(&myproc()->lock);

  if (first) {
    // File system initialization must be run in the context of a
    // regular process (e.g., because it calls sleep), and thus cannot
    // be run from main().
    first = 0;
    fsinit(ROOTDEV);
  }

  usertrapret();
}

可以看到forkret实际上就是调用了usertrapret。由于系统刚刚初始化，实际上没有任何用户进程在之前运行过，因此这里实际上是伪装成一次用户进程陷入内核又回到用户态的操作。在usertrapret里会为这个用户进程设置一些关键的值，从而确保在下一次真正"trap"时能正确进入内核态。

//
// return to user space
//
// Prepares everything the trampoline code needs and then jumps to it:
// redirects the trap vector to uservec, records in the trapframe the
// kernel values needed on the next trap, sets sstatus and sepc for the
// sret, and finally calls userret with the trapframe address and the
// user page table.
//
void
usertrapret(void)
{
  struct proc *p = myproc();

  // we're about to switch the destination of traps from
  // kerneltrap() to usertrap(), so turn off interrupts until
  // we're back in user space, where usertrap() is correct.
  intr_off();

  // send syscalls, interrupts, and exceptions to trampoline.S
  // (uservec - trampoline) is uservec's offset inside the trampoline code;
  // adding TRAMPOLINE gives its address at the TRAMPOLINE mapping.
  w_stvec(TRAMPOLINE + (uservec - trampoline));

  // set up trapframe values that uservec will need when
  // the process next re-enters the kernel.
  p->trapframe->kernel_satp = r_satp();         // kernel page table
  p->trapframe->kernel_sp = p->kstack + PGSIZE; // process's kernel stack
  p->trapframe->kernel_trap = (uint64)usertrap; // C handler for the next trap
  p->trapframe->kernel_hartid = r_tp();         // hartid for cpuid()

  // set up the registers that trampoline.S's sret will use
  // to get to user space.

  // set S Previous Privilege mode to User.
  unsigned long x = r_sstatus();
  x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
  x |= SSTATUS_SPIE; // enable interrupts in user mode
  w_sstatus(x);

  // set S Exception Program Counter to the saved user pc.
  w_sepc(p->trapframe->epc);

  // tell trampoline.S the user page table to switch to.
  uint64 satp = MAKE_SATP(p->pagetable);

  // jump to trampoline.S at the top of memory, which 
  // switches to the user page table, restores user registers,
  // and switches to user mode with sret.
  // fn is userret's address at the TRAMPOLINE mapping, computed the same
  // way as uservec above; it is called with (TRAPFRAME, satp).
  uint64 fn = TRAMPOLINE + (userret - trampoline);
  ((void (*)(uint64,uint64))fn)(TRAPFRAME, satp);
}

在userinit执行完后，返回到main函数体里，随后调用scheduler函数，从这里开始就运行在xv6的调度器线程上。注意此时forkret和usertrapret都还没有被执行，userinit只是准备好了第一个进程并把它设置为RUNNABLE。scheduler是一个死循环，当遍历proc table找到了一个可运行的进程后，随即调用swtch切换运行上下文。

// Scheduler loop run by each CPU after setup; it never returns.
// Each pass scans the proc table, and for every RUNNABLE process marks
// it RUNNING, records it in c->proc and swtch()es to it. Control comes
// back to the statement after swtch() when that process switches away
// again. A pass that sees at most two non-UNUSED processes ends by
// waiting for an interrupt.
void
scheduler(void)
{
  struct proc *p;
  struct cpu *c = mycpu();

  c->proc = 0;
  for(;;){
    // Avoid deadlock by ensuring that devices can interrupt.
    intr_on();

    int nproc = 0;   // number of non-UNUSED procs seen in this pass
    for(p = proc; p < &proc[NPROC]; p++) {
      acquire(&p->lock);
      if(p->state != UNUSED) {
        nproc++;
      }
      if(p->state == RUNNABLE) {
        // Switch to chosen process.  It is the process's job
        // to release its lock and then reacquire it
        // before jumping back to us.
        p->state = RUNNING;
        c->proc = p;
        // The scheduler thread switches context here. The ra saved into
        // c->context is the address of the instruction right after this
        // call, i.e. the `c->proc = 0` below.
        swtch(&c->context, &p->context); 

        // The next time a process calls sched(), swtch switches back to
        // the scheduler thread and reloads c->context: sp points back
        // into the scheduler's stack (stack0), which still holds this
        // call frame, and ra holds the address of `c->proc = 0`.

        // Process is done running for now.
        // It should have changed its p->state before coming back.
        c->proc = 0;
      }
      release(&p->lock);
    }
    if(nproc <= 2) {   // only init and sh exist
      // enable interrupts, then idle until one arrives
      intr_on();
      asm volatile("wfi");
    }
  }
}

swtch是一段汇编代码，如下所示：

# Context switch
#
#   void swtch(struct context *old, struct context *new);
# 
# Save current registers in old. Load from new.    
# a0 holds `old` and a1 holds `new` (first and second argument).
# Excerpt: only ra and sp are shown; the other registers are elided.

.globl swtch
swtch:
        sd ra, 0(a0) # save the return address into old
        sd sp, 8(a0) # save the stack pointer (the caller's call stack) into old
      # ... (saving of the remaining registers elided)

        ld ra, 0(a1) # load the return address from new
        ld sp, 8(a1) # restore the stack pointer (call stack) from new
      # ... (loading of the remaining registers elided)

        ret # jump to the return address that was just loaded into ra

它保存了当前线程的运行上下文，然后加载被调度线程的运行上下文，关键的寄存器还是ra和sp，前者保存了返回地址，后者指向了线程执行所在的内核栈。

前面我们已经知道第一个用户进程用forkret伪造了一个ra，因此切换运行上下文后，这个用户进程就开始在forkret里执行了，最后通过usertrapret返回了用户态，从此开始了正常生活！

之后的进程如何切换？

首先，如果也是一个新创建的用户进程，仍然会通过forkret伪造现场然后返回。这和之前没什么区别。

但如果是一个进程由于时间片用完了，陷入了内核，是怎样一种情况？实际上它会主动调用yield放弃运行：

// Voluntarily hand the CPU back to the scheduler for one round: mark
// the calling process RUNNABLE and switch away via sched(). Execution
// resumes after the sched() call once the process is scheduled again.
void
yield(void)
{
  struct proc *self = myproc();

  // The process lock is held across the state change and the switch.
  acquire(&self->lock);

  // Leave the RUNNING state before handing control to sched().
  self->state = RUNNABLE;
  sched();

  // Running again: drop the lock before returning to the caller.
  release(&self->lock);
}

最终它会调用sched来实现运行上下文的切换。

sched会做一些防御性检查，确保内核调度的正确性，包括是否持有进程锁，是否关闭了中断，当前进程是否已经不再是RUNNING状态（调用sched之前必须先修改p->state）。

// Switch from the current process's kernel thread to this CPU's
// scheduler context. Before switching it panics unless p->lock is held,
// mycpu()->noff is exactly 1, the process state is no longer RUNNING,
// and interrupts are off. mycpu()->intena is saved before the switch and
// written back after this thread is resumed.
void
sched(void)
{
  int intena;
  struct proc *p = myproc();

  if(!holding(&p->lock))
    panic("sched p->lock");
  // NOTE(review): noff presumably counts nested interrupt-disable levels,
  // so 1 would mean p->lock is the only lock held -- confirm.
  if(mycpu()->noff != 1)
    panic("sched locks");
  // the caller must already have changed p->state away from RUNNING
  if(p->state == RUNNING)
    panic("sched running");
  if(intr_get())
    panic("sched interruptible");

  intena = mycpu()->intena;
  swtch(&p->context, &mycpu()->context);
  // After this thread calls swtch its context is saved in p->context.
  // When it is scheduled again, the saved ra points here, so execution
  // continues with the statement below.
  mycpu()->intena = intena; // the saved sp was restored too, so the call stack is intact
}

当用户进程P1的内核线程执行sched时，通过swtch(&p->context, &mycpu()->context)，保存自身运行上下文，载入调度器线程的运行上下文；切换线程后，由于调度器线程自身的运行上下文中ra寄存器存放的地址的指令是c->proc = 0，因此此时会跳转到c->proc = 0处继续执行，将c->proc置零后，通过release(&p->lock)释放P1持有的锁。进入下一轮循环后重新遍历进程表，如果一个进程P2处于RUNNABLE状态，修改P2状态为RUNNING，调用swtch(&c->context, &p->context)，加载P2上次运行结束，主动调用sched()时的运行上下文，于是P2也会返回到它的ra寄存器所指向的指令位置，然后通过保存的sp寄存器，恢复函数调用栈，然后开始执行下一行代码，好像一切都没发生过一样。

over…