赞
踩
这一部分涉及到修改xv6内核的一些代码,包括添加一些系统调用的函数。系统调用涉及到如何从用户空间进入内核空间,因此首先要了解一个系统调用的流程。
按照流程,首先要切换到实验二对应的分支(syscall)。但是如果之前没有commit实验一的代码,切换分支后再切换回来可能就找不到之前的修改了,因此用git切换分支时也会提示你先commit你的修改。其实也可以直接在实验一的基础上做,只需要在/user文件夹里加上trace.c和sysinfotest.c这两个缺少的文件即可。
$ git fetch
$ git checkout syscall
$ make clean
user/user.h: 所有系统调用(system calls)的函数都在这里被声明
user/usys.S: 系统调用的函数使用ecall 指令,调用到内核态,这部分是汇编代码
user/usys.pl 这里使用entry宏来使用汇编直接实现调用(可以对照usys.S对应部分来看)
kernel/syscall.h 定义了所有系统调用函数的编号
kernel/syscall.c 内核根据系统调用编号在syscalls[]表中找到对应的处理函数并调用它(通过myproc()取得当前进程)
kernel/sysproc.c 到达真正发挥作用的函数处,执行具体内核操作
大概流程就是按照上面文件的顺序来执行的,也就是用户发起一个系统调用,然后CPU使用ecall指令陷入内核,在内核的syscall()中根据系统调用编号查找对应的函数,并跳转到该系统调用函数被定义的地方来执行。
$ trace 32 grep hello README
//这里的32就是mask(十进制),转化为三十二位的二进制就是0000 0000 ... 0010 0000
//也就是第5位(从第0位算起,即从低位数第六位)被置1了,相当于 1 << SYS_read;查询kernel/syscall.h,编号为5的系统调用就是read
//因此就是追踪read这个系统调用
$ trace 2147483647 grep hello README
//二进制低31位全部置1 [0111 1111 1111 1111 1111 1111 1111 1111] 就是2147483647(即2^31-1)
//因此说明要跟踪所有的系统调用
输出顺序就是 “pid: 调用名称 -> 返回值”,运行结果如下:
$ trace 32 grep hello README //只跟踪read系统调用
3: syscall read -> 1023
3: syscall read -> 966
3: syscall read -> 70
3: syscall read -> 0
$ trace 2147483647 grep hello README //跟踪所有系统调用
4: syscall trace -> 0
4: syscall exec -> 3
4: syscall open -> 3
4: syscall read -> 1023
4: syscall read -> 966
4: syscall read -> 70
4: syscall read -> 0
4: syscall close -> 0
所以接下来就按照上面系统调用的流程来写代码
## user.h
// system calls
int fork(void);
int exit(int) __attribute__((noreturn));
int wait(int*);
...
int uptime(void);
int trace(int); //添加在这里即可
## usys.pl
entry("fork");
entry("exit");
...
entry("uptime");
entry("trace");  # new: emits the user-space stub for trace(); list it only once
对于user/usys.S,这里在编译后会自动添加相应代码,就是entry宏里的,可以不用添加。
# User-space stub for trace(), shown for reference.
.global trace
trace:
# Load the system call number into a7 before trapping.
li a7, SYS_trace
# Trap into the kernel.
ecall
# Return to the caller.
ret
如果你的user文件夹里没有trace.c,记得添加一个,因为编译时候会用到。相应的文件,代码如下:
## trace.c #include "kernel/param.h" #include "kernel/types.h" #include "kernel/stat.h" #include "user/user.h" int main(int argc, char *argv[]) { int i; //存储待跟踪程序的名称和参数 char *nargv[MAXARG]; //保证trace的参数不少于三个,并且跟踪的系统调用号在0-99之间 if(argc < 3 || (argv[1][0] < '0' || argv[1][0] > '9')){ fprintf(2, "Usage: %s mask command\n", argv[0]); exit(1); } //调用trace系统调用,传入待跟踪系统调用号 if (trace(atoi(argv[1])) < 0) { fprintf(2, "%s: trace failed\n", argv[0]); exit(1); } //保存待跟踪程序的名称和参数 for(i = 2; i < argc && i < MAXARG; i++){ nargv[i-2] = argv[i]; } //运行待跟踪的程序 exec(nargv[0], nargv); exit(0); }
首先是添加系统调用的编号,直接加在最后一行即可。
## syscall.h
// System call numbers
#define SYS_fork 1
#define SYS_exit 2
#define SYS_wait 3
...
#define SYS_close 21
#define SYS_trace 22 // new: number assigned to the trace system call
## syscall.c //这里需要声明sys_trace(void),来自于proc.h头文件 extern uint64 sys_chdir(void); extern uint64 sys_close(void); ... extern uint64 sys_uptime(void); extern uint64 sys_trace(void);//添加到最后一行 // 新加一个数组syscall_names[num]: 从 syscall 编号到 syscall 名的映射表 const char *syscall_names[] = { [SYS_fork] "fork", [SYS_exit] "exit", [SYS_wait] "wait", [SYS_pipe] "pipe", [SYS_read] "read", [SYS_kill] "kill", [SYS_exec] "exec", [SYS_fstat] "fstat", [SYS_chdir] "chdir", [SYS_dup] "dup", [SYS_getpid] "getpid", [SYS_sbrk] "sbrk", [SYS_sleep] "sleep", [SYS_uptime] "uptime", [SYS_open] "open", [SYS_write] "write", [SYS_mknod] "mknod", [SYS_unlink] "unlink", [SYS_link] "link", [SYS_mkdir] "mkdir", [SYS_close] "close", [SYS_trace] "trace", }; //只需要加一个判断,是否调用了trace(p->syscall_trace),然后打印当前进程使用的系统调用即可 void syscall(void) { int num; struct proc *p = myproc(); num = p->trapframe->a7; if(num > 0 && num < NELEM(syscalls) && syscalls[num]) { p->trapframe->a0 = syscalls[num](); //查验现在的num调用的哪个系统调用,然后将相应的信息打印出来 if((p->syscall_trace >> num) & 1) { printf("%d: syscall %s -> %d\n",p->pid, syscall_names[num], p->trapframe->a0); } } else { printf("%d %s: unknown sys call %d\n", p->pid, p->name, num); p->trapframe->a0 = -1; } }
接下来给proc结构体加一个变量,用来获取trace传进来的mask
## proc.h
// Per-process state
struct proc {
  struct spinlock lock;

  // p->lock must be held when using these:
  enum procstate state;        // Process state
  struct proc *parent;         // Parent process
  void *chan;                  // If non-zero, sleeping on chan
  int killed;                  // If non-zero, have been killed
  int xstate;                  // Exit status to be returned to parent's wait
  int pid;                     // Process ID

  // these are private to the process, so p->lock need not be held.
  uint64 kstack;               // Virtual address of kernel stack
  uint64 sz;                   // Size of process memory (bytes)
  pagetable_t pagetable;       // User page table
  struct trapframe *trapframe; // data page for trampoline.S
  struct context context;      // swtch() here to run process
  struct file *ofile[NOFILE];  // Open files
  struct inode *cwd;           // Current directory
  char name[16];               // Process name (debugging)
  uint64 syscall_trace;        // New: trace mask; bit n set => trace syscall number n
};
在allocproc()中分配并初始化proc结构体之后,需要给syscall_trace赋初值,默认为0(即不跟踪任何系统调用)
## proc.c
// Excerpt of allocproc(); the elided middle part is unchanged.
static struct proc*
allocproc(void)
{
struct proc *p;
...
memset(&p->context, 0, sizeof(p->context));
p->context.ra = (uint64)forkret;
p->context.sp = p->kstack + PGSIZE;
p->syscall_trace = 0; // new: start with an empty trace mask
return p;
}
需要特别注意,fork()创建子进程后,子进程也需要继承父进程的mask值,不然子进程是没法trace的,因此在proc.c中也需要修改fork()函数,如下所示:
int fork(void) { int i, pid; struct proc *np; struct proc *p = myproc(); ... safestrcpy(np->name, p->name, sizeof(p->name)); np->syscall_trace = p->syscall_trace; //在这里把父进程的mask获取到 pid = np->pid; np->state = RUNNABLE; release(&np->lock); return pid; }
然后在sysproc.c中实现sys_trace(),把当前进程proc结构体中的syscall_trace赋值为传入的mask
## sysproc.c
// trace(mask): set the calling process's system call trace mask.
// Bit n of mask enables tracing of the system call numbered n.
// Returns 0 on success, -1 if the argument cannot be fetched.
uint64
sys_trace(void)
{
  int mask;

  if(argint(0, &mask) < 0)
    return -1;
  // Assign rather than OR: a later trace() call must be able to narrow or
  // clear the set of traced calls, not only widen it.
  myproc()->syscall_trace = mask;
  return 0;
}
# New: $U/_trace is appended to UPROGS so the trace program gets built.
# (A note cannot follow the trailing backslash: it would break the
# line continuation, and "//" is not a make comment.)
UPROGS=\
$U/_cat\
$U/_echo\
$U/_forktest\
$U/_grep\
...
$U/_find\
$U/_xargs\
$U/_trace\
jimmy@ubuntu:~/xv6-labs-2020$ make qemu qemu-system-riscv64 -machine virt -bios none -kernel kernel/kernel -m 128M -smp 3 -nographic -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 xv6 kernel is booting hart 1 starting hart 2 starting init: starting sh $ trace 32 grep hello README 3: syscall read -> 1023 3: syscall read -> 966 3: syscall read -> 70 3: syscall read -> 0 $ trace 2147483647 grep hello README 4: syscall trace -> 0 4: syscall exec -> 3 4: syscall open -> 3 4: syscall read -> 1023 4: syscall read -> 966 4: syscall read -> 70 4: syscall read -> 0 4: syscall close -> 0
可以看到,测试成功了。
## kalloc.c
// Return the amount of free physical memory in bytes, computed as
// PGSIZE times the number of entries on kmem.freelist.
uint64
count_free_mem(void)
{
  struct run *r;
  uint64 mem_bytes = 0;

  // Hold the allocator lock so the list cannot change while it is walked.
  acquire(&kmem.lock);
  // Each node on the free list accounts for one page of PGSIZE bytes.
  for(r = kmem.freelist; r; r = r->next)
    mem_bytes += PGSIZE;
  release(&kmem.lock);

  return mem_bytes;
}
count_free_mem基本上是参考kalloc函数写的:从中可以推断出每个变量的含义,以及如何遍历空闲页链表(kmem.freelist)。
## proc.c
// Return how many entries of the process table are in a state other
// than UNUSED.
uint64
count_process(void)
{
  uint64 in_use = 0;
  int i;

  // Scan every slot of the table and skip the unused ones.
  for(i = 0; i < NPROC; i++){
    if(proc[i].state == UNUSED)
      continue;
    in_use++;
  }
  return in_use;
}
## syscall.h
// System call numbers
#define SYS_fork 1
#define SYS_exit 2
...
#define SYS_close 21
#define SYS_trace 22
#define SYS_sysinfo 23 // new: appended at the end of the list
## syscall.c
// Handler declarations; sys_sysinfo is the new one.
extern uint64 sys_chdir(void);
extern uint64 sys_close(void);
...
extern uint64 sys_trace(void);
extern uint64 sys_sysinfo(void);
...
// Dispatch table: system call number -> handler function.
static uint64 (*syscalls[])(void) = {
[SYS_fork]    sys_fork,
[SYS_exit]    sys_exit,
...
[SYS_trace]   sys_trace,
[SYS_sysinfo] sys_sysinfo,
};

// Name table: system call number -> printable name.
const char *syscall_names[] = {
[SYS_fork]    "fork",
[SYS_exit]    "exit",
...
[SYS_trace]   "trace",
[SYS_sysinfo] "sysinfo",
};
##defs.h
// kalloc.c
void* kalloc(void);
void kfree(void *);
void kinit(void);
uint64 count_free_mem(void); // new: returns the amount of free memory in bytes
...
// proc.c
int cpuid(void);
void exit(int);
...
void procdump(void);
uint64 count_process(void); // new: returns the number of processes not in state UNUSED
// Result record of the sysinfo system call (included elsewhere as "sysinfo.h").
struct sysinfo {
uint64 freemem; // amount of free memory (bytes)
uint64 nproc; // number of processes
};
## sysproc.h #include "sysinfo.h" uint64 sys_sysinfo(void) { // 从用户态读入一个指针,作为存放 sysinfo 结构的缓冲区 uint64 addr; if(argaddr(0, &addr)<0) return -1; struct sysinfo sinfo; sinfo.freemem = count_free_mem(); sinfo.nproc = count_process(); // copyout函数存在于vm.c中 // 用于结合当前进程的页表,获得进程传进来的指针(逻辑地址)对应的物理地址 // 然后将 &sinfo 中的数据复制到该指针所指位置,供用户进程使用。 if(copyout(myproc()->pagetable, addr, (char *)&sinfo, sizeof(sinfo)) < 0) return -1; return 0; }
##user.h
// system calls
int fork(void);
int exit(int) __attribute__((noreturn));
...
int trace(int);
struct sysinfo; // forward declaration so the prototype below can name the type
int sysinfo(struct sysinfo *); // new: prototype of the sysinfo system call
##usys.pl
entry("fork");
entry("exit");
...
entry("trace");
entry("sysinfo");  # new: emits the user-space stub for sysinfo()
// sysinfotest.c: user-level test program for the sysinfo system call.
#include "kernel/types.h"
#include "kernel/riscv.h"
#include "kernel/sysinfo.h"
#include "user/user.h"

// Call sysinfo() and exit with status 1 if it fails.
void
sinfo(struct sysinfo *info) {
  if (sysinfo(info) < 0) {
    printf("FAIL: sysinfo failed");
    exit(1);
  }
}

//
// use sbrk() to count how much free physical memory there is.
// The result is in bytes: PGSIZE is added per successful sbrk(PGSIZE).
//
int
countfree()
{
  uint64 sz0 = (uint64)sbrk(0);
  struct sysinfo info;
  int n = 0;

  // Grow the process one page at a time until sbrk() fails.
  while(1){
    if((uint64)sbrk(PGSIZE) == 0xffffffffffffffff){
      break;
    }
    n += PGSIZE;
  }
  // With memory exhausted, sysinfo must report zero free bytes.
  sinfo(&info);
  if (info.freemem != 0) {
    printf("FAIL: there is no free mem, but sysinfo.freemem=%d\n",
      info.freemem);
    exit(1);
  }
  // Shrink back to the original size.
  sbrk(-((uint64)sbrk(0) - sz0));
  return n;
}

// Check that freemem matches countfree() and follows a one-page
// sbrk() grow and shrink.
void
testmem() {
  struct sysinfo info;
  uint64 n = countfree();

  sinfo(&info);

  if (info.freemem!= n) {
    printf("FAIL: free mem %d (bytes) instead of %d\n", info.freemem, n);
    exit(1);
  }

  if((uint64)sbrk(PGSIZE) == 0xffffffffffffffff){
    printf("sbrk failed");
    exit(1);
  }

  sinfo(&info);

  if (info.freemem != n-PGSIZE) {
    printf("FAIL: free mem %d (bytes) instead of %d\n", n-PGSIZE, info.freemem);
    exit(1);
  }

  if((uint64)sbrk(-PGSIZE) == 0xffffffffffffffff){
    printf("sbrk failed");
    exit(1);
  }

  sinfo(&info);

  if (info.freemem != n) {
    printf("FAIL: free mem %d (bytes) instead of %d\n", n, info.freemem);
    exit(1);
  }
}

// Check that sysinfo() succeeds with a valid pointer and fails with a bad one.
void
testcall() {
  struct sysinfo info;

  if (sysinfo(&info) < 0) {
    printf("FAIL: sysinfo failed\n");
    exit(1);
  }

  if (sysinfo((struct sysinfo *) 0xeaeb0b5b00002f5e) !=  0xffffffffffffffff) {
    printf("FAIL: sysinfo succeeded with bad argument\n");
    exit(1);
  }
}

// Check that nproc is one higher inside a forked child and back to the
// original value after the child has been waited for.
void testproc() {
  struct sysinfo info;
  uint64 nproc;
  int status;
  int pid;

  sinfo(&info);
  nproc = info.nproc;

  pid = fork();
  if(pid < 0){
    printf("sysinfotest: fork failed\n");
    exit(1);
  }
  if(pid == 0){
    sinfo(&info);
    if(info.nproc != nproc+1) {
      printf("sysinfotest: FAIL nproc is %d instead of %d\n", info.nproc, nproc+1);
      exit(1);
    }
    exit(0);
  }
  wait(&status);
  sinfo(&info);
  if(info.nproc != nproc) {
      printf("sysinfotest: FAIL nproc is %d instead of %d\n", info.nproc, nproc);
      exit(1);
  }
}

// Run the three test groups in order and report OK if none of them exits.
int
main(int argc, char *argv[])
{
  printf("sysinfotest: 0 start\n");
  testcall();
  printf("sysinfotest: 1\n");
  testmem();
  printf("sysinfotest: 2\n");
  testproc();
  printf("sysinfotest: OK\n");
  exit(0);
}
写到这里,再回顾开头写的系统调用流程:先在用户态声明系统调用的函数原型(user.h),然后在usys.pl中定义好entry,通过ecall进入内核的syscall.c,这里是用户态与内核态对应函数的一个转换处。接下来进入内核sysproc.c中寻找对应的函数,执行并获取结果,然后把结果返回给用户态。
# New: $U/_sysinfotest is appended to UPROGS so the test program gets built.
# (A note cannot follow the trailing backslash: it would break the
# line continuation, and "//" is not a make comment.)
UPROGS=\
$U/_cat\
$U/_echo\
$U/_forktest\
$U/_grep\
...
$U/_find\
$U/_xargs\
$U/_trace\
$U/_sysinfotest\
jimmy@ubuntu:~/xv6-labs-2020$ make qemu
qemu-system-riscv64 -machine virt -bios none -kernel kernel/kernel -m 128M -smp 3 -nographic -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
xv6 kernel is booting
hart 1 starting
hart 2 starting
init: starting sh
$ sysinfotest
sysinfotest: 0 start
sysinfotest: 1
sysinfotest: 2
sysinfotest: OK
出现这样的结果,说明实验成功了。
这个实验主要在于理解系统调用函数如何从用户态进入内核态,然后执行功能,返回结果的过程。
[1]: https://pdos.csail.mit.edu/6.S081/2020/labs/syscall.html
[2]: https://blog.miigon.net/posts/s081-lab2-system-calls/
[3]: https://zhuanlan.zhihu.com/p/624091268
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。