
Analysis of how an ARM64 kernel process goes from kernel mode to user mode after creation: __primary_switched

__primary_switched

1: The meaning of the current macro

This macro is the entry point for the analysis in this article. The kernel has a very commonly used macro, current, which points to the task structure of the currently running task.

#define current get_current()

static __always_inline struct task_struct *get_current(void)
{
	unsigned long sp_el0;

	asm ("mrs %0, sp_el0" : "=r" (sp_el0));

	return (struct task_struct *)sp_el0;
}
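As a quick illustration (a minimal sketch, not taken from the article; the helper name is hypothetical), current can be dereferenced anywhere in process context inside the kernel:

/* Hypothetical debug helper: print the current task's name, pid and
 * the address of its task_struct (which, per the macro above, is the
 * value sitting in sp_el0 while in kernel mode). */
#include <linux/sched.h>
#include <linux/printk.h>

static void show_current_task(void)
{
	pr_info("running in task %s (pid %d), task_struct at %px\n",
		current->comm, current->pid, current);
}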

From this we can see that sp_el0 points to the task_struct. sp_el0 is nominally the stack pointer for user mode (exception level EL0). Once in kernel mode, the register is repurposed to point to the current process's task structure. That implies the original value of sp_el0 must have been saved somewhere in the kernel, otherwise it could not be restored when returning to user mode. To answer this question, first look at the __primary_switched function executed on the boot CPU: the very first "process" is this stretch of assembly run by the boot CPU.

arch/arm64/kernel/head.S
__primary_switched:
	adrp	x4, init_thread_union
	add	sp, x4, #THREAD_SIZE		// in kernel mode SP is SP_EL1
	adr_l	x5, init_task
	msr	sp_el0, x5			// Save thread_info
	adr_l	x8, vectors			// load VBAR_EL1 with virtual
	msr	vbar_el1, x8			// vector table address
	isb
	stp	xzr, x30, [sp, #-16]!
	mov	x29, sp
	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
	ldr_l	x4, kimage_vaddr		// Save the offset between
	sub	x4, x4, x0			// the kernel virtual and
	str_l	x4, kimage_voffset, x5		// physical mappings
	// Clear BSS
	adr_l	x0, __bss_start
	mov	x1, xzr
	adr_l	x2, __bss_stop
	sub	x2, x2, x0
	bl	__pi_memset
	dsb	ishst				// Make zero page visible to PTW

First: sp is set to the address init_thread_union + THREAD_SIZE.

Second: sp_el0 is set to the address of init_task. init_task is the statically defined kernel thread with PID 0.

 

The address of init_thread_union:

./include/linux/sched/task.h:extern union thread_union init_thread_union;

./include/asm-generic/vmlinux.lds.h
#define INIT_TASK_DATA(align)						\
	. = ALIGN(align);						\
	__start_init_task = .;						\
	init_thread_union = .;						\
	init_stack = .;							\
	KEEP(*(.data..init_task))					\
	KEEP(*(.data..init_thread_info))				\
	. = __start_init_task + THREAD_SIZE;				\
	__end_init_task = .;

The two sections referenced by KEEP are defined in include/linux/init_task.h:

/* Attach to the init_task data structure for proper alignment */
#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
#define __init_task_data __attribute__((__section__(".data..init_task")))
#else
#define __init_task_data /**/
#endif

/* Attach to the thread_info data structure for proper alignment */
#define __init_thread_info __attribute__((__section__(".data..init_thread_info")))

// include/linux/sched.h
union thread_union {
#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK
	struct task_struct task;
#endif
#ifndef CONFIG_THREAD_INFO_IN_TASK
	struct thread_info thread_info;
#endif
	unsigned long stack[THREAD_SIZE/sizeof(long)];
};
THREAD_SIZE is the size of the kernel stack (arch/arm64/include/asm/memory.h):

#ifdef CONFIG_KASAN
#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_SIZE       (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_THREAD_SHIFT      1
#else
#define KASAN_SHADOW_SIZE       (0)
#define KASAN_THREAD_SHIFT      0
#endif

#define MIN_THREAD_SHIFT        (14 + KASAN_THREAD_SHIFT)
#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT)
#define THREAD_SHIFT		PAGE_SHIFT
#else
#define THREAD_SHIFT		MIN_THREAD_SHIFT
#endif

#if THREAD_SHIFT >= PAGE_SHIFT
#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
#endif

#define THREAD_SIZE		(UL(1) << THREAD_SHIFT)

With the configuration used here (64K pages and CONFIG_VMAP_STACK), MIN_THREAD_SHIFT is below PAGE_SHIFT, so THREAD_SHIFT = PAGE_SHIFT and THREAD_SIZE should be exactly one page: 64K.
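A standalone sketch of the arithmetic (the config values are assumptions, not taken from the article: 64K pages, CONFIG_VMAP_STACK=y, CONFIG_KASAN=n):

#include <stdio.h>

/* Assumed configuration: 64K pages, CONFIG_VMAP_STACK=y, CONFIG_KASAN=n */
#define PAGE_SHIFT		16
#define KASAN_THREAD_SHIFT	0
#define MIN_THREAD_SHIFT	(14 + KASAN_THREAD_SHIFT)

#if MIN_THREAD_SHIFT < PAGE_SHIFT	/* the CONFIG_VMAP_STACK branch */
#define THREAD_SHIFT		PAGE_SHIFT
#else
#define THREAD_SHIFT		MIN_THREAD_SHIFT
#endif

#define THREAD_SIZE		(1UL << THREAD_SHIFT)

int main(void)
{
	/* prints THREAD_SHIFT=16 THREAD_SIZE=65536 (64K) */
	printf("THREAD_SHIFT=%d THREAD_SIZE=%lu (%luK)\n",
	       THREAD_SHIFT, THREAD_SIZE, THREAD_SIZE >> 10);
	return 0;
}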

struct task_struct init_task
#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
	__init_task_data	/* place init_task in the ".data..init_task" section, exactly where the linker script expects it */
#endif
= {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	.thread_info	= INIT_THREAD_INFO(init_task),
	.stack_refcount	= ATOMIC_INIT(1),
#endif
	.state		= 0,
	.stack		= init_stack,
	.usage		= ATOMIC_INIT(2),
	.flags		= PF_KTHREAD,
	......
};

struct thread_info init_thread_info __init_thread_info = INIT_THREAD_INFO(init_task);
/* placed in the ".data..init_thread_info" section */

Diagram of PID 0's kernel stack and task_struct:

(figure: d891d4e3ff554012bc1a67a379039756.png)

 

2: The kernel stack of a newly created process

copy_process
    dup_task_struct
        alloc_thread_stack_node
            __vmalloc_node_range   // allocates a stack of size THREAD_SIZE

 

static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
	struct task_struct *tsk;
	unsigned long *stack;
	struct vm_struct *stack_vm_area;
	int err;

	if (node == NUMA_NO_NODE)
		node = tsk_fork_get_node(orig);
	tsk = alloc_task_struct_node(node);
	if (!tsk)
		return NULL;

	stack = alloc_thread_stack_node(tsk, node);
	if (!stack)
		goto free_tsk;

	stack_vm_area = task_stack_vm_area(tsk);

	err = arch_dup_task_struct(tsk, orig);

	/*
	 * arch_dup_task_struct() clobbers the stack-related fields. Make
	 * sure they're properly initialized before using any stack-related
	 * functions again.
	 */
	tsk->stack = stack;
#ifdef CONFIG_VMAP_STACK
	tsk->stack_vm_area = stack_vm_area;
	... (omitted)
}
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
#ifdef CONFIG_VMAP_STACK
	void *stack;
	int i;

	for (i = 0; i < NR_CACHED_STACKS; i++) {
		struct vm_struct *s;

		s = this_cpu_xchg(cached_stacks[i], NULL);
		if (!s)
			continue;

		/* Clear stale pointers from reused stack. */
		memset(s->addr, 0, THREAD_SIZE);

		tsk->stack_vm_area = s;
		return s->addr;
	}

	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
				     VMALLOC_START, VMALLOC_END,
				     THREADINFO_GFP,
				     PAGE_KERNEL,
				     0, node, __builtin_return_address(0));

	/*
	 * We can't call find_vm_area() in interrupt context, and
	 * free_thread_stack() can be called in interrupt context,
	 * so cache the vm_struct.
	 */
	if (stack)
		tsk->stack_vm_area = find_vm_area(stack);
	return stack;
#else
	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
					     THREAD_SIZE_ORDER);

	return page ? page_address(page) : NULL;
#endif
}

3: Where are the PC and SP of a process newly created via fork set?

In the copy_thread function in arch/arm64/kernel/process.c:

_do_fork -> copy_process -> copy_thread_tls -> copy_thread

#define task_pt_regs(p) \
	((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
#define current_pt_regs() task_pt_regs(current)
#define task_stack_page(task)	((void *)(task)->stack)

int copy_thread(unsigned long clone_flags, unsigned long stack_start,
		unsigned long stk_sz, struct task_struct *p)
{
	printk("===copy_thread pid %d stack_start %llx stk_sz %llx kernel_stack %llx \n",
	       p->pid, stack_start, stk_sz, p->stack);
	struct pt_regs *childregs = task_pt_regs(p);	/* reserve a pt_regs-sized frame at the top of p's kernel stack */

	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));

	/*
	 * Unalias p->thread.sve_state (if any) from the parent task
	 * and disable discard SVE state for p:
	 */
	clear_tsk_thread_flag(p, TIF_SVE);
	p->thread.sve_state = NULL;

	/*
	 * In case p was allocated the same task_struct pointer as some
	 * other recently-exited task, make sure p is disassociated from
	 * any cpu that may have run that now-exited task recently.
	 * Otherwise we could erroneously skip reloading the FPSIMD
	 * registers for p.
	 */
	fpsimd_flush_task_state(p);

	if (likely(!(p->flags & PF_KTHREAD))) {		/* a user process */
		*childregs = *current_pt_regs();	/* start from the parent's saved registers */
		childregs->regs[0] = 0;

		/*
		 * Read the current TLS pointer from tpidr_el0 as it may be
		 * out-of-sync with the saved value.
		 */
		*task_user_tls(p) = read_sysreg(tpidr_el0);

		if (stack_start) {
			if (is_compat_thread(task_thread_info(p)))
				childregs->compat_sp = stack_start;
			else
				childregs->sp = stack_start;	/* the user-space stack */
		}

		/*
		 * If a TLS pointer was passed to clone (4th argument), use it
		 * for the new thread.
		 */
		if (clone_flags & CLONE_SETTLS)
			p->thread.uw.tp_value = childregs->regs[3];
	} else {
		memset(childregs, 0, sizeof(struct pt_regs));
		childregs->pstate = PSR_MODE_EL1h;
		if (IS_ENABLED(CONFIG_ARM64_UAO) &&
		    cpus_have_const_cap(ARM64_HAS_UAO))
			childregs->pstate |= PSR_UAO_BIT;
		p->thread.cpu_context.x19 = stack_start;
		p->thread.cpu_context.x20 = stk_sz;
	}
	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;	/* the first code the new task executes once it is woken up */
	p->thread.cpu_context.sp = (unsigned long)childregs;		/* the new task's saved SP: it points into its kernel stack, one pt_regs frame below the top */
	ptrace_hw_copy_thread(p);

	return 0;
}
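Expanding the task_pt_regs() macro quoted above by hand (an illustration only, written in the same kernel context as copy_thread) makes the layout explicit: the pt_regs frame occupies the highest sizeof(struct pt_regs) bytes of the new task's kernel stack, and that is exactly where cpu_context.sp is pointed.

/* Hand expansion of task_pt_regs(p); illustration, not new kernel code. */
struct pt_regs *childregs =
	(struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1;
/* i.e. childregs == (char *)p->stack + THREAD_SIZE - sizeof(struct pt_regs):
 * the top of the kernel stack minus one pt_regs frame.  cpu_switch_to will
 * later load this value into SP, and kernel_exit will pop the frame. */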

 

 

The output of the printk above is shown below. The stack_start addresses do indeed fall within the user address range; the entries where stack_start is 0 should be kernel threads.

[   29.460947] ===copy_thread pid 848 stack_start  ffffc25ea670  stk_sz 0 kernel_stack ffff00000eba0000 
[   29.473410] ===copy_thread pid 1046 stack_start  ffffaeb0c710  stk_sz 0 kernel_stack ffff000023880000 
[   29.509338] ===copy_thread pid 848 stack_start  ffffc25ea670  stk_sz 0 kernel_stack ffff000023a00000 
[   29.786659] ===copy_thread pid 1 stack_start  0  stk_sz 0 kernel_stack ffff00000e920000 
[   29.792466] ===copy_thread pid 848 stack_start  ffffc25eb220  stk_sz 0 kernel_stack ffff000023ae0000 
[   29.819915] ===copy_thread pid 848 stack_start  ffffc25eb220  stk_sz 0 kernel_stack ffff000023ca0000 
[   29.824324] ===copy_thread pid 848 stack_start  ffffc25eb220  stk_sz 0 kernel_stack ffff000023e40000 
[   29.853700] ===copy_thread pid 848 stack_start  ffffc25eb220  stk_sz 0 kernel_stack ffff000024000000 
[   29.856679] ===copy_thread pid 848 stack_start  ffffc25eb220  stk_sz 0 kernel_stack ffff0000241c0000 
[   29.909747] ===copy_thread pid 1 stack_start  0  stk_sz 0 kernel_stack ffff00000de20000 
[   29.931232] ===copy_thread pid 1 stack_start  0  stk_sz 0 kernel_stack ffff0000251a0000 
[   29.933685] ===copy_thread pid 1074 stack_start  ffff9d53e760  stk_sz 0 kernel_stack ffff00000f380000 
[   29.934775] ===copy_thread pid 848 stack_start  ffff7ad5eae0  stk_sz 0 kernel_stack ffff000024000000 
[   29.953477] ===copy_thread pid 1046 stack_start  ffffadaaeae0  stk_sz 0 kernel_stack ffff0000251c0000 
[   29.961483] ===copy_thread pid 1074 stack_start  ffff9cd2e760  stk_sz 0 kernel_stack ffff00000f3a0000 
 

5: The core task-switch function cpu_switch_to

The functions above create the new process and reserve a pt_regs-sized area in its kernel stack to hold the registers needed in user space. When the newly created process is woken up, the switch ultimately goes through cpu_switch_to:

// x0 is the prev task, x1 is the next task
ENTRY(cpu_switch_to)
	mov	x10, #THREAD_CPU_CONTEXT	// x10 holds the offset of thread.cpu_context
	add	x8, x0, x10			// x8 = address of prev's cpu_context
	mov	x9, sp				// x9 = current stack pointer
	// save prev's x19-x28, x29, SP and LR into its thread.cpu_context
	stp	x19, x20, [x8], #16
	stp	x21, x22, [x8], #16
	stp	x23, x24, [x8], #16
	stp	x25, x26, [x8], #16
	stp	x27, x28, [x8], #16
	stp	x29, x9, [x8], #16
	str	lr, [x8]			// LR holds the return address, i.e. the instruction in context_switch right after the call to cpu_switch_to
	add	x8, x1, x10			// x8 = address of next's cpu_context
	// restore next's thread.cpu_context into x19-x28, x29, SP and LR
	ldp	x19, x20, [x8], #16
	ldp	x21, x22, [x8], #16
	ldp	x23, x24, [x8], #16
	ldp	x25, x26, [x8], #16
	ldp	x27, x28, [x8], #16
	ldp	x29, x9, [x8], #16
	ldr	lr, [x8]
	mov	sp, x9
	ret					// branch to the address now in LR
ENDPROC(cpu_switch_to)
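For reference, the structure that cpu_switch_to saves into and restores from is thread.cpu_context, defined in arch/arm64/include/asm/processor.h; its field order matches the stp/ldp pairs above, with LR stored into the pc slot:

/* arch/arm64/include/asm/processor.h */
struct cpu_context {
	unsigned long x19;
	unsigned long x20;
	unsigned long x21;
	unsigned long x22;
	unsigned long x23;
	unsigned long x24;
	unsigned long x25;
	unsigned long x26;
	unsigned long x27;
	unsigned long x28;
	unsigned long fp;	/* x29 */
	unsigned long sp;
	unsigned long pc;	/* written from LR by cpu_switch_to */
};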

As mentioned earlier, for a newly created process task_struct.thread.cpu_context.sp points one pt_regs frame below the top of its kernel stack:

(figure: 473e5ad128e74413a2fc7f47af4049b2.png)

The first function a new process runs when it is woken up for the first time is ret_from_fork, which eventually goes through ret_to_user -> kernel_exit 0:

.macro	kernel_exit, el
	.if	\el != 0
	disable_daif

	/* Restore the task's original addr_limit. */
	ldr	x20, [sp, #S_ORIG_ADDR_LIMIT]
	str	x20, [tsk, #TSK_TI_ADDR_LIMIT]

	/* No need to restore UAO, it will be restored from SPSR_EL1 */
	.endif

	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
	.if	\el == 0
	ct_user_enter
	.endif

#ifdef CONFIG_ARM64_SW_TTBR0_PAN
	/*
	 * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
	 * PAN bit checking.
	 */
alternative_if ARM64_HAS_PAN
	b	2f				// skip TTBR0 PAN
alternative_else_nop_endif

	.if	\el != 0
	tbnz	x22, #22, 1f			// Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
	.endif

	__uaccess_ttbr0_enable x0, x1

	.if	\el == 0
	/*
	 * Enable errata workarounds only if returning to user. The only
	 * workaround currently required for TTBR0_EL1 changes are for the
	 * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
	 * corruption).
	 */
	bl	post_ttbr_update_workaround
	.endif

1:
	.if	\el != 0
	and	x22, x22, #~PSR_PAN_BIT		// ARMv8.0 CPUs do not understand this bit
	.endif
2:
#endif

	.if	\el == 0
	ldr	x23, [sp, #S_SP]		// load return stack pointer
	msr	sp_el0, x23			// restore the user-space stack pointer; it was saved in the pt_regs frame at the top of the kernel stack

	tst	x22, #PSR_MODE32_BIT		// native task?
	b.eq	3f

#ifdef CONFIG_ARM64_ERRATUM_845719
alternative_if ARM64_WORKAROUND_845719
#ifdef CONFIG_PID_IN_CONTEXTIDR
	mrs	x29, contextidr_el1
	msr	contextidr_el1, x29
#else
	msr	contextidr_el1, xzr
#endif
alternative_else_nop_endif
#endif
3:
	apply_ssbd 0, x0, x1
	.endif

	msr	elr_el1, x21			// set up the return data
	msr	spsr_el1, x22
	ldp	x0, x1, [sp, #16 * 0]
	ldp	x2, x3, [sp, #16 * 1]
	ldp	x4, x5, [sp, #16 * 2]
	ldp	x6, x7, [sp, #16 * 3]
	ldp	x8, x9, [sp, #16 * 4]
	ldp	x10, x11, [sp, #16 * 5]
	ldp	x12, x13, [sp, #16 * 6]
	ldp	x14, x15, [sp, #16 * 7]
	ldp	x16, x17, [sp, #16 * 8]
	ldp	x18, x19, [sp, #16 * 9]
	ldp	x20, x21, [sp, #16 * 10]
	ldp	x22, x23, [sp, #16 * 11]
	ldp	x24, x25, [sp, #16 * 12]
	ldp	x26, x27, [sp, #16 * 13]
	ldp	x28, x29, [sp, #16 * 14]
	ldr	lr, [sp, #S_LR]
	add	sp, sp, #S_FRAME_SIZE		// restore sp: about to return to user space, add S_FRAME_SIZE (sizeof(struct pt_regs)) back so the kernel SP is restored

	/*
	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on eret context synchronization
	 * when returning from IPI handler, and when returning to user-space.
	 */

	.if	\el == 0			// eret restores state from elr_el1, spsr_el1, etc.
alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
	bne	4f
	msr	far_el1, x30
	tramp_alias	x30, tramp_exit_native
	br	x30
4:
	tramp_alias	x30, tramp_exit_compat
	br	x30
#endif
	.else
	eret
	.endif
	.endm

Summary: when a process calls fork, it enters the kernel through a synchronous exception, and the kernel saves the registers at that moment into the pt_regs frame on the process's kernel stack.

The newly created process copies those registers, and the pc field of its task_struct's cpu_context is set to ret_from_fork. (If a kernel thread is being created, some additional fields are set as well; see copy_thread.)

The parent process returns to user space along the normal exception-return path, popping its registers from its kernel stack and continuing.

After the child is woken up, switch_to restores the registers from its task_struct's cpu_context; it runs ret_from_fork and then returns to user space by popping the pt_regs frame. In this way the child continues from the same code location as the parent. (Kernel threads differ slightly here.)
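A small user-space illustration of this (my own example, not from the article): because copy_thread sets childregs->regs[0] = 0, the child sees fork() return 0, while the parent gets the child's PID through its own pt_regs.

/* Minimal fork() demo, user space. */
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		/* Child: its pt_regs->regs[0] was forced to 0 in copy_thread. */
		printf("child, fork() returned 0\n");
	} else if (pid > 0) {
		/* Parent: returns along the normal syscall path. */
		printf("parent, fork() returned %d\n", pid);
	}
	return 0;
}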

 

Since we have touched on a process running for the first time and returning to user space, let us also look at how the kernel is entered through an exception, focusing on how the kernel handles the kernel SP and the user SP.

kernel_ventry is the first code executed when an exception is taken:

.macro kernel_ventry, el, label, regsize = 64
	.align 7
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
alternative_if ARM64_UNMAP_KERNEL_AT_EL0
	.if	\el == 0
	.if	\regsize == 64
	mrs	x30, tpidrro_el0
	msr	tpidrro_el0, xzr
	.else
	mov	x30, xzr
	.endif
	.endif
alternative_else_nop_endif
#endif

	sub	sp, sp, #S_FRAME_SIZE	// decrement the kernel SP by S_FRAME_SIZE, again reserving a pt_regs-sized frame for kernel_entry to push registers into

As shown above, the kernel SP is simply decremented by S_FRAME_SIZE, reserving a pt_regs-sized frame into which kernel_entry will then push the registers.

.macro	kernel_entry, el, regsize = 64
	.if	\regsize == 32
	mov	w0, w0				// zero upper 32 bits of x0
	.endif
	stp	x0, x1, [sp, #16 * 0]
	stp	x2, x3, [sp, #16 * 1]
	stp	x4, x5, [sp, #16 * 2]
	stp	x6, x7, [sp, #16 * 3]
	stp	x8, x9, [sp, #16 * 4]
	stp	x10, x11, [sp, #16 * 5]
	stp	x12, x13, [sp, #16 * 6]
	stp	x14, x15, [sp, #16 * 7]
	stp	x16, x17, [sp, #16 * 8]
	stp	x18, x19, [sp, #16 * 9]
	stp	x20, x21, [sp, #16 * 10]
	stp	x22, x23, [sp, #16 * 11]
	stp	x24, x25, [sp, #16 * 12]
	stp	x26, x27, [sp, #16 * 13]
	stp	x28, x29, [sp, #16 * 14]

	.if	\el == 0
	clear_gp_regs
	mrs	x21, sp_el0			// first save the user stack pointer sp_el0 into x21
	ldr_this_cpu	tsk, __entry_task, x20	// Ensure MDSCR_EL1.SS is clear,
	ldr	x19, [tsk, #TSK_TI_FLAGS]	// since we can unmask debug
	disable_step_tsk x19, x20		// exceptions when scheduling.

	apply_ssbd 1, x22, x23

	.else
	add	x21, sp, #S_FRAME_SIZE
	get_thread_info tsk
	/* Save the task's original addr_limit and set USER_DS */
	ldr	x20, [tsk, #TSK_TI_ADDR_LIMIT]
	str	x20, [sp, #S_ORIG_ADDR_LIMIT]
	mov	x20, #USER_DS
	str	x20, [tsk, #TSK_TI_ADDR_LIMIT]
	/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
	.endif /* \el == 0 */
	mrs	x22, elr_el1
	mrs	x23, spsr_el1
	stp	lr, x21, [sp, #S_LR]		// store the user stack pointer saved in x21 into the pt_regs frame

	/*
	 * In order to be able to dump the contents of struct pt_regs at the
	 * time the exception was taken (in case we attempt to walk the call
	 * stack later), chain it together with the stack frames.
	 */
	.if \el == 0
	stp	xzr, xzr, [sp, #S_STACKFRAME]
	.else
	stp	x29, x22, [sp, #S_STACKFRAME]
	.endif
	add	x29, sp, #S_STACKFRAME

	..........................................

	.if	\el == 0
	mov	w21, #NO_SYSCALL
	str	w21, [sp, #S_SYSCALLNO]
	.endif

	/*
	 * Set sp_el0 to current thread_info.
	 */
	.if	\el == 0
	msr	sp_el0, tsk			// entered from user mode: load tsk into sp_el0, which no longer holds the user stack pointer
	.endif

	/*
	 * Registers that may be useful after this macro is invoked:
	 *
	 * x21 - aborted SP
	 * x22 - aborted PC
	 * x23 - aborted PSTATE
	 */
	.endm

When user mode enters kernel mode through an exception:

1: A pt_regs-sized frame is reserved on the kernel stack to hold the current CPU registers, among them the user-mode stack pointer sp_el0:

        stp     lr, x21, [sp, #S_LR]   // at offset S_LR; a single stp stores both LR and the saved SP

        DEFINE(S_LR,                  offsetof(struct pt_regs, regs[30]));  // regs[30] is the 31st register (x30/LR); the next field is sp

2: On returning from kernel mode to user mode, the previously saved registers are popped back:

         ldr     x23, [sp, #S_SP]                // load return stack pointer
        msr     sp_el0, x23
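For reference, the layout these offsets index into is struct pt_regs in arch/arm64/include/asm/ptrace.h (simplified here, trailing fields omitted): regs[30] is x30/LR and sp immediately follows it, which is why the single stp lr, x21, [sp, #S_LR] can store both LR and the saved user SP.

/* arch/arm64/include/asm/ptrace.h (simplified) */
struct pt_regs {
	union {
		struct user_pt_regs user_regs;
		struct {
			u64 regs[31];	/* x0..x30; regs[30] is LR  -> S_LR  */
			u64 sp;		/* saved SP                 -> S_SP  */
			u64 pc;		/* ELR_EL1                  -> S_PC  */
			u64 pstate;	/* SPSR_EL1 */
		};
	};
	u64 orig_x0;
	/* ... remaining fields omitted ... */
};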
 

6: How is the user-space stack of the PID 1 process determined?

As mentioned earlier, the user stack is assigned in copy_thread when a process is created. Kernel thread 0 naturally has no user stack, so where is the user stack of process 1 determined? Add a debug print to the run_init_process function in init/main.c:

static int run_init_process(const char *init_filename)
{
	int ret;

	argv_init[0] = init_filename;
	pr_info("0 Run %s as init process user_stack %llx kernel_stack %llx\n",
		init_filename, current_pt_regs()->sp, current->stack);
	ret = do_execve(getname_kernel(init_filename),
		(const char __user *const __user *)argv_init,
		(const char __user *const __user *)envp_init);
	pr_info("1 Run %s as init process user_stack %llx kernel_stack %llx\n",
		init_filename, current_pt_regs()->sp, current->stack);
	return ret;
}

[    9.086479] 0 Run /init as init process user_stack 0  kernel_stack ffff00000b640000 
[    9.094441] 1 Run /init as init process user_stack ffffc33ad8f0  kernel_stack ffff00000b640000

This shows that the user stack is set up inside do_execve.

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};


do_execve
    do_execveat_common
        __do_execve_file
            bprm_mm_init
                __bprm_mm_init          // allocate a vm_area_struct representing the process stack and set bprm->p
            exec_binprm
                search_binary_handler
                    fmt->load_binary(bprm);
                        load_elf_binary
                            start_thread(regs, elf_entry, bprm->p);

static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
	memset(regs, 0, sizeof(*regs));
	forget_syscall(regs);
	regs->pc = pc;
}

static inline void start_thread(struct pt_regs *regs, unsigned long pc,
				unsigned long sp)
{
	start_thread_common(regs, pc);
	regs->pstate = PSR_MODE_EL0t;
	regs->sp = sp;
}
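From the user-space side this path is simply what execve() triggers (a minimal illustration of my own, not from the article): the syscall enters the kernel, load_elf_binary calls start_thread to set pt_regs->pc to the new ELF entry point and pt_regs->sp to the freshly built user stack (bprm->p), and the eventual kernel_exit/eret lands in the new program.

/* Minimal execve() demo, user space. */
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	char *argv[] = { "/bin/true", NULL };
	char *envp[] = { NULL };

	/* On success this never returns: the current image is replaced. */
	execve(argv[0], argv, envp);
	perror("execve");	/* only reached if execve() failed */
	return 1;
}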

The __bprm_mm_init function:

static int __bprm_mm_init(struct linux_binprm *bprm)
{
	int err;
	struct vm_area_struct *vma = NULL;
	struct mm_struct *mm = bprm->mm;

	bprm->vma = vma = vm_area_alloc(mm);
	if (!vma)
		return -ENOMEM;
	vma_set_anonymous(vma);

	if (down_write_killable(&mm->mmap_sem)) {
		err = -EINTR;
		goto err_free;
	}

	/*
	 * Place the stack at the largest stack address the architecture
	 * supports. Later, we'll move this to an appropriate place. We don't
	 * use STACK_TOP because that can depend on attributes which aren't
	 * configured yet.
	 */
	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
	vma->vm_end = STACK_TOP_MAX;			/* 0x1000000000000 */
	vma->vm_start = vma->vm_end - PAGE_SIZE;	/* 0xffffffff0000, one page lower */
	vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	err = insert_vm_struct(mm, vma);
	if (err)
		goto err;

	mm->stack_vm = mm->total_vm = 1;
	arch_bprm_mm_init(mm, vma);
	up_write(&mm->mmap_sem);
	bprm->p = vma->vm_end - sizeof(void *);
	return 0;
err:
	up_write(&mm->mmap_sem);
err_free:
	... (omitted)
}
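The numbers in the comments above follow directly from the constants; a minimal sketch of the arithmetic, assuming 64K pages and STACK_TOP_MAX = 2^48 as in those comments:

/* Illustration of the temporary-stack placement in __bprm_mm_init. */
#include <stdio.h>

int main(void)
{
	unsigned long stack_top_max = 1UL << 48;	/* 0x1000000000000 */
	unsigned long page_size     = 1UL << 16;	/* 64K */
	unsigned long vm_end   = stack_top_max;
	unsigned long vm_start = vm_end - page_size;		/* 0xffffffff0000 */
	unsigned long p        = vm_end - sizeof(void *);	/* initial bprm->p */

	printf("vma [%#lx, %#lx), bprm->p = %#lx\n", vm_start, vm_end, p);
	return 0;
}

setup_arg_pages() later shifts and randomizes this VMA downwards, which is why the log shown earlier reports a much lower final user_stack (e.g. ffffc33ad8f0).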

The load_elf_binary function:

static int load_elf_binary(struct linux_binprm *bprm)
{
	.......
	setup_new_exec(bprm);		/* __set_task_comm updates the task's comm */

	install_exec_creds(bprm);

	/* Do this so that we can load the interpreter, if need be. We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);	/* set up the argument pages and finalize the stack; randomize_stack_top picks a randomized user stack top */
	......
}
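The effect of randomize_stack_top is easy to observe from user space (a minimal illustration of my own, assuming ASLR is enabled, i.e. kernel.randomize_va_space is not 0): the address of a stack variable changes between runs of the same program.

/* Run this twice and compare the printed addresses. */
#include <stdio.h>

int main(void)
{
	int local;

	/* The stack top is randomized by setup_arg_pages()/randomize_stack_top()
	 * at exec time, so this address differs from run to run. */
	printf("stack variable at %p\n", (void *)&local);
	return 0;
}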

 

 

 
