
Linux 4.2.0 kernel analysis: how Linux calculates the start of physical memory

Today I downloaded the latest 4.2.0 kernel source and started reading through it.

Here is the flow diagram as I understand it (figure omitted).

Decompression of the zImage kernel image happens first. The code is in linux4.2.0/arch/arm/boot/compressed/head.S.

First we locate the program's entry point:

start:
                .type   start,#function
                .rept   7
                __nop
                .endr
#ifndef CONFIG_THUMB2_KERNEL
                mov     r0, r0
#else
 AR_CLASS(      sub     pc, pc, #3      )       @ A/R: switch to Thumb2 mode
  M_CLASS(      nop.w                   )       @ M: already in Thumb2 mode
                .thumb
#endif
                W(b)    1f
                .word   _magic_sig      @ Magic numbers to help the loader
                .word   _magic_start    @ absolute load/run zImage address
                .word   _magic_end      @ zImage end address
                .word   0x04030201      @ endianness flag
                .word   0x45454545      @ another magic number to indicate
                .word   _magic_table    @ additional data table

                __EFI_HEADER
1:
 ARM_BE8(       setend  be              )       @ go BE8 if compiled for BE8
 AR_CLASS(      mrs     r9, cpsr        )
#ifdef CONFIG_ARM_VIRT_EXT
                bl      __hyp_stub_install      @ get into SVC mode, reversibly
#endif
                mov     r7, r1                  @ save architecture ID
                mov     r8, r2                  @ save atags pointer

#ifndef CONFIG_CPU_V7M
                /*
                 * Booting from Angel - need to enter SVC mode and disable
                 * FIQs/IRQs (numeric definitions from angel arm.h source).
                 * We only do this if we were in user mode on entry.
                 */
                mrs     r2, cpsr                @ get current mode
                tst     r2, #3                  @ not user?
                bne     not_angel
                mov     r0, #0x17               @ angel_SWIreason_EnterSVC
 ARM(           swi     0x123456        )       @ angel_SWI_ARM
 THUMB(         svc     0xab            )       @ angel_SWI_THUMB
not_angel:
                safe_svcmode_maskall r0
                msr     spsr_cxsf, r9           @ Save the CPU boot mode in
                                                @ SPSR
#endif

Searching the Makefile in this directory for the relevant define, we find:

ifeq ($(CONFIG_ARM_VIRT_EXT),y)
OBJS            += hyp-stub.o
endif

The architecture (machine) ID passed in r1 is saved into r7, and the atags pointer passed in r2 into r8. Since CONFIG_CPU_V7M is not defined, we go on to enter SVC mode.

The entry point for switching to SVC mode, safe_svcmode_maskall, is in linux4.2.0/arch/arm/include/asm/assembler.h:

                .macro  safe_svcmode_maskall reg:req
#if __LINUX_ARM_ARCH__ >= 6 && !defined(CONFIG_CPU_V7M)
                mrs     \reg , cpsr
                eor     \reg, \reg, #HYP_MODE
                tst     \reg, #MODE_MASK
                bic     \reg , \reg , #MODE_MASK
                orr     \reg , \reg , #PSR_I_BIT | PSR_F_BIT | SVC_MODE
 THUMB(         orr     \reg , \reg , #PSR_T_BIT        )
                bne     1f
                orr     \reg, \reg, #PSR_A_BIT
                badr    lr, 2f
                msr     spsr_cxsf, \reg
                __MSR_ELR_HYP(14)
                __ERET
1:              msr     cpsr_c, \reg
2:
#else
/*
 * workaround for possibly broken pre-v6 hardware
 * (akita, Sharp Zaurus C-1000, PXA270-based)
 */
                setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, \reg
#endif
                .endm

Entering SVC mode also masks both FIQs and IRQs. The eor/tst pair at the top detects whether we arrived in HYP mode, in which case the mode change has to go through an exception return; a small C model of the computation follows.
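To make that concrete, here is a small C model of what the macro computes — not kernel code, though the constants below match arch/arm/include/uapi/asm/ptrace.h. XOR-ing with HYP_MODE leaves the mode bits zero exactly when we entered in HYP mode, and the target CPSR is SVC mode with IRQ and FIQ masked:

    #include <stdint.h>

    #define MODE_MASK  0x1f
    #define HYP_MODE   0x1a
    #define SVC_MODE   0x13
    #define PSR_I_BIT  0x80            /* IRQ masked when set */
    #define PSR_F_BIT  0x40            /* FIQ masked when set */

    /* Model of safe_svcmode_maskall: detect HYP mode, build the new CPSR. */
    static uint32_t safe_svcmode_maskall_model(uint32_t cpsr, int *was_hyp)
    {
            /* eor \reg, \reg, #HYP_MODE ; tst \reg, #MODE_MASK */
            *was_hyp = ((cpsr ^ HYP_MODE) & MODE_MASK) == 0;
            /* bic #MODE_MASK ; orr #PSR_I_BIT | PSR_F_BIT | SVC_MODE */
            return (cpsr & ~MODE_MASK) | PSR_I_BIT | PSR_F_BIT | SVC_MODE;
    }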

Next, we continue executing in sequence:

                .text
#ifdef CONFIG_AUTO_ZRELADDR
                /*
                 * Find the start of physical memory.  As we are executing
                 * without the MMU on, we are in the physical address space.
                 * We just need to get rid of any offset by aligning the
                 * address.
                 *
                 * This alignment is a balance between the requirements of
                 * different platforms - we have chosen 128MB to allow
                 * platforms which align the start of their physical memory
                 * to 128MB to use this feature, while allowing the zImage
                 * to be placed within the first 128MB of memory on other
                 * platforms.  Increasing the alignment means we place
                 * stricter alignment requirements on the start of physical
                 * memory, but relaxing it means that we break people who
                 * are already placing their zImage in (eg) the top 64MB
                 * of this range.
                 */
                mov     r4, pc
                and     r4, r4, #0xf8000000
                /* Determine final kernel image address. */
                add     r4, r4, #TEXT_OFFSET
#else
                ldr     r4, =zreladdr
#endif

When CONFIG_AUTO_ZRELADDR is enabled, the start of physical memory is found by masking the current pc down to a 128 MB boundary, and adding TEXT_OFFSET then gives the final kernel image address (a worked example is sketched below). The Makefile in this directory does not define CONFIG_AUTO_ZRELADDR, however, so r4 is simply loaded with zreladdr, the address where the kernel image will be placed.
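As a worked example of the CONFIG_AUTO_ZRELADDR arithmetic (the addresses and TEXT_OFFSET value here are assumptions for illustration):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t pc          = 0x80008040;  /* assumed: where the zImage is running */
            uint32_t text_offset = 0x00008000;  /* assumed: a typical ARM TEXT_OFFSET */

            uint32_t ram_start = pc & 0xf8000000u;        /* and r4, r4, #0xf8000000 */
            uint32_t r4        = ram_start + text_offset; /* add r4, r4, #TEXT_OFFSET */

            printf("RAM starts at 0x%08x, kernel goes to 0x%08x\n", ram_start, r4);
            /* prints: RAM starts at 0x80000000, kernel goes to 0x80008000 */
            return 0;
    }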

Then execution continues:

                /*
                 * Set up a page table only if it won't overwrite ourself.
                 * That means r4 < pc || r4 - 16k page directory > &_end.
                 * Given that r4 > &_end is most unfrequent, we add a rough
                 * additional 1MB of room for a possible appended DTB.
                 */
                mov     r0, pc
                cmp     r0, r4
                ldrcc   r0, LC0+32
                addcc   r0, r0, pc
                cmpcc   r4, r0
                orrcc   r4, r4, #1              @ remember we skipped cache_on
                blcs    cache_on

restart:        adr     r0, LC0
                ldmia   r0, {r1, r2, r3, r6, r10, r11, r12}
                ldr     sp, [r0, #28]

                /*
                 * We might be running at a different address.  We need
                 * to fix up various pointers.
                 */
                sub     r0, r0, r1              @ calculate the delta offset
                add     r6, r6, r0              @ _edata
                add     r10, r10, r0            @ inflated kernel size location

                /*
                 * The kernel build system appends the size of the
                 * decompressed kernel at the end of the compressed data
                 * in little-endian form.
                 */
                ldrb    r9, [r10, #0]
                ldrb    lr, [r10, #1]
                orr     r9, r9, lr, lsl #8
                ldrb    lr, [r10, #2]
                ldrb    r10, [r10, #3]
                orr     r9, r9, lr, lsl #16
                orr     r9, r9, r10, lsl #24

#ifndef CONFIG_ZBOOT_ROM
                /* malloc space is above the relocated stack (64k max) */
                add     sp, sp, r0
                add     r10, sp, #0x10000
#else
                /*
                 * With ZBOOT_ROM the bss/stack is non relocatable,
                 * but someone could still run this code from RAM,
                 * in which case our reference is _edata.
                 */
                mov     r10, r6
#endif

                mov     r5, #0                  @ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
                /*
                 * r0  = delta
                 * r2  = BSS start
                 * r3  = BSS end
                 * r4  = final kernel address (possibly with LSB set)
                 * r5  = appended dtb size (still unknown)
                 * r6  = _edata
                 * r7  = architecture ID
                 * r8  = atags/device tree pointer
                 * r9  = size of decompressed image
                 * r10 = end of this image, including bss/stack/malloc space if non XIP
                 * r11 = GOT start
                 * r12 = GOT end
                 * sp  = stack pointer
                 *
                 * if there are device trees (dtb) appended to zImage, advance r10 so that the
                 * dtb data will get relocated along with the kernel if necessary.
                 */

This checks whether setting up the page table would overwrite the running image: if the final kernel address in r4 is above pc but still within our image (plus roughly 1 MB of slack for an appended DTB), cache_on is skipped for now and that fact is recorded by setting the LSB of r4; otherwise the caches are turned on. From restart, the LC0 table is loaded and the various pointers are fixed up by the delta between the load address and the link address. Finally the size of the decompressed kernel, which the build system appends after the compressed data in little-endian form, is assembled byte by byte into r9 (C equivalent below).
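In C, the ldrb/orr sequence that reassembles the appended size corresponds to a plain little-endian load — a sketch, not kernel code:

    #include <stdint.h>

    /* Equivalent of the ldrb/orr sequence that reads the appended
     * little-endian size of the decompressed kernel. */
    static uint32_t read_le32(const uint8_t *p)
    {
            return (uint32_t)p[0]
                 | ((uint32_t)p[1] << 8)
                 | ((uint32_t)p[2] << 16)
                 | ((uint32_t)p[3] << 24);
    }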

Moving on:

                mov     r8, r6                  @ use the appended device tree

                /*
                 * Make sure that the DTB doesn't end up in the final
                 * kernel's .bss area. To do so, we adjust the decompressed
                 * kernel size to compensate if that .bss size is larger
                 * than the relocated code.
                 */
                ldr     r5, =_kernel_bss_size
                adr     r1, wont_overwrite
                sub     r1, r6, r1
                subs    r1, r5, r1
                addhi   r9, r9, r1

                /* Get the current DTB size */
                ldr     r5, [r6, #4]
#ifndef __ARMEB__
                /* convert r5 (dtb size) to little endian */
                eor     r1, r5, r5, ror #16
                bic     r1, r1, #0x00ff0000
                mov     r5, r5, ror #8
                eor     r5, r5, r1, lsr #8
#endif

                /* preserve 64-bit alignment */
                add     r5, r5, #7
                bic     r5, r5, #7

                /* relocate some pointers past the appended dtb */
                add     r6, r6, r5
                add     r10, r10, r5
                add     sp, sp, r5

Here the appended device tree is handled: r8 is pointed at the DTB, and the decompressed kernel size in r9 is padded so the DTB cannot end up inside the final kernel's .bss. The DTB's big-endian size word is then byte-swapped to little endian with the classic eor/ror sequence (C equivalent below), rounded up to 8-byte alignment, and _edata, r10, and sp are all moved past the appended DTB.
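Here is the same byte swap and alignment round-up rendered in C (a sketch for illustration; swab32/align8 are my names):

    #include <stdint.h>

    /* C equivalent of the eor/ror byte swap applied to the big-endian DTB size. */
    static uint32_t swab32(uint32_t x)
    {
            uint32_t t = x ^ ((x >> 16) | (x << 16)); /* eor r1, r5, r5, ror #16 */
            t &= ~0x00ff0000u;                        /* bic r1, r1, #0x00ff0000 */
            x = (x >> 8) | (x << 24);                 /* mov r5, r5, ror #8      */
            return x ^ (t >> 8);                      /* eor r5, r5, r1, lsr #8  */
    }

    /* preserve 64-bit alignment: add r5, r5, #7 ; bic r5, r5, #7 */
    static uint32_t align8(uint32_t n)
    {
            return (n + 7) & ~7u;
    }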

                /*
                 * Check to see if we will overwrite ourselves.
                 * r4  = final kernel address (possibly with LSB set)
                 * r9  = size of decompressed image
                 * r10 = end of this image, including bss/stack/malloc space if non XIP
                 * We basically want:
                 *   r4 - 16k page directory >= r10 -> OK
                 *   r4 + image length <= address of wont_overwrite -> OK
                 * Note: the possible LSB in r4 is harmless here.
                 */
                add     r10, r10, #16384
                cmp     r4, r10
                bhs     wont_overwrite
                add     r10, r4, r9
                adr     r9, wont_overwrite
                cmp     r10, r9
                bls     wont_overwrite

This part checks whether the running code overlaps the region where the kernel will be decompressed. If r4 (allowing for its 16 KB page directory) clears the end of our image, or the decompressed image ends below wont_overwrite, no relocation is needed; otherwise the code must move itself out of the way (see the C pseudocode below).
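Expressed as C pseudocode (the function and parameter names are mine), the two early exits to wont_overwrite are:

    #include <stdbool.h>
    #include <stdint.h>

    /* r4 = final kernel address, size = decompressed image size,
     * image_end = end of this image incl. bss/stack/malloc space. */
    static bool wont_overwrite(uint32_t r4, uint32_t size,
                               uint32_t image_end, uint32_t wont_overwrite_addr)
    {
            return (r4 >= image_end + 16384)           /* room for the 16k page dir */
                || (r4 + size <= wont_overwrite_addr); /* decompression ends below us */
    }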

                /*
                 * Relocate ourselves past the end of the decompressed kernel.
                 * r6  = _edata
                 * r10 = end of the decompressed kernel
                 * Because we always copy ahead, we need to do it from the end and go
                 * backward in case the source and destination overlap.
                 */
                /*
                 * Bump to the next 256-byte boundary with the size of
                 * the relocation code added. This avoids overwriting
                 * ourself when the offset is small.
                 */
                add     r10, r10, #((reloc_code_end - restart + 256) & ~255)
                bic     r10, r10, #255

                /* Get start of code we want to copy and align it down. */
                adr     r5, restart
                bic     r5, r5, #31

                /* Relocate the hyp vector base if necessary */

Next the relocation addresses are prepared: the destination r10 is bumped to the next 256-byte boundary with room for the relocation code itself, and the copy source (restart) in r5 is aligned down to a 32-byte boundary.

                sub     r9, r6, r5              @ size to copy
                add     r9, r9, #31             @ rounded up to a multiple
                bic     r9, r9, #31             @ ... of 32 bytes
                add     r6, r9, r5
                add     r9, r9, r10

                bl      cache_clean_flush

                badr    r0, restart
                add     r0, r0, r6
                mov     pc, r0

First the size to copy is computed into r9 and rounded up to a multiple of 32 bytes; r6 gets the source end address and r9 the destination end address. Note that only the code and data are moved, not the .bss, stack, or malloc space. The copy (done 32 bytes at a time in the full source) proceeds from the end backward so overlapping source and destination are safe (conceptual sketch below); cache_clean_flush then flushes the caches, and the final jump resumes execution at the relocated restart.
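Conceptually, the backward copy looks like this in C (a sketch; the real loop uses ldmdb/stmdb, 32 bytes per iteration):

    #include <stddef.h>
    #include <stdint.h>

    /* Overlap-safe backward copy: the idea behind relocating the image
     * past the end of the decompressed kernel. */
    static void copy_backward(uint8_t *dst_end, const uint8_t *src_end, size_t n)
    {
            while (n--)
                    *--dst_end = *--src_end;
    }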

Then the program continues:

wont_overwrite:
                /*
                 * If delta is zero, we are running at the address we were linked at.
                 * r0  = delta
                 * r2  = BSS start
                 * r3  = BSS end
                 * r4  = kernel execution address (possibly with LSB set)
                 * r5  = appended dtb size (0 if not present)
                 * r7  = architecture ID
                 * r8  = atags pointer
                 * r11 = GOT start
                 * r12 = GOT end
                 * sp  = stack pointer
                 */
                orrs    r1, r0, r5
                beq     not_relocated

                add     r11, r11, r0
                add     r12, r12, r0

If the delta in r0 is zero and no DTB was appended (r5 = 0), we are running at the address we were linked at, so no relocation is needed and we branch straight to not_relocated. Otherwise the GOT start and end pointers in r11 and r12 are rebased by the delta, and the fixup loop that follows (not quoted here) adds the delta to every GOT entry, as outlined below.
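In outline, the GOT fixup amounts to (a sketch; names are mine):

    #include <stdint.h>

    /* Sketch of the GOT fixup: every global-offset-table entry is
     * rebased by the load/link delta. */
    static void fixup_got(uint32_t *got_start, uint32_t *got_end, uint32_t delta)
    {
            for (uint32_t *p = got_start; p < got_end; p++)
                    *p += delta;
    }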

not_relocated:  mov     r0, #0
1:              str     r0, [r2], #4            @ clear bss
                str     r0, [r2], #4
                str     r0, [r2], #4
                str     r0, [r2], #4
                cmp     r2, r3
                blo     1b

                /*
                 * Did we skip the cache setup earlier?
                 * That is indicated by the LSB in r4.
                 * Do it now if so.
                 */
                tst     r4, #1
                bic     r4, r4, #1
                blne    cache_on

                /*
                 * The C runtime environment should now be setup sufficiently.
                 * Set up some pointers, and start decompressing.
                 *   r4  = kernel execution address
                 *   r7  = architecture ID
                 *   r8  = atags pointer
                 */
                mov     r0, r4
                mov     r1, sp                  @ malloc space above stack
                add     r2, sp, #0x10000        @ 64k max
                mov     r3, r7
                bl      decompress_kernel
                bl      cache_clean_flush
                bl      cache_off

#ifdef CONFIG_ARM_VIRT_EXT
                mrs     r0, spsr                @ Get saved CPU boot mode
                and     r0, r0, #MODE_MASK
                cmp     r0, #HYP_MODE           @ if not booted in HYP mode...
                bne     __enter_kernel          @ boot kernel directly

                adr     r12, .L__hyp_reentry_vectors_offset
                ldr     r0, [r12]
                add     r0, r0, r12

                bl      __hyp_set_vectors
                __HVC(0)                        @ otherwise bounce to hyp mode

                b       .                       @ should never be reached

                .align  2
.L__hyp_reentry_vectors_offset: .long   __hyp_reentry_vectors - .
#else
                b       __enter_kernel
#endif

__enter_kernel:
                mov     r0, #0                  @ must be 0
                mov     r1, r7                  @ restore architecture number
                mov     r2, r8                  @ restore atags pointer
 ARM(           mov     pc, r4          )       @ call kernel
 M_CLASS(       add     r4, r4, #1      )       @ enter in Thumb mode for M class
 THUMB(         bx      r4              )       @ entry point is always ARM for A/R classes

Then the kernel image is decompressed: r0-r3 carry the parameters into decompress_kernel (its C-side prototype is sketched below), after which the caches are flushed and turned off. Depending on the saved boot mode we either bounce through HYP mode or go straight to __enter_kernel, which restores the machine number and the atags pointer and jumps to the decompressed kernel at r4. From there we move into the kernel proper, in linux-4.20/arch/arm/kernel/head.S.
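For reference, under the AAPCS r0-r3 become the first four C arguments, and the receiving function in arch/arm/boot/compressed/misc.c looks roughly like this (abridged):

    /* arch/arm/boot/compressed/misc.c (abridged): r0..r3 from the asm above. */
    void decompress_kernel(unsigned long output_start,
                           unsigned long free_mem_ptr_p,
                           unsigned long free_mem_ptr_end_p,
                           int arch_id)
    {
            /* ... sets up the malloc area between the two free_mem pointers,
             * prints "Uncompressing Linux...", and calls the configured
             * decompressor to inflate the kernel at output_start ... */
    }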

There are still many places I don't fully understand; I'll come back and review this once I've learned more. My main reference was the article "ARM Linux启动流程分析——内核自解压阶段" (ARM Linux boot flow analysis: the kernel self-decompression stage).

Here is the boot flow once we enter the kernel itself:

 

        __HEAD
ENTRY(stext)
 ARM_BE8(       setend  be )                    @ ensure we are in BE8 mode

 THUMB(         badr    r9, 1f          )       @ Kernel is always entered in ARM.
 THUMB(         bx      r9              )       @ If this is a Thumb-2 kernel,
 THUMB(         .thumb                  )       @ switch to Thumb now.
 THUMB(1:                               )

#ifdef CONFIG_ARM_VIRT_EXT
        bl      __hyp_stub_install
#endif
        @ ensure svc mode and all interrupts masked
        safe_svcmode_maskall r9

As soon as we enter the kernel we switch to SVC mode and mask all interrupts.

        mrc     p15, 0, r9, c0, c0              @ get processor id
        bl      __lookup_processor_type         @ r5=procinfo r9=cpuid
        movs    r10, r5                         @ invalid processor (r5=0)?
 THUMB( it      eq )                            @ force fixup-able long branch encoding
        beq     __error_p                       @ yes, error 'p'

Next the processor type is detected; only if a match is found do we proceed (a conceptual C sketch follows).
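Conceptually, __lookup_processor_type scans the kernel's built-in proc_info table for an entry whose masked CPUID matches — a C sketch (abridged struct; the cpu_val/cpu_mask fields follow arch/arm/include/asm/procinfo.h):

    #include <stdint.h>
    #include <stddef.h>

    struct proc_info_list {
            unsigned int cpu_val;
            unsigned int cpu_mask;
            /* ... initfunc, MMU flags, etc. ... */
    };

    /* Section bounds provided by the linker script. */
    extern struct proc_info_list __proc_info_begin[], __proc_info_end[];

    /* Returns the matching procinfo entry, or NULL (r5 = 0 -> __error_p). */
    static struct proc_info_list *lookup_processor_type(uint32_t cpuid)
    {
            for (struct proc_info_list *p = __proc_info_begin;
                 p < __proc_info_end; p++)
                    if ((cpuid & p->cpu_mask) == p->cpu_val)
                            return p;
            return NULL;
    }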

#ifndef CONFIG_XIP_KERNEL
        adr     r3, 2f
        ldmia   r3, {r4, r8}
        sub     r4, r3, r4                      @ (PHYS_OFFSET - PAGE_OFFSET)
        add     r8, r8, r4                      @ PHYS_OFFSET
#else
        ldr     r8, =PLAT_PHYS_OFFSET           @ always constant in this case
#endif

This block computes the physical offset of RAM: r3 gets the run-time address of label 2, while r4 gets its link-time address (the .long . value stored there), so r3 - r4 = PHYS_OFFSET - PAGE_OFFSET; adding PAGE_OFFSET (the second word) gives PHYS_OFFSET in r8 (worked example below).
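A worked example with assumed addresses — a kernel linked at PAGE_OFFSET = 0xC0000000 but executing from RAM at physical 0x80000000:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t link_addr   = 0xC0008040; /* .long .    : value stored at label 2 */
            uint32_t run_addr    = 0x80008040; /* adr r3, 2f : where label 2 really is */
            uint32_t page_offset = 0xC0000000; /* .long PAGE_OFFSET                    */

            uint32_t delta       = run_addr - link_addr; /* PHYS_OFFSET - PAGE_OFFSET */
            uint32_t phys_offset = page_offset + delta;  /* r8 = PHYS_OFFSET          */

            printf("PHYS_OFFSET = 0x%08x\n", phys_offset); /* prints 0x80000000 */
            return 0;
    }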

        /*
         * r1 = machine no, r2 = atags or dtb,
         * r8 = phys_offset, r9 = cpuid, r10 = procinfo
         */
        bl      __vet_atags
#ifdef CONFIG_SMP_ON_UP
        bl      __fixup_smp
#endif
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
        bl      __fixup_pv_table
#endif
        bl      __create_page_tables

__vet_atags validates the atags/DTB pointer, the SMP and phys-to-virt patching fixups run where configured, and then __create_page_tables builds the initial page tables.

        ldr     r13, =__mmap_switched           @ address to jump to after
                                                @ mmu has been enabled
        badr    lr, 1f                          @ return (PIC) address
#ifdef CONFIG_ARM_LPAE
        mov     r5, #0                          @ high TTBR0
        mov     r8, r4, lsr #12                 @ TTBR1 is swapper_pg_dir pfn
#else
        mov     r8, r4                          @ set TTBR1 to swapper_pg_dir
#endif
        ldr     r12, [r10, #PROCINFO_INITFUNC]
        add     r12, r12, r10
        ret     r12
1:      b       __enable_mmu
ENDPROC(stext)
        .ltorg
#ifndef CONFIG_XIP_KERNEL
2:      .long   .
        .long   PAGE_OFFSET
#endif

/*
 * Setup common bits before finally enabling the MMU. Essentially
 * this is just loading the page table pointer and domain access
 * registers.  All these registers need to be preserved by the
 * processor setup function (or set in the case of r0)
 *
 *  r0  = cp#15 control register
 *  r1  = machine ID
 *  r2  = atags or dtb pointer
 *  r4  = TTBR pointer (low word)
 *  r5  = TTBR pointer (high word if LPAE)
 *  r9  = processor ID
 *  r13 = *virtual* address to jump to upon completion
 */
__enable_mmu:
#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
        orr     r0, r0, #CR_A
#else
        bic     r0, r0, #CR_A
#endif
#ifdef CONFIG_CPU_DCACHE_DISABLE
        bic     r0, r0, #CR_C
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
        bic     r0, r0, #CR_Z
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
        bic     r0, r0, #CR_I
#endif
#ifdef CONFIG_ARM_LPAE
        mcrr    p15, 0, r4, r5, c2              @ load TTBR0
#else
        mov     r5, #DACR_INIT
        mcr     p15, 0, r5, c3, c0, 0           @ load domain access register
        mcr     p15, 0, r4, c2, c0, 0           @ load page table pointer
#endif
        b       __turn_mmu_on
ENDPROC(__enable_mmu)

 

/*
 * Enable the MMU.  This completely changes the structure of the visible
 * memory space.  You will not be able to trace execution through this.
 * If you have an enquiry about this, *please* check the linux-arm-kernel
 * mailing list archives BEFORE sending another post to the list.
 *
 *  r0  = cp#15 control register
 *  r1  = machine ID
 *  r2  = atags or dtb pointer
 *  r9  = processor ID
 *  r13 = *virtual* address to jump to upon completion
 *
 * other registers depend on the function called upon completion
 */
        .align  5
        .pushsection    .idmap.text, "ax"
ENTRY(__turn_mmu_on)
        mov     r0, r0
        instr_sync
        mcr     p15, 0, r0, c1, c0, 0           @ write control reg
        mrc     p15, 0, r3, c0, c0, 0           @ read id reg
        instr_sync
        mov     r3, r3
        mov     r3, r13
        ret     r3
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)

Here the MMU is enabled, and then we jump through the address held in r13. Let's trace upward bit by bit to see what r13 actually is. (For the boot CPU shown above, r13 was loaded with __mmap_switched just before __enable_mmu; the snippet below is from the secondary-CPU path, secondary_startup.)

 

        adr     r4, __secondary_data
        ldmia   r4, {r5, r7, r12}               @ address to jump to after
        mov     r13, r12                        @ __secondary_switched address

So r13 gets the third word of the __secondary_data table, which is the address of __secondary_switched. Continuing:

__secondary_data:
        .long   .
        .long   secondary_data
        .long   __secondary_switched

 

ENTRY(__secondary_switched)
        ldr     sp, [r7, #12]                   @ get secondary_data.stack
        mov     fp, #0
        b       secondary_start_kernel
ENDPROC(__secondary_switched)

__secondary_switched picks up the stack from secondary_data and branches to secondary_start_kernel. On the boot CPU, the r13 target __mmap_switched likewise finishes by calling start_kernel. Either way we arrive at the kernel startup code in init/main.c; since the stack and pointers have been initialized by now, this part is written in C.

asmlinkage __visible void __init start_kernel(void)
{
        char *command_line;
        char *after_dashes;

        set_task_stack_end_magic(&init_task);
        smp_setup_processor_id();
        debug_objects_early_init();

        cgroup_init_early();

        local_irq_disable();
        early_boot_irqs_disabled = true;

        /*
         * Interrupts are still disabled. Do necessary setups, then
         * enable them.
         */
        boot_cpu_init();
        page_address_init();
        pr_notice("%s", linux_banner);
        setup_arch(&command_line);
        /*
         * Set up the the initial canary and entropy after arch
         * and after adding latent and command line entropy.
         */
        add_latent_entropy();
        add_device_randomness(command_line, strlen(command_line));
        boot_init_stack_canary();
        mm_init_cpumask(&init_mm);
        setup_command_line(command_line);
        setup_nr_cpu_ids();
        setup_per_cpu_areas();
        smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
        boot_cpu_hotplug_init();

        build_all_zonelists(NULL);
        page_alloc_init();

        pr_notice("Kernel command line: %s\n", boot_command_line);
        parse_early_param();
        after_dashes = parse_args("Booting kernel",
                                  static_command_line, __start___param,
                                  __stop___param - __start___param,
                                  -1, -1, NULL, &unknown_bootoption);
        if (!IS_ERR_OR_NULL(after_dashes))
                parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
                           NULL, set_init_arg);

        jump_label_init();

        /*
         * These use large bootmem allocations and must precede
         * kmem_cache_init()
         */
        setup_log_buf(0);
        vfs_caches_init_early();
        sort_main_extable();
        trap_init();
        mm_init();

        ftrace_init();

        /* trace_printk can be enabled here */
        early_trace_init();

        /*
         * Set up the scheduler prior starting any interrupts (such as the
         * timer interrupt). Full topology setup happens at smp_init()
         * time - but meanwhile we still have a functioning scheduler.
         */
        sched_init();
        /*
         * Disable preemption - early bootup scheduling is extremely
         * fragile until we cpu_idle() for the first time.
         */
        preempt_disable();
        if (WARN(!irqs_disabled(),
                 "Interrupts were enabled *very* early, fixing it\n"))
                local_irq_disable();
        radix_tree_init();

        /*
         * Set up housekeeping before setting up workqueues to allow the unbound
         * workqueue to take non-housekeeping into account.
         */
        housekeeping_init();

        /*
         * Allow workqueue creation and work item queueing/cancelling
         * early. Work item execution depends on kthreads and starts after
         * workqueue_init().
         */
        workqueue_init_early();

        rcu_init();

        /* Trace events are available after this */
        trace_init();

        if (initcall_debug)
                initcall_debug_enable();

        context_tracking_init();
        /* init some links before init_ISA_irqs() */
        early_irq_init();
        init_IRQ();
        tick_init();
        rcu_init_nohz();
        init_timers();
        hrtimers_init();
        softirq_init();
        timekeeping_init();
        time_init();
        printk_safe_init();
        perf_event_init();
        profile_init();
        call_function_init();
        WARN(!irqs_disabled(), "Interrupts were enabled early\n");
        early_boot_irqs_disabled = false;
        local_irq_enable();

        kmem_cache_init_late();

        /*
         * HACK ALERT! This is early. We're enabling the console before
         * we've done PCI setups etc, and console_init() must be aware of
         * this. But we do want output early, in case something goes wrong.
         */
        console_init();
        if (panic_later)
                panic("Too many boot %s vars at `%s'", panic_later,
                      panic_param);

        lockdep_init();

        /*
         * Need to run this when irqs are enabled, because it wants
         * to self-test [hard/soft]-irqs on/off lock inversion bugs
         * too:
         */
        locking_selftest();

        /*
         * This needs to be called before any devices perform DMA
         * operations that might use the SWIOTLB bounce buffers. It will
         * mark the bounce buffers as decrypted so that their usage will
         * not cause "plain-text" data to be decrypted when accessed.
         */
        mem_encrypt_init();

#ifdef CONFIG_BLK_DEV_INITRD
        if (initrd_start && !initrd_below_start_ok &&
            page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
                pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
                        page_to_pfn(virt_to_page((void *)initrd_start)),
                        min_low_pfn);
                initrd_start = 0;
        }
#endif
        page_ext_init();
        kmemleak_init();
        debug_objects_mem_init();
        setup_per_cpu_pageset();
        numa_policy_init();
        acpi_early_init();
        if (late_time_init)
                late_time_init();
        sched_clock_init();
        calibrate_delay();
        pid_idr_init();
        anon_vma_init();
#ifdef CONFIG_X86
        if (efi_enabled(EFI_RUNTIME_SERVICES))
                efi_enter_virtual_mode();
#endif
        thread_stack_cache_init();
        cred_init();
        fork_init();
        proc_caches_init();
        uts_ns_init();
        buffer_init();
        key_init();
        security_init();
        dbg_late_init();
        vfs_caches_init();
        pagecache_init();
        signals_init();
        seq_file_init();
        proc_root_init();
        nsfs_init();
        cpuset_init();
        cgroup_init();
        taskstats_init_early();
        delayacct_init();

        check_bugs();

        acpi_subsystem_init();
        arch_post_acpi_subsys_init();
        sfi_init_late();

        if (efi_enabled(EFI_RUNTIME_SERVICES)) {
                efi_free_boot_services();
        }

        /* Do the rest non-__init'ed, we're now alive */
        arch_call_rest_init();
}

All of these resources are initialized first, and process initialization is invoked last. Inside arch_call_rest_init() we find:

void __init __weak arch_call_rest_init(void)
{
        rest_init();
}

noinline void __ref rest_init(void)
{
        struct task_struct *tsk;
        int pid;

        rcu_scheduler_starting();
        /*
         * We need to spawn init first so that it obtains pid 1, however
         * the init task will end up wanting to create kthreads, which, if
         * we schedule it before we create kthreadd, will OOPS.
         */
        pid = kernel_thread(kernel_init, NULL, CLONE_FS);
        /*
         * Pin init on the boot CPU. Task migration is not properly working
         * until sched_init_smp() has been run. It will set the allowed
         * CPUs for init to the non isolated CPUs.
         */
        rcu_read_lock();
        tsk = find_task_by_pid_ns(pid, &init_pid_ns);
        set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
        rcu_read_unlock();

        numa_default_policy();
        pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
        rcu_read_lock();
        kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
        rcu_read_unlock();

        /*
         * Enable might_sleep() and smp_processor_id() checks.
         * They cannot be enabled earlier because with CONFIG_PREEMPT=y
         * kernel_thread() would trigger might_sleep() splats. With
         * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
         * already, but it's stuck on the kthreadd_done completion.
         */
        system_state = SYSTEM_SCHEDULING;

        complete(&kthreadd_done);

        /*
         * The boot idle thread must execute schedule()
         * at least once to get things moving:
         */
        schedule_preempt_disabled();
        /* Call into cpu_idle with preempt disabled */
        cpu_startup_entry(CPUHP_ONLINE);
}

Kernel startup continues in kernel_init, the thread that becomes PID 1:

static int __ref kernel_init(void *unused)
{
        int ret;

        kernel_init_freeable();
        /* need to finish all async __init code before freeing the memory */
        async_synchronize_full();
        ftrace_free_init_mem();
        free_initmem();
        mark_readonly();

        /*
         * Kernel mappings are now finalized - update the userspace page-table
         * to finalize PTI.
         */
        pti_finalize();

        system_state = SYSTEM_RUNNING;
        numa_default_policy();

        rcu_end_inkernel_boot();

        if (ramdisk_execute_command) {
                ret = run_init_process(ramdisk_execute_command);
                if (!ret)
                        return 0;
                pr_err("Failed to execute %s (error %d)\n",
                       ramdisk_execute_command, ret);
        }

        /*
         * We try each of these until one succeeds.
         *
         * The Bourne shell can be used instead of init if we are
         * trying to recover a really broken machine.
         */
        if (execute_command) {
                ret = run_init_process(execute_command);
                if (!ret)
                        return 0;
                panic("Requested init %s failed (error %d).",
                      execute_command, ret);
        }
        if (!try_to_run_init_process("/sbin/init") ||
            !try_to_run_init_process("/etc/init") ||
            !try_to_run_init_process("/bin/init") ||
            !try_to_run_init_process("/bin/sh"))
                return 0;

        panic("No working init found. Try passing init= option to kernel. "
              "See Linux Documentation/admin-guide/init.rst for guidance.");
}

 

static noinline void __init kernel_init_freeable(void)
{
        /*
         * Wait until kthreadd is all set-up.
         */
        wait_for_completion(&kthreadd_done);

        /* Now the scheduler is fully set up and can do blocking allocations */
        gfp_allowed_mask = __GFP_BITS_MASK;

        /*
         * init can allocate pages on any node
         */
        set_mems_allowed(node_states[N_MEMORY]);

        cad_pid = task_pid(current);

        smp_prepare_cpus(setup_max_cpus);

        workqueue_init();

        init_mm_internals();

        do_pre_smp_initcalls();
        lockup_detector_init();

        smp_init();
        sched_init_smp();

        page_alloc_init_late();

        do_basic_setup();

        /* Open the /dev/console on the rootfs, this should never fail */
        if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
                pr_err("Warning: unable to open an initial console.\n");

        (void) ksys_dup(0);
        (void) ksys_dup(0);
        /*
         * check if there is an early userspace init.  If yes, let it do all
         * the work
         */

        if (!ramdisk_execute_command)
                ramdisk_execute_command = "/init";

        if (ksys_access((const char __user *)
                        ramdisk_execute_command, 0) != 0) {
                ramdisk_execute_command = NULL;
                prepare_namespace();
        }

        /*
         * Ok, we have completed the initial bootup, and
         * we're essentially up and running. Get rid of the
         * initmem segments and start the user-mode stuff..
         *
         * rootfs is available now, try loading the public keys
         * and default modules
         */

        integrity_load_keys();
        load_default_modules();
}

prepare_namespace() mounts the root filesystem; the code is in linux-4.20/init/do_mounts.c:

/*
 * Prepare the namespace - decide what/where to mount, load ramdisks, etc.
 */
void __init prepare_namespace(void)
{
        int is_floppy;

        if (root_delay) {
                printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
                       root_delay);
                ssleep(root_delay);
        }

        /*
         * wait for the known devices to complete their probing
         *
         * Note: this is a potential source of long boot delays.
         * For example, it is not atypical to wait 5 seconds here
         * for the touchpad of a laptop to initialize.
         */
        wait_for_device_probe();

        md_run_setup();

        if (saved_root_name[0]) {
                root_device_name = saved_root_name;
                if (!strncmp(root_device_name, "mtd", 3) ||
                    !strncmp(root_device_name, "ubi", 3)) {
                        mount_block_root(root_device_name, root_mountflags);
                        goto out;
                }
                ROOT_DEV = name_to_dev_t(root_device_name);
                if (strncmp(root_device_name, "/dev/", 5) == 0)
                        root_device_name += 5;
        }

        if (initrd_load())
                goto out;

        /* wait for any asynchronous scanning to complete */
        if ((ROOT_DEV == 0) && root_wait) {
                printk(KERN_INFO "Waiting for root device %s...\n",
                       saved_root_name);
                while (driver_probe_done() != 0 ||
                       (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
                        msleep(5);
                async_synchronize_full();
        }

        is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;

        if (is_floppy && rd_doload && rd_load_disk(0))
                ROOT_DEV = Root_RAM0;

        mount_root();
out:
        devtmpfs_mount("dev");
        ksys_mount(".", "/", NULL, MS_MOVE, NULL);
        ksys_chroot(".");
}

Once loading completes, the kernel finally hands control over to user code (Linux starts /sbin/init first). To summarize, the condensed flow is: zImage self-decompression (compressed/head.S) → kernel entry at stext (kernel/head.S) → MMU on → start_kernel → rest_init → kernel_init → root filesystem mount → /sbin/init. (summary figure omitted)

  
