当前位置:   article > 正文

【安卓13 源码】RescueParty救援机制

【安卓13 源码】RescueParty救援机制

RescueParty机制正是在这个背景下诞生的,当它注意到系统或系统核心组件陷入循环崩溃状态时,就会根据崩溃的程度执行不同的救援行动,以期望让设备恢复到正常使用的状态。

开机后会自动重启,进入Recovery界面。经查找,是由于 phone 进程(com.android.phone)在系统开机的时候不断崩溃,触发了RescueParty机制。相关的 am_crash 日志如下:

  1. Line 529269: E07D549 02-01 22:48:35.910 856 856 I am_crash: [15147,0,com.android.phone,684211789,java.lang.OutOfMemoryError,NULL,Parcel.java,-2,0]
  2. Line 531948: E07DDD3 02-01 22:49:10.425 856 3019 I am_crash: [25787,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  3. Line 533159: E07E272 02-01 22:49:13.280 856 2861 I am_crash: [25881,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  4. Line 534403: E07E722 02-01 22:49:16.198 856 5586 I am_crash: [25969,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  5. Line 535604: E07EBA7 02-01 22:49:18.967 856 1642 I am_crash: [26055,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  6. Line 536730: E07EFF1 02-01 22:49:21.790 856 2295 I am_crash: [26149,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  7. Line 537881: E07F454 02-01 22:49:24.643 856 3012 I am_crash: [26231,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  8. Line 539010: E07F8A1 02-01 22:49:27.549 856 4223 I am_crash: [26315,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  9. Line 540549: E07FE88 02-01 22:49:30.200 856 1642 I am_crash: [26396,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  10. Line 541649: E0802B8 02-01 22:49:33.040 856 1718 I am_crash: [26491,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  11. Line 542844: E080747 02-01 22:49:35.740 856 2344 I am_crash: [26576,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  12. Line 544058: E080BD9 02-01 22:49:38.501 856 2889 I am_crash: [26677,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  13. Line 545226: E08104C 02-01 22:49:41.248 856 933 I am_crash: [26814,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  14. Line 546330: E081480 02-01 22:49:44.017 856 6041 I am_crash: [26960,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  15. Line 547501: E0818D7 02-01 22:49:46.825 856 3986 I am_crash: [27047,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  16. Line 548666: E081D28 02-01 22:49:49.657 856 1718 I am_crash: [27127,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  17. Line 549841: E082183 02-01 22:49:52.626 856 2861 I am_crash: [27210,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  18. Line 550922: E082590 02-01 22:49:55.352 856 6035 I am_crash: [27291,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  19. Line 552014: E0829B8 02-01 22:49:58.081 856 2920 I am_crash: [27401,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  20. Line 553516: E082F7A 02-01 22:50:00.633 856 1719 I am_crash: [27513,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  21. Line 554659: E0833D5 02-01 22:50:03.484 856 4600 I am_crash: [27600,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  22. Line 555805: E083833 02-01 22:50:06.021 856 5825 I am_crash: [27684,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  23. Line 556963: E083C8D 02-01 22:50:08.703 856 1718 I am_crash: [27763,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  24. Line 558157: E0840FB 02-01 22:50:11.451 856 3017 I am_crash: [27846,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]

代码基于安卓 13

0.crash 触发的流程

  1. M086FC7 02-01 22:50:42.202 28772 28772 D AndroidRuntime: Shutting down VM
  2. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: FATAL EXCEPTION: main
  3. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: Process: com.android.phone, PID: 28772
  4. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: java.lang.RuntimeException: Error receiving broadcast Intent { act=android.intent.action.SIM_STATE_CHANGED flg=0x15000010 (has extras) } in com.android.internal.telephony.SimStateTracker$1@c995e01
  5. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.LoadedApk$ReceiverDispatcher$Args.lambda$getRunnable$0(LoadedApk.java:1819)
  6. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.LoadedApk$ReceiverDispatcher$Args.$r8$lambda$gDuJqgxY6Zb-ifyeubKeivTLAwk(Unknown Source:0)
  7. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.LoadedApk$ReceiverDispatcher$Args$$ExternalSyntheticLambda0.run(Unknown Source:2)
  8. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.os.Handler.handleCallback(Handler.java:958)
  9. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.os.Handler.dispatchMessage(Handler.java:99)
  10. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.os.Looper.loopOnce(Looper.java:205)
  11. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.os.Looper.loop(Looper.java:294)
  12. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.ActivityThread.main(ActivityThread.java:8492)
  13. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at java.lang.reflect.Method.invoke(Native Method)
  14. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:640)
  15. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:1026)
  16. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: Caused by: java.util.NoSuchElementException: No value present
  17. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at java.util.Optional.get(Optional.java:144)
  18. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.telephony.SimStateTracker.isUiccEnable(SimStateTracker.java:318)
  19. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.telephony.SimStateTracker.onSimStateChanged(SimStateTracker.java:252)
  20. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.telephony.SimStateTracker.-$$Nest$monSimStateChanged(Unknown Source:0)
  21. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.telephony.SimStateTracker$1.onReceive(SimStateTracker.java:190)
  22. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.LoadedApk$ReceiverDispatcher$Args.lambda$getRunnable$0(LoadedApk.java:1811)
  23. C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: ... 10 more
  24. E086FC9 02-01 22:50:42.206 856 3017 I am_crash: [28772,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
  25. S086FCA 02-01 22:50:42.211 856 3017 D ActivityManager: crashInfo.exceptionClassName = java.util.NoSuchElementException , crashInfo.exceptionMessage = No value present, crashInfo.throwFileName = Optional.java,crashInfo.throwLineNumber = 144

 "AndroidRuntime: Shutting down VM" 这条 log 是在进程退出的时候打印的。

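在 app 进程启动的时候,会走 AndroidRuntime.start 方法,通过 CallStaticVoidMethod 执行 Java 层的 main 方法,main 方法里会死循环执行 handler 的 loop 方法。在进程退出、main 方法返回之后,才会去 free(slashClassName),并打印 "Shutting down VM"。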

/frameworks/base/core/jni/AndroidRuntime.cpp

  1. 1193 void AndroidRuntime::start(const char* className, const Vector<String8>& options, bool zygote)
  2. 1194 {
  3. 1195 ALOGD(">>>>>> START %s uid %d <<<<<<\n",
  4. 1196 className != NULL ? className : "(unknown)", getuid());
  5. 1197
  6. 1198 static const String8 startSystemServer("start-system-server");
  7. 1199 // Whether this is the primary zygote, meaning the zygote which will fork system server.
  8. 1200 bool primary_zygote = false;
  9. 。。。。。。
  10. 1290 char* slashClassName = toSlashClassName(className != NULL ? className : "");
  11. 1291 jclass startClass = env->FindClass(slashClassName);
  12. 1292 if (startClass == NULL) {
  13. 1293 ALOGE("JavaVM unable to locate class '%s'\n", slashClassName);
  14. 1294 /* keep going */
  15. 1295 } else {
  16. 1296 jmethodID startMeth = env->GetStaticMethodID(startClass, "main",
  17. 1297 "([Ljava/lang/String;)V");
  18. 1298 if (startMeth == NULL) {
  19. 1299 ALOGE("JavaVM unable to find main() in '%s'\n", className);
  20. 1300 /* keep going */
  21. 1301 } else {
  22. // 执行main 方法
  23. 1302 env->CallStaticVoidMethod(startClass, startMeth, strArray);
  24. 1303
  25. 1304 #if 0
  26. 1305 if (env->ExceptionCheck())
  27. 1306 threadExitUncaughtException(env);
  28. 1307 #endif
  29. 1308 }
  30. 1309 }
  31. 1310 free(slashClassName);
  32. 1311
  33. // 打印下列的log
  34. 1312 ALOGD("Shutting down VM\n");

在执行main 方法的时候,会去监听 app crash 的报错的信息

/frameworks/base/core/java/com/android/internal/os/RuntimeInit.java

  1. 339 @UnsupportedAppUsage
  2. 340 public static final void main(String[] argv) {
  3. 341 preForkInit();
  4. 342 if (argv.length == 2 && argv[1].equals("application")) {
  5. 343 if (DEBUG) Slog.d(TAG, "RuntimeInit: Starting application");
  6. 344 redirectLogStreams();
  7. 345 } else {
  8. 346 if (DEBUG) Slog.d(TAG, "RuntimeInit: Starting tool");
  9. 347 }
  10. 348
  11. 349 commonInit();
  12. -------------
  13. 221 @UnsupportedAppUsage
  14. 222 protected static final void commonInit() {
  15. 223 if (DEBUG) Slog.d(TAG, "Entered RuntimeInit!");
  16. 224
  17. 225 /*
  18. 226 * set handlers; these apply to all threads in the VM. Apps can replace
  19. 227 * the default handler, but not the pre handler.
  20. 228 */
  21. // 创建 LoggingHandler 对象
  22. 229 LoggingHandler loggingHandler = new LoggingHandler();
  23. 230 RuntimeHooks.setUncaughtExceptionPreHandler(loggingHandler);
  24. // 设置捕获异常的handler类为 内部类 KillApplicationHandler:setDefaultUncaughtExceptionHandler
  25. 231 Thread.setDefaultUncaughtExceptionHandler(new KillApplicationHandler(loggingHandler));
  26. 232
  27. 233 /*

// 设置捕获异常的handler类为 内部类 KillApplicationHandler:setDefaultUncaughtExceptionHandler

/libcore/ojluni/src/main/java/java/lang/Thread.java

  1. 2193 public static void setDefaultUncaughtExceptionHandler(UncaughtExceptionHandler eh) {
  2. 2194 // Android-removed: SecurityManager stubbed out on Android.
  3. 2195 /*
  4. 2196 SecurityManager sm = System.getSecurityManager();
  5. 2197 if (sm != null) {
  6. 2198 sm.checkPermission(
  7. 2199 new RuntimePermission("setDefaultUncaughtExceptionHandler")
  8. 2200 );
  9. 2201 }
  10. 2202 */
  11. 2203
  12. 2204 defaultUncaughtExceptionHandler = eh;
  13. 2205 }
  14. ===========
  15. 2215 public static UncaughtExceptionHandler getDefaultUncaughtExceptionHandler(){
  16. 2216 return defaultUncaughtExceptionHandler;
  17. 2217 }
  18. ==========
  19. // 回调 uncaughtException 在如下,调用 dispatchUncaughtException 方法
  20. 2293 // Android-changed: Make dispatchUncaughtException() public, for use by tests.
  21. 2294 public final void dispatchUncaughtException(Throwable e) {
  22. 2295 // BEGIN Android-added: uncaughtExceptionPreHandler for use by platform.
  23. 2296 Thread.UncaughtExceptionHandler initialUeh =
  24. 2297 Thread.getUncaughtExceptionPreHandler();
  25. 2298 if (initialUeh != null) {
  26. 2299 try {
  27. 2300 initialUeh.uncaughtException(this, e);
  28. 2301 } catch (RuntimeException | Error ignored) {
  29. 2302 // Throwables thrown by the initial handler are ignored
  30. 2303 }
  31. 2304 }
  32. 2305 // END Android-added: uncaughtExceptionPreHandler for use by platform.
  33. 2306 getUncaughtExceptionHandler().uncaughtException(this, e);
  34. 2307 }
  35. 2308

在 art 虚拟机中,线程销毁(Thread::Destroy)的时候,如果还有未处理的异常,就会把该异常分发出去

/art/runtime/thread.cc

  1. // 线程销毁的时候调用
  2. 2500 void Thread::Destroy() {
  3. 2501 Thread* self = this;
  4. 2502 DCHECK_EQ(self, Thread::Current());
  5. 2503
  6. 2504 if (tlsPtr_.jni_env != nullptr) {
  7. 2505 {
  8. 2506 ScopedObjectAccess soa(self);
  9. 2507 MonitorExitVisitor visitor(self);
  10. 2508 // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
  11. 2509 tlsPtr_.jni_env->monitors_.VisitRoots(&visitor, RootInfo(kRootVMInternal));
  12. 2510 }
  13. 2511 // Release locally held global references which releasing may require the mutator lock.
  14. 2512 if (tlsPtr_.jpeer != nullptr) {
  15. 2513 // If pthread_create fails we don't have a jni env here.
  16. 2514 tlsPtr_.jni_env->DeleteGlobalRef(tlsPtr_.jpeer);
  17. 2515 tlsPtr_.jpeer = nullptr;
  18. 2516 }
  19. 2517 if (tlsPtr_.class_loader_override != nullptr) {
  20. 2518 tlsPtr_.jni_env->DeleteGlobalRef(tlsPtr_.class_loader_override);
  21. 2519 tlsPtr_.class_loader_override = nullptr;
  22. 2520 }
  23. 2521 }
  24. 2522
  25. 2523 if (tlsPtr_.opeer != nullptr) {
  26. 2524 ScopedObjectAccess soa(self);
  27. 2525 // We may need to call user-supplied managed code, do this before final clean-up.
  28. // 执行下列方法:HandleUncaughtExceptions
  29. 2526 HandleUncaughtExceptions(soa);

// 执行下列方法:HandleUncaughtExceptions

  1. 2612 void Thread::HandleUncaughtExceptions(ScopedObjectAccessAlreadyRunnable& soa) {
  2. 2613 if (!IsExceptionPending()) {
  3. 2614 return;
  4. 2615 }
  5. 2616 ScopedLocalRef<jobject> peer(tlsPtr_.jni_env, soa.AddLocalReference<jobject>(tlsPtr_.opeer));
  6. 2617 ScopedThreadStateChange tsc(this, ThreadState::kNative);
  7. 2618
  8. 2619 // Get and clear the exception.
  9. // 获取到抛出异常的信息
  10. 2620 ScopedLocalRef<jthrowable> exception(tlsPtr_.jni_env, tlsPtr_.jni_env->ExceptionOccurred());
  11. 2621 tlsPtr_.jni_env->ExceptionClear();
  12. 2622
  13. 2623 // Call the Thread instance's dispatchUncaughtException(Throwable)
  14. 2624 tlsPtr_.jni_env->CallVoidMethod(peer.get(),
  15. // 调用方法:java_lang_Thread_dispatchUncaughtException
  16. 2625 WellKnownClasses::java_lang_Thread_dispatchUncaughtException,
  17. 2626 exception.get());
  18. 2627
  19. 2628 // If the dispatchUncaughtException threw, clear that exception too.
  20. 2629 tlsPtr_.jni_env->ExceptionClear();
  21. 2630 }

// 调用方法:java_lang_Thread_dispatchUncaughtException,分发异常信息

/art/runtime/well_known_classes.cc

  1. // 调用 Thread 的 dispatchUncaughtException 方法
  2. 421 java_lang_Thread_dispatchUncaughtException = CacheMethod(env, java_lang_Thread, false, "dispatchUncaughtException", "(Ljava/lang/Throwable;)V");

综上从虚拟机调用了  Thread 的 dispatchUncaughtException 方法

回到 RuntimeInit.java

/frameworks/base/core/java/com/android/internal/os/RuntimeInit.java

  1. 135 public KillApplicationHandler(LoggingHandler loggingHandler) {
  2. 136 this.mLoggingHandler = Objects.requireNonNull(loggingHandler);
  3. 137 }
  4. 138
  5. // 执行 uncaughtException 方法
  6. 139 @Override
  7. 140 public void uncaughtException(Thread t, Throwable e) {
  8. 141 try {
  9. // 1)先执行 ensureLogging 打印crash log 方法
  10. 142 ensureLogging(t, e);
  11. 143
  12. 144 // Don't re-enter -- avoid infinite loops if crash-reporting crashes.
  13. 145 if (mCrashing) return;
  14. 146 mCrashing = true;
  15. 151 if (ActivityThread.currentActivityThread() != null) {
  16. 152 ActivityThread.currentActivityThread().stopProfiling();
  17. 153 }
  18. 154
  19. 155 // Bring up crash dialog, wait for it to be dismissed
  20. // 2)ams 去执行app crash 的方法:handleApplicationCrash
  21. 156 ActivityManager.getService().handleApplicationCrash(
  22. 157 mApplicationObject, new ApplicationErrorReport.ParcelableCrashInfo(e));
  23. 158 } catch (Throwable t2) {
  24. 159 if (t2 instanceof DeadObjectException) {
  25. 160 // System process is dead; ignore
  26. 161 } else {
  27. 162 try {
  28. 163 Clog_e(TAG, "Error reporting crash", t2);
  29. 164 } catch (Throwable t3) {
  30. 165 // Even Clog_e() fails! Oh well.
  31. 166 }
  32. 167 }
  33. 168 } finally {
  34. // 3)最后杀掉自己的进程:killProcess
  35. 169 // Try everything to make sure this process goes away.
  36. 170 Process.killProcess(Process.myPid());
  37. 171 System.exit(10);
  38. 172 }
  39. 173 }

// 1)先执行 ensureLogging 打印crash log 方法

  1. 192 private void ensureLogging(Thread t, Throwable e) {
  2. 193 if (!mLoggingHandler.mTriggered) {
  3. 194 try {
  4. // mLoggingHandler 是对象 LoggingHandler ,执行 uncaughtException 方法
  5. 195 mLoggingHandler.uncaughtException(t, e);
  6. 196 } catch (Throwable loggingThrowable) {
  7. 197 // Ignored.
  8. 198 }
  9. 199 }
  10. 200 }
  11. ============
  12. 93 private static class LoggingHandler implements Thread.UncaughtExceptionHandler {
  13. 94 public volatile boolean mTriggered = false;
  14. 95
  15. 96 @Override
  16. 97 public void uncaughtException(Thread t, Throwable e) {
  17. 98 mTriggered = true;
  18. 99
  19. 100 // Don't re-enter if KillApplicationHandler has already run
  20. 101 if (mCrashing) return;
  21. 102
  22. 103 // mApplicationObject is null for non-zygote java programs (e.g. "am")
  23. 104 // There are also apps running with the system UID. We don't want the
  24. 105 // first clause in either of these two cases, only for system_server.
  25. // 如果是系统进程的话
  26. 106 if (mApplicationObject == null && (Process.SYSTEM_UID == Process.myUid())) {
  27. 107 Clog_e(TAG, "*** FATAL EXCEPTION IN SYSTEM PROCESS: " + t.getName(), e);
  28. 108 } else {
  29. // app 进程走 logUncaught
  30. 109 logUncaught(t.getName(), ActivityThread.currentProcessName(), Process.myPid(), e);
  31. 110 }
  32. 111 }
  33. 112 }

// app 进程走 logUncaught

  1. // 打印线程名字,进程名,pid 进程号,e 抛出的异常
  2. 75 public static void logUncaught(String threadName, String processName, int pid, Throwable e) {
  3. 76 StringBuilder message = new StringBuilder();
  4. 77 // The "FATAL EXCEPTION" string is still used on Android even though
  5. 78 // apps can set a custom UncaughtExceptionHandler that renders uncaught
  6. 79 // exceptions non-fatal.
  7. 80 message.append("FATAL EXCEPTION: ").append(threadName).append("\n");
  8. 81 if (processName != null) {
  9. 82 message.append("Process: ").append(processName).append(", ");
  10. 83 }
  11. 84 message.append("PID: ").append(pid);
  12. 85 Clog_e(TAG, message.toString(), e);
  13. 86 }
  14. ----------
  15. 71 private static int Clog_e(String tag, String msg, Throwable tr) {
  16. 72 return Log.printlns(Log.LOG_ID_CRASH, Log.ERROR, tag, msg, tr);
  17. 73 }

// 主线程(main)crash,进程名为:com.android.phone,进程号为:28772

C086FC8  02-01 22:50:42.203 28772 28772 E AndroidRuntime: FATAL EXCEPTION: main
C086FC8  02-01 22:50:42.203 28772 28772 E AndroidRuntime: Process: com.android.phone, PID: 28772

// 2)ams 去执行app crash 的方法:handleApplicationCrash

/frameworks/base/services/core/java/com/android/server/am/ActivityManagerService.java

  1. 8369 public void handleApplicationCrash(IBinder app,
  2. 8370 ApplicationErrorReport.ParcelableCrashInfo crashInfo) {
  3. 8371 ProcessRecord r = findAppProcess(app, "Crash");
  4. 8372 final String processName = app == null ? "system_server"
  5. 8373 : (r == null ? "unknown" : r.processName);
  6. 8374
  7. 8375 handleApplicationCrashInner("crash", r, processName, crashInfo);
  8. 8376 }
  9. ============
  10. // 执行 handleApplicationCrashInner 方法

// 会打印下列log

S086FD5  02-01 22:50:42.220   856  3017 W ActivityManager: Process com.android.phone has crashed too many times, killing! Reason: crashed quickly
E086FD6  02-01 22:50:42.221   856  3017 I am_process_crashed_too_much: [0,com.android.phone,1001]

// 3)最后杀掉自己的进程:killProcess:发送signal 9去杀掉进程

/frameworks/base/core/java/android/os/Process.java

  1. 585 public static final int SIGNAL_KILL = 9;
  2. 1302 public static final void killProcess(int pid) {
  3. // 发送signal 9去杀掉进程
  4. 1303 sendSignal(pid, SIGNAL_KILL);
  5. 1304 }
  6. 1318 public static final native void sendSignal(int pid, int signal);

1. 救援的级别

  1. //什么也不做
  2. static final int LEVEL_NONE = 0;
  3. //主要针对非系统进程的属性设置进行重置
  4. static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
  5. //针对非系统进程属性,来自系统默认的属性重置,其他删除
  6. static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
  7. //所有进程系统默认的属性重置,其他删除
  8. static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
  9. //重启设备
  10. static final int LEVEL_WARM_REBOOT = 4;
  11. //尝试恢复出厂设置
  12. static final int LEVEL_FACTORY_RESET = 5;

2. 触发场景:

(1)永久性系统应用在 30 秒内崩溃 5 次以上调整一次级别。(Android 12 起默认为 60 秒内 5 次,与下文 Android 13 代码中的默认值一致)

(2)system_server 在 5 分钟内重启 5 次以上调整一次级别。(Android 12 为10分钟内5次)

当检测到上述某种情况时,救援程序会升级到下一救援级别、处理与该级别相关联的任务,并让设备继续运行,看看能否恢复。清除或重置内容的程度随级别而增加。最高级别会提示用户将设备恢复出厂设置。
 

下面以场景(1)为例进行分析:永久性系统应用在 60 秒内崩溃 5 次及以上,就会调整一次级别。

比如phone 进程短时间crash 多次

RescueParty机制 - 掘金

/frameworks/base/services/core/java/com/android/server/am/AppErrors.java

  1. 575 private void crashApplicationInner(ProcessRecord r, ApplicationErrorReport.CrashInfo crashInfo,
  2. 576 int callingPid, int callingUid) {
  3. 577 long timeMillis = System.currentTimeMillis();
  4. 578 String shortMsg = crashInfo.exceptionClassName;
  5. 579 String longMsg = crashInfo.exceptionMessage;
  6. 580 String stackTrace = crashInfo.stackTrace;
  7. 581 if (shortMsg != null && longMsg != null) {
  8. 582 longMsg = shortMsg + ": " + longMsg;
  9. 583 } else if (shortMsg != null) {
  10. 584 longMsg = shortMsg;
  11. 585 }
  12. 586
  13. 587 if (r != null) {
  14. 588 mPackageWatchdog.onPackageFailure(r.getPackageListWithVersionCode(),
  15. 589 PackageWatchdog.FAILURE_REASON_APP_CRASH);
  16. 590
  17. 591 synchronized (mService) {
  18. 592 mService.mProcessList.noteAppKill(r, (crashInfo != null
  19. 593 && "Native crash".equals(crashInfo.exceptionClassName))
  20. 594 ? ApplicationExitInfo.REASON_CRASH_NATIVE
  21. 595 : ApplicationExitInfo.REASON_CRASH,
  22. 596 ApplicationExitInfo.SUBREASON_UNKNOWN,
  23. 597 "crash");
  24. 598 }
  25. 599 }
  26. 600
  27. 601 final int relaunchReason = r != null

其中:r.getPackageListWithVersionCode 返回的是该进程的包名列表,这个列表可以通过 dump 查看

 /frameworks/base/services/core/java/com/android/server/am/PackageList.java

  1. 141 List<VersionedPackage> getPackageListWithVersionCode() {
  2. 142 synchronized (this) {
  3. 143 int size = mPkgList.size();
  4. 144 if (size == 0) {
  5. 145 return null;
  6. 146 }
  7. 147 List<VersionedPackage> list = new ArrayList<>();
  8. 148 for (int i = 0; i < size; i++) {
  9. 149 list.add(new VersionedPackage(mPkgList.keyAt(i), mPkgList.valueAt(i).appVersion));
  10. 150 }
  11. 151 return list;
  12. 152 }
  13. 153 }
  14. 154
  15. 155 void dump(PrintWriter pw, String prefix) {
  16. 156 synchronized (this) {
  17. 157 pw.print(prefix); pw.print("packageList={");
  18. 158 for (int i = 0, size = mPkgList.size(); i < size; i++) {
  19. 159 if (i > 0) pw.print(", ");
  20. 160 pw.print(mPkgList.keyAt(i));
  21. 161 }
  22. 162 pw.println("}");
  23. 163 }
  24. 164 }
  25. 165 }
  26. 166

打印 packageList 输出信息是该进程所有的包名,使用命令为:adb shell dumpsys activity processes 

 packageList={com.android.providers.telephony, com.sprd.omacp, com.android.ons, com.android.stk, com.android.unisoc.telephony.server, com.spreadtrum.ims, com.unisoc.phone, com.android.phone, com.sprd.ImsConnectionManager}

调用 PackageWatchdog 的  onPackageFailure 方法

/frameworks/base/services/core/java/com/android/server/PackageWatchdog.java

  1. 391 public void onPackageFailure(List<VersionedPackage> packages,
  2. 392 @FailureReasons int failureReason) {
  3. 393 if (packages == null) {
  4. 394 Slog.w(TAG, "Could not resolve a list of failing packages");
  5. 395 return;
  6. 396 }
  7. 397 mLongTaskHandler.post(() -> {
  8. 398 synchronized (mLock) {
  9. 399 if (mAllObservers.isEmpty()) {
  10. 400 return;
  11. 401 }
  12. // fail 原因是 FAILURE_REASON_APP_CRASH,不满足下列条件,走else 逻辑
  13. 402 boolean requiresImmediateAction = (failureReason == FAILURE_REASON_NATIVE_CRASH
  14. 403 || failureReason == FAILURE_REASON_EXPLICIT_HEALTH_CHECK);
  15. 404 if (requiresImmediateAction) {
  16. 405 handleFailureImmediately(packages, failureReason);
  17. 406 } else {
  18. // 走下列条件:
  19. // 如上分析,packages ,phone 进程有多个包名。遍历所有的包名
  20. 407 for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
  21. 408 VersionedPackage versionedPackage = packages.get(pIndex);
  22. 409 // Observer that will receive failure for versionedPackage
  23. 410 PackageHealthObserver currentObserverToNotify = null;
  24. 411 int currentObserverImpact = Integer.MAX_VALUE;
  25. 412 MonitoredPackage currentMonitoredPackage = null;
  26. 413
  27. 414 // Find observer with least user impact
  28. // 遍历所有的观察者(观察者是通过 registerHealthObserver 方法注册进来的)
  29. 415 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
  30. 416 ObserverInternal observer = mAllObservers.valueAt(oIndex);
  31. 417 PackageHealthObserver registeredObserver = observer.registeredObserver;
  32. // 1) 调用 observer 为 ObserverInternal 的 onPackageFailureLocked 方法
  33. 418 if (registeredObserver != null
  34. 419 && observer.onPackageFailureLocked(
  35. 420 versionedPackage.getPackageName())) {
  36. 421 MonitoredPackage p = observer.getMonitoredPackage(
  37. 422 versionedPackage.getPackageName());
  38. 423 int mitigationCount = 1;
  39. 424 if (p != null) {
  40. // 获取到 getMitigationCountLocked 的值为0,所以 mitigationCount 的值为 1
  41. 425 mitigationCount = p.getMitigationCountLocked() + 1;
  42. 426 }
  43. // 2)调用RescueParty.java 的方法onHealthCheckFailed
  44. // 该方法分析的结果是返回 1
  45. 427 int impact = registeredObserver.onHealthCheckFailed(
  46. 428 versionedPackage, failureReason, mitigationCount);
  47. 429 if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
  48. 430 && impact < currentObserverImpact) {
  49. 431 currentObserverToNotify = registeredObserver;
  50. // 重新设置下 currentObserverImpact 为 1
  51. 432 currentObserverImpact = impact;
  52. // 缓存当前的 MonitoredPackage
  53. 433 currentMonitoredPackage = p;
  54. 434 }
  55. 435 }
  56. 436 }
  57. 437
  58. 438 // Execute action with least user impact
  59. 439 if (currentObserverToNotify != null) {
  60. 440 int mitigationCount = 1;
  61. // noteMitigationCallLocked 通知增加 mitigationCount 的值为 1
  62. 441 if (currentMonitoredPackage != null) {
  63. 442 currentMonitoredPackage.noteMitigationCallLocked();
  64. 443 mitigationCount =
  65. 444 currentMonitoredPackage.getMitigationCountLocked();
  66. 445 }
  67. // mitigationCount 的值为 1
  68. // 3)调用RescueParty.java 的方法execute,mitigationCount 的值为 1
  69. 446 currentObserverToNotify.execute(versionedPackage,
  70. 447 failureReason, mitigationCount);
  71. 448 }
  72. 449 }
  73. 450 }
  74. 451 }
  75. 452 });
  76. 453 }
  77. ============
  78. // registerHealthObserver 方法的实现如下
  79. 279 public void registerHealthObserver(PackageHealthObserver observer) {
  80. 280 synchronized (mLock) {
  81. 281 ObserverInternal internalObserver = mAllObservers.get(observer.getName());
  82. 282 if (internalObserver != null) {
  83. 283 internalObserver.registeredObserver = observer;
  84. 284 } else {
  85. // 在 PackageWatchdog.getInstance(context).registerHealthObserver(
  86. 126 RescuePartyObserver.getInstance(context));
  87. // PackageWatchdog 设置 registeredObserver 为 RescuePartyObserver
  88. 285 internalObserver = new ObserverInternal(observer.getName(), new ArrayList<>());
  89. 286 internalObserver.registeredObserver = observer;
  90. 287 mAllObservers.put(observer.getName(), internalObserver);
  91. 288 syncState("added new observer");
  92. 289 }
  93. 290 }
  94. 291 }

// 1) 调用 observer 为 ObserverInternal 的 onPackageFailureLocked 方法

  1. 1236 @GuardedBy("mLock")
  2. 1237 public boolean onPackageFailureLocked(String packageName) {
  3. // RescueParty.java 返回的 isPersistent 是为true
  4. 1238 if (getMonitoredPackage(packageName) == null && registeredObserver.isPersistent()
  5. // mayObservePackage方法会判断该app 是否是常驻进程
  6. 1239 && registeredObserver.mayObservePackage(packageName)) {
  7. // 接下来调用 PackageWatchdog.newMonitoredPackage
  8. 1240 putMonitoredPackage(sPackageWatchdog.newMonitoredPackage(
  9. 1241 packageName, DEFAULT_OBSERVING_DURATION_MS, false));
  10. 1242 }
  11. 1243 MonitoredPackage p = getMonitoredPackage(packageName);
  12. 1244 if (p != null) {
  13. // 然后调用 MonitoredPackage 的 onFailureLocked 方法
  14. 1245 return p.onFailureLocked();
  15. 1246 }
  16. 1247 return false;
  17. 1248 }
  18. 1249

// mayObservePackage方法会判断该app 是否是常驻进程

/frameworks/base/services/core/java/com/android/server/RescueParty.java

  1. 652 @Override
  2. 653 public boolean mayObservePackage(String packageName) {
  3. 654 PackageManager pm = mContext.getPackageManager();
  4. 655 try {
  5. 656 // A package is a module if this is non-null
  6. 657 if (pm.getModuleInfo(packageName, 0) != null) {
  7. 658 return true;
  8. 659 }
  9. 660 } catch (PackageManager.NameNotFoundException ignore) {
  10. 661 }
  11. 662
  12. // 如果不是 module,则只有常驻的系统应用才会返回 true
  13. 663 return isPersistentSystemApp(packageName);
  14. 664 }
  15. ============
  16. 702 private boolean isPersistentSystemApp(@NonNull String packageName) {
  17. 703 PackageManager pm = mContext.getPackageManager();
  18. 704 try {
  19. 705 ApplicationInfo info = pm.getApplicationInfo(packageName, 0);
  20. 706 return (info.flags & PERSISTENT_MASK) == PERSISTENT_MASK;
  21. 707 } catch (PackageManager.NameNotFoundException e) {
  22. 708 return false;
  23. 709 }
  24. 710 }

// 接下来调用 PackageWatchdog.newMonitoredPackage

  1. // 观察的时间为 2 天(TimeUnit.DAYS.toMillis(2)), 为 durationMs
  2. 122 static final long DEFAULT_OBSERVING_DURATION_MS = TimeUnit.DAYS.toMillis(2);
  3. 1360 MonitoredPackage newMonitoredPackage(
  4. 1361 String name, long durationMs, boolean hasPassedHealthCheck) {
  5. 1362 return newMonitoredPackage(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck,
  6. 1363 new LongArrayQueue());
  7. 1364 }
  8. 1365
  9. 1366 MonitoredPackage newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs,
  10. 1367 boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls) {
  11. // 创建了 MonitoredPackage 对象
  12. 1368 return new MonitoredPackage(name, durationMs, healthCheckDurationMs,
  13. 1369 hasPassedHealthCheck, mitigationCalls);
  14. 1370 }
  15. ==========
  16. // MonitoredPackage 构造函数
  17. 1421 MonitoredPackage(String packageName, long durationMs,
  18. 1422 long healthCheckDurationMs, boolean hasPassedHealthCheck,
  19. 1423 LongArrayQueue mitigationCalls) {
  20. 1424 mPackageName = packageName;
  21. 1425 mDurationMs = durationMs;
  22. 1426 mHealthCheckDurationMs = healthCheckDurationMs;
  23. 1427 mHasPassedHealthCheck = hasPassedHealthCheck;
  24. 1428 mMitigationCalls = mitigationCalls;
  25. 1429 updateHealthCheckStateLocked();
  26. 1430 }

// 然后调用 MonitoredPackage 的 onFailureLocked 方法

  1. 1450 @GuardedBy("mLock")
  2. 1451 public boolean onFailureLocked() {
  3. 1452 // Sliding window algorithm: find out if there exists a window containing failures >=
  4. 1453 // mTriggerFailureCount.
  5. // 首先获取当前的时间
  6. 1454 final long now = mSystemClock.uptimeMillis();
  7. // 然后将当前的时间保存到 mFailureHistory
  8. 1455 mFailureHistory.addLast(now);
  9. // mTriggerFailureDurationMs 的值是为 1 分钟的
  10. // 如果最早记录的一次 crash 距离当前时间超过 1 分钟,则把它从窗口中移除
  11. 1456 while (now - mFailureHistory.peekFirst() > mTriggerFailureDurationMs) {
  12. 1457 // Prune values falling out of the window
  13. 1458 mFailureHistory.removeFirst();
  14. 1459 }
  15. // 产生fail 的次数为 5 次。
  16. // 如果窗口内产生crash 的次数达到 5 次(>= 5)的话,则设置failed 为 true,返回值也是为true
  17. 1460 boolean failed = mFailureHistory.size() >= mTriggerFailureCount;
  18. 1461 if (failed) {
  19. 1462 mFailureHistory.clear();
  20. 1463 }
  21. 1464 return failed;
  22. 1465 }
  23. ===========
  24. 197 private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS;
  25. 116 static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS =
  26. 117 (int) TimeUnit.MINUTES.toMillis(1);
  27. ------
  28. 198 @GuardedBy("mLock")
  29. 199 private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
  30. 120 static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5;

  1. 418 if (registeredObserver != null
  2. 419 && observer.onPackageFailureLocked(
  3. 420 versionedPackage.getPackageName())) {

综上,上述代码判断条件为 true,需要满足:该常驻进程 1 分钟内 crash 达到 5 次及以上。

// 2)调用RescueParty.java 的方法onHealthCheckFailed

/frameworks/base/services/core/java/com/android/server/RescueParty.java

  1. 617 @Override
  2. 618 public int onHealthCheckFailed(@Nullable VersionedPackage failedPackage,
  3. 619 @FailureReasons int failureReason, int mitigationCount) {
  4. // isDisabled 会根据一些配置判断救援机制(RescueParty)是否被禁用
  5. // failureReason 的值为 FAILURE_REASON_APP_CRASH
  6. 620 if (!isDisabled() && (failureReason == PackageWatchdog.FAILURE_REASON_APP_CRASH
  7. 621 || failureReason == PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING)) {
  8. // mayPerformFactoryReset 返回的值为 true
  9. 622 return mapRescueLevelToUserImpact(getRescueLevel(mitigationCount,
  10. 623 mayPerformFactoryReset(failedPackage)));
  11. 624 } else {
  12. 625 return PackageHealthObserverImpact.USER_IMPACT_NONE;
  13. 626 }
  14. 627 }
  15. =============
  16. // 获取救援模式的级别:当前 mitigationCount 的值为 1,所以返回 LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS
  17. 346 private static int getRescueLevel(int mitigationCount, boolean mayPerformFactoryReset) {
  18. 347 if (mitigationCount == 1) {
  19. 348 return LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS;
  20. 349 } else if (mitigationCount == 2) {
  21. 350 return LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES;
  22. 351 } else if (mitigationCount == 3) {
  23. 352 return LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS;
  24. 353 } else if (mitigationCount == 4) {
  25. 354 return Math.min(getMaxRescueLevel(mayPerformFactoryReset), LEVEL_WARM_REBOOT);
  26. 355 } else if (mitigationCount >= 5) {
  27. 356 return Math.min(getMaxRescueLevel(mayPerformFactoryReset), LEVEL_FACTORY_RESET);
  28. 357 } else {
  29. 358 Slog.w(TAG, "Expected positive mitigation count, was " + mitigationCount);
  30. 359 return LEVEL_NONE;
  31. 360 }
  32. 361 }
  33. ============
  34. // 接着执行mapRescueLevelToUserImpact
  35. // 返回的值为 int USER_IMPACT_LOW = 1;
  36. 477 private static int mapRescueLevelToUserImpact(int rescueLevel) {
  37. 478 switch(rescueLevel) {
  38. 479 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
  39. 480 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
  40. 481 return PackageHealthObserverImpact.USER_IMPACT_LOW;
  41. 482 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
  42. 483 case LEVEL_WARM_REBOOT:
  43. 484 case LEVEL_FACTORY_RESET:
  44. 485 return PackageHealthObserverImpact.USER_IMPACT_HIGH;
  45. 486 default:
  46. 487 return PackageHealthObserverImpact.USER_IMPACT_NONE;
  47. 488 }
  48. 489 }

// 3)调用RescueParty.java 的方法execute,mitigationCount 的值为 1

/frameworks/base/services/core/java/com/android/server/RescueParty.java

  1. 629 @Override
  2. 630 public boolean execute(@Nullable VersionedPackage failedPackage,
  3. 631 @FailureReasons int failureReason, int mitigationCount) {
  4. 632 if (isDisabled()) {
  5. 633 return false;
  6. 634 }
  7. 635 if (failureReason == PackageWatchdog.FAILURE_REASON_APP_CRASH
  8. 636 || failureReason == PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING) {
  9. // 获取到的level 为 LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS;
  10. 637 final int level = getRescueLevel(mitigationCount,
  11. 638 mayPerformFactoryReset(failedPackage));
  12. 639 executeRescueLevel(mContext,
  13. 640 failedPackage == null ? null : failedPackage.getPackageName(), level);
  14. 641 return true;
  15. 642 } else {
  16. 643 return false;
  17. 644 }
  18. 645 }
  19. ===========
  20. 363 private static void executeRescueLevel(Context context, @Nullable String failedPackage,
  21. 364 int level) {
  22. // 会打印下列的 log
  23. 365 Slog.w(TAG, "Attempting rescue level " + levelToString(level));
  24. 366 try {
  25. // 执行下列方法 executeRescueLevelInternal:level为 LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS
  26. 367 executeRescueLevelInternal(context, level, failedPackage);
  27. // 会打印下列的 event log
  28. 368 EventLogTags.writeRescueSuccess(level);
  29. 369 String successMsg = "Finished rescue level " + levelToString(level);
  30. 370 if (!TextUtils.isEmpty(failedPackage)) {
  31. 371 successMsg += " for package " + failedPackage;
  32. 372 }
  33. 373 logCriticalInfo(Log.DEBUG, successMsg);
  34. 374 } catch (Throwable t) {
  35. 375 logRescueException(level, failedPackage, t);
  36. 376 }
  37. 377 }

// 执行下列方法 executeRescueLevelInternal:level为 LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS

  1. 379 private static void executeRescueLevelInternal(Context context, int level, @Nullable
  2. 380 String failedPackage) throws Exception {
  3. 381 FrameworkStatsLog.write(FrameworkStatsLog.RESCUE_PARTY_RESET_REPORTED, level);
  4. 382 // Try our best to reset all settings possible, and once finished
  5. 383 // rethrow any exception that we encountered
  6. 384 Exception res = null;
  7. 385 Runnable runnable;
  8. 386 Thread thread;
  9. 387 switch (level) {
  10. 388 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
  11. 389 try {
  12. 390 resetAllSettingsIfNecessary(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS,
  13. 391 level);
  14. 392 } catch (Exception e) {
  15. 393 res = e;
  16. 394 }
  17. 395 try {
  18. 396 resetDeviceConfig(context, /*isScoped=*/true, failedPackage);
  19. 397 } catch (Exception e) {
  20. 398 res = e;
  21. 399 }
  22. 400 break;
  23. ---------
  24. 427 case LEVEL_WARM_REBOOT:
  25. 428 // Request the reboot from a separate thread to avoid deadlock on PackageWatchdog
  26. 429 // when device shutting down.
  27. 430 SystemProperties.set(PROP_ATTEMPTING_REBOOT, "true");
  28. 431 runnable = () -> {
  29. 432 try {
  30. // 如果是 LEVEL_WARM_REBOOT,则会去调用pm 的重启:reboot
  31. 433 PowerManager pm = context.getSystemService(PowerManager.class);
  32. 434 if (pm != null) {
  33. 435 pm.reboot(TAG);
  34. 436 }
  35. 437 } catch (Throwable t) {
  36. 438 logRescueException(level, failedPackage, t);
  37. 439 }
  38. 440 };
  39. 441 thread = new Thread(runnable);
  40. 442 thread.start();
  41. 443 break;

(2)system_server 在 5 分钟内重启 5 次以上时,调整一次救援级别。(Android 12 为 10 分钟内 5 次)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Li_阴宅/article/detail/769780
推荐阅读
相关标签
  

闽ICP备14008679号