RescueParty机制正是在这个背景下诞生的,当它注意到系统或系统核心组件陷入循环崩溃状态时,就会根据崩溃的程度执行不同的救援行动,以期望让设备恢复到正常使用的状态。
问题现象:开机后会自动重启,进入Recovery界面。经查找,是由于phone进程在系统开机的时候不断崩溃,触发了RescueParty机制,相关的 am_crash 日志如下:
Line 529269: E07D549 02-01 22:48:35.910 856 856 I am_crash: [15147,0,com.android.phone,684211789,java.lang.OutOfMemoryError,NULL,Parcel.java,-2,0]
Line 531948: E07DDD3 02-01 22:49:10.425 856 3019 I am_crash: [25787,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 533159: E07E272 02-01 22:49:13.280 856 2861 I am_crash: [25881,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 534403: E07E722 02-01 22:49:16.198 856 5586 I am_crash: [25969,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 535604: E07EBA7 02-01 22:49:18.967 856 1642 I am_crash: [26055,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 536730: E07EFF1 02-01 22:49:21.790 856 2295 I am_crash: [26149,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 537881: E07F454 02-01 22:49:24.643 856 3012 I am_crash: [26231,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 539010: E07F8A1 02-01 22:49:27.549 856 4223 I am_crash: [26315,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 540549: E07FE88 02-01 22:49:30.200 856 1642 I am_crash: [26396,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 541649: E0802B8 02-01 22:49:33.040 856 1718 I am_crash: [26491,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 542844: E080747 02-01 22:49:35.740 856 2344 I am_crash: [26576,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 544058: E080BD9 02-01 22:49:38.501 856 2889 I am_crash: [26677,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 545226: E08104C 02-01 22:49:41.248 856 933 I am_crash: [26814,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 546330: E081480 02-01 22:49:44.017 856 6041 I am_crash: [26960,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 547501: E0818D7 02-01 22:49:46.825 856 3986 I am_crash: [27047,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 548666: E081D28 02-01 22:49:49.657 856 1718 I am_crash: [27127,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 549841: E082183 02-01 22:49:52.626 856 2861 I am_crash: [27210,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 550922: E082590 02-01 22:49:55.352 856 6035 I am_crash: [27291,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 552014: E0829B8 02-01 22:49:58.081 856 2920 I am_crash: [27401,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 553516: E082F7A 02-01 22:50:00.633 856 1719 I am_crash: [27513,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 554659: E0833D5 02-01 22:50:03.484 856 4600 I am_crash: [27600,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 555805: E083833 02-01 22:50:06.021 856 5825 I am_crash: [27684,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 556963: E083C8D 02-01 22:50:08.703 856 1718 I am_crash: [27763,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
Line 558157: E0840FB 02-01 22:50:11.451 856 3017 I am_crash: [27846,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
代码基于安卓 13
0.crash 触发的流程
M086FC7 02-01 22:50:42.202 28772 28772 D AndroidRuntime: Shutting down VM
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: FATAL EXCEPTION: main
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: Process: com.android.phone, PID: 28772
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: java.lang.RuntimeException: Error receiving broadcast Intent { act=android.intent.action.SIM_STATE_CHANGED flg=0x15000010 (has extras) } in com.android.internal.telephony.SimStateTracker$1@c995e01
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.LoadedApk$ReceiverDispatcher$Args.lambda$getRunnable$0(LoadedApk.java:1819)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.LoadedApk$ReceiverDispatcher$Args.$r8$lambda$gDuJqgxY6Zb-ifyeubKeivTLAwk(Unknown Source:0)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.LoadedApk$ReceiverDispatcher$Args$$ExternalSyntheticLambda0.run(Unknown Source:2)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.os.Handler.handleCallback(Handler.java:958)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.os.Handler.dispatchMessage(Handler.java:99)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.os.Looper.loopOnce(Looper.java:205)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.os.Looper.loop(Looper.java:294)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.ActivityThread.main(ActivityThread.java:8492)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at java.lang.reflect.Method.invoke(Native Method)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:640)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:1026)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: Caused by: java.util.NoSuchElementException: No value present
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at java.util.Optional.get(Optional.java:144)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.telephony.SimStateTracker.isUiccEnable(SimStateTracker.java:318)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.telephony.SimStateTracker.onSimStateChanged(SimStateTracker.java:252)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.telephony.SimStateTracker.-$$Nest$monSimStateChanged(Unknown Source:0)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at com.android.internal.telephony.SimStateTracker$1.onReceive(SimStateTracker.java:190)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: at android.app.LoadedApk$ReceiverDispatcher$Args.lambda$getRunnable$0(LoadedApk.java:1811)
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: ... 10 more
E086FC9 02-01 22:50:42.206 856 3017 I am_crash: [28772,0,com.android.phone,684211789,java.util.NoSuchElementException,No value present,Optional.java,144,0]
S086FCA 02-01 22:50:42.211 856 3017 D ActivityManager: crashInfo.exceptionClassName = java.util.NoSuchElementException , crashInfo.exceptionMessage = No value present, crashInfo.throwFileName = Optional.java,crashInfo.throwLineNumber = 144
AndroidRuntime: Shutting down VM 的打印是在进程退出的时候打印的。
在app 进程启动的时候,会走 AndroidRuntime.start 方法,调用 Java 层的 main 方法,然后死循环执行loop handler 方法。在进程退出(main 方法返回)的时候,去 free slashClassName,并打印 "Shutting down VM"
/frameworks/base/core/jni/AndroidRuntime.cpp
1193 void AndroidRuntime::start(const char* className, const Vector<String8>& options, bool zygote)
1194 {
1195 ALOGD(">>>>>> START %s uid %d <<<<<<\n",
1196 className != NULL ? className : "(unknown)", getuid());
1197
1198 static const String8 startSystemServer("start-system-server");
1199 // Whether this is the primary zygote, meaning the zygote which will fork system server.
1200 bool primary_zygote = false;
。。。。。。
1290 char* slashClassName = toSlashClassName(className != NULL ? className : "");
1291 jclass startClass = env->FindClass(slashClassName);
1292 if (startClass == NULL) {
1293 ALOGE("JavaVM unable to locate class '%s'\n", slashClassName);
1294 /* keep going */
1295 } else {
1296 jmethodID startMeth = env->GetStaticMethodID(startClass, "main",
1297 "([Ljava/lang/String;)V");
1298 if (startMeth == NULL) {
1299 ALOGE("JavaVM unable to find main() in '%s'\n", className);
1300 /* keep going */
1301 } else {
// 执行main 方法
1302 env->CallStaticVoidMethod(startClass, startMeth, strArray);
1303
1304 #if 0
1305 if (env->ExceptionCheck())
1306 threadExitUncaughtException(env);
1307 #endif
1308 }
1309 }
1310 free(slashClassName);
1311
// 打印下列的log
1312 ALOGD("Shutting down VM\n");
在执行main 方法的时候,会去监听 app crash 的报错的信息
/frameworks/base/core/java/com/android/internal/os/RuntimeInit.java
339 @UnsupportedAppUsage
340 public static final void main(String[] argv) {
341 preForkInit();
342 if (argv.length == 2 && argv[1].equals("application")) {
343 if (DEBUG) Slog.d(TAG, "RuntimeInit: Starting application");
344 redirectLogStreams();
345 } else {
346 if (DEBUG) Slog.d(TAG, "RuntimeInit: Starting tool");
347 }
348
349 commonInit();
-------------
221 @UnsupportedAppUsage
222 protected static final void commonInit() {
223 if (DEBUG) Slog.d(TAG, "Entered RuntimeInit!");
224
225 /*
226 * set handlers; these apply to all threads in the VM. Apps can replace
227 * the default handler, but not the pre handler.
228 */
// 创建 LoggingHandler 对象
229 LoggingHandler loggingHandler = new LoggingHandler();
230 RuntimeHooks.setUncaughtExceptionPreHandler(loggingHandler);
// 设置捕获异常的handler类为 内部类 KillApplicationHandler:setDefaultUncaughtExceptionHandler
231 Thread.setDefaultUncaughtExceptionHandler(new KillApplicationHandler(loggingHandler));
232
233 /*
// 设置捕获异常的handler类为 内部类 KillApplicationHandler:setDefaultUncaughtExceptionHandler
/libcore/ojluni/src/main/java/java/lang/Thread.java
2193 public static void setDefaultUncaughtExceptionHandler(UncaughtExceptionHandler eh) {
2194 // Android-removed: SecurityManager stubbed out on Android.
2195 /*
2196 SecurityManager sm = System.getSecurityManager();
2197 if (sm != null) {
2198 sm.checkPermission(
2199 new RuntimePermission("setDefaultUncaughtExceptionHandler")
2200 );
2201 }
2202 */
2203
2204 defaultUncaughtExceptionHandler = eh;
2205 }
===========
2215 public static UncaughtExceptionHandler getDefaultUncaughtExceptionHandler(){
2216 return defaultUncaughtExceptionHandler;
2217 }
==========
// uncaughtException 的回调是在下面的 dispatchUncaughtException 方法中触发的
2293 // Android-changed: Make dispatchUncaughtException() public, for use by tests.
2294 public final void dispatchUncaughtException(Throwable e) {
2295 // BEGIN Android-added: uncaughtExceptionPreHandler for use by platform.
2296 Thread.UncaughtExceptionHandler initialUeh =
2297 Thread.getUncaughtExceptionPreHandler();
2298 if (initialUeh != null) {
2299 try {
2300 initialUeh.uncaughtException(this, e);
2301 } catch (RuntimeException | Error ignored) {
2302 // Throwables thrown by the initial handler are ignored
2303 }
2304 }
2305 // END Android-added: uncaughtExceptionPreHandler for use by platform.
2306 getUncaughtExceptionHandler().uncaughtException(this, e);
2307 }
2308
在 art 虚拟机中,线程退出(detach)的时候,如果还有未处理的异常,就会去分发这个未捕获的异常
/art/runtime/thread.cc
// 线程销毁(detach)的时候调用
2500 void Thread::Destroy() {
2501 Thread* self = this;
2502 DCHECK_EQ(self, Thread::Current());
2503
2504 if (tlsPtr_.jni_env != nullptr) {
2505 {
2506 ScopedObjectAccess soa(self);
2507 MonitorExitVisitor visitor(self);
2508 // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
2509 tlsPtr_.jni_env->monitors_.VisitRoots(&visitor, RootInfo(kRootVMInternal));
2510 }
2511 // Release locally held global references which releasing may require the mutator lock.
2512 if (tlsPtr_.jpeer != nullptr) {
2513 // If pthread_create fails we don't have a jni env here.
2514 tlsPtr_.jni_env->DeleteGlobalRef(tlsPtr_.jpeer);
2515 tlsPtr_.jpeer = nullptr;
2516 }
2517 if (tlsPtr_.class_loader_override != nullptr) {
2518 tlsPtr_.jni_env->DeleteGlobalRef(tlsPtr_.class_loader_override);
2519 tlsPtr_.class_loader_override = nullptr;
2520 }
2521 }
2522
2523 if (tlsPtr_.opeer != nullptr) {
2524 ScopedObjectAccess soa(self);
2525 // We may need to call user-supplied managed code, do this before final clean-up.
// 执行下列方法:HandleUncaughtExceptions
2526 HandleUncaughtExceptions(soa);
// 执行下列方法:HandleUncaughtExceptions
2612 void Thread::HandleUncaughtExceptions(ScopedObjectAccessAlreadyRunnable& soa) {
2613 if (!IsExceptionPending()) {
2614 return;
2615 }
2616 ScopedLocalRef<jobject> peer(tlsPtr_.jni_env, soa.AddLocalReference<jobject>(tlsPtr_.opeer));
2617 ScopedThreadStateChange tsc(this, ThreadState::kNative);
2618
2619 // Get and clear the exception.
// 获取到抛出异常的信息
2620 ScopedLocalRef<jthrowable> exception(tlsPtr_.jni_env, tlsPtr_.jni_env->ExceptionOccurred());
2621 tlsPtr_.jni_env->ExceptionClear();
2622
2623 // Call the Thread instance's dispatchUncaughtException(Throwable)
2624 tlsPtr_.jni_env->CallVoidMethod(peer.get(),
// 调用方法:java_lang_Thread_dispatchUncaughtException
2625 WellKnownClasses::java_lang_Thread_dispatchUncaughtException,
2626 exception.get());
2627
2628 // If the dispatchUncaughtException threw, clear that exception too.
2629 tlsPtr_.jni_env->ExceptionClear();
2630 }
// 调用方法:java_lang_Thread_dispatchUncaughtException,分发异常信息
/art/runtime/well_known_classes.cc
// 调用 Thread 的 dispatchUncaughtException 方法
421 java_lang_Thread_dispatchUncaughtException = CacheMethod(env, java_lang_Thread, false, "dispatchUncaughtException", "(Ljava/lang/Throwable;)V");
综上从虚拟机调用了 Thread 的 dispatchUncaughtException 方法
回到 RuntimeInit.java
/frameworks/base/core/java/com/android/internal/os/RuntimeInit.java
135 public KillApplicationHandler(LoggingHandler loggingHandler) {
136 this.mLoggingHandler = Objects.requireNonNull(loggingHandler);
137 }
138
// 执行 uncaughtException 方法
139 @Override
140 public void uncaughtException(Thread t, Throwable e) {
141 try {
// 1)先执行 ensureLogging 打印crash log 方法
142 ensureLogging(t, e);
143
144 // Don't re-enter -- avoid infinite loops if crash-reporting crashes.
145 if (mCrashing) return;
146 mCrashing = true;
151 if (ActivityThread.currentActivityThread() != null) {
152 ActivityThread.currentActivityThread().stopProfiling();
153 }
154
155 // Bring up crash dialog, wait for it to be dismissed
// 2)ams 去执行app crash 的方法:handleApplicationCrash
156 ActivityManager.getService().handleApplicationCrash(
157 mApplicationObject, new ApplicationErrorReport.ParcelableCrashInfo(e));
158 } catch (Throwable t2) {
159 if (t2 instanceof DeadObjectException) {
160 // System process is dead; ignore
161 } else {
162 try {
163 Clog_e(TAG, "Error reporting crash", t2);
164 } catch (Throwable t3) {
165 // Even Clog_e() fails! Oh well.
166 }
167 }
168 } finally {
// 3)最后杀掉自己的进程:killProcess
169 // Try everything to make sure this process goes away.
170 Process.killProcess(Process.myPid());
171 System.exit(10);
172 }
173 }
// 1)先执行 ensureLogging 打印crash log 方法
192 private void ensureLogging(Thread t, Throwable e) {
193 if (!mLoggingHandler.mTriggered) {
194 try {
// mLoggingHandler 是对象 LoggingHandler ,执行 uncaughtException 方法
195 mLoggingHandler.uncaughtException(t, e);
196 } catch (Throwable loggingThrowable) {
197 // Ignored.
198 }
199 }
200 }
============
93 private static class LoggingHandler implements Thread.UncaughtExceptionHandler {
94 public volatile boolean mTriggered = false;
95
96 @Override
97 public void uncaughtException(Thread t, Throwable e) {
98 mTriggered = true;
99
100 // Don't re-enter if KillApplicationHandler has already run
101 if (mCrashing) return;
102
103 // mApplicationObject is null for non-zygote java programs (e.g. "am")
104 // There are also apps running with the system UID. We don't want the
105 // first clause in either of these two cases, only for system_server.
// 如果是系统进程的话
106 if (mApplicationObject == null && (Process.SYSTEM_UID == Process.myUid())) {
107 Clog_e(TAG, "*** FATAL EXCEPTION IN SYSTEM PROCESS: " + t.getName(), e);
108 } else {
// app 进程走 logUncaught
109 logUncaught(t.getName(), ActivityThread.currentProcessName(), Process.myPid(), e);
110 }
111 }
112 }
// app 进程走 logUncaught
// 打印线程名字,进程名,pid 进程号,e 抛出的异常
75 public static void logUncaught(String threadName, String processName, int pid, Throwable e) {
76 StringBuilder message = new StringBuilder();
77 // The "FATAL EXCEPTION" string is still used on Android even though
78 // apps can set a custom UncaughtExceptionHandler that renders uncaught
79 // exceptions non-fatal.
80 message.append("FATAL EXCEPTION: ").append(threadName).append("\n");
81 if (processName != null) {
82 message.append("Process: ").append(processName).append(", ");
83 }
84 message.append("PID: ").append(pid);
85 Clog_e(TAG, message.toString(), e);
86 }
----------
71 private static int Clog_e(String tag, String msg, Throwable tr) {
72 return Log.printlns(Log.LOG_ID_CRASH, Log.ERROR, tag, msg, tr);
73 }
// 主线程(main)crash,进程名为:com.android.phone,进程号为:28772
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: FATAL EXCEPTION: main
C086FC8 02-01 22:50:42.203 28772 28772 E AndroidRuntime: Process: com.android.phone, PID: 28772
// 2)ams 去执行app crash 的方法:handleApplicationCrash
/frameworks/base/services/core/java/com/android/server/am/ActivityManagerService.java
8369 public void handleApplicationCrash(IBinder app,
8370 ApplicationErrorReport.ParcelableCrashInfo crashInfo) {
8371 ProcessRecord r = findAppProcess(app, "Crash");
8372 final String processName = app == null ? "system_server"
8373 : (r == null ? "unknown" : r.processName);
8374
8375 handleApplicationCrashInner("crash", r, processName, crashInfo);
8376 }
============
// 执行 handleApplicationCrashInner 方法
// 会打印下列log
S086FD5 02-01 22:50:42.220 856 3017 W ActivityManager: Process com.android.phone has crashed too many times, killing! Reason: crashed quickly
E086FD6 02-01 22:50:42.221 856 3017 I am_process_crashed_too_much: [0,com.android.phone,1001]
// 3)最后杀掉自己的进程:killProcess:发送signal 9去杀掉进程
/frameworks/base/core/java/android/os/Process.java
585 public static final int SIGNAL_KILL = 9;
1302 public static final void killProcess(int pid) {
// 发送signal 9去杀掉进程
1303 sendSignal(pid, SIGNAL_KILL);
1304 }
1318 public static final native void sendSignal(int pid, int signal);
1. 救援的级别
//什么也不做
static final int LEVEL_NONE = 0;
//主要针对非系统进程的属性设置进行重置
static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
//针对非系统进程属性,来自系统默认的属性重置,其他删除
static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
//所有进程系统默认的属性重置,其他删除
static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
//重启设备
static final int LEVEL_WARM_REBOOT = 4;
//尝试恢复出厂设置
static final int LEVEL_FACTORY_RESET = 5;
2. 触发场景:
(1)永久性系统应用在 30 秒内崩溃 5 次以上调整一次级别。(Android 12 默认为60秒内5次)
(2)system_server 在 5 分钟内重启 5 次以上调整一次级别。(Android 12 为10分钟内5次)
当检测到上述某种情况时,救援程序会升级到下一救援级别、处理与该级别相关联的任务,并让设备继续运行,看看能否恢复。清除或重置内容的程度随级别而增加。最高级别会提示用户将设备恢复出厂设置。
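下面先分析场景(1):永久性系统应用短时间内多次崩溃后调整一次级别。本文基于的 Android 13 代码中,默认阈值是 60 秒内崩溃 5 次,见下文的 DEFAULT_TRIGGER_FAILURE_DURATION_MS 和 DEFAULT_TRIGGER_FAILURE_COUNT。(Android 12 默认为60秒内5次)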
比如phone 进程短时间crash 多次
RescueParty机制 - 掘金
/frameworks/base/services/core/java/com/android/server/am/AppErrors.java
575 private void crashApplicationInner(ProcessRecord r, ApplicationErrorReport.CrashInfo crashInfo,
576 int callingPid, int callingUid) {
577 long timeMillis = System.currentTimeMillis();
578 String shortMsg = crashInfo.exceptionClassName;
579 String longMsg = crashInfo.exceptionMessage;
580 String stackTrace = crashInfo.stackTrace;
581 if (shortMsg != null && longMsg != null) {
582 longMsg = shortMsg + ": " + longMsg;
583 } else if (shortMsg != null) {
584 longMsg = shortMsg;
585 }
586
587 if (r != null) {
588 mPackageWatchdog.onPackageFailure(r.getPackageListWithVersionCode(),
589 PackageWatchdog.FAILURE_REASON_APP_CRASH);
590
591 synchronized (mService) {
592 mService.mProcessList.noteAppKill(r, (crashInfo != null
593 && "Native crash".equals(crashInfo.exceptionClassName))
594 ? ApplicationExitInfo.REASON_CRASH_NATIVE
595 : ApplicationExitInfo.REASON_CRASH,
596 ApplicationExitInfo.SUBREASON_UNKNOWN,
597 "crash");
598 }
599 }
600
601 final int relaunchReason = r != null
其中:r.getPackageListWithVersionCode 可以通过dump 获取到
/frameworks/base/services/core/java/com/android/server/am/PackageList.java
141 List<VersionedPackage> getPackageListWithVersionCode() {
142 synchronized (this) {
143 int size = mPkgList.size();
144 if (size == 0) {
145 return null;
146 }
147 List<VersionedPackage> list = new ArrayList<>();
148 for (int i = 0; i < size; i++) {
149 list.add(new VersionedPackage(mPkgList.keyAt(i), mPkgList.valueAt(i).appVersion));
150 }
151 return list;
152 }
153 }
154
155 void dump(PrintWriter pw, String prefix) {
156 synchronized (this) {
157 pw.print(prefix); pw.print("packageList={");
158 for (int i = 0, size = mPkgList.size(); i < size; i++) {
159 if (i > 0) pw.print(", ");
160 pw.print(mPkgList.keyAt(i));
161 }
162 pw.println("}");
163 }
164 }
165 }
166
打印 packageList 输出信息是该进程所有的包名,使用命令为:adb shell dumpsys activity processes
packageList={com.android.providers.telephony, com.sprd.omacp, com.android.ons, com.android.stk, com.android.unisoc.telephony.server, com.spreadtrum.ims, com.unisoc.phone, com.android.phone, com.sprd.ImsConnectionManager}
调用 PackageWatchdog 的 onPackageFailure 方法
/frameworks/base/services/core/java/com/android/server/PackageWatchdog.java
391 public void onPackageFailure(List<VersionedPackage> packages,
392 @FailureReasons int failureReason) {
393 if (packages == null) {
394 Slog.w(TAG, "Could not resolve a list of failing packages");
395 return;
396 }
397 mLongTaskHandler.post(() -> {
398 synchronized (mLock) {
399 if (mAllObservers.isEmpty()) {
400 return;
401 }
// fail 原因是 FAILURE_REASON_APP_CRASH,不满足下列条件,走else 逻辑
402 boolean requiresImmediateAction = (failureReason == FAILURE_REASON_NATIVE_CRASH
403 || failureReason == FAILURE_REASON_EXPLICIT_HEALTH_CHECK);
404 if (requiresImmediateAction) {
405 handleFailureImmediately(packages, failureReason);
406 } else {
// 走下列条件:
// 如上分析,packages ,phone 进程有多个包名。遍历所有的包名
407 for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
408 VersionedPackage versionedPackage = packages.get(pIndex);
409 // Observer that will receive failure for versionedPackage
410 PackageHealthObserver currentObserverToNotify = null;
411 int currentObserverImpact = Integer.MAX_VALUE;
412 MonitoredPackage currentMonitoredPackage = null;
413
414 // Find observer with least user impact
// 遍历所有的观察者(观察者是通过 registerHealthObserver 方法注册进来的)
415 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
416 ObserverInternal observer = mAllObservers.valueAt(oIndex);
417 PackageHealthObserver registeredObserver = observer.registeredObserver;
// 1) 调用 observer 为 ObserverInternal 的 onPackageFailureLocked 方法
418 if (registeredObserver != null
419 && observer.onPackageFailureLocked(
420 versionedPackage.getPackageName())) {
421 MonitoredPackage p = observer.getMonitoredPackage(
422 versionedPackage.getPackageName());
423 int mitigationCount = 1;
424 if (p != null) {
// 获取到 getMitigationCountLocked 的值为0,所以 mitigationCount 的值为 1
425 mitigationCount = p.getMitigationCountLocked() + 1;
426 }
// 2)调用RescueParty.java 的方法onHealthCheckFailed
// 该方法分析的结果是返回 1
427 int impact = registeredObserver.onHealthCheckFailed(
428 versionedPackage, failureReason, mitigationCount);
429 if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
430 && impact < currentObserverImpact) {
431 currentObserverToNotify = registeredObserver;
// 重新设置下 currentObserverImpact 为 1
432 currentObserverImpact = impact;
// 缓存当前的 MonitoredPackage
433 currentMonitoredPackage = p;
434 }
435 }
436 }
437
438 // Execute action with least user impact
439 if (currentObserverToNotify != null) {
440 int mitigationCount = 1;
// noteMitigationCallLocked 通知增加 mitigationCount 的值为 1
441 if (currentMonitoredPackage != null) {
442 currentMonitoredPackage.noteMitigationCallLocked();
443 mitigationCount =
444 currentMonitoredPackage.getMitigationCountLocked();
445 }
// mitigationCount 的值为 1
// 3)调用RescueParty.java 的方法execute,mitigationCount 的值为 1
446 currentObserverToNotify.execute(versionedPackage,
447 failureReason, mitigationCount);
448 }
449 }
450 }
451 }
452 });
453 }
============
// registerHealthObserver 方法的实现如下:
279 public void registerHealthObserver(PackageHealthObserver observer) {
280 synchronized (mLock) {
281 ObserverInternal internalObserver = mAllObservers.get(observer.getName());
282 if (internalObserver != null) {
283 internalObserver.registeredObserver = observer;
284 } else {
// 在 PackageWatchdog.getInstance(context).registerHealthObserver(
126 RescuePartyObserver.getInstance(context));
// PackageWatchdog 设置 registeredObserver 为 RescuePartyObserver
285 internalObserver = new ObserverInternal(observer.getName(), new ArrayList<>());
286 internalObserver.registeredObserver = observer;
287 mAllObservers.put(observer.getName(), internalObserver);
288 syncState("added new observer");
289 }
290 }
291 }
// 1) 调用 observer 为 ObserverInternal 的 onPackageFailureLocked 方法
1236 @GuardedBy("mLock")
1237 public boolean onPackageFailureLocked(String packageName) {
// RescueParty.java 返回的 isPersistent 是为true
1238 if (getMonitoredPackage(packageName) == null && registeredObserver.isPersistent()
// mayObservePackage方法会判断该app 是否是 module 或者常驻(persistent)系统应用
1239 && registeredObserver.mayObservePackage(packageName)) {
// 接下来调用 PackageWatchdog.newMonitoredPackage
1240 putMonitoredPackage(sPackageWatchdog.newMonitoredPackage(
1241 packageName, DEFAULT_OBSERVING_DURATION_MS, false));
1242 }
1243 MonitoredPackage p = getMonitoredPackage(packageName);
1244 if (p != null) {
// 然后调用 MonitoredPackage 的 onFailureLocked 方法
1245 return p.onFailureLocked();
1246 }
1247 return false;
1248 }
1249
// mayObservePackage方法会判断该app 是否是 module 或者常驻(persistent)系统应用
/frameworks/base/services/core/java/com/android/server/RescueParty.java
652 @Override
653 public boolean mayObservePackage(String packageName) {
654 PackageManager pm = mContext.getPackageManager();
655 try {
656 // A package is a module if this is non-null
657 if (pm.getModuleInfo(packageName, 0) != null) {
658 return true;
659 }
660 } catch (PackageManager.NameNotFoundException ignore) {
661 }
662
// 判断是常驻进程才会返回为 true
663 return isPersistentSystemApp(packageName);
664 }
============
702 private boolean isPersistentSystemApp(@NonNull String packageName) {
703 PackageManager pm = mContext.getPackageManager();
704 try {
705 ApplicationInfo info = pm.getApplicationInfo(packageName, 0);
706 return (info.flags & PERSISTENT_MASK) == PERSISTENT_MASK;
707 } catch (PackageManager.NameNotFoundException e) {
708 return false;
709 }
710 }
// 接下来调用 PackageWatchdog.newMonitoredPackage
// 观察的时间为 2 天(TimeUnit.DAYS.toMillis(2)), 为 durationMs
122 static final long DEFAULT_OBSERVING_DURATION_MS = TimeUnit.DAYS.toMillis(2);
1360 MonitoredPackage newMonitoredPackage(
1361 String name, long durationMs, boolean hasPassedHealthCheck) {
1362 return newMonitoredPackage(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck,
1363 new LongArrayQueue());
1364 }
1365
1366 MonitoredPackage newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs,
1367 boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls) {
// 创建了 MonitoredPackage 对象
1368 return new MonitoredPackage(name, durationMs, healthCheckDurationMs,
1369 hasPassedHealthCheck, mitigationCalls);
1370 }
==========
// MonitoredPackage 构造函数
1421 MonitoredPackage(String packageName, long durationMs,
1422 long healthCheckDurationMs, boolean hasPassedHealthCheck,
1423 LongArrayQueue mitigationCalls) {
1424 mPackageName = packageName;
1425 mDurationMs = durationMs;
1426 mHealthCheckDurationMs = healthCheckDurationMs;
1427 mHasPassedHealthCheck = hasPassedHealthCheck;
1428 mMitigationCalls = mitigationCalls;
1429 updateHealthCheckStateLocked();
1430 }
// 然后调用 MonitoredPackage 的 onFailureLocked 方法
1450 @GuardedBy("mLock")
1451 public boolean onFailureLocked() {
1452 // Sliding window algorithm: find out if there exists a window containing failures >=
1453 // mTriggerFailureCount.
// 首先获取当前的时间
1454 final long now = mSystemClock.uptimeMillis();
// 然后将当前的时间保存到 mFailureHistory
1455 mFailureHistory.addLast(now);
// mTriggerFailureDurationMs 的值为 1 分钟(滑动窗口的长度)
// 如果队列中最早一次失败距离当前时间超过 1 分钟,则将其从队列头部移除
1456 while (now - mFailureHistory.peekFirst() > mTriggerFailureDurationMs) {
1457 // Prune values falling out of the window
1458 mFailureHistory.removeFirst();
1459 }
// 触发阈值 mTriggerFailureCount 为 5 次。
// 如果窗口内产生crash 的次数达到 5 次(>= 5),则设置failed 为 true,并清空失败记录,返回值也是为true 的
1460 boolean failed = mFailureHistory.size() >= mTriggerFailureCount;
1461 if (failed) {
1462 mFailureHistory.clear();
1463 }
1464 return failed;
1465 }
===========
197 private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS;
116 static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS =
117 (int) TimeUnit.MINUTES.toMillis(1);
------
198 @GuardedBy("mLock")
199 private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
120 static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5;
418 if (registeredObserver != null
419 && observer.onPackageFailureLocked(
420 versionedPackage.getPackageName())) {
综上,上述代码判断条件为 true,需要满足:该常驻进程在 1 分钟内 crash 达到 5 次(含 5 次)。
// 2)调用RescueParty.java 的方法onHealthCheckFailed
/frameworks/base/services/core/java/com/android/server/RescueParty.java
617 @Override
618 public int onHealthCheckFailed(@Nullable VersionedPackage failedPackage,
619 @FailureReasons int failureReason, int mitigationCount) {
// isDisabled 根据一些配置判断 RescueParty 是否被禁用(被禁用时不会执行救援)
// failureReason 的值为 FAILURE_REASON_APP_CRASH
620 if (!isDisabled() && (failureReason == PackageWatchdog.FAILURE_REASON_APP_CRASH
621 || failureReason == PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING)) {
// mayPerformFactoryReset 返回的值为 true
622 return mapRescueLevelToUserImpact(getRescueLevel(mitigationCount,
623 mayPerformFactoryReset(failedPackage)));
624 } else {
625 return PackageHealthObserverImpact.USER_IMPACT_NONE;
626 }
627 }
=============
// 获取到救援模式的级别:当前 mitigationCount 为 1,所以得到的级别是 LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS
346 private static int getRescueLevel(int mitigationCount, boolean mayPerformFactoryReset) {
347 if (mitigationCount == 1) {
348 return LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS;
349 } else if (mitigationCount == 2) {
350 return LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES;
351 } else if (mitigationCount == 3) {
352 return LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS;
353 } else if (mitigationCount == 4) {
354 return Math.min(getMaxRescueLevel(mayPerformFactoryReset), LEVEL_WARM_REBOOT);
355 } else if (mitigationCount >= 5) {
356 return Math.min(getMaxRescueLevel(mayPerformFactoryReset), LEVEL_FACTORY_RESET);
357 } else {
358 Slog.w(TAG, "Expected positive mitigation count, was " + mitigationCount);
359 return LEVEL_NONE;
360 }
361 }
============
// 接着执行mapRescueLevelToUserImpact
// 返回的值为 int USER_IMPACT_LOW = 1;
477 private static int mapRescueLevelToUserImpact(int rescueLevel) {
478 switch(rescueLevel) {
479 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
480 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
481 return PackageHealthObserverImpact.USER_IMPACT_LOW;
482 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
483 case LEVEL_WARM_REBOOT:
484 case LEVEL_FACTORY_RESET:
485 return PackageHealthObserverImpact.USER_IMPACT_HIGH;
486 default:
487 return PackageHealthObserverImpact.USER_IMPACT_NONE;
488 }
489 }
// 3)调用RescueParty.java 的方法execute,mitigationCount 的值为 1
/frameworks/base/services/core/java/com/android/server/RescueParty.java
629 @Override
630 public boolean execute(@Nullable VersionedPackage failedPackage,
631 @FailureReasons int failureReason, int mitigationCount) {
632 if (isDisabled()) {
633 return false;
634 }
635 if (failureReason == PackageWatchdog.FAILURE_REASON_APP_CRASH
636 || failureReason == PackageWatchdog.FAILURE_REASON_APP_NOT_RESPONDING) {
// 获取到的level 为 LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS;
637 final int level = getRescueLevel(mitigationCount,
638 mayPerformFactoryReset(failedPackage));
639 executeRescueLevel(mContext,
640 failedPackage == null ? null : failedPackage.getPackageName(), level);
641 return true;
642 } else {
643 return false;
644 }
645 }
===========
363 private static void executeRescueLevel(Context context, @Nullable String failedPackage,
364 int level) {
// 会打印下列的log
365 Slog.w(TAG, "Attempting rescue level " + levelToString(level));
366 try {
// 执行下列方法 executeRescueLevelInternal:level为 LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS
367 executeRescueLevelInternal(context, level, failedPackage);
// 会打印下列的 event log
368 EventLogTags.writeRescueSuccess(level);
369 String successMsg = "Finished rescue level " + levelToString(level);
370 if (!TextUtils.isEmpty(failedPackage)) {
371 successMsg += " for package " + failedPackage;
372 }
373 logCriticalInfo(Log.DEBUG, successMsg);
374 } catch (Throwable t) {
375 logRescueException(level, failedPackage, t);
376 }
377 }
// 执行下列方法 executeRescueLevelInternal:level为 LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS
379 private static void executeRescueLevelInternal(Context context, int level, @Nullable
380 String failedPackage) throws Exception {
381 FrameworkStatsLog.write(FrameworkStatsLog.RESCUE_PARTY_RESET_REPORTED, level);
382 // Try our best to reset all settings possible, and once finished
383 // rethrow any exception that we encountered
384 Exception res = null;
385 Runnable runnable;
386 Thread thread;
387 switch (level) {
388 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
389 try {
390 resetAllSettingsIfNecessary(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS,
391 level);
392 } catch (Exception e) {
393 res = e;
394 }
395 try {
396 resetDeviceConfig(context, /*isScoped=*/true, failedPackage);
397 } catch (Exception e) {
398 res = e;
399 }
400 break;
---------
427 case LEVEL_WARM_REBOOT:
428 // Request the reboot from a separate thread to avoid deadlock on PackageWatchdog
429 // when device shutting down.
430 SystemProperties.set(PROP_ATTEMPTING_REBOOT, "true");
431 runnable = () -> {
432 try {
// 如果是 LEVEL_WARM_REBOOT,则会去调用pm 的重启:reboot
433 PowerManager pm = context.getSystemService(PowerManager.class);
434 if (pm != null) {
435 pm.reboot(TAG);
436 }
437 } catch (Throwable t) {
438 logRescueException(level, failedPackage, t);
439 }
440 };
441 thread = new Thread(runnable);
442 thread.start();
443 break;
(2)system_server 在 5 分钟内重启 5 次以上调整一次级别。(Android 12 为10分钟内5次)