近期处理Bugly上的OOM问题时,发现很多发生在Thread创建、启动的过程中。虽然最后分析出是32位进程4G虚拟内存不足导致的,但还是借此梳理一下Thread从Java层到native层的创建源码,以及这个过程中可能抛出的异常:InternalError 和 OutOfMemoryError(Out of memory)。
Thread报错堆栈:
Java线程创建到启动过程:
从Thread.start() -> c++层CreateNativeThread() -> JNIEnvExt::Create()创建JniEnv -> c++层pthread_create() -> __allocate_thread()分配线程栈内存 -> Linux层clone()创建新线程 -> 反射调用Thread.run()
源码分析
Java层Thread#start():
接着来到c++层:
http://aospxref.com/android-7.1.2_r39/xref/art/runtime/native/java_lang_Thread.cc
/art/runtime/native/java_lang_Thread.cc
// Native side of thread creation, reached from the Java-layer Thread#start()
// (see the call chain described in this article). Forwards the request to
// Thread::CreateNativeThread.
static void Thread_nativeCreate(JNIEnv* env, jclass, jobject java_thread, jlong stack_size,
                                jboolean daemon) {
  //... (elided in this excerpt; per the article, some zygote processes are not
  // allowed to create threads and an InternalError is thrown here)

  // Turn the JNI boolean into a native bool before handing off.
  const bool is_daemon = (daemon == JNI_TRUE);
  Thread::CreateNativeThread(env, java_thread, stack_size, is_daemon);
}
http://aospxref.com/android-7.1.2_r39/xref/art/runtime/thread.cc
/art/runtime/thread.cc
// Creates the native (C++) Thread object for |java_peer|, builds its JNIEnvExt and starts it
// with pthread_create(). If the runtime is shutting down an InternalError is thrown. If the
// JNIEnvExt cannot be created or pthread_create() fails, the partially built state is torn
// down and an OutOfMemoryError is thrown whose message names the step that failed.
void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
CHECK(java_peer != nullptr);
Thread* self = static_cast<JNIEnvExt*>(env)->self;
// Starting a thread while the runtime is shutting down is rejected with an InternalError.
Runtime* runtime = Runtime::Current();
bool thread_start_during_shutdown = false;
{
MutexLock mu(self, *Locks::runtime_shutdown_lock_);
if (runtime->IsShuttingDownLocked()) {
thread_start_during_shutdown = true;
} else {
// NOTE(review): no matching "end of thread birth" call is visible on the failure path of this
// excerpt -- confirm against the upstream source.
runtime->StartThreadBirth();
}
}
if (thread_start_during_shutdown) {
ScopedLocalRef<jclass> error_class(env, env->FindClass("java/lang/InternalError"));
env->ThrowNew(error_class.get(), "Thread starting during runtime shutdown");
return;
}
Thread* child_thread = new Thread(is_daemon);// C++ Thread object backing the Java-layer Thread
child_thread->tlsPtr_.jpeer = env->NewGlobalRef(java_peer); // keep a global reference to the Java Thread object
stack_size = FixStackSize(stack_size);// adjusted thread *stack* size (not heap); the article works out 1040 KB for the default case
// Store the native Thread pointer in the Java object's nativePeer field.
env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer,
reinterpret_cast<jlong>(child_thread));
// Create the JNIEnvExt for the new thread (one JNIEnv per thread). A nullptr result is
// reported further down as an OutOfMemoryError ("Could not allocate JNI Env").
std::unique_ptr<JNIEnvExt> child_jni_env_ext(
JNIEnvExt::Create(child_thread, Runtime::Current()->GetJavaVM()));
int pthread_create_result = 0;
if (child_jni_env_ext.get() != nullptr) {// the JNIEnvExt was created successfully
pthread_t new_pthread;
pthread_attr_t attr;
child_thread->tlsPtr_.tmp_jni_env = child_jni_env_ext.get();// stash the JNIEnvExt pointer on the C++ Thread object
CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), "new thread");
CHECK_PTHREAD_CALL(pthread_attr_setdetachstate, (&attr, PTHREAD_CREATE_DETACHED),
"PTHREAD_CREATE_DETACHED");
CHECK_PTHREAD_CALL(pthread_attr_setstacksize, (&attr, stack_size), stack_size);
// Actually create the thread. Arguments: (1) receives the new thread's id; (2) thread
// attributes (detached state and stack size set above); (3) start routine; (4) argument
// passed to the start routine.
pthread_create_result = pthread_create(&new_pthread,
&attr,
Thread::CreateCallback,
child_thread);
CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attr), "new thread");
if (pthread_create_result == 0) { // 0 means the thread was created successfully; this does not wait for Thread#run()
// Give up ownership so the unique_ptr does not delete the JNIEnvExt on return
// (presumably the new thread is now responsible for it -- TODO confirm).
child_jni_env_ext.release();
return; // success: the failure cleanup below is skipped
}
}
// Failure path (JNIEnvExt creation or pthread_create failed): release what was set up above.
env->DeleteGlobalRef(child_thread->tlsPtr_.jpeer); // drop the global reference to the Java Thread object
child_thread->tlsPtr_.jpeer = nullptr;
delete child_thread; // destroy the C++ Thread object
child_thread = nullptr;
// Reset nativePeer to 0 so the Java object no longer refers to the deleted native Thread.
env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
// Report the failure as an OutOfMemoryError; the message tells which of the two steps failed.
{
std::string msg(child_jni_env_ext.get() == nullptr ?
"Could not allocate JNI Env" : // message used when JNIEnvExt creation failed
StringPrintf("pthread_create (%s stack) failed: %s",
PrettySize(stack_size).c_str(), strerror(pthread_create_result)));
ScopedObjectAccess soa(env);
soa.Self()->ThrowOutOfMemoryError(msg.c_str()); // throw the OutOfMemoryError
}
}
通过FixStackSize()
计算出线程的栈内存大小,栈内存=1024K(1M)+8K+8K=1040K
static size_t FixStackSize(size_t stack_size) { //参数是java层中thread 的stack_size默认0
if (stack_size == 0) {
// GetDefaultStackSize 是启动art时命令行的 "-Xss=" 参数, Android 中没有该参数,因此为0.
stack_size = Runtime::Current()->GetDefaultStackSize();
}
// bionic pthread 默认栈大小是 1M
stack_size += 1 * MB;
//...
if (Runtime::Current()->ExplicitStackOverflowChecks()) {
//8k
stack_size += GetStackOverflowReservedBytes(kRuntimeISA);
} else {
8k+8K
stack_size += Thread::kStackOverflowImplicitCheckSize +
GetStackOverflowReservedBytes(kRuntimeISA);
}
//...
return stack_size;
}
查看创建JniEnv过程:
http://aospxref.com/android-7.1.2_r39/xref/art/runtime/jni_env_ext.cc
/art/runtime/jni_env_ext.cc
// Factory for a thread's JNIEnvExt. Returns a heap-allocated instance whose
// ownership passes to the caller, or nullptr when CheckLocalsValid() rejects
// the freshly constructed object (in which case it is destroyed here).
JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) {
  std::unique_ptr<JNIEnvExt> candidate(new JNIEnvExt(self_in, vm_in));
  if (!CheckLocalsValid(candidate.get())) {
    // The unique_ptr deletes the unusable instance on the way out.
    return nullptr;
  }
  return candidate.release();
}
// Builds the per-thread JNI environment for |self_in| inside |vm_in|:
// initialises the local-reference and monitor tables, installs the JNI
// function table, and switches CheckJNI on when the VM has it enabled.
JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in)
    : self(self_in),
      vm(vm_in),
      local_ref_cookie(IRT_FIRST_SEGMENT),
      locals(kLocalsInitial, kLocalsMax, kLocal, false),
      check_jni(false),
      runtime_deleted(false),
      critical(0),
      monitors("monitors", kMonitorsInitial, kMonitorsMax) {
  // Both function-table pointers start out at the table returned by
  // GetJniNativeInterface().
  unchecked_functions = GetJniNativeInterface();
  functions = unchecked_functions;

  const bool check_jni_requested = vm->IsCheckJniEnabled();
  if (check_jni_requested) {
    SetCheckJniEnabled(true);
  }
}
查看pthread的创建线程过程:
http://aospxref.com/android-7.1.2_r39/xref/bionic/libc/bionic/pthread_create.cpp
/bionic/libc/bionic/pthread_create.cpp
// Excerpt of bionic's pthread_create(): allocates the new thread's stack and
// bookkeeping structure, then starts the thread with clone().
//
// Returns 0 on success, otherwise an errno-style error code. This function
// does not throw; the ART caller shown earlier in this article turns a
// non-zero result into an OutOfMemoryError.
//
// NOTE(review): thread_attr and tls are not declared in this excerpt --
// presumably they are set up in code elided here.
int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
                   void* (*start_routine)(void*), void* arg) {
  pthread_internal_t* thread = NULL;
  void* child_stack = NULL;
  // Allocate the thread's stack (and its pthread_internal_t).
  int result = __allocate_thread(&thread_attr, &thread, &child_stack);
  if (result != 0) {
    return result;  // allocation failed: hand the error code back to the caller
  }
  //....
  int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
              CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
  // Linux clone(): with these flags the new task shares the address space,
  // filesystem info, file descriptors and signal handlers with its creator.
  int rc = clone(__pthread_start, child_stack, flags, thread, &(thread->tid), tls, &(thread->tid));
  if (rc == -1) {
    // Save errno right away: the cleanup below may overwrite it.
    int clone_errno = errno;
    if (thread->mmap_size != 0) {
      // clone() failed, so unmap the stack that was mapped for the thread.
      munmap(thread->attr.stack_base, thread->mmap_size);
    }
    // Log the reason clone() failed. Use the saved clone_errno rather than
    // errno, which munmap() above may have changed.
    __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(clone_errno));
    return clone_errno;
  }
  //...
  return 0;
}
接下来看下__allocate_thread()
是如何创建线程的栈内存的:
// Excerpt: reserves the memory for a new thread. When the caller did not supply a stack
// (attr->stack_base == NULL), one mapping is created that is sized for the stack plus the
// pthread_internal_t bookkeeping structure.
// Returns 0 on success, or EAGAIN when the mapping could not be created.
// NOTE(review): threadp, child_stack and stack_top are presumably consumed in the code elided
// below -- confirm against the upstream source.
static int __allocate_thread(pthread_attr_t* attr, pthread_internal_t** threadp, void** child_stack) {
size_t mmap_size;
uint8_t* stack_top;
if (attr->stack_base == NULL) {
// mmap_size = requested stack size + sizeof(pthread_internal_t), rounded up to PAGE_SIZE.
mmap_size = BIONIC_ALIGN(attr->stack_size + sizeof(pthread_internal_t), PAGE_SIZE);
attr->guard_size = BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
attr->stack_base = __create_thread_mapped_space(mmap_size, attr->guard_size);
if (attr->stack_base == NULL) {
return EAGAIN; // the mapping could not be created: report an error code
}
stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + mmap_size;
}
//....
return 0;
}
线程分配的mmap_size = 线程栈大小(1040K)+ 线程结构体pthread_internal_t的大小(再按PAGE_SIZE向上对齐)。线程结构体pthread_internal_t包含了线程的名字、线程本地数据(thread local)等。
接下来看下__create_thread_mapped_space()
:
// Excerpt: maps |mmap_size| bytes of anonymous, private, read/write memory for a new thread.
// Returns the start of the mapping, or NULL (after logging a warning) when mmap() fails.
// NOTE(review): stack_guard_size is not used in the visible part -- presumably the elided
// section sets up the guard region with it.
static void* __create_thread_mapped_space(size_t mmap_size, size_t stack_guard_size) {
int prot = PROT_READ | PROT_WRITE;
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
// MAP_ANONYMOUS | MAP_PRIVATE: an anonymous private mapping of mmap_size bytes (not backed by
// a file, and not shared memory).
void* space = mmap(NULL, mmap_size, prot, flags, -1, 0);
if (space == MAP_FAILED) {
// This log line is the "couldn't allocate ...-bytes mapped space" message quoted from Bugly
// in this article.
__libc_format_log(ANDROID_LOG_WARN,
"libc","pthread_create failed: couldn't allocate %zu-bytes mapped space: %s", mmap_size, strerror(errno));
return NULL;
}
//....
return space;
}
这里和Bugly上的日志 pthread_create failed: couldn't allocate 1085440-bytes mapped space: Out of memory
对应上了,即为线程栈分配(mmap)内存失败了,进程的虚拟内存不够了。
接下来看下,Linux 是如何创建新子进程,即创建线程。
先来了解下一些Linux中的概念:
进程创建:
- Linux 进程创建: 通过fork(),复制资源(包含代码段、数据段、堆、栈)给子进程,但两进程内存资源不共享;
- Linux用户级别线程创建:通过pthread库中的pthread_create()创建线程,共享同个进程中的资源;
- Linux内核线程创建: 通过kthread_create()
在Linux看来线程是一种进程间共享资源的方式,线程也可以看做跟其进程共享资源的进程。线程与进程的区别是是否共享资源。
http://aospxref.com/android-7.1.2_r39/xref/bionic/libc/bionic/clone.cpp
/bionic/libc/bionic/clone.cpp
// Excerpt of bionic's clone() wrapper: hands the real work to __bionic_clone() and returns
// its result.
// NOTE(review): parent_tid, new_tls, child_tid, self and parent_pid are not declared in this
// excerpt -- presumably they are set up in code that was elided (the variadic arguments are a
// likely source; confirm against the upstream source).
int clone(int (*fn)(void*), void* child_stack, int flags, void* arg, ...) {
// The actual creation of the child task happens here; the kernel-side call chain is listed
// after this excerpt.
int clone_result = __bionic_clone(flags, child_stack, parent_tid, new_tls, child_tid, fn, arg);
self->set_cached_pid(parent_pid);
return clone_result;
}
调用链:pthread_create()->linux的clone()->sys_clone()->do_fork()->copy_process()。
在这个过程中,会拷贝当前进程(比如主进程)的资源,
会检查进程数是否超出限制(即线程数是否超过最大值)、fd资源是否超过限制(在linux中socket、file都是fd),并共享信号处理。
更多请阅读,http://gityuan.com/2017/08/05/linux-process-fork/
最后看下每个code对应的异常msg:
http://aospxref.com/android-7.1.2_r39/xref/bionic/libc/bionic/strerror.cpp#36
/bionic/libc/bionic/strerror.cpp
// Maps an errno value to its message string.
// Known codes resolve to the constant returned by __strerror_lookup(); any
// other value is formatted by strerror_r() into g_strerror_tls_buffer and
// that buffer is returned.
char* strerror(int error_number) {
  // Easy case: the lookup already has a constant string for this code.
  char* const known_message = const_cast<char*>(__strerror_lookup(error_number));
  if (known_message != nullptr) {
    return known_message;
  }

  // Otherwise build the message in the buffer.
  char* const scratch = g_strerror_tls_buffer.get();
  strerror_r(error_number, scratch, g_strerror_tls_buffer.size());
  return scratch;
}
http://aospxref.com/android-7.1.2_r39/xref/bionic/libc/include/sys/_errdefs.h
/bionic/libc/include/sys/_errdefs.h
// Excerpt of the errno table. Each row gives: symbolic name, numeric value, message text.
// The "Out of memory" text in the Bugly log quoted in this article is the ENOMEM message.
__BIONIC_ERRDEF( EAGAIN , 11, "Try again" )
__BIONIC_ERRDEF( ENOMEM , 12, "Out of memory" )
__BIONIC_ERRDEF( EACCES , 13, "Permission denied" )
__BIONIC_ERRDEF( EMFILE , 24, "Too many open files" )
这里延伸点,Thread 异常捕捉处理器中:
- 捕获到Java层异常时,不能再创建Thread,不然会抛出 InternalError: Thread starting during runtime shutdown。即异常上报的线程要提前创建。
- 发生异常且内存不足时进行异常上报,如果使用OkHttp传输(会创建新线程),可能造成新的OOM异常;
资料参考:
- http://gityuan.com/2016/09/24/android-thread/
- https://blog.csdn.net/Tencent_Bugly/article/details/78542324