Feature #8214
closedデッドロックチェックに全スレッドのバックトレースダンプの追加
Added by kosaki (Motohiro KOSAKI) over 12 years ago. Updated over 9 years ago.
Description
さいきん、くくぱど方面のデッドロックバグの調査を手伝っていて気づいたのですが、
現在のRubyのdeadlock detectorは開発者に優しくないです。
例外があがるとスタックが巻戻ってしまって証拠が消えてしまうので、例外をあげるまえに
全スレッドのバックトレースをダンプするオプションがあるべきではないでしょうか。
デッドロックが起きたときは99%、直さないといけない状況になるはずで、証拠を残す機能は
有意義なはずです。
TBD: ruby level だけで十分か、Cレベルも含めるか
        
           Updated by nobu (Nobuyoshi Nakada) over 12 years ago
          
          
        
        
          
            Actions
          
          #1
            [ruby-dev:47268]
          Updated by nobu (Nobuyoshi Nakada) over 12 years ago
          
          
        
        
          
            Actions
          
          #1
            [ruby-dev:47268]
        
      
      - Subject changed from デッドロックチェックに前スレッドのバックトレースダンプの追加 to デッドロックチェックに全スレッドのバックトレースダンプの追加
- Description updated (diff)
        
           Updated by naruse (Yui NARUSE) over 9 years ago
          
          
        
        
          
            Actions
          
          #2
            [ruby-dev:49639]
          Updated by naruse (Yui NARUSE) over 9 years ago
          
          
        
        
          
            Actions
          
          #2
            [ruby-dev:49639]
        
      
      とりあえず以下のような感じで作ってみました。
やっぱりバックトレースも欲しいかな
% ./miniruby -ve'Thread.current.name="MainThread!";z=Thread.new{Thread.stop};a,b=Thread.new{1until b;b.join},Thread.new{1until a;a.join};a.name="aaaaa";b.name="bbbbb";z.name="zzzz";a.join'
`miniruby' is up to date.
ruby 2.4.0dev (2016-06-01 trunk 55248) [x86_64-freebsd10.3]
-e:1:in `join': No live threads left. Deadlock? (fatal)
4 threads, 4 sleeps current:0x00000802c1b200 main thread:0x00000802c17300
* #<Thread:0x00000802d8ce58@MainThread! sleep_forever>
   rb_thread_t:0x00000802c17300 native:0x00000802c06400 int:1
* #<Thread:0x00000802d7f780@zzzz@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1ac00 native:0x00000802c07800 int:0
* #<Thread:0x00000802d7f690@aaaaa@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1af00 native:0x00000802c07c00 int:0
    depended by: tb_thread_id:0x00000802c1b200
    depended by: tb_thread_id:0x00000802c17300
* #<Thread:0x00000802d7f5a0@bbbbb@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1b200 native:0x00000802c08000 int:0
    depended by: tb_thread_id:0x00000802c1af00
        from -e:1:in `<main>'
diff --git a/thread.c b/thread.c
index d17b663..359dada 100644
--- a/thread.c
+++ b/thread.c
@@ -2618,7 +2618,7 @@ rb_thread_group(VALUE thread)
 }
 
 static const char *
-thread_status_name(rb_thread_t *th)
+thread_status_name(rb_thread_t *th, int detail)
 {
     switch (th->status) {
       case THREAD_RUNNABLE:
@@ -2626,8 +2626,9 @@ thread_status_name(rb_thread_t *th)
 	    return "aborting";
 	else
 	    return "run";
-      case THREAD_STOPPED:
       case THREAD_STOPPED_FOREVER:
+	if (detail) return "sleep_forever";
+      case THREAD_STOPPED:
 	return "sleep";
       case THREAD_KILLED:
 	return "dead";
@@ -2687,7 +2688,7 @@ rb_thread_status(VALUE thread)
 	}
 	return Qfalse;
     }
-    return rb_str_new2(thread_status_name(th));
+    return rb_str_new2(thread_status_name(th, FALSE));
 }
 
 
@@ -2833,7 +2834,7 @@ rb_thread_inspect(VALUE thread)
     VALUE str;
 
     GetThreadPtr(thread, th);
-    status = thread_status_name(th);
+    status = thread_status_name(th, FALSE);
     str = rb_sprintf("#<%"PRIsVALUE":%p", cname, (void *)thread);
     if (!NIL_P(th->name)) {
 	rb_str_catf(str, "@%"PRIsVALUE, th->name);
@@ -4728,26 +4729,36 @@ ruby_native_thread_p(void)
 }
 
 static void
-debug_deadlock_check(rb_vm_t *vm)
+debug_deadlock_check(rb_vm_t *vm, VALUE msg)
 {
-#ifdef DEBUG_DEADLOCK_CHECK
     rb_thread_t *th = 0;
 
-    printf("%d %d %p %p\n", vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
+    rb_str_catf(msg, "\n%d threads, %d sleeps current:%p main thread:%p\n",
+	    vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
     list_for_each(&vm->living_threads, th, vmlt_node) {
-	printf("th:%p %d %d", th, th->status, th->interrupt_flag);
 	if (th->locking_mutex) {
 	    rb_mutex_t *mutex;
+	    struct rb_thread_struct volatile *mth;
+	    int waiting;
 	    GetMutexPtr(th->locking_mutex, mutex);
 
 	    native_mutex_lock(&mutex->lock);
-	    printf(" %p %d\n", mutex->th, mutex->cond_waiting);
+	    mth = mutex->th;
+	    waiting = mutex->cond_waiting;
 	    native_mutex_unlock(&mutex->lock);
+	    rb_str_catf(msg, "  rb_thread_t:%p %s %u waiting:%p cond_waiting:%d", th, thread_status_name(th, TRUE), th->interrupt_flag, mth, waiting);
+	}
+	else {
+	    rb_str_catf(msg, "  rb_thread_t:%p %s %u\n", th, thread_status_name(th, TRUE), th->interrupt_flag);
+	}
+	{
+	    rb_thread_list_t *list = th->join_list;
+	    while (list) {
+		rb_str_catf(msg, "    depended by: %p\n", list->th);
+		list = list->next;
+	    }
 	}
-	else
-	    puts("");
     }
-#endif
 }
 
 static void
@@ -4782,7 +4793,7 @@ rb_check_deadlock(rb_vm_t *vm)
 	VALUE argv[2];
 	argv[0] = rb_eFatal;
 	argv[1] = rb_str_new2("No live threads left. Deadlock?");
-	debug_deadlock_check(vm);
+	debug_deadlock_check(vm, argv[1]);
 	vm->sleeper--;
 	rb_threadptr_raise(vm->main_thread, 2, argv);
     }
        
           Updated by naruse (Yui NARUSE) over 9 years ago
          
          
        
        
          
            Actions
          
          #3
            [ruby-dev:49640]
          Updated by naruse (Yui NARUSE) over 9 years ago
          
          
        
        
          
            Actions
          
          #3
            [ruby-dev:49640]
        
      
      バックトレースも欲しいらしいので
% ./miniruby -ve'Thread.current.name="MainThread!";z=Thread.new{Thread.stop};a,b=Thread.new{1until b;b.join},Thread.new{1until a;a.join};a.name="aaaaa";b.name="bbbbb";z.name="zzzz";a.join'
--- thread.o ---
compiling ../../ruby/thread.c
--- miniruby ---
linking miniruby
ruby 2.4.0dev (2016-06-01 trunk 55248) [x86_64-freebsd10.3]
-e:1:in `join': No live threads left. Deadlock? (fatal)
4 threads, 4 sleeps current:0x00000802c1b200 main thread:0x00000802c17300
* #<Thread:0x00000802d8ce58@MainThread! sleep_forever>
   rb_thread_t:0x00000802c17300 native:0x00000802c06400 int:1
   -e:1:in `join'
   -e:1:in `<main>'
* #<Thread:0x00000802d7f780@zzzz@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1ac00 native:0x00000802c07800 int:0
   -e:1:in `stop'
   -e:1:in `block in <main>'
* #<Thread:0x00000802d7f690@aaaaa@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1af00 native:0x00000802c07c00 int:0
    depended by: tb_thread_id:0x00000802c1b200
    depended by: tb_thread_id:0x00000802c17300
   -e:1:in `join'
   -e:1:in `block in <main>'
* #<Thread:0x00000802d7f5a0@bbbbb@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1b200 native:0x00000802c08000 int:0
    depended by: tb_thread_id:0x00000802c1af00
   -e:1:in `join'
   -e:1:in `block in <main>'
        from -e:1:in `<main>'
diff --git a/thread.c b/thread.c
index d17b663..84f76d1 100644
--- a/thread.c
+++ b/thread.c
@@ -2618,7 +2618,7 @@ rb_thread_group(VALUE thread)
 }
 
 static const char *
-thread_status_name(rb_thread_t *th)
+thread_status_name(rb_thread_t *th, int detail)
 {
     switch (th->status) {
       case THREAD_RUNNABLE:
@@ -2626,8 +2626,9 @@ thread_status_name(rb_thread_t *th)
 	    return "aborting";
 	else
 	    return "run";
-      case THREAD_STOPPED:
       case THREAD_STOPPED_FOREVER:
+	if (detail) return "sleep_forever";
+      case THREAD_STOPPED:
 	return "sleep";
       case THREAD_KILLED:
 	return "dead";
@@ -2687,7 +2688,7 @@ rb_thread_status(VALUE thread)
 	}
 	return Qfalse;
     }
-    return rb_str_new2(thread_status_name(th));
+    return rb_str_new2(thread_status_name(th, FALSE));
 }
 
 
@@ -2833,7 +2834,7 @@ rb_thread_inspect(VALUE thread)
     VALUE str;
 
     GetThreadPtr(thread, th);
-    status = thread_status_name(th);
+    status = thread_status_name(th, TRUE);
     str = rb_sprintf("#<%"PRIsVALUE":%p", cname, (void *)thread);
     if (!NIL_P(th->name)) {
 	rb_str_catf(str, "@%"PRIsVALUE, th->name);
@@ -4727,27 +4728,46 @@ ruby_native_thread_p(void)
     return th != 0;
 }
 
+VALUE rb_vm_backtrace_str_ary(rb_thread_t *th, long lev, long n);
 static void
-debug_deadlock_check(rb_vm_t *vm)
+debug_deadlock_check(rb_vm_t *vm, VALUE msg)
 {
-#ifdef DEBUG_DEADLOCK_CHECK
     rb_thread_t *th = 0;
+    VALUE sep = rb_str_new_cstr("\n   ");
 
-    printf("%d %d %p %p\n", vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
+    rb_str_catf(msg, "\n%d threads, %d sleeps current:%p main thread:%p\n",
+	    vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
     list_for_each(&vm->living_threads, th, vmlt_node) {
-	printf("th:%p %d %d", th, th->status, th->interrupt_flag);
 	if (th->locking_mutex) {
 	    rb_mutex_t *mutex;
+	    struct rb_thread_struct volatile *mth;
+	    int waiting;
 	    GetMutexPtr(th->locking_mutex, mutex);
 
 	    native_mutex_lock(&mutex->lock);
-	    printf(" %p %d\n", mutex->th, mutex->cond_waiting);
+	    mth = mutex->th;
+	    waiting = mutex->cond_waiting;
 	    native_mutex_unlock(&mutex->lock);
+	    rb_str_catf(msg, "* %+"PRIsVALUE"\n   rb_thread_t:%p native:%p int:%u mutex:%p cond:%d\n",
+		    th->self, th, th->thread_id,
+		    th->interrupt_flag, mth, waiting);
 	}
-	else
-	    puts("");
+	else {
+	    rb_str_catf(msg, "* %+"PRIsVALUE"\n   rb_thread_t:%p native:%p int:%u\n",
+		    th->self, th, th->thread_id,
+		    th->interrupt_flag);
+	}
+	{
+	    rb_thread_list_t *list = th->join_list;
+	    while (list) {
+		rb_str_catf(msg, "    depended by: tb_thread_id:%p\n", list->th);
+		list = list->next;
+	    }
+	}
+	rb_str_catf(msg, "   ");
+	rb_str_concat(msg, rb_ary_join(rb_vm_backtrace_str_ary(th, 0, 0), sep));
+	rb_str_catf(msg, "\n");
     }
-#endif
 }
 
 static void
@@ -4782,7 +4802,7 @@ rb_check_deadlock(rb_vm_t *vm)
 	VALUE argv[2];
 	argv[0] = rb_eFatal;
 	argv[1] = rb_str_new2("No live threads left. Deadlock?");
-	debug_deadlock_check(vm);
+	debug_deadlock_check(vm, argv[1]);
 	vm->sleeper--;
 	rb_threadptr_raise(vm->main_thread, 2, argv);
     }
        
           Updated by tagomoris (Satoshi Tagomori) over 9 years ago
          
          
        
        
          
            Actions
          
          #4
            [ruby-dev:49641]
          Updated by tagomoris (Satoshi Tagomori) over 9 years ago
          
          
        
        
          
            Actions
          
          #4
            [ruby-dev:49641]
        
      
      デッドロック関連のデバッグであれば A -> B の順にロックを取ろうとしているスレッドと B -> A の順に
ロックを取ろうとしているスレッドが両方いる、という事実が重要だと思うので、バックトレースは極めて重要だと考えます。
        
           Updated by shyouhei (Shyouhei Urabe) over 9 years ago
          
          
        
        
          
            Actions
          
          #5
            [ruby-dev:49662]
          Updated by shyouhei (Shyouhei Urabe) over 9 years ago
          
          
        
        
          
            Actions
          
          #5
            [ruby-dev:49662]
        
      
      - Status changed from Open to Assigned
- Assignee set to naruse (Yui NARUSE)
        
           Updated by naruse (Yui NARUSE) over 9 years ago
          
          
        
        
          
            Actions
          
          #6
          Updated by naruse (Yui NARUSE) over 9 years ago
          
          
        
        
          
            Actions
          
          #6
        
      
      - Status changed from Assigned to Closed
Applied in changeset r55397.
- 
thread.c (debug_deadlock_check): show thread lock dependency and 
 backtrace [Feature #8214] [ruby-dev:47217]
- 
thread.c (thread_status_name): show "sleep_forever" instead of 
 "sleep" if called from inspect.