Project

General

Profile

Feature #8214

デッドロックチェックに全スレッドのバックトレースダンプの追加

Added by kosaki (Motohiro KOSAKI) over 4 years ago. Updated about 1 year ago.

Status:
Closed
Priority:
Normal
Target version:
-
[ruby-dev:47217]

Description

さいきん、くくぱど方面のデッドロックバグの調査を手伝っていて気づいたのですが、
現在のRubyのdeadlock detectorは開発者に優しくないです。

例外があがるとスタックが巻戻ってしまって証拠が消えてしまうので、例外をあげるまえに
全スレッドのバックトレースをダンプするオプションがあるべきではないでしょうか。
デッドロックが起きたときは99%、直さないといけない状況になるはずで、証拠を残す機能は
有意義なはずです。

TBD: ruby level だけで十分か、Cレベルも含めるか

Associated revisions

Revision 55397
Added by naruse (Yui NARUSE) about 1 year ago

  • thread.c (debug_deadlock_check): show thread lock dependency and
    backtrace [Feature #8214]

  • thread.c (thread_status_name): show "sleep_forever" instead of
    "sleep" if called from inspect.

Revision 55397
Added by naruse (Yui NARUSE) about 1 year ago

  • thread.c (debug_deadlock_check): show thread lock dependency and
    backtrace [Feature #8214]

  • thread.c (thread_status_name): show "sleep_forever" instead of
    "sleep" if called from inspect.

History

#1 [ruby-dev:47268] Updated by nobu (Nobuyoshi Nakada) over 4 years ago

  • Subject changed from デッドロックチェックに前スレッドのバックトレースダンプの追加 to デッドロックチェックに全スレッドのバックトレースダンプの追加
  • Description updated (diff)

#2 [ruby-dev:49639] Updated by naruse (Yui NARUSE) about 1 year ago

とりあえず以下のような感じで作ってみました。
やっぱりバックトレースも欲しいかな

% ./miniruby -ve'Thread.current.name="MainThread!";z=Thread.new{Thread.stop};a,b=Thread.new{1until b;b.join},Thread.new{1until a;a.join};a.name="aaaaa";b.name="bbbbb";z.name="zzzz";a.join'
`miniruby' is up to date.
ruby 2.4.0dev (2016-06-01 trunk 55248) [x86_64-freebsd10.3]
-e:1:in `join': No live threads left. Deadlock? (fatal)
4 threads, 4 sleeps current:0x00000802c1b200 main thread:0x00000802c17300
* #<Thread:0x00000802d8ce58@MainThread! sleep_forever>
   rb_thread_t:0x00000802c17300 native:0x00000802c06400 int:1
* #<Thread:0x00000802d7f780@zzzz@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1ac00 native:0x00000802c07800 int:0
* #<Thread:0x00000802d7f690@aaaaa@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1af00 native:0x00000802c07c00 int:0
    depended by: tb_thread_id:0x00000802c1b200
    depended by: tb_thread_id:0x00000802c17300
* #<Thread:0x00000802d7f5a0@bbbbb@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1b200 native:0x00000802c08000 int:0
    depended by: tb_thread_id:0x00000802c1af00
        from -e:1:in `<main>'
diff --git a/thread.c b/thread.c
index d17b663..359dada 100644
--- a/thread.c
+++ b/thread.c
@@ -2618,7 +2618,7 @@ rb_thread_group(VALUE thread)
 }

 static const char *
-thread_status_name(rb_thread_t *th)
+thread_status_name(rb_thread_t *th, int detail)
 {
     switch (th->status) {
       case THREAD_RUNNABLE:
@@ -2626,8 +2626,9 @@ thread_status_name(rb_thread_t *th)
        return "aborting";
    else
        return "run";
-      case THREAD_STOPPED:
       case THREAD_STOPPED_FOREVER:
+   if (detail) return "sleep_forever";
+      case THREAD_STOPPED:
    return "sleep";
       case THREAD_KILLED:
    return "dead";
@@ -2687,7 +2688,7 @@ rb_thread_status(VALUE thread)
    }
    return Qfalse;
     }
-    return rb_str_new2(thread_status_name(th));
+    return rb_str_new2(thread_status_name(th, FALSE));
 }


@@ -2833,7 +2834,7 @@ rb_thread_inspect(VALUE thread)
     VALUE str;

     GetThreadPtr(thread, th);
-    status = thread_status_name(th);
+    status = thread_status_name(th, FALSE);
     str = rb_sprintf("#<%"PRIsVALUE":%p", cname, (void *)thread);
     if (!NIL_P(th->name)) {
    rb_str_catf(str, "@%"PRIsVALUE, th->name);
@@ -4728,26 +4729,36 @@ ruby_native_thread_p(void)
 }

 static void
-debug_deadlock_check(rb_vm_t *vm)
+debug_deadlock_check(rb_vm_t *vm, VALUE msg)
 {
-#ifdef DEBUG_DEADLOCK_CHECK
     rb_thread_t *th = 0;

-    printf("%d %d %p %p\n", vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
+    rb_str_catf(msg, "\n%d threads, %d sleeps current:%p main thread:%p\n",
+       vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
     list_for_each(&vm->living_threads, th, vmlt_node) {
-   printf("th:%p %d %d", th, th->status, th->interrupt_flag);
    if (th->locking_mutex) {
        rb_mutex_t *mutex;
+       struct rb_thread_struct volatile *mth;
+       int waiting;
        GetMutexPtr(th->locking_mutex, mutex);

        native_mutex_lock(&mutex->lock);
-       printf(" %p %d\n", mutex->th, mutex->cond_waiting);
+       mth = mutex->th;
+       waiting = mutex->cond_waiting;
        native_mutex_unlock(&mutex->lock);
+       rb_str_catf(msg, "  rb_thread_t:%p %s %u waiting:%p cond_waiting:%d", th, thread_status_name(th, TRUE), th->interrupt_flag, mth, waiting);
+   }
+   else {
+       rb_str_catf(msg, "  rb_thread_t:%p %s %u\n", th, thread_status_name(th, TRUE), th->interrupt_flag);
+   }
+   {
+       rb_thread_list_t *list = th->join_list;
+       while (list) {
+       rb_str_catf(msg, "    depended by: %p\n", list->th);
+       list = list->next;
+       }
    }
-   else
-       puts("");
     }
-#endif
 }

 static void
@@ -4782,7 +4793,7 @@ rb_check_deadlock(rb_vm_t *vm)
    VALUE argv[2];
    argv[0] = rb_eFatal;
    argv[1] = rb_str_new2("No live threads left. Deadlock?");
-   debug_deadlock_check(vm);
+   debug_deadlock_check(vm, argv[1]);
    vm->sleeper--;
    rb_threadptr_raise(vm->main_thread, 2, argv);
     }

#3 [ruby-dev:49640] Updated by naruse (Yui NARUSE) about 1 year ago

バックトレースも欲しいらしいので

% ./miniruby -ve'Thread.current.name="MainThread!";z=Thread.new{Thread.stop};a,b=Thread.new{1until b;b.join},Thread.new{1until a;a.join};a.name="aaaaa";b.name="bbbbb";z.name="zzzz";a.join'
--- thread.o ---
compiling ../../ruby/thread.c
--- miniruby ---
linking miniruby
ruby 2.4.0dev (2016-06-01 trunk 55248) [x86_64-freebsd10.3]
-e:1:in `join': No live threads left. Deadlock? (fatal)
4 threads, 4 sleeps current:0x00000802c1b200 main thread:0x00000802c17300
* #<Thread:0x00000802d8ce58@MainThread! sleep_forever>
   rb_thread_t:0x00000802c17300 native:0x00000802c06400 int:1
   -e:1:in `join'
   -e:1:in `<main>'
* #<Thread:0x00000802d7f780@zzzz@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1ac00 native:0x00000802c07800 int:0
   -e:1:in `stop'
   -e:1:in `block in <main>'
* #<Thread:0x00000802d7f690@aaaaa@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1af00 native:0x00000802c07c00 int:0
    depended by: tb_thread_id:0x00000802c1b200
    depended by: tb_thread_id:0x00000802c17300
   -e:1:in `join'
   -e:1:in `block in <main>'
* #<Thread:0x00000802d7f5a0@bbbbb@-e:1 sleep_forever>
   rb_thread_t:0x00000802c1b200 native:0x00000802c08000 int:0
    depended by: tb_thread_id:0x00000802c1af00
   -e:1:in `join'
   -e:1:in `block in <main>'
        from -e:1:in `<main>'
diff --git a/thread.c b/thread.c
index d17b663..84f76d1 100644
--- a/thread.c
+++ b/thread.c
@@ -2618,7 +2618,7 @@ rb_thread_group(VALUE thread)
 }

 static const char *
-thread_status_name(rb_thread_t *th)
+thread_status_name(rb_thread_t *th, int detail)
 {
     switch (th->status) {
       case THREAD_RUNNABLE:
@@ -2626,8 +2626,9 @@ thread_status_name(rb_thread_t *th)
        return "aborting";
    else
        return "run";
-      case THREAD_STOPPED:
       case THREAD_STOPPED_FOREVER:
+   if (detail) return "sleep_forever";
+      case THREAD_STOPPED:
    return "sleep";
       case THREAD_KILLED:
    return "dead";
@@ -2687,7 +2688,7 @@ rb_thread_status(VALUE thread)
    }
    return Qfalse;
     }
-    return rb_str_new2(thread_status_name(th));
+    return rb_str_new2(thread_status_name(th, FALSE));
 }


@@ -2833,7 +2834,7 @@ rb_thread_inspect(VALUE thread)
     VALUE str;

     GetThreadPtr(thread, th);
-    status = thread_status_name(th);
+    status = thread_status_name(th, TRUE);
     str = rb_sprintf("#<%"PRIsVALUE":%p", cname, (void *)thread);
     if (!NIL_P(th->name)) {
    rb_str_catf(str, "@%"PRIsVALUE, th->name);
@@ -4727,27 +4728,46 @@ ruby_native_thread_p(void)
     return th != 0;
 }

+VALUE rb_vm_backtrace_str_ary(rb_thread_t *th, long lev, long n);
 static void
-debug_deadlock_check(rb_vm_t *vm)
+debug_deadlock_check(rb_vm_t *vm, VALUE msg)
 {
-#ifdef DEBUG_DEADLOCK_CHECK
     rb_thread_t *th = 0;
+    VALUE sep = rb_str_new_cstr("\n   ");

-    printf("%d %d %p %p\n", vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
+    rb_str_catf(msg, "\n%d threads, %d sleeps current:%p main thread:%p\n",
+       vm_living_thread_num(vm), vm->sleeper, GET_THREAD(), vm->main_thread);
     list_for_each(&vm->living_threads, th, vmlt_node) {
-   printf("th:%p %d %d", th, th->status, th->interrupt_flag);
    if (th->locking_mutex) {
        rb_mutex_t *mutex;
+       struct rb_thread_struct volatile *mth;
+       int waiting;
        GetMutexPtr(th->locking_mutex, mutex);

        native_mutex_lock(&mutex->lock);
-       printf(" %p %d\n", mutex->th, mutex->cond_waiting);
+       mth = mutex->th;
+       waiting = mutex->cond_waiting;
        native_mutex_unlock(&mutex->lock);
+       rb_str_catf(msg, "* %+"PRIsVALUE"\n   rb_thread_t:%p native:%p int:%u mutex:%p cond:%d\n",
+           th->self, th, th->thread_id,
+           th->interrupt_flag, mth, waiting);
    }
-   else
-       puts("");
+   else {
+       rb_str_catf(msg, "* %+"PRIsVALUE"\n   rb_thread_t:%p native:%p int:%u\n",
+           th->self, th, th->thread_id,
+           th->interrupt_flag);
+   }
+   {
+       rb_thread_list_t *list = th->join_list;
+       while (list) {
+       rb_str_catf(msg, "    depended by: tb_thread_id:%p\n", list->th);
+       list = list->next;
+       }
+   }
+   rb_str_catf(msg, "   ");
+   rb_str_concat(msg, rb_ary_join(rb_vm_backtrace_str_ary(th, 0, 0), sep));
+   rb_str_catf(msg, "\n");
     }
-#endif
 }

 static void
@@ -4782,7 +4802,7 @@ rb_check_deadlock(rb_vm_t *vm)
    VALUE argv[2];
    argv[0] = rb_eFatal;
    argv[1] = rb_str_new2("No live threads left. Deadlock?");
-   debug_deadlock_check(vm);
+   debug_deadlock_check(vm, argv[1]);
    vm->sleeper--;
    rb_threadptr_raise(vm->main_thread, 2, argv);
     }

#4 [ruby-dev:49641] Updated by tagomoris (Satoshi TAGOMORI) about 1 year ago

デッドロック関連のデバッグであれば A -> B の順にロックを取ろうとしているスレッドと B -> A の順に
ロックを取ろうとしているスレッドが両方いる、という事実が重要だと思うので、バックトレースは極めて重要だと考えます。

#5 [ruby-dev:49662] Updated by shyouhei (Shyouhei Urabe) about 1 year ago

  • Assignee set to naruse (Yui NARUSE)
  • Status changed from Open to Assigned

#6 Updated by naruse (Yui NARUSE) about 1 year ago

  • Status changed from Assigned to Closed

Applied in changeset r55397.


  • thread.c (debug_deadlock_check): show thread lock dependency and
    backtrace [Feature #8214]

  • thread.c (thread_status_name): show "sleep_forever" instead of
    "sleep" if called from inspect.

Also available in: Atom PDF