From 53a4f308baf5f95713fc49b0ad11820064bb5ea4 Mon Sep 17 00:00:00 2001 From: KJ Tsanaktsidis Date: Sun, 22 May 2022 00:14:19 -0700 Subject: [PATCH] Fix crash when printing RGENGC_DEBUG=5 output from GC I was trying to debug an (unrelated) issue in the GC, and wanted to turn on the trace-level GC output by compiling it with -DRGENGC_DEBUG=5. Unfortunately, this actually causes a crash in newobj_init() because the code there tries to log the obj_info() of the newly created object. However, the object is not actually sufficiently set up for some of the things that obj_info() tries to do: * The instance variable table for a class is not yet initialized, and when using variable-length RVALUES, said ivar table is embedded in as-yet uninitialized memory after the struct RValue. Attempting to read this, as obj_info() does, causes a crash. * T_DATA variables need to dereference their ->type field to print out the underlying C type name, which is not set up until newobj_fill() is called. To fix this, we do two things: * Firstly, we define a new function obj_info_basic(), which is just obj_info() except skipping bits that don't work on newly-created objects. * Then, move the log message out of newobj_init() and into newobj_fill(); all possible codepaths which call newobj_init() do subsequently call newobj_fill(), so this won't change any output, and it allows the type of T_DATA objects to still be printed even in obj_info_basic(), which is helpful. 
--- gc.c | 372 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 203 insertions(+), 169 deletions(-) diff --git a/gc.c b/gc.c index 1b3fcd2c67..b1cce24a0c 100644 --- a/gc.c +++ b/gc.c @@ -1262,6 +1262,7 @@ static inline void gc_prof_set_heap_info(rb_objspace_t *); #endif PRINTF_ARGS(static void gc_report_body(int level, rb_objspace_t *objspace, const char *fmt, ...), 3, 4); static const char *obj_info(VALUE obj); +static const char *obj_info_basic(VALUE obj); static const char *obj_type_name(VALUE obj); /* @@ -2431,8 +2432,6 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, GC_ASSERT(!SPECIAL_CONST_P(obj)); /* check alignment */ #endif - gc_report(5, objspace, "newobj: %s\n", obj_info(obj)); - #if RGENGC_OLD_NEWOBJ_CHECK > 0 { static int newobj_cnt = RGENGC_OLD_NEWOBJ_CHECK; @@ -2573,6 +2572,13 @@ newobj_fill(VALUE obj, VALUE v1, VALUE v2, VALUE v3) p->as.values.v1 = v1; p->as.values.v2 = v2; p->as.values.v3 = v3; + + // Must call obj_info_basic() here, not obj_info(), because some of the things + // that obj_info tries to print (e.g. the iv table for a class) won't be setup yet. + // This also needs to be here, NOT in newobj_init, because fetching the type of T_DATA + // things depends on RTYPEDDATA(obj)->type, which is set up above (as v1). 
+ gc_report(5, &rb_objspace, "newobj: %s\n", obj_info_basic(obj)); + return obj; } @@ -13457,8 +13463,10 @@ str_len_no_raise(VALUE str) return (int)len; } -const char * -rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) +#define OBJ_INFO_EXT 1 << 0 + +static const char * +rb_raw_obj_info_impl(char *buff, const int buff_size, VALUE obj, int obj_info_flags) { int pos = 0; void *poisoned = asan_poisoned_object_p(obj); @@ -13519,189 +13527,191 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) APPENDF((BUFF_ARGS, "@%s:%d", RANY(obj)->file, RANY(obj)->line)); #endif - switch (type) { - case T_NODE: - UNEXPECTED_NODE(rb_raw_obj_info); - break; - case T_ARRAY: - if (FL_TEST(obj, ELTS_SHARED)) { - APPENDF((BUFF_ARGS, "shared -> %s", - rb_obj_info(RARRAY(obj)->as.heap.aux.shared_root))); - } - else if (FL_TEST(obj, RARRAY_EMBED_FLAG)) { - APPENDF((BUFF_ARGS, "[%s%s] len: %ld (embed)", - C(ARY_EMBED_P(obj), "E"), - C(ARY_SHARED_P(obj), "S"), - RARRAY_LEN(obj))); - } - else { - APPENDF((BUFF_ARGS, "[%s%s%s] len: %ld, capa:%ld ptr:%p", - C(ARY_EMBED_P(obj), "E"), - C(ARY_SHARED_P(obj), "S"), - C(RARRAY_TRANSIENT_P(obj), "T"), - RARRAY_LEN(obj), - ARY_EMBED_P(obj) ? 
-1L : RARRAY(obj)->as.heap.aux.capa, - (void *)RARRAY_CONST_PTR_TRANSIENT(obj))); - } - break; - case T_STRING: { - if (STR_SHARED_P(obj)) { - APPENDF((BUFF_ARGS, " [shared] len: %ld", RSTRING_LEN(obj))); - } - else { - if (STR_EMBED_P(obj)) APPENDF((BUFF_ARGS, " [embed]")); - - APPENDF((BUFF_ARGS, " len: %ld, capa: %" PRIdSIZE, RSTRING_LEN(obj), rb_str_capacity(obj))); - } - APPENDF((BUFF_ARGS, " \"%.*s\"", str_len_no_raise(obj), RSTRING_PTR(obj))); - break; - } - case T_SYMBOL: { - VALUE fstr = RSYMBOL(obj)->fstr; - ID id = RSYMBOL(obj)->id; - if (RB_TYPE_P(fstr, T_STRING)) { - APPENDF((BUFF_ARGS, ":%s id:%d", RSTRING_PTR(fstr), (unsigned int)id)); - } - else { - APPENDF((BUFF_ARGS, "(%p) id:%d", (void *)fstr, (unsigned int)id)); - } - break; - } - case T_MOVED: { - APPENDF((BUFF_ARGS, "-> %p", (void*)rb_gc_location(obj))); + if (obj_info_flags & OBJ_INFO_EXT) { + switch (type) { + case T_NODE: + UNEXPECTED_NODE(rb_raw_obj_info); break; - } - case T_HASH: { - APPENDF((BUFF_ARGS, "[%c%c] %"PRIdSIZE, - RHASH_AR_TABLE_P(obj) ? 'A' : 'S', - RHASH_TRANSIENT_P(obj) ? 'T' : ' ', - RHASH_SIZE(obj))); - break; - } - case T_CLASS: - case T_MODULE: - { - VALUE class_path = rb_class_path_cached(obj); - if (!NIL_P(class_path)) { - APPENDF((BUFF_ARGS, "%s", RSTRING_PTR(class_path))); + case T_ARRAY: + if (FL_TEST(obj, ELTS_SHARED)) { + APPENDF((BUFF_ARGS, "shared -> %s", + rb_obj_info(RARRAY(obj)->as.heap.aux.shared_root))); + } + else if (FL_TEST(obj, RARRAY_EMBED_FLAG)) { + APPENDF((BUFF_ARGS, "[%s%s] len: %ld (embed)", + C(ARY_EMBED_P(obj), "E"), + C(ARY_SHARED_P(obj), "S"), + RARRAY_LEN(obj))); } else { - APPENDF((BUFF_ARGS, "(annon)")); + APPENDF((BUFF_ARGS, "[%s%s%s] len: %ld, capa:%ld ptr:%p", + C(ARY_EMBED_P(obj), "E"), + C(ARY_SHARED_P(obj), "S"), + C(RARRAY_TRANSIENT_P(obj), "T"), + RARRAY_LEN(obj), + ARY_EMBED_P(obj) ? 
-1L : RARRAY(obj)->as.heap.aux.capa, + (void *)RARRAY_CONST_PTR_TRANSIENT(obj))); } - break; - } - case T_ICLASS: - { - VALUE class_path = rb_class_path_cached(RBASIC_CLASS(obj)); - if (!NIL_P(class_path)) { - APPENDF((BUFF_ARGS, "src:%s", RSTRING_PTR(class_path))); + break; + case T_STRING: { + if (STR_SHARED_P(obj)) { + APPENDF((BUFF_ARGS, " [shared] len: %ld", RSTRING_LEN(obj))); } - break; - } - case T_OBJECT: - { - uint32_t len = ROBJECT_NUMIV(obj); + else { + if (STR_EMBED_P(obj)) APPENDF((BUFF_ARGS, " [embed]")); - if (RANY(obj)->as.basic.flags & ROBJECT_EMBED) { - APPENDF((BUFF_ARGS, "(embed) len:%d", len)); + APPENDF((BUFF_ARGS, " len: %ld, capa: %" PRIdSIZE, RSTRING_LEN(obj), rb_str_capacity(obj))); + } + APPENDF((BUFF_ARGS, " \"%.*s\"", str_len_no_raise(obj), RSTRING_PTR(obj))); + break; + } + case T_SYMBOL: { + VALUE fstr = RSYMBOL(obj)->fstr; + ID id = RSYMBOL(obj)->id; + if (RB_TYPE_P(fstr, T_STRING)) { + APPENDF((BUFF_ARGS, ":%s id:%d", RSTRING_PTR(fstr), (unsigned int)id)); } else { - VALUE *ptr = ROBJECT_IVPTR(obj); - APPENDF((BUFF_ARGS, "len:%d ptr:%p", len, (void *)ptr)); + APPENDF((BUFF_ARGS, "(%p) id:%d", (void *)fstr, (unsigned int)id)); } + break; } - break; - case T_DATA: { - const struct rb_block *block; - const rb_iseq_t *iseq; - if (rb_obj_is_proc(obj) && - (block = vm_proc_block(obj)) != NULL && - (vm_block_type(block) == block_type_iseq) && - (iseq = vm_block_iseq(block)) != NULL) { - rb_raw_iseq_info(BUFF_ARGS, iseq); - } - else if (rb_ractor_p(obj)) { - rb_ractor_t *r = (void *)DATA_PTR(obj); - if (r) { - APPENDF((BUFF_ARGS, "r:%d", r->pub.id)); - } + case T_MOVED: { + APPENDF((BUFF_ARGS, "-> %p", (void*)rb_gc_location(obj))); + break; } - else { - const char * const type_name = rb_objspace_data_type_name(obj); - if (type_name) { - APPENDF((BUFF_ARGS, "%s", type_name)); - } - } - break; - } - case T_IMEMO: { - APPENDF((BUFF_ARGS, "<%s> ", rb_imemo_name(imemo_type(obj)))); - - switch (imemo_type(obj)) { - case imemo_ment: + case 
T_HASH: { + APPENDF((BUFF_ARGS, "[%c%c] %"PRIdSIZE, + RHASH_AR_TABLE_P(obj) ? 'A' : 'S', + RHASH_TRANSIENT_P(obj) ? 'T' : ' ', + RHASH_SIZE(obj))); + break; + } + case T_CLASS: + case T_MODULE: { - const rb_method_entry_t *me = &RANY(obj)->as.imemo.ment; - - APPENDF((BUFF_ARGS, ":%s (%s%s%s%s) type:%s alias:%d owner:%p defined_class:%p", - rb_id2name(me->called_id), - METHOD_ENTRY_VISI(me) == METHOD_VISI_PUBLIC ? "pub" : - METHOD_ENTRY_VISI(me) == METHOD_VISI_PRIVATE ? "pri" : "pro", - METHOD_ENTRY_COMPLEMENTED(me) ? ",cmp" : "", - METHOD_ENTRY_CACHED(me) ? ",cc" : "", - METHOD_ENTRY_INVALIDATED(me) ? ",inv" : "", - me->def ? rb_method_type_name(me->def->type) : "NULL", - me->def ? me->def->alias_count : -1, - (void *)me->owner, // obj_info(me->owner), - (void *)me->defined_class)); //obj_info(me->defined_class))); - - if (me->def) { - switch (me->def->type) { - case VM_METHOD_TYPE_ISEQ: - APPENDF((BUFF_ARGS, " (iseq:%s)", obj_info((VALUE)me->def->body.iseq.iseqptr))); - break; - default: - break; - } + VALUE class_path = rb_class_path_cached(obj); + if (!NIL_P(class_path)) { + APPENDF((BUFF_ARGS, "%s", RSTRING_PTR(class_path))); + } + else { + APPENDF((BUFF_ARGS, "(annon)")); } - break; } - case imemo_iseq: { - const rb_iseq_t *iseq = (const rb_iseq_t *)obj; - rb_raw_iseq_info(BUFF_ARGS, iseq); - break; - } - case imemo_callinfo: + case T_ICLASS: { - const struct rb_callinfo *ci = (const struct rb_callinfo *)obj; - APPENDF((BUFF_ARGS, "(mid:%s, flag:%x argc:%d, kwarg:%s)", - rb_id2name(vm_ci_mid(ci)), - vm_ci_flag(ci), - vm_ci_argc(ci), - vm_ci_kwarg(ci) ? "available" : "NULL")); + VALUE class_path = rb_class_path_cached(RBASIC_CLASS(obj)); + if (!NIL_P(class_path)) { + APPENDF((BUFF_ARGS, "src:%s", RSTRING_PTR(class_path))); + } break; } - case imemo_callcache: + case T_OBJECT: { - const struct rb_callcache *cc = (const struct rb_callcache *)obj; - VALUE class_path = cc->klass ? 
rb_class_path_cached(cc->klass) : Qnil; - const rb_callable_method_entry_t *cme = vm_cc_cme(cc); - - APPENDF((BUFF_ARGS, "(klass:%s cme:%s%s (%p) call:%p", - NIL_P(class_path) ? (cc->klass ? "??" : "") : RSTRING_PTR(class_path), - cme ? rb_id2name(cme->called_id) : "", - cme ? (METHOD_ENTRY_INVALIDATED(cme) ? " [inv]" : "") : "", - (void *)cme, - (void *)vm_cc_call(cc))); - break; + uint32_t len = ROBJECT_NUMIV(obj); + + if (RANY(obj)->as.basic.flags & ROBJECT_EMBED) { + APPENDF((BUFF_ARGS, "(embed) len:%d", len)); + } + else { + VALUE *ptr = ROBJECT_IVPTR(obj); + APPENDF((BUFF_ARGS, "len:%d ptr:%p", len, (void *)ptr)); + } } - default: - break; - } - } - default: - break; - } + break; + case T_DATA: { + const struct rb_block *block; + const rb_iseq_t *iseq; + if (rb_obj_is_proc(obj) && + (block = vm_proc_block(obj)) != NULL && + (vm_block_type(block) == block_type_iseq) && + (iseq = vm_block_iseq(block)) != NULL) { + rb_raw_iseq_info(BUFF_ARGS, iseq); + } + else if (rb_ractor_p(obj)) { + rb_ractor_t *r = (void *)DATA_PTR(obj); + if (r) { + APPENDF((BUFF_ARGS, "r:%d", r->pub.id)); + } + } + else { + const char * const type_name = rb_objspace_data_type_name(obj); + if (type_name) { + APPENDF((BUFF_ARGS, "%s", type_name)); + } + } + break; + } + case T_IMEMO: { + APPENDF((BUFF_ARGS, "<%s> ", rb_imemo_name(imemo_type(obj)))); + + switch (imemo_type(obj)) { + case imemo_ment: + { + const rb_method_entry_t *me = &RANY(obj)->as.imemo.ment; + + APPENDF((BUFF_ARGS, ":%s (%s%s%s%s) type:%s alias:%d owner:%p defined_class:%p", + rb_id2name(me->called_id), + METHOD_ENTRY_VISI(me) == METHOD_VISI_PUBLIC ? "pub" : + METHOD_ENTRY_VISI(me) == METHOD_VISI_PRIVATE ? "pri" : "pro", + METHOD_ENTRY_COMPLEMENTED(me) ? ",cmp" : "", + METHOD_ENTRY_CACHED(me) ? ",cc" : "", + METHOD_ENTRY_INVALIDATED(me) ? ",inv" : "", + me->def ? rb_method_type_name(me->def->type) : "NULL", + me->def ? 
me->def->alias_count : -1, + (void *)me->owner, // obj_info(me->owner), + (void *)me->defined_class)); //obj_info(me->defined_class))); + + if (me->def) { + switch (me->def->type) { + case VM_METHOD_TYPE_ISEQ: + APPENDF((BUFF_ARGS, " (iseq:%s)", obj_info((VALUE)me->def->body.iseq.iseqptr))); + break; + default: + break; + } + } + + break; + } + case imemo_iseq: { + const rb_iseq_t *iseq = (const rb_iseq_t *)obj; + rb_raw_iseq_info(BUFF_ARGS, iseq); + break; + } + case imemo_callinfo: + { + const struct rb_callinfo *ci = (const struct rb_callinfo *)obj; + APPENDF((BUFF_ARGS, "(mid:%s, flag:%x argc:%d, kwarg:%s)", + rb_id2name(vm_ci_mid(ci)), + vm_ci_flag(ci), + vm_ci_argc(ci), + vm_ci_kwarg(ci) ? "available" : "NULL")); + break; + } + case imemo_callcache: + { + const struct rb_callcache *cc = (const struct rb_callcache *)obj; + VALUE class_path = cc->klass ? rb_class_path_cached(cc->klass) : Qnil; + const rb_callable_method_entry_t *cme = vm_cc_cme(cc); + + APPENDF((BUFF_ARGS, "(klass:%s cme:%s%s (%p) call:%p", + NIL_P(class_path) ? (cc->klass ? "??" : "") : RSTRING_PTR(class_path), + cme ? rb_id2name(cme->called_id) : "", + cme ? (METHOD_ENTRY_INVALIDATED(cme) ? 
" [inv]" : "") : "", + (void *)cme, + (void *)vm_cc_call(cc))); + break; + } + default: + break; + } + } + default: + break; + } + } #undef TF #undef C } @@ -13715,6 +13725,12 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) #undef BUFF_ARGS } +const char * +rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) +{ + return rb_raw_obj_info_impl(buff, buff_size, obj, OBJ_INFO_EXT); +} + #if RGENGC_OBJ_INFO #define OBJ_INFO_BUFFERS_NUM 10 #define OBJ_INFO_BUFFERS_SIZE 0x100 @@ -13722,7 +13738,7 @@ static int obj_info_buffers_index = 0; static char obj_info_buffers[OBJ_INFO_BUFFERS_NUM][OBJ_INFO_BUFFERS_SIZE]; static const char * -obj_info(VALUE obj) +obj_info_impl(VALUE obj, int obj_info_flags) { const int index = obj_info_buffers_index++; char *const buff = &obj_info_buffers[index][0]; @@ -13731,7 +13747,19 @@ obj_info(VALUE obj) obj_info_buffers_index = 0; } - return rb_raw_obj_info(buff, OBJ_INFO_BUFFERS_SIZE, obj); + return rb_raw_obj_info_impl(buff, OBJ_INFO_BUFFERS_SIZE, obj, obj_info_flags); +} + +static const char * +obj_info(VALUE obj) +{ + return obj_info_impl(obj, OBJ_INFO_EXT); +} + +static const char * +obj_info_basic(VALUE obj) +{ + return obj_info_impl(obj, 0); } #else static const char * @@ -13739,6 +13767,12 @@ obj_info(VALUE obj) { return obj_type_name(obj); } + +static const char * +obj_info_basic(VALUE obj) +{ + return obj_type_name(obj); +} #endif MJIT_FUNC_EXPORTED const char * -- 2.35.1