Project

General

Profile

Feature #13110 » byteindex.diff

PoC code for String#byteindex and Regexp#byteoffset - shugo (Shugo Maeda), 01/06/2017 04:03 PM

View differences:

re.c
1184 1184
			INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
1185 1185
}
1186 1186

  
1187
static VALUE
1188
match_byteoffset(VALUE match, VALUE n)
1189
{
1190
    int i = match_backref_number(match, n);
1191
    struct re_registers *regs = RMATCH_REGS(match);
1192

  
1193
    match_check(match);
1194
    if (i < 0 || regs->num_regs <= i)
1195
	rb_raise(rb_eIndexError, "index %d out of matches", i);
1196

  
1197
    if (BEG(i) < 0)
1198
	return rb_assoc_new(Qnil, Qnil);
1199

  
1200
    return rb_assoc_new(INT2FIX(BEG(i)), INT2FIX(END(i)));
1201
}
1202

  
1187 1203

  
1188 1204
/*
1189 1205
 *  call-seq:
......
3977 3993
    rb_define_method(rb_cMatch, "size", match_size, 0);
3978 3994
    rb_define_method(rb_cMatch, "length", match_size, 0);
3979 3995
    rb_define_method(rb_cMatch, "offset", match_offset, 1);
3996
    rb_define_method(rb_cMatch, "byteoffset", match_byteoffset, 1);
3980 3997
    rb_define_method(rb_cMatch, "begin", match_begin, 1);
3981 3998
    rb_define_method(rb_cMatch, "end", match_end, 1);
3982 3999
    rb_define_method(rb_cMatch, "to_a", match_to_a, 0);
string.c
3353 3353
    return LONG2NUM(pos);
3354 3354
}
3355 3355

  
3356
static VALUE
3357
rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
3358
{
3359
    VALUE sub;
3360
    VALUE initpos;
3361
    long pos;
3362

  
3363
    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
3364
	pos = NUM2LONG(initpos);
3365
    }
3366
    else {
3367
	pos = 0;
3368
    }
3369
    if (pos < 0) {
3370
	pos += LONG2NUM(RSTRING_LEN(str));
3371
	if (pos < 0) {
3372
	    if (RB_TYPE_P(sub, T_REGEXP)) {
3373
		rb_backref_set(Qnil);
3374
	    }
3375
	    return Qnil;
3376
	}
3377
    }
3378

  
3379
    if (SPECIAL_CONST_P(sub)) goto generic;
3380
    switch (BUILTIN_TYPE(sub)) {
3381
      case T_REGEXP:
3382
	if (pos > RSTRING_LEN(str))
3383
	    return Qnil;
3384
	pos = rb_reg_search(sub, str, pos, 0);
3385
	break;
3386

  
3387
      generic:
3388
      default: {
3389
	VALUE tmp;
3390

  
3391
	tmp = rb_check_string_type(sub);
3392
	if (NIL_P(tmp)) {
3393
	    rb_raise(rb_eTypeError, "type mismatch: %s given",
3394
		     rb_obj_classname(sub));
3395
	}
3396
	sub = tmp;
3397
      }
3398
	/* fall through */
3399
      case T_STRING:
3400
	pos = rb_strseq_index(str, sub, pos, 1);
3401
	break;
3402
    }
3403

  
3404
    if (pos == -1) return Qnil;
3405
    return LONG2NUM(pos);
3406
}
3407

  
3356 3408
#ifdef HAVE_MEMRCHR
3357 3409
static long
3358 3410
str_rindex(VALUE str, VALUE sub, const char *s, long pos, rb_encoding *enc)
......
9991 10043
    rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0);
9992 10044
    rb_define_method(rb_cString, "upto", rb_str_upto, -1);
9993 10045
    rb_define_method(rb_cString, "index", rb_str_index_m, -1);
10046
    rb_define_method(rb_cString, "byteindex", rb_str_byteindex_m, -1);
9994 10047
    rb_define_method(rb_cString, "rindex", rb_str_rindex_m, -1);
9995 10048
    rb_define_method(rb_cString, "replace", rb_str_replace, 1);
9996 10049
    rb_define_method(rb_cString, "clear", rb_str_clear, 0);