Feature #22094 » 0001-Speed-up-Array-join-with-a-byte-copy-fast-path.patch
| array.c | ||
|---|---|---|
|
#include "internal/object.h"
|
||
|
#include "internal/proc.h"
|
||
|
#include "internal/rational.h"
|
||
|
#include "internal/string.h"
|
||
|
#include "internal/vm.h"
|
||
|
#include "probes.h"
|
||
|
#include "ruby/encoding.h"
|
||
| ... | ... | |
|
VALUE
|
||
|
rb_ary_join(VALUE ary, VALUE sep)
|
||
|
{
|
||
|
long len = 1, i;
|
||
|
long len = 1, i, n = RARRAY_LEN(ary);
|
||
|
VALUE val, tmp, result;
|
||
|
if (RARRAY_LEN(ary) == 0) return rb_usascii_str_new(0, 0);
|
||
|
if (n == 0) return rb_usascii_str_new(0, 0);
|
||
|
int sep_cr = ENC_CODERANGE_7BIT, sep_enc = 0;
|
||
|
if (!NIL_P(sep)) {
|
||
|
StringValue(sep);
|
||
|
len += RSTRING_LEN(sep) * (RARRAY_LEN(ary) - 1);
|
||
|
len += RSTRING_LEN(sep) * (n - 1);
|
||
|
sep_cr = rb_enc_str_coderange(sep);
|
||
|
sep_enc = ENCODING_GET(sep);
|
||
|
}
|
||
|
long len_memo = RARRAY_LEN(ary);
|
||
|
for (i=0; i < len_memo; i++) {
|
||
|
/* Measure the result length and note whether every part is 7-bit ASCII. A
|
||
|
non-String element switches to the general per-element append path. */
|
||
|
int ascii_only = NIL_P(sep) || sep_cr == ENC_CODERANGE_7BIT;
|
||
|
long len_memo = n;
|
||
|
for (i = 0; i < len_memo; i++) {
|
||
|
val = RARRAY_AREF(ary, i);
|
||
|
if (RB_UNLIKELY(!RB_TYPE_P(val, T_STRING))) {
|
||
|
tmp = rb_check_string_type(val);
|
||
|
if (NIL_P(tmp) || tmp != val) {
|
||
|
int first;
|
||
|
long n = RARRAY_LEN(ary);
|
||
|
if (i > n) i = n;
|
||
|
result = rb_str_buf_new(len + (n-i)*10);
|
||
|
long m = RARRAY_LEN(ary);
|
||
|
if (i > m) i = m;
|
||
|
result = rb_str_buf_new(len + (m - i) * 10);
|
||
|
rb_enc_associate(result, rb_usascii_encoding());
|
||
|
i = ary_join_0(ary, sep, i, result);
|
||
|
first = i == 0;
|
||
|
first = (i == 0);
|
||
|
ary_join_1(ary, ary, sep, i, result, &first);
|
||
|
return result;
|
||
|
}
|
||
|
len += RSTRING_LEN(tmp);
|
||
|
len_memo = RARRAY_LEN(ary);
|
||
|
ascii_only = 0;
|
||
|
}
|
||
|
else {
|
||
|
len += RSTRING_LEN(val);
|
||
|
if (ascii_only && ENC_CODERANGE(val) != ENC_CODERANGE_7BIT) ascii_only = 0;
|
||
|
}
|
||
|
}
|
||
|
n = RARRAY_LEN(ary);
|
||
|
result = rb_str_new(0, len);
|
||
|
rb_str_set_len(result, 0);
|
||
|
/* Fast path: build the result by copying raw bytes (no per-append bookkeeping)
|
||
|
when no encoding negotiation can occur. That holds when every part is 7-bit
|
||
|
ASCII, or when every element shares one ASCII-compatible single-byte-terminated
|
||
|
encoding and the separator is compatible. The result takes element 0's
|
||
|
encoding; result_cr is its code range, left UNKNOWN to compute lazily. */
|
||
|
int enc = ENCODING_GET(RARRAY_AREF(ary, 0));
|
||
|
int result_cr = ENC_CODERANGE_7BIT;
|
||
|
int copyable = ascii_only;
|
||
|
if (!copyable && rb_str_encindex_fastpath(enc) &&
|
||
|
(n <= 1 || NIL_P(sep) || sep_enc == enc || sep_cr == ENC_CODERANGE_7BIT)) {
|
||
|
/* Every element must share enc; merge code ranges (VALID if any part is
|
||
|
clean multibyte, UNKNOWN if any is UNKNOWN/BROKEN -> compute lazily). */
|
||
|
int valid = 0, uncertain = 0;
|
||
|
copyable = 1;
|
||
|
for (i = 0; i < n; i++) {
|
||
|
val = RARRAY_AREF(ary, i);
|
||
|
if (ENCODING_GET(val) != enc) { copyable = 0; break; }
|
||
|
int c = ENC_CODERANGE(val);
|
||
|
if (c == ENC_CODERANGE_VALID) valid = 1;
|
||
|
else if (c != ENC_CODERANGE_7BIT) uncertain = 1;
|
||
|
}
|
||
|
if (copyable) {
|
||
|
if (n > 1 && !NIL_P(sep)) {
|
||
|
if (sep_cr == ENC_CODERANGE_VALID) valid = 1;
|
||
|
else if (sep_cr != ENC_CODERANGE_7BIT) uncertain = 1;
|
||
|
}
|
||
|
result_cr = uncertain ? ENC_CODERANGE_UNKNOWN :
|
||
|
(valid ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT);
|
||
|
}
|
||
|
}
|
||
|
ary_join_0(ary, sep, RARRAY_LEN(ary), result);
|
||
|
if (copyable) {
|
||
|
result = rb_str_buf_new(len);
|
||
|
rb_enc_associate_index(result, enc);
|
||
|
const char *sep_ptr = NIL_P(sep) ? NULL : RSTRING_PTR(sep);
|
||
|
long sep_len = NIL_P(sep) ? 0 : RSTRING_LEN(sep);
|
||
|
char *const buf = RSTRING_PTR(result);
|
||
|
char *p = buf;
|
||
|
for (i = 0; i < n; i++) {
|
||
|
val = RARRAY_AREF(ary, i);
|
||
|
long vlen = RSTRING_LEN(val);
|
||
|
if (i > 0 && sep_len) {
|
||
|
memcpy(p, sep_ptr, sep_len);
|
||
|
p += sep_len;
|
||
|
}
|
||
|
if (vlen) {
|
||
|
memcpy(p, RSTRING_PTR(val), vlen);
|
||
|
p += vlen;
|
||
|
}
|
||
|
}
|
||
|
ENC_CODERANGE_CLEAR(result); /* so rb_str_set_len does not rescan the bytes */
|
||
|
rb_str_set_len(result, p - buf);
|
||
|
if (result_cr != ENC_CODERANGE_UNKNOWN) ENC_CODERANGE_SET(result, result_cr);
|
||
|
return result;
|
||
|
}
|
||
|
/* General path: per-element append with full encoding handling. */
|
||
|
result = rb_str_new(0, len);
|
||
|
rb_str_set_len(result, 0);
|
||
|
ary_join_0(ary, sep, n, result);
|
||
|
return result;
|
||
|
}
|
||
| benchmark/array_join.yml | ||
|---|---|---|
|
prelude: |
|
||
|
# All elements are 7-bit ASCII Strings (the dominant real-world join case).
|
||
|
# Distinct objects so large-N cases exercise realistic heap/cache behavior.
|
||
|
a100_1 = Array.new(100) { "a" * 1 }
|
||
|
a100_8 = Array.new(100) { "a" * 8 }
|
||
|
a100_40 = Array.new(100) { "a" * 40 }
|
||
|
a1k_1 = Array.new(1000) { "a" * 1 }
|
||
|
a1k_8 = Array.new(1000) { "a" * 8 }
|
||
|
a1k_40 = Array.new(1000) { "a" * 40 }
|
||
|
a100k_1 = Array.new(100000) { "a" * 1 }
|
||
|
a100k_8 = Array.new(100000) { "a" * 8 }
|
||
|
a100k_40 = Array.new(100000) { "a" * 40 }
|
||
|
benchmark:
|
||
|
# separator " " (space)
|
||
|
join_sp_n100_e1: a100_1.join(" ")
|
||
|
join_sp_n100_e8: a100_8.join(" ")
|
||
|
join_sp_n100_e40: a100_40.join(" ")
|
||
|
join_sp_n1k_e1: a1k_1.join(" ")
|
||
|
join_sp_n1k_e8: a1k_8.join(" ")
|
||
|
join_sp_n1k_e40: a1k_40.join(" ")
|
||
|
join_sp_n100k_e1: a100k_1.join(" ")
|
||
|
join_sp_n100k_e8: a100k_8.join(" ")
|
||
|
join_sp_n100k_e40: a100k_40.join(" ")
|
||
|
# separator "\n" (newline)
|
||
|
join_nl_n100_e1: a100_1.join("\n")
|
||
|
join_nl_n100_e8: a100_8.join("\n")
|
||
|
join_nl_n100_e40: a100_40.join("\n")
|
||
|
join_nl_n1k_e1: a1k_1.join("\n")
|
||
|
join_nl_n1k_e8: a1k_8.join("\n")
|
||
|
join_nl_n1k_e40: a1k_40.join("\n")
|
||
|
join_nl_n100k_e1: a100k_1.join("\n")
|
||
|
join_nl_n100k_e8: a100k_8.join("\n")
|
||
|
join_nl_n100k_e40: a100k_40.join("\n")
|
||
| benchmark/array_join_regression.yml | ||
|---|---|---|
|
prelude: |
|
||
|
# --- must NOT take any fast path: prove added precompute-loop checks don't regress ---
|
||
|
# all multibyte UTF-8 (VALID coderange). 7-bit gate bails at elem 0; same-encoding gate TAKES it.
|
||
|
mb_utf8 = Array.new(1000) { "café" }
|
||
|
# ASCII except a trailing multibyte elem: WORST CASE for 7-bit gate (scans all, then falls back).
|
||
|
tail_mb = Array.new(1000) { "a" * 8 }; tail_mb[-1] = "café"
|
||
|
# non-String elements -> ary_join_1 (to_s). Gate bails at type check before coderange.
|
||
|
nonstr_int = Array.new(1000) { |i| i }
|
||
|
# ASCII strings except a trailing Integer: precompute scans strings, then mixed fallback.
|
||
|
mixed_tail = (Array.new(999) { "a" * 8 } << 42)
|
||
|
# UTF-16LE: not ASCII-compatible. Both gates bail immediately.
|
||
|
utf16 = Array.new(1000) { "ab".encode("UTF-16LE") }
|
||
|
# nested arrays -> recursive fallback.
|
||
|
nested = Array.new(500) { [1, 2] }
|
||
|
# 7-bit ASCII elements but a multibyte separator (3-byte em dash). Both gates bail at sep check.
|
||
|
ascii_elems = Array.new(1000) { "a" * 8 }
|
||
|
sep_mb = "—"
|
||
|
# --- fast-path ELIGIBLE variants: confirm benefit generalizes beyond single chars / find crossover ---
|
||
|
# frozen ASCII elements (read-only path).
|
||
|
frozen_elems = Array.new(1000) { ("a" * 8).freeze }
|
||
|
# large elements: where memcpy should dominate and the win decays toward 1x.
|
||
|
big_e256 = Array.new(1000) { "a" * 256 }
|
||
|
big_e1k = Array.new(1000) { "a" * 1024 }
|
||
|
big_e4k = Array.new(1000) { "a" * 4096 }
|
||
|
benchmark:
|
||
|
# regression (fallback / gate worst-case) — all use a 1-byte ASCII separator unless noted
|
||
|
reg_multibyte_utf8: mb_utf8.join(" ")
|
||
|
reg_tail_multibyte: tail_mb.join(" ")
|
||
|
reg_nonstring_int: nonstr_int.join(" ")
|
||
|
reg_mixed_tail_int: mixed_tail.join(" ")
|
||
|
reg_utf16: utf16.join(" ".encode("UTF-16LE"))
|
||
|
reg_nested: nested.join(" ")
|
||
|
reg_multibyte_sep: ascii_elems.join(sep_mb)
|
||
|
# fast-path eligible variants
|
||
|
fp_frozen: frozen_elems.join(" ")
|
||
|
fp_nosep: ascii_elems.join
|
||
|
fp_multichar_sep: ascii_elems.join(", ")
|
||
|
fp_big_e256: big_e256.join(" ")
|
||
|
fp_big_e1k: big_e1k.join(" ")
|
||
|
fp_big_e4k: big_e4k.join(" ")
|
||
| test/ruby/test_array.rb | ||
|---|---|---|
|
assert_equal("b", x.ary.join(""))
|
||
|
end
|
||
|
def test_join_encoding
|
||
|
# Multibyte UTF-8 elements: result is UTF-8, valid, not ASCII-only.
|
||
|
r = @cls["caf\u00e9", "na\u00efve"].join(" ")
|
||
|
assert_equal("caf\u00e9 na\u00efve", r)
|
||
|
assert_equal(Encoding::UTF_8, r.encoding)
|
||
|
assert_equal(true, r.valid_encoding?)
|
||
|
assert_equal(false, r.ascii_only?)
|
||
|
# All 7-bit content stays ASCII-only.
|
||
|
r = @cls["abc", "def"].join(",")
|
||
|
assert_equal("abc,def", r)
|
||
|
assert_equal(true, r.ascii_only?)
|
||
|
# Multibyte separator (same encoding as the elements).
|
||
|
r = @cls["a", "b", "c"].join("\u2014")
|
||
|
assert_equal("a\u2014b\u2014c", r)
|
||
|
assert_equal(Encoding::UTF_8, r.encoding)
|
||
|
assert_equal(false, r.ascii_only?)
|
||
|
assert_equal(true, r.valid_encoding?)
|
||
|
# Mixed ASCII-compatible encodings, all 7-bit: result takes element 0's encoding.
|
||
|
r = @cls["abc".dup.force_encoding("US-ASCII"), "def".dup.force_encoding("UTF-8")].join(" ")
|
||
|
assert_equal("abc def", r)
|
||
|
assert_equal(Encoding::US_ASCII, r.encoding)
|
||
|
assert_equal(true, r.ascii_only?)
|
||
|
# 7-bit content in a non-ASCII encoding (Shift_JIS).
|
||
|
r = @cls["abc".encode("Shift_JIS"), "def".encode("Shift_JIS")].join(" ")
|
||
|
assert_equal(Encoding::Shift_JIS, r.encoding)
|
||
|
assert_equal("abc def".b, r.b)
|
||
|
# Same-encoding multibyte (Shift_JIS): byte-for-byte concatenation.
|
||
|
s = "\u65e5\u672c".encode("Shift_JIS")
|
||
|
sep = "/".encode("Shift_JIS")
|
||
|
r = @cls[s, s].join(sep)
|
||
|
assert_equal(Encoding::Shift_JIS, r.encoding)
|
||
|
assert_equal((s + sep + s).b, r.b)
|
||
|
# Broken bytes: result keeps them and reports invalid.
|
||
|
bad = "\xff\xfe".dup.force_encoding("UTF-8")
|
||
|
r = @cls[bad, bad].join(" ")
|
||
|
assert_equal((bad + " " + bad).b, r.b)
|
||
|
assert_equal(false, r.valid_encoding?)
|
||
|
# Incompatible element/separator encodings still raise.
|
||
|
assert_raise(Encoding::CompatibilityError) do
|
||
|
@cls["a".dup.force_encoding("UTF-8"), "b".encode("UTF-16LE")].join(" ")
|
||
|
end
|
||
|
# Bulk copy: large array, verified against a non-join oracle.
|
||
|
big = @cls.new(500) { "ab" }
|
||
|
assert_equal(("ab," * 500).chomp(","), big.join(","))
|
||
|
end
|
||
|
def test_to_a2
|
||
|
klass = Class.new(Array)
|
||
|
a = klass.new.to_a
|
||