Actions
Bug #21663
openIO#pos will corrupt the file position in specific situation.
Bug #21663:
IO#pos will corrupt the file position in specific situation.
Status:
Open
Assignee:
-
Target version:
-
ruby -v:
ruby 3.2.3 (2024-01-18 revision 52bb2ac0a6) [x64-mingw-ucrt]
Description
IO#pos and IO#tell will corrupt the file position in the following situation.
Situation:¶
- Running on Windows. (
RUBY_CRLF_ENVIRONMENTis defined.) - The filesize is over 0x100000000 bytes.
- The file is opened in text mode.
- There are several
CRLFs written around 0x100000000. - Some read operations to fill internal buffer (
rbuf) has been performed. - The real file position (associated with the file descriptor) is somewhat greater than 0x100000000.
Code to reproduce:¶
file = "./test_file"
# Prepare a test file
f = File.open(file, "wb:binary")
f.truncate(0x100000000)
f.pos = f.size - 13
f.puts("0123456789\r\n9876543210\r\n" * 10)
f.close
# Operations to reproduce
f = File.open(file, "r:utf-8")
f.seek(0x100000000 - 13)
puts "%#x" % f.pos
p f.gets
puts "%#x" % f.pos
p f.gets
puts "%#x" % f.pos
f.close
Actual result:¶
0xfffffff3
"0123456789\n"
0x100000012
"789\n"
0x100000017
Expected result:¶
0xfffffff3
"0123456789\n"
0xffffffff
"9876543210\n"
0x10000000b
Notes:¶
Currently, io.c has the following code.
In the above situation, extra_max may be set to 0 or a small integer value by casting it as the type long (assumed 4 bytes).
So newlines is set lower than the actual count, the file does not seek back correctly.
/*
* IO unread with taking care of removed '\r' in text mode.
*/
static void
io_unread(rb_io_t *fptr, bool discard_rbuf)
{
rb_off_t r, pos;
ssize_t read_size;
long i;
long newlines = 0;
long extra_max;
char *p;
char *buf;
rb_io_check_closed(fptr);
if (fptr->rbuf.len == 0 || fptr->mode & FMODE_DUPLEX) {
return;
}
errno = 0;
if (!rb_w32_fd_is_text(fptr->fd)) {
r = lseek(fptr->fd, -fptr->rbuf.len, SEEK_CUR);
if (r < 0 && errno) {
if (errno == ESPIPE)
fptr->mode |= FMODE_DUPLEX;
if (!discard_rbuf) return;
}
goto end;
}
pos = lseek(fptr->fd, 0, SEEK_CUR);
if (pos < 0 && errno) {
if (errno == ESPIPE)
fptr->mode |= FMODE_DUPLEX;
if (!discard_rbuf) goto end;
}
/* add extra offset for removed '\r' in rbuf */
extra_max = (long)(pos - fptr->rbuf.len);
p = fptr->rbuf.ptr + fptr->rbuf.off;
/* if the end of rbuf is '\r', rbuf doesn't have '\r' within rbuf.len */
if (*(fptr->rbuf.ptr + fptr->rbuf.capa - 1) == '\r') {
newlines++;
}
for (i = 0; i < fptr->rbuf.len; i++) {
if (*p == '\n') newlines++;
if (extra_max == newlines) break;
p++;
}
buf = ALLOC_N(char, fptr->rbuf.len + newlines);
while (newlines >= 0) {
r = lseek(fptr->fd, pos - fptr->rbuf.len - newlines, SEEK_SET);
if (newlines == 0) break;
if (r < 0) {
newlines--;
continue;
}
read_size = _read(fptr->fd, buf, fptr->rbuf.len + newlines);
if (read_size < 0) {
int e = errno;
free(buf);
rb_syserr_fail_path(e, fptr->pathv);
}
if (read_size == fptr->rbuf.len) {
lseek(fptr->fd, r, SEEK_SET);
break;
}
else {
newlines--;
}
}
free(buf);
end:
fptr->rbuf.off = 0;
fptr->rbuf.len = 0;
clear_codeconv(fptr);
return;
}
No data to display
Actions