Project

General

Profile

Actions

Bug #21663

open

IO#pos will corrupt the file position in specific situation.

Bug #21663: IO#pos will corrupt the file position in specific situation.

Added by t_nagaya (Tsutomu Nagaya) 1 day ago.

Status:
Open
Assignee:
-
Target version:
-
ruby -v:
ruby 3.2.3 (2024-01-18 revision 52bb2ac0a6) [x64-mingw-ucrt]
[ruby-dev:<unknown>]

Description

IO#pos and IO#tell will corrupt the file position in the following situation.

Situation:

  • Running on Windows. (RUBY_CRLF_ENVIRONMENT is defined.)
  • The file size is over 0x100000000 bytes (4 GiB).
  • The file is opened in text mode.
  • There are several CRLFs written around 0x100000000.
  • Some read operations that fill the internal buffer (rbuf) have been performed.
  • The real file position (associated with the file descriptor) is somewhat greater than 0x100000000.

Code to reproduce:

path = "./test_file"
boundary = 0x100000000   # 4 GiB mark the CRLF pairs are written around
lead = 13                # bytes before the boundary where the data starts

# Build the fixture: a sparse file of `boundary` bytes whose last 13 bytes,
# and everything appended after them, are CRLF-terminated lines written in
# binary mode.
File.open(path, "wb:binary") do |out|
  out.truncate(boundary)
  out.seek(out.size - lead)
  out.puts("0123456789\r\n9876543210\r\n" * 10)
end

# Reproduce: open in text mode, seek just below the boundary, then print the
# reported position before and after each of two line reads.
File.open(path, "r:utf-8") do |io|
  io.seek(boundary - lead)
  puts format("%#x", io.pos)
  2.times do
    p io.gets
    puts format("%#x", io.pos)
  end
end

Actual result:

0xfffffff3
"0123456789\n"
0x100000012
"789\n"
0x100000017

Expected result:

0xfffffff3
"0123456789\n"
0xffffffff
"9876543210\n"
0x10000000b

Notes:

Currently, io.c has the following code.
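In the above situation, pos - fptr->rbuf.len does not fit in the type long (4 bytes on Windows), so the cast truncates it and extra_max may become 0 or some other small integer.
As a result, the counting loop stops early, newlines ends up lower than the actual number of removed '\r' characters, and the file position is not seeked back far enough.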

/*
 * IO unread with taking care of removed '\r' in text mode.
 *
 * Moves the descriptor's file position back so that it no longer accounts
 * for the bytes still held in fptr->rbuf, then empties rbuf and clears the
 * code conversion state.
 *
 * fptr         - the IO to rewind; must not be closed (checked by
 *                rb_io_check_closed()).
 * discard_rbuf - when a seek fails on a non-text descriptor: true still
 *                empties rbuf, false returns leaving rbuf untouched.
 *
 * Nothing is done when rbuf is empty or FMODE_DUPLEX is set.  FMODE_DUPLEX
 * is set here when lseek() fails with ESPIPE.
 *
 * On a text descriptor the number of file bytes consumed for rbuf can be
 * larger than rbuf.len (one extra byte per removed '\r'), so the distance to
 * seek back is found by trial: seek back by rbuf.len + newlines, read again,
 * and shrink the guess until the re-read yields exactly rbuf.len bytes.
 */
static void
io_unread(rb_io_t *fptr, bool discard_rbuf)
{
    rb_off_t r, pos;
    ssize_t read_size;
    long i;
    long newlines = 0;
    /*
     * Must be as wide as the file offset.  `long` is 32 bits on Windows, so
     * narrowing (pos - rbuf.len) to it yielded 0 or a small value for
     * positions around or beyond 0x100000000, which stopped the newline
     * count below too early and left the file position too far ahead.
     */
    rb_off_t extra_max;
    char *p;
    char *buf;

    rb_io_check_closed(fptr);
    if (fptr->rbuf.len == 0 || fptr->mode & FMODE_DUPLEX) {
        return;
    }

    errno = 0;
    if (!rb_w32_fd_is_text(fptr->fd)) {
        /* not a text descriptor: seeking back by rbuf.len is sufficient */
        r = lseek(fptr->fd, -fptr->rbuf.len, SEEK_CUR);
        if (r < 0 && errno) {
            if (errno == ESPIPE)
                fptr->mode |= FMODE_DUPLEX;
            if (!discard_rbuf) return;
        }

        goto end;
    }

    pos = lseek(fptr->fd, 0, SEEK_CUR);
    if (pos < 0 && errno) {
        if (errno == ESPIPE)
            fptr->mode |= FMODE_DUPLEX;
        if (!discard_rbuf) goto end;
    }

    /* add extra offset for removed '\r' in rbuf */
    /* upper bound for newlines: the counting loop stops once it is reached */
    extra_max = pos - fptr->rbuf.len;
    p = fptr->rbuf.ptr + fptr->rbuf.off;

    /* if the end of rbuf is '\r', rbuf doesn't have '\r' within rbuf.len */
    if (*(fptr->rbuf.ptr + fptr->rbuf.capa - 1) == '\r') {
        newlines++;
    }

    /* each '\n' in the unread data may stand for one removed '\r' */
    for (i = 0; i < fptr->rbuf.len; i++) {
        if (*p == '\n') newlines++;
        if (extra_max == newlines) break;
        p++;
    }

    /*
     * Try the largest candidate first.  With newlines == 0 the seek to
     * pos - rbuf.len is taken as final and no re-read is done.
     */
    buf = ALLOC_N(char, fptr->rbuf.len + newlines);
    while (newlines >= 0) {
        r = lseek(fptr->fd, pos - fptr->rbuf.len - newlines, SEEK_SET);
        if (newlines == 0) break;
        if (r < 0) {
            /* this candidate offset cannot be seeked to; try a smaller one */
            newlines--;
            continue;
        }
        read_size = _read(fptr->fd, buf, fptr->rbuf.len + newlines);
        if (read_size < 0) {
            /* save errno before free() can overwrite it */
            int e = errno;
            free(buf);
            rb_syserr_fail_path(e, fptr->pathv);
        }
        if (read_size == fptr->rbuf.len) {
            /* guess confirmed: go back to where this re-read started */
            lseek(fptr->fd, r, SEEK_SET);
            break;
        }
        else {
            newlines--;
        }
    }
    free(buf);
  end:
    fptr->rbuf.off = 0;
    fptr->rbuf.len = 0;
    clear_codeconv(fptr);
    return;
}

No data to display

Actions

Also available in: PDF Atom