diff --git a/configure.in b/configure.in index 27aed2f..a91e896 100644 --- a/configure.in +++ b/configure.in @@ -1185,7 +1185,7 @@ main() AC_CHECK_FUNCS(cygwin_conv_path) AC_LIBOBJ([langinfo]) ], -[mingw*], [ LIBS="-lshell32 -lws2_32 -limagehlp $LIBS" +[mingw*], [ LIBS="-lshell32 -lws2_32 -limagehlp -lshlwapi $LIBS" ac_cv_header_a_out_h=no ac_cv_header_pwd_h=no ac_cv_header_utime_h=no diff --git a/file.c b/file.c index 4050067..5b78baa 100644 --- a/file.c +++ b/file.c @@ -2882,8 +2882,9 @@ struct utimbuf { return buf + dirlen; } -static VALUE -file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result) +#ifndef _WIN32 +VALUE +rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, VALUE result) { const char *s, *b, *fend; char *buf, *p, *pend, *root; @@ -2945,7 +2946,7 @@ struct utimbuf { /* specified drive, but not full path */ int same = 0; if (!NIL_P(dname) && !not_same_drive(dname, s[0])) { - file_expand_path(dname, Qnil, abs_mode, result); + rb_file_expand_path_internal(dname, Qnil, abs_mode, result); BUFINIT(); if (has_drive_letter(p) && TOLOWER(p[0]) == TOLOWER(s[0])) { /* ok, same drive */ @@ -2969,7 +2970,7 @@ struct utimbuf { #endif else if (!rb_is_absolute_path(s)) { if (!NIL_P(dname)) { - file_expand_path(dname, Qnil, abs_mode, result); + rb_file_expand_path_internal(dname, Qnil, abs_mode, result); rb_enc_associate(result, rb_enc_check(result, fname)); BUFINIT(); p = pend; @@ -3222,6 +3223,7 @@ struct utimbuf { ENC_CODERANGE_CLEAR(result); return result; } +#endif #define EXPAND_PATH_BUFFER() rb_usascii_str_new(0, MAXPATHLEN + 2) @@ -3232,14 +3234,14 @@ struct utimbuf { static VALUE file_expand_path_1(VALUE fname) { - return file_expand_path(fname, Qnil, 0, EXPAND_PATH_BUFFER()); + return rb_file_expand_path_internal(fname, Qnil, 0, EXPAND_PATH_BUFFER()); } VALUE rb_file_expand_path(VALUE fname, VALUE dname) { check_expand_path_args(fname, dname); - return file_expand_path(fname, dname, 0, EXPAND_PATH_BUFFER()); + return 
rb_file_expand_path_internal(fname, dname, 0, EXPAND_PATH_BUFFER()); } /* @@ -3276,7 +3278,7 @@ struct utimbuf { rb_file_absolute_path(VALUE fname, VALUE dname) { check_expand_path_args(fname, dname); - return file_expand_path(fname, dname, 1, EXPAND_PATH_BUFFER()); + return rb_file_expand_path_internal(fname, dname, 1, EXPAND_PATH_BUFFER()); } /* @@ -5250,7 +5252,7 @@ struct utimbuf { RB_GC_GUARD(str) = rb_get_path_check(str, safe_level); if (RSTRING_LEN(str) == 0) continue; - file_expand_path(fname, str, 0, tmp); + rb_file_expand_path_internal(fname, str, 0, tmp); if (rb_file_load_ok(RSTRING_PTR(tmp))) { *filep = copy_path_class(tmp, *filep); return (int)(j+1); @@ -5309,7 +5311,7 @@ struct utimbuf { VALUE str = RARRAY_PTR(load_path)[i]; RB_GC_GUARD(str) = rb_get_path_check(str, safe_level); if (RSTRING_LEN(str) > 0) { - file_expand_path(path, str, 0, tmp); + rb_file_expand_path_internal(path, str, 0, tmp); f = RSTRING_PTR(tmp); if (rb_file_load_ok(f)) goto found; } @@ -5544,4 +5546,8 @@ struct utimbuf { rb_define_method(rb_cStat, "setuid?", rb_stat_suid, 0); rb_define_method(rb_cStat, "setgid?", rb_stat_sgid, 0); rb_define_method(rb_cStat, "sticky?", rb_stat_sticky, 0); + +#ifdef _WIN32 + rb_w32_init_file(); +#endif } diff --git a/include/ruby/win32.h b/include/ruby/win32.h index 718da13..1344dd4 100644 --- a/include/ruby/win32.h +++ b/include/ruby/win32.h @@ -299,6 +299,7 @@ struct msghdr { extern int rb_w32_uaccess(const char *, int); extern char rb_w32_fd_is_text(int); extern int rb_w32_fstati64(int, struct stati64 *); +extern void rb_w32_init_file(void); #ifdef __BORLANDC__ extern off_t _lseeki64(int, off_t, int); diff --git a/internal.h b/internal.h index 08291af..e6f6b64 100644 --- a/internal.h +++ b/internal.h @@ -102,6 +102,7 @@ struct rb_classext_struct { VALUE rb_realpath_internal(VALUE basedir, VALUE path, int strict); void rb_file_const(const char*, VALUE); int rb_file_load_ok(const char *); +VALUE rb_file_expand_path_internal(VALUE fname, VALUE 
dname, int abs_mode, VALUE result);
 void Init_File(void);

 /* gc.c */
diff --git a/lib/webrick/httpservlet/filehandler.rb b/lib/webrick/httpservlet/filehandler.rb
index 8736f57..adae046 100644
--- a/lib/webrick/httpservlet/filehandler.rb
+++ b/lib/webrick/httpservlet/filehandler.rb
@@ -327,12 +327,41 @@ def check_filename(req, res, name)
         end
       end

+      if /mswin|bccwin|mingw/ =~ RUBY_PLATFORM
+        require 'dl/import'
+        require 'dl/types'
+
+        module Kernel32
+          extend DL::Importer
+          dlload "kernel32"
+          include DL::Win32Types
+          extern "DWORD GetLongPathName(LPCSTR, LPSTR, DWORD)"
+
+          # Convert +name+ to its long path form with GetLongPathName.
+          # Returns +name+ itself when either API call reports failure (0) or
+          # when the result no longer fits the buffer sized by the first call.
+          def self.long_path_name(name)
+            len = GetLongPathName(name, nil, 0)
+            return name if len.zero?
+            buf = "\0" * len
+            written = GetLongPathName(name, buf, buf.size)
+            # 0 => failure, >= buf.size => buffer too small: keep the input
+            (written.zero? || written >= buf.size) ? name : buf[0...written]
+          end
+        end
+
+        def long_path_name(name)
+          Kernel32.long_path_name(name)
+        end
+      end
+
       def shift_path_info(req, res, path_info, base=nil)
         tmp = path_info.shift
         base = base || tmp
         req.path_info = path_info.join
         req.script_name << base
         res.filename = File.expand_path(res.filename + base)
+        res.filename = long_path_name(res.filename) if defined?(long_path_name)
         check_filename(req, res, File.basename(res.filename))
       end

diff --git a/test/ruby/test_file_exhaustive.rb b/test/ruby/test_file_exhaustive.rb
index a19fcd9..f542e4e 100644
--- a/test/ruby/test_file_exhaustive.rb
+++ b/test/ruby/test_file_exhaustive.rb
@@ -14,6 +14,7 @@ def assert_incompatible_encoding

   def setup
     @dir = Dir.mktmpdir("rubytest-file")
+    @rootdir = "#{DRIVE}/"
     File.chown(-1, Process.gid, @dir)
     @file = make_tmp_filename("file")
     @zerofile = make_tmp_filename("zerofile")
@@ -425,13 +426,25 @@ def test_expand_path
       assert_equal(expected.force_encoding(cp), File.expand_path(a.dup.force_encoding(cp)), cp)
     end

+    path = "\u3042\u3044\u3046\u3048\u304a".encode("EUC-JP")
+    assert_equal("#{Dir.pwd}/#{path}".encode("CP932"), File.expand_path(path).encode("CP932"))
+
+    path = "\u3042\u3044\u3046\u3048\u304a".encode("CP51932")
+    assert_equal("#{Dir.pwd}/#{path}",
File.expand_path(path)) + assert_kind_of(String, File.expand_path("~")) if ENV["HOME"] assert_raise(ArgumentError) { File.expand_path("~foo_bar_baz_unknown_user_wahaha") } assert_raise(ArgumentError) { File.expand_path("~foo_bar_baz_unknown_user_wahaha", "/") } begin bug3630 = '[ruby-core:31537]' home = ENV["HOME"] + home_drive = ENV["HOMEDRIVE"] + home_path = ENV["HOMEPATH"] + user_profile = ENV["USERPROFILE"] ENV["HOME"] = nil + ENV["HOMEDRIVE"] = nil + ENV["HOMEPATH"] = nil + ENV["USERPROFILE"] = nil assert_raise(ArgumentError) { File.expand_path("~") } ENV["HOME"] = "~" assert_raise(ArgumentError, bug3630) { File.expand_path("~") } @@ -439,10 +452,188 @@ def test_expand_path assert_raise(ArgumentError, bug3630) { File.expand_path("~") } ensure ENV["HOME"] = home + ENV["HOMEDRIVE"] = home_drive + ENV["HOMEPATH"] = home_path + ENV["USERPROFILE"] = user_profile end assert_incompatible_encoding {|d| File.expand_path(d)} end + def test_resolve_empty_string_current_directory + assert_equal(Dir.pwd, File.expand_path("")) + end + + def test_resolve_dot_current_directory + assert_equal(Dir.pwd, File.expand_path(".")) + end + + def test_resolve_file_name_relative_current_directory + assert_equal(File.join(Dir.pwd, "foo"), File.expand_path("foo")) + end + + def test_ignore_nil_dir_string + assert_equal(File.join(Dir.pwd, "foo"), File.expand_path("foo", nil)) + end + + def test_resolve_file_name_and_dir_string_relative + assert_equal(File.join(Dir.pwd, "bar", "foo"), + File.expand_path("foo", "bar")) + end + + def test_cleanup_dots_file_name + bug = "[ruby-talk:18512]" + + assert_equal(File.join(Dir.pwd, ".a"), File.expand_path(".a"), bug) + assert_equal(File.join(Dir.pwd, "..a"), File.expand_path("..a"), bug) + + if DRIVE + # cleanup dots only on Windows + assert_equal(File.join(Dir.pwd, "a"), File.expand_path("a."), bug) + skip "FIXME" + assert_equal(File.join(Dir.pwd, "a"), File.expand_path("a.."), bug) + else + assert_equal(File.join(Dir.pwd, "a."), 
File.expand_path("a."), bug) + assert_equal(File.join(Dir.pwd, "a.."), File.expand_path("a.."), bug) + end + end + + def test_converts_a_pathname_to_an_absolute_pathname_using_a_complete_path + assert_equal(@dir, File.expand_path("", "#{@dir}")) + assert_equal(File.join(@dir, "a"), File.expand_path("a", "#{@dir}")) + assert_equal(File.join(@dir, "a"), File.expand_path("../a", "#{@dir}/xxx")) + assert_equal(@rootdir, File.expand_path(".", "#{@rootdir}")) + end + + def test_ignores_supplied_dir_if_path_contains_a_drive_letter + assert_equal(@rootdir, File.expand_path(@rootdir, "D:/")) + end if DRIVE + + def test_removes_trailing_slashes_from_absolute_path + assert_equal(File.join(@rootdir, "foo"), File.expand_path("#{@rootdir}foo/")) + assert_equal(File.join(@rootdir, "foo.rb"), File.expand_path("#{@rootdir}foo.rb/")) + end + + def test_removes_trailing_spaces_from_absolute_path + assert_equal(File.join(@rootdir, "a"), File.expand_path("#{@rootdir}a ")) + end if DRIVE + + def test_converts_a_pathname_which_starts_with_a_slash_using_dir_s_drive + assert_match(%r"\Az:/foo\z"i, File.expand_path('/foo', "z:/bar")) + end if DRIVE + + def test_converts_a_pathname_which_starts_with_a_slash_and_unc_pathname + assert_equal("//foo", File.expand_path('//foo', "//bar")) + assert_equal("//bar/foo", File.expand_path('/foo', "//bar")) + assert_equal("//foo", File.expand_path('//foo', "/bar")) + end if DRIVE + + def test_converts_a_dot_with_unc_dir + assert_equal("//", File.expand_path('.', "//")) + end + + def test_preserves_unc_path_root + assert_equal("//", File.expand_path("//")) + assert_equal("//", File.expand_path("//.")) + assert_equal("//", File.expand_path("//..")) + end + + def test_converts_a_pathname_which_starts_with_a_slash_using_host_share + assert_match(%r"\A//host/share/foo\z"i, File.expand_path('/foo', "//host/share/bar")) + end if DRIVE + + def test_converts_a_pathname_which_starts_with_a_slash_using_a_current_drive + assert_match(%r"\A#{DRIVE}/foo\z"i, 
File.expand_path('/foo')) + end + + def test_returns_tainted_strings_or_not + assert_equal(true, File.expand_path('foo').tainted?) + assert_equal(true, File.expand_path('foo'.taint).tainted?) + assert_equal(true, File.expand_path('/foo'.taint).tainted?) + assert_equal(true, File.expand_path('foo', 'bar').tainted?) + assert_equal(true, File.expand_path('foo', '/bar'.taint).tainted?) + assert_equal(true, File.expand_path('foo'.taint, '/bar').tainted?) + assert_equal(true, File.expand_path('~').tainted?) + + if DRIVE + assert_equal(true, File.expand_path('/foo').tainted?) + assert_equal(false, File.expand_path('//foo').tainted?) + assert_equal(true, File.expand_path('C:/foo'.taint).tainted?) + assert_equal(false, File.expand_path('C:/foo').tainted?) + assert_equal(true, File.expand_path('foo', '/bar').tainted?) + assert_equal(true, File.expand_path('foo', 'C:/bar'.taint).tainted?) + assert_equal(true, File.expand_path('foo'.taint, 'C:/bar').tainted?) + assert_equal(false, File.expand_path('foo', 'C:/bar').tainted?) + assert_equal(false, File.expand_path('C:/foo/../bar').tainted?) + assert_equal(false, File.expand_path('foo', '//bar').tainted?) + else + assert_equal(false, File.expand_path('/foo').tainted?) + assert_equal(false, File.expand_path('foo', '/bar').tainted?) 
+ end + end + + def test_converts_a_pathname_to_an_absolute_pathname_using_home_as_base + old_home = ENV["HOME"] + home = ENV["HOME"] = "#{DRIVE}/UserHome" + assert_equal(home, File.expand_path("~")) + assert_equal(home, File.expand_path("~", "C:/FooBar")) + assert_equal(File.join(home, "a"), File.expand_path("~/a", "C:/FooBar")) + ensure + ENV["HOME"] = old_home + end + + def test_converts_a_pathname_to_an_absolute_pathname_using_unc_home + old_home = ENV["HOME"] + unc_home = ENV["HOME"] = "//UserHome" + assert_equal(unc_home, File.expand_path("~")) + ensure + ENV["HOME"] = old_home + end if DRIVE + + def test_does_not_modify_a_home_string_argument + old_home = ENV["HOME"] + home = ENV["HOME"] = "#{DRIVE}/UserHome" + str = "~/a" + assert_equal("#{home}/a", File.expand_path(str)) + assert_equal("~/a", str) + ensure + ENV["HOME"] = old_home + end + + def test_raises_argument_error_for_any_supplied_username + bug = '[ruby-core:39597]' + assert_raise(ArgumentError, bug) { File.expand_path("~anything") } + end if DRIVE + + def test_raises_a_type_error_if_not_passed_a_string_type + assert_raise(TypeError) { File.expand_path(1) } + assert_raise(TypeError) { File.expand_path(nil) } + assert_raise(TypeError) { File.expand_path(true) } + end + + def test_expands_dot_dir + assert_equal("#{DRIVE}/dir", File.expand_path("#{DRIVE}/./dir")) + end + + def test_does_not_modify_the_string_argument + str = "./a/b/../c" + assert_equal("#{Dir.pwd}/a/c", File.expand_path(str, Dir.pwd)) + assert_equal("./a/b/../c", str) + end + + def test_returns_a_string_when_passed_a_string_subclass + sub = Class.new(String) + str = sub.new "./a/b/../c" + path = File.expand_path(str, Dir.pwd) + assert_equal("#{Dir.pwd}/a/c", path) + assert_instance_of(String, path) + end + + def test_accepts_objects_that_have_a_to_path_method + klass = Class.new { def to_path; "a/b/c"; end } + obj = klass.new + assert_equal("#{Dir.pwd}/a/b/c", File.expand_path(obj)) + end + def test_basename 
assert_equal(File.basename(@file).sub(/\.test$/, ""), File.basename(@file, ".test")) assert_equal("", s = File.basename("")) diff --git a/win32/Makefile.sub b/win32/Makefile.sub index 3929417..3cf6b90 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -226,7 +226,7 @@ EXTLIBS = EXTSOLIBS = !endif !if !defined(LIBS) -LIBS = oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib imagehlp.lib $(EXTLIBS) +LIBS = oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib imagehlp.lib shlwapi.lib $(EXTLIBS) !endif !if !defined(MISSING) MISSING = acosh.obj cbrt.obj crypt.obj erf.obj ffs.obj langinfo.obj lgamma_r.obj strlcat.obj strlcpy.obj tgamma.obj win32/win32.obj win32/file.obj setproctitle.obj diff --git a/win32/file.c b/win32/file.c index 955f91b..e161e27 100644 --- a/win32/file.c +++ b/win32/file.c @@ -1,5 +1,17 @@ #include "ruby/ruby.h" +#include "ruby/encoding.h" #include +#include +#include + +/* MultiByteToWideChar() doesn't work with code page 51932 */ +#define INVALID_CODE_PAGE 51932 + +#define IS_DIR_SEPARATOR_P(c) (c == L'\\' || c == L'/') +#define IS_DIR_UNC_P(c) (IS_DIR_SEPARATOR_P(c[0]) && IS_DIR_SEPARATOR_P(c[1])) + +#define malloc xmalloc +#define free xfree #ifndef INVALID_FILE_ATTRIBUTES # define INVALID_FILE_ATTRIBUTES ((DWORD)-1) @@ -27,3 +39,530 @@ } return ret; } + + +static inline void +replace_wchar(wchar_t *s, int find, int replace) +{ + while (*s != 0) { + if (*s == find) + *s = replace; + s++; + } +} + +/* + * Return user's home directory using environment variables combinations. + * Memory allocated by this function should be manually freeded afterwards. 
+ * Returns NULL when no usable variable is set or when reading one fails.
+ */
+static wchar_t *
+home_dir(void)
+{
+    wchar_t *buffer = NULL;
+    size_t buffer_len = 0, len = 0;
+    size_t home_env = 0;
+    int filled = 0;
+
+    /*
+     * determine User's home directory trying:
+     * HOME, HOMEDRIVE + HOMEPATH and USERPROFILE environment variables
+     * TODO: Special Folders - Profile and Personal
+     */
+
+    /*
+     * GetEnvironmentVariableW when used with NULL will return the required
+     * buffer size including its terminating character.
+     * http://msdn.microsoft.com/en-us/library/windows/desktop/ms683188(v=vs.85).aspx
+     */
+
+    if ((len = GetEnvironmentVariableW(L"HOME", NULL, 0)) != 0) {
+        buffer_len = len;
+        home_env = 1;
+    }
+    else if ((len = GetEnvironmentVariableW(L"HOMEDRIVE", NULL, 0)) != 0) {
+        buffer_len = len;
+        if ((len = GetEnvironmentVariableW(L"HOMEPATH", NULL, 0)) != 0) {
+            buffer_len += len;
+            home_env = 2;
+        }
+        else {
+            buffer_len = 0;
+        }
+    }
+    else if ((len = GetEnvironmentVariableW(L"USERPROFILE", NULL, 0)) != 0) {
+        buffer_len = len;
+        home_env = 3;
+    }
+
+    if (!home_env)
+        return NULL;
+
+    /* allocate buffer */
+    buffer = (wchar_t *)malloc(buffer_len * sizeof(wchar_t));
+
+    /*
+     * The environment may change between the size probe above and the reads
+     * below, so every read is checked: 0 means the variable vanished, a value
+     * >= the space offered means it grew and nothing usable was stored.
+     */
+    switch (home_env) {
+      case 1: /* HOME */
+        len = GetEnvironmentVariableW(L"HOME", buffer, (DWORD)buffer_len);
+        filled = (len != 0 && len < buffer_len);
+        break;
+      case 2: /* HOMEDRIVE + HOMEPATH */
+        len = GetEnvironmentVariableW(L"HOMEDRIVE", buffer, (DWORD)buffer_len);
+        if (len != 0 && len < buffer_len) {
+            size_t rest = buffer_len - len;
+            size_t path_len = GetEnvironmentVariableW(L"HOMEPATH", buffer + len, (DWORD)rest);
+            filled = (path_len != 0 && path_len < rest);
+        }
+        break;
+      case 3: /* USERPROFILE */
+        len = GetEnvironmentVariableW(L"USERPROFILE", buffer, (DWORD)buffer_len);
+        filled = (len != 0 && len < buffer_len);
+        break;
+      default:
+        break;
+    }
+
+    if (!filled) {
+        free(buffer);
+        return NULL;
+    }
+
+    /* sanitize backslashes with forwardslashes */
+    replace_wchar(buffer, L'\\', L'/');
+    return buffer;
+}
+
+
+/*
+ * Convert the path from char to wchar with specified code page.
+ *
+ * Does nothing when `path` is nil. Otherwise stores a newly allocated,
+ * NUL-terminated wide string in *wpath (and in *wpath_pos when that pointer
+ * is not NULL) and its length, terminator excluded, in *wpath_len. The caller
+ * frees *wpath. When the conversion fails the output is an empty string, so
+ * *wpath_len can never wrap around to SIZE_MAX.
+ */
+static inline void
+path_to_wchar(VALUE path, wchar_t **wpath, wchar_t **wpath_pos, size_t *wpath_len, UINT cp)
+{
+    int size;
+    wchar_t *converted;
+
+    if (NIL_P(path))
+        return;
+
+    /* first call only measures; the count includes the terminating NUL */
+    size = MultiByteToWideChar(cp, 0, RSTRING_PTR(path), -1, NULL, 0);
+    converted = (wchar_t *)malloc((size > 0 ? (size_t)size : 1) * sizeof(wchar_t));
+    if (size > 0)
+        size = MultiByteToWideChar(cp, 0, RSTRING_PTR(path), -1, converted, size);
+
+    if (size <= 0) {
+        /* MultiByteToWideChar() returns 0 on failure */
+        converted[0] = L'\0';
+        size = 1;
+    }
+
+    *wpath = converted;
+    if (wpath_pos)
+        *wpath_pos = converted;
+    *wpath_len = (size_t)size - 1;
+}
+
+/*
+ * Remove trailing invalid ':$DATA' of the path.
+ * `size` is the length of `wfullpath` in wide characters; the (possibly
+ * shortened) length is returned and the string is re-terminated in place.
+ */
+static inline size_t
+remove_invalid_alternative_data(wchar_t *wfullpath, size_t size) {
+    static const wchar_t prime[] = L":$DATA";
+    enum {prime_len = (sizeof(prime) / sizeof(wchar_t)) -1};
+
+    /* nothing to do unless the path is longer than, and ends with, ':$DATA' */
+    if (size <= prime_len || _wcsnicmp(wfullpath + size - prime_len, prime, prime_len) != 0)
+        return size;
+
+    /* alias of stream */
+    /* get rid of a bug of x64 VC++ */
+    if (wfullpath[size - (prime_len + 1)] == ':') {
+        /* remove trailing '::$DATA' */
+        size -= prime_len + 1; /* prime */
+        wfullpath[size] = L'\0';
+    }
+    else {
+        /* remove trailing ':$DATA' of paths like '/aa:a:$DATA' */
+        wchar_t *pos = wfullpath + size - (prime_len + 1);
+        /* walk back through the last path component looking for another ':' */
+        while (!IS_DIR_SEPARATOR_P(*pos) && pos != wfullpath) {
+            if (*pos == L':') {
+                size -= prime_len; /* alternative */
+                wfullpath[size] = L'\0';
+                break;
+            }
+            pos--;
+        }
+    }
+    return size;
+}
+
+/* Return system code page: ANSI or OEM, whichever the file APIs currently use. */
+static inline UINT
+system_code_page(void) {
+    return AreFileApisANSI() ? CP_ACP : CP_OEMCP;
+}
+
+/* cache 'encoding name' => 'code page' into a hash */
+static VALUE rb_code_page;
+
+
+/*
+ * Return code page number of the encoding.
+ * Cache code page into a hash for performance since finding the code page in
+ * Encoding#names is slow.
+ * A NULL encoding maps to the system code page; an encoding without a
+ * usable "CPnnn" alias maps to INVALID_CODE_PAGE.
+ */
+static UINT
+code_page(rb_encoding *enc)
+{
+    VALUE cached, name_key;
+    VALUE encoding, names_ary = Qundef, name;
+    char *enc_name;
+    struct RString fake_str;
+    ID names;
+    long i;
+
+    if (!enc)
+        return system_code_page();
+
+    enc_name = (char *)rb_enc_name(enc);
+
+    /*
+     * The lookup key is a String header built on the stack that borrows
+     * enc_name, so no Ruby String is created just to query the cache.
+     */
+    fake_str.basic.flags = T_STRING|RSTRING_NOEMBED;
+    fake_str.basic.klass = rb_cString;
+    fake_str.as.heap.len = strlen(enc_name);
+    fake_str.as.heap.ptr = enc_name;
+    fake_str.as.heap.aux.capa = fake_str.as.heap.len;
+    name_key = (VALUE)&fake_str;
+    ENCODING_CODERANGE_SET(name_key, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
+
+    cached = rb_hash_lookup(rb_code_page, name_key);
+    if (cached != Qnil) {
+        return (UINT)FIX2INT(cached);
+    }
+
+    /* cache miss: a real String is needed as the stored key */
+    name_key = rb_usascii_str_new2(enc_name);
+
+    /* US-ASCII and ASCII-8BIT are answered before the Encoding#names call */
+    if (enc == rb_usascii_encoding() || enc == rb_ascii8bit_encoding()) {
+        UINT ascii_cp = 20127;
+        rb_hash_aset(rb_code_page, name_key, INT2FIX(ascii_cp));
+        return ascii_cp;
+    }
+
+    encoding = rb_enc_from_encoding(enc);
+    if (!NIL_P(encoding)) {
+        CONST_ID(names, "names");
+        names_ary = rb_funcall(encoding, names, 0);
+    }
+
+    if (names_ary != Qundef) {
+        /* the first alias of the form "CP<non-zero number>" wins */
+        for (i = 0; i < RARRAY_LEN(names_ary); i++) {
+            name = RARRAY_PTR(names_ary)[i];
+            if (strncmp("CP", RSTRING_PTR(name), 2) == 0) {
+                int cp_number = atoi(RSTRING_PTR(name) + 2);
+                if (cp_number != 0) {
+                    rb_hash_aset(rb_code_page, name_key, INT2FIX(cp_number));
+                    return (UINT)cp_number;
+                }
+            }
+        }
+    }
+
+    rb_hash_aset(rb_code_page, name_key, INT2FIX(INVALID_CODE_PAGE));
+    return INVALID_CODE_PAGE;
+}
+
+#define PATH_BUFFER_SIZE (MAX_PATH * 2)
+
+/*
+ * Windows implementation of File.expand_path / File.absolute_path.
+ *
+ *   fname    - path to expand
+ *   dname    - base directory, or nil
+ *   abs_mode - non-zero disables '~' expansion (rb_file_absolute_path)
+ *   result   - String that receives the expanded path; it is also the
+ *              return value
+ *
+ * Raises ArgumentError when '~' is requested but no home directory can be
+ * found, when the home directory is not absolute, and for any '~user' form.
+ * If GetFullPathNameW() fails, `result` is returned without being written.
+ *
+ * NOTE(review): the rb_str_encode()/rb_enc_check() calls can raise while
+ * wpath/wdir/whome/buffer are allocated; those buffers would then leak.
+ */
+VALUE
+rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, VALUE result)
+{
+    size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
+    size_t buffer_len = 0;
+    wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL, *wdir = NULL;
+    wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
+    UINT path_cp, cp;
+    VALUE path = fname, dir = dname;
+    wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
+    wchar_t path_drive = L'\0', dir_drive = L'\0';
+    int ignore_dir = 0;
+    rb_encoding *path_encoding;
+    int tainted = 0;
+
+    /* tainted if path is tainted */
+    tainted = OBJ_TAINTED(path);
+
+
+    /* get path encoding */
+    if (NIL_P(dir)) {
+        path_encoding = rb_enc_get(path);
+    }
+    else {
+        path_encoding = rb_enc_check(path, dir);
+    }
+    cp = path_cp = code_page(path_encoding);
+
+    /* convert char * to wchar_t */
+    if (path_cp == INVALID_CODE_PAGE) {
+        /* no usable code page: go through UTF-8 and transcode back at the end */
+        cp = CP_UTF8;
+        if (!NIL_P(path)) {
+            VALUE tmp = rb_enc_str_new(RSTRING_PTR(path), RSTRING_LEN(path), path_encoding);
+            path = rb_str_encode(tmp, rb_enc_from_encoding(rb_utf8_encoding()), 0, Qnil);
+            rb_str_resize(tmp, 0);
+        }
+    }
+    path_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp);
+
+    /* determine if we need the user's home directory */
+    /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
+    if (abs_mode == 0 && ((wpath_len == 1 && wpath_pos[0] == L'~') ||
+                          (wpath_len >= 2 && wpath_pos[0] == L'~' && IS_DIR_SEPARATOR_P(wpath_pos[1])))) {
+        /* tainted if expanding '~' */
+        tainted = 1;
+
+        whome = home_dir();
+        if (whome == NULL) {
+            free(wpath);
+            rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
+        }
+        whome_len = wcslen(whome);
+
+        if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
+            /* rb_raise() does not return: release both buffers first */
+            free(wpath);
+            free(whome);
+            rb_raise(rb_eArgError, "non-absolute home");
+        }
+
+        /* ignores dir since we are expanding home */
+        ignore_dir = 1;
+
+        /* exclude ~ from the result */
+        wpath_pos++;
+        wpath_len--;
+
+        /* exclude separator if present */
+        if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
+            wpath_pos++;
+            wpath_len--;
+        }
+    }
+    else if (wpath_len >= 2 && wpath_pos[1] == L':') {
+        if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
+            /* ignore dir since path contains a drive letter and a root slash */
+            ignore_dir = 1;
+        }
+        else {
+            /* determine if we ignore dir or not later */
+            path_drive = wpath_pos[0];
+        }
+    }
+    else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
+        /* '~user' is not supported here: build the message and raise */
+        wchar_t *wuser = wpath_pos + 1;
+        wchar_t *pos = wuser;
+        char *user;
+
+        /* tainted if expanding '~' */
+        tainted = 1;
+
+        while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0')
+            pos++;
+
+        *pos = '\0';
+
+        size = WideCharToMultiByte(cp, 0, wuser, -1, NULL, 0, NULL, NULL);
+        user = (char *)malloc(size * sizeof(char));
+        WideCharToMultiByte(cp, 0, wuser, -1, user, size, NULL, NULL);
+        /* convert to VALUE and set the path encoding */
+        if (path_cp == INVALID_CODE_PAGE) {
+            VALUE tmp = rb_enc_str_new(user, size - 1, rb_utf8_encoding());
+            result = rb_str_encode(tmp, rb_enc_from_encoding(path_encoding), 0, Qnil);
+            rb_str_resize(tmp, 0);
+        }
+        else {
+            result = rb_enc_str_new(user, size - 1, path_encoding);
+        }
+
+        free(wpath);
+        if (user)
+            free(user);
+
+        rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
+    }
+
+    /* convert dir */
+    if (!ignore_dir && !NIL_P(dir)) {
+        /* convert char * to wchar_t */
+        if (path_cp == INVALID_CODE_PAGE) {
+            VALUE tmp = rb_enc_str_new(RSTRING_PTR(dir), RSTRING_LEN(dir), path_encoding);
+            dir = rb_str_encode(tmp, rb_enc_from_encoding(rb_utf8_encoding()), 0, Qnil);
+            rb_str_resize(tmp, 0);
+        }
+        path_to_wchar(dir, &wdir, NULL, &wdir_len, cp);
+
+        if (wdir_len >= 2 && wdir[1] == L':') {
+            dir_drive = wdir[0];
+            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
+                /* rooted path: only the drive of dir is kept */
+                wdir_len = 2;
+            }
+        }
+        else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
+            /* UNC path */
+            if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
+                /* cut the UNC path tail to '//host/share' */
+                size_t separators = 0;
+                size_t pos = 2;
+                while (pos < wdir_len && separators < 2) {
+                    if (IS_DIR_SEPARATOR_P(wdir[pos])) {
+                        separators++;
+                    }
+                    pos++;
+                }
+                if (separators == 2)
+                    wdir_len = pos - 1;
+            }
+        }
+    }
+
+    /* determine if we ignore dir or not */
+    if (!ignore_dir && path_drive && dir_drive) {
+        if (towupper(path_drive) == towupper(dir_drive)) {
+            /* exclude path drive letter to use dir */
+            wpath_pos += 2;
+            wpath_len -= 2;
+        }
+        else {
+            /* ignore dir since path drive is different from dir drive */
+            ignore_dir = 1;
+            wdir_len = 0;
+        }
+    }
+
+    if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
+        /* ignore dir since path has UNC root */
+        ignore_dir = 1;
+        wdir_len = 0;
+    }
+    else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
+             !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
+        /* ignore dir since path has root slash and dir doesn't have drive or UNC root */
+        ignore_dir = 1;
+        wdir_len = 0;
+    }
+
+    /* room for home, dir and path, each followed by a separator or terminator */
+    buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;
+
+    buffer = buffer_pos = (wchar_t *)malloc((buffer_len + 1) * sizeof(wchar_t));
+
+    /* add home */
+    if (whome_len) {
+        wcsncpy(buffer_pos, whome, whome_len);
+        buffer_pos += whome_len;
+    }
+
+    /* Add separator if required */
+    if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
+        buffer_pos[0] = L'\\';
+        buffer_pos++;
+    }
+
+    if (wdir_len) {
+        /* tainted if dir is used and dir is tainted */
+        if (!tainted && OBJ_TAINTED(dir))
+            tainted = 1;
+
+        wcsncpy(buffer_pos, wdir, wdir_len);
+        buffer_pos += wdir_len;
+    }
+
+    /* add separator if required */
+    if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
+        buffer_pos[0] = L'\\';
+        buffer_pos++;
+    }
+
+    /* now deal with path */
+    if (wpath_len) {
+        wcsncpy(buffer_pos, wpath_pos, wpath_len);
+        buffer_pos += wpath_len;
+    }
+
+    /* GetFullPathNameW requires at least "." to determine current directory */
+    if (wpath_len == 0) {
+        buffer_pos[0] = L'.';
+        buffer_pos++;
+    }
+
+    /* Ensure buffer is NULL terminated */
+    buffer_pos[0] = L'\0';
+
+    /* tainted if path is relative */
+    if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer))) {
+        tainted = 1;
+    }
+
+    /* Determine required buffer size */
+    size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
+    if (size > PATH_BUFFER_SIZE) {
+        /* allocate enough memory to contain the response */
+        size_t needed = size;
+        wfullpath = (wchar_t *)malloc(needed * sizeof(wchar_t));
+        size = GetFullPathNameW(buffer, (DWORD)needed, wfullpath, NULL);
+        /* a result that does not fit means nothing usable was stored */
+        if (size >= needed)
+            size = 0;
+    }
+    else {
+        wfullpath = wfullpath_buffer;
+    }
+
+    if (size) {
+        size_t wfullpath_size;
+
+        /* Remove any trailing slashes */
+        if (size >= 2 &&
+            IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
+            wfullpath[size - 2] != L':' &&
+            !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
+            size -= 1;
+            wfullpath[size] = L'\0';
+        }
+
+        /* Remove any trailing dot */
+        if (wfullpath[size - 1] == L'.') {
+            size -= 1;
+            wfullpath[size] = L'\0';
+        }
+
+        /* removes trailing invalid ':$DATA' */
+        size = remove_invalid_alternative_data(wfullpath, size);
+
+        /* sanitize backslashes with forwardslashes */
+        replace_wchar(wfullpath, L'\\', L'/');
+
+        /* convert to char * */
+        wfullpath_size = size;
+        size = WideCharToMultiByte(cp, 0, wfullpath, wfullpath_size, NULL, 0, NULL, NULL);
+        if (size > (size_t)RSTRING_LEN(result)) {
+            rb_str_modify(result);
+            rb_str_resize(result, size);
+        }
+
+        WideCharToMultiByte(cp, 0, wfullpath, wfullpath_size, RSTRING_PTR(result), size, NULL, NULL);
+
+        /* set the String VALUE length and the path encoding */
+        rb_str_set_len(result, size);
+        if (path_cp == INVALID_CODE_PAGE) {
+            /* result currently holds UTF-8: transcode back to the path encoding */
+            VALUE tmp;
+            size_t len;
+
+            rb_enc_associate(result, rb_utf8_encoding());
+            ENC_CODERANGE_CLEAR(result);
+            tmp = rb_str_encode(result, rb_enc_from_encoding(path_encoding), 0, Qnil);
+            len = RSTRING_LEN(tmp);
+            rb_str_modify(result);
+            rb_str_resize(result, len);
+            memcpy(RSTRING_PTR(result), RSTRING_PTR(tmp), len);
+            rb_str_resize(tmp, 0);
+        }
+        rb_enc_associate(result, path_encoding);
+        ENC_CODERANGE_CLEAR(result);
+
+        /* makes the result object tainted if expanding tainted strings or returning modified path */
+        if (tainted)
+            OBJ_TAINT(result);
+    }
+
+    if (buffer)
+        free(buffer);
+
+    if (wpath)
+        free(wpath);
+
+    if (wdir)
+        free(wdir);
+
+    if (whome)
+        free(whome);
+
+    if (wfullpath && wfullpath != wfullpath_buffer)
+        free(wfullpath);
+
+    return result;
+}
+
+
+/* Create the encoding-name => code-page cache used by code_page(). */
+void
+rb_w32_init_file(void)
+{
+    rb_code_page = rb_hash_new();
+
+    /* prevent GC removing rb_code_page */
+    rb_gc_register_mark_object(rb_code_page);
+}