From 54809ad54cd9b7e0567471783c46c802c18528ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C4=81vis=20Mos=C4=81ns?= <davispuh@gmail.com>
Date: Wed, 3 Aug 2016 20:06:23 +0300
Subject: [PATCH] On Windows use UTF-8 as filesystem encoding

---
 include/ruby/win32.h       | 24 ++++++++++-----------
 localeinit.c               |  7 +------
 test/ruby/test_dir_m17n.rb | 52 ++++++++++++++++------------------------------
 test/ruby/test_process.rb  |  2 +-
 win32/dir.h                |  2 +-
 win32/file.c               |  2 +-
 win32/win32.c              | 15 +------------
 7 files changed, 35 insertions(+), 69 deletions(-)

diff --git a/include/ruby/win32.h b/include/ruby/win32.h
index 68b8db7..78e8471 100644
--- a/include/ruby/win32.h
+++ b/include/ruby/win32.h
@@ -142,7 +142,7 @@ typedef int clockid_t;
 #define lseek(_f, _o, _w)	_lseeki64(_f, _o, _w)

 #define pipe(p)			rb_w32_pipe(p)
-#define open			rb_w32_open
+#define open			rb_w32_uopen
 #define close(h)		rb_w32_close(h)
 #define fclose(f)		rb_w32_fclose(f)
 #define read(f, b, s)		rb_w32_read(f, b, s)
@@ -154,16 +154,16 @@ typedef int clockid_t;
 #define fstati64(fd,st) 	rb_w32_fstati64(fd,st)

 #undef execv
-#define execv(path,argv)	rb_w32_aspawn(P_OVERLAY,path,argv)
+#define execv(path,argv)	rb_w32_uaspawn(P_OVERLAY,path,argv)
 #undef isatty
 #define isatty(h)		rb_w32_isatty(h)

 #undef mkdir
-#define mkdir(p, m)		rb_w32_mkdir(p, m)
+#define mkdir(p, m)		rb_w32_umkdir(p, m)
 #undef rmdir
-#define rmdir(p)		rb_w32_rmdir(p)
+#define rmdir(p)		rb_w32_urmdir(p)
 #undef unlink
-#define unlink(p)		rb_w32_unlink(p)
+#define unlink(p)		rb_w32_uunlink(p)
 #endif /* RUBY_EXPORT */

 #if SIZEOF_OFF_T == 8
@@ -173,20 +173,20 @@ typedef int clockid_t;
 #if !defined(_MSC_VER) || RUBY_MSVCRT_VERSION < 80
 #define stati64 _stati64
 #ifndef _stati64
-#define _stati64(path, st) rb_w32_stati64(path, st)
+#define _stati64(path, st) rb_w32_ustati64(path, st)
 #endif
 #else
 #define stati64 _stat64
-#define _stat64(path, st) rb_w32_stati64(path, st)
+#define _stat64(path, st) rb_w32_ustati64(path, st)
 #endif
 #else
-#define stat(path,st)		rb_w32_stat(path,st)
-#define fstat(fd,st)		rb_w32_fstat(fd,st)
+#define stat(path,st)		rb_w32_ustat(path,st)
+#define fstat(fd,st)		rb_w32_fstat(fd,st)	/* fd-based, no path argument: no UTF-8 ("u") variant */
 extern int rb_w32_stat(const char *, struct stat *);
 extern int rb_w32_fstat(int, struct stat *);
 #endif
-#define lstat(path,st)		rb_w32_lstati64(path,st)
-#define access(path,mode)	rb_w32_access(path,mode)
+#define lstat(path,st)		rb_w32_ulstati64(path,st)
+#define access(path,mode)	rb_w32_uaccess(path,mode)

 #define strcasecmp		_stricmp
 #define strncasecmp		_strnicmp
@@ -698,7 +698,7 @@ extern char *rb_w32_strerror(int);
 #define getenv(n)		rb_w32_getenv(n)

 #undef rename
-#define rename(o, n)		rb_w32_rename(o, n)
+#define rename(o, n)		rb_w32_urename(o, n)

 #undef times
 #define times(t)		rb_w32_times(t)
diff --git a/localeinit.c b/localeinit.c
index eeec9ad..11f2379 100644
--- a/localeinit.c
+++ b/localeinit.c
@@ -100,12 +100,7 @@ Init_enc_set_filesystem_encoding(void)
     int idx;
 #if defined NO_LOCALE_CHARMAP
 # error NO_LOCALE_CHARMAP defined
-#elif defined _WIN32
-    char cp[SIZEOF_CP_NAME];
-    CP_FORMAT(cp, AreFileApisANSI() ? GetACP() : GetOEMCP());
-    idx = rb_enc_find_index(cp);
-    if (idx < 0) idx = ENCINDEX_ASCII;
-#elif defined __CYGWIN__
+#elif defined _WIN32 || defined __CYGWIN__
     idx = ENCINDEX_UTF_8;
 #else
     idx = rb_enc_to_index(rb_default_external_encoding());
diff --git a/test/ruby/test_dir_m17n.rb b/test/ruby/test_dir_m17n.rb
index 754c035..834d58d 100644
--- a/test/ruby/test_dir_m17n.rb
+++ b/test/ruby/test_dir_m17n.rb
@@ -16,27 +16,21 @@ def assert_raw_file_name(code, encoding)
       assert_separately(["-E#{encoding}"], <<-EOS, :chdir=>dir)
         filename = #{code}.chr('UTF-8').force_encoding("#{encoding}")
         File.open(filename, "w") {}
-        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
+        opts = {}
         ents = Dir.entries(".", opts)
+        if /mswin|mingw/ =~ RUBY_PLATFORM
+          filename = filename.encode("UTF-8")
+        end
         assert_include(ents, filename)
       EOS

       return if /cygwin/ =~ RUBY_PLATFORM
       assert_separately(%w[-EASCII-8BIT], <<-EOS, :chdir=>dir)
         filename = #{code}.chr('UTF-8').force_encoding("ASCII-8BIT")
-        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
+        opts = {}
         ents = Dir.entries(".", opts)
-        expected_filename = #{code}.chr('UTF-8').encode(Encoding.find("filesystem")) rescue expected_filename = "?"
-        expected_filename = expected_filename.force_encoding("ASCII-8BIT")
         if /mswin|mingw/ =~ RUBY_PLATFORM
-          case
-          when ents.include?(filename)
-          when ents.include?(expected_filename)
-            filename = expected_filename
-          else
-            ents = Dir.entries(".", {:encoding => Encoding.find("filesystem")})
-            filename = expected_filename
-          end
+          filename.force_encoding("UTF-8")
         end
         assert_include(ents, filename)
       EOS
@@ -196,27 +190,25 @@ def test_filename_euc_jp
       assert_separately(%w[-EEUC-JP], <<-'EOS', :chdir=>d)
         filename = "\xA4\xA2".force_encoding("euc-jp")
         File.open(filename, "w") {}
-        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
+        opts = {}
         ents = Dir.entries(".", opts)
         if /darwin/ =~ RUBY_PLATFORM
           filename = filename.encode("utf-8").force_encoding("euc-jp")
+        elsif /mswin|mingw/ =~ RUBY_PLATFORM
+          filename = filename.encode("utf-8")
         end
         assert_include(ents, filename)
       EOS
       assert_separately(%w[-EASCII-8BIT], <<-'EOS', :chdir=>d)
         filename = "\xA4\xA2".force_encoding('ASCII-8BIT')
-        win_expected_filename = filename.encode(Encoding.find("filesystem"), "euc-jp") rescue "?"
-        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
+        opts = {}
         ents = Dir.entries(".", opts)
         unless ents.include?(filename)
           case RUBY_PLATFORM
           when /darwin/
             filename = filename.encode("utf-8", "euc-jp").b
           when /mswin|mingw/
-            if ents.include?(win_expected_filename.b)
-              ents = Dir.entries(".", {:encoding => Encoding.find("filesystem")})
-              filename = win_expected_filename
-            end
+            filename = filename.encode("utf-8", "euc-jp")
           end
         end
         assert_include(ents, filename)
@@ -402,14 +394,10 @@ def test_entries_compose
     with_tmpdir {|d|
       orig = %W"d\u{e9}tente x\u{304c 304e 3050 3052 3054}"
       orig.each {|n| open(n, "w") {}}
-      if /mswin|mingw/ =~ RUBY_PLATFORM
-        opts = {:encoding => Encoding.default_external}
-        orig.map! {|o| o.encode("filesystem") rescue o.tr("^a-z", "?")}
-      else
-        enc = Encoding.find("filesystem")
-        enc = Encoding::ASCII_8BIT if enc == Encoding::US_ASCII
-        orig.each {|o| o.force_encoding(enc) }
-      end
+      enc = Encoding.find("filesystem")
+      enc = Encoding::ASCII_8BIT if enc == Encoding::US_ASCII
+      orig.each {|o| o.force_encoding(enc) }
+      opts = {}
       ents = Dir.entries(".", opts).reject {|n| /\A\./ =~ n}
       ents.sort!
       PP.assert_equal(orig, ents, bug7267)
@@ -421,13 +409,9 @@ def test_pwd
     expected = []
     results = []
     orig.each {|o|
-      if /mswin|mingw/ =~ RUBY_PLATFORM
-        n = (o.encode("filesystem") rescue next)
-      else
-        enc = Encoding.find("filesystem")
-        enc = Encoding::ASCII_8BIT if enc == Encoding::US_ASCII
-        n = o.dup.force_encoding(enc)
-      end
+      enc = Encoding.find("filesystem")
+      enc = Encoding::ASCII_8BIT if enc == Encoding::US_ASCII
+      n = o.dup.force_encoding(enc)
       expected << n
       with_tmpdir {
         Dir.mkdir(o)
diff --git a/test/ruby/test_process.rb b/test/ruby/test_process.rb
index 28617b6..0b3a4a7 100644
--- a/test/ruby/test_process.rb
+++ b/test/ruby/test_process.rb
@@ -432,7 +432,7 @@ def test_execopts_open_chdir_m17n_path
       system(*PWD, :chdir => "テスト", :out => "open_chdir_テスト")
       assert_file.exist?("open_chdir_テスト")
       assert_file.not_exist?("テスト/open_chdir_テスト")
-      assert_equal("#{d}/テスト", File.read("open_chdir_テスト").chomp.encode(__ENCODING__))
+      assert_equal("#{d}/テスト", File.read("open_chdir_テスト", encoding: __ENCODING__).chomp)
     }
   end if windows? || Encoding.find('locale') == Encoding::UTF_8

diff --git a/win32/dir.h b/win32/dir.h
index b1f981f..8a2aa96 100644
--- a/win32/dir.h
+++ b/win32/dir.h
@@ -34,7 +34,7 @@ void           rb_w32_seekdir(DIR *, long);
 void           rb_w32_rewinddir(DIR *);
 void           rb_w32_closedir(DIR *);

-#define opendir(s)   rb_w32_opendir((s))
+#define opendir(s)   rb_w32_uopendir((s))
 #define readdir(d)   rb_w32_readdir((d), 0)
 #define telldir(d)   rb_w32_telldir((d))
 #define seekdir(d, l)   rb_w32_seekdir((d), (l))
diff --git a/win32/file.c b/win32/file.c
index 556d961..59c7904 100644
--- a/win32/file.c
+++ b/win32/file.c
@@ -385,7 +385,7 @@ rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_na
 	if (path_cp == INVALID_CODE_PAGE || rb_enc_str_asciionly_p(path)) {
 	    /* use filesystem encoding if expanding home dir */
 	    path_encoding = rb_filesystem_encoding();
-	    cp = path_cp = system_code_page();
+	    cp = path_cp = code_page(path_encoding);
 	}

 	/* ignores dir since we are expanding home */
diff --git a/win32/win32.c b/win32/win32.c
index 38ad089..4975658 100644
--- a/win32/win32.c
+++ b/win32/win32.c
@@ -2139,7 +2139,7 @@ rb_w32_conv_from_wstr(const WCHAR *wstr, long *lenp, rb_encoding *enc)
     long len;
     char *ptr;

-    if (NIL_P(str)) return wstr_to_filecp(wstr, lenp);
+    if (NIL_P(str)) return wstr_to_utf8(wstr, lenp);
     *lenp = len = RSTRING_LEN(str);
     memcpy(ptr = malloc(len + 1), RSTRING_PTR(str), len);
     ptr[len] = '\0';
@@ -4645,19 +4645,6 @@ rb_w32_getcwd(char *buffer, int size)

     translate_char(p, '\\', '/', CP_UTF8);

-    rp = rb_str_conv_enc_opts(rb_utf8_str_new_cstr(p), NULL, rb_filesystem_encoding(), ECONV_UNDEF_REPLACE, Qnil);
-    if (!buffer) {
-      free(p);
-      p = strdup(RSTRING_PTR(rp));
-    } else {
-      if (RSTRING_LEN(rp) >= size) {
-        errno = ERANGE;
-        return NULL;
-      }
-      strncpy(p, RSTRING_PTR(rp), size - 1);
-      p[size - 1] = '\0';
-    }
-
     return p;
 }

--
2.9.2

