From 4644ac5527e48a1a8c48dc790621c73913e6dbf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Tue, 30 Sep 2014 22:27:53 +0700
Subject: [PATCH 1/2] [core] Decode environment variables with filesystem
 encoding (Fixes #3854, Fixes #3217, Fixes #2918)

Introduces compat versions of os.getenv and os.path.expanduser
---
 test/test_utils.py      | 13 ++++++++++
 youtube_dl/YoutubeDL.py |  3 ++-
 youtube_dl/__init__.py  |  3 ++-
 youtube_dl/cache.py     |  3 ++-
 youtube_dl/options.py   | 14 ++++++-----
 youtube_dl/utils.py     | 53 +++++++++++++++++++++++++++++++++++++----
 6 files changed, 76 insertions(+), 13 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 97551ce9c..19c9ba7f8 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -44,6 +44,9 @@ from youtube_dl.utils import (
     limit_length,
     escape_rfc3986,
     escape_url,
+    get_filesystem_encoding,
+    compat_getenv,
+    compat_expanduser,
 )
 
 
@@ -331,5 +334,15 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
+    def test_compat_getenv(self):
+        test_str = 'тест'
+        os.environ['YOUTUBE-DL-TEST'] = test_str.encode(get_filesystem_encoding())
+        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
+
+    def test_compat_expanduser(self):
+        test_str = 'C:\Documents and Settings\тест\Application Data'
+        os.environ['HOME'] = test_str.encode(get_filesystem_encoding())
+        self.assertEqual(compat_expanduser('~'), test_str)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 4a9610355..34a1e3b5c 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -24,6 +24,7 @@ if os.name == 'nt':
 
 from .utils import (
     compat_cookiejar,
+    compat_expanduser,
     compat_http_client,
     compat_str,
     compat_urllib_error,
@@ -447,7 +448,7 @@ class YoutubeDL(object):
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-            tmpl = os.path.expanduser(outtmpl)
+            tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             return filename
         except ValueError as err:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 7f2b4dfcc..e73bc5c37 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -94,6 +94,7 @@ from .options import (
     parseOpts,
 )
 from .utils import (
+    compat_expanduser,
     compat_getpass,
     compat_print,
     DateRange,
@@ -285,7 +286,7 @@ def _real_main(argv=None):
                      u' template'.format(outtmpl))
 
     any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
-    download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+    download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 
     ydl_opts = {
         'usenetrc': opts.usenetrc,
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
index 79ff09f78..ac5925d32 100644
--- a/youtube_dl/cache.py
+++ b/youtube_dl/cache.py
@@ -9,6 +9,7 @@ import shutil
 import traceback
 
 from .utils import (
+    compat_expanduser,
     write_json_file,
 )
 
@@ -22,7 +23,7 @@ class Cache(object):
         if res is None:
             cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')
             res = os.path.join(cache_root, 'youtube-dl')
-        return os.path.expanduser(res)
+        return compat_expanduser(res)
 
     def _get_cache_fn(self, section, key, dtype):
         assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index f651337ad..e6f9f33a2 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -6,6 +6,8 @@ import shlex
 import sys
 
 from .utils import (
+    compat_expanduser,
+    compat_getenv,
     get_term_width,
     write_string,
 )
@@ -27,19 +29,19 @@ def parseOpts(overrideArguments=None):
         return res
 
     def _readUserConf():
-        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
+        xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
         if xdg_config_home:
             userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
             if not os.path.isfile(userConfFile):
                 userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
         else:
-            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
+            userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl', 'config')
             if not os.path.isfile(userConfFile):
-                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
+                userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl.conf')
         userConf = _readOptions(userConfFile, None)
 
         if userConf is None:
-            appdata_dir = os.environ.get('appdata')
+            appdata_dir = compat_getenv('appdata')
             if appdata_dir:
                 userConf = _readOptions(
                     os.path.join(appdata_dir, 'youtube-dl', 'config'),
@@ -51,11 +53,11 @@ def parseOpts(overrideArguments=None):
 
         if userConf is None:
             userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+                os.path.join(compat_expanduser('~'), 'youtube-dl.conf'),
                 default=None)
         if userConf is None:
             userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+                os.path.join(compat_expanduser('~'), 'youtube-dl.conf.txt'),
                 default=None)
 
         if userConf is None:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index f05747097..afe32ae05 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -203,6 +203,48 @@ def compat_ord(c):
     if type(c) is int: return c
     else: return ord(c)
 
+
+# Environment variables should be decoded with filesystem encoding
+# otherwise this results in issues like #3854 #2918 #3217
+if sys.version_info >= (3, 0):
+    compat_getenv = os.getenv
+    compat_expanduser = os.path.expanduser
+else:
+    def compat_getenv(key, default=None):
+        env = os.getenv(key, default)
+        if env:
+            env = env.decode(get_filesystem_encoding())
+        return env
+
+    def compat_expanduser(path):
+        """Expand ~ and ~user constructs.
+
+        If user or $HOME is unknown, do nothing."""
+        if path[:1] != '~':
+            return path
+        i, n = 1, len(path)
+        while i < n and path[i] not in '/\\':
+            i += 1
+
+        if 'HOME' in os.environ:
+            userhome = compat_getenv('HOME')
+        elif 'USERPROFILE' in os.environ:
+            userhome = compat_getenv('USERPROFILE')
+        elif not 'HOMEPATH' in os.environ:
+            return path
+        else:
+            try:
+                drive = compat_getenv('HOMEDRIVE')
+            except KeyError:
+                drive = ''
+            userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
+
+        if i != 1:  # ~user
+            userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+
+        return userhome + path[i:]
+
+
 # This is not clearly defined otherwise
 compiled_regex_type = type(re.compile(''))
 
@@ -1204,11 +1246,14 @@ class locked_file(object):
         return self.f.read(*args)
 
 
+def get_filesystem_encoding():
+    encoding = sys.getfilesystemencoding()
+    return encoding if encoding is not None else 'utf-8'
+
+
 def shell_quote(args):
     quoted_args = []
-    encoding = sys.getfilesystemencoding()
-    if encoding is None:
-        encoding = 'utf-8'
+    encoding = get_filesystem_encoding()
     for a in args:
         if isinstance(a, bytes):
             # We may get a filename encoded with 'encodeFilename'
@@ -1258,7 +1303,7 @@ def format_bytes(bytes):
 
 
 def get_term_width():
-    columns = os.environ.get('COLUMNS', None)
+    columns = compat_getenv('COLUMNS', None)
     if columns:
         return int(columns)
 

From fc66e4a0d59d064518c3f18d65d1f4d87de8fb8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 1 Oct 2014 19:48:55 +0700
Subject: [PATCH 2/2] [utils] Add posix expanduser implementation and clarify
 the original source

---
 youtube_dl/utils.py | 84 +++++++++++++++++++++++++++++++--------------
 1 file changed, 59 insertions(+), 25 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index afe32ae05..526d2cc02 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -204,45 +204,79 @@ def compat_ord(c):
     else: return ord(c)
 
 
-# Environment variables should be decoded with filesystem encoding
-# otherwise this results in issues like #3854 #2918 #3217
 if sys.version_info >= (3, 0):
     compat_getenv = os.getenv
     compat_expanduser = os.path.expanduser
 else:
+    # Environment variables should be decoded with filesystem encoding.
+    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
+
     def compat_getenv(key, default=None):
         env = os.getenv(key, default)
         if env:
             env = env.decode(get_filesystem_encoding())
         return env
 
-    def compat_expanduser(path):
-        """Expand ~ and ~user constructs.
+    # HACK: The default implementations of os.path.expanduser from cpython do not decode
+    # environment variables with filesystem encoding. We will work around this by
+    # providing adjusted implementations.
+    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
+    # for different platforms with correct environment variables decoding.
 
-        If user or $HOME is unknown, do nothing."""
-        if path[:1] != '~':
-            return path
-        i, n = 1, len(path)
-        while i < n and path[i] not in '/\\':
-            i += 1
+    if os.name == 'posix':
+        def compat_expanduser(path):
+            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
+            do nothing."""
+            if not path.startswith('~'):
+                return path
+            i = path.find('/', 1)
+            if i < 0:
+                i = len(path)
+            if i == 1:
+                if 'HOME' not in os.environ:
+                    import pwd
+                    userhome = pwd.getpwuid(os.getuid()).pw_dir
+                else:
+                    userhome = compat_getenv('HOME')
+            else:
+                import pwd
+                try:
+                    pwent = pwd.getpwnam(path[1:i])
+                except KeyError:
+                    return path
+                userhome = pwent.pw_dir
+            userhome = userhome.rstrip('/')
+            return (userhome + path[i:]) or '/'
+    elif os.name == 'nt' or os.name == 'ce':
+        def compat_expanduser(path):
+            """Expand ~ and ~user constructs.
 
-        if 'HOME' in os.environ:
-            userhome = compat_getenv('HOME')
-        elif 'USERPROFILE' in os.environ:
-            userhome = compat_getenv('USERPROFILE')
-        elif not 'HOMEPATH' in os.environ:
-            return path
-        else:
-            try:
-                drive = compat_getenv('HOMEDRIVE')
-            except KeyError:
-                drive = ''
-            userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
+            If user or $HOME is unknown, do nothing."""
+            if path[:1] != '~':
+                return path
+            i, n = 1, len(path)
+            while i < n and path[i] not in '/\\':
+                i = i + 1
 
-        if i != 1:  # ~user
-            userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+            if 'HOME' in os.environ:
+                userhome = compat_getenv('HOME')
+            elif 'USERPROFILE' in os.environ:
+                userhome = compat_getenv('USERPROFILE')
+            elif not 'HOMEPATH' in os.environ:
+                return path
+            else:
+                try:
+                    drive = compat_getenv('HOMEDRIVE')
+                except KeyError:
+                    drive = ''
+                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
 
-        return userhome + path[i:]
+            if i != 1: #~user
+                userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+
+            return userhome + path[i:]
+    else:
+        compat_expanduser = os.path.expanduser
 
 
 # This is not clearly defined otherwise