diff --git a/glob/glob.py b/glob/glob.py index e69de29b..1f602656 100644 --- a/glob/glob.py +++ b/glob/glob.py @@ -0,0 +1,94 @@ +"""Filename globbing utility.""" + +import os +import re +import fnmatch + +__all__ = ["glob", "iglob"] + +def glob(pathname): + """Return a list of paths matching a pathname pattern. + + The pattern may contain simple shell-style wildcards a la + fnmatch. However, unlike fnmatch, filenames starting with a + dot are special cases that are not matched by '*' and '?' + patterns. + + """ + return list(iglob(pathname)) + +def iglob(pathname): + """Return an iterator which yields the paths matching a pathname pattern. + + The pattern may contain simple shell-style wildcards a la + fnmatch. However, unlike fnmatch, filenames starting with a + dot are special cases that are not matched by '*' and '?' + patterns. + + """ + if not has_magic(pathname): + if os.path.lexists(pathname): + yield pathname + return + dirname, basename = os.path.split(pathname) + if not dirname: + for name in glob1(None, basename): + yield name + return + # `os.path.split()` returns the argument itself as a dirname if it is a + # drive or UNC path. Prevent an infinite recursion if a drive or UNC path + # contains magic characters (i.e. r'\\?\C:'). + if dirname != pathname and has_magic(dirname): + dirs = iglob(dirname) + else: + dirs = [dirname] + if has_magic(basename): + glob_in_dir = glob1 + else: + glob_in_dir = glob0 + for dirname in dirs: + for name in glob_in_dir(dirname, basename): + yield os.path.join(dirname, name) + +# These 2 helper functions non-recursively glob inside a literal directory. +# They return a list of basenames. `glob1` accepts a pattern while `glob0` +# takes a literal basename (so it only has to check for its existence). + +def glob1(dirname, pattern): + if not dirname: + if isinstance(pattern, bytes): + dirname = bytes(os.curdir, 'ASCII') + else: + dirname = os.curdir + try: + names = os.listdir(dirname) + except os.error: + return [] + if not _ishidden(pattern): + names = [x for x in names if not _ishidden(x)] + return fnmatch.filter(names, pattern) + +def glob0(dirname, basename): + if not basename: + # `os.path.split()` returns an empty basename for paths ending with a + # directory separator. 'q*x/' should match only directories. + if os.path.isdir(dirname): + return [basename] + else: + if os.path.lexists(os.path.join(dirname, basename)): + return [basename] + return [] + + +magic_check = re.compile('[*?[]') +magic_check_bytes = re.compile(b'[*?[]') + +def has_magic(s): + if isinstance(s, bytes): + match = magic_check_bytes.search(s) + else: + match = magic_check.search(s) + return match is not None + +def _ishidden(path): + return path[0] in ('.', b'.'[0]) diff --git a/glob/test_glob.py b/glob/test_glob.py new file mode 100644 index 00000000..eb9aeb57 --- /dev/null +++ b/glob/test_glob.py @@ -0,0 +1,178 @@ +import glob +import os +import shutil +import sys +import unittest + +from test.support import (run_unittest, TESTFN, skip_unless_symlink, + can_symlink, create_empty_file) + + +class GlobTests(unittest.TestCase): + + def norm(self, *parts): + return os.path.normpath(os.path.join(self.tempdir, *parts)) + + def mktemp(self, *parts): + filename = self.norm(*parts) + base, file = os.path.split(filename) + if not os.path.exists(base): + os.makedirs(base) + create_empty_file(filename) + + def setUp(self): + self.tempdir = TESTFN + "_dir" + self.mktemp('a', 'D') + self.mktemp('aab', 'F') + self.mktemp('.aa', 'G') + self.mktemp('.bb', 'H') + self.mktemp('aaa', 'zzzF') + self.mktemp('ZZZ') + self.mktemp('a', 'bcd', 'EF') + self.mktemp('a', 'bcd', 'efg', 'ha') + if can_symlink(): + os.symlink(self.norm('broken'), self.norm('sym1')) + os.symlink('broken', self.norm('sym2')) + os.symlink(os.path.join('a', 'bcd'), self.norm('sym3')) + + def tearDown(self): + shutil.rmtree(self.tempdir) + + def glob(self, *parts): + if len(parts) == 1: + pattern = parts[0] + else: + pattern = os.path.join(*parts) + p = os.path.join(self.tempdir, pattern) + res = glob.glob(p) + self.assertEqual(list(glob.iglob(p)), res) + bres = [os.fsencode(x) for x in res] + self.assertEqual(glob.glob(os.fsencode(p)), bres) + self.assertEqual(list(glob.iglob(os.fsencode(p))), bres) + return res + + def assertSequencesEqual_noorder(self, l1, l2): + l1 = list(l1) + l2 = list(l2) + self.assertEqual(set(l1), set(l2)) + self.assertEqual(sorted(l1), sorted(l2)) + + def test_glob_literal(self): + eq = self.assertSequencesEqual_noorder + eq(self.glob('a'), [self.norm('a')]) + eq(self.glob('a', 'D'), [self.norm('a', 'D')]) + eq(self.glob('aab'), [self.norm('aab')]) + eq(self.glob('zymurgy'), []) + + res = glob.glob('*') + self.assertEqual({type(r) for r in res}, {str}) + res = glob.glob(os.path.join(os.curdir, '*')) + self.assertEqual({type(r) for r in res}, {str}) + + res = glob.glob(b'*') + self.assertEqual({type(r) for r in res}, {bytes}) + res = glob.glob(os.path.join(os.fsencode(os.curdir), b'*')) + self.assertEqual({type(r) for r in res}, {bytes}) + + def test_glob_one_directory(self): + eq = self.assertSequencesEqual_noorder + eq(self.glob('a*'), map(self.norm, ['a', 'aab', 'aaa'])) + eq(self.glob('*a'), map(self.norm, ['a', 'aaa'])) + eq(self.glob('.*'), map(self.norm, ['.aa', '.bb'])) + eq(self.glob('?aa'), map(self.norm, ['aaa'])) + eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab'])) + eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab'])) + eq(self.glob('*q'), []) + + def test_glob_nested_directory(self): + eq = self.assertSequencesEqual_noorder + if os.path.normcase("abCD") == "abCD": + # case-sensitive filesystem + eq(self.glob('a', 'bcd', 'E*'), [self.norm('a', 'bcd', 'EF')]) + else: + # case insensitive filesystem + eq(self.glob('a', 'bcd', 'E*'), [self.norm('a', 'bcd', 'EF'), + self.norm('a', 'bcd', 'efg')]) + eq(self.glob('a', 'bcd', '*g'), [self.norm('a', 'bcd', 'efg')]) + + def test_glob_directory_names(self): + eq = self.assertSequencesEqual_noorder + eq(self.glob('*', 'D'), [self.norm('a', 'D')]) + eq(self.glob('*', '*a'), []) + eq(self.glob('a', '*', '*', '*a'), + [self.norm('a', 'bcd', 'efg', 'ha')]) + eq(self.glob('?a?', '*F'), [self.norm('aaa', 'zzzF'), + self.norm('aab', 'F')]) + + def test_glob_directory_with_trailing_slash(self): + # Patterns ending with a slash shouldn't match non-dirs + res = glob.glob(self.norm('Z*Z') + os.sep) + self.assertEqual(res, []) + res = glob.glob(self.norm('ZZZ') + os.sep) + self.assertEqual(res, []) + # When there is a wildcard pattern which ends with os.sep, glob() + # doesn't blow up. + res = glob.glob(self.norm('aa*') + os.sep) + self.assertEqual(len(res), 2) + # either of these results is reasonable + self.assertIn(set(res), [ + {self.norm('aaa'), self.norm('aab')}, + {self.norm('aaa') + os.sep, self.norm('aab') + os.sep}, + ]) + + def test_glob_bytes_directory_with_trailing_slash(self): + # Same as test_glob_directory_with_trailing_slash, but with a + # bytes argument. + res = glob.glob(os.fsencode(self.norm('Z*Z') + os.sep)) + self.assertEqual(res, []) + res = glob.glob(os.fsencode(self.norm('ZZZ') + os.sep)) + self.assertEqual(res, []) + res = glob.glob(os.fsencode(self.norm('aa*') + os.sep)) + self.assertEqual(len(res), 2) + # either of these results is reasonable + self.assertIn(set(res), [ + {os.fsencode(self.norm('aaa')), + os.fsencode(self.norm('aab'))}, + {os.fsencode(self.norm('aaa') + os.sep), + os.fsencode(self.norm('aab') + os.sep)}, + ]) + + @skip_unless_symlink + def test_glob_symlinks(self): + eq = self.assertSequencesEqual_noorder + eq(self.glob('sym3'), [self.norm('sym3')]) + eq(self.glob('sym3', '*'), [self.norm('sym3', 'EF'), + self.norm('sym3', 'efg')]) + self.assertIn(self.glob('sym3' + os.sep), + [[self.norm('sym3')], [self.norm('sym3') + os.sep]]) + eq(self.glob('*', '*F'), + [self.norm('aaa', 'zzzF'), + self.norm('aab', 'F'), self.norm('sym3', 'EF')]) + + @skip_unless_symlink + def test_glob_broken_symlinks(self): + eq = self.assertSequencesEqual_noorder + eq(self.glob('sym*'), [self.norm('sym1'), self.norm('sym2'), + self.norm('sym3')]) + eq(self.glob('sym1'), [self.norm('sym1')]) + eq(self.glob('sym2'), [self.norm('sym2')]) + + @unittest.skipUnless(sys.platform == "win32", "Win32 specific test") + def test_glob_magic_in_drive(self): + eq = self.assertSequencesEqual_noorder + eq(glob.glob('*:'), []) + eq(glob.glob(b'*:'), []) + eq(glob.glob('?:'), []) + eq(glob.glob(b'?:'), []) + eq(glob.glob('\\\\?\\c:\\'), ['\\\\?\\c:\\']) + eq(glob.glob(b'\\\\?\\c:\\'), [b'\\\\?\\c:\\']) + eq(glob.glob('\\\\*\\*\\'), []) + eq(glob.glob(b'\\\\*\\*\\'), []) + + +def test_main(): + run_unittest(GlobTests) + + +if __name__ == "__main__": + test_main()