diff --git a/tests/bytecode/.gitignore b/tests/bytecode/.gitignore
new file mode 100644
index 0000000000..53752db253
--- /dev/null
+++ b/tests/bytecode/.gitignore
@@ -0,0 +1 @@
+output
diff --git a/tests/bytecode/README.md b/tests/bytecode/README.md
new file mode 100644
index 0000000000..e8778900bf
--- /dev/null
+++ b/tests/bytecode/README.md
@@ -0,0 +1,12 @@
+This directory contains the framework and test files for testing the byte code
+output of the Micro Python compiler.
+
+You first need to build the 'cpy' executable in the directory micropython/unix-cpy/.
+This executable is a minimal version of Micro Python which compiles a single source
+file and outputs the corresponding byte code.
+
+The output of Micro Python is checked against CPython 3.3.
+
+To run the tests, use:
+
+    ./run-tests
diff --git a/tests/bytecode/check.py b/tests/bytecode/check.py
new file mode 100644
index 0000000000..b5211b0fe8
--- /dev/null
+++ b/tests/bytecode/check.py
@@ -0,0 +1,56 @@
+import sys
+name = sys.argv[1].split('/')[-1].split('.')[0]
+with open(sys.argv[1]) as f:
+    lines_correct = [l.strip('\n') for l in f.readlines()]
+lines_me = [l.strip('\n') for l in sys.stdin.readlines()]
+if len(lines_me) != len(lines_correct):
+    if len(lines_me) == 0:
+        print('{:<20}: no output'.format(name))
+    elif lines_me[0].find('syntax error') >= 0:
+        print('{:<20}: syntax error'.format(name))
+    elif lines_me[0].find(' cannot be compiled') >= 0:
+        print('{:<20}: compile error: {}'.format(name, lines_me[0]))
+    else:
+        print('{:<20}: mismatch in number of lines'.format(name))
+else:
+    total = len(lines_me)
+    same = 0
+    bad_num_fields = 0
+    bad_2 = 0
+    bad_3 = 0
+    jump_op = ['JUMP_FORWARD', 'JUMP_ABSOLUTE', 'POP_JUMP_IF_FALSE', 'POP_JUMP_IF_TRUE', 'SETUP_LOOP']
+    jump_abs_op = ['JUMP_FORWARD', 'JUMP_ABSOLUTE']
+    for i in range(total):
+        if lines_me[i] == lines_correct[i]:
+            same += 1
+        else:
+            # line is different
+            line_me = lines_me[i].strip().split(' ', 2)
+            line_correct = lines_correct[i].strip().split(' ', 2)
+            allow = False
+            if len(line_me) != len(line_correct):
+                bad_num_fields += 1
+            elif len(line_me) == 2:
+                if line_me[0] == line_correct[0] == 'stacksize':
+                    allow = True
+                else:
+                    bad_2 += 1
+            else:
+                assert(len(line_me) == 3)
+                if line_me[0] == line_correct[0] and line_me[1] in jump_abs_op and line_correct[1] in jump_abs_op:
+                    allow = True
+                elif line_me[0] == line_correct[0] and line_me[1] == line_correct[1] in jump_op:
+                    allow = True
+                else:
+                    bad_3 += 1
+            #if not allow:
+            #    print(line_me, 'vs', line_correct)
+
+    bad_str = ''
+    if bad_num_fields > 0:
+        bad_str += ', {} bad num fields'.format(bad_num_fields)
+    if bad_2 > 0:
+        bad_str += ', {} bad 2-field'.format(bad_2)
+    if bad_3 > 0:
+        bad_str += ', {} bad 3-field'.format(bad_3)
+    print('{:<20}: {:>6} lines, {:>5.1f}% correct{}'.format(name, total, 100 * same / total, bad_str))
diff --git a/tests/bytecode/mp-tests/assert1.py b/tests/bytecode/mp-tests/assert1.py
new file mode 100644
index 0000000000..077defc970
--- /dev/null
+++ b/tests/bytecode/mp-tests/assert1.py
@@ -0,0 +1,2 @@
+assert x
+assert x, 'test'
diff --git a/tests/bytecode/mp-tests/assign1.py b/tests/bytecode/mp-tests/assign1.py
new file mode 100644
index 0000000000..64ae4a0c59
--- /dev/null
+++ b/tests/bytecode/mp-tests/assign1.py
@@ -0,0 +1,10 @@
+[] = ()
+[] = []
+a = b
+(a) = b
+a, b = c, d
+a, b, c = d, e, f
+a, b, c, d = e, f, g, h
+#(a, b) = c, d
+#a, b = (c, d)
+#(a, b) = (c, d)
diff --git a/tests/bytecode/mp-tests/assign2.py b/tests/bytecode/mp-tests/assign2.py
new file mode 100644
index 0000000000..f55a3221d0
--- /dev/null
+++ b/tests/bytecode/mp-tests/assign2.py
@@ -0,0 +1,10 @@
+*a, = b
+a, *b = c
+a, *b, = c
+a, *b, c = d
+
+[*a] = b
+[*a,] = b
+[a, *b] = c
+#[a, *b,] = c
+#[a, *b, c] = d
diff --git a/tests/bytecode/mp-tests/augassign1.py b/tests/bytecode/mp-tests/augassign1.py
new file mode 100644
index 0000000000..38a376af46
--- /dev/null
+++ b/tests/bytecode/mp-tests/augassign1.py
@@ -0,0 +1,5 @@
+[] = ()
+x += 1
+x.y += 1
+x.f().y += 1
+x[1] += 2
diff --git a/tests/bytecode/mp-tests/call1.py b/tests/bytecode/mp-tests/call1.py
new file mode 100644
index 0000000000..eb8a8bf5f1
--- /dev/null
+++ b/tests/bytecode/mp-tests/call1.py
@@ -0,0 +1 @@
+f(a, b=c)
diff --git a/tests/bytecode/mp-tests/class1.py b/tests/bytecode/mp-tests/class1.py
new file mode 100644
index 0000000000..bc87666806
--- /dev/null
+++ b/tests/bytecode/mp-tests/class1.py
@@ -0,0 +1,3 @@
+class C:
+    pass
+C()
diff --git a/tests/bytecode/mp-tests/class2.py b/tests/bytecode/mp-tests/class2.py
new file mode 100644
index 0000000000..1a3e89849d
--- /dev/null
+++ b/tests/bytecode/mp-tests/class2.py
@@ -0,0 +1,4 @@
+class A:
+    x = 1
+    y = x + z
+A()
diff --git a/tests/bytecode/mp-tests/class3.py b/tests/bytecode/mp-tests/class3.py
new file mode 100644
index 0000000000..f49e2e8114
--- /dev/null
+++ b/tests/bytecode/mp-tests/class3.py
@@ -0,0 +1,10 @@
+class A:
+    def f(x):
+        return x
+    def g(y):
+        def h(z):
+            return x + y + z
+        h(y)
+A()
+A.f(1)
+A.g(2)(3)
diff --git a/tests/bytecode/mp-tests/class4.py b/tests/bytecode/mp-tests/class4.py
new file mode 100644
index 0000000000..4cb6258093
--- /dev/null
+++ b/tests/bytecode/mp-tests/class4.py
@@ -0,0 +1,9 @@
+class A:
+    def __init__(self, x):
+        self.x = x
+        self.y = 0
+
+    def get(self):
+        return self.x + self.y
+A(1)
+A(2).get()
diff --git a/tests/bytecode/mp-tests/class5.py b/tests/bytecode/mp-tests/class5.py
new file mode 100644
index 0000000000..4bf96c8e2f
--- /dev/null
+++ b/tests/bytecode/mp-tests/class5.py
@@ -0,0 +1,8 @@
+class A(B):
+    pass
+class A(object):
+    pass
+class A(x.y()):
+    pass
+class A(B, C):
+    pass
diff --git a/tests/bytecode/mp-tests/closure1.py b/tests/bytecode/mp-tests/closure1.py
new file mode 100644
index 0000000000..fdfb4eaf27
--- /dev/null
+++ b/tests/bytecode/mp-tests/closure1.py
@@ -0,0 +1,2 @@
+# basic closure
+# to write!
diff --git a/tests/bytecode/mp-tests/closure2.py b/tests/bytecode/mp-tests/closure2.py
new file mode 100644
index 0000000000..08b4205810
--- /dev/null
+++ b/tests/bytecode/mp-tests/closure2.py
@@ -0,0 +1,7 @@
+# test closing over an argument
+
+def f(x):
+    y = 2 * x
+    def g(z):
+        return x + y + z
+    return g
diff --git a/tests/bytecode/mp-tests/closure3.py b/tests/bytecode/mp-tests/closure3.py
new file mode 100644
index 0000000000..905211317a
--- /dev/null
+++ b/tests/bytecode/mp-tests/closure3.py
@@ -0,0 +1,12 @@
+# test when different variables are closed over by different functions
+
+def f():
+    l1 = 1
+    l2 = 2
+    l3 = 3
+
+    def g():
+        return l1 + l2
+
+    def h():
+        return l2 + l3
diff --git a/tests/bytecode/mp-tests/closure4.py b/tests/bytecode/mp-tests/closure4.py
new file mode 100644
index 0000000000..6828f89008
--- /dev/null
+++ b/tests/bytecode/mp-tests/closure4.py
@@ -0,0 +1,13 @@
+# test when a function has cell and free vars
+
+def f():
+    f_local = 1
+    f_cell = 2
+
+    def g():
+        g_local = 3
+        g_cell = f_cell + 4
+
+        def h():
+            h1_local = 4
+            h2_local = f_cell + g_cell
diff --git a/tests/bytecode/mp-tests/compare1.py b/tests/bytecode/mp-tests/compare1.py
new file mode 100644
index 0000000000..32ba43e3bf
--- /dev/null
+++ b/tests/bytecode/mp-tests/compare1.py
@@ -0,0 +1,8 @@
+if 1 <= x <= 5:
+    f()
+
+if 1 <= x <= y <= 7:
+    f()
+
+if a < b > c in l != c is not d:
+    f()
diff --git a/tests/bytecode/mp-tests/const1.py b/tests/bytecode/mp-tests/const1.py
new file mode 100644
index 0000000000..545b334344
--- /dev/null
+++ b/tests/bytecode/mp-tests/const1.py
@@ -0,0 +1,9 @@
+x = 1
+#x = 1.2
+#x = 1.2e5
+#x = 1.2e+5
+#x = 1.2e-5
+x = ()
+x = (1,)
+x = (1,2)
+x = ('a',None,3)
diff --git a/tests/bytecode/mp-tests/continue1.py b/tests/bytecode/mp-tests/continue1.py
new file mode 100644
index 0000000000..3600691b1f
--- /dev/null
+++ b/tests/bytecode/mp-tests/continue1.py
@@ -0,0 +1,44 @@
+for a in b:
+    continue
+
+for a in b:
+    try:
+        f()
+    except:
+        continue
+    g()
+
+for a in b:
+    try:
+        f()
+        continue
+    except:
+        g()
+
+for a in b:
+    try:
+        f()
+    except:
+        try:
+            g()
+        except:
+            continue
+
+for a in b:
+    try:
+        f()
+    except:
+        try:
+            g()
+            continue
+        except:
+            h()
+
+for a in b:
+    try:
+        f()
+    except:
+        pass
+    else:
+        continue
+    g()
diff --git a/tests/bytecode/mp-tests/decorate1.py b/tests/bytecode/mp-tests/decorate1.py
new file mode 100644
index 0000000000..208aebc5bf
--- /dev/null
+++ b/tests/bytecode/mp-tests/decorate1.py
@@ -0,0 +1,20 @@
+@d
+def f():
+    pass
+
+@d
+@e
+def g():
+    pass
+
+@d.e.f
+def h():
+    pass
+
+@d(a + 1)
+def i():
+    pass
+
+@d(a + 1, b + 2)
+def i():
+    pass
diff --git a/tests/bytecode/mp-tests/del1.py b/tests/bytecode/mp-tests/del1.py
new file mode 100644
index 0000000000..0a259fac78
--- /dev/null
+++ b/tests/bytecode/mp-tests/del1.py
@@ -0,0 +1,16 @@
+del x
+del x.y
+del x().y
+del g
+del x[a]
+def f():
+    global g
+    del x
+    del g
+    local = 1
+    local2 = 2
+    local3 = 3
+    del local, local2, local3
+    def f2():
+        nonlocal local3
+        del local2, local3
diff --git a/tests/bytecode/mp-tests/del2.py b/tests/bytecode/mp-tests/del2.py
new file mode 100644
index 0000000000..1c63d15fcb
--- /dev/null
+++ b/tests/bytecode/mp-tests/del2.py
@@ -0,0 +1,11 @@
+del x
+del x,
+del x, y
+del x, y,
+del x, y, z
+del (x)
+del (x,)
+del (x, y)
+del (x, y,)
+del (x, y, z)
+del a, (b, c)
diff --git a/tests/bytecode/mp-tests/dict1.py b/tests/bytecode/mp-tests/dict1.py
new file mode 100644
index 0000000000..3243faa632
--- /dev/null
+++ b/tests/bytecode/mp-tests/dict1.py
@@ -0,0 +1,3 @@
+x = {}
+x = {'a':1}
+x = {'a':1, 'b':2}
diff --git a/tests/bytecode/mp-tests/dictcomp1.py b/tests/bytecode/mp-tests/dictcomp1.py
new file mode 100644
index 0000000000..9dca499c57
--- /dev/null
+++ b/tests/bytecode/mp-tests/dictcomp1.py
@@ -0,0 +1,2 @@
+x = {a:None for a in l}
+x = {b:c for c, b in l if c}
diff --git a/tests/bytecode/mp-tests/docstring1.py b/tests/bytecode/mp-tests/docstring1.py
new file mode 100644
index 0000000000..d1e0184547
--- /dev/null
+++ b/tests/bytecode/mp-tests/docstring1.py
@@ -0,0 +1,8 @@
+"""Module"""
+
+class A:
+    """Class"""
+    pass
+
+class B:
+    """Class B"""
diff --git a/tests/bytecode/mp-tests/docstring2.py b/tests/bytecode/mp-tests/docstring2.py
new file mode 100644
index 0000000000..5a2183aef9
--- /dev/null
+++ b/tests/bytecode/mp-tests/docstring2.py
@@ -0,0 +1,3 @@
+# comment before doc string
+
+"""Doc string"""
diff --git a/tests/bytecode/mp-tests/fun1.py b/tests/bytecode/mp-tests/fun1.py
new file mode 100644
index 0000000000..36e079c01e
--- /dev/null
+++ b/tests/bytecode/mp-tests/fun1.py
@@ -0,0 +1,2 @@
+def f(*args):
+    g(*args)
diff --git a/tests/bytecode/mp-tests/fun2.py b/tests/bytecode/mp-tests/fun2.py
new file mode 100644
index 0000000000..a6cba92aad
--- /dev/null
+++ b/tests/bytecode/mp-tests/fun2.py
@@ -0,0 +1,23 @@
+def f(*, b):
+    return b
+
+def f(a, *, b):
+    return a + b
+
+def f(a, *, b, c):
+    return a + b + c
+
+def f(a, *, b=c):
+    return a + b
+
+def f(a, *, b=c, c):
+    return a + b + c
+
+def f(a, *, b=c, c=d):
+    return a + b + c
+
+def f(a, *, b=c, c, d=e):
+    return a + b + c + d
+
+def f(a=None, *, b=None):
+    return a + b
diff --git a/tests/bytecode/mp-tests/fun3.py b/tests/bytecode/mp-tests/fun3.py
new file mode 100644
index 0000000000..5336a70797
--- /dev/null
+++ b/tests/bytecode/mp-tests/fun3.py
@@ -0,0 +1,3 @@
+def f(a, b):
+    def g(c, d=None, *, e=True):
+        return a + b + c + d + e
diff --git a/tests/bytecode/mp-tests/if1.py b/tests/bytecode/mp-tests/if1.py
new file mode 100644
index 0000000000..8c8a08ccdd
--- /dev/null
+++ b/tests/bytecode/mp-tests/if1.py
@@ -0,0 +1,24 @@
+if x:
+    x()
+if x:
+    x()
+elif y:
+    y()
+if x:
+    x()
+else:
+    zz()
+if x:
+    x()
+elif y:
+    y()
+else:
+    zz()
+if x:
+    x()
+elif y:
+    y()
+elif z:
+    z()
+else:
+    zz()
diff --git a/tests/bytecode/mp-tests/if2.py b/tests/bytecode/mp-tests/if2.py
new file mode 100644
index 0000000000..deb0cd5811
--- /dev/null
+++ b/tests/bytecode/mp-tests/if2.py
@@ -0,0 +1,26 @@
+def f(x):
+    if x:
+        return
+    if x:
+        return
+    elif y:
+        return
+    if x:
+        return
+    else:
+        return
+    if x:
+        return
+    elif y:
+        return
+    else:
+        return
+    if x:
+        return
+    elif y:
+        return
+    elif z:
+        return
+    else:
+        return
+    return None
diff --git a/tests/bytecode/mp-tests/if3.py b/tests/bytecode/mp-tests/if3.py
new file mode 100644
index 0000000000..bd01514d63
--- /dev/null
+++ b/tests/bytecode/mp-tests/if3.py
@@ -0,0 +1,6 @@
+if a and b:
+    f()
+if a or b:
+    f()
+if a and (b or c):
+    f()
diff --git a/tests/bytecode/mp-tests/if4.py b/tests/bytecode/mp-tests/if4.py
new file mode 100644
index 0000000000..4d5a86cd8b
--- /dev/null
+++ b/tests/bytecode/mp-tests/if4.py
@@ -0,0 +1,8 @@
+if not a:
+    f()
+if not a and b:
+    f()
+if not a and not b:
+    f()
+while not a:
+    f()
diff --git a/tests/bytecode/mp-tests/ifexpr1.py b/tests/bytecode/mp-tests/ifexpr1.py
new file mode 100644
index 0000000000..bdb2efc0a1
--- /dev/null
+++ b/tests/bytecode/mp-tests/ifexpr1.py
@@ -0,0 +1 @@
+x = 1 if a else 2
diff --git a/tests/bytecode/mp-tests/import1.py b/tests/bytecode/mp-tests/import1.py
new file mode 100644
index 0000000000..696f3a2708
--- /dev/null
+++ b/tests/bytecode/mp-tests/import1.py
@@ -0,0 +1,5 @@
+a = 1
+def f():
+    global a
+import a
+import b, c
diff --git a/tests/bytecode/mp-tests/import2.py b/tests/bytecode/mp-tests/import2.py
new file mode 100644
index 0000000000..2a89703d9d
--- /dev/null
+++ b/tests/bytecode/mp-tests/import2.py
@@ -0,0 +1 @@
+from a import b
diff --git a/tests/bytecode/mp-tests/import3.py b/tests/bytecode/mp-tests/import3.py
new file mode 100644
index 0000000000..7f365a51eb
--- /dev/null
+++ b/tests/bytecode/mp-tests/import3.py
@@ -0,0 +1,8 @@
+import a.b
+import a.b.c
+from a.b import d
+from a.b.c import d
+
+from a import *
+from a import d, e
+from a import (d, e)
diff --git a/tests/bytecode/mp-tests/import4.py b/tests/bytecode/mp-tests/import4.py
new file mode 100644
index 0000000000..ecc3786755
--- /dev/null
+++ b/tests/bytecode/mp-tests/import4.py
@@ -0,0 +1,3 @@
+import a as y
+import a.b as y
+import a.b.c as y
diff --git a/tests/bytecode/mp-tests/import5.py b/tests/bytecode/mp-tests/import5.py
new file mode 100644
index 0000000000..fb93862d19
--- /dev/null
+++ b/tests/bytecode/mp-tests/import5.py
@@ -0,0 +1,4 @@
+from a import b as c
+from a.b import c as d
+from a.b.c import d as e
+from a.b.c import d as e, f as h
diff --git a/tests/bytecode/mp-tests/lambda1.py b/tests/bytecode/mp-tests/lambda1.py
new file mode 100644
index 0000000000..559c7c20f5
--- /dev/null
+++ b/tests/bytecode/mp-tests/lambda1.py
@@ -0,0 +1,2 @@
+f = lambda: 0
+f = lambda x: 1 + x
diff --git a/tests/bytecode/mp-tests/lambda2.py b/tests/bytecode/mp-tests/lambda2.py
new file mode 100644
index 0000000000..1b4500c08f
--- /dev/null
+++ b/tests/bytecode/mp-tests/lambda2.py
@@ -0,0 +1 @@
+f = lambda *args: args
diff --git a/tests/bytecode/mp-tests/list1.py b/tests/bytecode/mp-tests/list1.py
new file mode 100644
index 0000000000..e2a1a3e9fa
--- /dev/null
+++ b/tests/bytecode/mp-tests/list1.py
@@ -0,0 +1,8 @@
+x = []
+x = [1]
+x = [1,] # not implemented
+x = [1, 2]
+x = [1, 2,]
+x = [1, 2, 3]
+x = [1, 2, 3, 4]
+x = [1, 2, 3, 4, 5]
diff --git a/tests/bytecode/mp-tests/list2.py b/tests/bytecode/mp-tests/list2.py
new file mode 100644
index 0000000000..90b21184da
--- /dev/null
+++ b/tests/bytecode/mp-tests/list2.py
@@ -0,0 +1,8 @@
+x = [()]
+x = [(a)]
+x = [(a,)]
+x = [(a)]
+x = [(a,)]
+x = [a, b]
+x = [(a, b)]
+x = [(a, b, c)]
diff --git a/tests/bytecode/mp-tests/listcomp1.py b/tests/bytecode/mp-tests/listcomp1.py
new file mode 100644
index 0000000000..3a0ef49791
--- /dev/null
+++ b/tests/bytecode/mp-tests/listcomp1.py
@@ -0,0 +1,4 @@
+x = (a for a in l)
+
+f(a for a in l)
+f(a + b for a, b in f())
diff --git a/tests/bytecode/mp-tests/listcomp2.py b/tests/bytecode/mp-tests/listcomp2.py
new file mode 100644
index 0000000000..5f52a5e6b0
--- /dev/null
+++ b/tests/bytecode/mp-tests/listcomp2.py
@@ -0,0 +1 @@
+[x.y for x in k.l]
diff --git a/tests/bytecode/mp-tests/listcomp3.py b/tests/bytecode/mp-tests/listcomp3.py
new file mode 100644
index 0000000000..77a8f2be20
--- /dev/null
+++ b/tests/bytecode/mp-tests/listcomp3.py
@@ -0,0 +1,3 @@
+x = (a + 1 for a in l if a.f())
+
+x = [a + 1 for a in l if a.f()]
diff --git a/tests/bytecode/mp-tests/listcomp4.py b/tests/bytecode/mp-tests/listcomp4.py
new file mode 100644
index 0000000000..6b29993097
--- /dev/null
+++ b/tests/bytecode/mp-tests/listcomp4.py
@@ -0,0 +1,4 @@
+# closing over a local variable in a list comprehension
+def f():
+    a = 1
+    x = [a + b for b in l]
diff --git a/tests/bytecode/mp-tests/listcomp5.py b/tests/bytecode/mp-tests/listcomp5.py
new file mode 100644
index 0000000000..a42d811b75
--- /dev/null
+++ b/tests/bytecode/mp-tests/listcomp5.py
@@ -0,0 +1,11 @@
+# nested ifs
+x = [a for a in l if a if a + 1]
+x = [a for a in l if a if a + 1 if a + 2]
+
+# nested for loops
+x = [a for a in l for l in ls]
+x = [a for ls in lss for l in ls for a in l]
+x = [a for a in l for l in ls for ls in lss]
+
+# nested ifs and for loops
+x = [a for a in l if a for l in ls if l if a for ls in lss if ls]
diff --git a/tests/bytecode/mp-tests/locals1.py b/tests/bytecode/mp-tests/locals1.py
new file mode 100644
index 0000000000..49c34da1ad
--- /dev/null
+++ b/tests/bytecode/mp-tests/locals1.py
@@ -0,0 +1,22 @@
+# to test the order of locals and arguments (LOAD_FAST, STORE_FAST)
+
+def f1():
+    b = 1
+    a = 2
+    return a + b
+
+def f2(b):
+    a = 2
+    return a + b
+
+def f3():
+    def f3f():
+        return True
+    a = 1
+    return f3f(a)
+
+def f4():
+    x = 1
+    def f3f():
+        return True
+    return f3f(x)
diff --git a/tests/bytecode/mp-tests/ptex.py b/tests/bytecode/mp-tests/ptex.py
new file mode 100644
index 0000000000..8f23d78009
--- /dev/null
+++ b/tests/bytecode/mp-tests/ptex.py
@@ -0,0 +1,269 @@
+import sys
+import os
+import os.path
+import datetime
+import argparse
+from xml.etree.ElementTree import Element, SubElement, tostring
+
+from log import Log
+from texparser import TexParser
+from latexparser import LatexParser
+from gettexfile import file_has_suffix
+from gettexfile import get_tex_file
+
+from xiwi.common.misc import buildFileList
+from xiwi.common import arxivid
+from xiwi.common.stats import Statistics
+
+def str_contains(s1, s2):
+    return s1.find(s2) != -1
+
+def str_contains_one_of(st, st_list):
+    for st2 in st_list:
+        if str_contains(st, st2):
+            return True
+    return False
+
+def detect_file_kind(file_obj):
+    """Simple detection of kind of source file."""
+    kind = 'unknown'
+    firstline = file_obj.readline()
+    while firstline.isspace():
+        firstline = file_obj.readline()
+    if firstline.startswith('%!PS'):
+        kind = 'PS'
+    elif firstline.startswith('%auto-ignore'):
+        kind = 'auto-ignore'
+    else:
+        file_obj.seek(0)
+        for line in file_obj:
+            if str_contains(line, '\\def'):
+                # might be tex, if we don't find anything else
+                kind = 'tex'
+            if str_contains(line, '\\input'):
+                # might be tex, if we don't find anything else
+                kind = 'tex'
+            if str_contains(line, 'amstex') or str_contains(line, 'harvmac'):
+                # definitely tex
+                kind = 'tex'
+                break
+            if str_contains(line, '\\documentclass'):
+                # definitely latex
+                kind = 'latex'
+                break
+            if str_contains(line, '\\documentstyle'):
+                # could be tex or latex
+                if str_contains(line, 'amsppt'):
+                    kind = 'tex'
+                    break
+                else:
+                    kind = 'latex'
+                    break
+    file_obj.seek(0)
+    return kind
+
+class WithdrawnPaper(object):
+    def __init__(self):
+        pass
+
+    def __getitem__(self, item):
+        if item == 'refs':
+            return []
+        elif item == 'success':
+            return True
+
+    def parse(self):
+        pass
+
+def process_article(filename):
+    """Returns TexParserBase derived object on success, None on failure."""
+
+    # get the tex file
+    filename, file_obj, tarfile_obj = get_tex_file(filename)
+    if file_obj is None:
+        return None
+
+    # detect the type of file
+    kind = detect_file_kind(file_obj)
+
+    # act on the type of file
+    parser = None
+    if kind == 'PS':
+        print('skipping postscript file')
+    elif kind == 'auto-ignore':
+        print('asked to ignore file, most likely it was withdrawn')
+        parser = WithdrawnPaper()
+    if kind == 'tex':
+        print('parsing as TeX')
+        parser = TexParser(filename, file_obj, tarfile_obj)
+    elif kind == 'latex':
+        print('parsing as LaTeX')
+        parser = LatexParser(filename, file_obj, tarfile_obj)
+    else:
+        print('cannot determine kind of file')
+
+    # attempt to parse the document
+    try:
+        if parser is not None:
+            parser.parse()
+    except Exception as e:
+        print('exception while trying to parse file:')
+        print(str(e))
+        parser = None
+
+    # close the files
+    file_obj.close()
+    if tarfile_obj is not None:
+        tarfile_obj.close()
+
+    # return the parsed document
+    return parser
+
+arxiv_classes = [
+    'acc-phys', 'adap-org', 'alg-geom', 'ao-sci', 'astro-ph', 'atom-ph',
+    'bayes-an', 'chao-dyn', 'chem-ph', 'cmp-lg', 'comp-gas', 'cond-mat',
+    'cs', 'dg-ga', 'funct-an', 'gr-qc', 'hep-ex', 'hep-lat',
+    'hep-ph', 'hep-th', 'math', 'math-ph', 'mtrl-th', 'nlin',
+    'nucl-ex', 'nucl-th', 'patt-sol', 'physics', 'plasm-ph', 'q-alg',
+    'q-bio', 'quant-ph', 'solv-int', 'supr-con'
+]
+
+def do_single_file(file_name, print_xml, write_xml_dir):
+    arxiv_id, arxiv_version = arxivid.filenameToArxivAndVersion(file_name)
+    if arxiv_id is None:
+        print('WARN: could not determine arXiv identifier for', file_name)
+        arxiv_id = '<unknown>'
+        arxiv_version = 0
+
+    Log.reset()
+    Statistics.begin_item(arxiv_id)
+
+    if file_has_suffix(file_name, '.pdf'):
+        Statistics.count('1) pdf')
+        succ = True
+    else:
+        Statistics.count('2) processed')
+
+        parser = process_article(file_name)
+
+        if parser is not None :
+            succ = parser['success']
+            bib_refs = parser['refs']
+        else :
+            succ = False
+            bib_refs = []
+
+        if str_contains_one_of(arxiv_id, ['gr-qc', 'hep-']):
+            Statistics.count('hep-processed')
+            if succ:
+                Statistics.count('hep-success')
+        if succ:
+            print('-success--------')
+            Statistics.count('3) success')
+        else:
+            print('-fail-----------')
+            Statistics.count('4) fail')
+
+        show_ref = False
+
+        if succ and show_ref:
+            for bib_ref in bib_refs:
+                print(bib_ref.key, 'with', bib_ref.cite_count, 'citations in paper')
+                if len(bib_ref.bib_info) == 0:
+                    print('no reference')
+                else:
+                    print(bib_ref.bib_info_as_str(keep_comments=True))
+
+        if succ and (print_xml or write_xml_dir):
+            xml = Element('article')
+            SubElement(xml, 'id').text = arxiv_id
+            if arxiv_version > 0:
+                SubElement(xml, 'version').text = str(arxiv_version)
+            refs = SubElement(xml, 'refs')
+            for bib_ref in bib_refs:
+                bib_text = bib_ref.bib_info_as_str(keep_comments=True)
+                if len(bib_text) != 0:
+                    ncites = bib_ref.cite_count
+                    if ncites < 1:
+                        ncites = 1
+                    ref = SubElement(refs, 'ref', order=str(bib_ref.ref_order_num), freq=str(ncites))
+                    ref.text = bib_text
+            if print_xml:
+                print(tostring(xml))
+            if isinstance(write_xml_dir, str):
+                if arxiv_id != '<unknown>':
+                    xml_file_name = os.path.join(write_xml_dir, arxiv_id.replace('/', '') + '.xml')
+                else:
+                    fname = os.path.split(file_name)[1]
+                    if fname.rfind('.') > 0:
+                        fname = fname[:fname.rfind('.')]
+                    xml_file_name = write_xml_dir + '/' + fname + '.xml'
+                file_obj = open(xml_file_name, 'wb')
+                file_obj.write(tostring(xml, encoding='utf-8'))
+                file_obj.close()
+
+    Statistics.end_item()
+
+    return succ
+
+summaryStrs = []
+
+if __name__ == "__main__":
+    cmd_parser = argparse.ArgumentParser(description='Parse TeX/LaTeX to find references.')
+    cmd_parser.add_argument('--filelist', action='store_true', help='file names on the command line each contain a list of files to process')
+    cmd_parser.add_argument('--print-xml', action='store_true', help='print XML output to stdout')
+    cmd_parser.add_argument('--write-xml', metavar='<dir>', help='destination directory to write XML output files')
+    cmd_parser.add_argument('--failed', metavar='<file>', help='output file to write list of failed files')
+    cmd_parser.add_argument('files', nargs='+', help='input files')
+    args = cmd_parser.parse_args()
+
+    # print date stamp
+    timeStart = datetime.datetime.now()
+    print('[ptex] started processing at', str(timeStart))
+
+    print('given', len(args.files), 'files, first file:', args.files[0])
+    print('================')
+
+    Statistics.clear('article')
+
+    # build list of files to process
+    file_list = buildFileList(args.filelist, args.files)
+
+    # ensure the destination directory exists
+    if args.write_xml is not None and not os.path.exists(args.write_xml):
+        try:
+            os.makedirs(args.write_xml)
+        except:
+            pass
+
+    # process the files
+    failed_files = []
+    for file_name in file_list:
+        success = do_single_file(file_name, args.print_xml, args.write_xml)
+        if not success:
+            failed_files.append(file_name)
+
+    # write the failed files to an output file, if requested
+    if args.failed is not None:
+        file_obj = open(args.failed, 'w')
+        file_obj.writelines(f + '\n' for f in failed_files)
+        file_obj.close()
+
+    print('================')
+    Statistics.show()
+    Statistics.show_detail('fail')
+    #Statistics.show_detail('cite-range')
+    #Statistics.show_detail('bad-ascii')
+    #Statistics.show_detail('non-ascii')
+
+    print('================')
+
+    # print date stamp
+    timeEnd = datetime.datetime.now()
+    print('[ptex] finished processing at', str(timeEnd))
+
+    # print summary for email
+    summaryStrs.extend(Statistics.get_summary())
+    summaryStrs.insert(0, 'started processing at %s, took %.1f minutes' % (timeStart.strftime('%H:%M'), (timeEnd - timeStart).total_seconds() / 60))
+    for s in summaryStrs:
+        print('**SUMMARY** [ptex]', s)
diff --git a/tests/bytecode/mp-tests/raise1.py b/tests/bytecode/mp-tests/raise1.py
new file mode 100644
index 0000000000..9cceed4944
--- /dev/null
+++ b/tests/bytecode/mp-tests/raise1.py
@@ -0,0 +1,11 @@
+def f():
+    raise
+def g():
+    raise 1
+def h():
+    raise 1 from 2
+def i():
+    try:
+        f()
+    except:
+        raise
diff --git a/tests/bytecode/mp-tests/scope0.py b/tests/bytecode/mp-tests/scope0.py
new file mode 100644
index 0000000000..5d81345ea4
--- /dev/null
+++ b/tests/bytecode/mp-tests/scope0.py
@@ -0,0 +1,7 @@
+x = 1
+print(x)
+
+# local store after load
+def f():
+    print(x)
+    x = 1
diff --git a/tests/bytecode/mp-tests/scope1.py b/tests/bytecode/mp-tests/scope1.py
new file mode 100644
index 0000000000..92a0f9fa8c
--- /dev/null
+++ b/tests/bytecode/mp-tests/scope1.py
@@ -0,0 +1,6 @@
+x = 1
+print(x)
+def f1():
+    print(x)
+def f2(x):
+    print(x)
diff --git a/tests/bytecode/mp-tests/scope2.py b/tests/bytecode/mp-tests/scope2.py
new file mode 100644
index 0000000000..af9e372318
--- /dev/null
+++ b/tests/bytecode/mp-tests/scope2.py
@@ -0,0 +1,18 @@
+# scope
+
+gl = 1
+
+def f(x):
+    global gl
+    gl += 2
+    lo1 = 3
+    lo2 = 4
+    lo3 = 5
+
+    def f2(x, y):
+        global gl
+        nonlocal lo3
+        lo3 = 5
+        lo4 = gl + lo2 + lo3
+
+    return f2
diff --git a/tests/bytecode/mp-tests/scope3.py b/tests/bytecode/mp-tests/scope3.py
new file mode 100644
index 0000000000..a5fc8d09fe
--- /dev/null
+++ b/tests/bytecode/mp-tests/scope3.py
@@ -0,0 +1,11 @@
+# test nested functions and scope
+
+def f(x):
+    def f2(y):
+        return y + x
+    print(f2(x))
+    return f2
+x=f(2)
+print(x, x(5))
+f=123
+print(f(f))
diff --git a/tests/bytecode/mp-tests/scope4.py b/tests/bytecode/mp-tests/scope4.py
new file mode 100644
index 0000000000..70968cdf30
--- /dev/null
+++ b/tests/bytecode/mp-tests/scope4.py
@@ -0,0 +1,14 @@
+# test scope
+
+def f(x):
+    global x42
+    print(x, x42)
+    x42 = x
+
+x42 = 123
+f(1)
+print(x42)
+
+x42 = 456
+f(2)
+print(x42)
diff --git a/tests/bytecode/mp-tests/scope5.py b/tests/bytecode/mp-tests/scope5.py
new file mode 100644
index 0000000000..a14de350ed
--- /dev/null
+++ b/tests/bytecode/mp-tests/scope5.py
@@ -0,0 +1,12 @@
+# test scope
+
+def f(x):
+    def f2(y):
+        print(y, x42, y42)
+    x42 = x = y42 = 123
+    myf2 = f2
+    x42 = 456
+    return myf2
+
+myf = f(1)
+myf(1)
diff --git a/tests/bytecode/mp-tests/scope6.py b/tests/bytecode/mp-tests/scope6.py
new file mode 100644
index 0000000000..4848378887
--- /dev/null
+++ b/tests/bytecode/mp-tests/scope6.py
@@ -0,0 +1,7 @@
+# closed over variable 2 deep
+
+def f():
+    x = 1
+    def g():
+        def h():
+            return 1 + x
diff --git a/tests/bytecode/mp-tests/scope7.py b/tests/bytecode/mp-tests/scope7.py
new file mode 100644
index 0000000000..699d12510c
--- /dev/null
+++ b/tests/bytecode/mp-tests/scope7.py
@@ -0,0 +1,15 @@
+# test order of closed over locals
+# note that CPython seems to sort closed over variables (but not fast locals)
+
+def f():
+    l1 = 1
+    l2 = 4
+    l3 = 3
+    l4 = 2
+    l5 = 5
+
+    def g():
+        return l1 + l4 + l3 + l2 + l5
+
+    def h():
+        return l1 + l2 + l3 + l4 + l5
diff --git a/tests/bytecode/mp-tests/set1.py b/tests/bytecode/mp-tests/set1.py
new file mode 100644
index 0000000000..f6de75606c
--- /dev/null
+++ b/tests/bytecode/mp-tests/set1.py
@@ -0,0 +1,6 @@
+x = set()
+x = {1}
+x = {1,}
+x = {1, 2}
+x = {1, 2,}
+x = {1, 2, 3}
diff --git a/tests/bytecode/mp-tests/setcomp1.py b/tests/bytecode/mp-tests/setcomp1.py
new file mode 100644
index 0000000000..82927f5d19
--- /dev/null
+++ b/tests/bytecode/mp-tests/setcomp1.py
@@ -0,0 +1,2 @@
+x = {a for a in l}
+x = {a + b for a, b in l if b}
diff --git a/tests/bytecode/mp-tests/slice1.py b/tests/bytecode/mp-tests/slice1.py
new file mode 100644
index 0000000000..008e57c182
--- /dev/null
+++ b/tests/bytecode/mp-tests/slice1.py
@@ -0,0 +1,16 @@
+x = x[:]
+x = x[::]
+x = x[::c]
+x = x[:b]
+x = x[:b:]
+x = x[:b:c]
+x = x[a]
+x = x[a:]
+x = x[a::]
+x = x[a::c]
+x = x[a:b]
+x = x[a:b:]
+x = x[a:b:c]
+
+x[0] = 1
+x[x] = x
diff --git a/tests/bytecode/mp-tests/slice2.py b/tests/bytecode/mp-tests/slice2.py
new file mode 100644
index 0000000000..e329156c3c
--- /dev/null
+++ b/tests/bytecode/mp-tests/slice2.py
@@ -0,0 +1,3 @@
+x = x[a, b]
+
+x[a, b] = x
diff --git a/tests/bytecode/mp-tests/string1.py b/tests/bytecode/mp-tests/string1.py
new file mode 100644
index 0000000000..d6ddc7ae43
--- /dev/null
+++ b/tests/bytecode/mp-tests/string1.py
@@ -0,0 +1,11 @@
+x = 'abc'
+x = "abc"
+x = r'abc'
+x = 'abc' \
+'def'
+x = ('abc'
+    'def')
+
+x = 'ab"c'
+x = "ab'c"
+x = '''ab'c'''
diff --git a/tests/bytecode/mp-tests/string2.py b/tests/bytecode/mp-tests/string2.py
new file mode 100644
index 0000000000..70dc9924b0
--- /dev/null
+++ b/tests/bytecode/mp-tests/string2.py
@@ -0,0 +1,14 @@
+'abc'
+class f:
+    u"123"
+    pass
+x = 'abc'
+x = u"abc"
+x = u"ab\\c"
+x = r"ab\\c"
+x = b"abc"
+x = rb"abc"
+x = b"ab\\c"
+x = rb"ab\\c"
+x = """abc"""
+x = b"""abc"""
diff --git a/tests/bytecode/mp-tests/super1.py b/tests/bytecode/mp-tests/super1.py
new file mode 100644
index 0000000000..1512429939
--- /dev/null
+++ b/tests/bytecode/mp-tests/super1.py
@@ -0,0 +1,17 @@
+class A(B):
+    def f():
+        super.a()
+
+class B(C):
+    def g():
+        def h():
+            super.a()
+
+super.a()
+
+def i():
+    super.a()
+
+def j():
+    def k():
+        super.a()
diff --git a/tests/bytecode/mp-tests/try1.py b/tests/bytecode/mp-tests/try1.py
new file mode 100644
index 0000000000..10344c8ae3
--- /dev/null
+++ b/tests/bytecode/mp-tests/try1.py
@@ -0,0 +1,13 @@
+def f(x):
+    try:
+        f(x)
+    except:
+        f(x)
+    try:
+        f(x)
+    except Exception:
+        f(x)
+    try:
+        f(x)
+    except Exception as e:
+        f(x, e)
diff --git a/tests/bytecode/mp-tests/try2.py b/tests/bytecode/mp-tests/try2.py
new file mode 100644
index 0000000000..efdac04756
--- /dev/null
+++ b/tests/bytecode/mp-tests/try2.py
@@ -0,0 +1,5 @@
+def f():
+    try:
+        f()
+    finally:
+        g()
diff --git a/tests/bytecode/mp-tests/try3.py b/tests/bytecode/mp-tests/try3.py
new file mode 100644
index 0000000000..9741aaf681
--- /dev/null
+++ b/tests/bytecode/mp-tests/try3.py
@@ -0,0 +1,14 @@
+def f():
+    try:
+        f()
+    except:
+        g()
+    finally:
+        f()
+
+    try:
+        f()
+    except Exception:
+        g()
+    finally:
+        f()
diff --git a/tests/bytecode/mp-tests/try4.py b/tests/bytecode/mp-tests/try4.py
new file mode 100644
index 0000000000..412cb74ee5
--- /dev/null
+++ b/tests/bytecode/mp-tests/try4.py
@@ -0,0 +1,22 @@
+try:
+    f()
+except A:
+    g()
+except:
+    h()
+
+try:
+    f()
+except A:
+    g()
+except B as c:
+    h()
+
+try:
+    f()
+except A:
+    g()
+except B as c:
+    h()
+except:
+    i()
diff --git a/tests/bytecode/mp-tests/try5.py b/tests/bytecode/mp-tests/try5.py
new file mode 100644
index 0000000000..7ba7949125
--- /dev/null
+++ b/tests/bytecode/mp-tests/try5.py
@@ -0,0 +1,8 @@
+try:
+    f()
+except A:
+    g()
+except B as b:
+    h()
+finally:
+    i()
diff --git a/tests/bytecode/mp-tests/try6.py b/tests/bytecode/mp-tests/try6.py
new file mode 100644
index 0000000000..d5b68722e9
--- /dev/null
+++ b/tests/bytecode/mp-tests/try6.py
@@ -0,0 +1,15 @@
+try:
+    f()
+except:
+    g()
+else:
+    h()
+
+try:
+    f()
+except:
+    g()
+else:
+    h()
+finally:
+    i()
diff --git a/tests/bytecode/mp-tests/tuple1.py b/tests/bytecode/mp-tests/tuple1.py
new file mode 100644
index 0000000000..d70e4cf569
--- /dev/null
+++ b/tests/bytecode/mp-tests/tuple1.py
@@ -0,0 +1,17 @@
+x = ()
+x = a
+x = a,
+x = a, 2
+x = a, 2,
+x = a, 2, 3
+x = a, 2, 3, 4
+x = a, 2, 3, 4, 5
+
+x = ()
+x = (a)
+x = (a,)
+x = (a, 2)
+x = (a, 2,)
+x = (a, 2, 3)
+x = (a, 2, 3, 4)
+x = (a, 2, 3, 4, 5)
diff --git a/tests/bytecode/mp-tests/tuple2.py b/tests/bytecode/mp-tests/tuple2.py
new file mode 100644
index 0000000000..df11e74ce0
--- /dev/null
+++ b/tests/bytecode/mp-tests/tuple2.py
@@ -0,0 +1,15 @@
+x = t
+x, = t
+x, y = t
+x, y, = t
+x, y, z = t
+x, y, z, = t
+x, y, z, z = a, b, c, d
+
+(x) = t
+(x,) = t
+(x, y) = t
+(x, y,) = t
+(x, y, z) = t
+(x, y, z,) = t
+(x, y, z, z) = a, b, c, d
diff --git a/tests/bytecode/mp-tests/tuple3.py b/tests/bytecode/mp-tests/tuple3.py
new file mode 100644
index 0000000000..29ddd86d08
--- /dev/null
+++ b/tests/bytecode/mp-tests/tuple3.py
@@ -0,0 +1,4 @@
+def f(x):
+    return x, x + 1
+for a in b, c:
+    f(a)
diff --git a/tests/bytecode/mp-tests/with1.py b/tests/bytecode/mp-tests/with1.py
new file mode 100644
index 0000000000..897ec530fa
--- /dev/null
+++ b/tests/bytecode/mp-tests/with1.py
@@ -0,0 +1,8 @@
+with x:
+    f()
+with x():
+    f()
+with f() as x:
+    f(x)
+with f() as x, g() as y:
+    f(x, y)
diff --git a/tests/bytecode/mp-tests/yield1.py b/tests/bytecode/mp-tests/yield1.py
new file mode 100644
index 0000000000..114151e718
--- /dev/null
+++ b/tests/bytecode/mp-tests/yield1.py
@@ -0,0 +1,17 @@
+# generators and yield
+
+def main():
+    def f():
+        print(123)
+        yield
+        print(456)
+        yield 2
+        print(789)
+
+    a = f()
+    print(a)
+    print(a.__next__())
+    print(a.__next__())
+    #print(a.__next__())
+
+main()
diff --git a/tests/bytecode/mp-tests/yield2.py b/tests/bytecode/mp-tests/yield2.py
new file mode 100644
index 0000000000..140fe0795a
--- /dev/null
+++ b/tests/bytecode/mp-tests/yield2.py
@@ -0,0 +1,4 @@
+def f():
+    yield from a
+    yield from (a, b)
+    yield from f(a)
diff --git a/tests/bytecode/pylib-tests/_compat_pickle.py b/tests/bytecode/pylib-tests/_compat_pickle.py
new file mode 100644
index 0000000000..700c80cd57
--- /dev/null
+++ b/tests/bytecode/pylib-tests/_compat_pickle.py
@@ -0,0 +1,81 @@
+# This module is used to map the old Python 2 names to the new names used in
+# Python 3 for the pickle module.  This is needed to make pickle streams
+# generated with Python 2 loadable by Python 3.
+
+# This is a copy of lib2to3.fixes.fix_imports.MAPPING.  We cannot import
+# lib2to3 and use the mapping defined there, because lib2to3 uses pickle.
+# Thus, this could cause the module to be imported recursively.
+IMPORT_MAPPING = {
+    'StringIO': 'io',
+    'cStringIO': 'io',
+    'cPickle': 'pickle',
+    '__builtin__' : 'builtins',
+    'copy_reg': 'copyreg',
+    'Queue': 'queue',
+    'SocketServer': 'socketserver',
+    'ConfigParser': 'configparser',
+    'repr': 'reprlib',
+    'FileDialog': 'tkinter.filedialog',
+    'tkFileDialog': 'tkinter.filedialog',
+    'SimpleDialog': 'tkinter.simpledialog',
+    'tkSimpleDialog': 'tkinter.simpledialog',
+    'tkColorChooser': 'tkinter.colorchooser',
+    'tkCommonDialog': 'tkinter.commondialog',
+    'Dialog': 'tkinter.dialog',
+    'Tkdnd': 'tkinter.dnd',
+    'tkFont': 'tkinter.font',
+    'tkMessageBox': 'tkinter.messagebox',
+    'ScrolledText': 'tkinter.scrolledtext',
+    'Tkconstants': 'tkinter.constants',
+    'Tix': 'tkinter.tix',
+    'ttk': 'tkinter.ttk',
+    'Tkinter': 'tkinter',
+    'markupbase': '_markupbase',
+    '_winreg': 'winreg',
+    'thread': '_thread',
+    'dummy_thread': '_dummy_thread',
+    'dbhash': 'dbm.bsd',
+    'dumbdbm': 'dbm.dumb',
+    'dbm': 'dbm.ndbm',
+    'gdbm': 'dbm.gnu',
+    'xmlrpclib': 'xmlrpc.client',
+    'DocXMLRPCServer': 'xmlrpc.server',
+    'SimpleXMLRPCServer': 'xmlrpc.server',
+    'httplib': 'http.client',
+    'htmlentitydefs' : 'html.entities',
+    'HTMLParser' : 'html.parser',
+    'Cookie': 'http.cookies',
+    'cookielib': 'http.cookiejar',
+    'BaseHTTPServer': 'http.server',
+    'SimpleHTTPServer': 'http.server',
+    'CGIHTTPServer': 'http.server',
+    'test.test_support': 'test.support',
+    'commands': 'subprocess',
+    'UserString' : 'collections',
+    'UserList' : 'collections',
+    'urlparse' : 'urllib.parse',
+    'robotparser' : 'urllib.robotparser',
+    'whichdb': 'dbm',
+    'anydbm': 'dbm'
+}
+
+
+# This contains rename rules that are easy to handle.  We ignore the more
+# complex stuff (e.g. mapping the names in the urllib and types modules).
+# These rules should be run before import names are fixed.
+NAME_MAPPING = {
+    ('__builtin__', 'xrange'): ('builtins', 'range'),
+    ('__builtin__', 'reduce'): ('functools', 'reduce'),
+    ('__builtin__', 'intern'): ('sys', 'intern'),
+    ('__builtin__', 'unichr'): ('builtins', 'chr'),
+    ('__builtin__', 'basestring'): ('builtins', 'str'),
+    ('__builtin__', 'long'): ('builtins', 'int'),
+    ('itertools', 'izip'): ('builtins', 'zip'),
+    ('itertools', 'imap'): ('builtins', 'map'),
+    ('itertools', 'ifilter'): ('builtins', 'filter'),
+    ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'),
+}
+
+# Same, but for 3.x to 2.x
+REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items())
+REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items())
diff --git a/tests/bytecode/pylib-tests/_threading_local.py b/tests/bytecode/pylib-tests/_threading_local.py
new file mode 100644
index 0000000000..4ec4828144
--- /dev/null
+++ b/tests/bytecode/pylib-tests/_threading_local.py
@@ -0,0 +1,246 @@
+"""Thread-local objects.
+
+(Note that this module provides a Python version of the threading.local
+ class.  Depending on the version of Python you're using, there may be a
+ faster one available.  You should always import the `local` class from
+ `threading`.)
+
+Thread-local objects support the management of thread-local data.
+If you have data that you want to be local to a thread, simply create
+a thread-local object and use its attributes:
+
+  >>> mydata = local()
+  >>> mydata.number = 42
+  >>> mydata.number
+  42
+
+You can also access the local-object's dictionary:
+
+  >>> mydata.__dict__
+  {'number': 42}
+  >>> mydata.__dict__.setdefault('widgets', [])
+  []
+  >>> mydata.widgets
+  []
+
+What's important about thread-local objects is that their data are
+local to a thread. If we access the data in a different thread:
+
+  >>> log = []
+  >>> def f():
+  ...     items = sorted(mydata.__dict__.items())
+  ...     log.append(items)
+  ...     mydata.number = 11
+  ...     log.append(mydata.number)
+
+  >>> import threading
+  >>> thread = threading.Thread(target=f)
+  >>> thread.start()
+  >>> thread.join()
+  >>> log
+  [[], 11]
+
+we get different data.  Furthermore, changes made in the other thread
+don't affect data seen in this thread:
+
+  >>> mydata.number
+  42
+
+Of course, values you get from a local object, including a __dict__
+attribute, are for whatever thread was current at the time the
+attribute was read.  For that reason, you generally don't want to save
+these values across threads, as they apply only to the thread they
+came from.
+
+You can create custom local objects by subclassing the local class:
+
+  >>> class MyLocal(local):
+  ...     number = 2
+  ...     initialized = False
+  ...     def __init__(self, **kw):
+  ...         if self.initialized:
+  ...             raise SystemError('__init__ called too many times')
+  ...         self.initialized = True
+  ...         self.__dict__.update(kw)
+  ...     def squared(self):
+  ...         return self.number ** 2
+
+This can be useful to support default values, methods and
+initialization.  Note that if you define an __init__ method, it will be
+called each time the local object is used in a separate thread.  This
+is necessary to initialize each thread's dictionary.
+
+Now if we create a local object:
+
+  >>> mydata = MyLocal(color='red')
+
+Now we have a default number:
+
+  >>> mydata.number
+  2
+
+an initial color:
+
+  >>> mydata.color
+  'red'
+  >>> del mydata.color
+
+And a method that operates on the data:
+
+  >>> mydata.squared()
+  4
+
+As before, we can access the data in a separate thread:
+
+  >>> log = []
+  >>> thread = threading.Thread(target=f)
+  >>> thread.start()
+  >>> thread.join()
+  >>> log
+  [[('color', 'red'), ('initialized', True)], 11]
+
+without affecting this thread's data:
+
+  >>> mydata.number
+  2
+  >>> mydata.color
+  Traceback (most recent call last):
+  ...
+  AttributeError: 'MyLocal' object has no attribute 'color'
+
+Note that subclasses can define slots, but they are not thread
+local.  They are shared across threads:
+
+  >>> class MyLocal(local):
+  ...     __slots__ = 'number'
+
+  >>> mydata = MyLocal()
+  >>> mydata.number = 42
+  >>> mydata.color = 'red'
+
+So, the separate thread:
+
+  >>> thread = threading.Thread(target=f)
+  >>> thread.start()
+  >>> thread.join()
+
+affects what we see:
+
+  >>> mydata.number
+  11
+
+>>> del mydata
+"""
+
+from weakref import ref
+from contextlib import contextmanager
+
+__all__ = ["local"]
+
+# We need to use objects from the threading module, but the threading
+# module may also want to use our `local` class, if support for locals
+# isn't compiled in to the `thread` module.  This creates potential problems
+# with circular imports.  For that reason, we don't import `threading`
+# until the bottom of this file (a hack sufficient to worm around the
+# potential problems).  Note that all platforms on CPython do have support
+# for locals in the `thread` module, and there is no circular import problem
+# then, so problems introduced by fiddling the order of imports here won't
+# manifest.
+
+class _localimpl:
+    """A class managing thread-local dicts"""
+    __slots__ = 'key', 'dicts', 'localargs', 'locallock', '__weakref__'
+
+    def __init__(self):
+        # The key used in the Thread objects' attribute dicts.
+        # We keep it a string for speed but make it unlikely to clash with
+        # a "real" attribute.
+        self.key = '_threading_local._localimpl.' + str(id(self))
+        # { id(Thread) -> (ref(Thread), thread-local dict) }
+        self.dicts = {}
+
+    def get_dict(self):
+        """Return the dict for the current thread. Raises KeyError if none
+        defined."""
+        thread = current_thread()
+        return self.dicts[id(thread)][1]
+
+    def create_dict(self):
+        """Create a new dict for the current thread, and return it."""
+        localdict = {}
+        key = self.key
+        thread = current_thread()
+        idt = id(thread)
+        def local_deleted(_, key=key):
+            # When the localimpl is deleted, remove the thread attribute.
+            thread = wrthread()
+            if thread is not None:
+                del thread.__dict__[key]
+        def thread_deleted(_, idt=idt):
+            # When the thread is deleted, remove the local dict.
+            # Note that this is suboptimal if the thread object gets
+            # caught in a reference loop. We would like to be called
+            # as soon as the OS-level thread ends instead.
+            local = wrlocal()
+            if local is not None:
+                dct = local.dicts.pop(idt)
+        wrlocal = ref(self, local_deleted)
+        wrthread = ref(thread, thread_deleted)
+        thread.__dict__[key] = wrlocal
+        self.dicts[idt] = wrthread, localdict
+        return localdict
+
+
+@contextmanager
+def _patch(self):
+    impl = object.__getattribute__(self, '_local__impl')
+    try:
+        dct = impl.get_dict()
+    except KeyError:
+        dct = impl.create_dict()
+        args, kw = impl.localargs
+        self.__init__(*args, **kw)
+    with impl.locallock:
+        object.__setattr__(self, '__dict__', dct)
+        yield
+
+
+class local:
+    __slots__ = '_local__impl', '__dict__'
+
+    def __new__(cls, *args, **kw):
+        if (args or kw) and (cls.__init__ is object.__init__):
+            raise TypeError("Initialization arguments are not supported")
+        self = object.__new__(cls)
+        impl = _localimpl()
+        impl.localargs = (args, kw)
+        impl.locallock = RLock()
+        object.__setattr__(self, '_local__impl', impl)
+        # We need to create the thread dict in anticipation of
+        # __init__ being called, to make sure we don't call it
+        # again ourselves.
+        impl.create_dict()
+        return self
+
+    def __getattribute__(self, name):
+        with _patch(self):
+            return object.__getattribute__(self, name)
+
+    def __setattr__(self, name, value):
+        if name == '__dict__':
+            raise AttributeError(
+                "%r object attribute '__dict__' is read-only"
+                % self.__class__.__name__)
+        with _patch(self):
+            return object.__setattr__(self, name, value)
+
+    def __delattr__(self, name):
+        if name == '__dict__':
+            raise AttributeError(
+                "%r object attribute '__dict__' is read-only"
+                % self.__class__.__name__)
+        with _patch(self):
+            return object.__delattr__(self, name)
+
+
+from threading import current_thread, RLock
diff --git a/tests/bytecode/pylib-tests/_weakrefset.py b/tests/bytecode/pylib-tests/_weakrefset.py
new file mode 100644
index 0000000000..6a98b88e33
--- /dev/null
+++ b/tests/bytecode/pylib-tests/_weakrefset.py
@@ -0,0 +1,194 @@
+# Access WeakSet through the weakref module.
+# This code is separated-out because it is needed
+# by abc.py to load everything else at startup.
+
+from _weakref import ref
+
+__all__ = ['WeakSet']
+
+
+class _IterationGuard:
+    # This context manager registers itself in the current iterators of the
+    # weak container, such as to delay all removals until the context manager
+    # exits.
+    # This technique should be relatively thread-safe (since sets are).
+
+    def __init__(self, weakcontainer):
+        # Don't create cycles
+        self.weakcontainer = ref(weakcontainer)
+
+    def __enter__(self):
+        w = self.weakcontainer()
+        if w is not None:
+            w._iterating.add(self)
+        return self
+
+    def __exit__(self, e, t, b):
+        w = self.weakcontainer()
+        if w is not None:
+            s = w._iterating
+            s.remove(self)
+            if not s:
+                w._commit_removals()
+
+
+class WeakSet:
+    def __init__(self, data=None):
+        self.data = set()
+        def _remove(item, selfref=ref(self)):
+            self = selfref()
+            if self is not None:
+                if self._iterating:
+                    self._pending_removals.append(item)
+                else:
+                    self.data.discard(item)
+        self._remove = _remove
+        # A list of keys to be removed
+        self._pending_removals = []
+        self._iterating = set()
+        if data is not None:
+            self.update(data)
+
+    def _commit_removals(self):
+        l = self._pending_removals
+        discard = self.data.discard
+        while l:
+            discard(l.pop())
+
+    def __iter__(self):
+        with _IterationGuard(self):
+            for itemref in self.data:
+                item = itemref()
+                if item is not None:
+                    yield item
+
+    def __len__(self):
+        return len(self.data) - len(self._pending_removals)
+
+    def __contains__(self, item):
+        try:
+            wr = ref(item)
+        except TypeError:
+            return False
+        return wr in self.data
+
+    def __reduce__(self):
+        return (self.__class__, (list(self),),
+                getattr(self, '__dict__', None))
+
+    def add(self, item):
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.add(ref(item, self._remove))
+
+    def clear(self):
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.clear()
+
+    def copy(self):
+        return self.__class__(self)
+
+    def pop(self):
+        if self._pending_removals:
+            self._commit_removals()
+        while True:
+            try:
+                itemref = self.data.pop()
+            except KeyError:
+                raise KeyError('pop from empty WeakSet')
+            item = itemref()
+            if item is not None:
+                return item
+
+    def remove(self, item):
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.remove(ref(item))
+
+    def discard(self, item):
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.discard(ref(item))
+
+    def update(self, other):
+        if self._pending_removals:
+            self._commit_removals()
+        for element in other:
+            self.add(element)
+
+    def __ior__(self, other):
+        self.update(other)
+        return self
+
+    def difference(self, other):
+        newset = self.copy()
+        newset.difference_update(other)
+        return newset
+    __sub__ = difference
+
+    def difference_update(self, other):
+        self.__isub__(other)
+    def __isub__(self, other):
+        if self._pending_removals:
+            self._commit_removals()
+        if self is other:
+            self.data.clear()
+        else:
+            self.data.difference_update(ref(item) for item in other)
+        return self
+
+    def intersection(self, other):
+        return self.__class__(item for item in other if item in self)
+    __and__ = intersection
+
+    def intersection_update(self, other):
+        self.__iand__(other)
+    def __iand__(self, other):
+        if self._pending_removals:
+            self._commit_removals()
+        self.data.intersection_update(ref(item) for item in other)
+        return self
+
+    def issubset(self, other):
+        return self.data.issubset(ref(item) for item in other)
+    __le__ = issubset
+
+    def __lt__(self, other):
+        return self.data < set(ref(item) for item in other)
+
+    def issuperset(self, other):
+        return self.data.issuperset(ref(item) for item in other)
+    __ge__ = issuperset
+
+    def __gt__(self, other):
+        return self.data > set(ref(item) for item in other)
+
+    def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return NotImplemented
+        return self.data == set(ref(item) for item in other)
+
+    def symmetric_difference(self, other):
+        newset = self.copy()
+        newset.symmetric_difference_update(other)
+        return newset
+    __xor__ = symmetric_difference
+
+    def symmetric_difference_update(self, other):
+        self.__ixor__(other)
+    def __ixor__(self, other):
+        if self._pending_removals:
+            self._commit_removals()
+        if self is other:
+            self.data.clear()
+        else:
+            self.data.symmetric_difference_update(ref(item, self._remove) for item in other)
+        return self
+
+    def union(self, other):
+        return self.__class__(e for s in (self, other) for e in s)
+    __or__ = union
+
+    def isdisjoint(self, other):
+        return len(self.intersection(other)) == 0
diff --git a/tests/bytecode/pylib-tests/abc.py b/tests/bytecode/pylib-tests/abc.py
new file mode 100644
index 0000000000..09778e8609
--- /dev/null
+++ b/tests/bytecode/pylib-tests/abc.py
@@ -0,0 +1,228 @@
+# Copyright 2007 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Abstract Base Classes (ABCs) according to PEP 3119."""
+
+from _weakrefset import WeakSet
+
+def abstractmethod(funcobj):
+    """A decorator indicating abstract methods.
+
+    Requires that the metaclass is ABCMeta or derived from it.  A
+    class that has a metaclass derived from ABCMeta cannot be
+    instantiated unless all of its abstract methods are overridden.
+    The abstract methods can be called using any of the normal
+    'super' call mechanisms.
+
+    Usage:
+
+        class C(metaclass=ABCMeta):
+            @abstractmethod
+            def my_abstract_method(self, ...):
+                ...
+    """
+    funcobj.__isabstractmethod__ = True
+    return funcobj
+
+
+class abstractclassmethod(classmethod):
+    """
+    A decorator indicating abstract classmethods.
+
+    Similar to abstractmethod.
+
+    Usage:
+
+        class C(metaclass=ABCMeta):
+            @abstractclassmethod
+            def my_abstract_classmethod(cls, ...):
+                ...
+
+    'abstractclassmethod' is deprecated. Use 'classmethod' with
+    'abstractmethod' instead.
+    """
+
+    __isabstractmethod__ = True
+
+    def __init__(self, callable):
+        callable.__isabstractmethod__ = True
+        super().__init__(callable)
+
+
+class abstractstaticmethod(staticmethod):
+    """
+    A decorator indicating abstract staticmethods.
+
+    Similar to abstractmethod.
+
+    Usage:
+
+        class C(metaclass=ABCMeta):
+            @abstractstaticmethod
+            def my_abstract_staticmethod(...):
+                ...
+
+    'abstractstaticmethod' is deprecated. Use 'staticmethod' with
+    'abstractmethod' instead.
+    """
+
+    __isabstractmethod__ = True
+
+    def __init__(self, callable):
+        callable.__isabstractmethod__ = True
+        super().__init__(callable)
+
+
+class abstractproperty(property):
+    """
+    A decorator indicating abstract properties.
+
+    Requires that the metaclass is ABCMeta or derived from it.  A
+    class that has a metaclass derived from ABCMeta cannot be
+    instantiated unless all of its abstract properties are overridden.
+    The abstract properties can be called using any of the normal
+    'super' call mechanisms.
+
+    Usage:
+
+        class C(metaclass=ABCMeta):
+            @abstractproperty
+            def my_abstract_property(self):
+                ...
+
+    This defines a read-only property; you can also define a read-write
+    abstract property using the 'long' form of property declaration:
+
+        class C(metaclass=ABCMeta):
+            def getx(self): ...
+            def setx(self, value): ...
+            x = abstractproperty(getx, setx)
+
+    'abstractproperty' is deprecated. Use 'property' with 'abstractmethod'
+    instead.
+    """
+
+    __isabstractmethod__ = True
+
+
+class ABCMeta(type):
+
+    """Metaclass for defining Abstract Base Classes (ABCs).
+
+    Use this metaclass to create an ABC.  An ABC can be subclassed
+    directly, and then acts as a mix-in class.  You can also register
+    unrelated concrete classes (even built-in classes) and unrelated
+    ABCs as 'virtual subclasses' -- these and their descendants will
+    be considered subclasses of the registering ABC by the built-in
+    issubclass() function, but the registering ABC won't show up in
+    their MRO (Method Resolution Order) nor will method
+    implementations defined by the registering ABC be callable (not
+    even via super()).
+
+    """
+
+    # A global counter that is incremented each time a class is
+    # registered as a virtual subclass of anything.  It forces the
+    # negative cache to be cleared before its next use.
+    _abc_invalidation_counter = 0
+
+    def __new__(mcls, name, bases, namespace):
+        cls = super().__new__(mcls, name, bases, namespace)
+        # Compute set of abstract method names
+        abstracts = {name
+                     for name, value in namespace.items()
+                     if getattr(value, "__isabstractmethod__", False)}
+        for base in bases:
+            for name in getattr(base, "__abstractmethods__", set()):
+                value = getattr(cls, name, None)
+                if getattr(value, "__isabstractmethod__", False):
+                    abstracts.add(name)
+        cls.__abstractmethods__ = frozenset(abstracts)
+        # Set up inheritance registry
+        cls._abc_registry = WeakSet()
+        cls._abc_cache = WeakSet()
+        cls._abc_negative_cache = WeakSet()
+        cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter
+        return cls
+
+    def register(cls, subclass):
+        """Register a virtual subclass of an ABC.
+
+        Returns the subclass, to allow usage as a class decorator.
+        """
+        if not isinstance(subclass, type):
+            raise TypeError("Can only register classes")
+        if issubclass(subclass, cls):
+            return subclass  # Already a subclass
+        # Subtle: test for cycles *after* testing for "already a subclass";
+        # this means we allow X.register(X) and interpret it as a no-op.
+        if issubclass(cls, subclass):
+            # This would create a cycle, which is bad for the algorithm below
+            raise RuntimeError("Refusing to create an inheritance cycle")
+        cls._abc_registry.add(subclass)
+        ABCMeta._abc_invalidation_counter += 1  # Invalidate negative cache
+        return subclass
+
+    def _dump_registry(cls, file=None):
+        """Debug helper to print the ABC registry."""
+        print("Class: %s.%s" % (cls.__module__, cls.__name__), file=file)
+        print("Inv.counter: %s" % ABCMeta._abc_invalidation_counter, file=file)
+        for name in sorted(cls.__dict__.keys()):
+            if name.startswith("_abc_"):
+                value = getattr(cls, name)
+                print("%s: %r" % (name, value), file=file)
+
+    def __instancecheck__(cls, instance):
+        """Override for isinstance(instance, cls)."""
+        # Inline the cache checking
+        subclass = instance.__class__
+        if subclass in cls._abc_cache:
+            return True
+        subtype = type(instance)
+        if subtype is subclass:
+            if (cls._abc_negative_cache_version ==
+                ABCMeta._abc_invalidation_counter and
+                subclass in cls._abc_negative_cache):
+                return False
+            # Fall back to the subclass check.
+            return cls.__subclasscheck__(subclass)
+        return any(cls.__subclasscheck__(c) for c in {subclass, subtype})
+
+    def __subclasscheck__(cls, subclass):
+        """Override for issubclass(subclass, cls)."""
+        # Check cache
+        if subclass in cls._abc_cache:
+            return True
+        # Check negative cache; may have to invalidate
+        if cls._abc_negative_cache_version < ABCMeta._abc_invalidation_counter:
+            # Invalidate the negative cache
+            cls._abc_negative_cache = WeakSet()
+            cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter
+        elif subclass in cls._abc_negative_cache:
+            return False
+        # Check the subclass hook
+        ok = cls.__subclasshook__(subclass)
+        if ok is not NotImplemented:
+            assert isinstance(ok, bool)
+            if ok:
+                cls._abc_cache.add(subclass)
+            else:
+                cls._abc_negative_cache.add(subclass)
+            return ok
+        # Check if it's a direct subclass
+        if cls in getattr(subclass, '__mro__', ()):
+            cls._abc_cache.add(subclass)
+            return True
+        # Check if it's a subclass of a registered class (recursive)
+        for rcls in cls._abc_registry:
+            if issubclass(subclass, rcls):
+                cls._abc_cache.add(subclass)
+                return True
+        # Check if it's a subclass of a subclass (recursive)
+        for scls in cls.__subclasses__():
+            if issubclass(subclass, scls):
+                cls._abc_cache.add(subclass)
+                return True
+        # No dice; update negative cache
+        cls._abc_negative_cache.add(subclass)
+        return False
diff --git a/tests/bytecode/pylib-tests/aifc.py b/tests/bytecode/pylib-tests/aifc.py
new file mode 100644
index 0000000000..dd17d1dc27
--- /dev/null
+++ b/tests/bytecode/pylib-tests/aifc.py
@@ -0,0 +1,895 @@
+"""Stuff to parse AIFF-C and AIFF files.
+
+Unless explicitly stated otherwise, the description below is true
+both for AIFF-C files and AIFF files.
+
+An AIFF-C file has the following structure.
+
+  +-----------------+
+  | FORM            |
+  +-----------------+
+  |                 |
+  +----+------------+
+  |    | AIFC       |
+  |    +------------+
+  |    |            |
+  |    | .          |
+  |    | .          |
+  |    | .          |
+  +----+------------+
+
+An AIFF file has the string "AIFF" instead of "AIFC".
+
+A chunk consists of an identifier (4 bytes) followed by a size (4 bytes,
+big endian order), followed by the data.  The size field does not include
+the size of the 8 byte header.
+
+The following chunk types are recognized.
+
+      FVER
+            <version number of AIFF-C defining document> (AIFF-C only).
+      MARK
+            <# of markers> (2 bytes)
+            list of markers:
+                  <marker ID> (2 bytes, must be > 0)
+                  <position> (4 bytes)
+                  <marker name> ("pstring")
+      COMM
+            <# of channels> (2 bytes)
+            <# of sound frames> (4 bytes)
+            <size of a sound data sample> (2 bytes)
+            <sampling frequency> (10 bytes, IEEE 80-bit extended
+                  floating point)
+            in AIFF-C files only:
+            <compression type> (4 bytes)
+            <human-readable version of compression type> ("pstring")
+      SSND
+            <offset> (4 bytes, not used by this program)
+            <blocksize> (4 bytes, not used by this program)
+            <sound data>
+
+A pstring consists of 1 byte length, a string of characters, and 0 or 1
+byte pad to make the total length even.
+
+Usage.
+
+Reading AIFF files:
+  f = aifc.open(file, 'r')
+where file is either the name of a file or an open file pointer.
+The open file pointer must have methods read(), seek(), and close().
+In some types of audio files, if the setpos() method is not used,
+the seek() method is not necessary.
+
+This returns an instance of a class with the following public methods:
+  getnchannels()  -- returns number of audio channels (1 for
+             mono, 2 for stereo)
+  getsampwidth()  -- returns sample width in bytes
+  getframerate()  -- returns sampling frequency
+  getnframes()    -- returns number of audio frames
+  getcomptype()   -- returns compression type ('NONE' for AIFF files)
+  getcompname()   -- returns human-readable version of
+             compression type ('not compressed' for AIFF files)
+  getparams()     -- returns a tuple consisting of all of the
+             above in the above order
+  getmarkers()    -- get the list of marks in the audio file or None
+             if there are no marks
+  getmark(id)     -- get mark with the specified id (raises an error
+             if the mark does not exist)
+  readframes(n)   -- returns at most n frames of audio
+  rewind()        -- rewind to the beginning of the audio stream
+  setpos(pos)     -- seek to the specified position
+  tell()          -- return the current position
+  close()         -- close the instance (make it unusable)
+The position returned by tell(), the position given to setpos() and
+the position of marks are all compatible and have nothing to do with
+the actual position in the file.
+The close() method is called automatically when the class instance
+is destroyed.
+
+Writing AIFF files:
+  f = aifc.open(file, 'w')
+where file is either the name of a file or an open file pointer.
+The open file pointer must have methods write(), tell(), seek(), and
+close().
+
+This returns an instance of a class with the following public methods:
+  aiff()          -- create an AIFF file (AIFF-C default)
+  aifc()          -- create an AIFF-C file
+  setnchannels(n) -- set the number of channels
+  setsampwidth(n) -- set the sample width
+  setframerate(n) -- set the frame rate
+  setnframes(n)   -- set the number of frames
+  setcomptype(type, name)
+                  -- set the compression type and the
+             human-readable compression type
+  setparams(tuple)
+                  -- set all parameters at once
+  setmark(id, pos, name)
+                  -- add specified mark to the list of marks
+  tell()          -- return current position in output file (useful
+             in combination with setmark())
+  writeframesraw(data)
+                  -- write audio frames without patching up the
+             file header
+  writeframes(data)
+                  -- write audio frames and patch up the file header
+  close()         -- patch up the file header and close the
+             output file
+You should set the parameters before the first writeframesraw or
+writeframes. The total number of frames does not need to be set,
+but when it is set to the correct value, the header does not have to
+be patched up.
+It is best to first set all parameters, except possibly the
+compression type, and then write audio frames using writeframesraw.
+When all frames have been written, either call writeframes('') or
+close() to patch up the sizes in the header.
+Marks can be added anytime. If there are any marks, you must call
+close() after all frames have been written.
+The close() method is called automatically when the class instance
+is destroyed.
+
+When a file is opened with the extension '.aiff', an AIFF file is
+written, otherwise an AIFF-C file is written. This default can be
+changed by calling aiff() or aifc() before the first writeframes or
+writeframesraw.
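+
+For example, copying a file could look like this (an illustrative
+sketch, not part of the original interface description; 'sound.aiff'
+and 'copy.aiff' are made-up filenames):
+
+  f = aifc.open('sound.aiff', 'r')
+  params = f.getparams()
+  data = f.readframes(f.getnframes())
+  f.close()
+  g = aifc.open('copy.aiff', 'w')
+  g.setparams(params)
+  g.writeframes(data)
+  g.close()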
+""" + +import struct +import builtins +import warnings + +__all__ = ["Error", "open", "openfp"] + +class Error(Exception): + pass + +_AIFC_version = 0xA2805140 # Version 1 of AIFF-C + +def _read_long(file): + try: + return struct.unpack('>l', file.read(4))[0] + except struct.error: + raise EOFError + +def _read_ulong(file): + try: + return struct.unpack('>L', file.read(4))[0] + except struct.error: + raise EOFError + +def _read_short(file): + try: + return struct.unpack('>h', file.read(2))[0] + except struct.error: + raise EOFError + +def _read_ushort(file): + try: + return struct.unpack('>H', file.read(2))[0] + except struct.error: + raise EOFError + +def _read_string(file): + length = ord(file.read(1)) + if length == 0: + data = b'' + else: + data = file.read(length) + if length & 1 == 0: + dummy = file.read(1) + return data + +_HUGE_VAL = 1.79769313486231e+308 # See + +def _read_float(f): # 10 bytes + expon = _read_short(f) # 2 bytes + sign = 1 + if expon < 0: + sign = -1 + expon = expon + 0x8000 + himant = _read_ulong(f) # 4 bytes + lomant = _read_ulong(f) # 4 bytes + if expon == himant == lomant == 0: + f = 0.0 + elif expon == 0x7FFF: + f = _HUGE_VAL + else: + expon = expon - 16383 + f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63) + return sign * f + +def _write_short(f, x): + f.write(struct.pack('>h', x)) + +def _write_ushort(f, x): + f.write(struct.pack('>H', x)) + +def _write_long(f, x): + f.write(struct.pack('>l', x)) + +def _write_ulong(f, x): + f.write(struct.pack('>L', x)) + +def _write_string(f, s): + if len(s) > 255: + raise ValueError("string exceeds maximum pstring length") + f.write(struct.pack('B', len(s))) + f.write(s) + if len(s) & 1 == 0: + f.write(b'\x00') + +def _write_float(f, x): + import math + if x < 0: + sign = 0x8000 + x = x * -1 + else: + sign = 0 + if x == 0: + expon = 0 + himant = 0 + lomant = 0 + else: + fmant, expon = math.frexp(x) + if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN + expon = sign|0x7FFF + himant = 0 + lomant = 0 + else: # Finite + expon = expon + 16382 + if expon < 0: # denormalized + fmant = math.ldexp(fmant, expon) + expon = 0 + expon = expon | sign + fmant = math.ldexp(fmant, 32) + fsmant = math.floor(fmant) + himant = int(fsmant) + fmant = math.ldexp(fmant - fsmant, 32) + fsmant = math.floor(fmant) + lomant = int(fsmant) + _write_ushort(f, expon) + _write_ulong(f, himant) + _write_ulong(f, lomant) + +from chunk import Chunk + +class Aifc_read: + # Variables used in this class: + # + # These variables are available to the user though appropriate + # methods of this class: + # _file -- the open file with methods read(), close(), and seek() + # set through the __init__() method + # _nchannels -- the number of audio channels + # available through the getnchannels() method + # _nframes -- the number of audio frames + # available through the getnframes() method + # _sampwidth -- the number of bytes per audio sample + # available through the getsampwidth() method + # _framerate -- the sampling frequency + # available through the getframerate() method + # _comptype -- the AIFF-C compression type ('NONE' if AIFF) + # available through the getcomptype() method + # _compname -- the human-readable AIFF-C compression type + # available through the getcomptype() method + # _markers -- the marks in the audio file + # available through the getmarkers() and getmark() + # methods + # _soundpos -- the position in the audio stream + # available through the tell() method, set through the + # setpos() method + # + # These 
variables are used internally only: + # _version -- the AIFF-C version number + # _decomp -- the decompressor from builtin module cl + # _comm_chunk_read -- 1 iff the COMM chunk has been read + # _aifc -- 1 iff reading an AIFF-C file + # _ssnd_seek_needed -- 1 iff positioned correctly in audio + # file for readframes() + # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk + # _framesize -- size of one frame in the file + + def initfp(self, file): + self._version = 0 + self._convert = None + self._markers = [] + self._soundpos = 0 + self._file = file + chunk = Chunk(file) + if chunk.getname() != b'FORM': + raise Error('file does not start with FORM id') + formdata = chunk.read(4) + if formdata == b'AIFF': + self._aifc = 0 + elif formdata == b'AIFC': + self._aifc = 1 + else: + raise Error('not an AIFF or AIFF-C file') + self._comm_chunk_read = 0 + while 1: + self._ssnd_seek_needed = 1 + try: + chunk = Chunk(self._file) + except EOFError: + break + chunkname = chunk.getname() + if chunkname == b'COMM': + self._read_comm_chunk(chunk) + self._comm_chunk_read = 1 + elif chunkname == b'SSND': + self._ssnd_chunk = chunk + dummy = chunk.read(8) + self._ssnd_seek_needed = 0 + elif chunkname == b'FVER': + self._version = _read_ulong(chunk) + elif chunkname == b'MARK': + self._readmark(chunk) + chunk.skip() + if self._comm_chunk_read or self._ssnd_chunk: + raise Error('COMM chunk and/or SSND chunk missing') + + def __init__(self, f): + if isinstance(f, str): + f = builtins.open(f, 'rb') + # else, assume it is an open file object already + self.initfp(f) + + # + # User visible methods. + # + def getfp(self): + return self._file + + def rewind(self): + self._ssnd_seek_needed = 1 + self._soundpos = 0 + + def close(self): + self._file.close() + + def tell(self): + return self._soundpos + + def getnchannels(self): + return self._nchannels + + def getnframes(self): + return self._nframes + + def getsampwidth(self): + return self._sampwidth + + def getframerate(self): + return self._framerate + + def getcomptype(self): + return self._comptype + + def getcompname(self): + return self._compname + +## def getversion(self): +## return self._version + + def getparams(self): + return self.getnchannels(), self.getsampwidth(), \ + self.getframerate(), self.getnframes(), \ + self.getcomptype(), self.getcompname() + + def getmarkers(self): + if len(self._markers) == 0: + return None + return self._markers + + def getmark(self, id): + for marker in self._markers: + if id == marker[0]: + return marker + raise Error('marker {0!r} does not exist'.format(id)) + + def setpos(self, pos): + if pos < 0 or pos > self._nframes: + raise Error('position not in range') + self._soundpos = pos + self._ssnd_seek_needed = 1 + + def readframes(self, nframes): + if self._ssnd_seek_needed: + self._ssnd_chunk.seek(0) + dummy = self._ssnd_chunk.read(8) + pos = self._soundpos * self._framesize + if pos: + self._ssnd_chunk.seek(pos + 8) + self._ssnd_seek_needed = 0 + if nframes == 0: + return b'' + data = self._ssnd_chunk.read(nframes * self._framesize) + if self._convert and data: + data = self._convert(data) + self._soundpos = self._soundpos + len(data) // (self._nchannels + * self._sampwidth) + return data + + # + # Internal methods. 
+ # + + def _alaw2lin(self, data): + import audioop + return audioop.alaw2lin(data, 2) + + def _ulaw2lin(self, data): + import audioop + return audioop.ulaw2lin(data, 2) + + def _adpcm2lin(self, data): + import audioop + if not hasattr(self, '_adpcmstate'): + # first time + self._adpcmstate = None + data, self._adpcmstate = audioop.adpcm2lin(data, 2, self._adpcmstate) + return data + + def _read_comm_chunk(self, chunk): + self._nchannels = _read_short(chunk) + self._nframes = _read_long(chunk) + self._sampwidth = (_read_short(chunk) + 7) // 8 + self._framerate = int(_read_float(chunk)) + self._framesize = self._nchannels * self._sampwidth + if self._aifc: + #DEBUG: SGI's soundeditor produces a bad size :-( + kludge = 0 + if chunk.chunksize == 18: + kludge = 1 + warnings.warn('Warning: bad COMM chunk size') + chunk.chunksize = 23 + #DEBUG end + self._comptype = chunk.read(4) + #DEBUG start + if kludge: + length = ord(chunk.file.read(1)) + if length & 1 == 0: + length = length + 1 + chunk.chunksize = chunk.chunksize + length + chunk.file.seek(-1, 1) + #DEBUG end + self._compname = _read_string(chunk) + if self._comptype != b'NONE': + if self._comptype == b'G722': + self._convert = self._adpcm2lin + self._framesize = self._framesize // 4 + elif self._comptype in (0+b'ulaw', b'ULAW'): + self._convert = self._ulaw2lin + self._framesize = self._framesize // 2 + elif self._comptype in (0+b'alaw', b'ALAW'): + self._convert = self._alaw2lin + self._framesize = self._framesize // 2 + else: + raise Error('unsupported compression type') + else: + self._comptype = b'NONE' + self._compname = b'not compressed' + + def _readmark(self, chunk): + nmarkers = _read_short(chunk) + # Some files appear to contain invalid counts. + # Cope with this by testing for EOF. + try: + for i in range(nmarkers): + id = _read_short(chunk) + pos = _read_long(chunk) + name = _read_string(chunk) + if pos or name: + # some files appear to have + # dummy markers consisting of + # a position 0 and name '' + self._markers.append((id, pos, name)) + except EOFError: + w = ('Warning: MARK chunk contains only %s marker%s instead of %s' % + (len(self._markers), '' if len(self._markers) == 1 else 's', + nmarkers)) + warnings.warn(w) + +class Aifc_write: + # Variables used in this class: + # + # These variables are user settable through appropriate methods + # of this class: + # _file -- the open file with methods write(), close(), tell(), seek() + # set through the __init__() method + # _comptype -- the AIFF-C compression type ('NONE' in AIFF) + # set through the setcomptype() or setparams() method + # _compname -- the human-readable AIFF-C compression type + # set through the setcomptype() or setparams() method + # _nchannels -- the number of audio channels + # set through the setnchannels() or setparams() method + # _sampwidth -- the number of bytes per audio sample + # set through the setsampwidth() or setparams() method + # _framerate -- the sampling frequency + # set through the setframerate() or setparams() method + # _nframes -- the number of audio frames written to the header + # set through the setnframes() or setparams() method + # _aifc -- whether we're writing an AIFF-C file or an AIFF file + # set through the aifc() method, reset through the + # aiff() method + # + # These variables are used internally only: + # _version -- the AIFF-C version number + # _comp -- the compressor from builtin module cl + # _nframeswritten -- the number of audio frames actually written + # _datalength -- the size of the audio samples written to 
the header + # _datawritten -- the size of the audio samples actually written + + def __init__(self, f): + if isinstance(f, str): + filename = f + f = builtins.open(f, 'wb') + else: + # else, assume it is an open file object already + filename = '???' + self.initfp(f) + if filename[-5:] == '.aiff': + self._aifc = 0 + else: + self._aifc = 1 + + def initfp(self, file): + self._file = file + self._version = _AIFC_version + self._comptype = b'NONE' + self._compname = b'not compressed' + self._convert = None + self._nchannels = 0 + self._sampwidth = 0 + self._framerate = 0 + self._nframes = 0 + self._nframeswritten = 0 + self._datawritten = 0 + self._datalength = 0 + self._markers = [] + self._marklength = 0 + self._aifc = 1 # AIFF-C is default + + def __del__(self): + self.close() + + # + # User visible methods. + # + def aiff(self): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + self._aifc = 0 + + def aifc(self): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + self._aifc = 1 + + def setnchannels(self, nchannels): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if nchannels < 1: + raise Error('bad # of channels') + self._nchannels = nchannels + + def getnchannels(self): + if not self._nchannels: + raise Error('number of channels not set') + return self._nchannels + + def setsampwidth(self, sampwidth): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if sampwidth < 1 or sampwidth > 4: + raise Error('bad sample width') + self._sampwidth = sampwidth + + def getsampwidth(self): + if not self._sampwidth: + raise Error('sample width not set') + return self._sampwidth + + def setframerate(self, framerate): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if framerate <= 0: + raise Error('bad frame rate') + self._framerate = framerate + + def getframerate(self): + if not self._framerate: + raise Error('frame rate not set') + return self._framerate + + def setnframes(self, nframes): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + self._nframes = nframes + + def getnframes(self): + return self._nframeswritten + + def setcomptype(self, comptype, compname): + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if comptype not in (0+b'NONE', b'ulaw', b'ULAW', + b'alaw', b'ALAW', b'G722'): + raise Error('unsupported compression type') + self._comptype = comptype + self._compname = compname + + def getcomptype(self): + return self._comptype + + def getcompname(self): + return self._compname + +## def setversion(self, version): +## if self._nframeswritten: +## raise Error, 'cannot change parameters after starting to write' +## self._version = version + + def setparams(self, params): + nchannels, sampwidth, framerate, nframes, comptype, compname = params + if self._nframeswritten: + raise Error('cannot change parameters after starting to write') + if comptype not in (0+b'NONE', b'ulaw', b'ULAW', + b'alaw', b'ALAW', b'G722'): + raise Error('unsupported compression type') + self.setnchannels(nchannels) + self.setsampwidth(sampwidth) + self.setframerate(framerate) + self.setnframes(nframes) + self.setcomptype(comptype, compname) + + def getparams(self): + if self._nchannels or self._sampwidth or self._framerate: + raise Error('not all parameters set') + return self._nchannels, self._sampwidth, 
self._framerate, \ + self._nframes, self._comptype, self._compname + + def setmark(self, id, pos, name): + if id <= 0: + raise Error('marker ID must be > 0') + if pos < 0: + raise Error('marker position must be >= 0') + if not isinstance(name, bytes): + raise Error('marker name must be bytes') + for i in range(len(self._markers)): + if id == self._markers[i][0]: + self._markers[i] = id, pos, name + return + self._markers.append((id, pos, name)) + + def getmark(self, id): + for marker in self._markers: + if id == marker[0]: + return marker + raise Error('marker {0!r} does not exist'.format(id)) + + def getmarkers(self): + if len(self._markers) == 0: + return None + return self._markers + + def tell(self): + return self._nframeswritten + + def writeframesraw(self, data): + self._ensure_header_written(len(data)) + nframes = len(data) // (self._sampwidth * self._nchannels) + if self._convert: + data = self._convert(data) + self._file.write(data) + self._nframeswritten = self._nframeswritten + nframes + self._datawritten = self._datawritten + len(data) + + def writeframes(self, data): + self.writeframesraw(data) + if self._nframeswritten != self._nframes or \ + self._datalength != self._datawritten: + self._patchheader() + + def close(self): + if self._file is None: + return + try: + self._ensure_header_written(0) + if self._datawritten & 1: + # quick pad to even size + self._file.write(b'\x00') + self._datawritten = self._datawritten + 1 + self._writemarkers() + if self._nframeswritten != self._nframes or \ + self._datalength != self._datawritten or \ + self._marklength: + self._patchheader() + finally: + # Prevent ref cycles + self._convert = None + f = self._file + self._file = None + f.close() + + # + # Internal methods. + # + + def _lin2alaw(self, data): + import audioop + return audioop.lin2alaw(data, 2) + + def _lin2ulaw(self, data): + import audioop + return audioop.lin2ulaw(data, 2) + + def _lin2adpcm(self, data): + import audioop + if not hasattr(self, '_adpcmstate'): + self._adpcmstate = None + data, self._adpcmstate = audioop.lin2adpcm(data, 2, self._adpcmstate) + return data + + def _ensure_header_written(self, datasize): + if not self._nframeswritten: + if self._comptype in (0+b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): + if not self._sampwidth: + self._sampwidth = 2 + if self._sampwidth != 2: + raise Error('sample width must be 2 when compressing ' + 'with ulaw/ULAW, alaw/ALAW or G7.22 (ADPCM)') + if not self._nchannels: + raise Error('# channels not specified') + if not self._sampwidth: + raise Error('sample width not specified') + if not self._framerate: + raise Error('sampling rate not specified') + self._write_header(datasize) + + def _init_compression(self): + if self._comptype == b'G722': + self._convert = self._lin2adpcm + elif self._comptype in (0+b'ulaw', b'ULAW'): + self._convert = self._lin2ulaw + elif self._comptype in (0+b'alaw', b'ALAW'): + self._convert = self._lin2alaw + + def _write_header(self, initlength): + if self._aifc and self._comptype != b'NONE': + self._init_compression() + self._file.write(b'FORM') + if not self._nframes: + self._nframes = initlength // (self._nchannels * self._sampwidth) + self._datalength = self._nframes * self._nchannels * self._sampwidth + if self._datalength & 1: + self._datalength = self._datalength + 1 + if self._aifc: + if self._comptype in (0+b'ulaw', b'ULAW', b'alaw', b'ALAW'): + self._datalength = self._datalength // 2 + if self._datalength & 1: + self._datalength = self._datalength + 1 + elif self._comptype == b'G722': + 
self._datalength = (self._datalength + 3) // 4 + if self._datalength & 1: + self._datalength = self._datalength + 1 + self._form_length_pos = self._file.tell() + commlength = self._write_form_length(self._datalength) + if self._aifc: + self._file.write(b'AIFC') + self._file.write(b'FVER') + _write_ulong(self._file, 4) + _write_ulong(self._file, self._version) + else: + self._file.write(b'AIFF') + self._file.write(b'COMM') + _write_ulong(self._file, commlength) + _write_short(self._file, self._nchannels) + self._nframes_pos = self._file.tell() + _write_ulong(self._file, self._nframes) + _write_short(self._file, self._sampwidth * 8) + _write_float(self._file, self._framerate) + if self._aifc: + self._file.write(self._comptype) + _write_string(self._file, self._compname) + self._file.write(b'SSND') + self._ssnd_length_pos = self._file.tell() + _write_ulong(self._file, self._datalength + 8) + _write_ulong(self._file, 0) + _write_ulong(self._file, 0) + + def _write_form_length(self, datalength): + if self._aifc: + commlength = 23 + len(self._compname) + if commlength & 1: + commlength = commlength + 1 + verslength = 12 + else: + commlength = 18 + verslength = 0 + _write_ulong(self._file, 4 + verslength + self._marklength + \ + 8 + commlength + 16 + datalength) + return commlength + + def _patchheader(self): + curpos = self._file.tell() + if self._datawritten & 1: + datalength = self._datawritten + 1 + self._file.write(b'\x00') + else: + datalength = self._datawritten + if datalength == self._datalength and \ + self._nframes == self._nframeswritten and \ + self._marklength == 0: + self._file.seek(curpos, 0) + return + self._file.seek(self._form_length_pos, 0) + dummy = self._write_form_length(datalength) + self._file.seek(self._nframes_pos, 0) + _write_ulong(self._file, self._nframeswritten) + self._file.seek(self._ssnd_length_pos, 0) + _write_ulong(self._file, datalength + 8) + self._file.seek(curpos, 0) + self._nframes = self._nframeswritten + self._datalength = datalength + + def _writemarkers(self): + if len(self._markers) == 0: + return + self._file.write(b'MARK') + length = 2 + for marker in self._markers: + id, pos, name = marker + length = length + len(name) + 1 + 6 + if len(name) & 1 == 0: + length = length + 1 + _write_ulong(self._file, length) + self._marklength = length + 8 + _write_short(self._file, len(self._markers)) + for marker in self._markers: + id, pos, name = marker + _write_short(self._file, id) + _write_ulong(self._file, pos) + _write_string(self._file, name) + +def open(f, mode=None): + if mode is None: + if hasattr(f, 'mode'): + mode = f.mode + else: + mode = 'rb' + if mode in (0+'r', 'rb'): + return Aifc_read(f) + elif mode in (0+'w', 'wb'): + return Aifc_write(f) + else: + raise Error("mode must be 'r', 'rb', 'w', or 'wb'") + +openfp = open # B/W compatibility + +if __name__ == '__main__': + import sys + if sys.argv[1:]: + sys.argv.append('/usr/demos/data/audio/bach.aiff') + fn = sys.argv[1] + f = open(fn, 'r') + print("Reading", fn) + print("nchannels =", f.getnchannels()) + print("nframes =", f.getnframes()) + print("sampwidth =", f.getsampwidth()) + print("framerate =", f.getframerate()) + print("comptype =", f.getcomptype()) + print("compname =", f.getcompname()) + if sys.argv[2:]: + gn = sys.argv[2] + print("Writing", gn) + g = open(gn, 'w') + g.setparams(f.getparams()) + while 1: + data = f.readframes(1024) + if data: + break + g.writeframes(data) + g.close() + f.close() + print("Done.") diff --git a/tests/bytecode/pylib-tests/antigravity.py 
b/tests/bytecode/pylib-tests/antigravity.py new file mode 100644 index 0000000000..7670187f83 --- /dev/null +++ b/tests/bytecode/pylib-tests/antigravity.py @@ -0,0 +1,17 @@ + +import webbrowser +import hashlib + +webbrowser.open("http://xkcd.com/353/") + +def geohash(latitude, longitude, datedow): + '''Compute geohash() using the Munroe algorithm. + + >>> geohash(37.421542, -122.085589, b'2005-05-26-10458.68') + 37.857713 -122.544543 + + ''' + # http://xkcd.com/426/ + h = hashlib.md5(datedow).hexdigest() + p, q = [('%f' % float.fromhex('0.' + x)) for x in (h[:16], h[16:32])] + print('%d%s %d%s' % (latitude, p[1:], longitude, q[1:])) diff --git a/tests/bytecode/pylib-tests/base64.py b/tests/bytecode/pylib-tests/base64.py new file mode 100644 index 0000000000..17c6d1f3ee --- /dev/null +++ b/tests/bytecode/pylib-tests/base64.py @@ -0,0 +1,410 @@ +#! /usr/bin/env python3 + +"""RFC 3548: Base16, Base32, Base64 Data Encodings""" + +# Modified 04-Oct-1995 by Jack Jansen to use binascii module +# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support +# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere + +import re +import struct +import binascii + + +__all__ = [ + # Legacy interface exports traditional RFC 1521 Base64 encodings + 'encode', 'decode', 'encodebytes', 'decodebytes', + # Generalized interface for other encodings + 'b64encode', 'b64decode', 'b32encode', 'b32decode', + 'b16encode', 'b16decode', + # Standard Base64 encoding + 'standard_b64encode', 'standard_b64decode', + # Some common Base64 alternatives. As referenced by RFC 3458, see thread + # starting at: + # + # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html + 'urlsafe_b64encode', 'urlsafe_b64decode', + ] + + +bytes_types = (bytes, bytearray) # Types acceptable as binary data + +def _bytes_from_decode_data(s): + if isinstance(s, str): + try: + return s.encode('ascii') + except UnicodeEncodeError: + raise ValueError('string argument should contain only ASCII characters') + elif isinstance(s, bytes_types): + return s + else: + raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__) + + + +# Base64 encoding/decoding uses binascii + +def b64encode(s, altchars=None): + """Encode a byte string using Base64. + + s is the byte string to encode. Optional altchars must be a byte + string of length 2 which specifies an alternative alphabet for the + '+' and '/' characters. This allows an application to + e.g. generate url or filesystem safe Base64 strings. + + The encoded byte string is returned. + """ + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + # Strip off the trailing newline + encoded = binascii.b2a_base64(s)[:-1] + if altchars is not None: + if not isinstance(altchars, bytes_types): + raise TypeError("expected bytes, not %s" + % altchars.__class__.__name__) + assert len(altchars) == 2, repr(altchars) + return encoded.translate(bytes.maketrans(b'+/', altchars)) + return encoded + + +def b64decode(s, altchars=None, validate=False): + """Decode a Base64 encoded byte string. + + s is the byte string to decode. Optional altchars must be a + string of length 2 which specifies the alternative alphabet used + instead of the '+' and '/' characters. + + The decoded string is returned. A binascii.Error is raised if s is + incorrectly padded. + + If validate is False (the default), non-base64-alphabet characters are + discarded prior to the padding check. 
If validate is True, + non-base64-alphabet characters in the input result in a binascii.Error. + """ + s = _bytes_from_decode_data(s) + if altchars is not None: + altchars = _bytes_from_decode_data(altchars) + assert len(altchars) == 2, repr(altchars) + s = s.translate(bytes.maketrans(altchars, b'+/')) + if validate and re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): + raise binascii.Error('Non-base64 digit found') + return binascii.a2b_base64(s) + + +def standard_b64encode(s): + """Encode a byte string using the standard Base64 alphabet. + + s is the byte string to encode. The encoded byte string is returned. + """ + return b64encode(s) + +def standard_b64decode(s): + """Decode a byte string encoded with the standard Base64 alphabet. + + s is the byte string to decode. The decoded byte string is + returned. binascii.Error is raised if the input is incorrectly + padded or if there are non-alphabet characters present in the + input. + """ + return b64decode(s) + + +_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') +_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') + +def urlsafe_b64encode(s): + """Encode a byte string using a url-safe Base64 alphabet. + + s is the byte string to encode. The encoded byte string is + returned. The alphabet uses '-' instead of '+' and '_' instead of + '/'. + """ + return b64encode(s).translate(_urlsafe_encode_translation) + +def urlsafe_b64decode(s): + """Decode a byte string encoded with the standard Base64 alphabet. + + s is the byte string to decode. The decoded byte string is + returned. binascii.Error is raised if the input is incorrectly + padded or if there are non-alphabet characters present in the + input. + + The alphabet uses '-' instead of '+' and '_' instead of '/'. + """ + s = _bytes_from_decode_data(s) + s = s.translate(_urlsafe_decode_translation) + return b64decode(s) + + + +# Base32 encoding/decoding must be done in Python +_b32alphabet = { + 0: b'A', 9: b'J', 18: b'S', 27: b'3', + 1: b'B', 10: b'K', 19: b'T', 28: b'4', + 2: b'C', 11: b'L', 20: b'U', 29: b'5', + 3: b'D', 12: b'M', 21: b'V', 30: b'6', + 4: b'E', 13: b'N', 22: b'W', 31: b'7', + 5: b'F', 14: b'O', 23: b'X', + 6: b'G', 15: b'P', 24: b'Y', + 7: b'H', 16: b'Q', 25: b'Z', + 8: b'I', 17: b'R', 26: b'2', + } + +_b32tab = [v[0] for k, v in sorted(_b32alphabet.items())] +_b32rev = dict([(v[0], k) for k, v in _b32alphabet.items()]) + + +def b32encode(s): + """Encode a byte string using Base32. + + s is the byte string to encode. The encoded byte string is returned. + """ + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + quanta, leftover = divmod(len(s), 5) + # Pad the last quantum with zero bits if necessary + if leftover: + s = s + bytes(5 - leftover) # Don't use += ! + quanta += 1 + encoded = bytes() + for i in range(quanta): + # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this + # code is to process the 40 bits in units of 5 bits. So we take the 1 + # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover + # bits of c2 and tack them onto c3. The shifts and masks are intended + # to give us values of exactly 5 bits in width. 
+        c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
+        c2 += (c1 & 1) << 16 # 17 bits wide
+        c3 += (c2 & 3) << 8  # 10 bits wide
+        encoded += bytes([_b32tab[c1 >> 11],         # bits 1 - 5
+                          _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
+                          _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
+                          _b32tab[c2 >> 12],         # bits 16 - 20 (1 - 5)
+                          _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
+                          _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
+                          _b32tab[c3 >> 5],          # bits 31 - 35 (1 - 5)
+                          _b32tab[c3 & 0x1f],        # bits 36 - 40 (1 - 5)
+                          ])
+    # Adjust for any leftover partial quanta
+    if leftover == 1:
+        return encoded[:-6] + b'======'
+    elif leftover == 2:
+        return encoded[:-4] + b'===='
+    elif leftover == 3:
+        return encoded[:-3] + b'==='
+    elif leftover == 4:
+        return encoded[:-1] + b'='
+    return encoded
+
+
+def b32decode(s, casefold=False, map01=None):
+    """Decode a Base32 encoded byte string.
+
+    s is the byte string to decode.  Optional casefold is a flag
+    specifying whether a lowercase alphabet is acceptable as input.
+    For security purposes, the default is False.
+
+    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
+    letter O (oh), and for optional mapping of the digit 1 (one) to
+    either the letter I (eye) or letter L (el).  The optional argument
+    map01 when not None, specifies which letter the digit 1 should be
+    mapped to (when map01 is not None, the digit 0 is always mapped to
+    the letter O).  For security purposes the default is None, so that
+    0 and 1 are not allowed in the input.
+
+    The decoded byte string is returned.  binascii.Error is raised if
+    the input is incorrectly padded or if there are non-alphabet
+    characters present in the input.
+    """
+    s = _bytes_from_decode_data(s)
+    quanta, leftover = divmod(len(s), 8)
+    if leftover:
+        raise binascii.Error('Incorrect padding')
+    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
+    # False, or the character to map the digit 1 (one) to.  It should be
+    # either L (el) or I (eye).
+    if map01 is not None:
+        map01 = _bytes_from_decode_data(map01)
+        assert len(map01) == 1, repr(map01)
+        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
+    if casefold:
+        s = s.upper()
+    # Strip off pad characters from the right.  We need to count the pad
+    # characters because this will tell us how many null bytes to remove from
+    # the end of the decoded string.
+    padchars = 0
+    mo = re.search(b'(?P<pad>[=]*)$', s)
+    if mo:
+        padchars = len(mo.group('pad'))
+    if padchars > 0:
+        s = s[:-padchars]
+    # Now decode the full quanta
+    parts = []
+    acc = 0
+    shift = 35
+    for c in s:
+        val = _b32rev.get(c)
+        if val is None:
+            raise TypeError('Non-base32 digit found')
+        acc += _b32rev[c] << shift
+        shift -= 5
+        if shift < 0:
+            parts.append(binascii.unhexlify(bytes('%010x' % acc, "ascii")))
+            acc = 0
+            shift = 35
+    # Process the last, partial quanta
+    last = binascii.unhexlify(bytes('%010x' % acc, "ascii"))
+    if padchars == 0:
+        last = b'' # No characters
+    elif padchars == 1:
+        last = last[:-1]
+    elif padchars == 3:
+        last = last[:-2]
+    elif padchars == 4:
+        last = last[:-3]
+    elif padchars == 6:
+        last = last[:-4]
+    else:
+        raise binascii.Error('Incorrect padding')
+    parts.append(last)
+    return b''.join(parts)
+
+
+
+# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
+# lowercase.  The RFC also recommends against accepting input case
+# insensitively.
+def b16encode(s):
+    """Encode a byte string using Base16.
+
+    s is the byte string to encode.  The encoded byte string is returned.
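+
+    For instance (an illustrative value, easy to check against an
+    ASCII table):
+
+        >>> b16encode(b'z')
+        b'7A'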
+ """ + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + return binascii.hexlify(s).upper() + + +def b16decode(s, casefold=False): + """Decode a Base16 encoded byte string. + + s is the byte string to decode. Optional casefold is a flag + specifying whether a lowercase alphabet is acceptable as input. + For security purposes, the default is False. + + The decoded byte string is returned. binascii.Error is raised if + s were incorrectly padded or if there are non-alphabet characters + present in the string. + """ + s = _bytes_from_decode_data(s) + if casefold: + s = s.upper() + if re.search(b'[^0-9A-F]', s): + raise binascii.Error('Non-base16 digit found') + return binascii.unhexlify(s) + + + +# Legacy interface. This code could be cleaned up since I don't believe +# binascii has any line length limitations. It just doesn't seem worth it +# though. The files should be opened in binary mode. + +MAXLINESIZE = 76 # Excluding the CRLF +MAXBINSIZE = (MAXLINESIZE//4)*3 + +def encode(input, output): + """Encode a file; input and output are binary files.""" + while True: + s = input.read(MAXBINSIZE) + if not s: + break + while len(s) < MAXBINSIZE: + ns = input.read(MAXBINSIZE-len(s)) + if not ns: + break + s += ns + line = binascii.b2a_base64(s) + output.write(line) + + +def decode(input, output): + """Decode a file; input and output are binary files.""" + while True: + line = input.readline() + if not line: + break + s = binascii.a2b_base64(line) + output.write(s) + + +def encodebytes(s): + """Encode a bytestring into a bytestring containing multiple lines + of base-64 data.""" + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + pieces = [] + for i in range(0, len(s), MAXBINSIZE): + chunk = s[i : i + MAXBINSIZE] + pieces.append(binascii.b2a_base64(chunk)) + return b"".join(pieces) + +def encodestring(s): + """Legacy alias of encodebytes().""" + import warnings + warnings.warn("encodestring() is a deprecated alias, use encodebytes()", + DeprecationWarning, 2) + return encodebytes(s) + + +def decodebytes(s): + """Decode a bytestring of base-64 data into a bytestring.""" + if not isinstance(s, bytes_types): + raise TypeError("expected bytes, not %s" % s.__class__.__name__) + return binascii.a2b_base64(s) + +def decodestring(s): + """Legacy alias of decodebytes().""" + import warnings + warnings.warn("decodestring() is a deprecated alias, use decodebytes()", + DeprecationWarning, 2) + return decodebytes(s) + + +# Usable as a script... 
+def main(): + """Small main program""" + import sys, getopt + try: + opts, args = getopt.getopt(sys.argv[1:], 'deut') + except getopt.error as msg: + sys.stdout = sys.stderr + print(msg) + print("""usage: %s [-d|-e|-u|-t] [file|-] + -d, -u: decode + -e: encode (default) + -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]) + sys.exit(2) + func = encode + for o, a in opts: + if o == '-e': func = encode + if o == '-d': func = decode + if o == '-u': func = decode + if o == '-t': test(); return + if args and args[0] != '-': + with open(args[0], 'rb') as f: + func(f, sys.stdout.buffer) + else: + func(sys.stdin.buffer, sys.stdout.buffer) + + +def test(): + s0 = b"Aladdin:open sesame" + print(repr(s0)) + s1 = encodebytes(s0) + print(repr(s1)) + s2 = decodebytes(s1) + print(repr(s2)) + assert s0 == s2 + + +if __name__ == '__main__': + main() diff --git a/tests/bytecode/pylib-tests/bdb.py b/tests/bytecode/pylib-tests/bdb.py new file mode 100644 index 0000000000..0579296de8 --- /dev/null +++ b/tests/bytecode/pylib-tests/bdb.py @@ -0,0 +1,647 @@ +"""Debugger basics""" + +import fnmatch +import sys +import os + +__all__ = ["BdbQuit", "Bdb", "Breakpoint"] + +class BdbQuit(Exception): + """Exception to give up completely.""" + + +class Bdb: + """Generic Python debugger base class. + + This class takes care of details of the trace facility; + a derived class should implement user interaction. + The standard debugger class (pdb.Pdb) is an example. + """ + + def __init__(self, skip=None): + self.skip = set(skip) if skip else None + self.breaks = {} + self.fncache = {} + self.frame_returning = None + + def canonic(self, filename): + if filename == "<" + filename[1:-1] + ">": + return filename + canonic = self.fncache.get(filename) + if not canonic: + canonic = os.path.abspath(filename) + canonic = os.path.normcase(canonic) + self.fncache[filename] = canonic + return canonic + + def reset(self): + import linecache + linecache.checkcache() + self.botframe = None + self._set_stopinfo(None, None) + + def trace_dispatch(self, frame, event, arg): + if self.quitting: + return # None + if event == 'line': + return self.dispatch_line(frame) + if event == 'call': + return self.dispatch_call(frame, arg) + if event == 'return': + return self.dispatch_return(frame, arg) + if event == 'exception': + return self.dispatch_exception(frame, arg) + if event == 'c_call': + return self.trace_dispatch + if event == 'c_exception': + return self.trace_dispatch + if event == 'c_return': + return self.trace_dispatch + print('bdb.Bdb.dispatch: unknown debugging event:', repr(event)) + return self.trace_dispatch + + def dispatch_line(self, frame): + if self.stop_here(frame) or self.break_here(frame): + self.user_line(frame) + if self.quitting: raise BdbQuit + return self.trace_dispatch + + def dispatch_call(self, frame, arg): + # XXX 'arg' is no longer used + if self.botframe is None: + # First call of dispatch since reset() + self.botframe = frame.f_back # (CT) Note that this may also be None! 
+ return self.trace_dispatch + if (self.stop_here(frame) or self.break_anywhere(frame)): + # No need to trace this function + return # None + self.user_call(frame, arg) + if self.quitting: raise BdbQuit + return self.trace_dispatch + + def dispatch_return(self, frame, arg): + if self.stop_here(frame) or frame == self.returnframe: + try: + self.frame_returning = frame + self.user_return(frame, arg) + finally: + self.frame_returning = None + if self.quitting: raise BdbQuit + return self.trace_dispatch + + def dispatch_exception(self, frame, arg): + if self.stop_here(frame): + self.user_exception(frame, arg) + if self.quitting: raise BdbQuit + return self.trace_dispatch + + # Normally derived classes don't override the following + # methods, but they may if they want to redefine the + # definition of stopping and breakpoints. + + def is_skipped_module(self, module_name): + for pattern in self.skip: + if fnmatch.fnmatch(module_name, pattern): + return True + return False + + def stop_here(self, frame): + # (CT) stopframe may now also be None, see dispatch_call. + # (CT) the former test for None is therefore removed from here. + if self.skip and \ + self.is_skipped_module(frame.f_globals.get('__name__')): + return False + if frame is self.stopframe: + if self.stoplineno == -1: + return False + return frame.f_lineno >= self.stoplineno + while frame is not None and frame is not self.stopframe: + if frame is self.botframe: + return True + frame = frame.f_back + return False + + def break_here(self, frame): + filename = self.canonic(frame.f_code.co_filename) + if filename not in self.breaks: + return False + lineno = frame.f_lineno + if lineno not in self.breaks[filename]: + # The line itself has no breakpoint, but maybe the line is the + # first line of a function with breakpoint set by function name. + lineno = frame.f_code.co_firstlineno + if lineno not in self.breaks[filename]: + return False + + # flag says ok to delete temp. bp + (bp, flag) = effective(filename, lineno, frame) + if bp: + self.currentbp = bp.number + if (flag and bp.temporary): + self.do_clear(str(bp.number)) + return True + else: + return False + + def do_clear(self, arg): + raise NotImplementedError("subclass of bdb must implement do_clear()") + + def break_anywhere(self, frame): + return self.canonic(frame.f_code.co_filename) in self.breaks + + # Derived classes should override the user_* methods + # to gain control. + + def user_call(self, frame, argument_list): + """This method is called when there is the remote possibility + that we ever need to stop in this function.""" + pass + + def user_line(self, frame): + """This method is called when we stop or break at this line.""" + pass + + def user_return(self, frame, return_value): + """This method is called when a return trap is set here.""" + pass + + def user_exception(self, frame, exc_info): + """This method is called if an exception occurs, + but only if we are to stop at or just below this level.""" + pass + + def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): + self.stopframe = stopframe + self.returnframe = returnframe + self.quitting = False + # stoplineno >= 0 means: stop at line >= the stoplineno + # stoplineno -1 means: don't stop at all + self.stoplineno = stoplineno + + # Derived classes and clients can call the following methods + # to affect the stepping state. 
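+    #
+    # (For example, a pdb-like front end would map its commands onto
+    # these methods: "step" calls self.set_step(), "next" calls
+    # self.set_next(frame), "return" calls self.set_return(frame),
+    # "until" calls self.set_until(frame) and "continue" calls
+    # self.set_continue(); each of them just records the stop/return
+    # frames via _set_stopinfo() and lets trace_dispatch() do the rest.)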
+
+    def set_until(self, frame, lineno=None):
+        """Stop when the line with a line number greater than the current one is
+        reached or when returning from current frame"""
+        # the name "until" is borrowed from gdb
+        if lineno is None:
+            lineno = frame.f_lineno + 1
+        self._set_stopinfo(frame, frame, lineno)
+
+    def set_step(self):
+        """Stop after one line of code."""
+        # Issue #13183: pdb skips frames after hitting a breakpoint and running
+        # step commands.
+        # Restore the trace function in the caller (that may not have been set
+        # for performance reasons) when returning from the current frame.
+        if self.frame_returning:
+            caller_frame = self.frame_returning.f_back
+            if caller_frame and caller_frame.f_trace:
+                caller_frame.f_trace = self.trace_dispatch
+        self._set_stopinfo(None, None)
+
+    def set_next(self, frame):
+        """Stop on the next line in or below the given frame."""
+        self._set_stopinfo(frame, None)
+
+    def set_return(self, frame):
+        """Stop when returning from the given frame."""
+        self._set_stopinfo(frame.f_back, frame)
+
+    def set_trace(self, frame=None):
+        """Start debugging from `frame`.
+
+        If frame is not specified, debugging starts from caller's frame.
+        """
+        if frame is None:
+            frame = sys._getframe().f_back
+        self.reset()
+        while frame:
+            frame.f_trace = self.trace_dispatch
+            self.botframe = frame
+            frame = frame.f_back
+        self.set_step()
+        sys.settrace(self.trace_dispatch)
+
+    def set_continue(self):
+        # Don't stop except at breakpoints or when finished
+        self._set_stopinfo(self.botframe, None, -1)
+        if not self.breaks:
+            # no breakpoints; run without debugger overhead
+            sys.settrace(None)
+            frame = sys._getframe().f_back
+            while frame and frame is not self.botframe:
+                del frame.f_trace
+                frame = frame.f_back
+
+    def set_quit(self):
+        self.stopframe = self.botframe
+        self.returnframe = None
+        self.quitting = True
+        sys.settrace(None)
+
+    # Derived classes and clients can call the following methods
+    # to manipulate breakpoints.  These methods return an
+    # error message if something went wrong, None if all is well.
+    # Set_break prints out the breakpoint line and file:lineno.
+    # Call self.get_*break*() to see the breakpoints or better
+    # for bp in Breakpoint.bpbynumber: if bp: bp.bpprint().
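+    #
+    # (Illustrative sketch; MyDebugger, 'script.py' and line 10 are
+    # made-up names:
+    #
+    #     db = MyDebugger()      # some concrete subclass of Bdb
+    #     err = db.set_break('script.py', 10)
+    #     if err:
+    #         print(err)
+    #     db.run("exec(open('script.py').read())")
+    # )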
+ + def set_break(self, filename, lineno, temporary=False, cond=None, + funcname=None): + filename = self.canonic(filename) + import linecache # Import as late as possible + line = linecache.getline(filename, lineno) + if not line: + return 'Line %s:%d does not exist' % (filename, lineno) + list = self.breaks.setdefault(filename, []) + if lineno not in list: + list.append(lineno) + bp = Breakpoint(filename, lineno, temporary, cond, funcname) + + def _prune_breaks(self, filename, lineno): + if (filename, lineno) not in Breakpoint.bplist: + self.breaks[filename].remove(lineno) + if not self.breaks[filename]: + del self.breaks[filename] + + def clear_break(self, filename, lineno): + filename = self.canonic(filename) + if filename not in self.breaks: + return 'There are no breakpoints in %s' % filename + if lineno not in self.breaks[filename]: + return 'There is no breakpoint at %s:%d' % (filename, lineno) + # If there's only one bp in the list for that file,line + # pair, then remove the breaks entry + for bp in Breakpoint.bplist[filename, lineno][:]: + bp.deleteMe() + self._prune_breaks(filename, lineno) + + def clear_bpbynumber(self, arg): + try: + bp = self.get_bpbynumber(arg) + except ValueError as err: + return str(err) + bp.deleteMe() + self._prune_breaks(bp.file, bp.line) + + def clear_all_file_breaks(self, filename): + filename = self.canonic(filename) + if filename not in self.breaks: + return 'There are no breakpoints in %s' % filename + for line in self.breaks[filename]: + blist = Breakpoint.bplist[filename, line] + for bp in blist: + bp.deleteMe() + del self.breaks[filename] + + def clear_all_breaks(self): + if not self.breaks: + return 'There are no breakpoints' + for bp in Breakpoint.bpbynumber: + if bp: + bp.deleteMe() + self.breaks = {} + + def get_bpbynumber(self, arg): + if not arg: + raise ValueError('Breakpoint number expected') + try: + number = int(arg) + except ValueError: + raise ValueError('Non-numeric breakpoint number %s' % arg) + try: + bp = Breakpoint.bpbynumber[number] + except IndexError: + raise ValueError('Breakpoint number %d out of range' % number) + if bp is None: + raise ValueError('Breakpoint %d already deleted' % number) + return bp + + def get_break(self, filename, lineno): + filename = self.canonic(filename) + return filename in self.breaks and \ + lineno in self.breaks[filename] + + def get_breaks(self, filename, lineno): + filename = self.canonic(filename) + return filename in self.breaks and \ + lineno in self.breaks[filename] and \ + Breakpoint.bplist[filename, lineno] or [] + + def get_file_breaks(self, filename): + filename = self.canonic(filename) + if filename in self.breaks: + return self.breaks[filename] + else: + return [] + + def get_all_breaks(self): + return self.breaks + + # Derived classes and clients can call the following method + # to get a data structure representing a stack trace. 
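+    #
+    # (Sketch: given the current frame f and a traceback t, a client
+    # could render the stack like this:
+    #
+    #     stack, i = self.get_stack(f, t)
+    #     for frame_lineno in stack:
+    #         print(self.format_stack_entry(frame_lineno))
+    # )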
+
+    def get_stack(self, f, t):
+        stack = []
+        if t and t.tb_frame is f:
+            t = t.tb_next
+        while f is not None:
+            stack.append((f, f.f_lineno))
+            if f is self.botframe:
+                break
+            f = f.f_back
+        stack.reverse()
+        i = max(0, len(stack) - 1)
+        while t is not None:
+            stack.append((t.tb_frame, t.tb_lineno))
+            t = t.tb_next
+        if f is None:
+            i = max(0, len(stack) - 1)
+        return stack, i
+
+    def format_stack_entry(self, frame_lineno, lprefix=': '):
+        import linecache, reprlib
+        frame, lineno = frame_lineno
+        filename = self.canonic(frame.f_code.co_filename)
+        s = '%s(%r)' % (filename, lineno)
+        if frame.f_code.co_name:
+            s += frame.f_code.co_name
+        else:
+            s += "<lambda>"
+        if '__args__' in frame.f_locals:
+            args = frame.f_locals['__args__']
+        else:
+            args = None
+        if args:
+            s += reprlib.repr(args)
+        else:
+            s += '()'
+        if '__return__' in frame.f_locals:
+            rv = frame.f_locals['__return__']
+            s += '->'
+            s += reprlib.repr(rv)
+        line = linecache.getline(filename, lineno, frame.f_globals)
+        if line:
+            s += lprefix + line.strip()
+        return s
+
+    # The following methods can be called by clients to use
+    # a debugger to debug a statement or an expression.
+    # Both can be given as a string, or a code object.
+
+    def run(self, cmd, globals=None, locals=None):
+        if globals is None:
+            import __main__
+            globals = __main__.__dict__
+        if locals is None:
+            locals = globals
+        self.reset()
+        if isinstance(cmd, str):
+            cmd = compile(cmd, "<string>", "exec")
+        sys.settrace(self.trace_dispatch)
+        try:
+            exec(cmd, globals, locals)
+        except BdbQuit:
+            pass
+        finally:
+            self.quitting = True
+            sys.settrace(None)
+
+    def runeval(self, expr, globals=None, locals=None):
+        if globals is None:
+            import __main__
+            globals = __main__.__dict__
+        if locals is None:
+            locals = globals
+        self.reset()
+        sys.settrace(self.trace_dispatch)
+        try:
+            return eval(expr, globals, locals)
+        except BdbQuit:
+            pass
+        finally:
+            self.quitting = True
+            sys.settrace(None)
+
+    def runctx(self, cmd, globals, locals):
+        # B/W compatibility
+        self.run(cmd, globals, locals)
+
+    # This method is more useful to debug a single function call.
+
+    def runcall(self, func, *args, **kwds):
+        self.reset()
+        sys.settrace(self.trace_dispatch)
+        res = None
+        try:
+            res = func(*args, **kwds)
+        except BdbQuit:
+            pass
+        finally:
+            self.quitting = True
+            sys.settrace(None)
+        return res
+
+
+def set_trace():
+    Bdb().set_trace()
+
+
+class Breakpoint:
+    """Breakpoint class.
+
+    Implements temporary breakpoints, ignore counts, disabling and
+    (re)-enabling, and conditionals.
+
+    Breakpoints are indexed by number through bpbynumber and by
+    the file,line tuple using bplist.  The former points to a
+    single instance of class Breakpoint.  The latter points to a
+    list of such instances since there may be more than one
+    breakpoint per line.
+
+    """
+
+    # XXX Keeping state in the class is a mistake -- this means
+    # you cannot have more than one active Bdb instance.
+
+    next = 1        # Next bp to be assigned
+    bplist = {}     # indexed by (file, lineno) tuple
+    bpbynumber = [None] # Each entry is None or an instance of Bpt
+                # index 0 is unused, except for marking an
+                # effective break .... see effective()
+
+    def __init__(self, file, line, temporary=False, cond=None, funcname=None):
+        self.funcname = funcname
+        # Needed if funcname is not None.
+        self.func_first_executable_line = None
+        self.file = file    # This better be in canonical form!
+ self.line = line + self.temporary = temporary + self.cond = cond + self.enabled = True + self.ignore = 0 + self.hits = 0 + self.number = Breakpoint.next + Breakpoint.next += 1 + # Build the two lists + self.bpbynumber.append(self) + if (file, line) in self.bplist: + self.bplist[file, line].append(self) + else: + self.bplist[file, line] = [self] + + def deleteMe(self): + index = (self.file, self.line) + self.bpbynumber[self.number] = None # No longer in list + self.bplist[index].remove(self) + if not self.bplist[index]: + # No more bp for this f:l combo + del self.bplist[index] + + def enable(self): + self.enabled = True + + def disable(self): + self.enabled = False + + def bpprint(self, out=None): + if out is None: + out = sys.stdout + print(self.bpformat(), file=out) + + def bpformat(self): + if self.temporary: + disp = 'del ' + else: + disp = 'keep ' + if self.enabled: + disp = disp + 'yes ' + else: + disp = disp + 'no ' + ret = '%-4dbreakpoint %s at %s:%d' % (self.number, disp, + self.file, self.line) + if self.cond: + ret += '\n\tstop only if %s' % (self.cond,) + if self.ignore: + ret += '\n\tignore next %d hits' % (self.ignore,) + if self.hits: + if self.hits > 1: + ss = 's' + else: + ss = '' + ret += '\n\tbreakpoint already hit %d time%s' % (self.hits, ss) + return ret + + def __str__(self): + return 'breakpoint %s at %s:%s' % (self.number, self.file, self.line) + +# -----------end of Breakpoint class---------- + +def checkfuncname(b, frame): + """Check whether we should break here because of `b.funcname`.""" + if not b.funcname: + # Breakpoint was set via line number. + if b.line != frame.f_lineno: + # Breakpoint was set at a line with a def statement and the function + # defined is called: don't break. + return False + return True + + # Breakpoint set via function name. + + if frame.f_code.co_name != b.funcname: + # It's not a function call, but rather execution of def statement. + return False + + # We are in the right frame. + if not b.func_first_executable_line: + # The function is entered for the 1st time. + b.func_first_executable_line = frame.f_lineno + + if b.func_first_executable_line != frame.f_lineno: + # But we are not at the first line number: don't break. + return False + return True + +# Determines if there is an effective (active) breakpoint at this +# line of code. Returns breakpoint number or 0 if none +def effective(file, line, frame): + """Determine which breakpoint for this file:line is to be acted upon. + + Called only if we know there is a bpt at this + location. Returns breakpoint that was triggered and a flag + that indicates if it is ok to delete a temporary bp. + + """ + possibles = Breakpoint.bplist[file, line] + for b in possibles: + if not b.enabled: + continue + if not checkfuncname(b, frame): + continue + # Count every hit when bp is enabled + b.hits += 1 + if not b.cond: + # If unconditional, and ignoring go on to next, else break + if b.ignore > 0: + b.ignore -= 1 + continue + else: + # breakpoint and marker that it's ok to delete if temporary + return (b, True) + else: + # Conditional bp. + # Ignore count applies only to those bpt hits where the + # condition evaluates to true. + try: + val = eval(b.cond, frame.f_globals, frame.f_locals) + if val: + if b.ignore > 0: + b.ignore -= 1 + # continue + else: + return (b, True) + # else: + # continue + except: + # if eval fails, most conservative thing is to stop on + # breakpoint regardless of ignore count. Don't delete + # temporary, as another hint to user. 
+ return (b, False) + return (0+None, None) + + +# -------------------- testing -------------------- + +class Tdb(Bdb): + def user_call(self, frame, args): + name = frame.f_code.co_name + if not name: name = '???' + print('+++ call', name, args) + def user_line(self, frame): + import linecache + name = frame.f_code.co_name + if not name: name = '???' + fn = self.canonic(frame.f_code.co_filename) + line = linecache.getline(fn, frame.f_lineno, frame.f_globals) + print('+++', fn, frame.f_lineno, name, ':', line.strip()) + def user_return(self, frame, retval): + print('+++ return', retval) + def user_exception(self, frame, exc_stuff): + print('+++ exception', exc_stuff) + self.set_continue() + +def foo(n): + print('foo(', n, ')') + x = bar(n*10) + print('bar returned', x) + +def bar(a): + print('bar(', a, ')') + return a/2 + +def test(): + t = Tdb() + t.run('import bdb; bdb.foo(10)') diff --git a/tests/bytecode/pylib-tests/binhex.py b/tests/bytecode/pylib-tests/binhex.py new file mode 100644 index 0000000000..ec5624f9e1 --- /dev/null +++ b/tests/bytecode/pylib-tests/binhex.py @@ -0,0 +1,471 @@ +"""Macintosh binhex compression/decompression. + +easy interface: +binhex(inputfilename, outputfilename) +hexbin(inputfilename, outputfilename) +""" + +# +# Jack Jansen, CWI, August 1995. +# +# The module is supposed to be as compatible as possible. Especially the +# easy interface should work "as expected" on any platform. +# XXXX Note: currently, textfiles appear in mac-form on all platforms. +# We seem to lack a simple character-translate in python. +# (we should probably use ISO-Latin-1 on all but the mac platform). +# XXXX The simple routines are too simple: they expect to hold the complete +# files in-core. Should be fixed. +# XXXX It would be nice to handle AppleDouble format on unix +# (for servers serving macs). +# XXXX I don't understand what happens when you get 0x90 times the same byte on +# input. The resulting code (xx 90 90) would appear to be interpreted as an +# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety... +# +import io +import os +import struct +import binascii + +__all__ = ["binhex","hexbin","Error"] + +class Error(Exception): + pass + +# States (what have we written) +[_DID_HEADER, _DID_DATA, _DID_RSRC] = range(3) + +# Various constants +REASONABLY_LARGE = 32768 # Minimal amount we pass the rle-coder +LINELEN = 64 +RUNCHAR = b"\x90" + +# +# This code is no longer byte-order dependent + + +class FInfo: + def __init__(self): + self.Type = '????' + self.Creator = '????' 
+ self.Flags = 0 + +def getfileinfo(name): + finfo = FInfo() + with io.open(name, 'rb') as fp: + # Quick check for textfile + data = fp.read(512) + if 0 not in data: + finfo.Type = 'TEXT' + fp.seek(0, 2) + dsize = fp.tell() + dir, file = os.path.split(name) + file = file.replace(':', '-', 1) + return file, finfo, dsize, 0 + +class openrsrc: + def __init__(self, *args): + pass + + def read(self, *args): + return b'' + + def write(self, *args): + pass + + def close(self): + pass + +class _Hqxcoderengine: + """Write data to the coder in 3-byte chunks""" + + def __init__(self, ofp): + self.ofp = ofp + self.data = b'' + self.hqxdata = b'' + self.linelen = LINELEN - 1 + + def write(self, data): + self.data = self.data + data + datalen = len(self.data) + todo = (datalen // 3) * 3 + data = self.data[:todo] + self.data = self.data[todo:] + if not data: + return + self.hqxdata = self.hqxdata + binascii.b2a_hqx(data) + self._flush(0) + + def _flush(self, force): + first = 0 + while first <= len(self.hqxdata) - self.linelen: + last = first + self.linelen + self.ofp.write(self.hqxdata[first:last] + b'\n') + self.linelen = LINELEN + first = last + self.hqxdata = self.hqxdata[first:] + if force: + self.ofp.write(self.hqxdata + b':\n') + + def close(self): + if self.data: + self.hqxdata = self.hqxdata + binascii.b2a_hqx(self.data) + self._flush(1) + self.ofp.close() + del self.ofp + +class _Rlecoderengine: + """Write data to the RLE-coder in suitably large chunks""" + + def __init__(self, ofp): + self.ofp = ofp + self.data = b'' + + def write(self, data): + self.data = self.data + data + if len(self.data) < REASONABLY_LARGE: + return + rledata = binascii.rlecode_hqx(self.data) + self.ofp.write(rledata) + self.data = b'' + + def close(self): + if self.data: + rledata = binascii.rlecode_hqx(self.data) + self.ofp.write(rledata) + self.ofp.close() + del self.ofp + +class BinHex: + def __init__(self, name_finfo_dlen_rlen, ofp): + name, finfo, dlen, rlen = name_finfo_dlen_rlen + close_on_error = False + if isinstance(ofp, str): + ofname = ofp + ofp = io.open(ofname, 'wb') + close_on_error = True + try: + ofp.write(b'(This file must be converted with BinHex 4.0)\r\r:') + hqxer = _Hqxcoderengine(ofp) + self.ofp = _Rlecoderengine(hqxer) + self.crc = 0 + if finfo is None: + finfo = FInfo() + self.dlen = dlen + self.rlen = rlen + self._writeinfo(name, finfo) + self.state = _DID_HEADER + except: + if close_on_error: + ofp.close() + raise + + def _writeinfo(self, name, finfo): + nl = len(name) + if nl > 63: + raise Error('Filename too long') + d = bytes([nl]) + name.encode("latin-1") + b'\0' + tp, cr = finfo.Type, finfo.Creator + if isinstance(tp, str): + tp = tp.encode("latin-1") + if isinstance(cr, str): + cr = cr.encode("latin-1") + d2 = tp + cr + + # Force all structs to be packed with big-endian + d3 = struct.pack('>h', finfo.Flags) + d4 = struct.pack('>ii', self.dlen, self.rlen) + info = d + d2 + d3 + d4 + self._write(info) + self._writecrc() + + def _write(self, data): + self.crc = binascii.crc_hqx(data, self.crc) + self.ofp.write(data) + + def _writecrc(self): + # XXXX Should this be here?? 
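+        # Note: under Python 3, binascii.crc_hqx() returns an unsigned value
+        # in range(0, 0x10000), so the signed '>h' branch below is unlikely
+        # ever to be taken.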
+ # self.crc = binascii.crc_hqx('\0\0', self.crc) + if self.crc < 0: + fmt = '>h' + else: + fmt = '>H' + self.ofp.write(struct.pack(fmt, self.crc)) + self.crc = 0 + + def write(self, data): + if self.state != _DID_HEADER: + raise Error('Writing data at the wrong time') + self.dlen = self.dlen - len(data) + self._write(data) + + def close_data(self): + if self.dlen != 0: + raise Error('Incorrect data size, diff=%r' % (self.rlen,)) + self._writecrc() + self.state = _DID_DATA + + def write_rsrc(self, data): + if self.state < _DID_DATA: + self.close_data() + if self.state != _DID_DATA: + raise Error('Writing resource data at the wrong time') + self.rlen = self.rlen - len(data) + self._write(data) + + def close(self): + if self.state < _DID_DATA: + self.close_data() + if self.state != _DID_DATA: + raise Error('Close at the wrong time') + if self.rlen != 0: + raise Error("Incorrect resource-datasize, diff=%r" % (self.rlen,)) + self._writecrc() + self.ofp.close() + self.state = None + del self.ofp + +def binhex(inp, out): + """binhex(infilename, outfilename): create binhex-encoded copy of a file""" + finfo = getfileinfo(inp) + ofp = BinHex(finfo, out) + + ifp = io.open(inp, 'rb') + # XXXX Do textfile translation on non-mac systems + while True: + d = ifp.read(128000) + if not d: break + ofp.write(d) + ofp.close_data() + ifp.close() + + ifp = openrsrc(inp, 'rb') + while True: + d = ifp.read(128000) + if not d: break + ofp.write_rsrc(d) + ofp.close() + ifp.close() + +class _Hqxdecoderengine: + """Read data via the decoder in 4-byte chunks""" + + def __init__(self, ifp): + self.ifp = ifp + self.eof = 0 + + def read(self, totalwtd): + """Read at least wtd bytes (or until EOF)""" + decdata = b'' + wtd = totalwtd + # + # The loop here is convoluted, since we don't really now how + # much to decode: there may be newlines in the incoming data. + while wtd > 0: + if self.eof: return decdata + wtd = ((wtd + 2) // 3) * 4 + data = self.ifp.read(wtd) + # + # Next problem: there may not be a complete number of + # bytes in what we pass to a2b. Solve by yet another + # loop. + # + while True: + try: + decdatacur, self.eof = binascii.a2b_hqx(data) + break + except binascii.Incomplete: + pass + newdata = self.ifp.read(1) + if newdata: + raise Error('Premature EOF on binhex file') + data = data + newdata + decdata = decdata + decdatacur + wtd = totalwtd - len(decdata) + if decdata and self.eof: + raise Error('Premature EOF on binhex file') + return decdata + + def close(self): + self.ifp.close() + +class _Rledecoderengine: + """Read data via the RLE-coder""" + + def __init__(self, ifp): + self.ifp = ifp + self.pre_buffer = b'' + self.post_buffer = b'' + self.eof = 0 + + def read(self, wtd): + if wtd > len(self.post_buffer): + self._fill(wtd - len(self.post_buffer)) + rv = self.post_buffer[:wtd] + self.post_buffer = self.post_buffer[wtd:] + return rv + + def _fill(self, wtd): + self.pre_buffer = self.pre_buffer + self.ifp.read(wtd + 4) + if self.ifp.eof: + self.post_buffer = self.post_buffer + \ + binascii.rledecode_hqx(self.pre_buffer) + self.pre_buffer = b'' + return + + # + # Obfuscated code ahead. We have to take care that we don't + # end up with an orphaned RUNCHAR later on. So, we keep a couple + # of bytes in the buffer, depending on what the end of + # the buffer looks like: + # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0) + # '?\220' - Keep 2 bytes: repeated something-else + # '\220\0' - Escaped \220: Keep 2 bytes. + # '?\220?' 
- Complete repeat sequence: decode all + # otherwise: keep 1 byte. + # + mark = len(self.pre_buffer) + if self.pre_buffer[-3:] == RUNCHAR + b'\0' + RUNCHAR: + mark = mark - 3 + elif self.pre_buffer[-1:] == RUNCHAR: + mark = mark - 2 + elif self.pre_buffer[-2:] == RUNCHAR + b'\0': + mark = mark - 2 + elif self.pre_buffer[-2:-1] == RUNCHAR: + pass # Decode all + else: + mark = mark - 1 + + self.post_buffer = self.post_buffer + \ + binascii.rledecode_hqx(self.pre_buffer[:mark]) + self.pre_buffer = self.pre_buffer[mark:] + + def close(self): + self.ifp.close() + +class HexBin: + def __init__(self, ifp): + if isinstance(ifp, str): + ifp = io.open(ifp, 'rb') + # + # Find initial colon. + # + while True: + ch = ifp.read(1) + if not ch: + raise Error("No binhex data found") + # Cater for \r\n terminated lines (which show up as \n\r, hence + # all lines start with \r) + if ch == b'\r': + continue + if ch == b':': + break + + hqxifp = _Hqxdecoderengine(ifp) + self.ifp = _Rledecoderengine(hqxifp) + self.crc = 0 + self._readheader() + + def _read(self, len): + data = self.ifp.read(len) + self.crc = binascii.crc_hqx(data, self.crc) + return data + + def _checkcrc(self): + filecrc = struct.unpack('>h', self.ifp.read(2))[0] & 0xffff + #self.crc = binascii.crc_hqx('\0\0', self.crc) + # XXXX Is this needed?? + self.crc = self.crc & 0xffff + if filecrc != self.crc: + raise Error('CRC error, computed %x, read %x' + % (self.crc, filecrc)) + self.crc = 0 + + def _readheader(self): + len = self._read(1) + fname = self._read(ord(len)) + rest = self._read(19) + self._checkcrc() + + type = rest[1:5] + creator = rest[5:9] + flags = struct.unpack('>h', rest[9:11])[0] + self.dlen = struct.unpack('>l', rest[11:15])[0] + self.rlen = struct.unpack('>l', rest[15:19])[0] + + self.FName = fname + self.FInfo = FInfo() + self.FInfo.Creator = creator + self.FInfo.Type = type + self.FInfo.Flags = flags + + self.state = _DID_HEADER + + def read(self, *n): + if self.state != _DID_HEADER: + raise Error('Read data at wrong time') + if n: + n = n[0] + n = min(n, self.dlen) + else: + n = self.dlen + rv = b'' + while len(rv) < n: + rv = rv + self._read(n-len(rv)) + self.dlen = self.dlen - n + return rv + + def close_data(self): + if self.state != _DID_HEADER: + raise Error('close_data at wrong time') + if self.dlen: + dummy = self._read(self.dlen) + self._checkcrc() + self.state = _DID_DATA + + def read_rsrc(self, *n): + if self.state == _DID_HEADER: + self.close_data() + if self.state != _DID_DATA: + raise Error('Read resource data at wrong time') + if n: + n = n[0] + n = min(n, self.rlen) + else: + n = self.rlen + self.rlen = self.rlen - n + return self._read(n) + + def close(self): + if self.rlen: + dummy = self.read_rsrc(self.rlen) + self._checkcrc() + self.state = _DID_RSRC + self.ifp.close() + +def hexbin(inp, out): + """hexbin(infilename, outfilename) - Decode binhexed file""" + ifp = HexBin(inp) + finfo = ifp.FInfo + if not out: + out = ifp.FName + + ofp = io.open(out, 'wb') + # XXXX Do translation on non-mac systems + while True: + d = ifp.read(128000) + if not d: break + ofp.write(d) + ofp.close() + ifp.close_data() + + d = ifp.read_rsrc(128000) + if d: + ofp = openrsrc(out, 'wb') + ofp.write(d) + while True: + d = ifp.read_rsrc(128000) + if not d: break + ofp.write(d) + ofp.close() + + ifp.close() diff --git a/tests/bytecode/pylib-tests/bisect.py b/tests/bytecode/pylib-tests/bisect.py new file mode 100644 index 0000000000..4a4d05255e --- /dev/null +++ b/tests/bytecode/pylib-tests/bisect.py @@ -0,0 +1,92 @@ +"""Bisection 
algorithms.""" + +def insort_right(a, x, lo=0, hi=None): + """Insert item x in list a, and keep it sorted assuming a is sorted. + + If x is already in a, insert it to the right of the rightmost x. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + """ + + if lo < 0: + raise ValueError('lo must be non-negative') + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if x < a[mid]: hi = mid + else: lo = mid+1 + a.insert(lo, x) + +insort = insort_right # backward compatibility + +def bisect_right(a, x, lo=0, hi=None): + """Return the index where to insert item x in list a, assuming a is sorted. + + The return value i is such that all e in a[:i] have e <= x, and all e in + a[i:] have e > x. So if x already appears in the list, a.insert(x) will + insert just after the rightmost x already there. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + """ + + if lo < 0: + raise ValueError('lo must be non-negative') + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if x < a[mid]: hi = mid + else: lo = mid+1 + return lo + +bisect = bisect_right # backward compatibility + +def insort_left(a, x, lo=0, hi=None): + """Insert item x in list a, and keep it sorted assuming a is sorted. + + If x is already in a, insert it to the left of the leftmost x. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + """ + + if lo < 0: + raise ValueError('lo must be non-negative') + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if a[mid] < x: lo = mid+1 + else: hi = mid + a.insert(lo, x) + + +def bisect_left(a, x, lo=0, hi=None): + """Return the index where to insert item x in list a, assuming a is sorted. + + The return value i is such that all e in a[:i] have e < x, and all e in + a[i:] have e >= x. So if x already appears in the list, a.insert(x) will + insert just before the leftmost x already there. + + Optional args lo (default 0) and hi (default len(a)) bound the + slice of a to be searched. + """ + + if lo < 0: + raise ValueError('lo must be non-negative') + if hi is None: + hi = len(a) + while lo < hi: + mid = (lo+hi)//2 + if a[mid] < x: lo = mid+1 + else: hi = mid + return lo + +# Overwrite above definitions with a fast C implementation +try: + from _bisect import * +except ImportError: + pass diff --git a/tests/bytecode/pylib-tests/bz2.py b/tests/bytecode/pylib-tests/bz2.py new file mode 100644 index 0000000000..6a4fd505b0 --- /dev/null +++ b/tests/bytecode/pylib-tests/bz2.py @@ -0,0 +1,504 @@ +"""Interface to the libbzip2 compression library. + +This module provides a file interface, classes for incremental +(de)compression, and functions for one-shot (de)compression. +""" + +__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor", + "open", "compress", "decompress"] + +__author__ = "Nadeem Vawda " + +import builtins +import io +import warnings + +try: + from threading import RLock +except ImportError: + from dummy_threading import RLock + +from _bz2 import BZ2Compressor, BZ2Decompressor + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +_MODE_READ_EOF = 2 +_MODE_WRITE = 3 + +_BUFFER_SIZE = 8192 + + +class BZ2File(io.BufferedIOBase): + + """A file object providing transparent bzip2 (de)compression. + + A BZ2File can act as a wrapper for an existing file object, or refer + directly to a named file on disk. 
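+
+    As a subclass of io.BufferedIOBase, a BZ2File also supports iteration
+    and use as a context manager in a with statement.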
+ + Note that BZ2File provides a *binary* file interface - data read is + returned as bytes, and data to be written should be given as bytes. + """ + + def __init__(self, filename, mode="r", buffering=None, compresslevel=9): + """Open a bzip2-compressed file. + + If filename is a str or bytes object, is gives the name of the file to + be opened. Otherwise, it should be a file object, which will be used to + read or write the compressed data. + + mode can be 'r' for reading (default), 'w' for (over)writing, or 'a' for + appending. These can equivalently be given as 'rb', 'wb', and 'ab'. + + buffering is ignored. Its use is deprecated. + + If mode is 'w' or 'a', compresslevel can be a number between 1 + and 9 specifying the level of compression: 1 produces the least + compression, and 9 (default) produces the most compression. + + If mode is 'r', the input file may be the concatenation of + multiple compressed streams. + """ + # This lock must be recursive, so that BufferedIOBase's + # readline(), readlines() and writelines() don't deadlock. + self._lock = RLock() + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + self._pos = 0 + self._size = -1 + + if buffering is not None: + warnings.warn("Use of 'buffering' argument is deprecated", + DeprecationWarning) + + if not (1 <= compresslevel <= 9): + raise ValueError("compresslevel must be between 1 and 9") + + if mode in (0+"", "r", "rb"): + mode = "rb" + mode_code = _MODE_READ + self._decompressor = BZ2Decompressor() + self._buffer = b"" + self._buffer_offset = 0 + elif mode in (0+"w", "wb"): + mode = "wb" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + elif mode in (0+"a", "ab"): + mode = "ab" + mode_code = _MODE_WRITE + self._compressor = BZ2Compressor(compresslevel) + else: + raise ValueError("Invalid mode: {!r}".format(mode)) + + if isinstance(filename, (str, bytes)): + self._fp = builtins.open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, "read") or hasattr(filename, "write"): + self._fp = filename + self._mode = mode_code + else: + raise TypeError("filename must be a str or bytes object, or a file") + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. + """ + with self._lock: + if self._mode == _MODE_CLOSED: + return + try: + if self._mode in (_MODE_READ, _MODE_READ_EOF): + self._decompressor = None + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + self._buffer = b"" + self._buffer_offset = 0 + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._fp.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode in (_MODE_READ, _MODE_READ_EOF) + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + # Mode-checking helper functions. 
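+    # Each helper raises ValueError if the file is already closed, and
+    # otherwise raises io.UnsupportedOperation if the file is open in the
+    # wrong mode for the requested operation.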
+ + def _check_not_closed(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def _check_can_read(self): + if self._mode not in (_MODE_READ, _MODE_READ_EOF): + self._check_not_closed() + raise io.UnsupportedOperation("File not open for reading") + + def _check_can_write(self): + if self._mode != _MODE_WRITE: + self._check_not_closed() + raise io.UnsupportedOperation("File not open for writing") + + def _check_can_seek(self): + if self._mode not in (_MODE_READ, _MODE_READ_EOF): + self._check_not_closed() + raise io.UnsupportedOperation("Seeking is only supported " + "on files open for reading") + if not self._fp.seekable(): + raise io.UnsupportedOperation("The underlying file object " + "does not support seeking") + + # Fill the readahead buffer if it is empty. Returns False on EOF. + def _fill_buffer(self): + if self._mode == _MODE_READ_EOF: + return False + # Depending on the input data, our call to the decompressor may not + # return any data. In this case, try again after reading another block. + while self._buffer_offset == len(self._buffer): + rawblock = (self._decompressor.unused_data or + self._fp.read(_BUFFER_SIZE)) + + if not rawblock: + if self._decompressor.eof: + self._mode = _MODE_READ_EOF + self._size = self._pos + return False + else: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + + # Continue to next stream. + if self._decompressor.eof: + self._decompressor = BZ2Decompressor() + + self._buffer = self._decompressor.decompress(rawblock) + self._buffer_offset = 0 + return True + + # Read data until EOF. + # If return_data is false, consume the data without returning it. + def _read_all(self, return_data=True): + # The loop assumes that _buffer_offset is 0. Ensure that this is true. + self._buffer = self._buffer[self._buffer_offset:] + self._buffer_offset = 0 + + blocks = [] + while self._fill_buffer(): + if return_data: + blocks.append(self._buffer) + self._pos += len(self._buffer) + self._buffer = b"" + if return_data: + return b"".join(blocks) + + # Read a block of up to n bytes. + # If return_data is false, consume the data without returning it. + def _read_block(self, n, return_data=True): + # If we have enough data buffered, return immediately. + end = self._buffer_offset + n + if end <= len(self._buffer): + data = self._buffer[self._buffer_offset : end] + self._buffer_offset = end + self._pos += len(data) + return data if return_data else None + + # The loop assumes that _buffer_offset is 0. Ensure that this is true. + self._buffer = self._buffer[self._buffer_offset:] + self._buffer_offset = 0 + + blocks = [] + while n > 0 and self._fill_buffer(): + if n < len(self._buffer): + data = self._buffer[:n] + self._buffer_offset = n + else: + data = self._buffer + self._buffer = b"" + if return_data: + blocks.append(data) + self._pos += len(data) + n -= len(data) + if return_data: + return b"".join(blocks) + + def peek(self, n=0): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + with self._lock: + self._check_can_read() + if not self._fill_buffer(): + return b"" + return self._buffer[self._buffer_offset:] + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. 
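+        Reads are served from an internal decompression buffer, so a
+        single call may touch the underlying file zero or more times.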
+ """ + with self._lock: + self._check_can_read() + if size == 0: + return b"" + elif size < 0: + return self._read_all() + else: + return self._read_block(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. + + Returns b'' if the file is at EOF. + """ + # Usually, read1() calls _fp.read() at most once. However, sometimes + # this does not give enough data for the decompressor to make progress. + # In this case we make multiple reads, to avoid returning b"". + with self._lock: + self._check_can_read() + if (size == 0 or + # Only call _fill_buffer() if the buffer is actually empty. + # This gives a significant speedup if *size* is small. + (self._buffer_offset == len(self._buffer) and self._fill_buffer())): + return b"" + if size > 0: + data = self._buffer[self._buffer_offset : + self._buffer_offset + size] + self._buffer_offset += len(data) + else: + data = self._buffer[self._buffer_offset:] + self._buffer = b"" + self._buffer_offset = 0 + self._pos += len(data) + return data + + def readinto(self, b): + """Read up to len(b) bytes into b. + + Returns the number of bytes read (0 for EOF). + """ + with self._lock: + return io.BufferedIOBase.readinto(self, b) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + if not isinstance(size, int): + if not hasattr(size, "__index__"): + raise TypeError("Integer argument expected") + size = size.__index__() + with self._lock: + self._check_can_read() + # Shortcut for the common case - the whole line is in the buffer. + if size < 0: + end = self._buffer.find(b"\n", self._buffer_offset) + 1 + if end > 0: + line = self._buffer[self._buffer_offset : end] + self._buffer_offset = end + self._pos += len(line) + return line + return io.BufferedIOBase.readline(self, size) + + def readlines(self, size=-1): + """Read a list of lines of uncompressed bytes from the file. + + size can be specified to control the number of lines read: no + further lines will be read once the total size of the lines read + so far equals or exceeds size. + """ + if not isinstance(size, int): + if not hasattr(size, "__index__"): + raise TypeError("Integer argument expected") + size = size.__index__() + with self._lock: + return io.BufferedIOBase.readlines(self, size) + + def write(self, data): + """Write a byte string to the file. + + Returns the number of uncompressed bytes written, which is + always len(data). Note that due to buffering, the file on disk + may not reflect the data written until close() is called. + """ + with self._lock: + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) + + def writelines(self, seq): + """Write a sequence of byte strings to the file. + + Returns the number of uncompressed bytes written. + seq can be any iterable yielding byte strings. + + Line separators are not added between the written byte strings. + """ + with self._lock: + return io.BufferedIOBase.writelines(self, seq) + + # Rewind the file to the beginning of the data stream. 
+ def _rewind(self): + self._fp.seek(0, 0) + self._mode = _MODE_READ + self._pos = 0 + self._decompressor = BZ2Decompressor() + self._buffer = b"" + self._buffer_offset = 0 + + def seek(self, offset, whence=0): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Values for whence are: + + 0: start of stream (default); offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, + this operation may be extremely slow. + """ + with self._lock: + self._check_can_seek() + + # Recalculate offset as an absolute file position. + if whence == 0: + pass + elif whence == 1: + offset = self._pos + offset + elif whence == 2: + # Seeking relative to EOF - we need to know the file's size. + if self._size < 0: + self._read_all(return_data=False) + offset = self._size + offset + else: + raise ValueError("Invalid value for whence: {}".format(whence)) + + # Make it so that offset is the number of bytes to skip forward. + if offset < self._pos: + self._rewind() + else: + offset -= self._pos + + # Read and discard data until we reach the desired position. + self._read_block(offset, return_data=False) + + return self._pos + + def tell(self): + """Return the current file position.""" + with self._lock: + self._check_not_closed() + return self._pos + + +def open(filename, mode="rb", compresslevel=9, + encoding=None, errors=None, newline=None): + """Open a bzip2-compressed file in binary or text mode. + + The filename argument can be an actual filename (a str or bytes object), or + an existing file object to read from or write to. + + The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode, + or "rt", "wt" or "at" for text mode. The default mode is "rb", and the + default compresslevel is 9. + + For binary mode, this function is equivalent to the BZ2File constructor: + BZ2File(filename, mode, compresslevel). In this case, the encoding, errors + and newline arguments must not be provided. + + For text mode, a BZ2File object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError("Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + bz_mode = mode.replace("t", "") + binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel) + + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + + +def compress(data, compresslevel=9): + """Compress a block of data. + + compresslevel, if given, must be a number between 1 and 9. + + For incremental compression, use a BZ2Compressor object instead. + """ + comp = BZ2Compressor(compresslevel) + return comp.compress(data) + comp.flush() + + +def decompress(data): + """Decompress a block of data. + + For incremental decompression, use a BZ2Decompressor object instead. 
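+
+    A minimal round trip using this module's one-shot helpers
+    (illustrative values):
+
+        blob = compress(b"payload")
+        assert decompress(blob) == b"payload"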
+ """ + if len(data) == 0: + return b"" + + results = [] + while True: + decomp = BZ2Decompressor() + results.append(decomp.decompress(data)) + if not decomp.eof: + raise ValueError("Compressed data ended before the " + "end-of-stream marker was reached") + if not decomp.unused_data: + return b"".join(results) + # There is unused data left over. Proceed to next stream. + data = decomp.unused_data diff --git a/tests/bytecode/pylib-tests/cProfile.py b/tests/bytecode/pylib-tests/cProfile.py new file mode 100644 index 0000000000..c24d45bab4 --- /dev/null +++ b/tests/bytecode/pylib-tests/cProfile.py @@ -0,0 +1,195 @@ +#! /usr/bin/env python3 + +"""Python interface for the 'lsprof' profiler. + Compatible with the 'profile' module. +""" + +__all__ = ["run", "runctx", "Profile"] + +import _lsprof + +# ____________________________________________________________ +# Simple interface + +def run(statement, filename=None, sort=-1): + """Run statement under profiler optionally saving results in filename + + This function takes a single argument that can be passed to the + "exec" statement, and an optional file name. In all cases this + routine attempts to "exec" its first argument and gather profiling + statistics from the execution. If no file name is present, then this + function automatically prints a simple profiling report, sorted by the + standard name string (file/line/function-name) that is presented in + each line. + """ + prof = Profile() + result = None + try: + try: + prof = prof.run(statement) + except SystemExit: + pass + finally: + if filename is not None: + prof.dump_stats(filename) + else: + result = prof.print_stats(sort) + return result + +def runctx(statement, globals, locals, filename=None, sort=-1): + """Run statement under profiler, supplying your own globals and locals, + optionally saving results in filename. + + statement and filename have the same semantics as profile.run + """ + prof = Profile() + result = None + try: + try: + prof = prof.runctx(statement, globals, locals) + except SystemExit: + pass + finally: + if filename is not None: + prof.dump_stats(filename) + else: + result = prof.print_stats(sort) + return result + +# ____________________________________________________________ + +class Profile(_lsprof.Profiler): + """Profile(custom_timer=None, time_unit=None, subcalls=True, builtins=True) + + Builds a profiler object using the specified timer function. + The default timer is a fast built-in one based on real time. + For custom timer functions returning integers, time_unit can + be a float specifying a scale (i.e. how long each integer unit + is, in seconds). + """ + + # Most of the functionality is in the base class. + # This subclass only adds convenient and backward-compatible methods. 
+ + def print_stats(self, sort=-1): + import pstats + pstats.Stats(self).strip_dirs().sort_stats(sort).print_stats() + + def dump_stats(self, file): + import marshal + f = open(file, 'wb') + self.create_stats() + marshal.dump(self.stats, f) + f.close() + + def create_stats(self): + self.disable() + self.snapshot_stats() + + def snapshot_stats(self): + entries = self.getstats() + self.stats = {} + callersdicts = {} + # call information + for entry in entries: + func = label(entry.code) + nc = entry.callcount # ncalls column of pstats (before '/') + cc = nc - entry.reccallcount # ncalls column of pstats (after '/') + tt = entry.inlinetime # tottime column of pstats + ct = entry.totaltime # cumtime column of pstats + callers = {} + callersdicts[id(entry.code)] = callers + self.stats[func] = cc, nc, tt, ct, callers + # subcall information + for entry in entries: + if entry.calls: + func = label(entry.code) + for subentry in entry.calls: + try: + callers = callersdicts[id(subentry.code)] + except KeyError: + continue + nc = subentry.callcount + cc = nc - subentry.reccallcount + tt = subentry.inlinetime + ct = subentry.totaltime + if func in callers: + prev = callers[func] + nc += prev[0] + cc += prev[1] + tt += prev[2] + ct += prev[3] + callers[func] = nc, cc, tt, ct + + # The following two methods can be called by clients to use + # a profiler to profile a statement, given as a string. + + def run(self, cmd): + import __main__ + dict = __main__.__dict__ + return self.runctx(cmd, dict, dict) + + def runctx(self, cmd, globals, locals): + self.enable() + try: + exec(cmd, globals, locals) + finally: + self.disable() + return self + + # This method is more useful to profile a single function call. + def runcall(self, func, *args, **kw): + self.enable() + try: + return func(*args, **kw) + finally: + self.disable() + +# ____________________________________________________________ + +def label(code): + if isinstance(code, str): + return ('~', 0, code) # built-in functions ('~' sorts at the end) + else: + return (code.co_filename, code.co_firstlineno, code.co_name) + +# ____________________________________________________________ + +def main(): + import os, sys + from optparse import OptionParser + usage = "cProfile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." + parser = OptionParser(usage=usage) + parser.allow_interspersed_args = False + parser.add_option('-o', '--outfile', dest="outfile", + help="Save stats to ", default=None) + parser.add_option('-s', '--sort', dest="sort", + help="Sort order when printing to stdout, based on pstats.Stats class", + default=-1) + + if not sys.argv[1:]: + parser.print_usage() + sys.exit(2) + + (options, args) = parser.parse_args() + sys.argv[:] = args + + if len(args) > 0: + progname = args[0] + sys.path.insert(0, os.path.dirname(progname)) + with open(progname, 'rb') as fp: + code = compile(fp.read(), progname, 'exec') + globs = { + '__file__': progname, + '__name__': '__main__', + '__package__': None, + '__cached__': None, + } + runctx(code, globs, None, options.outfile, options.sort) + else: + parser.print_usage() + return parser + +# When invoked as main program, invoke the profiler on a script +if __name__ == '__main__': + main() diff --git a/tests/bytecode/pylib-tests/chunk.py b/tests/bytecode/pylib-tests/chunk.py new file mode 100644 index 0000000000..5863ed0846 --- /dev/null +++ b/tests/bytecode/pylib-tests/chunk.py @@ -0,0 +1,167 @@ +"""Simple class to read IFF chunks. 
+ +An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File +Format)) has the following structure: + ++----------------+ +| ID (4 bytes) | ++----------------+ +| size (4 bytes) | ++----------------+ +| data | +| ... | ++----------------+ + +The ID is a 4-byte string which identifies the type of chunk. + +The size field (a 32-bit value, encoded using big-endian byte order) +gives the size of the whole chunk, including the 8-byte header. + +Usually an IFF-type file consists of one or more chunks. The proposed +usage of the Chunk class defined here is to instantiate an instance at +the start of each chunk and read from the instance until it reaches +the end, after which a new instance can be instantiated. At the end +of the file, creating a new instance will fail with a EOFError +exception. + +Usage: +while True: + try: + chunk = Chunk(file) + except EOFError: + break + chunktype = chunk.getname() + while True: + data = chunk.read(nbytes) + if not data: + pass + # do something with data + +The interface is file-like. The implemented methods are: +read, close, seek, tell, isatty. +Extra methods are: skip() (called by close, skips to the end of the chunk), +getname() (returns the name (ID) of the chunk) + +The __init__ method has one required argument, a file-like object +(including a chunk instance), and one optional argument, a flag which +specifies whether or not chunks are aligned on 2-byte boundaries. The +default is 1, i.e. aligned. +""" + +class Chunk: + def __init__(self, file, align=True, bigendian=True, inclheader=False): + import struct + self.closed = False + self.align = align # whether to align to word (2-byte) boundaries + if bigendian: + strflag = '>' + else: + strflag = '<' + self.file = file + self.chunkname = file.read(4) + if len(self.chunkname) < 4: + raise EOFError + try: + self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0] + except struct.error: + raise EOFError + if inclheader: + self.chunksize = self.chunksize - 8 # subtract header + self.size_read = 0 + try: + self.offset = self.file.tell() + except (AttributeError, IOError): + self.seekable = False + else: + self.seekable = True + + def getname(self): + """Return the name (ID) of the current chunk.""" + return self.chunkname + + def getsize(self): + """Return the size of the current chunk.""" + return self.chunksize + + def close(self): + if not self.closed: + self.skip() + self.closed = True + + def isatty(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return False + + def seek(self, pos, whence=0): + """Seek to specified position into the chunk. + Default position is 0 (start of chunk). + If the file is not seekable, this will result in an error. + """ + + if self.closed: + raise ValueError("I/O operation on closed file") + if not self.seekable: + raise IOError("cannot seek") + if whence == 1: + pos = pos + self.size_read + elif whence == 2: + pos = pos + self.chunksize + if pos < 0 or pos > self.chunksize: + raise RuntimeError + self.file.seek(self.offset + pos, 0) + self.size_read = pos + + def tell(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return self.size_read + + def read(self, size=-1): + """Read at most size bytes from the chunk. + If size is omitted or negative, read until the end + of the chunk. 
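+        Reads never extend past the end of the chunk: at most
+        chunksize - size_read bytes are returned.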
+ """ + + if self.closed: + raise ValueError("I/O operation on closed file") + if self.size_read >= self.chunksize: + return '' + if size < 0: + size = self.chunksize - self.size_read + if size > self.chunksize - self.size_read: + size = self.chunksize - self.size_read + data = self.file.read(size) + self.size_read = self.size_read + len(data) + if self.size_read == self.chunksize and \ + self.align and \ + (self.chunksize & 1): + dummy = self.file.read(1) + self.size_read = self.size_read + len(dummy) + return data + + def skip(self): + """Skip the rest of the chunk. + If you are not interested in the contents of the chunk, + this method should be called so that the file points to + the start of the next chunk. + """ + + if self.closed: + raise ValueError("I/O operation on closed file") + if self.seekable: + try: + n = self.chunksize - self.size_read + # maybe fix alignment + if self.align and (self.chunksize & 1): + n = n + 1 + self.file.seek(n, 1) + self.size_read = self.size_read + n + return + except IOError: + pass + while self.size_read < self.chunksize: + n = min(8192, self.chunksize - self.size_read) + dummy = self.read(n) + if not dummy: + raise EOFError diff --git a/tests/bytecode/pylib-tests/code.py b/tests/bytecode/pylib-tests/code.py new file mode 100644 index 0000000000..9020aab701 --- /dev/null +++ b/tests/bytecode/pylib-tests/code.py @@ -0,0 +1,302 @@ +"""Utilities needed to emulate Python's interactive interpreter. + +""" + +# Inspired by similar code by Jeff Epler and Fredrik Lundh. + + +import sys +import traceback +from codeop import CommandCompiler, compile_command + +__all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact", + "compile_command"] + +class InteractiveInterpreter: + """Base class for InteractiveConsole. + + This class deals with parsing and interpreter state (the user's + namespace); it doesn't deal with input buffering or prompting or + input file naming (the filename is always passed in explicitly). + + """ + + def __init__(self, locals=None): + """Constructor. + + The optional 'locals' argument specifies the dictionary in + which code will be executed; it defaults to a newly created + dictionary with key "__name__" set to "__console__" and key + "__doc__" set to None. + + """ + if locals is None: + locals = {"__name__": "__console__", "__doc__": None} + self.locals = locals + self.compile = CommandCompiler() + + def runsource(self, source, filename="", symbol="single"): + """Compile and run some source in the interpreter. + + Arguments are as for compile_command(). + + One several things can happen: + + 1) The input is incorrect; compile_command() raised an + exception (SyntaxError or OverflowError). A syntax traceback + will be printed by calling the showsyntaxerror() method. + + 2) The input is incomplete, and more input is required; + compile_command() returned None. Nothing happens. + + 3) The input is complete; compile_command() returned a code + object. The code is executed by calling self.runcode() (which + also handles run-time exceptions, except for SystemExit). + + The return value is True in case 2, False in the other cases (unless + an exception is raised). The return value can be used to + decide whether to use sys.ps1 or sys.ps2 to prompt the next + line. 
+ + """ + try: + code = self.compile(source, filename, symbol) + except (OverflowError, SyntaxError, ValueError): + # Case 1 + self.showsyntaxerror(filename) + return False + + if code is None: + # Case 2 + return True + + # Case 3 + self.runcode(code) + return False + + def runcode(self, code): + """Execute a code object. + + When an exception occurs, self.showtraceback() is called to + display a traceback. All exceptions are caught except + SystemExit, which is reraised. + + A note about KeyboardInterrupt: this exception may occur + elsewhere in this code, and may not always be caught. The + caller should be prepared to deal with it. + + """ + try: + exec(code, self.locals) + except SystemExit: + raise + except: + self.showtraceback() + + def showsyntaxerror(self, filename=None): + """Display the syntax error that just occurred. + + This doesn't display a stack trace because there isn't one. + + If a filename is given, it is stuffed in the exception instead + of what was there before (because Python's parser always uses + "" when reading from a string). + + The output is written by self.write(), below. + + """ + type, value, tb = sys.exc_info() + sys.last_type = type + sys.last_value = value + sys.last_traceback = tb + if filename and type is SyntaxError: + # Work hard to stuff the correct filename in the exception + try: + msg, (dummy_filename, lineno, offset, line) = value.args + except ValueError: + # Not the format we expect; leave it alone + pass + else: + # Stuff in the right filename + value = SyntaxError(msg, (filename, lineno, offset, line)) + sys.last_value = value + if sys.excepthook is sys.__excepthook__: + lines = traceback.format_exception_only(type, value) + self.write(''.join(lines)) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + sys.excepthook(type, value, tb) + + def showtraceback(self): + """Display the exception that just occurred. + + We remove the first stack item because it is our own code. + + The output is written by self.write(), below. + + """ + try: + type, value, tb = sys.exc_info() + sys.last_type = type + sys.last_value = value + sys.last_traceback = tb + tblist = traceback.extract_tb(tb) + del tblist[:1] + lines = traceback.format_list(tblist) + if lines: + lines.insert(0, "Traceback (most recent call last):\n") + lines.extend(traceback.format_exception_only(type, value)) + finally: + tblist = tb = None + if sys.excepthook is sys.__excepthook__: + self.write(''.join(lines)) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + sys.excepthook(type, value, tb) + + def write(self, data): + """Write a string. + + The base implementation writes to sys.stderr; a subclass may + replace this with a different implementation. + + """ + sys.stderr.write(data) + + +class InteractiveConsole(InteractiveInterpreter): + """Closely emulate the behavior of the interactive Python interpreter. + + This class builds on InteractiveInterpreter and adds prompting + using the familiar sys.ps1 and sys.ps2, and input buffering. + + """ + + def __init__(self, locals=None, filename=""): + """Constructor. + + The optional locals argument will be passed to the + InteractiveInterpreter base class. + + The optional filename argument should specify the (file)name + of the input stream; it will show up in tracebacks. 
+ + """ + InteractiveInterpreter.__init__(self, locals) + self.filename = filename + self.resetbuffer() + + def resetbuffer(self): + """Reset the input buffer.""" + self.buffer = [] + + def interact(self, banner=None): + """Closely emulate the interactive Python console. + + The optional banner argument specifies the banner to print + before the first interaction; by default it prints a banner + similar to the one printed by the real Python interpreter, + followed by the current class name in parentheses (so as not + to confuse this with the real interpreter -- since it's so + close!). + + """ + try: + sys.ps1 + except AttributeError: + sys.ps1 = ">>> " + try: + sys.ps2 + except AttributeError: + sys.ps2 = "... " + cprt = 'Type "help", "copyright", "credits" or "license" for more information.' + if banner is None: + self.write("Python %s on %s\n%s\n(%s)\n" % + (sys.version, sys.platform, cprt, + self.__class__.__name__)) + else: + self.write("%s\n" % str(banner)) + more = 0 + while 1: + try: + if more: + prompt = sys.ps2 + else: + prompt = sys.ps1 + try: + line = self.raw_input(prompt) + except EOFError: + self.write("\n") + break + else: + more = self.push(line) + except KeyboardInterrupt: + self.write("\nKeyboardInterrupt\n") + self.resetbuffer() + more = 0 + + def push(self, line): + """Push a line to the interpreter. + + The line should not have a trailing newline; it may have + internal newlines. The line is appended to a buffer and the + interpreter's runsource() method is called with the + concatenated contents of the buffer as source. If this + indicates that the command was executed or invalid, the buffer + is reset; otherwise, the command is incomplete, and the buffer + is left as it was after the line was appended. The return + value is 1 if more input is required, 0 if the line was dealt + with in some way (this is the same as runsource()). + + """ + self.buffer.append(line) + source = "\n".join(self.buffer) + more = self.runsource(source, self.filename) + if not more: + self.resetbuffer() + return more + + def raw_input(self, prompt=""): + """Write a prompt and read a line. + + The returned line does not include the trailing newline. + When the user enters the EOF key sequence, EOFError is raised. + + The base implementation uses the built-in function + input(); a subclass may replace this with a different + implementation. + + """ + return input(prompt) + + + +def interact(banner=None, readfunc=None, local=None): + """Closely emulate the interactive Python interpreter. + + This is a backwards compatible interface to the InteractiveConsole + class. When readfunc is not specified, it attempts to import the + readline module to enable GNU readline if it is available. + + Arguments (all optional, all default to None): + + banner -- passed to InteractiveConsole.interact() + readfunc -- if not None, replaces InteractiveConsole.raw_input() + local -- passed to InteractiveInterpreter.__init__() + + """ + console = InteractiveConsole(local) + if readfunc is not None: + console.raw_input = readfunc + else: + try: + import readline + except ImportError: + pass + console.interact(banner) + + +if __name__ == "__main__": + interact() diff --git a/tests/bytecode/pylib-tests/compileall.py b/tests/bytecode/pylib-tests/compileall.py new file mode 100644 index 0000000000..d3cff6a98a --- /dev/null +++ b/tests/bytecode/pylib-tests/compileall.py @@ -0,0 +1,240 @@ +"""Module/script to byte-compile all .py files to .pyc (or .pyo) files. 
+ +When called as a script with arguments, this compiles the directories +given as arguments recursively; the -l option prevents it from +recursing into directories. + +Without arguments, if compiles all modules on sys.path, without +recursing into subdirectories. (Even though it should do so for +packages -- for now, you'll have to deal with packages separately.) + +See module py_compile for details of the actual byte-compilation. +""" +import os +import sys +import errno +import imp +import py_compile +import struct + +__all__ = ["compile_dir","compile_file","compile_path"] + +def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, + quiet=False, legacy=False, optimize=-1): + """Byte-compile all modules in the given directory tree. + + Arguments (only dir is required): + + dir: the directory to byte-compile + maxlevels: maximum recursion level (default 10) + ddir: the directory that will be prepended to the path to the + file as it is compiled into each byte-code file. + force: if True, force compilation, even if timestamps are up-to-date + quiet: if True, be quiet during compilation + legacy: if True, produce legacy pyc paths instead of PEP 3147 paths + optimize: optimization level or -1 for level of the interpreter + """ + if not quiet: + print('Listing {!r}...'.format(dir)) + try: + names = os.listdir(dir) + except os.error: + print("Can't list {!r}".format(dir)) + names = [] + names.sort() + success = 1 + for name in names: + if name == '__pycache__': + continue + fullname = os.path.join(dir, name) + if ddir is not None: + dfile = os.path.join(ddir, name) + else: + dfile = None + if not os.path.isdir(fullname): + if not compile_file(fullname, ddir, force, rx, quiet, + legacy, optimize): + success = 0 + elif (maxlevels > 0 and name != os.curdir and name != os.pardir and + os.path.isdir(fullname) and not os.path.islink(fullname)): + if not compile_dir(fullname, maxlevels - 1, dfile, force, rx, + quiet, legacy, optimize): + success = 0 + return success + +def compile_file(fullname, ddir=None, force=False, rx=None, quiet=False, + legacy=False, optimize=-1): + """Byte-compile one file. + + Arguments (only fullname is required): + + fullname: the file to byte-compile + ddir: if given, the directory name compiled in to the + byte-code file. 
+ force: if True, force compilation, even if timestamps are up-to-date + quiet: if True, be quiet during compilation + legacy: if True, produce legacy pyc paths instead of PEP 3147 paths + optimize: optimization level or -1 for level of the interpreter + """ + success = 1 + name = os.path.basename(fullname) + if ddir is not None: + dfile = os.path.join(ddir, name) + else: + dfile = None + if rx is not None: + mo = rx.search(fullname) + if mo: + return success + if os.path.isfile(fullname): + if legacy: + cfile = fullname + ('c' if __debug__ else 'o') + else: + if optimize >= 0: + cfile = imp.cache_from_source(fullname, + debug_override=not optimize) + else: + cfile = imp.cache_from_source(fullname) + cache_dir = os.path.dirname(cfile) + head, tail = name[:-3], name[-3:] + if tail == '.py': + if not force: + try: + mtime = int(os.stat(fullname).st_mtime) + expect = struct.pack('<4sl', imp.get_magic(), mtime) + with open(cfile, 'rb') as chandle: + actual = chandle.read(8) + if expect == actual: + return success + except IOError: + pass + if not quiet: + print('Compiling {!r}...'.format(fullname)) + try: + ok = py_compile.compile(fullname, cfile, dfile, True, + optimize=optimize) + except py_compile.PyCompileError as err: + if quiet: + print('*** Error compiling {!r}...'.format(fullname)) + else: + print('*** ', end='') + # escape non-printable characters in msg + msg = err.msg.encode(sys.stdout.encoding, + errors='backslashreplace') + msg = msg.decode(sys.stdout.encoding) + print(msg) + success = 0 + except (SyntaxError, UnicodeError, IOError) as e: + if quiet: + print('*** Error compiling {!r}...'.format(fullname)) + else: + print('*** ', end='') + print(e.__class__.__name__ + ':', e) + success = 0 + else: + if ok == 0: + success = 0 + return success + +def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=False, + legacy=False, optimize=-1): + """Byte-compile all module on sys.path. 
+ + Arguments (all optional): + + skip_curdir: if true, skip current directory (default True) + maxlevels: max recursion level (default 0) + force: as for compile_dir() (default False) + quiet: as for compile_dir() (default False) + legacy: as for compile_dir() (default False) + optimize: as for compile_dir() (default -1) + """ + success = 1 + for dir in sys.path: + if (not dir or dir == os.curdir) and skip_curdir: + print('Skipping current directory') + else: + success = success and compile_dir(dir, maxlevels, None, + force, quiet=quiet, + legacy=legacy, optimize=optimize) + return success + + +def main(): + """Script main program.""" + import argparse + + parser = argparse.ArgumentParser( + description='Utilities to support installing Python libraries.') + parser.add_argument('-l', action='store_const', const=0, + default=10, dest='maxlevels', + help="don't recurse into subdirectories") + parser.add_argument('-f', action='store_true', dest='force', + help='force rebuild even if timestamps are up to date') + parser.add_argument('-q', action='store_true', dest='quiet', + help='output only error messages') + parser.add_argument('-b', action='store_true', dest='legacy', + help='use legacy (pre-PEP3147) compiled file locations') + parser.add_argument('-d', metavar='DESTDIR', dest='ddir', default=None, + help=('directory to prepend to file paths for use in ' + 'compile-time tracebacks and in runtime ' + 'tracebacks in cases where the source file is ' + 'unavailable')) + parser.add_argument('-x', metavar='REGEXP', dest='rx', default=None, + help=('skip files matching the regular expression; ' + 'the regexp is searched for in the full path ' + 'of each file considered for compilation')) + parser.add_argument('-i', metavar='FILE', dest='flist', + help=('add all the files and directories listed in ' + 'FILE to the list considered for compilation; ' + 'if "-", names are read from stdin')) + parser.add_argument('compile_dest', metavar='FILE|DIR', nargs='*', + help=('zero or more file and directory names ' + 'to compile; if no arguments given, defaults ' + 'to the equivalent of -l sys.path')) + args = parser.parse_args() + + compile_dests = args.compile_dest + + if (args.ddir and (len(compile_dests) != 1 + or not os.path.isdir(compile_dests[0]))): + parser.exit('-d destdir requires exactly one directory argument') + if args.rx: + import re + args.rx = re.compile(args.rx) + + # if flist is provided then load it + if args.flist: + try: + with (sys.stdin if args.flist=='-' else open(args.flist)) as f: + for line in f: + compile_dests.append(line.strip()) + except EnvironmentError: + print("Error reading file list {}".format(args.flist)) + return False + + success = True + try: + if compile_dests: + for dest in compile_dests: + if os.path.isfile(dest): + if not compile_file(dest, args.ddir, args.force, args.rx, + args.quiet, args.legacy): + success = False + else: + if not compile_dir(dest, args.maxlevels, args.ddir, + args.force, args.rx, args.quiet, + args.legacy): + success = False + return success + else: + return compile_path(legacy=args.legacy) + except KeyboardInterrupt: + print("\n[interrupted]") + return False + return True + + +if __name__ == '__main__': + exit_status = int(not main()) + sys.exit(exit_status) diff --git a/tests/bytecode/pylib-tests/contextlib.py b/tests/bytecode/pylib-tests/contextlib.py new file mode 100644 index 0000000000..0b6bf71b08 --- /dev/null +++ b/tests/bytecode/pylib-tests/contextlib.py @@ -0,0 +1,255 @@ +"""Utilities for with-statement contexts. 
See PEP 343.""" + +import sys +from collections import deque +from functools import wraps + +__all__ = ["contextmanager", "closing", "ContextDecorator", "ExitStack"] + + +class ContextDecorator(object): + "A base class or mixin that enables context managers to work as decorators." + + def _recreate_cm(self): + """Return a recreated instance of self. + + Allows an otherwise one-shot context manager like + _GeneratorContextManager to support use as + a decorator via implicit recreation. + + This is a private interface just for _GeneratorContextManager. + See issue #11647 for details. + """ + return self + + def __call__(self, func): + @wraps(func) + def inner(*args, **kwds): + with self._recreate_cm(): + return func(*args, **kwds) + return inner + + +class _GeneratorContextManager(ContextDecorator): + """Helper for @contextmanager decorator.""" + + def __init__(self, func, *args, **kwds): + self.gen = func(*args, **kwds) + self.func, self.args, self.kwds = func, args, kwds + + def _recreate_cm(self): + # _GCM instances are one-shot context managers, so the + # CM must be recreated each time a decorated function is + # called + return self.__class__(self.func, *self.args, **self.kwds) + + def __enter__(self): + try: + return next(self.gen) + except StopIteration: + raise RuntimeError("generator didn't yield") + + def __exit__(self, type, value, traceback): + if type is None: + try: + next(self.gen) + except StopIteration: + return + else: + raise RuntimeError("generator didn't stop") + else: + if value is None: + # Need to force instantiation so we can reliably + # tell if we get the same exception back + value = type() + try: + self.gen.throw(type, value, traceback) + raise RuntimeError("generator didn't stop after throw()") + except StopIteration as exc: + # Suppress the exception *unless* it's the same exception that + # was passed to throw(). This prevents a StopIteration + # raised inside the "with" statement from being suppressed + return exc is not value + except: + # only re-raise if it's *not* the exception that was + # passed to throw(), because __exit__() must not raise + # an exception unless __exit__() itself failed. But throw() + # has to raise the exception to signal propagation, so this + # fixes the impedance mismatch between the throw() protocol + # and the __exit__() protocol. + # + if sys.exc_info()[1] is not value: + raise + + +def contextmanager(func): + """@contextmanager decorator. + + Typical usage: + + @contextmanager + def some_generator(): + + try: + yield + finally: + + + This makes this: + + with some_generator() as : + + + equivalent to this: + + + try: + = + + finally: + + + """ + @wraps(func) + def helper(*args, **kwds): + return _GeneratorContextManager(func, *args, **kwds) + return helper + + +class closing(object): + """Context to automatically close something at the end of a block. 
+
+    Code like this:
+
+        with closing(<module>.open(<arguments>)) as f:
+            <block>
+
+    is equivalent to this:
+
+        f = <module>.open(<arguments>)
+        try:
+            <block>
+        finally:
+            f.close()
+
+    """
+    def __init__(self, thing):
+        self.thing = thing
+    def __enter__(self):
+        return self.thing
+    def __exit__(self, *exc_info):
+        self.thing.close()
+
+
+# Inspired by discussions on http://bugs.python.org/issue13585
+class ExitStack(object):
+    """Context manager for dynamic management of a stack of exit callbacks
+
+    For example:
+
+        with ExitStack() as stack:
+            files = [stack.enter_context(open(fname)) for fname in filenames]
+            # All opened files will automatically be closed at the end of
+            # the with statement, even if attempts to open files later
+            # in the list raise an exception
+
+    """
+    def __init__(self):
+        self._exit_callbacks = deque()
+
+    def pop_all(self):
+        """Preserve the context stack by transferring it to a new instance"""
+        new_stack = type(self)()
+        new_stack._exit_callbacks = self._exit_callbacks
+        self._exit_callbacks = deque()
+        return new_stack
+
+    def _push_cm_exit(self, cm, cm_exit):
+        """Helper to correctly register callbacks to __exit__ methods"""
+        def _exit_wrapper(*exc_details):
+            return cm_exit(cm, *exc_details)
+        _exit_wrapper.__self__ = cm
+        self.push(_exit_wrapper)
+
+    def push(self, exit):
+        """Registers a callback with the standard __exit__ method signature
+
+        Can suppress exceptions the same way __exit__ methods can.
+
+        Also accepts any object with an __exit__ method (registering a call
+        to the method instead of the object itself)
+        """
+        # We use an unbound method rather than a bound method to follow
+        # the standard lookup behaviour for special methods
+        _cb_type = type(exit)
+        try:
+            exit_method = _cb_type.__exit__
+        except AttributeError:
+            # Not a context manager, so assume it's a callable
+            self._exit_callbacks.append(exit)
+        else:
+            self._push_cm_exit(exit, exit_method)
+        return exit # Allow use as a decorator
+
+    def callback(self, callback, *args, **kwds):
+        """Registers an arbitrary callback and arguments.
+
+        Cannot suppress exceptions.
+        """
+        def _exit_wrapper(exc_type, exc, tb):
+            callback(*args, **kwds)
+        # We changed the signature, so using @wraps is not appropriate, but
+        # setting __wrapped__ may still help with introspection
+        _exit_wrapper.__wrapped__ = callback
+        self.push(_exit_wrapper)
+        return callback # Allow use as a decorator
+
+    def enter_context(self, cm):
+        """Enters the supplied context manager
+
+        If successful, also pushes its __exit__ method as a callback and
+        returns the result of the __enter__ method.
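[Editor's note] A small usage sketch for ExitStack combining enter_context() and callback(); the file name is hypothetical:

    from contextlib import ExitStack

    with ExitStack() as stack:
        f = stack.enter_context(open('data.txt'))   # hypothetical file
        stack.callback(print, 'cleanup ran')        # runs on exit, LIFO order
        data = f.read()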
+ """ + # We look up the special methods on the type to match the with statement + _cm_type = type(cm) + _exit = _cm_type.__exit__ + result = _cm_type.__enter__(cm) + self._push_cm_exit(cm, _exit) + return result + + def close(self): + """Immediately unwind the context stack""" + self.__exit__(None, None, None) + + def __enter__(self): + return self + + def __exit__(self, *exc_details): + # We manipulate the exception state so it behaves as though + # we were actually nesting multiple with statements + frame_exc = sys.exc_info()[1] + def _fix_exception_context(new_exc, old_exc): + while 1: + exc_context = new_exc.__context__ + if exc_context in (None, frame_exc): + break + new_exc = exc_context + new_exc.__context__ = old_exc + + # Callbacks are invoked in LIFO order to match the behaviour of + # nested context managers + suppressed_exc = False + while self._exit_callbacks: + cb = self._exit_callbacks.pop() + try: + if cb(*exc_details): + suppressed_exc = True + exc_details = (None, None, None) + except: + new_exc_details = sys.exc_info() + # simulate the stack of exceptions by setting the context + _fix_exception_context(new_exc_details[1], exc_details[1]) + if not self._exit_callbacks: + raise + exc_details = new_exc_details + return suppressed_exc diff --git a/tests/bytecode/pylib-tests/crypt.py b/tests/bytecode/pylib-tests/crypt.py new file mode 100644 index 0000000000..b90c81cc40 --- /dev/null +++ b/tests/bytecode/pylib-tests/crypt.py @@ -0,0 +1,62 @@ +"""Wrapper to the POSIX crypt library call and associated functionality.""" + +import _crypt +import string as _string +from random import SystemRandom as _SystemRandom +from collections import namedtuple as _namedtuple + + +_saltchars = _string.ascii_letters + _string.digits + './' +_sr = _SystemRandom() + + +class _Method(_namedtuple('_Method', 'name ident salt_chars total_size')): + + """Class representing a salt method per the Modular Crypt Format or the + legacy 2-character crypt method.""" + + def __repr__(self): + return ''.format(self.name) + + +def mksalt(method=None): + """Generate a salt for the specified method. + + If not specified, the strongest available method will be used. + + """ + if method is None: + method = methods[0] + s = '${}$'.format(method.ident) if method.ident else '' + s += ''.join(_sr.sample(_saltchars, method.salt_chars)) + return s + + +def crypt(word, salt=None): + """Return a string representing the one-way hash of a password, with a salt + prepended. + + If ``salt`` is not specified or is ``None``, the strongest + available method will be selected and a salt generated. Otherwise, + ``salt`` may be one of the ``crypt.METHOD_*`` values, or a string as + returned by ``crypt.mksalt()``. 
+ + """ + if salt is None or isinstance(salt, _Method): + salt = mksalt(salt) + return _crypt.crypt(word, salt) + + +# available salting/crypto methods +METHOD_CRYPT = _Method('CRYPT', None, 2, 13) +METHOD_MD5 = _Method('MD5', '1', 8, 34) +METHOD_SHA256 = _Method('SHA256', '5', 16, 63) +METHOD_SHA512 = _Method('SHA512', '6', 16, 106) + +methods = [] +for _method in (METHOD_SHA512, METHOD_SHA256, METHOD_MD5): + _result = crypt('', _method) + if _result and len(_result) == _method.total_size: + methods.append(_method) +methods.append(METHOD_CRYPT) +del _result, _method diff --git a/tests/bytecode/pylib-tests/dummy_threading.py b/tests/bytecode/pylib-tests/dummy_threading.py new file mode 100644 index 0000000000..1bb7eee338 --- /dev/null +++ b/tests/bytecode/pylib-tests/dummy_threading.py @@ -0,0 +1,78 @@ +"""Faux ``threading`` version using ``dummy_thread`` instead of ``thread``. + +The module ``_dummy_threading`` is added to ``sys.modules`` in order +to not have ``threading`` considered imported. Had ``threading`` been +directly imported it would have made all subsequent imports succeed +regardless of whether ``_thread`` was available which is not desired. + +""" +from sys import modules as sys_modules + +import _dummy_thread + +# Declaring now so as to not have to nest ``try``s to get proper clean-up. +holding_thread = False +holding_threading = False +holding__threading_local = False + +try: + # Could have checked if ``_thread`` was not in sys.modules and gone + # a different route, but decided to mirror technique used with + # ``threading`` below. + if '_thread' in sys_modules: + held_thread = sys_modules['_thread'] + holding_thread = True + # Must have some module named ``_thread`` that implements its API + # in order to initially import ``threading``. + sys_modules['_thread'] = sys_modules['_dummy_thread'] + + if 'threading' in sys_modules: + # If ``threading`` is already imported, might as well prevent + # trying to import it more than needed by saving it if it is + # already imported before deleting it. + held_threading = sys_modules['threading'] + holding_threading = True + del sys_modules['threading'] + + if '_threading_local' in sys_modules: + # If ``_threading_local`` is already imported, might as well prevent + # trying to import it more than needed by saving it if it is + # already imported before deleting it. + held__threading_local = sys_modules['_threading_local'] + holding__threading_local = True + del sys_modules['_threading_local'] + + import threading + # Need a copy of the code kept somewhere... 
+    sys_modules['_dummy_threading'] = sys_modules['threading']
+    del sys_modules['threading']
+    sys_modules['_dummy__threading_local'] = sys_modules['_threading_local']
+    del sys_modules['_threading_local']
+    from _dummy_threading import *
+    from _dummy_threading import __all__
+
+finally:
+    # Put back ``threading`` if we overwrote earlier
+
+    if holding_threading:
+        sys_modules['threading'] = held_threading
+        del held_threading
+    del holding_threading
+
+    # Put back ``_threading_local`` if we overwrote earlier
+
+    if holding__threading_local:
+        sys_modules['_threading_local'] = held__threading_local
+        del held__threading_local
+    del holding__threading_local
+
+    # Put back ``thread`` if we overwrote, else del the entry we made
+    if holding_thread:
+        sys_modules['_thread'] = held_thread
+        del held_thread
+    else:
+        del sys_modules['_thread']
+    del holding_thread
+
+    del _dummy_thread
+    del sys_modules
diff --git a/tests/bytecode/pylib-tests/fnmatch.py b/tests/bytecode/pylib-tests/fnmatch.py
new file mode 100644
index 0000000000..6330b0cfda
--- /dev/null
+++ b/tests/bytecode/pylib-tests/fnmatch.py
@@ -0,0 +1,109 @@
+"""Filename matching with shell patterns.
+
+fnmatch(FILENAME, PATTERN) matches according to the local convention.
+fnmatchcase(FILENAME, PATTERN) always takes case into account.
+
+The functions operate by translating the pattern into a regular
+expression.  They cache the compiled regular expressions for speed.
+
+The function translate(PATTERN) returns a regular expression
+corresponding to PATTERN.  (It does not compile it.)
+"""
+import os
+import posixpath
+import re
+import functools
+
+__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
+
+def fnmatch(name, pat):
+    """Test whether FILENAME matches PATTERN.
+
+    Patterns are Unix shell style:
+
+    *       matches everything
+    ?       matches any single character
+    [seq]   matches any character in seq
+    [!seq]  matches any char not in seq
+
+    An initial period in FILENAME is not special.
+    Both FILENAME and PATTERN are first case-normalized
+    if the operating system requires it.
+    If you don't want this, use fnmatchcase(FILENAME, PATTERN).
+    """
+    name = os.path.normcase(name)
+    pat = os.path.normcase(pat)
+    return fnmatchcase(name, pat)
+
+@functools.lru_cache(maxsize=256, typed=True)
+def _compile_pattern(pat):
+    if isinstance(pat, bytes):
+        pat_str = str(pat, 'ISO-8859-1')
+        res_str = translate(pat_str)
+        res = bytes(res_str, 'ISO-8859-1')
+    else:
+        res = translate(pat)
+    return re.compile(res).match
+
+def filter(names, pat):
+    """Return the subset of the list NAMES that match PAT."""
+    result = []
+    pat = os.path.normcase(pat)
+    match = _compile_pattern(pat)
+    if os.path is posixpath:
+        # normcase on posix is NOP. Optimize it away from the loop.
+        for name in names:
+            if match(name):
+                result.append(name)
+    else:
+        for name in names:
+            if match(os.path.normcase(name)):
+                result.append(name)
+    return result
+
+def fnmatchcase(name, pat):
+    """Test whether FILENAME matches PATTERN, including case.
+
+    This is a version of fnmatch() which doesn't case-normalize
+    its arguments.
+    """
+    match = _compile_pattern(pat)
+    return match(name) is not None
+
+
+def translate(pat):
+    """Translate a shell PATTERN to a regular expression.
+
+    There is no way to quote meta-characters.
+    """
+
+    i, n = 0, len(pat)
+    res = ''
+    while i < n:
+        c = pat[i]
+        i = i+1
+        if c == '*':
+            res = res + '.*'
+        elif c == '?':
+            res = res + '.'
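[Editor's note] Illustrative outputs of the finished translate():

    translate('*.py')   # '.*\.py\Z(?ms)'
    translate('da?a')   # 'da.a\Z(?ms)'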
+        elif c == '[':
+            j = i
+            if j < n and pat[j] == '!':
+                j = j+1
+            if j < n and pat[j] == ']':
+                j = j+1
+            while j < n and pat[j] != ']':
+                j = j+1
+            if j >= n:
+                res = res + '\\['
+            else:
+                stuff = pat[i:j].replace('\\','\\\\')
+                i = j+1
+                if stuff[0] == '!':
+                    stuff = '^' + stuff[1:]
+                elif stuff[0] == '^':
+                    stuff = '\\' + stuff
+                res = '%s[%s]' % (res, stuff)
+        else:
+            res = res + re.escape(c)
+    return res + '\Z(?ms)'
diff --git a/tests/bytecode/pylib-tests/genericpath.py b/tests/bytecode/pylib-tests/genericpath.py
new file mode 100644
index 0000000000..2174187a03
--- /dev/null
+++ b/tests/bytecode/pylib-tests/genericpath.py
@@ -0,0 +1,106 @@
+"""
+Path operations common to more than one OS
+Do not use directly.  The OS specific modules import the appropriate
+functions from this module themselves.
+"""
+import os
+import stat
+
+__all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime',
+           'getsize', 'isdir', 'isfile']
+
+
+# Does a path exist?
+# This is false for dangling symbolic links on systems that support them.
+def exists(path):
+    """Test whether a path exists.  Returns False for broken symbolic links"""
+    try:
+        os.stat(path)
+    except os.error:
+        return False
+    return True
+
+
+# This follows symbolic links, so both islink() and isfile() can be true
+# for the same path on systems that support symlinks
+def isfile(path):
+    """Test whether a path is a regular file"""
+    try:
+        st = os.stat(path)
+    except os.error:
+        return False
+    return stat.S_ISREG(st.st_mode)
+
+
+# Is a path a directory?
+# This follows symbolic links, so both islink() and isdir()
+# can be true for the same path on systems that support symlinks
+def isdir(s):
+    """Return true if the pathname refers to an existing directory."""
+    try:
+        st = os.stat(s)
+    except os.error:
+        return False
+    return stat.S_ISDIR(st.st_mode)
+
+
+def getsize(filename):
+    """Return the size of a file, reported by os.stat()."""
+    return os.stat(filename).st_size
+
+
+def getmtime(filename):
+    """Return the last modification time of a file, reported by os.stat()."""
+    return os.stat(filename).st_mtime
+
+
+def getatime(filename):
+    """Return the last access time of a file, reported by os.stat()."""
+    return os.stat(filename).st_atime
+
+
+def getctime(filename):
+    """Return the metadata change time of a file, reported by os.stat()."""
+    return os.stat(filename).st_ctime
+
+
+# Return the longest prefix of all list elements.
+def commonprefix(m):
+    "Given a list of pathnames, returns the longest common leading component"
+    if not m: return ''
+    s1 = min(m)
+    s2 = max(m)
+    for i, c in enumerate(s1):
+        if c != s2[i]:
+            return s1[:i]
+    return s1
+
+# Split a path in root and extension.
+# The extension is everything starting at the last dot in the last
+# pathname component; the root is everything before that.
+# It is always true that root + ext == p.
+
+# Generic implementation of splitext, to be parametrized with
+# the separators
+def _splitext(p, sep, altsep, extsep):
+    """Split the extension from a pathname.
+
+    Extension is everything from the last dot to the end, ignoring
+    leading dots.  Returns "(root, ext)"; ext may be empty."""
+    # NOTE: This code must work for text and bytes strings.
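[Editor's note] Illustrative behaviour of _splitext() with sep='/', altsep=None, extsep='.':

    _splitext('foo.bar.ext', '/', None, '.')   # ('foo.bar', '.ext')
    _splitext('.cshrc', '/', None, '.')        # ('.cshrc', '')
    _splitext('a/b.c/d', '/', None, '.')       # ('a/b.c/d', '')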
+
+    sepIndex = p.rfind(sep)
+    if altsep:
+        altsepIndex = p.rfind(altsep)
+        sepIndex = max(sepIndex, altsepIndex)
+
+    dotIndex = p.rfind(extsep)
+    if dotIndex > sepIndex:
+        # skip all leading dots
+        filenameIndex = sepIndex + 1
+        while filenameIndex < dotIndex:
+            if p[filenameIndex:filenameIndex+1] != extsep:
+                return p[:dotIndex], p[dotIndex:]
+            filenameIndex += 1
+
+    return p, p[:0]
diff --git a/tests/bytecode/pylib-tests/getopt.py b/tests/bytecode/pylib-tests/getopt.py
new file mode 100644
index 0000000000..3d6ecbddb9
--- /dev/null
+++ b/tests/bytecode/pylib-tests/getopt.py
@@ -0,0 +1,215 @@
+"""Parser for command line options.
+
+This module helps scripts to parse the command line arguments in
+sys.argv.  It supports the same conventions as the Unix getopt()
+function (including the special meanings of arguments of the form `-'
+and `--').  Long options similar to those supported by GNU software
+may be used as well via an optional third argument.  This module
+provides two functions and an exception:
+
+getopt() -- Parse command line options
+gnu_getopt() -- Like getopt(), but allow option and non-option arguments
+to be intermixed.
+GetoptError -- exception (class) raised with 'opt' attribute, which is the
+option involved with the exception.
+"""
+
+# Long option support added by Lars Wirzenius <liw@iki.fi>.
+#
+# Gerrit Holl <gerrit@nl.linux.org> moved the string-based exceptions
+# to class-based exceptions.
+#
+# Peter Åstrand <astrand@lysator.liu.se> added gnu_getopt().
+#
+# TODO for gnu_getopt():
+#
+# - GNU getopt_long_only mechanism
+# - allow the caller to specify ordering
+# - RETURN_IN_ORDER option
+# - GNU extension with '-' as first character of option string
+# - optional arguments, specified by double colons
+# - an option string with a W followed by semicolon should
+#   treat "-W foo" as "--foo"
+
+__all__ = ["GetoptError","error","getopt","gnu_getopt"]
+
+import os
+try:
+    from gettext import gettext as _
+except ImportError:
+    # Bootstrapping Python: gettext's dependencies not built yet
+    def _(s): return s
+
+class GetoptError(Exception):
+    opt = ''
+    msg = ''
+    def __init__(self, msg, opt=''):
+        self.msg = msg
+        self.opt = opt
+        Exception.__init__(self, msg, opt)
+
+    def __str__(self):
+        return self.msg
+
+error = GetoptError # backward compatibility
+
+def getopt(args, shortopts, longopts = []):
+    """getopt(args, options[, long_options]) -> opts, args
+
+    Parses command line options and parameter list.  args is the
+    argument list to be parsed, without the leading reference to the
+    running program.  Typically, this means "sys.argv[1:]".  shortopts
+    is the string of option letters that the script wants to
+    recognize, with options that require an argument followed by a
+    colon (i.e., the same format that Unix getopt() uses).  If
+    specified, longopts is a list of strings with the names of the
+    long options which should be supported.  The leading '--'
+    characters should not be included in the option name.  Options
+    which require an argument should be followed by an equal sign
+    ('=').
+
+    The return value consists of two elements: the first is a list of
+    (option, value) pairs; the second is the list of program arguments
+    left after the option list was stripped (this is a trailing slice
+    of the first argument).  Each option-and-value pair returned has
+    the option as its first element, prefixed with a hyphen (e.g.,
+    '-x'), and the option argument as its second element, or an empty
+    string if the option has no argument.
The options occur in the + list in the same order in which they were found, thus allowing + multiple occurrences. Long and short options may be mixed. + + """ + + opts = [] + if type(longopts) == type(""): + longopts = [longopts] + else: + longopts = list(longopts) + while args and args[0].startswith('-') and args[0] != '-': + if args[0] == '--': + args = args[1:] + break + if args[0].startswith('--'): + opts, args = do_longs(opts, args[0][2:], longopts, args[1:]) + else: + opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:]) + + return opts, args + +def gnu_getopt(args, shortopts, longopts = []): + """getopt(args, options[, long_options]) -> opts, args + + This function works like getopt(), except that GNU style scanning + mode is used by default. This means that option and non-option + arguments may be intermixed. The getopt() function stops + processing options as soon as a non-option argument is + encountered. + + If the first character of the option string is `+', or if the + environment variable POSIXLY_CORRECT is set, then option + processing stops as soon as a non-option argument is encountered. + + """ + + opts = [] + prog_args = [] + if isinstance(longopts, str): + longopts = [longopts] + else: + longopts = list(longopts) + + # Allow options after non-option arguments? + if shortopts.startswith('+'): + shortopts = shortopts[1:] + all_options_first = True + elif os.environ.get("POSIXLY_CORRECT"): + all_options_first = True + else: + all_options_first = False + + while args: + if args[0] == '--': + prog_args += args[1:] + break + + if args[0][:2] == '--': + opts, args = do_longs(opts, args[0][2:], longopts, args[1:]) + elif args[0][:1] == '-' and args[0] != '-': + opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:]) + else: + if all_options_first: + prog_args += args + break + else: + prog_args.append(args[0]) + args = args[1:] + + return opts, prog_args + +def do_longs(opts, opt, longopts, args): + try: + i = opt.index('=') + except ValueError: + optarg = None + else: + opt, optarg = opt[:i], opt[i+1:] + + has_arg, opt = long_has_args(opt, longopts) + if has_arg: + if optarg is None: + if not args: + raise GetoptError(_('option --%s requires argument') % opt, opt) + optarg, args = args[0], args[1:] + elif optarg is not None: + raise GetoptError(_('option --%s must not have an argument') % opt, opt) + opts.append(('--' + opt, optarg or '')) + return opts, args + +# Return: +# has_arg? +# full option name +def long_has_args(opt, longopts): + possibilities = [o for o in longopts if o.startswith(opt)] + if not possibilities: + raise GetoptError(_('option --%s not recognized') % opt, opt) + # Is there an exact match? + if opt in possibilities: + return False, opt + elif opt + '=' in possibilities: + return True, opt + # No exact match, so better be unique. 
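[Editor's note] Illustrative behaviour of long_has_args(), assuming longopts = ['alpha=', 'apropos']:

    long_has_args('alpha', longopts)   # (True, 'alpha')     exact match, takes an argument
    long_has_args('ap', longopts)      # (False, 'apropos')  unique prefix
    long_has_args('a', longopts)       # raises GetoptError: option --a not a unique prefix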
+ if len(possibilities) > 1: + # XXX since possibilities contains all valid continuations, might be + # nice to work them into the error msg + raise GetoptError(_('option --%s not a unique prefix') % opt, opt) + assert len(possibilities) == 1 + unique_match = possibilities[0] + has_arg = unique_match.endswith('=') + if has_arg: + unique_match = unique_match[:-1] + return has_arg, unique_match + +def do_shorts(opts, optstring, shortopts, args): + while optstring != '': + opt, optstring = optstring[0], optstring[1:] + if short_has_arg(opt, shortopts): + if optstring == '': + if not args: + raise GetoptError(_('option -%s requires argument') % opt, + opt) + optstring, args = args[0], args[1:] + optarg, optstring = optstring, '' + else: + optarg = '' + opts.append(('-' + opt, optarg)) + return opts, args + +def short_has_arg(opt, shortopts): + for i in range(len(shortopts)): + if opt == shortopts[i] != ':': + return shortopts.startswith(':', i+1) + raise GetoptError(_('option -%s not recognized') % opt, opt) + +if __name__ == '__main__': + import sys + print(getopt(sys.argv[1:], "a:b", ["alpha=", "beta"])) diff --git a/tests/bytecode/pylib-tests/hashlib.py b/tests/bytecode/pylib-tests/hashlib.py new file mode 100644 index 0000000000..21454c7d30 --- /dev/null +++ b/tests/bytecode/pylib-tests/hashlib.py @@ -0,0 +1,148 @@ +# Copyright (C) 2005-2010 Gregory P. Smith (greg@krypto.org) +# Licensed to PSF under a Contributor Agreement. +# + +__doc__ = """hashlib module - A common interface to many hash functions. + +new(name, data=b'') - returns a new hash object implementing the + given hash function; initializing the hash + using the given binary data. + +Named constructor functions are also available, these are faster +than using new(name): + +md5(), sha1(), sha224(), sha256(), sha384(), and sha512() + +More algorithms may be available on your platform but the above are guaranteed +to exist. See the algorithms_guaranteed and algorithms_available attributes +to find out what algorithm names can be passed to new(). + +NOTE: If you want the adler32 or crc32 hash functions they are available in +the zlib module. + +Choose your hash function wisely. Some have known collision weaknesses. +sha384 and sha512 will be slow on 32 bit platforms. + +Hash objects have these methods: + - update(arg): Update the hash object with the bytes in arg. Repeated calls + are equivalent to a single call with the concatenation of all + the arguments. + - digest(): Return the digest of the bytes passed to the update() method + so far. + - hexdigest(): Like digest() except the digest is returned as a unicode + object of double length, containing only hexadecimal digits. + - copy(): Return a copy (clone) of the hash object. This can be used to + efficiently compute the digests of strings that share a common + initial substring. + +For example, to obtain the digest of the string 'Nobody inspects the +spammish repetition': + + >>> import hashlib + >>> m = hashlib.md5() + >>> m.update(b"Nobody inspects") + >>> m.update(b" the spammish repetition") + >>> m.digest() + b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9' + +More condensed: + + >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest() + 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' + +""" + +# This tuple and __get_builtin_constructor() must be modified if a new +# always available algorithm is added. 
+__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512') + +algorithms_guaranteed = set(__always_supported) +algorithms_available = set(__always_supported) + +__all__ = __always_supported + ('new', 'algorithms_guaranteed', + 'algorithms_available') + + +def __get_builtin_constructor(name): + try: + if name in ('SHA1', 'sha1'): + import _sha1 + return _sha1.sha1 + elif name in ('MD5', 'md5'): + import _md5 + return _md5.md5 + elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): + import _sha256 + bs = name[3:] + if bs == '256': + return _sha256.sha256 + elif bs == '224': + return _sha256.sha224 + elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): + import _sha512 + bs = name[3:] + if bs == '512': + return _sha512.sha512 + elif bs == '384': + return _sha512.sha384 + except ImportError: + pass # no extension module, this hash is unsupported. + + raise ValueError('unsupported hash type ' + name) + + +def __get_openssl_constructor(name): + try: + f = getattr(_hashlib, 'openssl_' + name) + # Allow the C module to raise ValueError. The function will be + # defined but the hash not actually available thanks to OpenSSL. + f() + # Use the C function directly (very fast) + return f + except (AttributeError, ValueError): + return __get_builtin_constructor(name) + + +def __py_new(name, data=b''): + """new(name, data=b'') - Return a new hashing object using the named algorithm; + optionally initialized with data (which must be bytes). + """ + return __get_builtin_constructor(name)(data) + + +def __hash_new(name, data=b''): + """new(name, data=b'') - Return a new hashing object using the named algorithm; + optionally initialized with data (which must be bytes). + """ + try: + return _hashlib.new(name, data) + except ValueError: + # If the _hashlib module (OpenSSL) doesn't support the named + # hash, try using our builtin implementations. + # This allows for SHA224/256 and SHA384/512 support even though + # the OpenSSL library prior to 0.9.8 doesn't provide them. + return __get_builtin_constructor(name)(data) + + +try: + import _hashlib + new = __hash_new + __get_hash = __get_openssl_constructor + algorithms_available = algorithms_available.union( + _hashlib.openssl_md_meth_names) +except ImportError: + new = __py_new + __get_hash = __get_builtin_constructor + +for __func_name in __always_supported: + # try them all, some may not work due to the OpenSSL + # version not supporting that algorithm. + try: + globals()[__func_name] = __get_hash(__func_name) + except ValueError: + import logging + logging.exception('code for hash %s was not found.', __func_name) + +# Cleanup locals() +del __always_supported, __func_name, __get_hash +del __py_new, __hash_new, __get_openssl_constructor diff --git a/tests/bytecode/pylib-tests/heapq.py b/tests/bytecode/pylib-tests/heapq.py new file mode 100644 index 0000000000..00b429c2d3 --- /dev/null +++ b/tests/bytecode/pylib-tests/heapq.py @@ -0,0 +1,472 @@ +"""Heap queue algorithm (a.k.a. priority queue). + +Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for +all k, counting elements from 0. For the sake of comparison, +non-existing elements are considered to be infinite. The interesting +property of a heap is that a[0] is always its smallest element. 
+
+Usage:
+
+heap = []            # creates an empty heap
+heappush(heap, item) # pushes a new item on the heap
+item = heappop(heap) # pops the smallest item from the heap
+item = heap[0]       # smallest item on the heap without popping it
+heapify(x)           # transforms list into a heap, in-place, in linear time
+item = heapreplace(heap, item) # pops and returns smallest item, and adds
+                               # new item; the heap size is unchanged
+
+Our API differs from textbook heap algorithms as follows:
+
+- We use 0-based indexing.  This makes the relationship between the
+  index for a node and the indexes for its children slightly less
+  obvious, but is more suitable since Python uses 0-based indexing.
+
+- Our heappop() method returns the smallest item, not the largest.
+
+These two make it possible to view the heap as a regular Python list
+without surprises: heap[0] is the smallest item, and heap.sort()
+maintains the heap invariant!
+"""
+
+# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
+
+__about__ = """Heap queues
+
+[explanation by François Pinard]
+
+Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
+all k, counting elements from 0.  For the sake of comparison,
+non-existing elements are considered to be infinite.  The interesting
+property of a heap is that a[0] is always its smallest element.
+
+The strange invariant above is meant to be an efficient memory
+representation for a tournament.  The numbers below are `k', not a[k]:
+
+                                   0
+
+                  1                                 2
+
+          3               4                5               6
+
+      7       8       9       10      11      12      13      14
+
+    15 16   17 18   19 20   21 22   23 24   25 26   27 28   29 30
+
+
+In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'.  In
+a usual binary tournament we see in sports, each cell is the winner
+over the two cells it tops, and we can trace the winner down the tree
+to see all opponents s/he had.  However, in many computer applications
+of such tournaments, we do not need to trace the history of a winner.
+To be more memory efficient, when a winner is promoted, we try to
+replace it by something else at a lower level, and the rule becomes
+that a cell and the two cells it tops contain three different items,
+but the top cell "wins" over the two topped cells.
+
+If this heap invariant is protected at all times, index 0 is clearly
+the overall winner.  The simplest algorithmic way to remove it and
+find the "next" winner is to move some loser (let's say cell 30 in the
+diagram above) into the 0 position, and then percolate this new 0 down
+the tree, exchanging values, until the invariant is re-established.
+This is clearly logarithmic on the total number of items in the tree.
+By iterating over all items, you get an O(n ln n) sort.
+
+A nice feature of this sort is that you can efficiently insert new
+items while the sort is going on, provided that the inserted items are
+not "better" than the last 0'th element you extracted.  This is
+especially useful in simulation contexts, where the tree holds all
+incoming events, and the "win" condition means the smallest scheduled
+time.  When an event schedules other events for execution, they are
+scheduled into the future, so they can easily go into the heap.  So, a
+heap is a good structure for implementing schedulers (this is what I
+used for my MIDI sequencer :-).
+
+Various structures for implementing schedulers have been extensively
+studied, and heaps are good for this, as they are reasonably speedy,
+the speed is almost constant, and the worst case is not much different
+than the average case.  However, there are other representations which
+are more efficient overall, yet the worst cases might be terrible.
+
+Heaps are also very useful in big disk sorts.  You most probably all
+know that a big sort implies producing "runs" (which are pre-sorted
+sequences, whose size is usually related to the amount of CPU memory),
+followed by merging passes for these runs, which merging is often
+very cleverly organised[1].  It is very important that the initial
+sort produces the longest runs possible.  Tournaments are a good way
+to achieve that.  If, using all the memory available to hold a
+tournament, you replace and percolate items that happen to fit the
+current run, you'll produce runs which are twice the size of the
+memory for random input, and much better for input fuzzily ordered.
+
+Moreover, if you output the 0'th item on disk and get an input which
+may not fit in the current tournament (because the value "wins" over
+the last output value), it cannot fit in the heap, so the size of the
+heap decreases.  The freed memory could be cleverly reused immediately
+for progressively building a second heap, which grows at exactly the
+same rate the first heap is melting.  When the first heap completely
+vanishes, you switch heaps and start a new run.  Clever and quite
+effective!
+
+In a word, heaps are useful memory structures to know.  I use them in
+a few applications, and I think it is good to keep a `heap' module
+around. :-)
+
+--------------------
+[1] The disk balancing algorithms which are current, nowadays, are
+more annoying than clever, and this is a consequence of the seeking
+capabilities of the disks.  On devices which cannot seek, like big
+tape drives, the story was quite different, and one had to be very
+clever to ensure (far in advance) that each tape movement will be the
+most effective possible (that is, will best participate at
+"progressing" the merge).  Some tapes were even able to read
+backwards, and this was also used to avoid the rewinding time.
+Believe me, real good tape sorts were quite spectacular to watch!
+From all times, sorting has always been a Great Art! :-)
+"""
+
+__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
+           'nlargest', 'nsmallest', 'heappushpop']
+
+from itertools import islice, count, tee, chain
+
+def heappush(heap, item):
+    """Push item onto heap, maintaining the heap invariant."""
+    heap.append(item)
+    _siftdown(heap, 0, len(heap)-1)
+
+def heappop(heap):
+    """Pop the smallest item off the heap, maintaining the heap invariant."""
+    lastelt = heap.pop()    # raises appropriate IndexError if heap is empty
+    if heap:
+        returnitem = heap[0]
+        heap[0] = lastelt
+        _siftup(heap, 0)
+    else:
+        returnitem = lastelt
+    return returnitem
+
+def heapreplace(heap, item):
+    """Pop and return the current smallest value, and add the new item.
+
+    This is more efficient than heappop() followed by heappush(), and can be
+    more appropriate when using a fixed-size heap.  Note that the value
+    returned may be larger than item!
That constrains reasonable uses of + this routine unless written as part of a conditional replacement: + + if item > heap[0]: + item = heapreplace(heap, item) + """ + returnitem = heap[0] # raises appropriate IndexError if heap is empty + heap[0] = item + _siftup(heap, 0) + return returnitem + +def heappushpop(heap, item): + """Fast version of a heappush followed by a heappop.""" + if heap and heap[0] < item: + item, heap[0] = heap[0], item + _siftup(heap, 0) + return item + +def heapify(x): + """Transform list into a heap, in-place, in O(len(x)) time.""" + n = len(x) + # Transform bottom-up. The largest index there's any point to looking at + # is the largest with a child index in-range, so must have 2*i + 1 < n, + # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so + # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is + # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1. + for i in reversed(range(n//2)): + _siftup(x, i) + +def _heappushpop_max(heap, item): + """Maxheap version of a heappush followed by a heappop.""" + if heap and item < heap[0]: + item, heap[0] = heap[0], item + _siftup_max(heap, 0) + return item + +def _heapify_max(x): + """Transform list into a maxheap, in-place, in O(len(x)) time.""" + n = len(x) + for i in reversed(range(n//2)): + _siftup_max(x, i) + +def nlargest(n, iterable): + """Find the n largest elements in a dataset. + + Equivalent to: sorted(iterable, reverse=True)[:n] + """ + if n < 0: + return [] + it = iter(iterable) + result = list(islice(it, n)) + if not result: + return result + heapify(result) + _heappushpop = heappushpop + for elem in it: + _heappushpop(result, elem) + result.sort(reverse=True) + return result + +def nsmallest(n, iterable): + """Find the n smallest elements in a dataset. + + Equivalent to: sorted(iterable)[:n] + """ + if n < 0: + return [] + it = iter(iterable) + result = list(islice(it, n)) + if not result: + return result + _heapify_max(result) + _heappushpop = _heappushpop_max + for elem in it: + _heappushpop(result, elem) + result.sort() + return result + +# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos +# is the index of a leaf with a possibly out-of-order value. Restore the +# heap invariant. +def _siftdown(heap, startpos, pos): + newitem = heap[pos] + # Follow the path to the root, moving parents down until finding a place + # newitem fits. + while pos > startpos: + parentpos = (pos - 1) >> 1 + parent = heap[parentpos] + if newitem < parent: + heap[pos] = parent + pos = parentpos + continue + break + heap[pos] = newitem + +# The child indices of heap index pos are already heaps, and we want to make +# a heap at index pos too. We do this by bubbling the smaller child of +# pos up (and so on with that child's children, etc) until hitting a leaf, +# then using _siftdown to move the oddball originally at index pos into place. +# +# We *could* break out of the loop as soon as we find a pos where newitem <= +# both its children, but turns out that's not a good idea, and despite that +# many books write the algorithm that way. During a heap pop, the last array +# element is sifted in, and that tends to be large, so that comparing it +# against values starting from the root usually doesn't pay (= usually doesn't +# get us out of the loop early). See Knuth, Volume 3, where this is +# explained and quantified in an exercise. 
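[Editor's note] A sketch for observing this effect with a comparison-counting wrapper (the wrapper class is made up):

    import random
    from heapq import heapify

    class C(int):
        compares = 0
        def __lt__(self, other):
            C.compares += 1
            return int.__lt__(self, other)

    data = [C(random.randrange(10**6)) for _ in range(1000)]
    heapify(data)
    print(C.compares)   # roughly the "cut to" figures in the table below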
+# +# Cutting the # of comparisons is important, since these routines have no +# way to extract "the priority" from an array element, so that intelligence +# is likely to be hiding in custom comparison methods, or in array elements +# storing (priority, record) tuples. Comparisons are thus potentially +# expensive. +# +# On random arrays of length 1000, making this change cut the number of +# comparisons made by heapify() a little, and those made by exhaustive +# heappop() a lot, in accord with theory. Here are typical results from 3 +# runs (3 just to demonstrate how small the variance is): +# +# Compares needed by heapify Compares needed by 1000 heappops +# -------------------------- -------------------------------- +# 1837 cut to 1663 14996 cut to 8680 +# 1855 cut to 1659 14966 cut to 8678 +# 1847 cut to 1660 15024 cut to 8703 +# +# Building the heap by using heappush() 1000 times instead required +# 2198, 2148, and 2219 compares: heapify() is more efficient, when +# you can use it. +# +# The total compares needed by list.sort() on the same lists were 8627, +# 8627, and 8632 (this should be compared to the sum of heapify() and +# heappop() compares): list.sort() is (unsurprisingly!) more efficient +# for sorting. + +def _siftup(heap, pos): + endpos = len(heap) + startpos = pos + newitem = heap[pos] + # Bubble up the smaller child until hitting a leaf. + childpos = 2*pos + 1 # leftmost child position + while childpos < endpos: + # Set childpos to index of smaller child. + rightpos = childpos + 1 + if rightpos < endpos and not heap[childpos] < heap[rightpos]: + childpos = rightpos + # Move the smaller child up. + heap[pos] = heap[childpos] + pos = childpos + childpos = 2*pos + 1 + # The leaf at pos is empty now. Put newitem there, and bubble it up + # to its final resting place (by sifting its parents down). + heap[pos] = newitem + _siftdown(heap, startpos, pos) + +def _siftdown_max(heap, startpos, pos): + 'Maxheap variant of _siftdown' + newitem = heap[pos] + # Follow the path to the root, moving parents down until finding a place + # newitem fits. + while pos > startpos: + parentpos = (pos - 1) >> 1 + parent = heap[parentpos] + if parent < newitem: + heap[pos] = parent + pos = parentpos + continue + break + heap[pos] = newitem + +def _siftup_max(heap, pos): + 'Maxheap variant of _siftup' + endpos = len(heap) + startpos = pos + newitem = heap[pos] + # Bubble up the larger child until hitting a leaf. + childpos = 2*pos + 1 # leftmost child position + while childpos < endpos: + # Set childpos to index of larger child. + rightpos = childpos + 1 + if rightpos < endpos and not heap[rightpos] < heap[childpos]: + childpos = rightpos + # Move the larger child up. + heap[pos] = heap[childpos] + pos = childpos + childpos = 2*pos + 1 + # The leaf at pos is empty now. Put newitem there, and bubble it up + # to its final resting place (by sifting its parents down). + heap[pos] = newitem + _siftdown_max(heap, startpos, pos) + +# If available, use C implementation +try: + from _heapq import * +except ImportError: + pass + +def merge(*iterables): + '''Merge multiple sorted inputs into a single sorted output. + + Similar to sorted(itertools.chain(*iterables)) but returns a generator, + does not pull the data into memory all at once, and assumes that each of + the input streams is already sorted (smallest to largest). 
+ + >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) + [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] + + ''' + _heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration + + h = [] + h_append = h.append + for itnum, it in enumerate(map(iter, iterables)): + try: + next = it.__next__ + h_append([next(), itnum, next]) + except _StopIteration: + pass + heapify(h) + + while 1: + try: + while 1: + v, itnum, next = s = h[0] # raises IndexError when h is empty + yield v + s[0] = next() # raises StopIteration when exhausted + _heapreplace(h, s) # restore heap condition + except _StopIteration: + _heappop(h) # remove empty iterator + except IndexError: + return + +# Extend the implementations of nsmallest and nlargest to use a key= argument +_nsmallest = nsmallest +def nsmallest(n, iterable, key=None): + """Find the n smallest elements in a dataset. + + Equivalent to: sorted(iterable, key=key)[:n] + """ + # Short-cut for n==1 is to use min() when len(iterable)>0 + if n == 1: + it = iter(iterable) + head = list(islice(it, 1)) + if not head: + return [] + if key is None: + return [min(chain(head, it))] + return [min(chain(head, it), key=key)] + + # When n>=size, it's faster to use sorted() + try: + size = len(iterable) + except (TypeError, AttributeError): + pass + else: + if n >= size: + return sorted(iterable, key=key)[:n] + + # When key is none, use simpler decoration + if key is None: + it = zip(iterable, count()) # decorate + result = _nsmallest(n, it) + return [r[0] for r in result] # undecorate + + # General case, slowest method + in1, in2 = tee(iterable) + it = zip(map(key, in1), count(), in2) # decorate + result = _nsmallest(n, it) + return [r[2] for r in result] # undecorate + +_nlargest = nlargest +def nlargest(n, iterable, key=None): + """Find the n largest elements in a dataset. + + Equivalent to: sorted(iterable, key=key, reverse=True)[:n] + """ + + # Short-cut for n==1 is to use max() when len(iterable)>0 + if n == 1: + it = iter(iterable) + head = list(islice(it, 1)) + if not head: + return [] + if key is None: + return [max(chain(head, it))] + return [max(chain(head, it), key=key)] + + # When n>=size, it's faster to use sorted() + try: + size = len(iterable) + except (TypeError, AttributeError): + pass + else: + if n >= size: + return sorted(iterable, key=key, reverse=True)[:n] + + # When key is none, use simpler decoration + if key is None: + it = zip(iterable, count(0,-1)) # decorate + result = _nlargest(n, it) + return [r[0] for r in result] # undecorate + + # General case, slowest method + in1, in2 = tee(iterable) + it = zip(map(key, in1), count(0,-1), in2) # decorate + result = _nlargest(n, it) + return [r[2] for r in result] # undecorate + +if __name__ == "__main__": + # Simple sanity test + heap = [] + data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0] + for item in data: + heappush(heap, item) + sort = [] + while heap: + sort.append(heappop(heap)) + print(sort) + + import doctest + doctest.testmod() diff --git a/tests/bytecode/pylib-tests/keyword.py b/tests/bytecode/pylib-tests/keyword.py new file mode 100644 index 0000000000..dad39cc377 --- /dev/null +++ b/tests/bytecode/pylib-tests/keyword.py @@ -0,0 +1,93 @@ +#! /usr/bin/env python3 + +"""Keywords (from "graminit.c") + +This file is automatically generated; please don't muck it up! 
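[Editor's note] Rounding off the heapq module above, a quick sketch of the key= variants and merge():

    import heapq
    rows = [('pear', 5), ('apple', 2), ('kiwi', 9)]
    heapq.nsmallest(2, rows, key=lambda r: r[1])  # [('apple', 2), ('pear', 5)]
    heapq.nlargest(1, rows, key=lambda r: r[1])   # [('kiwi', 9)]
    list(heapq.merge([1, 3, 5], [2, 4]))          # [1, 2, 3, 4, 5]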
+ +To update the symbols in this file, 'cd' to the top directory of +the python source tree after building the interpreter and run: + + ./python Lib/keyword.py +""" + +__all__ = ["iskeyword", "kwlist"] + +kwlist = [ +#--start keywords-- + 'False', + 'None', + 'True', + 'and', + 'as', + 'assert', + 'break', + 'class', + 'continue', + 'def', + 'del', + 'elif', + 'else', + 'except', + 'finally', + 'for', + 'from', + 'global', + 'if', + 'import', + 'in', + 'is', + 'lambda', + 'nonlocal', + 'not', + 'or', + 'pass', + 'raise', + 'return', + 'try', + 'while', + 'with', + 'yield', +#--end keywords-- + ] + +iskeyword = frozenset(kwlist).__contains__ + +def main(): + import sys, re + + args = sys.argv[1:] + iptfile = args and args[0] or "Python/graminit.c" + if len(args) > 1: optfile = args[1] + else: optfile = "Lib/keyword.py" + + # scan the source file for keywords + with open(iptfile) as fp: + strprog = re.compile('"([^"]+)"') + lines = [] + for line in fp: + if '{1, "' in line: + match = strprog.search(line) + if match: + lines.append(" '" + match.group(1) + "',\n") + lines.sort() + + # load the output skeleton from the target + with open(optfile) as fp: + format = fp.readlines() + + # insert the lines of keywords + try: + start = format.index("#--start keywords--\n") + 1 + end = format.index("#--end keywords--\n") + format[start:end] = lines + except ValueError: + sys.stderr.write("target does not contain format markers\n") + sys.exit(1) + + # write the output file + fp = open(optfile, 'w') + fp.write(''.join(format)) + fp.close() + +if __name__ == "__main__": + main() diff --git a/tests/bytecode/pylib-tests/macurl2path.py b/tests/bytecode/pylib-tests/macurl2path.py new file mode 100644 index 0000000000..f22fb207b8 --- /dev/null +++ b/tests/bytecode/pylib-tests/macurl2path.py @@ -0,0 +1,97 @@ +"""Macintosh-specific module for conversion between pathnames and URLs. + +Do not import directly; use urllib instead.""" + +import urllib.parse +import os + +__all__ = ["url2pathname","pathname2url"] + +def url2pathname(pathname): + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" + # + # XXXX The .. handling should be fixed... + # + tp = urllib.parse.splittype(pathname)[0] + if tp and tp != 'file': + raise RuntimeError('Cannot convert non-local URL to pathname') + # Turn starting /// into /, an empty hostname means current host + if pathname[:3] == '///': + pathname = pathname[2:] + elif pathname[:2] == '//': + raise RuntimeError('Cannot convert non-local URL to pathname') + components = pathname.split('/') + # Remove . and embedded .. + i = 0 + while i < len(components): + if components[i] == '.': + del components[i] + elif components[i] == '..' and i > 0 and \ + components[i-1] not in ('', '..'): + del components[i-1:i+1] + i = i-1 + elif components[i] == '' and i > 0 and components[i-1] != '': + del components[i] + else: + i = i+1 + if not components[0]: + # Absolute unix path, don't start with colon + rv = ':'.join(components[1:]) + else: + # relative unix path, start with colon. First replace + # leading .. 
by empty strings (giving ::file) + i = 0 + while i < len(components) and components[i] == '..': + components[i] = '' + i = i + 1 + rv = ':' + ':'.join(components) + # and finally unquote slashes and other funny characters + return urllib.parse.unquote(rv) + +def pathname2url(pathname): + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" + if '/' in pathname: + raise RuntimeError("Cannot convert pathname containing slashes") + components = pathname.split(':') + # Remove empty first and/or last component + if components[0] == '': + del components[0] + if components[-1] == '': + del components[-1] + # Replace empty string ('::') by .. (will result in '/../' later) + for i in range(len(components)): + if components[i] == '': + components[i] = '..' + # Truncate names longer than 31 bytes + components = map(_pncomp2url, components) + + if os.path.isabs(pathname): + return '/' + '/'.join(components) + else: + return '/'.join(components) + +def _pncomp2url(component): + # We want to quote slashes + return urllib.parse.quote(component[:31], safe='') + +def test(): + for url in ["index.html", + "bar/index.html", + "/foo/bar/index.html", + "/foo/bar/", + "/"]: + print('%r -> %r' % (url, url2pathname(url))) + for path in ["drive:", + "drive:dir:", + "drive:dir:file", + "drive:file", + "file", + ":file", + ":dir:", + ":dir:file"]: + print('%r -> %r' % (path, pathname2url(path))) + +if __name__ == '__main__': + test() diff --git a/tests/bytecode/pylib-tests/mimetypes.py b/tests/bytecode/pylib-tests/mimetypes.py new file mode 100644 index 0000000000..2872ee4245 --- /dev/null +++ b/tests/bytecode/pylib-tests/mimetypes.py @@ -0,0 +1,589 @@ +"""Guess the MIME type of a file. + +This module defines two useful functions: + +guess_type(url, strict=True) -- guess the MIME type and encoding of a URL. + +guess_extension(type, strict=True) -- guess the extension for a given MIME type. + +It also contains the following, for tuning the behavior: + +Data: + +knownfiles -- list of files to parse +inited -- flag set when init() has been called +suffix_map -- dictionary mapping suffixes to suffixes +encodings_map -- dictionary mapping suffixes to encodings +types_map -- dictionary mapping suffixes to types + +Functions: + +init([files]) -- parse a list of files, default knownfiles (on Windows, the + default values are taken from the registry) +read_mime_types(file) -- parse one file, return a dictionary or None +""" + +import os +import sys +import posixpath +import urllib.parse +try: + import winreg as _winreg +except ImportError: + _winreg = None + +__all__ = [ + "guess_type","guess_extension","guess_all_extensions", + "add_type","read_mime_types","init" +] + +knownfiles = [ + "/etc/mime.types", + "/etc/httpd/mime.types", # Mac OS X + "/etc/httpd/conf/mime.types", # Apache + "/etc/apache/mime.types", # Apache 1 + "/etc/apache2/mime.types", # Apache 2 + "/usr/local/etc/httpd/conf/mime.types", + "/usr/local/lib/netscape/mime.types", + "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 + "/usr/local/etc/mime.types", # Apache 1.3 + ] + +inited = False +_db = None + + +class MimeTypes: + """MIME-types datastore. + + This datastore can handle information from mime.types-style files + and supports basic determination of MIME type from a filename or + URL, and can guess a reasonable extension given a MIME type. 
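[Editor's note] Typical results from the module-level wrappers defined further down, per the default tables in this file:

    import mimetypes
    mimetypes.guess_type('archive.tar.gz')    # ('application/x-tar', 'gzip')
    mimetypes.guess_type('page.html')         # ('text/html', None)
    mimetypes.guess_extension('image/png')    # '.png'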
+ """ + + def __init__(self, filenames=(), strict=True): + if not inited: + init() + self.encodings_map = encodings_map.copy() + self.suffix_map = suffix_map.copy() + self.types_map = ({}, {}) # dict for (non-strict, strict) + self.types_map_inv = ({}, {}) + for (ext, type) in types_map.items(): + self.add_type(type, ext, True) + for (ext, type) in common_types.items(): + self.add_type(type, ext, False) + for name in filenames: + self.read(name, strict) + + def add_type(self, type, ext, strict=True): + """Add a mapping between a type and an extension. + + When the extension is already known, the new + type will replace the old one. When the type + is already known the extension will be added + to the list of known extensions. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + self.types_map[strict][ext] = type + exts = self.types_map_inv[strict].setdefault(type, []) + if ext not in exts: + exts.append(ext) + + def guess_type(self, url, strict=True): + """Guess the type of a file based on its URL. + + Return value is a tuple (type, encoding) where type is None if + the type can't be guessed (no or unknown suffix) or a string + of the form type/subtype, usable for a MIME Content-type + header; and encoding is None for no encoding or the name of + the program used to encode (e.g. compress or gzip). The + mappings are table driven. Encoding suffixes are case + sensitive; type suffixes are first tried case sensitive, then + case insensitive. + + The suffixes .tgz, .taz and .tz (case sensitive!) are all + mapped to '.tar.gz'. (This is table-driven too, using the + dictionary suffix_map.) + + Optional `strict' argument when False adds a bunch of commonly found, + but non-standard types. + """ + scheme, url = urllib.parse.splittype(url) + if scheme == 'data': + # syntax of data URLs: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + # type/subtype defaults to "text/plain" + comma = url.find(',') + if comma < 0: + # bad data URL + return None, None + semi = url.find(';', 0, comma) + if semi >= 0: + type = url[:semi] + else: + type = url[:comma] + if '=' in type or '/' not in type: + type = 'text/plain' + return type, None # never compressed, so encoding is None + base, ext = posixpath.splitext(url) + while ext in self.suffix_map: + base, ext = posixpath.splitext(base + self.suffix_map[ext]) + if ext in self.encodings_map: + encoding = self.encodings_map[ext] + base, ext = posixpath.splitext(base) + else: + encoding = None + types_map = self.types_map[True] + if ext in types_map: + return types_map[ext], encoding + elif ext.lower() in types_map: + return types_map[ext.lower()], encoding + elif strict: + return None, encoding + types_map = self.types_map[False] + if ext in types_map: + return types_map[ext], encoding + elif ext.lower() in types_map: + return types_map[ext.lower()], encoding + else: + return None, encoding + + def guess_all_extensions(self, type, strict=True): + """Guess the extensions for a file based on its MIME type. + + Return value is a list of strings giving the possible filename + extensions, including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data stream, + but would be mapped to the MIME type `type' by guess_type(). + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. 
+ """ + type = type.lower() + extensions = self.types_map_inv[True].get(type, []) + if not strict: + for ext in self.types_map_inv[False].get(type, []): + if ext not in extensions: + extensions.append(ext) + return extensions + + def guess_extension(self, type, strict=True): + """Guess the extension for a file based on its MIME type. + + Return value is a string giving a filename extension, + including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data + stream, but would be mapped to the MIME type `type' by + guess_type(). If no extension can be guessed for `type', None + is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + extensions = self.guess_all_extensions(type, strict) + if not extensions: + return None + return extensions[0] + + def read(self, filename, strict=True): + """ + Read a single mime.types-format file, specified by pathname. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + with open(filename, encoding='utf-8') as fp: + self.readfp(fp, strict) + + def readfp(self, fp, strict=True): + """ + Read a single mime.types-format file. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + while 1: + line = fp.readline() + if not line: + break + words = line.split() + for i in range(len(words)): + if words[i][0] == '#': + del words[i:] + break + if not words: + continue + type, suffixes = words[0], words[1:] + for suff in suffixes: + self.add_type(type, '.' + suff, strict) + + def read_windows_registry(self, strict=True): + """ + Load the MIME types database from Windows registry. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + + # Windows only + if not _winreg: + return + + def enum_types(mimedb): + i = 0 + while True: + try: + ctype = _winreg.EnumKey(mimedb, i) + except EnvironmentError: + break + else: + yield ctype + i += 1 + + with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, + r'MIME\Database\Content Type') as mimedb: + for ctype in enum_types(mimedb): + try: + with _winreg.OpenKey(mimedb, ctype) as key: + suffix, datatype = _winreg.QueryValueEx(key, + 'Extension') + except EnvironmentError: + continue + if datatype != _winreg.REG_SZ: + continue + self.add_type(ctype, suffix, strict) + + +def guess_type(url, strict=True): + """Guess the type of a file based on its URL. + + Return value is a tuple (type, encoding) where type is None if the + type can't be guessed (no or unknown suffix) or a string of the + form type/subtype, usable for a MIME Content-type header; and + encoding is None for no encoding or the name of the program used + to encode (e.g. compress or gzip). The mappings are table + driven. Encoding suffixes are case sensitive; type suffixes are + first tried case sensitive, then case insensitive. + + The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped + to ".tar.gz". (This is table-driven too, using the dictionary + suffix_map). + + Optional `strict' argument when false adds a bunch of commonly found, but + non-standard types. + """ + if _db is None: + init() + return _db.guess_type(url, strict) + + +def guess_all_extensions(type, strict=True): + """Guess the extensions for a file based on its MIME type. 
+ + Return value is a list of strings giving the possible filename + extensions, including the leading dot ('.'). The extension is not + guaranteed to have been associated with any particular data + stream, but would be mapped to the MIME type `type' by + guess_type(). If no extension can be guessed for `type', None + is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + if _db is None: + init() + return _db.guess_all_extensions(type, strict) + +def guess_extension(type, strict=True): + """Guess the extension for a file based on its MIME type. + + Return value is a string giving a filename extension, including the + leading dot ('.'). The extension is not guaranteed to have been + associated with any particular data stream, but would be mapped to the + MIME type `type' by guess_type(). If no extension can be guessed for + `type', None is returned. + + Optional `strict' argument when false adds a bunch of commonly found, + but non-standard types. + """ + if _db is None: + init() + return _db.guess_extension(type, strict) + +def add_type(type, ext, strict=True): + """Add a mapping between a type and an extension. + + When the extension is already known, the new + type will replace the old one. When the type + is already known the extension will be added + to the list of known extensions. + + If strict is true, information will be added to + list of standard types, else to the list of non-standard + types. + """ + if _db is None: + init() + return _db.add_type(type, ext, strict) + + +def init(files=None): + global suffix_map, types_map, encodings_map, common_types + global inited, _db + inited = True # so that MimeTypes.__init__() doesn't call us again + db = MimeTypes() + if files is None: + if _winreg: + db.read_windows_registry() + files = knownfiles + for file in files: + if os.path.isfile(file): + db.read(file) + encodings_map = db.encodings_map + suffix_map = db.suffix_map + types_map = db.types_map[True] + common_types = db.types_map[False] + # Make the DB a global variable now that it is fully initialized + _db = db + + +def read_mime_types(file): + try: + f = open(file) + except IOError: + return None + db = MimeTypes() + db.readfp(f, True) + return db.types_map[True] + + +def _default_mime_types(): + global suffix_map + global encodings_map + global types_map + global common_types + + suffix_map = { + '.svgz': '.svg.gz', + '.tgz': '.tar.gz', + '.taz': '.tar.gz', + '.tz': '.tar.gz', + '.tbz2': '.tar.bz2', + '.txz': '.tar.xz', + } + + encodings_map = { + '.gz': 'gzip', + '.Z': 'compress', + '.bz2': 'bzip2', + '.xz': 'xz', + } + + # Before adding new types, make sure they are either registered with IANA, + # at http://www.iana.org/assignments/media-types + # or extensions, i.e. using the x- prefix + + # If you add to these, please keep them sorted! 
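+    # Illustrative note: with the tables defined in this function,
+    # guess_type() chains them like so (hypothetical session):
+    #
+    #   import mimetypes
+    #   mimetypes.guess_type('archive.tgz')
+    #   # suffix_map rewrites '.tgz' -> '.tar.gz', encodings_map strips
+    #   # '.gz' (encoding 'gzip'), and types_map resolves '.tar':
+    #   # -> ('application/x-tar', 'gzip')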
+ types_map = { + '.a' : 'application/octet-stream', + '.ai' : 'application/postscript', + '.aif' : 'audio/x-aiff', + '.aifc' : 'audio/x-aiff', + '.aiff' : 'audio/x-aiff', + '.au' : 'audio/basic', + '.avi' : 'video/x-msvideo', + '.bat' : 'text/plain', + '.bcpio' : 'application/x-bcpio', + '.bin' : 'application/octet-stream', + '.bmp' : 'image/x-ms-bmp', + '.c' : 'text/plain', + # Duplicates :( + '.cdf' : 'application/x-cdf', + '.cdf' : 'application/x-netcdf', + '.cpio' : 'application/x-cpio', + '.csh' : 'application/x-csh', + '.css' : 'text/css', + '.dll' : 'application/octet-stream', + '.doc' : 'application/msword', + '.dot' : 'application/msword', + '.dvi' : 'application/x-dvi', + '.eml' : 'message/rfc822', + '.eps' : 'application/postscript', + '.etx' : 'text/x-setext', + '.exe' : 'application/octet-stream', + '.gif' : 'image/gif', + '.gtar' : 'application/x-gtar', + '.h' : 'text/plain', + '.hdf' : 'application/x-hdf', + '.htm' : 'text/html', + '.html' : 'text/html', + '.ico' : 'image/vnd.microsoft.icon', + '.ief' : 'image/ief', + '.jpe' : 'image/jpeg', + '.jpeg' : 'image/jpeg', + '.jpg' : 'image/jpeg', + '.js' : 'application/javascript', + '.ksh' : 'text/plain', + '.latex' : 'application/x-latex', + '.m1v' : 'video/mpeg', + '.m3u' : 'application/vnd.apple.mpegurl', + '.m3u8' : 'application/vnd.apple.mpegurl', + '.man' : 'application/x-troff-man', + '.me' : 'application/x-troff-me', + '.mht' : 'message/rfc822', + '.mhtml' : 'message/rfc822', + '.mif' : 'application/x-mif', + '.mov' : 'video/quicktime', + '.movie' : 'video/x-sgi-movie', + '.mp2' : 'audio/mpeg', + '.mp3' : 'audio/mpeg', + '.mp4' : 'video/mp4', + '.mpa' : 'video/mpeg', + '.mpe' : 'video/mpeg', + '.mpeg' : 'video/mpeg', + '.mpg' : 'video/mpeg', + '.ms' : 'application/x-troff-ms', + '.nc' : 'application/x-netcdf', + '.nws' : 'message/rfc822', + '.o' : 'application/octet-stream', + '.obj' : 'application/octet-stream', + '.oda' : 'application/oda', + '.p12' : 'application/x-pkcs12', + '.p7c' : 'application/pkcs7-mime', + '.pbm' : 'image/x-portable-bitmap', + '.pdf' : 'application/pdf', + '.pfx' : 'application/x-pkcs12', + '.pgm' : 'image/x-portable-graymap', + '.pl' : 'text/plain', + '.png' : 'image/png', + '.pnm' : 'image/x-portable-anymap', + '.pot' : 'application/vnd.ms-powerpoint', + '.ppa' : 'application/vnd.ms-powerpoint', + '.ppm' : 'image/x-portable-pixmap', + '.pps' : 'application/vnd.ms-powerpoint', + '.ppt' : 'application/vnd.ms-powerpoint', + '.ps' : 'application/postscript', + '.pwz' : 'application/vnd.ms-powerpoint', + '.py' : 'text/x-python', + '.pyc' : 'application/x-python-code', + '.pyo' : 'application/x-python-code', + '.qt' : 'video/quicktime', + '.ra' : 'audio/x-pn-realaudio', + '.ram' : 'application/x-pn-realaudio', + '.ras' : 'image/x-cmu-raster', + '.rdf' : 'application/xml', + '.rgb' : 'image/x-rgb', + '.roff' : 'application/x-troff', + '.rtx' : 'text/richtext', + '.sgm' : 'text/x-sgml', + '.sgml' : 'text/x-sgml', + '.sh' : 'application/x-sh', + '.shar' : 'application/x-shar', + '.snd' : 'audio/basic', + '.so' : 'application/octet-stream', + '.src' : 'application/x-wais-source', + '.sv4cpio': 'application/x-sv4cpio', + '.sv4crc' : 'application/x-sv4crc', + '.svg' : 'image/svg+xml', + '.swf' : 'application/x-shockwave-flash', + '.t' : 'application/x-troff', + '.tar' : 'application/x-tar', + '.tcl' : 'application/x-tcl', + '.tex' : 'application/x-tex', + '.texi' : 'application/x-texinfo', + '.texinfo': 'application/x-texinfo', + '.tif' : 'image/tiff', + '.tiff' : 'image/tiff', + '.tr' : 
'application/x-troff', + '.tsv' : 'text/tab-separated-values', + '.txt' : 'text/plain', + '.ustar' : 'application/x-ustar', + '.vcf' : 'text/x-vcard', + '.wav' : 'audio/x-wav', + '.wiz' : 'application/msword', + '.wsdl' : 'application/xml', + '.xbm' : 'image/x-xbitmap', + '.xlb' : 'application/vnd.ms-excel', + # Duplicates :( + '.xls' : 'application/excel', + '.xls' : 'application/vnd.ms-excel', + '.xml' : 'text/xml', + '.xpdl' : 'application/xml', + '.xpm' : 'image/x-xpixmap', + '.xsl' : 'application/xml', + '.xwd' : 'image/x-xwindowdump', + '.zip' : 'application/zip', + } + + # These are non-standard types, commonly found in the wild. They will + # only match if strict=0 flag is given to the API methods. + + # Please sort these too + common_types = { + '.jpg' : 'image/jpg', + '.mid' : 'audio/midi', + '.midi': 'audio/midi', + '.pct' : 'image/pict', + '.pic' : 'image/pict', + '.pict': 'image/pict', + '.rtf' : 'application/rtf', + '.xul' : 'text/xul' + } + + +_default_mime_types() + + +if __name__ == '__main__': + import getopt + + USAGE = """\ +Usage: mimetypes.py [options] type + +Options: + --help / -h -- print this message and exit + --lenient / -l -- additionally search some common, but non-standard + types. + --extension / -e -- guess extension instead of type + +More than one type argument may be given. +""" + + def usage(code, msg=''): + print(USAGE) + if msg: print(msg) + sys.exit(code) + + try: + opts, args = getopt.getopt(sys.argv[1:], 'hle', + ['help', 'lenient', 'extension']) + except getopt.error as msg: + usage(1, msg) + + strict = 1 + extension = 0 + for opt, arg in opts: + if opt in ('-h', '--help'): + usage(0) + elif opt in ('-l', '--lenient'): + strict = 0 + elif opt in ('-e', '--extension'): + extension = 1 + for gtype in args: + if extension: + guess = guess_extension(gtype, strict) + if not guess: print("I don't know anything about type", gtype) + else: print(guess) + else: + guess, encoding = guess_type(gtype, strict) + if not guess: print("I don't know anything about type", gtype) + else: print('type:', guess, 'encoding:', encoding) diff --git a/tests/bytecode/pylib-tests/modulefinder.py b/tests/bytecode/pylib-tests/modulefinder.py new file mode 100644 index 0000000000..f90a4327e6 --- /dev/null +++ b/tests/bytecode/pylib-tests/modulefinder.py @@ -0,0 +1,663 @@ +"""Find modules used by a script, using introspection.""" + +import dis +import imp +import importlib.machinery +import marshal +import os +import sys +import types +import struct + +# XXX Clean up once str8's cstor matches bytes. +LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')]) +IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')]) +STORE_NAME = bytes([dis.opname.index('STORE_NAME')]) +STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')]) +STORE_OPS = [STORE_NAME, STORE_GLOBAL] +HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT]) + +# Modulefinder does a good job of simulating Python's import mechanism, +# but it cannot handle __path__ modifications that packages make at +# runtime. Therefore there is a mechanism whereby you can register extra +# paths in this map for a package, and it will be honored. + +# Note this is a mapping to lists of paths.
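+# Illustrative use of this mechanism (the package name and path below are
+# made up):
+#
+#   import modulefinder
+#   modulefinder.AddPackagePath('mypkg', '/opt/plugins/mypkg')
+#   # load_package() will then extend mypkg's __path__ with the extra entry.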
+packagePathMap = {} + +# A Public interface +def AddPackagePath(packagename, path): + packagePathMap.setdefault(packagename, []).append(path) + +replacePackageMap = {} + +# This ReplacePackage mechanism allows modulefinder to work around +# situations in which a package injects itself under the name +# of another package into sys.modules at runtime by calling +# ReplacePackage("real_package_name", "faked_package_name") +# before running ModuleFinder. + +def ReplacePackage(oldname, newname): + replacePackageMap[oldname] = newname + + +class Module: + + def __init__(self, name, file=None, path=None): + self.__name__ = name + self.__file__ = file + self.__path__ = path + self.__code__ = None + # The set of global names that are assigned to in the module. + # This includes those names imported through starimports of + # Python modules. + self.globalnames = {} + # The set of starimports this module did that could not be + # resolved, ie. a starimport from a non-Python module. + self.starimports = {} + + def __repr__(self): + s = "Module(%r" % (self.__name__,) + if self.__file__ is not None: + s = s + ", %r" % (self.__file__,) + if self.__path__ is not None: + s = s + ", %r" % (self.__path__,) + s = s + ")" + return s + +class ModuleFinder: + + def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): + if path is None: + path = sys.path + self.path = path + self.modules = {} + self.badmodules = {} + self.debug = debug + self.indent = 0 + self.excludes = excludes + self.replace_paths = replace_paths + self.processed_paths = [] # Used in debugging only + + def msg(self, level, str, *args): + if level <= self.debug: + for i in range(self.indent): + print(" ", end=' ') + print(str, end=' ') + for arg in args: + print(repr(arg), end=' ') + print() + + def msgin(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent + 1 + self.msg(*args) + + def msgout(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent - 1 + self.msg(*args) + + def run_script(self, pathname): + self.msg(2, "run_script", pathname) + with open(pathname) as fp: + stuff = ("", "r", imp.PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) + + def load_file(self, pathname): + dir, name = os.path.split(pathname) + name, ext = os.path.splitext(name) + with open(pathname) as fp: + stuff = (ext, "r", imp.PY_SOURCE) + self.load_module(name, fp, pathname, stuff) + + def import_hook(self, name, caller=None, fromlist=None, level=-1): + self.msg(3, "import_hook", name, caller, fromlist, level) + parent = self.determine_parent(caller, level=level) + q, tail = self.find_head_package(parent, name) + m = self.load_tail(q, tail) + if not fromlist: + return q + if m.__path__: + self.ensure_fromlist(m, fromlist) + return None + + def determine_parent(self, caller, level=-1): + self.msgin(4, "determine_parent", caller, level) + if not caller or level == 0: + self.msgout(4, "determine_parent -> None") + return None + pname = caller.__name__ + if level >= 1: # relative import + if caller.__path__: + level -= 1 + if level == 0: + parent = self.modules[pname] + assert parent is caller + self.msgout(4, "determine_parent ->", parent) + return parent + if pname.count(".") < level: + raise ImportError("relative importpath too deep") + pname = ".".join(pname.split(".")[:-level]) + parent = self.modules[pname] + self.msgout(4, "determine_parent ->", parent) + return parent + if caller.__path__: + parent = self.modules[pname] + assert caller is parent + self.msgout(4, 
"determine_parent ->", parent) + return parent + if '.' in pname: + i = pname.rfind('.') + pname = pname[:i] + parent = self.modules[pname] + assert parent.__name__ == pname + self.msgout(4, "determine_parent ->", parent) + return parent + self.msgout(4, "determine_parent -> None") + return None + + def find_head_package(self, parent, name): + self.msgin(4, "find_head_package", parent, name) + if '.' in name: + i = name.find('.') + head = name[:i] + tail = name[i+1:] + else: + head = name + tail = "" + if parent: + qname = "%s.%s" % (parent.__name__, head) + else: + qname = head + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + if parent: + qname = head + parent = None + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + self.msgout(4, "raise ImportError: No module named", qname) + raise ImportError("No module named " + qname) + + def load_tail(self, q, tail): + self.msgin(4, "load_tail", q, tail) + m = q + while tail: + i = tail.find('.') + if i < 0: i = len(tail) + head, tail = tail[:i], tail[i+1:] + mname = "%s.%s" % (m.__name__, head) + m = self.import_module(head, mname, m) + if not m: + self.msgout(4, "raise ImportError: No module named", mname) + raise ImportError("No module named " + mname) + self.msgout(4, "load_tail ->", m) + return m + + def ensure_fromlist(self, m, fromlist, recursive=0): + self.msg(4, "ensure_fromlist", m, fromlist, recursive) + for sub in fromlist: + if sub == "*": + if not recursive: + all = self.find_all_submodules(m) + if all: + self.ensure_fromlist(m, all, 1) + elif not hasattr(m, sub): + subname = "%s.%s" % (m.__name__, sub) + submod = self.import_module(sub, subname, m) + if not submod: + raise ImportError("No module named " + subname) + + def find_all_submodules(self, m): + if not m.__path__: + return + modules = {} + # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"]. + # But we must also collect Python extension modules - although + # we cannot separate normal dlls from Python extensions. 
+ suffixes = [] + suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] + suffixes += importlib.machinery.SOURCE_SUFFIXES[:] + suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] + for dir in m.__path__: + try: + names = os.listdir(dir) + except os.error: + self.msg(2, "can't list directory", dir) + continue + for name in names: + mod = None + for suff in suffixes: + n = len(suff) + if name[-n:] == suff: + mod = name[:-n] + break + if mod and mod != "__init__": + modules[mod] = mod + return modules.keys() + + def import_module(self, partname, fqname, parent): + self.msgin(3, "import_module", partname, fqname, parent) + try: + m = self.modules[fqname] + except KeyError: + pass + else: + self.msgout(3, "import_module ->", m) + return m + if fqname in self.badmodules: + self.msgout(3, "import_module -> None") + return None + if parent and parent.__path__ is None: + self.msgout(3, "import_module -> None") + return None + try: + fp, pathname, stuff = self.find_module(partname, + parent and parent.__path__, parent) + except ImportError: + self.msgout(3, "import_module ->", None) + return None + try: + m = self.load_module(fqname, fp, pathname, stuff) + finally: + if fp: + fp.close() + if parent: + setattr(parent, partname, m) + self.msgout(3, "import_module ->", m) + return m + + def load_module(self, fqname, fp, pathname, file_info): + suffix, mode, type = file_info + self.msgin(2, "load_module", fqname, fp and "fp", pathname) + if type == imp.PKG_DIRECTORY: + m = self.load_package(fqname, pathname) + self.msgout(2, "load_module ->", m) + return m + if type == imp.PY_SOURCE: + co = compile(fp.read()+'\n', pathname, 'exec') + elif type == imp.PY_COMPILED: + if fp.read(4) != imp.get_magic(): + self.msgout(2, "raise ImportError: Bad magic number", pathname) + raise ImportError("Bad magic number in %s" % pathname) + fp.read(4) + co = marshal.load(fp) + else: + co = None + m = self.add_module(fqname) + m.__file__ = pathname + if co: + if self.replace_paths: + co = self.replace_paths_in_code(co) + m.__code__ = co + self.scan_code(co, m) + self.msgout(2, "load_module ->", m) + return m + + def _add_badmodule(self, name, caller): + if name not in self.badmodules: + self.badmodules[name] = {} + if caller: + self.badmodules[name][caller.__name__] = 1 + else: + self.badmodules[name]["-"] = 1 + + def _safe_import_hook(self, name, caller, fromlist, level=-1): + # wrapper for self.import_hook() that won't raise ImportError + if name in self.badmodules: + self._add_badmodule(name, caller) + return + try: + self.import_hook(name, caller, level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(name, caller) + else: + if fromlist: + for sub in fromlist: + if sub in self.badmodules: + self._add_badmodule(sub, caller) + continue + try: + self.import_hook(name, caller, [sub], level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + fullname = name + "." 
+ sub + self._add_badmodule(fullname, caller) + + def scan_opcodes(self, co, + unpack = struct.unpack): + # Scan the code, and yield 'interesting' opcode combinations + # Version for Python 2.4 and older + code = co.co_code + names = co.co_names + consts = co.co_consts + while code: + c = code[0] + if c in STORE_OPS: + oparg, = unpack('<H', code[1:3]) + yield "store", (names[oparg],) + code = code[3:] + continue + if c == LOAD_CONST and code[3] == IMPORT_NAME: + oparg_1, oparg_2 = unpack('<xHxH', code[:6]) + yield "import", (consts[oparg_1], names[oparg_2]) + code = code[6:] + continue + if c >= HAVE_ARGUMENT: + code = code[3:] + else: + code = code[1:] + + def scan_opcodes_25(self, co, + unpack = struct.unpack): + # Scan the code, and yield 'interesting' opcode combinations + # Python 2.5 version (has absolute and relative imports) + code = co.co_code + names = co.co_names + consts = co.co_consts + LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME + while code: + c = bytes([code[0]]) + if c in STORE_OPS: + oparg, = unpack('<H', code[1:3]) + yield "store", (names[oparg],) + code = code[3:] + continue + if code[:9:3] == LOAD_LOAD_AND_IMPORT: + oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9]) + level = consts[oparg_1] + if level == 0: # absolute import + yield "absolute_import", (consts[oparg_2], names[oparg_3]) + else: # relative import + yield "relative_import", (level, consts[oparg_2], names[oparg_3]) + code = code[9:] + continue + if c >= HAVE_ARGUMENT: + code = code[3:] + else: + code = code[1:] + + def scan_code(self, co, m): + code = co.co_code + if sys.version_info >= (2, 5): + scanner = self.scan_opcodes_25 + else: + scanner = self.scan_opcodes + for what, args in scanner(co): + if what == "store": + name, = args + m.globalnames[name] = 1 + elif what == "absolute_import": + fromlist, name = args + have_star = 0 + if fromlist is not None: + if "*" in fromlist: + have_star = 1 + fromlist = [f for f in fromlist if f != "*"] + self._safe_import_hook(name, m, fromlist, level=0) + if have_star: + # We've encountered an "import *". If it is a Python module, + # the code has already been parsed and we can suck out the + # global names. + mm = None + if m.__path__: + # At this point we don't know whether 'name' is a + # submodule of 'm' or a global module. Let's just try + # the full name first. + mm = self.modules.get(m.__name__ + "." + name) + if mm is None: + mm = self.modules.get(name) + if mm is not None: + m.globalnames.update(mm.globalnames) + m.starimports.update(mm.starimports) + if mm.__code__ is None: + m.starimports[name] = 1 + else: + m.starimports[name] = 1 + elif what == "relative_import": + level, fromlist, name = args + if name: + self._safe_import_hook(name, m, fromlist, level=level) + else: + parent = self.determine_parent(m, level=level) + self._safe_import_hook(parent.__name__, None, fromlist, level=0) + else: + # We don't expect anything else from the generator. + raise RuntimeError(what) + + for c in co.co_consts: + if isinstance(c, type(co)): + self.scan_code(c, m) + + def load_package(self, fqname, pathname): + self.msgin(2, "load_package", fqname, pathname) + newname = replacePackageMap.get(fqname) + if newname: + fqname = newname + m = self.add_module(fqname) + m.__file__ = pathname + m.__path__ = [pathname] + + # As per comment at top of file, simulate runtime __path__ additions.
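+        # (packagePathMap is the module-level table that AddPackagePath()
+        # fills in; it is empty unless a caller registered extra paths.)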
+ m.__path__ = m.__path__ + packagePathMap.get(fqname, []) + + fp, buf, stuff = self.find_module("__init__", m.__path__) + try: + self.load_module(fqname, fp, buf, stuff) + self.msgout(2, "load_package ->", m) + return m + finally: + if fp: + fp.close() + + def add_module(self, fqname): + if fqname in self.modules: + return self.modules[fqname] + self.modules[fqname] = m = Module(fqname) + return m + + def find_module(self, name, path, parent=None): + if parent is not None: + # assert path is not None + fullname = parent.__name__+'.'+name + else: + fullname = name + if fullname in self.excludes: + self.msgout(3, "find_module -> Excluded", fullname) + raise ImportError(name) + + if path is None: + if name in sys.builtin_module_names: + return (None, None, ("", "", imp.C_BUILTIN)) + + path = self.path + return imp.find_module(name, path) + + def report(self): + """Print a report to stdout, listing the found modules with their + paths, as well as modules that are missing, or seem to be missing. + """ + print() + print(" %-25s %s" % ("Name", "File")) + print(" %-25s %s" % ("----", "----")) + # Print modules found + keys = sorted(self.modules.keys()) + for key in keys: + m = self.modules[key] + if m.__path__: + print("P", end=' ') + else: + print("m", end=' ') + print("%-25s" % key, m.__file__ or "") + + # Print missing modules + missing, maybe = self.any_missing_maybe() + if missing: + print() + print("Missing modules:") + for name in missing: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + # Print modules that may be missing, but then again, maybe not... + if maybe: + print() + print("Submodules that appear to be missing, but could also be", end=' ') + print("global names in the parent package:") + for name in maybe: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + + def any_missing(self): + """Return a list of modules that appear to be missing. Use + any_missing_maybe() if you want to know which modules are + certain to be missing, and which *may* be missing. + """ + missing, maybe = self.any_missing_maybe() + return missing + maybe + + def any_missing_maybe(self): + """Return two lists, one with modules that are certainly missing + and one with modules that *may* be missing. The latter names could + either be submodules *or* just global names in the package. + + The reason it can't always be determined is that it's impossible to + tell which names are imported when "from module import *" is done + with an extension module, short of actually importing it. + """ + missing = [] + maybe = [] + for name in self.badmodules: + if name in self.excludes: + continue + i = name.rfind(".") + if i < 0: + missing.append(name) + continue + subname = name[i+1:] + pkgname = name[:i] + pkg = self.modules.get(pkgname) + if pkg is not None: + if pkgname in self.badmodules[name]: + # The package tried to import this module itself and + # failed. It's definitely missing. + missing.append(name) + elif subname in pkg.globalnames: + # It's a global in the package: definitely not missing. + pass + elif pkg.starimports: + # It could be missing, but the package did an "import *" + # from a non-Python module, so we simply can't be sure. + maybe.append(name) + else: + # It's not a global in the package, the package didn't + # do funny star imports, it's very likely to be missing. + # The symbol could be inserted into the package from the + # outside, but since that's not good style we simply list + # it missing.
+ missing.append(name) + else: + missing.append(name) + missing.sort() + maybe.sort() + return missing, maybe + + def replace_paths_in_code(self, co): + new_filename = original_filename = os.path.normpath(co.co_filename) + for f, r in self.replace_paths: + if original_filename.startswith(f): + new_filename = r + original_filename[len(f):] + break + + if self.debug and original_filename not in self.processed_paths: + if new_filename != original_filename: + self.msgout(2, "co_filename %r changed to %r" \ + % (original_filename,new_filename,)) + else: + self.msgout(2, "co_filename %r remains unchanged" \ + % (original_filename,)) + self.processed_paths.append(original_filename) + + consts = list(co.co_consts) + for i in range(len(consts)): + if isinstance(consts[i], type(co)): + consts[i] = self.replace_paths_in_code(consts[i]) + + return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize, + co.co_flags, co.co_code, tuple(consts), co.co_names, + co.co_varnames, new_filename, co.co_name, + co.co_firstlineno, co.co_lnotab, + co.co_freevars, co.co_cellvars) + + +def test(): + # Parse command line + import getopt + try: + opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") + except getopt.error as msg: + print(msg) + return + + # Process options + debug = 1 + domods = 0 + addpath = [] + exclude = [] + for o, a in opts: + if o == '-d': + debug = debug + 1 + if o == '-m': + domods = 1 + if o == '-p': + addpath = addpath + a.split(os.pathsep) + if o == '-q': + debug = 0 + if o == '-x': + exclude.append(a) + + # Provide default arguments + if not args: + script = "hello.py" + else: + script = args[0] + + # Set the path based on sys.path and the script directory + path = sys.path[:] + path[0] = os.path.dirname(script) + path = addpath + path + if debug > 1: + print("path:") + for item in path: + print(" ", repr(item)) + + # Create the module finder and turn its crank + mf = ModuleFinder(path, debug, exclude) + for arg in args[1:]: + if arg == '-m': + domods = 1 + continue + if domods: + if arg[-2:] == '.*': + mf.import_hook(arg[:-2], None, ["*"]) + else: + mf.import_hook(arg) + else: + mf.load_file(arg) + mf.run_script(script) + mf.report() + return mf # for -i debugging + + +if __name__ == '__main__': + try: + mf = test() + except KeyboardInterrupt: + print("\n[interrupted]") diff --git a/tests/bytecode/pylib-tests/nturl2path.py b/tests/bytecode/pylib-tests/nturl2path.py new file mode 100644 index 0000000000..e0c2f23527 --- /dev/null +++ b/tests/bytecode/pylib-tests/nturl2path.py @@ -0,0 +1,66 @@ +"""Convert an NT pathname to a file URL and vice versa.""" + +def url2pathname(url): + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" + # e.g. + # ///C|/foo/bar/spam.foo + # becomes + # C:\foo\bar\spam.foo + import string, urllib.parse + # Windows itself uses ":" even in URLs.
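+    # For illustration, both of these forms should yield r'C:\foo\bar':
+    #   url2pathname('///C|/foo/bar')
+    #   url2pathname('///C:/foo/bar')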
+ url = url.replace(':', '|') + if '|' not in url: + # No drive specifier, just convert slashes + if url[:4] == '////': + # path is something like ////host/path/on/remote/host + # convert this to \\host\path\on\remote\host + # (notice halving of slashes at the start of the path) + url = url[2:] + components = url.split('/') + # make sure not to convert quoted slashes :-) + return urllib.parse.unquote('\\'.join(components)) + comp = url.split('|') + if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: + error = 'Bad URL: ' + url + raise IOError(error) + drive = comp[0][-1].upper() + components = comp[1].split('/') + path = drive + ':' + for comp in components: + if comp: + path = path + '\\' + urllib.parse.unquote(comp) + # Issue #11474 - handling url such as |c/| + if path.endswith(':') and url.endswith('/'): + path += '\\' + return path + +def pathname2url(p): + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" + # e.g. + # C:\foo\bar\spam.foo + # becomes + # ///C|/foo/bar/spam.foo + import urllib.parse + if ':' not in p: + # No drive specifier, just convert slashes and quote the name + if p[:2] == '\\\\': + # path is something like \\host\path\on\remote\host + # convert this to ////host/path/on/remote/host + # (notice doubling of slashes at the start of the path) + p = '\\\\' + p + components = p.split('\\') + return urllib.parse.quote('/'.join(components)) + comp = p.split(':') + if len(comp) != 2 or len(comp[0]) > 1: + error = 'Bad path: ' + p + raise IOError(error) + + drive = urllib.parse.quote(comp[0].upper()) + components = comp[1].split('\\') + path = '///' + drive + ':' + for comp in components: + if comp: + path = path + '/' + urllib.parse.quote(comp) + return path diff --git a/tests/bytecode/pylib-tests/opcode.py b/tests/bytecode/pylib-tests/opcode.py new file mode 100644 index 0000000000..d81b6bc3c9 --- /dev/null +++ b/tests/bytecode/pylib-tests/opcode.py @@ -0,0 +1,185 @@ + +""" +opcode module - potentially shared between dis and other modules which +operate on bytecodes (e.g. peephole optimizers).
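+
+For example (illustrative), after the tables below are built:
+
+    opmap['LOAD_CONST'] == 100
+    opname[100] == 'LOAD_CONST'
+    100 in hasconst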
+""" + +__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs", + "haslocal", "hascompare", "hasfree", "opname", "opmap", + "HAVE_ARGUMENT", "EXTENDED_ARG", "hasnargs"] + +#cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', +# 'is not', 'exception match', 'BAD') + +hasconst = [] +hasname = [] +hasjrel = [] +hasjabs = [] +haslocal = [] +hascompare = [] +hasfree = [] +hasnargs = [] + +opmap = {} +opname = [''] * 256 +for op in range(256): opname[op] = '<%r>' % (op,) +del op + +def def_op(name, op): + opname[op] = name + opmap[name] = op + +def name_op(name, op): + def_op(name, op) + hasname.append(op) + +def jrel_op(name, op): + def_op(name, op) + hasjrel.append(op) + +def jabs_op(name, op): + def_op(name, op) + hasjabs.append(op) + +# Instruction opcodes for compiled code +# Blank lines correspond to available opcodes + +def_op('POP_TOP', 1) +def_op('ROT_TWO', 2) +def_op('ROT_THREE', 3) +def_op('DUP_TOP', 4) +def_op('DUP_TOP_TWO', 5) + +def_op('NOP', 9) +def_op('UNARY_POSITIVE', 10) +def_op('UNARY_NEGATIVE', 11) +def_op('UNARY_NOT', 12) + +def_op('UNARY_INVERT', 15) + +def_op('BINARY_POWER', 19) +def_op('BINARY_MULTIPLY', 20) + +def_op('BINARY_MODULO', 22) +def_op('BINARY_ADD', 23) +def_op('BINARY_SUBTRACT', 24) +def_op('BINARY_SUBSCR', 25) +def_op('BINARY_FLOOR_DIVIDE', 26) +def_op('BINARY_TRUE_DIVIDE', 27) +def_op('INPLACE_FLOOR_DIVIDE', 28) +def_op('INPLACE_TRUE_DIVIDE', 29) + +def_op('STORE_MAP', 54) +def_op('INPLACE_ADD', 55) +def_op('INPLACE_SUBTRACT', 56) +def_op('INPLACE_MULTIPLY', 57) + +def_op('INPLACE_MODULO', 59) +def_op('STORE_SUBSCR', 60) +def_op('DELETE_SUBSCR', 61) +def_op('BINARY_LSHIFT', 62) +def_op('BINARY_RSHIFT', 63) +def_op('BINARY_AND', 64) +def_op('BINARY_XOR', 65) +def_op('BINARY_OR', 66) +def_op('INPLACE_POWER', 67) +def_op('GET_ITER', 68) +def_op('STORE_LOCALS', 69) + +def_op('PRINT_EXPR', 70) +def_op('LOAD_BUILD_CLASS', 71) +def_op('YIELD_FROM', 72) + +def_op('INPLACE_LSHIFT', 75) +def_op('INPLACE_RSHIFT', 76) +def_op('INPLACE_AND', 77) +def_op('INPLACE_XOR', 78) +def_op('INPLACE_OR', 79) +def_op('BREAK_LOOP', 80) +def_op('WITH_CLEANUP', 81) + +def_op('RETURN_VALUE', 83) +def_op('IMPORT_STAR', 84) + +def_op('YIELD_VALUE', 86) +def_op('POP_BLOCK', 87) +def_op('END_FINALLY', 88) +def_op('POP_EXCEPT', 89) + +HAVE_ARGUMENT = 90 # Opcodes from here have an argument: + +name_op('STORE_NAME', 90) # Index in name list +name_op('DELETE_NAME', 91) # "" +def_op('UNPACK_SEQUENCE', 92) # Number of tuple items +jrel_op('FOR_ITER', 93) +def_op('UNPACK_EX', 94) +name_op('STORE_ATTR', 95) # Index in name list +name_op('DELETE_ATTR', 96) # "" +name_op('STORE_GLOBAL', 97) # "" +name_op('DELETE_GLOBAL', 98) # "" +def_op('LOAD_CONST', 100) # Index in const list +hasconst.append(100) +name_op('LOAD_NAME', 101) # Index in name list +def_op('BUILD_TUPLE', 102) # Number of tuple items +def_op('BUILD_LIST', 103) # Number of list items +def_op('BUILD_SET', 104) # Number of set items +def_op('BUILD_MAP', 105) # Number of dict entries (upto 255) +name_op('LOAD_ATTR', 106) # Index in name list +def_op('COMPARE_OP', 107) # Comparison operator +hascompare.append(107) +name_op('IMPORT_NAME', 108) # Index in name list +name_op('IMPORT_FROM', 109) # Index in name list + +jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip +jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code +jabs_op('JUMP_IF_TRUE_OR_POP', 112) # "" +jabs_op('JUMP_ABSOLUTE', 113) # "" +jabs_op('POP_JUMP_IF_FALSE', 114) # "" +jabs_op('POP_JUMP_IF_TRUE', 115) # "" + 
+name_op('LOAD_GLOBAL', 116) # Index in name list + +jabs_op('CONTINUE_LOOP', 119) # Target address +jrel_op('SETUP_LOOP', 120) # Distance to target address +jrel_op('SETUP_EXCEPT', 121) # "" +jrel_op('SETUP_FINALLY', 122) # "" + +def_op('LOAD_FAST', 124) # Local variable number +haslocal.append(124) +def_op('STORE_FAST', 125) # Local variable number +haslocal.append(125) +def_op('DELETE_FAST', 126) # Local variable number +haslocal.append(126) + +def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3) +def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8) +hasnargs.append(131) +def_op('MAKE_FUNCTION', 132) # Number of args with default values +def_op('BUILD_SLICE', 133) # Number of items +def_op('MAKE_CLOSURE', 134) +def_op('LOAD_CLOSURE', 135) +hasfree.append(135) +def_op('LOAD_DEREF', 136) +hasfree.append(136) +def_op('STORE_DEREF', 137) +hasfree.append(137) +def_op('DELETE_DEREF', 138) +hasfree.append(138) + +def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8) +hasnargs.append(140) +def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8) +hasnargs.append(141) +def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8) +hasnargs.append(142) + +jrel_op('SETUP_WITH', 143) + +def_op('LIST_APPEND', 145) +def_op('SET_ADD', 146) +def_op('MAP_ADD', 147) + +def_op('EXTENDED_ARG', 144) +EXTENDED_ARG = 144 + +del def_op, name_op, jrel_op, jabs_op diff --git a/tests/bytecode/pylib-tests/pipes.py b/tests/bytecode/pylib-tests/pipes.py new file mode 100644 index 0000000000..f1a16f63de --- /dev/null +++ b/tests/bytecode/pylib-tests/pipes.py @@ -0,0 +1,247 @@ +"""Conversion pipeline templates. + +The problem: +------------ + +Suppose you have some data that you want to convert to another format, +such as from GIF image format to PPM image format. Maybe the +conversion involves several steps (e.g. piping it through compress or +uuencode). Some of the conversion steps may require that their input +is a disk file, others may be able to read standard input; similar for +their output. The input to the entire conversion may also be read +from a disk file or from an open file, and similar for its output. + +The module lets you construct a pipeline template by sticking one or +more conversion steps together. It will take care of creating and +removing temporary files if they are necessary to hold intermediate +data. You can then use the template to do conversions from many +different sources to many different destinations. The temporary +file names used are different each time the template is used. + +The templates are objects so you can create templates for many +different conversion steps and store them in a dictionary, for +instance. + + +Directions: +----------- + +To create a template: + t = Template() + +To add a conversion step to a template: + t.append(command, kind) +where kind is a string of two characters: the first is '-' if the +command reads its standard input or 'f' if it requires a file; the +second likewise for the output. The command must be valid /bin/sh +syntax. If input or output files are required, they are passed as +$IN and $OUT; otherwise, it must be possible to use the command in +a pipeline. + +To add a conversion step at the beginning: + t.prepend(command, kind) + +To convert a file to another file using a template: + sts = t.copy(infile, outfile) +If infile or outfile are the empty string, standard input is read or +standard output is written, respectively. The return value is the +exit status of the conversion pipeline. 
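+
+For example (illustrative; assumes a POSIX shell with the 'tr' utility
+on PATH):
+    t = Template()
+    t.append('tr a-z A-Z', '--')   # '--': reads stdin, writes stdout
+    sts = t.copy('infile.txt', 'outfile.txt')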
+ +To open a file for reading or writing through a conversion pipeline: + fp = t.open(file, mode) +where mode is 'r' to read the file, or 'w' to write it -- just like +for the built-in function open() or for os.popen(). + +To create a new template object initialized to a given one: + t2 = t.clone() +""" # ' + + +import re +import os +import tempfile +# we import the quote function rather than the module for backward compat +# (quote used to be an undocumented but used function in pipes) +from shlex import quote + +__all__ = ["Template"] + +# Conversion step kinds + +FILEIN_FILEOUT = 'ff' # Must read & write real files +STDIN_FILEOUT = '-f' # Must write a real file +FILEIN_STDOUT = 'f-' # Must read a real file +STDIN_STDOUT = '--' # Normal pipeline element +SOURCE = '.-' # Must be first, writes stdout +SINK = '-.' # Must be last, reads stdin + +stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \ + SOURCE, SINK] + + +class Template: + """Class representing a pipeline template.""" + + def __init__(self): + """Template() returns a fresh pipeline template.""" + self.debugging = 0 + self.reset() + + def __repr__(self): + """t.__repr__() implements repr(t).""" + return '