micropython-lib/xmltok/xmltok.py

116 wiersze
2.7 KiB
Python
Czysty Zwykły widok Historia

import string
TEXT = "TEXT"
START_TAG = "START_TAG"
START_TAG_DONE = "START_TAG_DONE"
END_TAG = "END_TAG"
PI = "PI"
PI_DONE = "PI_DONE"
ATTR = "ATTR"
ATTR_VAL = "ATTR_VAL"
class XMLSyntaxError(Exception):
pass
class XMLTokenizer:
def __init__(self, f):
self.f = f
self.nextch()
def curch(self):
return self.c
def getch(self):
c = self.c
self.nextch()
return c
def eof(self):
return self.c == ""
def nextch(self):
self.c = self.f.read(1)
if not self.c:
raise StopIteration
return self.c
def skip_ws(self):
while self.curch().isspace():
self.nextch()
def isident(self):
self.skip_ws()
return self.curch().isalpha()
def getident(self):
self.skip_ws()
ident = ""
while self.curch() in (string.ascii_letters + string.digits):
ident += self.getch()
return ident
def match(self, c):
self.skip_ws()
if self.curch() == c:
self.nextch()
return True
return False
def expect(self, c):
if not self.match(c):
raise XMLSyntaxError
def lex_attrs_till(self):
while self.isident():
attr = self.getident()
yield (ATTR, attr)
self.expect("=")
self.expect('"')
val = ""
while self.curch() != '"':
val += self.getch()
yield (ATTR_VAL, val)
self.expect('"')
def tokenize(self):
while not self.eof():
if self.match("<"):
if self.match("/"):
yield (END_TAG, self.getident())
self.expect(">")
elif self.match("?"):
yield (PI, self.getident())
yield from self.lex_attrs_till()
self.expect("?")
self.expect(">")
else:
tag = self.getident()
yield (START_TAG, tag)
yield from self.lex_attrs_till()
if self.match("/"):
yield (END_TAG, tag)
self.expect(">")
else:
text = ""
while self.curch() != "<":
text += self.getch()
if text:
yield (TEXT, text)
def gfind(gen, pred):
for i in gen:
if pred(i):
return i
def text_of(gen, tag):
# Return text content of a leaf tag
gfind(gen, lambda i: i == (START_TAG, tag))
t, val = next(gen)
assert t == TEXT
return val
def tokenize(file):
return XMLTokenizer(file).tokenize()