# Source: ebml/schema/base.py, post-image of the diff
# b7bbcd6f281fcc0c5418db2c6005088968c0779c..d916e4476d1c5e51ce1949d5e8e2329608f4c3b7
# from http://git.ithinksw.org/~jspiros/python-ebml.git
"""Base classes for describing and lazily reading EBML documents.

The module provides:

* ``Stream`` / ``Stream.Substream`` -- a seekable file wrapper and bounded
  windows onto it.
* ``Element`` / ``UnknownElement`` -- abstract element description plus the
  fallback used for IDs a schema does not declare.
* ``read_elements`` -- splits a stream into consecutive elements.
* ``Document`` -- abstract document whose root elements are read on demand.
"""

import abc
import os
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
from ..core import *


__all__ = ('UnknownElement', 'Element', 'Document', 'INT', 'UINT', 'FLOAT', 'STRING', 'UNICODE', 'DATE', 'BINARY', 'CONTAINER')


# Element data-type tags, numbered 0..7.
INT, UINT, FLOAT, STRING, UNICODE, DATE, BINARY, CONTAINER = range(0, 8)


# Maps a data-type tag to a callable ``reader(stream, size)`` that decodes an
# element body.  CONTAINER is deliberately absent: container bodies are parsed
# by read_elements() (see Element.value).
READERS = {
    INT: read_signed_integer,
    UINT: read_unsigned_integer,
    FLOAT: read_float,
    STRING: read_string,
    UNICODE: read_unicode_string,
    DATE: read_date,
    BINARY: lambda stream, size: bytearray(stream.read(size))
}


def _stream_getitem(stream, key):
    """Implement ``stream[key]`` for Stream and Stream.Substream.

    An integer key seeks to that offset and returns the single byte read
    there.  A slice with explicit start and stop (and no step) returns
    ``stream.substream(start, stop - start)``.

    Raises IndexError for an open-ended or stepped slice and TypeError for
    any other key type.
    """
    if isinstance(key, (int, long)):  # ``long`` is the Python 2 builtin
        stream.seek(key)
        return stream.read(1)
    elif isinstance(key, slice):
        if key.start is None or key.stop is None or key.step is not None:
            raise IndexError('slices must have explicit start and stop and no step')
        return stream.substream(key.start, key.stop - key.start)
    else:
        raise TypeError('stream indices must be integers or slices')


class Stream(object):
    """Seekable wrapper around a file-like object.

    The total size is measured once at construction time by seeking to the
    end of the file.  Substreams are cached per ``(offset, size)`` pair.
    """

    class Substream(object):
        """Window of ``size`` bytes starting at ``offset`` in a parent stream.

        A substream keeps no position of its own: every operation is
        expressed in terms of the parent stream's current position, so
        reading from the parent or from another substream moves this one
        as well.
        """

        def __init__(self, stream, offset, size):
            self.stream = stream
            self.offset = offset
            self.size = size

        def read(self, size=-1):
            """Read up to ``size`` bytes without crossing the window's end.

            A negative or ``None`` size reads everything up to the end of
            the window.  Returns ``b''`` once the position is at or past
            the end.
            """
            position = self.tell()
            if position == 0:
                # tell() also reports 0 when the parent is positioned before
                # the window, so always (re)position at the window start.
                self.stream.seek(self.offset)
            remaining = self.size - position
            if remaining <= 0:
                return b''
            if size is None or size < 0 or size > remaining:
                size = remaining
            return self.stream.read(size)

        def seek(self, offset, whence=os.SEEK_SET):
            """Move the parent stream to a position relative to this window.

            Raises ValueError for an unsupported ``whence`` and IOError if
            the target lies before the start of the window.  Seeking past
            the end of the window is not rejected.
            """
            if whence == os.SEEK_SET:
                desired_offset = self.offset + offset
            elif whence == os.SEEK_CUR:
                desired_offset = self.stream.tell() + offset
            elif whence == os.SEEK_END:
                desired_offset = self.offset + self.size + offset
            else:
                raise ValueError('invalid whence: %r' % (whence,))

            if desired_offset < self.offset:
                raise IOError('cannot seek before the start of the substream')

            self.stream.seek(desired_offset, os.SEEK_SET)

        def tell(self):
            """Return the position relative to the window start.

            Positions before the window are reported as 0.
            """
            stream_offset = self.stream.tell()
            if stream_offset <= self.offset:
                return 0
            return stream_offset - self.offset

        def substream(self, offset, size):
            """Return a nested window, obtained from the parent stream.

            Raises IOError if the requested range extends past this window.
            """
            if offset + size > self.size:
                raise IOError('substream extends past the end of its parent')
            return self.stream.substream(self.offset + offset, size)

        def __getitem__(self, key):
            return _stream_getitem(self, key)

    def __init__(self, file_like):
        self.file = file_like
        # Measure the total size, then rewind to the beginning.
        self.file.seek(0, os.SEEK_END)
        self.size = self.file.tell()
        self.file.seek(0, os.SEEK_SET)
        # Cache of Substream objects keyed by (offset, size).
        self.substreams = {}

    def read(self, size=-1):
        """Read from the wrapped file; ``size`` is passed through unchanged."""
        return self.file.read(size)

    def seek(self, offset, whence=os.SEEK_SET):
        """Seek the wrapped file and return whatever its ``seek`` returns."""
        return self.file.seek(offset, whence)

    def tell(self):
        """Return the wrapped file's current position."""
        return self.file.tell()

    def substream(self, offset, size):
        """Return the (cached) Substream covering ``offset .. offset + size``.

        Raises IOError if the range extends past the end of the stream.
        """
        if offset + size > self.size:
            raise IOError('substream extends past the end of the stream')
        key = (offset, size)
        if key not in self.substreams:
            self.substreams[key] = self.Substream(self, offset, size)
        return self.substreams[key]

    def __getitem__(self, key):
        return _stream_getitem(self, key)


class Element(object):
    """Abstract base for an element backed by a stream.

    Concrete subclasses supply ``id``, ``name`` and ``type`` (one of the
    data-type tags above).  ``stream`` is expected to cover the whole
    element, header included.  Decoded values and header sizes are computed
    on first access and cached on the instance.
    """
    __metaclass__ = abc.ABCMeta

    id = abc.abstractproperty()
    name = abc.abstractproperty()
    type = abc.abstractproperty()
    # Schema attributes; only ``children`` is consulted by code in this module.
    default = None
    children = ()
    mandatory = False
    multiple = False

    def __init__(self, document, stream):
        self.document = document
        self.stream = stream

    @property
    def value(self):
        """Decoded body of the element.

        Uses the matching READERS entry, or read_elements() for CONTAINER
        elements (yielding a list of child elements); ``None`` for any
        other type tag.
        """
        if not hasattr(self, 'cached_value'):
            if self.type in READERS:
                self.cached_value = READERS[self.type](self.body_stream, self.body_size)
            elif self.type == CONTAINER:
                self.cached_value = read_elements(self.body_stream, self.document, self.children)
            else:
                self.cached_value = None
        return self.cached_value

    @property
    def id_size(self):
        """Size of the encoded element ID, as reported by read_element_id()."""
        if not hasattr(self, 'cached_id_size'):
            self.stream.seek(0)
            _, self.cached_id_size = read_element_id(self.stream)
        return self.cached_id_size

    @property
    def size_size(self):
        """Size of the encoded size field, as reported by read_element_size()."""
        if not hasattr(self, 'cached_size_size'):
            # The size field immediately follows the ID.
            self.stream.seek(self.id_size)
            _, self.cached_size_size = read_element_size(self.stream)
        return self.cached_size_size

    @property
    def head_size(self):
        """Combined size of the ID and size fields."""
        return self.id_size + self.size_size

    @property
    def body_size(self):
        """Size of the element body (total size minus the header)."""
        return self.size - self.head_size

    @property
    def body_stream(self):
        """Substream covering only the element body."""
        return self.stream.substream(self.head_size, self.body_size)

    @property
    def size(self):
        """Total size of the element, taken from its stream."""
        return self.stream.size


class UnknownElement(Element):
    """Element whose ID matched no candidate class; its body is read as BINARY."""
    id = None
    name = 'Unknown'
    type = BINARY

    def __init__(self, document, stream, id):
        self.id = id
        super(UnknownElement, self).__init__(document, stream)


def read_elements(stream, document, children):
    """Split ``stream`` into consecutive elements and return them as a list.

    Each element's class is the first entry of ``children`` followed by
    ``document.globals`` whose ``id`` equals the ID read from the stream;
    IDs without a match produce an UnknownElement.  Element bodies are not
    decoded here -- every element only receives a substream covering its
    header and body.

    An IOError from ``stream.substream`` propagates if an element claims to
    extend past the end of ``stream``.
    """
    # Built once rather than on every iteration; tuple() also lets callers
    # pass lists.
    candidates = tuple(children) + tuple(document.globals)

    elements = []
    size = stream.size  # bytes not yet assigned to an element
    while size:
        element_offset = stream.size - size
        stream.seek(element_offset)
        element_id, element_id_size = read_element_id(stream)
        element_size, element_size_size = read_element_size(stream)
        element_stream_size = element_id_size + element_size_size + element_size
        element_stream = stream.substream(element_offset, element_stream_size)
        size -= element_stream_size

        element_class = None
        for child in candidates:
            if child.id == element_id:
                element_class = child
                break

        if element_class is None:
            element = UnknownElement(document, element_stream, element_id)
        else:
            element = element_class(document, element_stream)

        elements.append(element)
    return elements


class Document(object):
    """Abstract base for a document read from a file-like object.

    Subclasses supply ``type`` and ``version`` and may list the element
    classes allowed at the top level (``children``) and those tried for
    every container as well (``globals``).
    """
    __metaclass__ = abc.ABCMeta

    type = abc.abstractproperty()
    version = abc.abstractproperty()
    children = ()
    globals = ()

    def __init__(self, file_like):
        self.stream = Stream(file_like)
        self._roots = None

    @property
    def roots(self):
        """Top-level elements of the document, read on first access."""
        if self._roots is None:
            self._roots = read_elements(self.stream, self, self.children)
        return self._roots