-__all__ = ('INT', 'UINT', 'FLOAT', 'STRING', 'UNICODE', 'DATE', 'BINARY', 'CONTAINER', 'Element', 'EBML')
+import abc, os
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+from ..core import *
-INT, UINT, FLOAT, STRING, UNICODE, DATE, BINARY, CONTAINER = range(0, 8)
-
-
-SCHEMA_TYPES = {
- 'int': INT,
- 'uint': UINT,
- 'float': FLOAT,
- 'string': STRING,
- 'date': DATE,
- 'binary': BINARY
-}
-
-
-class Element(object):
- class_id = None
- class_name = 'Unknown'
- class_parents = ()
- class_global = False
- class_root = False
- data_type = BINARY
-
-
-class EBMLElement(Element):
- class_id = 0x1A45DFA3
- class_name = 'EBML'
- class_root = True
- data_type = CONTAINER
-
-
-class EBMLVersionElement(Element):
- class_id = 0x4286
- class_name = 'EBMLVersion'
- class_parents = (EBMLElement,)
- data_type = UINT
# Names exported by ``from <this module> import *``.
__all__ = (
    'UnknownElement',
    'Element',
    'Document',
    'INT',
    'UINT',
    'FLOAT',
    'STRING',
    'UNICODE',
    'DATE',
    'BINARY',
    'CONTAINER',
)
-class EBMLReadVersionElement(Element):
- class_id = 0x42F7
- class_name = 'EBMLReadVersion'
- class_parents = (EBMLElement,)
- data_type = UINT
-
-
-class EBMLMaxIDLengthElement(Element):
- class_id = 0x42F2
- class_name = 'EBMLMaxIDLength'
- class_parents = (EBMLElement,)
- data_type = UINT
-
-
-class EBMLMaxSizeLengthElement(Element):
- class_id = 0x42F3
- class_name = 'EBMLMaxSizeLength'
- class_parents = (EBMLElement,)
- data_type = UINT
-
-
-class DocTypeElement(Element):
- class_id = 0x4282
- class_name = 'DocType'
- class_parents = (EBMLElement,)
- data_type = STRING
-
-
-class DocTypeVersionElement(Element):
- class_id = 0x4287
- class_name = 'DocTypeVersion'
- class_parents = (EBMLElement,)
- data_type = UINT
# Data-type tags for elements, numbered consecutively from 0 (INT) to 7
# (CONTAINER).
INT, UINT, FLOAT, STRING, UNICODE, DATE, BINARY, CONTAINER = range(8)
-class DocTypeReadVersionElement(Element):
- class_id = 0x4285
- class_name = 'DocTypeReadVersion'
- class_parents = (EBMLElement,)
- data_type = UINT
def _read_binary(stream, size):
    """Read ``size`` bytes from ``stream`` and return them as a ``bytearray``."""
    return bytearray(stream.read(size))


# Maps a data-type tag to the callable that decodes a value of that type.
# Every entry is invoked as ``reader(stream, size)``.  CONTAINER has no entry
# here on purpose: it is not one of the scalar types listed below.
READERS = {
    INT: read_signed_integer,
    UINT: read_unsigned_integer,
    FLOAT: read_float,
    STRING: read_string,
    UNICODE: read_unicode_string,
    DATE: read_date,
    BINARY: _read_binary,
}
-class CRC32Element(Element):
- class_id = 0xBF
- class_name = 'CRC-32'
- class_global = True
- data_type = BINARY
def _stream_getitem(stream, key):
    """Implement ``stream[key]`` for ``Stream`` and ``Stream.Substream``.

    An integer key seeks to that position and returns the result of reading
    one byte there.  A ``start:stop`` slice returns the substream that covers
    ``stop - start`` bytes beginning at ``start``.

    Raises:
        IndexError: the slice is open-ended or carries a step.
        TypeError: the key is neither an integer nor a slice.
    """
    if isinstance(key, slice):
        if key.start is None or key.stop is None or key.step is not None:
            raise IndexError
        return stream.substream(key.start, key.stop - key.start)
    # Integers (``int`` and Python 2's ``long``) expose ``__index__``; testing
    # for it avoids naming ``long``, which does not exist on Python 3.
    if hasattr(key, '__index__'):
        stream.seek(key)
        return stream.read(1)
    raise TypeError


class Stream(object):
    """Seekable wrapper around a file-like object that hands out bounded views.

    The total size is measured once, at construction, by seeking to the end
    of ``file_like``; the file is then rewound to position 0.
    """

    class Substream(object):
        """A window of ``size`` bytes starting at ``offset`` in a parent stream.

        The window keeps no cursor of its own: ``tell``/``seek``/``read`` all
        go through the parent's cursor, so using one view moves the position
        observed by every other view of the same parent.
        """

        def __init__(self, stream, offset, size):
            self.stream = stream
            self.offset = offset
            self.size = size

        def read(self, size=-1):
            """Read up to ``size`` bytes without crossing the end of the window.

            A negative ``size`` (or ``None``) means "everything up to the end
            of the window".  Returns ``b''`` once the cursor is at or past the
            window's end.
            """
            position = self.tell()
            if position == 0:
                # tell() also reports 0 when the shared cursor sits before
                # this window, so snap it to the window's first byte.
                self.stream.seek(self.offset)
            remaining = self.size - position
            if remaining <= 0:
                return b''
            if size is None or size < 0 or size > remaining:
                # Clamp so the parent is never asked for bytes outside the
                # window (a raw read(-1) would run to the end of the file).
                size = remaining
            return self.stream.read(size)

        def seek(self, offset, whence=os.SEEK_SET):
            """Move the shared cursor to a position relative to this window.

            Raises:
                ValueError: ``whence`` is not one of the ``os.SEEK_*`` values.
                IOError: the target lies before the start of the window.
            """
            if whence == os.SEEK_SET:
                target = self.offset + offset
            elif whence == os.SEEK_CUR:
                target = self.stream.tell() + offset
            elif whence == os.SEEK_END:
                target = self.offset + self.size + offset
            else:
                raise ValueError('invalid whence: %r' % (whence,))

            if target < self.offset:
                raise IOError

            self.stream.seek(target, os.SEEK_SET)

        def tell(self):
            """Return the cursor position relative to the window start.

            Positions before the window are reported as 0.
            """
            absolute = self.stream.tell()
            if absolute <= self.offset:
                return 0
            return absolute - self.offset

        def substream(self, offset, size):
            """Return a view of ``size`` bytes at ``offset`` inside this window.

            The view is created by the parent stream, so it is expressed in
            the parent's coordinates.

            Raises:
                IOError: the requested range is negative or does not fit in
                    this window.
            """
            if offset < 0 or size < 0 or offset + size > self.size:
                raise IOError
            return self.stream.substream(self.offset + offset, size)

        def __getitem__(self, key):
            """``view[i]`` reads one byte at ``i``; ``view[a:b]`` is a substream."""
            return _stream_getitem(self, key)

    def __init__(self, file_like):
        self.file = file_like
        # Measure the total size, then rewind to the beginning.
        self.file.seek(0, os.SEEK_END)
        self.size = self.file.tell()
        self.file.seek(0, os.SEEK_SET)
        # Cache of views keyed by (offset, size); a repeated request returns
        # the same Substream object.
        self.substreams = {}

    def read(self, size=-1):
        """Read from the underlying file at its current position."""
        return self.file.read(size)

    def seek(self, offset, whence=os.SEEK_SET):
        """Seek the underlying file and return whatever its ``seek`` returns."""
        return self.file.seek(offset, whence)

    def tell(self):
        """Return the underlying file's current position."""
        return self.file.tell()

    def substream(self, offset, size):
        """Return the (cached) view of ``size`` bytes starting at ``offset``.

        Raises:
            IOError: the requested range is negative or extends past the size
                measured at construction.
        """
        if offset < 0 or size < 0 or offset + size > self.size:
            raise IOError
        key = (offset, size)
        if key not in self.substreams:
            self.substreams[key] = self.Substream(self, offset, size)
        return self.substreams[key]

    def __getitem__(self, key):
        """``stream[i]`` reads one byte at ``i``; ``stream[a:b]`` is a substream."""
        return _stream_getitem(self, key)
-class VoidElement(Element):
- class_id = 0xEC
- class_name = 'Void'
- class_global = True
- data_type = BINARY
class Element(object):
    """Base class for a single element read lazily from a stream.

    Subclasses supply ``id``, ``name`` and ``type``.  ``stream`` is the view
    the element was found in; position 0 of it is where the element ID is
    read from, and its ``size`` is taken as the element's total size.
    """

    __metaclass__ = abc.ABCMeta

    id = abc.abstractproperty()
    name = abc.abstractproperty()
    type = abc.abstractproperty()
    default = None
    children = ()
    mandatory = False
    multiple = False

    def __init__(self, document, stream):
        self.document = document
        self.stream = stream

    @property
    def value(self):
        """Decoded body of the element, computed once and then cached.

        Types listed in ``READERS`` are decoded by their reader; a CONTAINER
        yields the list returned by ``read_elements``; anything else is None.
        """
        try:
            return self.cached_value
        except AttributeError:
            pass

        if self.type in READERS:
            decoded = READERS[self.type](self.body_stream, self.body_size)
        elif self.type == CONTAINER:
            decoded = read_elements(self.body_stream, self.document, self.children)
        else:
            decoded = None
        self.cached_value = decoded
        return self.cached_value

    @property
    def id_size(self):
        """Second item returned by ``read_element_id`` at position 0 (cached)."""
        try:
            return self.cached_id_size
        except AttributeError:
            pass

        self.stream.seek(0)
        _, self.cached_id_size = read_element_id(self.stream)
        return self.cached_id_size

    @property
    def size_size(self):
        """Second item returned by ``read_element_size`` right after the ID (cached)."""
        try:
            return self.cached_size_size
        except AttributeError:
            pass

        self.stream.seek(self.id_size)
        _, self.cached_size_size = read_element_size(self.stream)
        return self.cached_size_size

    @property
    def head_size(self):
        """Combined size of the ID field and the size field."""
        return sum((self.id_size, self.size_size))

    @property
    def body_size(self):
        """Total size minus the header size."""
        total = self.size
        return total - self.head_size

    @property
    def body_stream(self):
        """Substream of ``self.stream`` that starts right after the header."""
        start = self.head_size
        return self.stream.substream(start, self.body_size)

    @property
    def size(self):
        """Total size of the element, as reported by its stream."""
        return self.stream.size
-class SignatureSlotElement(Element):
- class_id = 0x1B538667
- class_name = 'SignatureSlot'
- class_global = True
- data_type = CONTAINER
-
-
-class SignatureAlgoElement(Element):
- class_id = 0x7E8A
- class_name = 'SignatureAlgo'
- class_parents = (SignatureSlotElement,)
- data_type = UINT
-
-
-class SignatureHashElement(Element):
- class_id = 0x7E9A
- class_name = 'SignatureHash'
- class_parents = (SignatureSlotElement,)
- data_type = UINT
-
-
-class SignaturePublicKeyElement(Element):
- class_id = 0x7EA5
- class_name = 'SignaturePublicKey'
- class_parents = (SignatureSlotElement,)
- data_type = BINARY
-
-
-class SignatureElement(Element):
- class_id = 0x7EB5
- class_name = 'Signature'
- class_parents = (SignatureSlotElement,)
- data_type = BINARY
-
-
-class SignatureElementsElement(Element):
- class_id = 0x7E5B
- class_name = 'SignatureElements'
- class_parents = (SignatureSlotElement,)
- data_type = CONTAINER
-
-
-class SignatureElementListElement(Element):
- class_id = 0x7E7B
- class_name = 'SignatureElementList'
- class_parents = (SignatureElementsElement,)
- data_type = CONTAINER
-
-
-class SignedElementElement(Element):
- class_id = 0x6532
- class_name = 'SignedElement'
- class_parents = (SignatureElementListElement,)
- data_type = BINARY
-
-
-class Schema(object):
- doc_type = None
- version = None
- elements_by_class_id = None
-
- @classmethod
- def element_with_class_id(cls, class_id):
- if cls.elements_by_class_id is None:
- cls.elements_by_class_id = {}
- for element in cls.elements:
- cls.elements_by_class_id[element.class_id] = element
- return cls.elements_by_class_id[class_id]
-
- @classmethod
- def global_elements(cls):
- return [element for element in cls.elements if element.class_global]
-
- @classmethod
- def root_elements(cls):
- return [element for element in cls.elements if element.class_root]
-
- @classmethod
- def child_elements_of_element(cls, parent):
- children = [element for element in cls.elements if parent in element.class_parents]
- children += cls.global_elements()
- if 'self' in parent.class_parents and parent not in children:
- children.append(parent)
- return children
-
-
-class EBML(Schema):
- elements = (
- EBMLElement,
- EBMLVersionElement,
- EBMLReadVersionElement,
- EBMLMaxIDLengthElement,
- EBMLMaxSizeLengthElement,
- DocTypeElement,
- DocTypeVersionElement,
- DocTypeReadVersionElement,
- CRC32Element,
- VoidElement,
- SignatureSlotElement,
- SignatureAlgoElement,
- SignatureHashElement,
- SignaturePublicKeyElement,
- SignatureElement,
- SignatureElementsElement,
- SignatureElementListElement,
- SignedElementElement
- )
\ No newline at end of file
class UnknownElement(Element):
    """Element used when an ID read from the stream matches no known class.

    The ID is supplied per instance; the body is exposed as BINARY.
    """

    id = None
    name = 'Unknown'
    type = BINARY

    def __init__(self, document, stream, id):
        super(UnknownElement, self).__init__(document, stream)
        # Shadows the class-level ``id = None`` with the ID actually read.
        self.id = id
+
+
def read_elements(stream, document, children):
    """Read consecutive elements from ``stream`` until its end is reached.

    Args:
        stream: seekable object exposing ``size``, ``seek`` and ``substream``.
        document: owner passed to every element; its ``globals`` are accepted
            as element classes in addition to ``children``.
        children: element classes allowed at this level.

    Returns:
        A list with one element instance per element found, in stream order.
        IDs that match no candidate class become ``UnknownElement`` instances.

    Raises:
        IOError: an element header reports a non-positive total length, which
            would otherwise keep the loop from ever advancing.
    """
    # Build the candidate list once; tuple() lets lists and tuples be mixed.
    candidates = tuple(children) + tuple(document.globals)

    elements = []
    total_size = stream.size
    offset = 0
    while offset < total_size:
        stream.seek(offset)
        element_id, id_size = read_element_id(stream)
        body_size, size_size = read_element_size(stream)
        span = id_size + size_size + body_size
        if span <= 0:
            raise IOError('element at offset %d has invalid length %d' % (offset, span))
        element_stream = stream.substream(offset, span)
        offset += span

        # First matching class wins, children before document globals.
        element_class = next(
            (candidate for candidate in candidates if candidate.id == element_id),
            None,
        )
        if element_class is None:
            element = UnknownElement(document, element_stream, element_id)
        else:
            element = element_class(document, element_stream)

        elements.append(element)
    return elements
+
+
class Document(object):
    """Base class for a document read from a file-like object.

    Subclasses supply ``type`` and ``version`` and may list the element
    classes allowed at the top level (``children``) and anywhere
    (``globals``).
    """

    __metaclass__ = abc.ABCMeta

    type = abc.abstractproperty()
    version = abc.abstractproperty()
    children = ()
    globals = ()

    def __init__(self, file_like):
        self._roots = None
        self.stream = Stream(file_like)

    @property
    def roots(self):
        """Top-level elements, read on first access and cached afterwards."""
        if self._roots is not None:
            return self._roots
        self._roots = read_elements(self.stream, self, self.children)
        return self._roots
\ No newline at end of file