Overhauled the schema system to make more sense. Implemented a dump_structure utility.
[~jspiros/python-ebml.git] / ebml / schema / base.py
index 8c4ecdf..a99bf67 100644 (file)
-__all__ = ('INT', 'UINT', 'FLOAT', 'STRING', 'UNICODE', 'DATE', 'BINARY', 'CONTAINER', 'Element', 'EBML')
+import abc
+try:
+       from cStringIO import StringIO
+except ImportError:
+       from StringIO import StringIO
+from ..core import *
 
 
-INT, UINT, FLOAT, STRING, UNICODE, DATE, BINARY, CONTAINER = range(0, 8)
-
-
-class Element(object):
-       class_id = None
-       class_name = 'Unknown'
-       class_parents = ()
-       class_global = False
-       class_root = False
-       data_type = BINARY
-
-
-class EBMLElement(Element):
-       class_id = 0x1A45DFA3
-       class_name = 'EBML'
-       class_root = True
-       data_type = CONTAINER
-
-
-class EBMLVersionElement(Element):
-       class_id = 0x4286
-       class_name = 'EBMLVersion'
-       class_parents = (EBMLElement,)
-       data_type = UINT
-
-
-class EBMLReadVersionElement(Element):
-       class_id = 0x42F7
-       class_name = 'EBMLReadVersion'
-       class_parents = (EBMLElement,)
-       data_type = UINT
-
-
-class EBMLMaxIDLengthElement(Element):
-       class_id = 0x42F2
-       class_name = 'EBMLMaxIDLength'
-       class_parents = (EBMLElement,)
-       data_type = UINT
-
-
-class EBMLMaxSizeLengthElement(Element):
-       class_id = 0x42F3
-       class_name = 'EBMLMaxSizeLength'
-       class_parents = (EBMLElement,)
-       data_type = UINT
-
-
-class DocTypeElement(Element):
-       class_id = 0x4282
-       class_name = 'DocType'
-       class_parents = (EBMLElement,)
-       data_type = STRING
-
-
-class DocTypeVersionElement(Element):
-       class_id = 0x4287
-       class_name = 'DocTypeVersion'
-       class_parents = (EBMLElement,)
-       data_type = UINT
-
-
-class DocTypeReadVersionElement(Element):
-       class_id = 0x4285
-       class_name = 'DocTypeReadVersion'
-       class_parents = (EBMLElement,)
-       data_type = UINT
-
-
-class CRC32Element(Element):
-       class_id = 0xBF
-       class_name = 'CRC-32'
-       class_global = True
-       data_type = BINARY
-
-
-class VoidElement(Element):
-       class_id = 0xEC
-       class_name = 'Void'
-       class_global = True
-       data_type = BINARY
-
-
-class SignatureSlotElement(Element):
-       class_id = 0x1B538667
-       class_name = 'SignatureSlot'
-       class_global = True
-       data_type = CONTAINER
-
-
-class SignatureAlgoElement(Element):
-       class_id = 0x7E8A
-       class_name = 'SignatureAlgo'
-       class_parents = (SignatureSlotElement,)
-       data_type = UINT
+__all__ = ('UnknownElement', 'Element', 'Document', 'INT', 'UINT', 'FLOAT', 'STRING', 'UNICODE', 'DATE', 'BINARY', 'CONTAINER')
 
 
-class SignatureHashElement(Element):
-       class_id = 0x7E9A
-       class_name = 'SignatureHash'
-       class_parents = (SignatureSlotElement,)
-       data_type = UINT
-
-
-class SignaturePublicKeyElement(Element):
-       class_id = 0x7EA5
-       class_name = 'SignaturePublicKey'
-       class_parents = (SignatureSlotElement,)
-       data_type = BINARY
-
-
-class SignatureElement(Element):
-       class_id = 0x7EB5
-       class_name = 'Signature'
-       class_parents = (SignatureSlotElement,)
-       data_type = BINARY
-
-
-class SignatureElementsElement(Element):
-       class_id = 0x7E5B
-       class_name = 'SignatureElements'
-       class_parents = (SignatureSlotElement,)
-       data_type = CONTAINER
-
-
-class SignatureElementListElement(Element):
-       class_id = 0x7E7B
-       class_name = 'SignatureElementList'
-       class_parents = (SignatureElementsElement,)
-       data_type = CONTAINER
-
-
-class SignedElementElement(Element):
-       class_id = 0x6532
-       class_name = 'SignedElement'
-       class_parents = (SignatureElementListElement,)
-       data_type = BINARY
+# Element data-type tags; the values are the consecutive integers 0 through 7.
+INT, UINT, FLOAT, STRING, UNICODE, DATE, BINARY, CONTAINER = range(0, 8)
 
 
-class Schema(object):
-       doc_type = None
-       version = None
-       elements_by_class_id = None
+def _read_binary(stream, size):
+       """Read ``size`` bytes from ``stream`` and return them as a bytearray."""
+       return bytearray(stream.read(size))
+
+
+# Maps each non-container type tag to the callable that decodes a value of
+# that type. Readers are invoked as ``reader(stream, size)``.
+READERS = {
+       INT: read_signed_integer,
+       UINT: read_unsigned_integer,
+       FLOAT: read_float,
+       STRING: read_string,
+       UNICODE: read_unicode_string,
+       DATE: read_date,
+       BINARY: _read_binary
+}
+
+
+# Maps each non-container type tag to the callable that serialises a value of
+# that type. Element.encoding calls these with the value as the only argument.
+ENCODERS = {
+       INT: encode_signed_integer,
+       UINT: encode_unsigned_integer,
+       FLOAT: encode_float,
+       STRING: encode_string,
+       UNICODE: encode_unicode_string,
+       DATE: encode_date,
+       # ``length`` is optional so this entry can be called with the value
+       # alone; binary data is passed through unchanged.
+       BINARY: lambda binary, length=None: binary
+}
+
+
+# Per-type predicates used to vet a value before it is stored on an element.
+# NOTE(review): ``datetime`` is not imported in the visible part of this
+# module -- confirm it arrives through the star import from ..core.
+VALIDATORS = {
+       INT: lambda value: isinstance(value, (int, long)),
+       UINT: lambda value: bool(isinstance(value, (int, long)) and value == abs(value)),
+       FLOAT: lambda value: isinstance(value, float),
+       STRING: lambda value: isinstance(value, str),
+       UNICODE: lambda value: isinstance(value, basestring),
+       DATE: lambda value: isinstance(value, datetime.datetime),
+       BINARY: lambda value: isinstance(value, (str, bytes, bytearray))
+}
+
+
+class BaseElement(object):
+       """Abstract base declaring the class-level attributes of a schema element."""
+       __metaclass__ = abc.ABCMeta
        
-       @classmethod
-       def element_with_class_id(cls, class_id):
-               if cls.elements_by_class_id is None:
-                       cls.elements_by_class_id = {}
-                       for element in cls.elements:
-                               cls.elements_by_class_id[element.class_id] = element
-               return cls.elements_by_class_id[class_id]
+       # Abstract attributes: a concrete subclass has to provide all three.
+       id = abc.abstractproperty()
+       name = abc.abstractproperty()
+       # Compared against the type-tag constants (INT ... CONTAINER) elsewhere in this module.
+       type = abc.abstractproperty()
+       # Class-level defaults that subclasses may override.
+       default = None
+       children = ()
+       mandatory = False
+       multiple = False
+
+
+class UnknownElement(BaseElement):
+       """Placeholder for an element whose id matched no known element class.
+
+       It keeps the id that was read and the raw encoding it came with.
+       """
+       type = BINARY
+       name = 'Unknown'
+       id = None
        
+       def __init__(self, id, encoding):
+               """Store the element ``id`` and its ``encoding`` unchanged."""
+               self.encoding = encoding
+               self.id = id
+
+
+def read_elements(stream, size, document, children):
+       """Parse consecutive elements from ``stream`` and return them as a list.
+
+       ``size`` is the number of bytes to consume, or None to keep reading
+       until an element can no longer be read. Each element id is looked up
+       among ``children`` followed by ``document.globals``; an id with no
+       match is wrapped in UnknownElement, otherwise the matching class is
+       instantiated with the raw ``(size, data)`` encoding.
+       """
+       # Build the lookup sequence once; tuple() lets either side be any sequence.
+       candidates = tuple(children) + tuple(document.globals)
+       elements = []
+       # Stop when the byte budget is used up, or overshot by a bad declared size.
+       while size is None or size > 0:
+               try:
+                       element_id, element_id_size = read_element_id(stream)
+                       element_size, element_size_size = read_element_size(stream)
+                       element_encoding = (element_size, bytearray(stream.read(element_size)))
+               except Exception:
+                       # A failed read marks the end of the data. KeyboardInterrupt
+                       # and SystemExit are deliberately not caught here.
+                       break
+               element_class = next((child for child in candidates if child.id == element_id), None)
+               if element_class is None:
+                       element = UnknownElement(element_id, element_encoding)
+               else:
+                       element = element_class(document, encoding=element_encoding)
+               elements.append(element)
+               if size is not None:
+                       size -= element_id_size + element_size_size + element_size
+       return elements
+
+
+class Element(BaseElement):
        @classmethod
-       def global_elements(cls):
-               return [element for element in cls.elements if element.class_global]
+       def check_value(cls, value):
+               """Return True if ``value`` is acceptable for this element's type.
+
+               Types listed in VALIDATORS are delegated to their validator. A
+               CONTAINER accepts a single Element, or a list/tuple in which every
+               item is an Element. Raises NotImplementedError for any other type.
+               """
+               if cls.type in VALIDATORS:
+                       return VALIDATORS[cls.type](value)
+               if cls.type != CONTAINER:
+                       raise NotImplementedError('Unsupported element type.')
+               if isinstance(value, (list, tuple)):
+                       # Check each member, not the sequence object itself.
+                       return all(isinstance(item, Element) for item in value)
+               return isinstance(value, Element)
        
-       @classmethod
-       def root_elements(cls):
-               return [element for element in cls.elements if element.class_root]
+       def __init__(self, document, value=None, encoding=None):
+               """Bind the element to ``document`` with an optional value and/or raw encoding."""
+               self._encoding = encoding
+               self._value = value
+               self.document = document
        
-       @classmethod
-       def child_elements_of_element(cls, parent):
-               children = [element for element in cls.elements if parent in element.class_parents]
-               children += cls.global_elements()
-               if 'self' in parent.class_parents and parent not in children:
-                       children.append(parent)
-               return children
-
-
-class EBML(Schema):
-       elements = (
-               EBMLElement,
-               EBMLVersionElement,
-               EBMLReadVersionElement,
-               EBMLMaxIDLengthElement,
-               EBMLMaxSizeLengthElement,
-               DocTypeElement,
-               DocTypeVersionElement,
-               DocTypeReadVersionElement,
-               CRC32Element,
-               VoidElement,
-               SignatureSlotElement,
-               SignatureAlgoElement,
-               SignatureHashElement,
-               SignaturePublicKeyElement,
-               SignatureElement,
-               SignatureElementsElement,
-               SignatureElementListElement,
-               SignedElementElement
-       )
\ No newline at end of file
+       @property
+       def value(self):
+               """Return the element value, decoding it from the stored encoding on first access.
+
+               The decoded result is cached in ``_value``. Nothing is decoded when
+               a value is already present or when there is no encoding.
+               """
+               if self._value is not None or self._encoding is None:
+                       return self._value
+               if self.type in READERS:
+                       reader = READERS[self.type]
+                       payload = StringIO(self._encoding[1])
+                       self._value = reader(payload, self._encoding[0])
+               elif self.type == CONTAINER:
+                       payload = StringIO(self._encoding[1])
+                       self._value = read_elements(payload, self._encoding[0], self.document, self.children)
+               return self._value
+       
+       @value.setter
+       def value(self, value):
+               """Validate and store a new value, discarding any cached encoding.
+
+               The function must be named ``value``: the decorator returns a new
+               property that is bound to whatever name the function is given.
+               Raises ValueError when check_value rejects ``value``.
+               """
+               if not self.check_value(value):
+                       raise ValueError('Unsupported element value.')
+               self._value = value
+               self._encoding = None
+
+       # ``set_value`` is kept as an alias of the property for any code that
+       # assigned through that name.
+       set_value = value
+       
+       @property
+       def encoding(self):
+               """Return ``(size, data)`` for the element body, encoding the value if needed.
+
+               The result is cached in ``_encoding``. For a CONTAINER the body is
+               each child written in full (encoded id, encoded size, then the
+               child's own body), which is the layout read_elements parses back.
+               With no value set, the result is ``(0, bytearray())``.
+               """
+               if self._encoding is None:
+                       size = 0
+                       data = bytearray()
+                       if self._value is not None:
+                               if self.type in ENCODERS:
+                                       data = ENCODERS[self.type](self._value)
+                                       size = len(data)
+                               elif self.type == CONTAINER:
+                                       # check_value also admits a lone Element as a container value.
+                                       if isinstance(self._value, (list, tuple)):
+                                               members = self._value
+                                       else:
+                                               members = (self._value,)
+                                       for element in members:
+                                               # Use only ``id`` and ``encoding`` so that
+                                               # UnknownElement children serialise too.
+                                               body_size, body = element.encoding
+                                               data.extend(encode_element_id(element.id))
+                                               data.extend(encode_element_size(body_size))
+                                               data.extend(body)
+                                       # Headers are included, so size and data always agree.
+                                       size = len(data)
+                       self._encoding = (size, data)
+               return self._encoding
+       
+       @property
+       def id_size(self):
+               """Length of the encoded element id."""
+               encoded_id = encode_element_id(self.id)
+               return len(encoded_id)
+       
+       @property
+       def size_size(self):
+               """Length of the encoded body-size field."""
+               encoded_size = encode_element_size(self.body_size)
+               return len(encoded_size)
+       
+       @property
+       def head_size(self):
+               """Combined length of the encoded id and the encoded size field."""
+               id_length = self.id_size
+               size_length = self.size_size
+               return id_length + size_length
+       
+       @property
+       def body_size(self):
+               """Size component of the element's ``(size, data)`` encoding."""
+               encoded = self.encoding
+               return encoded[0]
+       
+       @property
+       def size(self):
+               """Total element length: head plus body."""
+               head = self.head_size
+               body = self.body_size
+               return head + body
+
+
+class Document(object):
+       """Abstract document bound to a stream; its root elements are parsed on demand.
+
+       Subclasses supply ``type`` and ``version`` and may override the
+       ``children`` and ``globals`` element-class collections.
+       """
+       __metaclass__ = abc.ABCMeta
+
+       type = abc.abstractproperty()
+       version = abc.abstractproperty()
+       children = ()
+       globals = ()
+
+       def __init__(self, stream):
+               """Remember ``stream``; nothing is read until ``roots`` is accessed."""
+               self._roots = None
+               self.stream = stream
+
+       @property
+       def roots(self):
+               """Return the top-level elements, reading them from the stream on first access."""
+               parsed = self._roots
+               if parsed is None:
+                       parsed = read_elements(self.stream, None, self, self.children)
+                       self._roots = parsed
+               return parsed
\ No newline at end of file