Simplified schema API to read-only for now, and minimized the number of reads require...
[~jspiros/python-ebml.git] / ebml / schema / base.py
1 import abc, os
2 try:
3         from cStringIO import StringIO
4 except ImportError:
5         from StringIO import StringIO
6 from ..core import *
7
8
# Public names exported by ``from <this module> import *``.
__all__ = (
        'UnknownElement',
        'Element',
        'Document',
        'INT',
        'UINT',
        'FLOAT',
        'STRING',
        'UNICODE',
        'DATE',
        'BINARY',
        'CONTAINER',
)


# Element value types, numbered consecutively from 0.
INT, UINT, FLOAT, STRING, UNICODE, DATE, BINARY, CONTAINER = range(8)


# Maps a value type to the callable that decodes it; every reader is called
# as ``reader(stream, size)``.  CONTAINER has no entry here because container
# bodies are parsed into child elements rather than decoded to a scalar.
READERS = {
        INT: read_signed_integer,
        UINT: read_unsigned_integer,
        FLOAT: read_float,
        STRING: read_string,
        UNICODE: read_unicode_string,
        DATE: read_date,
        # Binary payloads are returned verbatim as a mutable byte sequence.
        BINARY: lambda stream, size: bytearray(stream.read(size)),
}
24
25
try:
        _INTEGER_TYPES = (int, long)
except NameError:
        # Interpreters without a separate ``long`` type.
        _INTEGER_TYPES = (int,)


def _stream_getitem(stream, key):
        """Shared ``__getitem__`` implementation for Stream and Stream.Substream.

        An integer key seeks to that offset and returns the single byte read
        there.  A slice with explicit start and stop (and no step) returns the
        substream covering ``[start, stop)``.

        Raises IndexError for open-ended or stepped slices and TypeError for
        any other key type.
        """
        if isinstance(key, _INTEGER_TYPES):
                stream.seek(key)
                return stream.read(1)
        if isinstance(key, slice):
                if key.start is None or key.stop is None or key.step is not None:
                        raise IndexError('slices need an explicit start and stop, and no step')
                return stream.substream(key.start, key.stop - key.start)
        raise TypeError('stream indices must be integers or slices')


class Stream(object):
        """Seekable wrapper around a file-like object that hands out bounded windows.

        The total size is measured once at construction by seeking to the end
        of the file.  Windows are created with substream() (or slice syntax)
        and cached per ``(offset, size)`` pair.
        """

        class Substream(object):
                """A window of ``size`` bytes starting at ``offset`` in a parent stream.

                The window keeps no cursor of its own: every operation goes
                through the parent's cursor, so positions are only meaningful
                right after a seek() on this window.
                """

                def __init__(self, stream, offset, size):
                        self.stream = stream
                        self.offset = offset
                        self.size = size

                def read(self, size):
                        """Read up to ``size`` bytes without crossing the end of the window.

                        A negative or None ``size`` means "everything up to the
                        end of the window".  Returns ``b''`` once the cursor is
                        at or past the end.
                        """
                        position = self.tell()
                        if position == 0:
                                # tell() also reports 0 when the parent cursor is before
                                # the window, so make sure we really start at its origin.
                                self.stream.seek(self.offset)
                        remaining = self.size - position
                        if remaining <= 0:
                                return b''
                        if size is None or size < 0 or size > remaining:
                                size = remaining
                        return self.stream.read(size)

                def seek(self, offset, whence=os.SEEK_SET):
                        """Move the parent cursor to a position relative to this window.

                        Raises ValueError for an unknown ``whence`` and IOError
                        when the target lies before the start of the window.
                        """
                        if whence == os.SEEK_SET:
                                target = self.offset + offset
                        elif whence == os.SEEK_CUR:
                                # Use the window-relative position so that SEEK_CUR agrees
                                # with tell() when the parent cursor is before the window.
                                target = self.offset + self.tell() + offset
                        elif whence == os.SEEK_END:
                                target = self.offset + self.size + offset
                        else:
                                raise ValueError('invalid whence: %r' % (whence,))

                        if target < self.offset:
                                raise IOError('cannot seek before the start of the substream')

                        self.stream.seek(target, os.SEEK_SET)

                def tell(self):
                        """Return the cursor position relative to the window start, never below 0."""
                        return max(0, self.stream.tell() - self.offset)

                def substream(self, offset, size):
                        """Return a window nested inside this one.

                        The nested window is created by (and cached on) the
                        parent stream.  Raises IOError when the requested range
                        is negative or does not fit inside this window.
                        """
                        if offset < 0 or size < 0 or offset + size > self.size:
                                raise IOError('substream range lies outside the enclosing substream')
                        return self.stream.substream(self.offset + offset, size)

                def __getitem__(self, key):
                        return _stream_getitem(self, key)

        def __init__(self, file_like):
                self.file = file_like
                # Measure the total size, then rewind to the beginning.
                self.file.seek(0, os.SEEK_END)
                self.size = self.file.tell()
                self.file.seek(0, os.SEEK_SET)
                # Cache of Substream objects keyed by (offset, size).
                self.substreams = {}

        def read(self, size):
                """Read ``size`` bytes from the underlying file at its current position."""
                return self.file.read(size)

        def seek(self, offset, whence=os.SEEK_SET):
                """Seek the underlying file; returns whatever its seek() returns."""
                return self.file.seek(offset, whence)

        def tell(self):
                """Return the underlying file's current position."""
                return self.file.tell()

        def substream(self, offset, size):
                """Return the (cached) window of ``size`` bytes starting at ``offset``.

                Raises IOError when the requested range is negative or extends
                past the end of the stream.
                """
                if offset < 0 or size < 0 or offset + size > self.size:
                        raise IOError('substream range lies outside the stream')
                key = (offset, size)
                try:
                        return self.substreams[key]
                except KeyError:
                        window = self.substreams[key] = self.Substream(self, offset, size)
                        return window

        def __getitem__(self, key):
                return _stream_getitem(self, key)
122
123
class Element(object):
        """Base class for schema elements, lazily decoded from a stream.

        ``stream`` is the window covering the whole element (ID, size field
        and body).  Header sizes and the decoded value are computed on first
        access and cached on the instance.

        Subclasses must provide ``id``, ``name`` and ``type``; ``type`` selects
        the reader used by ``value``.
        """
        __metaclass__ = abc.ABCMeta

        id = abc.abstractproperty()
        name = abc.abstractproperty()
        type = abc.abstractproperty()
        default = None
        children = ()
        mandatory = False
        multiple = False

        def __init__(self, document, stream):
                self.document = document
                self.stream = stream

        @property
        def value(self):
                """Decoded body of the element.

                Scalar types go through the matching entry in READERS, containers
                are parsed into a list of child elements, and any other type
                yields None.
                """
                if not hasattr(self, 'cached_value'):
                        if self.type in READERS:
                                body = self.body_stream
                                # Substreams share their parent's cursor, which may have
                                # been moved by other reads since the header was parsed;
                                # rewind so the reader starts at the first body byte.
                                body.seek(0)
                                self.cached_value = READERS[self.type](body, self.body_size)
                        elif self.type == CONTAINER:
                                self.cached_value = read_elements(self.body_stream, self.document, self.children)
                        else:
                                self.cached_value = None
                return self.cached_value

        @property
        def id_size(self):
                """Second item returned by read_element_id() at the start of the element."""
                if not hasattr(self, 'cached_id_size'):
                        self.stream.seek(0)
                        _, self.cached_id_size = read_element_id(self.stream)
                return self.cached_id_size

        @property
        def size_size(self):
                """Second item returned by read_element_size(), read right after the ID."""
                if not hasattr(self, 'cached_size_size'):
                        self.stream.seek(self.id_size)
                        _, self.cached_size_size = read_element_size(self.stream)
                return self.cached_size_size

        @property
        def head_size(self):
                """Combined size of the ID and size fields."""
                return self.id_size + self.size_size

        @property
        def body_size(self):
                """Size of the element minus its header."""
                return self.size - self.head_size

        @property
        def body_stream(self):
                """Window over the body, i.e. everything after the header."""
                return self.stream.substream(self.head_size, self.body_size)

        @property
        def size(self):
                """Total size of the element, taken from its stream."""
                return self.stream.size
179
180
class UnknownElement(Element):
        """Element whose ID is supplied per instance instead of by the class.

        The body is treated as BINARY.
        """
        id = None
        name = 'Unknown'
        type = BINARY

        def __init__(self, document, stream, id):
                super(UnknownElement, self).__init__(document, stream)
                # Shadows the class-level placeholder with the ID passed in.
                self.id = id
189
190
def read_elements(stream, document, children):
        """Split ``stream`` into consecutive elements and wrap each in a class.

        ``children`` and ``document.globals`` are the candidate element
        classes; the first candidate whose ``id`` matches wins, and elements
        with no matching candidate become UnknownElement instances.  Only the
        ID and size fields of each element are read here.

        Returns the list of element objects in stream order.  Errors raised by
        ``stream.substream`` for out-of-range elements propagate to the caller.
        """
        # Build the ID lookup once instead of rescanning the candidates for
        # every element; setdefault keeps the first class seen for an ID, which
        # matches a first-match linear search.
        classes_by_id = {}
        for candidate in (children + document.globals):
                classes_by_id.setdefault(candidate.id, candidate)

        elements = []
        total_size = stream.size
        offset = 0
        while offset < total_size:
                # Reposition explicitly: earlier reads may have moved the cursor.
                stream.seek(offset)
                element_id, id_size = read_element_id(stream)
                body_size, size_size = read_element_size(stream)
                span = id_size + size_size + body_size
                element_stream = stream.substream(offset, span)
                offset += span

                element_class = classes_by_id.get(element_id)
                if element_class is None:
                        element = UnknownElement(document, element_stream, element_id)
                else:
                        element = element_class(document, element_stream)
                elements.append(element)
        return elements
216
217
class Document(object):
        """Base class for documents read from a file-like object.

        Subclasses must provide ``type`` and ``version``; ``children`` and
        ``globals`` list the element classes used when parsing.
        """
        __metaclass__ = abc.ABCMeta

        type = abc.abstractproperty()
        version = abc.abstractproperty()
        children = ()
        globals = ()

        def __init__(self, file_like):
                self.stream = Stream(file_like)
                # Filled in on first access to ``roots``.
                self._roots = None

        @property
        def roots(self):
                """Top-level elements of the document, parsed once and then cached."""
                parsed = self._roots
                if parsed is not None:
                        return parsed
                parsed = read_elements(self.stream, self, self.children)
                self._roots = parsed
                return parsed