Implemented basic EBMLFile class for parsing files.
[~jspiros/python-ebml.git] / ebml / files.py
1 from .schema import *
2 from .core import read_element_id, read_element_size, read_int, read_uint, read_float, read_string, read_unicode, read_date
3
4
__all__ = ('EBMLFile', 'MatroskaFile')


def _read_date_ignoring_size(stream, size):
    """Adapt ``read_date`` to the ``reader(stream, size)`` calling convention.

    ``read_date`` is called with the stream only, so ``size`` is accepted
    and discarded.
    """
    return read_date(stream)


# Dispatch table: schema data type -> function that decodes the payload.
# Every entry is invoked as ``reader(stream, size)``.
TYPE_READERS = {
    INT: read_int,
    UINT: read_uint,
    FLOAT: read_float,
    STRING: read_string,
    UNICODE: read_unicode,
    DATE: _read_date_ignoring_size,
}
16
17
class EBMLFileElement(object):
    """A single element parsed from an EBML stream.

    Constructing an instance consumes the element from ``stream``: the
    element ID and size are read at the current position, the ID is resolved
    against ``schema``, and the payload is then decoded or skipped.

    Attributes:
        stream: the stream the element was read from.
        schema: the schema used to resolve element IDs.
        parent: the schema element of the enclosing container, or ``None``
            for a top-level element.
        class_id, class_id_len: the element ID and the first/second values
            returned by ``read_element_id``.
        element: the matching schema element, or ``None`` when the ID is
            unknown to the schema or not permitted at this position.
        size, size_len: the two values returned by ``read_element_size``.
        offset: stream position of the first payload byte.
        contents: the decoded value, a list of child ``EBMLFileElement``
            objects for containers, or ``None`` when the payload was skipped.
    """

    def __init__(self, stream, schema, parent=None):
        self.stream = stream
        self.schema = schema
        self.parent = parent
        self.class_id, self.class_id_len = read_element_id(self.stream)
        self.element = self._resolve_element()
        self.size, self.size_len = read_element_size(self.stream)
        self.offset = self.stream.tell()
        self._read_contents()

    def _resolve_element(self):
        """Return the schema element for ``class_id`` if valid here, else None."""
        try:
            element = self.schema.element_with_class_id(self.class_id)
        except Exception:
            # Unknown ID.  ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            return None
        if self.parent is None:
            allowed = self.schema.root_elements()
        else:
            allowed = self.schema.child_elements_of_element(self.parent)
        if element not in allowed:
            return None
        return element

    def _read_contents(self):
        """Decode the payload into ``self.contents`` or seek past it."""
        contents = None
        element = self.element
        if element is not None and element.data_type in TYPE_READERS:
            reader = TYPE_READERS[element.data_type]
            contents = reader(self.stream, self.size)
        elif element is not None and element.data_type == CONTAINER:
            contents = []
            read_len = 0
            # Children are read back to back until the declared payload size
            # has been accounted for (header bytes + payload of each child).
            while read_len < self.size:
                child = EBMLFileElement(self.stream, self.schema, element)
                read_len += child.class_id_len + child.size_len + child.size
                contents.append(child)
        else:
            # Unknown element, or a data type with no reader: skip payload.
            self.stream.seek(self.offset + self.size, 0)
        self.contents = contents

    def _class_name(self, fallback):
        """Return the schema class name, or ``fallback`` when unavailable."""
        if self.element is None:
            return fallback
        return self.element.class_name or fallback

    def _has_value(self):
        """Tell whether ``contents`` holds something worth printing.

        ``None`` and empty sequences/strings count as empty.  Values without
        a length (numbers, dates) always count as present, so a decoded ``0``
        is not mistaken for "no value".
        """
        if self.contents is None:
            return False
        try:
            return len(self.contents) > 0
        except TypeError:
            return True

    def pprint(self, indent=0):
        """Print an XML-like rendering of this element and its children.

        ``indent`` is the number of tab characters prefixed to each line.
        """
        sargs = {
            'class_name': self._class_name('Unknown'),
            'class_id': self.class_id,
            'size': self.size,
            'value': self.contents,
        }
        prefix = '\t' * indent

        def emit(text):
            # Single-argument call form prints identically on Python 2 and 3.
            print(prefix + text)

        if not self._has_value():
            emit('<%(class_name)s id=\'%(class_id)x\' size=\'%(size)i\' />' % sargs)
        elif self.element is not None and self.element.data_type == CONTAINER:
            emit('<%(class_name)s id=\'%(class_id)x\' size=\'%(size)i\'>' % sargs)
            for sub_el in self.contents:
                sub_el.pprint(indent + 1)
            emit('</%(class_name)s>' % sargs)
        else:
            emit('<%(class_name)s id=\'%(class_id)x\' size=\'%(size)i\'>%(value)s</%(class_name)s>' % sargs)

    def __repr__(self):
        if self.element is None:
            class_id = self.class_id
        else:
            class_id = self.element.class_id or self.class_id
        return '<%(class_name)s id=%(class_id)x size=%(size)i>' % {
            'class_name': self._class_name('?'),
            'class_id': class_id,
            'size': self.size,
        }
83
84
class EBMLFile(object):
    """An EBML document parsed eagerly into a list of top-level elements.

    Args:
        name_or_stream: a filesystem path (opened in binary mode) or an
            already-open stream to read from.
        schema: schema used to resolve element IDs; defaults to the class
            attribute ``default_schema``.

    Attributes:
        schema: the schema in use.
        stream: the stream being read.
        contents: list of top-level ``EBMLFileElement`` objects.

    Instances can be used as context managers.  ``close()`` only closes the
    stream when this object opened it from a path; a caller-supplied stream
    is left open.
    """

    default_schema = EBML

    def __init__(self, name_or_stream, schema=None):
        if schema is None:
            schema = self.default_schema
        self.schema = schema

        try:
            path_types = basestring  # Python 2: covers str and unicode
        except NameError:
            path_types = (str, bytes)  # Python 3 has no basestring

        # Remember who opened the stream so close() never closes a stream
        # that belongs to the caller.
        self._owns_stream = isinstance(name_or_stream, path_types)
        if self._owns_stream:
            self.stream = open(name_or_stream, 'rb')
        else:
            self.stream = name_or_stream

        try:
            self._read_contents()
        except BaseException:
            # Do not leak the file handle if reading is interrupted.
            self.close()
            raise

    def _read_contents(self):
        """Read top-level elements until one fails to parse.

        A failed read is what ends the loop -- presumably this is how
        end-of-stream surfaces from the low-level readers (confirm against
        ``read_element_id``).  Any other parse error therefore also ends
        reading silently, keeping the elements read so far.
        """
        self.contents = []
        while True:
            try:
                element = EBMLFileElement(self.stream, self.schema, None)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                break
            self.contents.append(element)

    def close(self):
        """Close the underlying file if this object opened it."""
        if getattr(self, '_owns_stream', False):
            self.stream.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def pprint(self):
        """Print every top-level element via its ``pprint`` method."""
        for el in self.contents:
            el.pprint()
111
112
class MatroskaFile(EBMLFile):
    """``EBMLFile`` whose default schema is ``Matroska``."""

    default_schema = Matroska