Overhauled the schema system to make more sense. Implemented a dump_structure utility.
[~jspiros/python-ebml.git] / ebml / schema / specs.py
1 from xml.etree.ElementTree import parse as parse_xml
2 from .base import INT, UINT, FLOAT, STRING, UNICODE, DATE, BINARY, CONTAINER, Element, Document
3
4
# Maps the value of an element's ``type`` attribute in the specification XML
# to the corresponding type constant imported from ``.base``.
SPECDATA_TYPES = {
    # numeric types
    'integer': INT,
    'uinteger': UINT,
    'float': FLOAT,
    # textual types
    'string': STRING,
    'utf-8': UNICODE,
    # everything else
    'date': DATE,
    'binary': BINARY,
    'master': CONTAINER,
}
15
16
def parse_specdata(source, doc_name, doc_type, doc_version):
    """
    Build Element and Document classes from an XML schema specification.

    Reads a schema specification from a file (e.g., specdata.xml) or file-like
    object, and returns a tuple containing:

        * a mapping of class names to Element subclasses
        * a Document subclass

    The children of the XML root are read in document order. Each one must
    carry ``level``, ``id``, ``name`` and ``type`` attributes. An element
    becomes a child of the nearest preceding element with a lower level.
    Elements whose level is ``-1`` are "global": they are collected into the
    Document's ``globals`` tuple instead of any ``children`` tuple, and the
    elements nested under them are parsed as if the global sat at level 0.

    Generated class names are ``<cppname or name, hyphens removed>Element``.
    If two specification entries produce the same class name, the later one
    replaces the earlier one in the returned mapping.

    :arg source: the file name or file-like object
    :type source: str or file-like object
    :arg doc_name: the class name of the generated Document subclass
    :type doc_name: str
    :arg doc_type: stored as the ``type`` attribute of the Document subclass
    :arg doc_version: stored as the ``version`` attribute of the Document subclass
    :returns: tuple of (elements mapping, Document subclass)
    :raises KeyError: if an entry lacks ``level``, ``id``, ``name`` or
        ``type``, or its ``type`` is not a key of SPECDATA_TYPES
    :raises ValueError: if ``level`` or ``id`` is not a valid integer

    """

    # ``unicode`` only exists on Python 2; fall back to ``str`` elsewhere so
    # that converting a UNICODE default never raises NameError.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # Converters applied to the raw ``default`` attribute string, keyed by
    # element type. Types not listed here keep the raw string unchanged.
    default_converters = {
        INT: int,
        UINT: int,
        FLOAT: float,
        STRING: str,
        UNICODE: text_type,
    }

    tree = parse_xml(source)
    # list(root) replaces Element.getchildren(), which was removed in 3.9.
    raw_elements = list(tree.getroot())
    elements = {}
    global_elements = []

    def child_elements(parent_level, position):
        """
        Parse consecutive entries starting at ``position`` that belong under
        an element at ``parent_level`` (``None`` means "accept every level").

        Returns ``(children, next_position)`` where ``children`` is a tuple of
        the non-global Element subclasses found and ``next_position`` is the
        index of the first entry that was not consumed.
        """
        children = []
        while position < len(raw_elements):
            raw_attrs = raw_elements[position].attrib

            element_level = int(raw_attrs['level'])
            is_global = element_level == -1
            if (not is_global and parent_level is not None
                    and element_level <= parent_level):
                # This entry belongs to an ancestor; let the caller handle it.
                break
            position += 1

            # str.replace works for byte and unicode strings alike, unlike
            # the Python 2-only ``translate(None, '-')`` form.
            base_name = raw_attrs.get('cppname', raw_attrs.get('name'))
            element_name = '%sElement' % base_name.replace('-', '')

            element_type = SPECDATA_TYPES[raw_attrs['type']]
            element_attrs = {
                '__module__': None,
                # Base 0 lets the id be written with a prefix such as 0x.
                'id': int(raw_attrs['id'], 0),
                'name': raw_attrs['name'],
                'type': element_type,
                'mandatory': raw_attrs.get('mandatory') == '1',
                'multiple': raw_attrs.get('multiple') == '1',
            }

            # A missing or unconvertible default is recorded as None.
            raw_default = raw_attrs.get('default')
            if raw_default is None:
                element_attrs['default'] = None
            else:
                convert = default_converters.get(element_type)
                try:
                    element_attrs['default'] = (
                        convert(raw_default) if convert is not None
                        else raw_default
                    )
                except ValueError:
                    element_attrs['default'] = None

            # Globals have level -1, so their nested entries are collected
            # relative to level 0 instead.
            nested_level = 0 if is_global else element_level
            element_attrs['children'], position = child_elements(
                nested_level, position)

            element = type(element_name, (Element,), element_attrs)
            elements[element_name] = element
            if is_global:
                global_elements.append(element)
            else:
                children.append(element)
        return tuple(children), position

    children = child_elements(None, 0)[0]

    document_attrs = {
        '__module__': None,
        'type': doc_type,
        'version': doc_version,
        'children': children,
        'globals': tuple(global_elements),
    }
    document = type(doc_name, (Document,), document_attrs)

    return elements, document