3 from .exceptions import *
def _read_vint_to_bytearray(stream, max_width=EBMLMaxSizeLength):
    """

    Reads a vint from stream and returns a bytearray containing all of the bytes without doing any decoding.

    The width of the vint is taken from the position of its first set bit (the length marker): one byte,
    plus one more byte for every zero bit that precedes the marker.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg max_width: the maximum length, in bytes, of the vint (defaults to :data:`EBMLMaxSizeLength`)
    :type max_width: int
    :returns: bytearray
    :raises ParseError: if the vint is wider than max_width, or if the stream ends before the whole vint was read

    """

    marker_found = False
    vint_bytes = bytearray()
    vint_len = -7
    while not marker_found:
        # Each prefix byte without a marker raises the smallest possible width by 8 (1, 9, 17, ...).
        vint_len += 8
        if vint_len > max_width:
            raise ParseError('vint exceeds max_width (%(max_width)i)' % {
                'max_width': max_width
            })
        # bytearray() yields integers whether read() returns str or bytes, and an empty read
        # (end of stream) stays detectable instead of blowing up inside ord().
        prefix = bytearray(stream.read(1))
        if not prefix:
            raise ParseError('Unable to read truncated vint of width %(vint_len)s from stream (%(vint_bytes)s bytes available)' % {
                'vint_len': vint_len,
                'vint_bytes': len(vint_bytes)
            })
        byte = prefix[0]
        vint_bytes.append(byte)
        for pos in range(0, 8):
            mask = 0b10000000 >> pos
            if (byte & mask) == mask:
                vint_len += pos
                marker_found = True
                break

    # The check inside the loop only sees the minimum width for the current prefix byte; the marker
    # position can push the real width past max_width, so the limit is enforced again here.
    if vint_len > max_width:
        raise ParseError('vint exceeds max_width (%(max_width)i)' % {
            'max_width': max_width
        })

    remaining_bytes_len = vint_len - len(vint_bytes)
    if remaining_bytes_len > 0:
        vint_bytes.extend(bytearray(stream.read(remaining_bytes_len)))

    if len(vint_bytes) != vint_len:
        raise ParseError('Unable to read truncated vint of width %(vint_len)s from stream (%(vint_bytes)s bytes available)' % {
            'vint_len': vint_len,
            'vint_bytes': len(vint_bytes)
        })

    return vint_bytes
def read_element_size(stream, max_width=EBMLMaxSizeLength):
    """

    Reads an EBML element size vint from stream and returns a tuple containing:

    * the size as an integer, or None if the size is undefined
    * the length in bytes of the size descriptor (the vint) itself

    The size is reported as undefined when every value bit of the vint is set to 1.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg max_width: the maximum length, in bytes, of the vint storing the element size (defaults to :data:`EBMLMaxSizeLength`)
    :type max_width: int
    :returns: tuple

    """

    raw = _read_vint_to_bytearray(stream, max_width)
    width = len(raw)

    # Leading bytes that consist purely of length padding (only present for widths above 8) hold no value bits.
    payload = raw[(width - 1) // 8:]
    tail = payload[1:]
    # In the first payload byte only the bits below the length marker belong to the value.
    low_bits = (0b10000000 >> ((width - 1) % 8)) - 1
    head = payload[0] & low_bits

    if head == low_bits and all(octet == 0b11111111 for octet in tail):
        return None, width

    size = head
    for octet in tail:
        size = (size << 8) | octet
    return size, width
def read_element_id(stream, max_width=EBMLMaxIDLength):
    """

    Reads an EBML element ID vint from stream and returns a tuple containing:

    * the ID as an integer
    * the length in bytes of the ID descriptor (the vint) itself

    The returned ID is the big-endian value of all of the vint's bytes, length marker included.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg max_width: the maximum length, in bytes, of the vint storing the element ID (defaults to :data:`EBMLMaxIDLength`)
    :type max_width: int
    :returns: tuple
    :raises ReservedElementIDError: if every value bit of the ID is set to 1

    """

    vint_bytes = _read_vint_to_bytearray(stream, max_width)
    vint_len = len(vint_bytes)

    value = 0
    min_bytes = 0

    for vint_byte in vint_bytes:
        if vint_byte == 0:
            min_bytes += 1
        value = (value << 8) | vint_byte

    # For a vint of width w the length marker sits at bit 7*w of the combined integer, so the bits
    # below it are exactly the value bits. Comparing whole bytes against 0xFF (marker and padding
    # included) could only ever match the one-byte ID 0xFF and missed e.g. 0x7FFF.
    value_bits_mask = (1 << (7 * vint_len)) - 1

    if (value & value_bits_mask) == value_bits_mask:
        raise ReservedElementIDError('All value bits set to 1')
    elif min_bytes == vint_len:
        # NOTE(review): this still compares whole bytes, so it cannot fire for a vint that contains
        # a marker bit. It is deliberately not tightened to the value bits: that would start
        # rejecting one-byte ID 0x80, which existing documents appear to use (Matroska's
        # ChapterDisplay, as far as I know) -- confirm before changing.
        raise ReservedElementIDError('All value bits set to 0')

    return value, vint_len
def read_int(stream, size):
    """

    Reads a signed, big-endian, two's-complement integer of size bytes from stream.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg size: the number of bytes to read; a size of 0 (or less) reads nothing
    :type size: int
    :returns: the decoded integer, or 0 when no bytes are read

    """

    value = 0
    if size > 0:
        byte = ord(stream.read(1))
        if (byte & 0b10000000) == 0b10000000:
            # Sign bit set: start from all ones above the first byte so the result comes out negative.
            value = -1 << 8
        value |= byte
        for _ in range(1, size):
            byte = ord(stream.read(1))
            # Every following byte contributes eight bits, so shift by a whole byte.
            value = (value << 8) | byte
    return value
def read_uint(stream, size):
    """

    Reads an unsigned, big-endian integer of size bytes from stream, one byte at a time.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg size: the number of bytes to read
    :type size: int
    :returns: the decoded integer, or 0 when no bytes are read

    """

    total = 0
    for _ in range(size):
        octet = ord(stream.read(1))
        # Make room for the next eight bits, then drop the new byte into them.
        total <<= 8
        total |= octet
    return total
def read_float(stream, size):
    """

    Reads a big-endian IEEE float of size bytes from stream.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg size: the width of the float in bytes; must be 0, 4, or 8
    :type size: int
    :returns: the decoded float, or 0.0 when size is 0 (nothing is read in that case)
    :raises ValueError: if size is not 0, 4, or 8

    """

    if size not in (0, 4, 8):
        # http://www.matroska.org/technical/specs/rfc/index.html allows for 10-byte floats.
        # http://www.matroska.org/technical/specs/index.html specifies 4-byte and 8-byte only.
        # I'm following the latter due to it being more up-to-date than the former, and because it's easier to implement.
        raise ValueError('floats must be 0, 4, or 8 bytes long')

    if size == 0:
        return 0.0

    layout = '>f' if size == 4 else '>d'
    raw = stream.read(size)
    return struct.unpack(layout, raw)[0]
def read_string(stream, size):
    """

    Reads size bytes from stream and returns them exactly as the stream delivered them.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg size: the number of bytes to read
    :type size: int
    :returns: the data read, or an empty string when size is not positive (nothing is read in that case)

    """

    if size > 0:
        return stream.read(size)
    return ''
def read_unicode(stream, size):
    """

    Reads size bytes from stream and decodes them as UTF-8.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg size: the number of bytes to read
    :type size: int
    :returns: the decoded text, or an empty unicode string when size is not positive (nothing is read in that case)

    """

    if size <= 0:
        return u''
    raw = stream.read(size)
    return unicode(raw, 'utf_8')
def read_date(stream):
    """

    Reads an 8-byte date from stream and returns it as a naive :class:`datetime.datetime`.

    The bytes are decoded as a big-endian signed count of nanoseconds relative to
    2001-01-01 00:00:00; the count is floored to whole microseconds before it is applied.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :returns: datetime.datetime

    """

    raw = stream.read(8)  # date is always an 8-byte signed integer
    (nanoseconds,) = struct.unpack('>q', raw)
    epoch = datetime.datetime(2001, 1, 1)
    return epoch + datetime.timedelta(microseconds=nanoseconds // 1000)
def read_binary(stream, size):
    """

    Reads size bytes from stream and returns them untouched.

    :arg stream: the source of the bytes
    :type stream: a file-like object
    :arg size: the number of bytes to read
    :type size: int
    :returns: whatever ``stream.read(size)`` returns

    """

    payload = stream.read(size)
    return payload