8 'read_unsigned_integer',
12 'read_unicode_string',
15 'encode_element_size',
16 'encode_unsigned_integer',
17 'encode_signed_integer',
20 'encode_unicode_string',
25 MAXIMUM_ELEMENT_ID_LENGTH = 4
26 MAXIMUM_ELEMENT_SIZE_LENGTH = 8
27 MAXIMUM_UNSIGNED_INTEGER_LENGTH = 8
28 MAXIMUM_SIGNED_INTEGER_LENGTH = 8
31 def maximum_element_size_for_length(length):
34 Returns the maximum element size representable in a given number of bytes.
36 :arg length: the limit on the length of the encoded representation in bytes
38 :returns: the maximum element size representable
43 return (2**(7*length)) - 2
46 def decode_vint_length(byte, mask=True):
49 for n in xrange(1, 9):
50 if byte & (2**8 - (2**(8 - n))) == 2**(8 - n):
52 value_mask = (2**(8 - n)) - 1
55 raise IOError('Cannot decode invalid varible-length integer.')
57 byte = byte & value_mask
61 def read_element_id(stream):
64 Reads an element ID from a file-like object.
66 :arg stream: the file-like object
67 :returns: the decoded element ID and its length in bytes
72 byte = ord(stream.read(1))
73 length, id_ = decode_vint_length(byte, False)
75 raise IOError('Cannot decode element ID with length > 8.')
76 for i in xrange(0, length - 1):
77 byte = ord(stream.read(1))
78 id_ = (id_ * 2**8) + byte
82 def read_element_size(stream):
85 Reads an element size from a file-like object.
87 :arg stream: the file-like object
88 :returns: the decoded size (or None if unknown) and the length of the descriptor in bytes
93 byte = ord(stream.read(1))
94 length, size = decode_vint_length(byte)
96 for i in xrange(0, length - 1):
97 byte = ord(stream.read(1))
98 size = (size * 2**8) + byte
100 if size == maximum_element_size_for_length(length) + 1:
106 def read_unsigned_integer(stream, size):
109 Reads an encoded unsigned integer value from a file-like object.
111 :arg stream: the file-like object
112 :arg size: the number of bytes to read and decode
114 :returns: the decoded unsigned integer value
120 for i in xrange(0, size):
121 byte = ord(stream.read(1))
122 value = (value << 8) | byte
126 def read_signed_integer(stream, size):
129 Reads an encoded signed integer value from a file-like object.
131 :arg stream: the file-like object
132 :arg size: the number of bytes to read and decode
134 :returns: the decoded signed integer value
141 first_byte = ord(stream.read(1))
143 for i in xrange(1, size):
144 byte = ord(stream.read(1))
145 value = (value << 8) | byte
146 if (first_byte & 0b10000000) == 0b10000000:
147 value = -(2**(size*8) - value)
151 def read_float(stream, size):
154 Reads an encoded floating point value from a file-like object.
156 :arg stream: the file-like object
157 :arg size: the number of bytes to read and decode (must be 0, 4, or 8)
159 :returns: the decoded floating point value
164 if size not in (0, 4, 8):
165 raise IOError('Cannot read floating point values with lengths other than 0, 4, or 8 bytes.')
168 data = stream.read(size)
169 value = struct.unpack({
176 def read_string(stream, size):
179 Reads an encoded ASCII string value from a file-like object.
181 :arg stream: the file-like object
182 :arg size: the number of bytes to read and decode
184 :returns: the decoded ASCII string value
191 value = stream.read(size)
192 value = value.partition(chr(0))[0]
196 def read_unicode_string(stream, size):
199 Reads an encoded unicode string value from a file-like object.
201 :arg stream: the file-like object
202 :arg size: the number of bytes to read and decode
204 :returns: the decoded unicode string value
211 data = stream.read(size)
212 data = data.partition(chr(0))[0]
213 value = unicode(data, 'utf_8')
217 def read_date(stream, size):
220 Reads an encoded date (and time) value from a file-like object.
222 :arg stream: the file-like object
223 :arg size: the number of bytes to read and decode (must be 8)
225 :returns: the decoded date (and time) value
231 raise IOError('Cannot read date values with lengths other than 8 bytes.')
232 data = stream.read(size)
233 nanoseconds = struct.unpack('>q', data)[0]
234 delta = datetime.timedelta(microseconds=(nanoseconds // 1000))
235 return datetime.datetime(2001, 1, 1, tzinfo=None) + delta
241 Limits an integer or byte to 8 bits.
245 return n & 0b11111111
248 def vint_mask_for_length(length):
251 Returns the bitmask for the first byte of a variable-length integer (used for element ID and size descriptors).
253 :arg length: the length of the variable-length integer
255 :returns: the bitmask for the first byte of the variable-length integer
260 return 0b10000000 >> (length - 1)
263 def encode_element_id(element_id):
266 Encodes an element ID.
268 :arg element_id: an element ID
269 :type element_id: int
270 :returns: the encoded representation bytes
275 length = MAXIMUM_ELEMENT_ID_LENGTH
276 while length and not (element_id & (vint_mask_for_length(length) << ((length - 1) * 8))):
279 raise ValueError('Cannot encode invalid element ID %s.' % hex(element_id))
281 data = bytearray(length)
282 for index in reversed(xrange(length)):
283 data[index] = octet(element_id)
289 def encode_element_size(element_size, length=None):
292 Encodes an element size. If element_size is None, the size will be encoded as unknown. If length is not None, the size will be encoded in that many bytes; otherwise, the size will be encoded in the minimum number of bytes required, or in 8 bytes if the size is unknown (element_size is None).
294 :arg element_size: the element size, or None if unknown
295 :type element_size: int or None
296 :arg length: the length of the encoded representation, or None for the minimum length required (defaults to None)
297 :type length: int or None
298 :returns: the encoded representation bytes
303 if length is not None and (length < 1 or length > MAXIMUM_ELEMENT_SIZE_LENGTH):
304 raise ValueError('Cannot encode element sizes into representations shorter than one byte long or longer than %i bytes long.' % MAXIMUM_ELEMENT_SIZE_LENGTH)
305 if element_size is not None:
306 if element_size > maximum_element_size_for_length(MAXIMUM_ELEMENT_SIZE_LENGTH if length is None else length):
307 raise ValueError('Cannot encode element size %i as it would have an encoded representation longer than %i bytes.' % (element_size, (MAXIMUM_ELEMENT_SIZE_LENGTH if length is None else length)))
309 while (element_size >> ((req_length - 1) * 8)) >= (vint_mask_for_length(req_length) - 1) and req_length < MAXIMUM_ELEMENT_SIZE_LENGTH:
315 length = 8 # other libraries do this, so unless another length is specified for the unknown size descriptor, do as they do to avoid compatibility issues.
316 element_size = maximum_element_size_for_length(length) + 1
318 data = bytearray(length)
319 for index in reversed(xrange(length)):
320 data[index] = octet(element_size)
323 data[index] = data[index] | vint_mask_for_length(length)
328 def encode_unsigned_integer(uint, length=None):
331 Encodes an unsigned integer value. If length is not None, uint will be encoded in that many bytes; otherwise, uint will be encoded in the minimum number of bytes required. If uint is None or 0, the minimum number of bytes required is 0.
333 :arg uint: the unsigned integer value
335 :arg length: the length of the encoded representation, or None for the minimum length required (defaults to None)
336 :type length: int or None
337 :returns: the encoded representation bytes
344 if uint > ((2**((MAXIMUM_UNSIGNED_INTEGER_LENGTH if length is None else length) * 8)) - 1):
345 raise ValueError('Cannot encode unsigned integer value %i as it would have an encoded representation longer than %i bytes.' % (uint, (MAXIMUM_UNSIGNED_INTEGER_LENGTH if length is None else length)))
350 while uint >= (1 << (req_length * 8)) and req_length < MAXIMUM_UNSIGNED_INTEGER_LENGTH:
355 data = bytearray(length)
356 for index in reversed(xrange(length)):
357 data[index] = octet(uint)
363 def encode_signed_integer(sint, length=None):
366 Encodes a signed integer value. If length is not None, sint will be encoded in that many bytes; otherwise, sint will be encoded in the minimum number of bytes required. If sint is None or 0, the minimum number of bytes required is 0.
368 :arg sint: the signed integer value
370 :arg length: the length of the encoded representation, or None for the minimum length required (defaults to None)
371 :type length: int or None
372 :returns: the encoded representation bytes
379 if not (-(2**(7+(8*((MAXIMUM_SIGNED_INTEGER_LENGTH if length is None else length)-1)))) <= sint <= (2**(7+(8*((MAXIMUM_SIGNED_INTEGER_LENGTH if length is None else length)-1))))-1):
380 raise ValueError('Cannot encode signed integer value %i as it would have an encoded representation longer than %i bytes.' % (sint, (MAXIMUM_SIGNED_INTEGER_LENGTH if length is None else length)))
387 uint = ((-sint - 1) << 1) if sint < 0 else (sint << 1)
389 while uint >= (1 << (req_length * 8)) and req_length < MAXIMUM_UNSIGNED_INTEGER_LENGTH:
396 uint = 2**(length*8) - abs(sint)
398 data = bytearray(length)
399 for index in reversed(xrange(length)):
400 data[index] = octet(uint)
406 def encode_float(float_, length=None):
409 Encodes a floating point value. If length is not None, float_ will be encoded in that many bytes; otherwise, float_ will be encoded in 0 bytes if float_ is None or 0, and 8 bytes in all other cases. If float_ is neither None nor 0 and length is 0, ValueError will be raised.
411 :arg float_: the floating point value
413 :arg length: the length of the encoded representation, or None (defaults to None)
414 :type length: int or None
415 :returns: the encoded representation bytes
420 if length not in (None, 0, 4, 8):
421 raise ValueError('Cannot encode floating point values with lengths other than 0, 4, or 8 bytes.')
431 raise ValueError('Cannot encode floating point value %f as it would have an encoded representation longer than 0 bytes.' % float_)
434 data = bytearray(struct.pack({
444 def encode_string(string, length=None):
447 Encodes an ASCII string value. If length is not None, string will be encoded in that many bytes by padding with zero bytes at the end if necessary; otherwise, string will be encoded in the minimum number of bytes required. If string is None or empty, the minimum number of bytes required is 0.
449 :arg string: the ASCII string value
451 :arg length: the length of the encoded representation, or None for the minimum length required (defaults to None)
452 :type length: int or None
453 :returns: the encoded representation bytes
463 if length < len(string):
464 raise ValueError('Cannot encode ASCII string value \'%s\' as it would have an encoded representation longer than %i bytes.' % (string, length))
465 elif length > len(string):
466 for i in xrange(0, (length - len(string))):
469 return bytearray(string)
472 def encode_unicode_string(string, length=None):
475 Encodes a unicode string value. If length is not None, string will be encoded in that many bytes by padding with zero bytes at the end if necessary; otherwise, string will be encoded in the minimum number of bytes required. If string is None or empty, the minimum number of bytes required is 0.
477 :arg string: the unicode string value
478 :type string: unicode
479 :arg length: the length of the encoded representation, or None for the minimum length required (defaults to None)
480 :type length: int or None
481 :returns: the encoded representation bytes
488 return encode_string(string.encode('utf_8'), length)
491 def encode_date(date, length=None):
494 Encodes a date (and time) value. If length is not None, it must be 8. If date is None, the current date (and time) will be encoded.
496 :arg date: the date (and time) value
497 :type date: datetime.datetime
498 :arg length: the length of the encoded representation (must be 8), or None
499 :type length: int or None
500 :returns: the encoded representation bytes
506 date = datetime.datetime.utcnow()
508 date = (date - date.utcoffset()).replace(tzinfo=None)
512 raise ValueError('Cannot encode date value %s with any length other than 8 bytes.')
514 delta = date - datetime.datetime(2001, 1, 1, tzinfo=None)
515 nanoseconds = (delta.microseconds + ((delta.seconds + (delta.days * 24 * 60 * 60)) * 10**6)) * 10**3
516 return encode_signed_integer(nanoseconds, length)