In this tip, I will show you a serializer and deserializer that I developed for saving machine learning parameters.
Introduction
I want to show you how I developed a binary serializer and deserializer that I called ko-saver. It can be used as a prototype for rewriting the code in C++ so that it can run on a simple controller, such as an Arduino. We may train neural networks on a computer in Python and then use the trained network on the controller, so we need a serializer and a deserializer. In networks, we often need to use 2D matrices like:
//
//
//
l=[[12.2, 5.6, 9.8],
[34.6, 78.7, 10.8]]
ko-saver serializes them in the following format:
- <byte-code that creates 2D matrix> ::= Dmatrix2D <string as name of the matrix> <matrix row amount> <matrix cols amount> <matrix element amount> <serialized ribbon of elements, each element as float32>
Here is the function that accepts a 2D list matrix, its number of rows and its number of columns:
//
//
//
def dump_matrix2D(matrix2D_list, H, W):
    """Serialize a 2D matrix into a flat list of byte values.

    Layout of the returned list:
    ``[H, W, <4 bytes: H * W as little-endian int32>, <float32 payload>]``
    where the payload holds the elements in row-major order, each packed as a
    little-endian float32.

    Args:
        matrix2D_list: Matrix indexable as ``matrix2D_list[row][col]``.
        H: Number of rows; stored in a single byte.
        W: Number of columns; stored in a single byte.

    Returns:
        A list of ints, each in the range 0..255.

    Raises:
        ValueError: If ``H`` or ``W`` does not fit in one byte.
    """
    # H and W are written as single raw bytes, so larger values could not be
    # turned into a bytes object (or read back) later on.
    if not (0 <= H <= 255 and 0 <= W <= 255):
        raise ValueError(
            "matrix dimensions must be in range 0..255, got H=%r, W=%r" % (H, W)
        )
    header = [H, W, *st.pack('<i', W * H)]
    # Pack every element exactly once and flatten its four bytes.
    payload = [
        byte
        for row in range(H)
        for col in range(W)
        for byte in st.pack('<f', matrix2D_list[row][col])
    ]
    return header + payload
It returns a list in which everything is serialized as bytes (numbers between 0 and 255). The number of elements in the matrix is encoded inside the function by this code:
//
//
//
# Pack the element count N into 4 bytes: '<' = little-endian, 'i' = signed int.
N_bin=st.pack('<i', N)
The string used as the name of the matrix is encoded by the function:
//
//
//
def make_bin_string(string: str) -> tuple:
    """Return the code points of *string* followed by a terminating 0."""
    codes = [ord(char) for char in string]
    codes.append(0)  # zero marks the end of the string
    return tuple(codes)
It terminates the encoded string with a zero byte. The serializer for a single matrix is the function ser_item. It accepts a sequence whose items may be integers (byte-codes, such as the one that creates a matrix or the one that stops deserialization), Python strings (the names of matrices), and Python lists or NumPy arrays (the matrices themselves):
//
//
//
def ser_item(inputt):
    """Serialize a sequence of items into one flat list of byte values.

    Each item is handled according to its type:
    - ``int``: appended unchanged (used for command byte-codes);
    - ``str``: encoded with ``make_bin_string``;
    - ``list`` or ``np.ndarray``: treated as a 2D matrix and encoded with
      ``dump_matrix2D``.
    Items of any other type are skipped.
    """
    encoded = []
    for part in inputt:
        if isinstance(part, int):
            encoded.append(part)
            continue
        if isinstance(part, str):
            encoded.extend(make_bin_string(part))
            continue
        if isinstance(part, list):
            height, width = len(part), len(part[0])
        elif isinstance(part, np.ndarray):
            height, width = part.shape[0], part.shape[1]
        else:
            continue  # unsupported type: ignored
        encoded.extend(dump_matrix2D(part, height, width))
    return encoded
It returns a list that is the serialized matrix together with its byte-codes. Strings are decoded by the function:
//
//
//
def make_string_from_bin(inputt, pb):
    """Decode a zero-terminated string that starts at ``inputt[pb]``.

    Returns:
        A ``(text, consumed)`` tuple, where ``consumed`` is the number of
        entries read, including the terminating zero.
    """
    chars = []
    cursor = pb
    while True:
        code = inputt[cursor]
        if code == 0:
            break
        chars.append(chr(code))
        cursor += 1
    # +1 accounts for the zero terminator itself.
    return "".join(chars), cursor - pb + 1
pb is the byte pointer (the current offset into the byte sequence) that is passed from the deser_machine function. The number of elements in the matrix is decoded by the function:
//
//
//
def get_next4bytes(inputt, pb):
    """Return the little-endian signed 32-bit integer stored at ``inputt[pb]`` .. ``inputt[pb + 3]``."""
    raw = bytes(inputt[pb + offset] for offset in range(4))
    (value,) = st.unpack('<i', raw)
    return value
pb is the byte pointer that is passed from the deser_machine function.
2D matrices decoded from bytes are returned by the function:
//
//
//
def get_matrix_float32(inputt, pb, rows, cols, elems_N):
    """Decode a ``rows`` x ``cols`` matrix of little-endian float32 values.

    Args:
        inputt: Sliceable sequence of byte values (``bytes`` or a list of
            ints in 0..255).
        pb: Offset in ``inputt`` of the first byte of the first element.
        rows: Number of matrix rows.
        cols: Number of matrix columns.
        elems_N: Number of float32 elements stored starting at ``pb``.

    Returns:
        A list of ``rows`` independent lists, each holding ``cols`` floats,
        filled in row-major order.

    Raises:
        IndexError: If ``elems_N`` is smaller than ``rows * cols`` or
            ``inputt`` ends before ``elems_N`` elements are available.
    """
    float_size = 4
    if elems_N < rows * cols:
        raise IndexError(
            "matrix %dx%d needs %d elements, only %d declared"
            % (rows, cols, rows * cols, elems_N)
        )
    raw = bytes(inputt[pb:pb + elems_N * float_size])
    if len(raw) != elems_N * float_size:
        raise IndexError("matrix data runs past the end of the input")
    # Read exactly the payload bytes; nothing after the matrix is touched.
    flatten = st.unpack('<%df' % elems_N, raw)
    # Every row must be its own list: ``rows * [[0] * cols]`` repeats a single
    # list object, which makes all rows end up equal to the last one written.
    return [list(flatten[row * cols:(row + 1) * cols]) for row in range(rows)]
The whole code of ko-saver is:
//
//
//
import struct as st
import numpy as np
# Command byte-codes of the serialized stream.
Dmatrix2D = 0  # a named 2D matrix follows
Istop = 1  # end of the stream
def make_bin_string(string: str) -> tuple:
    """Return the code points of *string* followed by a terminating 0."""
    codes = [ord(char) for char in string]
    codes.append(0)  # zero marks the end of the string
    return tuple(codes)
def make_string_from_bin(inputt, pb):
    """Decode a zero-terminated string that starts at ``inputt[pb]``.

    Returns:
        A ``(text, consumed)`` tuple, where ``consumed`` is the number of
        entries read, including the terminating zero.
    """
    chars = []
    cursor = pb
    while True:
        code = inputt[cursor]
        if code == 0:
            break
        chars.append(chr(code))
        cursor += 1
    # +1 accounts for the zero terminator itself.
    return "".join(chars), cursor - pb + 1
def dump_matrix2D(matrix2D_list, H, W):
    """Serialize a 2D matrix into a flat list of byte values.

    Layout of the returned list:
    ``[H, W, <4 bytes: H * W as little-endian int32>, <float32 payload>]``
    where the payload holds the elements in row-major order, each packed as a
    little-endian float32.

    Args:
        matrix2D_list: Matrix indexable as ``matrix2D_list[row][col]``.
        H: Number of rows; stored in a single byte.
        W: Number of columns; stored in a single byte.

    Returns:
        A list of ints, each in the range 0..255.

    Raises:
        ValueError: If ``H`` or ``W`` does not fit in one byte.
    """
    # H and W are written as single raw bytes, so larger values could not be
    # turned into a bytes object (or read back) later on.
    if not (0 <= H <= 255 and 0 <= W <= 255):
        raise ValueError(
            "matrix dimensions must be in range 0..255, got H=%r, W=%r" % (H, W)
        )
    header = [H, W, *st.pack('<i', W * H)]
    # Pack every element exactly once and flatten its four bytes.
    payload = [
        byte
        for row in range(H)
        for col in range(W)
        for byte in st.pack('<f', matrix2D_list[row][col])
    ]
    return header + payload
def get_next4bytes(inputt, pb):
    """Return the little-endian signed 32-bit integer stored at ``inputt[pb]`` .. ``inputt[pb + 3]``."""
    raw = bytes(inputt[pb + offset] for offset in range(4))
    (value,) = st.unpack('<i', raw)
    return value
def get_matrix_float32(inputt, pb, rows, cols, elems_N):
    """Decode a ``rows`` x ``cols`` matrix of little-endian float32 values.

    Args:
        inputt: Sliceable sequence of byte values (``bytes`` or a list of
            ints in 0..255).
        pb: Offset in ``inputt`` of the first byte of the first element.
        rows: Number of matrix rows.
        cols: Number of matrix columns.
        elems_N: Number of float32 elements stored starting at ``pb``.

    Returns:
        A list of ``rows`` independent lists, each holding ``cols`` floats,
        filled in row-major order.

    Raises:
        IndexError: If ``elems_N`` is smaller than ``rows * cols`` or
            ``inputt`` ends before ``elems_N`` elements are available.
    """
    float_size = 4
    if elems_N < rows * cols:
        raise IndexError(
            "matrix %dx%d needs %d elements, only %d declared"
            % (rows, cols, rows * cols, elems_N)
        )
    raw = bytes(inputt[pb:pb + elems_N * float_size])
    if len(raw) != elems_N * float_size:
        raise IndexError("matrix data runs past the end of the input")
    # Read exactly the payload bytes; nothing after the matrix is touched.
    flatten = st.unpack('<%df' % elems_N, raw)
    # Every row must be its own list: ``rows * [[0] * cols]`` repeats a single
    # list object, which makes all rows end up equal to the last one written.
    return [list(flatten[row * cols:(row + 1) * cols]) for row in range(rows)]
def ser_item(inputt):
    """Serialize a sequence of items into one flat list of byte values.

    Each item is handled according to its type:
    - ``int``: appended unchanged (used for command byte-codes);
    - ``str``: encoded with ``make_bin_string``;
    - ``list`` or ``np.ndarray``: treated as a 2D matrix and encoded with
      ``dump_matrix2D``.
    Items of any other type are skipped.
    """
    encoded = []
    for part in inputt:
        if isinstance(part, int):
            encoded.append(part)
            continue
        if isinstance(part, str):
            encoded.extend(make_bin_string(part))
            continue
        if isinstance(part, list):
            height, width = len(part), len(part[0])
        elif isinstance(part, np.ndarray):
            height, width = part.shape[0], part.shape[1]
        else:
            continue  # unsupported type: ignored
        encoded.extend(dump_matrix2D(part, height, width))
    return encoded
def ser_dumps(inputt: dict):
    """Serialize a ``{name: matrix}`` dict into a flat list of byte values.

    Every entry is emitted as the ``Dmatrix2D`` command followed by the name
    and the matrix (via ``ser_item``); the stream ends with ``Istop``.
    """
    stream = []
    for name, matrix in inputt.items():
        stream += ser_item((Dmatrix2D, name, matrix))
    stream.append(Istop)
    return stream
def ser(dictt, fname):
    """Serialize the ``{name: matrix}`` dict *dictt* and write it to *fname*.

    Args:
        dictt: Mapping passed to ``ser_dumps``.
        fname: Path of the output file; it is opened in binary write mode.
    """
    # Build the payload before opening the file, so a serialization error does
    # not leave behind a truncated (empty) output file.
    payload = bytes(ser_dumps(dictt))
    with open(fname, 'wb') as f:
        f.write(payload)
def deser_machine(inputt):
    """Decode a serialized stream into a ``{name: matrix}`` dict.

    The stream is scanned entry by entry. A ``Dmatrix2D`` command is followed
    by a zero-terminated name, one byte for the row count, one byte for the
    column count, a 4-byte element count and the float32 payload. An ``Istop``
    command ends decoding. Any other value is skipped.
    """
    float_size = 4
    cursor = 0
    matrices = {}
    while True:
        opcode = inputt[cursor]
        if opcode == Istop:
            return matrices
        if opcode != Dmatrix2D:
            cursor += 1  # not a known command: move on to the next entry
            continue
        cursor += 1  # step over the command itself
        name, consumed = make_string_from_bin(inputt, cursor)
        cursor += consumed
        rows, cols = inputt[cursor], inputt[cursor + 1]
        cursor += 2
        elems_N = get_next4bytes(inputt, cursor)
        cursor += 4
        matrices[name] = get_matrix_float32(inputt, cursor, rows, cols, elems_N)
        cursor += elems_N * float_size  # jump past the matrix payload
def deser(fname):
    """Read the file *fname* in binary mode and decode it with ``deser_machine``."""
    with open(fname, 'rb') as source:
        raw = source.read()
    return deser_machine(raw)
if __name__ == '__main__':
    # Demo: write two named matrices to 'matr.bin', read them back, print them.
    test_matrix = [[10, 11]]
    test_matrix_new = [[10, 20, 30], [10, 9, 40]]
    matrices = {'test': test_matrix, 'test_matrix_new': test_matrix_new}
    ser(matrices, 'matr.bin')
    outp = deser('matr.bin')
    print('outp', outp)
# NOTE(review): in the recorded output below, both rows of 'test_matrix_new'
# are identical, which differs from the matrix defined above.
"""
Out:
There creates file matr.bin.
It reads as:
outp {'test': [[10.0, 11.0]], 'test_matrix_new': [[10.0, 9.0, 40.0], [10.0, 9.0, 40.0]]}
"""
History
- 25th August, 2021: Initial version