Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles / Languages / Python

My Own Serializer and Deserializer on Python for Machine Learning Goals

2.00/5 (2 votes)
25 Aug 2021CPOL1 min read 4.7K   30  
Create serializer and deserializer to save machine learning parameters
In this tip, I will show you a serializer and deserializer that I developed for saving machine learning parameters.

Introduction

I want to show you how I developed a binary serializer and deserializer that I call ko-saver. It can be used as a prototype for rewriting the code in C++ to run on a simple controller, such as an Arduino. We can train neural nets in Python on a computer and then use the trained network on the controller, so we need a serializer and deserializer. In networks, we often need to use 2D matrices like:

Python
//
//
// 
l=[[12.2, 5.6, 9.8],
   [34.6, 78.7, 10.8]]

ko-saver serializes them in the following format:

  • <byte-code that creates 2D matrix> ::= Dmatrix2D <matrix name as a zero-terminated string> <matrix row count> <matrix column count>
  • <matrix element count> <serialized run of elements, each element as float32>

Here is the function that accepts a 2D list matrix, its row count, and its column count:

Python
//
//
//
def dump_matrix2D(matrix2D_list, H, W):
    """Serialize a 2D matrix into a flat list of byte values.

    Layout of the returned list:
      * ``H`` and ``W``, one list entry each, stored exactly as given;
      * the element count ``H * W`` as a little-endian int32 (4 entries);
      * every element in row-major order as a little-endian float32
        (4 entries per element).

    Args:
        matrix2D_list: Matrix indexable as ``matrix2D_list[row][col]``.
        H: Number of rows.
        W: Number of columns.

    Returns:
        A new list of ints holding the header followed by the elements.
    """
    float_length = 4
    N = W * H
    out_vector = [0] * (N * float_length)
    # Header: rows, cols, then the 4 bytes of the element count.
    out_vector_ = [H, W, *st.pack('<i', N)]
    k = 0
    for row in range(H):
        for elem in range(W):
            # Pack each element once and copy its 4 bytes in one step.
            elem_bin = st.pack('<f', matrix2D_list[row][elem])  # as float32
            out_vector[k:k + float_length] = elem_bin
            k += float_length
    out_vector_.extend(out_vector)
    return out_vector_

It returns a list in which everything is serialized as bytes (numbers between 0 and 255). The number of elements in the matrix is encoded in the function by this code:

Python
//
//
//
N_bin=st.pack('<i', N)

The string holding the name of the matrix is encoded by this function:

Python
//
//
//
def make_bin_string(string: str) -> tuple:
    """Encode *string* as a tuple of character codes ending with a 0 terminator.

    Each character contributes its ``ord`` value, so the entries are
    byte-sized only when every character's code point is below 256.
    """
    codes = [ord(ch) for ch in string]
    codes.append(0)  # terminator that marks the end of the string
    return tuple(codes)

It encodes the string with a zero byte at the end. The serializer for one matrix is the function ser_item. It accepts a sequence whose items may be integers (bytecodes, such as the one that creates a matrix or the one that stops serialization), a Python string (the name of the matrix), and a Python list (the matrix itself):

Python
//
//
//
# Serializator.
def ser_item(inputt):
    """Serialize a sequence of items into one flat list of byte values.

    Items are handled by type:
      * ``int``: appended unchanged (used for opcodes);
      * ``list``: treated as a 2D matrix and encoded with ``dump_matrix2D``,
        taking the column count from the first row;
      * ``numpy.ndarray``: encoded with ``dump_matrix2D`` using the first
        two entries of its shape;
      * ``str``: encoded with ``make_bin_string``.
    Items of any other type are skipped.
    """
    encoded = []
    for item in inputt:
        if isinstance(item, int):
            encoded.append(item)
            continue
        if isinstance(item, list):
            encoded += dump_matrix2D(item, len(item), len(item[0]))
            continue
        if isinstance(item, np.ndarray):
            n_rows, n_cols = item.shape[0], item.shape[1]
            encoded += dump_matrix2D(item, n_rows, n_cols)
            continue
        if isinstance(item, str):
            encoded += make_bin_string(item)
    return encoded

It returns a list that holds the serialized matrix together with its bytecodes. Strings are decoded by this function:

Python
//
//
//
def make_string_from_bin(inputt, pb):
    """Decode a zero-terminated string that starts at index *pb* of *inputt*.

    Returns:
        A ``(text, consumed)`` pair, where ``consumed`` is the number of
        entries read including the terminating zero.
    """
    chars = []
    cursor = pb
    while inputt[cursor] != 0:
        chars.append(chr(inputt[cursor]))
        cursor += 1
    # +1 so the count also covers the zero terminator
    return "".join(chars), cursor - pb + 1

pb is the byte pointer (index) passed in from the deser_machine function. The number of elements in the matrix is decoded by this function:

Python
//
//
//
def get_next4bytes(inputt, pb):
    """Read the 4 entries of *inputt* starting at *pb* as a little-endian int32."""
    raw = bytes([inputt[pb + offset] for offset in range(4)])
    (value,) = st.unpack('<i', raw)
    return value

pb is the byte pointer (index) passed in from the deser_machine function.

2D matrices decoded from the bytes are returned by this function:

Python
//
//
//
def get_matrix_float32(inputt, pb, rows, cols, elems_N):
    """Decode a row-major run of little-endian float32 values into a 2D list.

    Exactly ``elems_N * 4`` entries are read, starting at index *pb*; nothing
    past the matrix data is touched, so the matrix may be the last thing in
    *inputt*.

    Args:
        inputt: Sliceable sequence of byte values (``bytes`` or a list of ints).
        pb: Index of the first byte of the first element.
        rows: Number of rows in the resulting matrix.
        cols: Number of columns in the resulting matrix.
        elems_N: Number of float32 elements stored starting at *pb*.

    Returns:
        A list of ``rows`` independent lists, each holding ``cols`` floats.

    Raises:
        ValueError: If ``elems_N`` is negative, if fewer than ``elems_N * 4``
            bytes are available, or if ``elems_N`` is smaller than
            ``rows * cols``.
    """
    float_size = 4
    if elems_N < 0:
        raise ValueError("elems_N must be non-negative, got %d" % elems_N)
    flatten_len = elems_N * float_size

    raw = bytes(inputt[pb:pb + flatten_len])
    if len(raw) != flatten_len:
        raise ValueError(
            "matrix data truncated: expected %d bytes, got %d"
            % (flatten_len, len(raw))
        )
    if elems_N < rows * cols:
        raise ValueError(
            "elems_N=%d is too small for a %dx%d matrix" % (elems_N, rows, cols)
        )

    flatten = st.unpack('<%df' % elems_N, raw)

    # Build a fresh list per row. `rows * [[0] * cols]` would repeat one shared
    # row object, making every row show the values of the last one written.
    return [list(flatten[row * cols:(row + 1) * cols]) for row in range(rows)]

The whole code of ko-saver is:

Python
//
//
//

import struct as st
import numpy as np

# Stream opcodes: the "D" prefix marks a data record, "I" an instruction.
Dmatrix2D = 0  # a named 2D matrix record follows
Istop = 1      # end of the serialized stream

def make_bin_string(string: str) -> tuple:
    """Encode *string* as a tuple of character codes ending with a 0 terminator.

    Each character contributes its ``ord`` value, so the entries are
    byte-sized only when every character's code point is below 256.
    """
    codes = [ord(ch) for ch in string]
    codes.append(0)  # terminator that marks the end of the string
    return tuple(codes)

def make_string_from_bin(inputt, pb):
    """Decode a zero-terminated string that starts at index *pb* of *inputt*.

    Returns:
        A ``(text, consumed)`` pair, where ``consumed`` is the number of
        entries read including the terminating zero.
    """
    chars = []
    cursor = pb
    while inputt[cursor] != 0:
        chars.append(chr(inputt[cursor]))
        cursor += 1
    # +1 so the count also covers the zero terminator
    return "".join(chars), cursor - pb + 1

def dump_matrix2D(matrix2D_list, H, W):
    """Serialize a 2D matrix into a flat list of byte values.

    Layout of the returned list:
      * ``H`` and ``W``, one list entry each, stored exactly as given;
      * the element count ``H * W`` as a little-endian int32 (4 entries);
      * every element in row-major order as a little-endian float32
        (4 entries per element).

    Args:
        matrix2D_list: Matrix indexable as ``matrix2D_list[row][col]``.
        H: Number of rows.
        W: Number of columns.

    Returns:
        A new list of ints holding the header followed by the elements.
    """
    float_bytes = 4
    total = W * H
    payload = [0] * (total * float_bytes)
    # Header: rows, cols, then the 4 bytes of the element count.
    header = [H, W, *st.pack('<i', total)]
    offset = 0
    for r in range(H):
        for c in range(W):
            # One float32 occupies the next 4 slots of the payload.
            payload[offset:offset + float_bytes] = st.pack('<f', matrix2D_list[r][c])
            offset += float_bytes
    return header + payload

def get_next4bytes(inputt, pb):
    """Read the 4 entries of *inputt* starting at *pb* as a little-endian int32."""
    raw = bytes([inputt[pb + offset] for offset in range(4)])
    (value,) = st.unpack('<i', raw)
    return value

def get_matrix_float32(inputt, pb, rows, cols, elems_N):
    """Decode a row-major run of little-endian float32 values into a 2D list.

    Exactly ``elems_N * 4`` entries are read, starting at index *pb*; nothing
    past the matrix data is touched, so the matrix may be the last thing in
    *inputt*.

    Args:
        inputt: Sliceable sequence of byte values (``bytes`` or a list of ints).
        pb: Index of the first byte of the first element.
        rows: Number of rows in the resulting matrix.
        cols: Number of columns in the resulting matrix.
        elems_N: Number of float32 elements stored starting at *pb*.

    Returns:
        A list of ``rows`` independent lists, each holding ``cols`` floats.

    Raises:
        ValueError: If ``elems_N`` is negative, if fewer than ``elems_N * 4``
            bytes are available, or if ``elems_N`` is smaller than
            ``rows * cols``.
    """
    float_size = 4
    if elems_N < 0:
        raise ValueError("elems_N must be non-negative, got %d" % elems_N)
    flatten_len = elems_N * float_size

    raw = bytes(inputt[pb:pb + flatten_len])
    if len(raw) != flatten_len:
        raise ValueError(
            "matrix data truncated: expected %d bytes, got %d"
            % (flatten_len, len(raw))
        )
    if elems_N < rows * cols:
        raise ValueError(
            "elems_N=%d is too small for a %dx%d matrix" % (elems_N, rows, cols)
        )

    flatten = st.unpack('<%df' % elems_N, raw)

    # Build a fresh list per row. `rows * [[0] * cols]` would repeat one shared
    # row object, making every row show the values of the last one written.
    return [list(flatten[row * cols:(row + 1) * cols]) for row in range(rows)]

# Serializator.
def ser_item(inputt):
    """Serialize a sequence of items into one flat list of byte values.

    Items are handled by type:
      * ``int``: appended unchanged (used for opcodes);
      * ``list``: treated as a 2D matrix and encoded with ``dump_matrix2D``,
        taking the column count from the first row;
      * ``numpy.ndarray``: encoded with ``dump_matrix2D`` using the first
        two entries of its shape;
      * ``str``: encoded with ``make_bin_string``.
    Items of any other type are skipped.
    """
    encoded = []
    for item in inputt:
        if isinstance(item, int):
            encoded.append(item)
            continue
        if isinstance(item, list):
            encoded += dump_matrix2D(item, len(item), len(item[0]))
            continue
        if isinstance(item, np.ndarray):
            n_rows, n_cols = item.shape[0], item.shape[1]
            encoded += dump_matrix2D(item, n_rows, n_cols)
            continue
        if isinstance(item, str):
            encoded += make_bin_string(item)
    return encoded

def ser_dumps(inputt: dict):
    """Serialize a ``{name: matrix}`` dict into one flat list of byte values.

    Every entry is emitted as a ``Dmatrix2D`` record (opcode, name, matrix)
    via ``ser_item``; the stream is closed with a single ``Istop`` opcode.
    """
    stream = []
    for name, matrix in inputt.items():
        stream.extend(ser_item((Dmatrix2D, name, matrix)))
    stream.append(Istop)
    return stream

def ser(dictt, fname):
    """Serialize the ``{name: matrix}`` dict *dictt* and write it to *fname*.

    The data is serialized completely before the file is opened. Opening
    with ``'wb'`` truncates the file, so serializing first ensures that a
    failure in serialization neither destroys an existing file nor leaves an
    empty one behind.

    Args:
        dictt: Mapping of matrix names to 2D matrices, as accepted by
            ``ser_dumps``.
        fname: Path of the binary file to write.
    """
    payload = bytes(ser_dumps(dictt))
    with open(fname, 'wb') as f:
        f.write(payload)

# Deserializator. 
def deser_machine(inputt):
    """Decode a serialized stream into a ``{name: matrix}`` dict.

    The stream is walked opcode by opcode. A ``Dmatrix2D`` opcode is followed
    by a zero-terminated name, one entry for the row count, one for the column
    count, a 4-byte element count and the float32 elements. ``Istop`` ends
    decoding and returns what has been collected. Any other value is skipped
    one entry at a time.
    """
    float_bytes = 4
    cursor = 0
    matrices = {}
    while True:
        opcode = inputt[cursor]
        if opcode == Istop:
            return matrices
        if opcode != Dmatrix2D:
            cursor += 1
            continue

        cursor += 1  # step over the opcode itself
        name, consumed = make_string_from_bin(inputt, cursor)
        cursor += consumed
        rows = inputt[cursor]
        cols = inputt[cursor + 1]
        cursor += 2
        elems_N = get_next4bytes(inputt, cursor)
        cursor += 4
        matrices[name] = get_matrix_float32(inputt, cursor, rows, cols, elems_N)
        # Land on the first entry after the matrix data.
        cursor += elems_N * float_bytes

def deser(fname):
    """Read the binary file *fname* and decode it with ``deser_machine``."""
    with open(fname, 'rb') as stream:
        raw = stream.read()
    return deser_machine(raw)

if __name__ == '__main__':
    # Round-trip demo: write two named matrices to matr.bin, read them back
    # and print the decoded dict.
    demo = {
        'test': [[10, 11]],
        'test_matrix_new': [[10, 20, 30], [10, 9, 40]],
    }
    ser(demo, 'matr.bin')
    outp = deser('matr.bin')
    print('outp', outp)

"""
Out:
This creates the file matr.bin.
Reading it back prints:
outp {'test': [[10.0, 11.0]], 'test_matrix_new': [[10.0, 9.0, 40.0], [10.0, 9.0, 40.0]]}
"""

History

  • 25th August, 2021: Initial version

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)