Preparing a BYOA Model with Flatbuffer
FlatBuffers is a serialization format developed by Google that standardizes the format of the data across different clients.
Creating Models from Schema
In order to start using Flatbuffer we need to install the flatbuffer compiler.
After the flatbuffer compiler (flatc) is installed, we can start generating models.
BYOA models are compatible with flatc version 1.9.0
In order to generate models, we first need the BYOA Model schema. The schema defines the fields and types that will be serialized.
Add
modelSchema.fbs
to your project.
Project
│ README.md
│ modelSchema.fbs
...
The
modelSchema.fbs
will contain the following schema definition.
// Generated code for this schema is placed in the "linearmodel" namespace.
namespace linearmodel;
// Root container: holds one CalibLogModel per goal type.
table GroupModels {
models:[CalibLogModel];
}
// A single model for one goal type.
table CalibLogModel {
group:string; // the goal type, lowercase (e.g. "cpa", "cpc", "ctr")
beta:[float]; // exactly 3 values: [downsampling b, platt_alpha, platt_beta]
features:[string]; // feature names; the intercept must be named "__const"
weights:[float]; // presumably weights[i] belongs to features[i] -- confirm with the BYOA API docs
preds:[float]; // must be empty (size = 0)
bounds:[float]; // must be empty (size = 0)
}
// GroupModels is the object found at the root of a serialized buffer.
root_type GroupModels;
Note:
- Group is the goal type
- All goal types in group:string; need to be lowercase: e.g.
"cpa", "cpe", "cpc", "ctr", "roi", "vcr", "vcpm", "viewability_rate"
- The intercept field in Features needs to be named:
"__const"
- The beta array needs to be of size 3
- The Preds and Bounds arrays need to be empty (size = 0)
Looking at the schema there are two objects defined: GroupModels
and CalibLogModel
.
GroupModels has a single field models and models is a vector of CalibLogModel. Looking at
CalibLogModel
we can see that it has fields that are used to evaluate the model.
Flatbuffer allows us to generate helper classes and methods for many different languages. In this example we are going to create the model in Python. All we need to do is run a simple command.
flatc --python modelSchema.fbs
After running this command we should see a directory linearmodel
in the root of the project. Inside of this folder we can see the generated files.
Project
│ README.md
└───linearmodel
│ │ CalibLogModel.py
│ │ GroupModels.py
...
In those files we can find the python representation of the models.
GroupModels.py
# automatically generated by the FlatBuffers compiler, do not modify
# namespace: linearmodel
import flatbuffers
# Read accessor for a serialized GroupModels table (the schema's root_type).
# NOTE: flatc output -- regenerate from modelSchema.fbs instead of editing by hand.
class GroupModels(object):
# The only per-instance state is the flatbuffers Table stored in _tab.
__slots__ = ['_tab']
# Alternate constructor: reads the uoffset stored at `offset` in `buf` and
# initialises a GroupModels accessor at position `n + offset`.
@classmethod
def GetRootAsGroupModels(cls, buf, offset):
n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
x = GroupModels()
x.Init(buf, n + offset)
return x
# GroupModels: bind this accessor to the table located at `pos` inside `buf`.
def Init(self, buf, pos):
self._tab = flatbuffers.table.Table(buf, pos)
# GroupModels: return the j-th entry of `models` as a CalibLogModel accessor,
# or None when the field (vtable offset 4) is not present in the buffer.
def Models(self, j):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
if o != 0:
x = self._tab.Vector(o)
# each vector element is a 4-byte offset to a table, hence the j * 4 step
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
x = self._tab.Indirect(x)
# imported here rather than at module top (as emitted by flatc)
from .CalibLogModel import CalibLogModel
obj = CalibLogModel()
# the returned accessor reads from the same underlying bytes as this table
obj.Init(self._tab.Bytes, x)
return obj
return None
# GroupModels: number of entries in `models`; 0 when the field is absent.
def ModelsLength(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
if o != 0:
return self._tab.VectorLen(o)
return 0
# Builder helpers for writing a GroupModels table (flatc output).
# Begin a GroupModels table with 1 field slot; note that nothing is returned.
def GroupModelsStart(builder): builder.StartObject(1)
# Store the offset of an already-finished `models` vector in slot 0.
def GroupModelsAddModels(builder, models): builder.PrependUOffsetTRelativeSlot(0, flatbuffers.number_types.UOffsetTFlags.py_type(models), 0)
# Begin the `models` vector: numElems elements of 4 bytes each, 4-byte alignment.
def GroupModelsStartModelsVector(builder, numElems): return builder.StartVector(4, numElems, 4)
# Finish the table and return the value of builder.EndObject() (the table's offset).
def GroupModelsEnd(builder): return builder.EndObject()
CalibLogModel.py
# automatically generated by the FlatBuffers compiler, do not modify
# namespace: linearmodel
import flatbuffers
# Read accessor for a serialized CalibLogModel table.
# Fields are looked up by vtable offset: group=4, beta=6, features=8,
# weights=10, preds=12, bounds=14.
# NOTE: flatc output -- regenerate from modelSchema.fbs instead of editing by hand.
class CalibLogModel(object):
# The only per-instance state is the flatbuffers Table stored in _tab.
__slots__ = ['_tab']
# Alternate constructor: reads the uoffset stored at `offset` in `buf` and
# initialises a CalibLogModel accessor at position `n + offset`.
@classmethod
def GetRootAsCalibLogModel(cls, buf, offset):
n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
x = CalibLogModel()
x.Init(buf, n + offset)
return x
# CalibLogModel: bind this accessor to the table located at `pos` inside `buf`.
def Init(self, buf, pos):
self._tab = flatbuffers.table.Table(buf, pos)
# CalibLogModel: the `group` string, or None when the field is absent.
def Group(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
if o != 0:
return self._tab.String(o + self._tab.Pos)
return None
# CalibLogModel: element j of `beta` read as a float32; 0 when the field is absent.
def Beta(self, j):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
if o != 0:
a = self._tab.Vector(o)
# elements are 4 bytes wide, hence the j * 4 step
return self._tab.Get(flatbuffers.number_types.Float32Flags, a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4))
return 0
# CalibLogModel: the whole `beta` vector via GetVectorAsNumpy.
# When the field is absent this returns the int 0, not an empty array.
def BetaAsNumpy(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
if o != 0:
return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Float32Flags, o)
return 0
# CalibLogModel: number of elements in `beta`; 0 when the field is absent.
def BetaLength(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
if o != 0:
return self._tab.VectorLen(o)
return 0
# CalibLogModel: element j of `features` as a string; "" when the field is absent.
def Features(self, j):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
if o != 0:
a = self._tab.Vector(o)
# each element is a 4-byte offset to the string data
return self._tab.String(a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4))
return ""
# CalibLogModel: number of elements in `features`; 0 when the field is absent.
def FeaturesLength(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
if o != 0:
return self._tab.VectorLen(o)
return 0
# CalibLogModel: element j of `weights` read as a float32; 0 when the field is absent.
def Weights(self, j):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
if o != 0:
a = self._tab.Vector(o)
return self._tab.Get(flatbuffers.number_types.Float32Flags, a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4))
return 0
# CalibLogModel: the whole `weights` vector via GetVectorAsNumpy.
# When the field is absent this returns the int 0, not an empty array.
def WeightsAsNumpy(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
if o != 0:
return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Float32Flags, o)
return 0
# CalibLogModel: number of elements in `weights`; 0 when the field is absent.
def WeightsLength(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
if o != 0:
return self._tab.VectorLen(o)
return 0
# CalibLogModel: element j of `preds` read as a float32; 0 when the field is absent.
def Preds(self, j):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12))
if o != 0:
a = self._tab.Vector(o)
return self._tab.Get(flatbuffers.number_types.Float32Flags, a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4))
return 0
# CalibLogModel: the whole `preds` vector via GetVectorAsNumpy.
# When the field is absent this returns the int 0, not an empty array.
def PredsAsNumpy(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12))
if o != 0:
return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Float32Flags, o)
return 0
# CalibLogModel: number of elements in `preds`; 0 when the field is absent.
def PredsLength(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12))
if o != 0:
return self._tab.VectorLen(o)
return 0
# CalibLogModel: element j of `bounds` read as a float32; 0 when the field is absent.
def Bounds(self, j):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14))
if o != 0:
a = self._tab.Vector(o)
return self._tab.Get(flatbuffers.number_types.Float32Flags, a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4))
return 0
# CalibLogModel: the whole `bounds` vector via GetVectorAsNumpy.
# When the field is absent this returns the int 0, not an empty array.
def BoundsAsNumpy(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14))
if o != 0:
return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Float32Flags, o)
return 0
# CalibLogModel: number of elements in `bounds`; 0 when the field is absent.
def BoundsLength(self):
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14))
if o != 0:
return self._tab.VectorLen(o)
return 0
# Builder helpers for writing a CalibLogModel table (flatc output).
# Slot numbers: group=0, beta=1, features=2, weights=3, preds=4, bounds=5.
# Every Start*Vector helper starts a vector of numElems 4-byte elements with
# 4-byte alignment; every Add* helper stores an already-built offset in its slot.
# Begin a CalibLogModel table with 6 field slots; note that nothing is returned.
def CalibLogModelStart(builder): builder.StartObject(6)
# Store the offset of the `group` string in slot 0.
def CalibLogModelAddGroup(builder, group): builder.PrependUOffsetTRelativeSlot(0, flatbuffers.number_types.UOffsetTFlags.py_type(group), 0)
# Store the offset of the `beta` vector in slot 1.
def CalibLogModelAddBeta(builder, beta): builder.PrependUOffsetTRelativeSlot(1, flatbuffers.number_types.UOffsetTFlags.py_type(beta), 0)
def CalibLogModelStartBetaVector(builder, numElems): return builder.StartVector(4, numElems, 4)
# Store the offset of the `features` vector in slot 2.
def CalibLogModelAddFeatures(builder, features): builder.PrependUOffsetTRelativeSlot(2, flatbuffers.number_types.UOffsetTFlags.py_type(features), 0)
def CalibLogModelStartFeaturesVector(builder, numElems): return builder.StartVector(4, numElems, 4)
# Store the offset of the `weights` vector in slot 3.
def CalibLogModelAddWeights(builder, weights): builder.PrependUOffsetTRelativeSlot(3, flatbuffers.number_types.UOffsetTFlags.py_type(weights), 0)
def CalibLogModelStartWeightsVector(builder, numElems): return builder.StartVector(4, numElems, 4)
# Store the offset of the `preds` vector in slot 4.
def CalibLogModelAddPreds(builder, preds): builder.PrependUOffsetTRelativeSlot(4, flatbuffers.number_types.UOffsetTFlags.py_type(preds), 0)
def CalibLogModelStartPredsVector(builder, numElems): return builder.StartVector(4, numElems, 4)
# Store the offset of the `bounds` vector in slot 5.
def CalibLogModelAddBounds(builder, bounds): builder.PrependUOffsetTRelativeSlot(5, flatbuffers.number_types.UOffsetTFlags.py_type(bounds), 0)
def CalibLogModelStartBoundsVector(builder, numElems): return builder.StartVector(4, numElems, 4)
# Finish the table and return the value of builder.EndObject() (the table's offset).
def CalibLogModelEnd(builder): return builder.EndObject()
Downsampling Correction
b = beta[0]
b
is used for downsampling bias correction. It is simply the ratio of positives to negatives seen during training.
Note: If the training algorithm does not downsample during training, you can set
b
to 1.0
so that your response rate is not affected.
Bias correction (effect on response rates):
Goal Type | Calculation |
---|---|
ROI | p' = p * b / (b - (b-1) * math.Exp(-p)) |
Other goal types | p' = p * b / (1 + (b-1)*p) |
Calibration
platt_alpha = beta[1]
platt_beta = beta[2]
platt_alpha
and platt_beta
are platt calibration coefficients which you have to learn during training.
If you do not need calibration,
platt_alpha
has to be 0.0
and platt_beta
has to be 1.0
so that response rates are not affected.
Calibration (effect on response rates):
p' - response rates after downsampling correction
Goal Type | Calculation |
---|---|
ROI | z' = log(p') |
ROI | p_calib = exp(platt_alpha + platt_beta * z') |
Other goal types | z' = log(p'/(1-p')) |
Other goal types | p_calib = sigmoid(platt_alpha + platt_beta * z') |
Working with Models
We are going to take some simple data and build the flatbuffer model from this data.
{
"Data": [
{
"GoalType": "cpa",
"Beta": [
0.0004438111209310591,
-2.4949839115142822,
0.6438648700714111
],
"Features": {
"__const": -0.07439646124839783,
"bidder_pixel_frequency^mm:1246263": 0.020179834216833115,
"bidder_pixel_frequency^mm:1246265": 0.003873385488986969,
"bidder_pixel_frequency^mm:1246267": 0.0038846954703330994,
"bidder_pixel_frequency^mm:1246271": 0.11398277431726456,
"bidder_pixel_recency^mm:1246263": 0.2703247368335724,
"bidder_pixel_recency^mm:1246265": 0.2470093071460724,
"bidder_pixel_recency^mm:1246267": 0.2959757149219513,
"bidder_pixel_recency^mm:1246271": 0.02602006494998932,
"size^10486360": -0.1345592439174652,
"size^11796630": -0.0033570826053619385,
"size^19660850": 0.08247312903404236,
"size^19661050": -0.055712759494781494,
"size^19661400": -0.0033570826053619385,
"size^20972000": -0.12369024753570557,
"size^22020376": 0.04631700739264488,
"size^30670908": -0.1345592439174652,
"size^41943280": -0.005682997405529022,
"size^47710298": -0.1242508590221405,
"size^63570010": -0.022922880947589874,
"size^63570170": -0.1242508590221405,
"size^7864920": -0.15811769664287567,
"user_frequency^99": -0.07439646124839783,
"week_part^0": -0.011629588901996613,
"week_part^1": -0.06539076566696167
},
"Predictions": null,
"Boundaries": null
},
{
"GoalType": "cpc",
"Beta": [
0.0004438111209310591,
-2.4949839115142822,
0.6438648700714111
],
"Features": {
"__const": -0.07439646124839783,
"bidder_pixel_frequency^mm:1246263": 0.020179834216833115,
"bidder_pixel_frequency^mm:1246265": 0.003873385488986969,
"bidder_pixel_frequency^mm:1246267": 0.0038846954703330994,
"bidder_pixel_frequency^mm:1246271": 0.11398277431726456,
"bidder_pixel_recency^mm:1246263": 0.2703247368335724,
"bidder_pixel_recency^mm:1246265": 0.2470093071460724,
"bidder_pixel_recency^mm:1246267": 0.2959757149219513,
"bidder_pixel_recency^mm:1246271": 0.02602006494998932,
"size^10486360": -0.1345592439174652,
"size^11796630": -0.0033570826053619385,
"size^19660850": 0.08247312903404236,
"size^19661050": -0.055712759494781494,
"size^19661400": -0.0033570826053619385,
"size^20972000": -0.12369024753570557,
"size^22020376": 0.04631700739264488,
"size^30670908": -0.1345592439174652,
"size^41943280": -0.005682997405529022,
"size^47710298": -0.1242508590221405,
"size^63570010": -0.022922880947589874,
"size^63570170": -0.1242508590221405,
"size^7864920": -0.15811769664287567,
"user_frequency^99": -0.07439646124839783,
"week_part^0": -0.011629588901996613,
"week_part^1": -0.06539076566696167
},
"Predictions": null,
"Boundaries": null
}
]
}
In order to start building the model in flatbuffer we need to install one more dependency.
pip install flatbuffers
Now we can start working with flatbuffers by making necessary imports.
import flatbuffers
import base64
import json
import linearmodel.GroupModels
import linearmodel.CalibLogModel
First we need to create the builder object.
# Create the builder with an initial buffer size of 1024 bytes; the buffer will grow as needed.
builder = flatbuffers.Builder(1024)
Flatbuffers are built from the inside out, meaning all strings, arrays and other nested structures need to be created before the object that contains them.
If we look back to our json data we will see it has a data key and the data key has two members. Each member represents the model for a specific goal type.
In order for us to build our GroupModels
we will need to build a CalibLogModel
for each goal type first.
So let's start with building our first flatbuffer model with the following file
import flatbuffers
import base64
import json
import linearmodel.GroupModels
import linearmodel.CalibLogModel


def _build_float32_vector(builder, start_vector, values):
    """Write `values` as a [float] vector and return the vector's offset.

    `start_vector` is one of the generated CalibLogModelStart*Vector helpers.
    """
    start_vector(builder, len(values))
    # FlatBuffers vectors are written back to front, so we prepend the
    # elements in reverse to keep them in their original order when read.
    for value in reversed(values):
        # the schema declares [float] (32 bit), so Float32 -- not Float64.
        builder.PrependFloat32(value)
    # NOTE: the 1.x python runtime (matching flatc 1.9.0) takes the element
    # count here; newer runtimes (2.0+) expect builder.EndVector() instead.
    return builder.EndVector(len(values))


def _build_calib_log_model(builder, model):
    """Serialize one entry of the json "Data" list as a CalibLogModel.

    Returns the offset of the finished table inside `builder`.
    Raises ValueError when the Beta array does not have exactly 3 values.
    """
    # looking back to our schema
    # group:string; // this is the goal type
    # beta:[float];
    # features:[string];
    # weights:[float];
    # preds:[float];
    # bounds:[float];
    # We are going to create 1 string and 5 vectors.
    features = model.get('Features')
    beta = model.get('Beta')
    goal_type = model.get('GoalType')
    if len(beta) != 3:
        raise ValueError("The beta array needs to be of size 3")

    # Since our schema has separate vectors for features and
    # weight we would need to extract them from our features variable.
    # we make it a list of tuple to preserve an order.
    feature_weights = list(features.items())

    # Strings (and vectors) can not be created while a table or another
    # vector is being built, so every string is created up front and we
    # only keep its offset.
    group_offset = builder.CreateString(goal_type)
    features_offset = [builder.CreateString(key) for key, _ in feature_weights]

    # the features vector is a vector of offsets to the strings created above.
    linearmodel.CalibLogModel.CalibLogModelStartFeaturesVector(builder, len(features_offset))
    for feature in reversed(features_offset):
        builder.PrependUOffsetTRelative(feature)
    features_vec = builder.EndVector(len(features_offset))

    # we repeat the process of creating vectors and storing an offset of
    # those vectors for all scalar types.
    weights_vec = _build_float32_vector(
        builder,
        linearmodel.CalibLogModel.CalibLogModelStartWeightsVector,
        [val for _, val in feature_weights],
    )
    beta_vec = _build_float32_vector(
        builder,
        linearmodel.CalibLogModel.CalibLogModelStartBetaVector,
        beta,
    )
    # The Preds and Bounds arrays need to be empty (size = 0).
    preds_vec = _build_float32_vector(
        builder,
        linearmodel.CalibLogModel.CalibLogModelStartPredsVector,
        [],
    )
    bounds_vec = _build_float32_vector(
        builder,
        linearmodel.CalibLogModel.CalibLogModelStartBoundsVector,
        [],
    )

    # Finally we build the outermost object that is CalibLogModel
    linearmodel.CalibLogModel.CalibLogModelStart(builder)
    linearmodel.CalibLogModel.CalibLogModelAddGroup(builder, group_offset)
    linearmodel.CalibLogModel.CalibLogModelAddBeta(builder, beta_vec)
    linearmodel.CalibLogModel.CalibLogModelAddFeatures(builder, features_vec)
    linearmodel.CalibLogModel.CalibLogModelAddWeights(builder, weights_vec)
    linearmodel.CalibLogModel.CalibLogModelAddPreds(builder, preds_vec)
    linearmodel.CalibLogModel.CalibLogModelAddBounds(builder, bounds_vec)
    # Building the buffered model and returning the offset
    return linearmodel.CalibLogModel.CalibLogModelEnd(builder)


# json.loads() parses a json *string*; model.json is a file, so we open it
# and use json.load() instead.
with open("model.json") as model_file:
    model_json = json.load(model_file)

# a simple buffer, it will grow as needed.
builder = flatbuffers.Builder(1024)

# our json data has one entry per goal type: "cpa" first, then "cpc".
cpa_model = model_json.get("Data")[0]
cpc_model = model_json.get("Data")[1]

# every CalibLogModel has to be finished before we start the GroupModels.
cpa_buf_model = _build_calib_log_model(builder, cpa_model)
cpc_buf_model = _build_calib_log_model(builder, cpc_model)

# now we have two buffered models. GroupModels.models is a vector of tables,
# so we first create a vector that holds the offsets of our models ...
buf_models = [cpa_buf_model, cpc_buf_model]
linearmodel.GroupModels.GroupModelsStartModelsVector(builder, len(buf_models))
for buf_model in reversed(buf_models):
    builder.PrependUOffsetTRelative(buf_model)
models_vec = builder.EndVector(len(buf_models))

# ... and then we can create the final object linearmodel.GroupModels
linearmodel.GroupModels.GroupModelsStart(builder)
linearmodel.GroupModels.GroupModelsAddModels(builder, models_vec)
group_model = linearmodel.GroupModels.GroupModelsEnd(builder)

# as the final step we call Finish() on our builder.
builder.Finish(group_model)

# to get the serialized data that we discussed in Getting Started and
# upload it to the BYOA api we can call Output on builder.
buf = builder.Output()
# buffer is a bytearray representation of the model and is ready to be
# uploaded through the BYOA model upload endpoint.