1. Introduction
For the introduction and use cases, please see explainer.md.
For illustration purposes, the API and examples use the TF Lite flatbuffer format.
2. API
```webidl
enum MLModelFormat {
  // TensorFlow Lite flatbuffer.
  "tflite"
};

enum MLDevicePreference {
  // Let the backend select the most suitable device.
  "auto",
  // The backend will use the GPU for model inference. If an operator is not
  // supported by the GPU, it will fall back to the CPU.
  "gpu",
  // The backend will use the CPU for model inference.
  "cpu"
};

enum MLPowerPreference {
  // Let the backend select the most suitable behavior.
  "auto",
  // Prioritizes execution speed over power consumption.
  "high-performance",
  // Prioritizes power consumption over other considerations such as execution
  // speed.
  "low-power",
};

dictionary MLContextOptions {
  // Preferred kind of device to use.
  MLDevicePreference devicePreference = "auto";
  // Preference as related to power consumption.
  MLPowerPreference powerPreference = "auto";
  // Model format for the model loader API.
  MLModelFormat modelFormat = "tflite";
  // Number of threads to use.
  // "0" means the backend can determine it automatically.
  unsigned long numThreads = 0;
};

[Exposed=Window]
interface ML {
  Promise<MLContext> createContext(optional MLContextOptions options = {});
};

enum MLDataType {
  // "Unknown" doesn't mean "unsupported". The backend can support more types
  // than those explicitly listed here (e.g., TFLite has complex numbers). We
  // treat them as "unknown" to avoid exposing too many details of the
  // backends from the beginning.
  "unknown",
  "int64",
  "uint64",
  "float64",
  "int32",
  "uint32",
  "float32",
  "int16",
  "uint16",
  "float16",
  "int8",
  "uint8",
  "bool",
};

dictionary MLTensor {
  required ArrayBufferView data;
  required sequence<unsigned long> dimensions;
};

dictionary MLTensorInfo {
  required DOMString name;
  required MLDataType type;
  required sequence<unsigned long> dimensions;
};

[SecureContext, Exposed=Window]
interface MLModel {
  Promise<record<DOMString, MLTensor>> compute(record<DOMString, MLTensor> inputs);
  sequence<MLTensorInfo> inputs();
  sequence<MLTensorInfo> outputs();
};

[Exposed=Window]
interface MLModelLoader {
  constructor(MLContext context);
  Promise<MLModel> load(ArrayBuffer modelBuffer);
};
```
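Given these definitions, a page can inspect which tensors a loaded model expects and produces before calling `compute`. The following is a minimal sketch of that, assuming a `model` obtained from `MLModelLoader.load()` as in the examples in section 3:

```js
// Sketch: enumerate a loaded model's input and output tensor metadata.
// Assumes `model` is an MLModel returned by MLModelLoader.load().
for (const info of model.inputs()) {
  // Each MLTensorInfo carries a name, an MLDataType, and the dimensions.
  console.log(`input "${info.name}": ${info.type}, dimensions [${info.dimensions}]`);
}
for (const info of model.outputs()) {
  console.log(`output "${info.name}": ${info.type}, dimensions [${info.dimensions}]`);
}
```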
3. Examples
```js
// First, create an MLContext. This is consistent with the WebNN API, and we
// add two new fields, "numThreads" and "modelFormat".
const context = await navigator.ml.createContext({
  devicePreference: "cpu",
  powerPreference: "low-power",
  numThreads: 0,          // The default 0 means "decide automatically".
  modelFormat: "tflite"
});

// Then create the model loader using the ML context.
const loader = new MLModelLoader(context);

// In the first version, we only support loading models from ArrayBuffers. We
// believe this covers most use cases. Web developers can download the model,
// e.g., with the fetch API. We can add new "load" functions in the future if
// they are really needed.
const modelUrl = 'https://path/to/model/file';
const modelBuffer = await fetch(modelUrl)
                          .then(response => response.arrayBuffer());

// Load the model.
const model = await loader.load(modelBuffer);

// Use the `model.compute` function to get the output of the model from some
// inputs. Example ways of using this function include:
//
// 1. When the model has only one input tensor, the caller can simply pass in
// the tensor without naming it (though they may still designate the input
// tensor by name if they like).
let z = await model.compute({ data: new Float32Array([10]),
                              dimensions: [1] });

// 2. When there are multiple input tensors, the caller has to designate each
// input tensor by name.
z = await model.compute({ x: { data: new Float32Array([10]),
                               dimensions: [1] },
                          y: { data: new Float32Array([20]),
                               dimensions: [1] } });

// 3. The caller can also specify the output tensor. This is consistent with
// the WebNN API and can be useful, e.g., when the output tensor is a GPU
// buffer. In this case, the function returns an empty promise. The dimensions
// of the specified output tensor must match the dimensions of the model's
// output tensor.
const z_buffer = ml.tensor({ data: new Float64Array(1),
                             dimensions: [1] });
await model.compute({ data: new Float32Array([10]),
                      dimensions: [1] },
                    z_buffer);
```

For the output tensor(s):
- Similar to the input arguments, if there is only one output tensor, the `compute` function returns a tensor in cases 1 and 2, and there is no need to specify the name of the output tensor in case 3. But if there are multiple output tensors, the result in cases 1 and 2 is a map from tensor name to tensor (see the sketch below), and in case 3 the output argument must be such a map too.
- In cases 1 and 2, where the output data actually live depends on the context: in a CPU context, the output tensors' buffers are RAM buffers, and in a GPU context they are GPU buffers.
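To make the multiple-output case concrete, here is a hedged sketch; the input names `x` and `y` follow case 2 above, while the output names `out1` and `out2` are hypothetical and depend on the particular model:

```js
// Sketch: computing with a model that has two output tensors. The promise
// resolves with a map (a plain object) from output tensor name to MLTensor.
// The output names "out1" and "out2" are hypothetical; the real names come
// from the model file and can be discovered via model.outputs().
const results = await model.compute({
  x: { data: new Float32Array([10]), dimensions: [1] },
  y: { data: new Float32Array([20]), dimensions: [1] }
});
const out1 = results['out1'];  // An MLTensor: { data, dimensions }.
const out2 = results['out2'];
```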