1. Introduction
For the introduction and use cases, please see explainer.md.
For illustration purposes, the API and examples use the TF Lite flatbuffer format.
2. API
enum MLModelFormat {
  // TensorFlow Lite flatbuffer.
  "tflite"
};

enum MLDevicePreference {
  // Let the backend select the most suitable device.
  "auto",
  // The backend will use the GPU to do model inference. If some operator is
  // not supported by the GPU, it will fall back to the CPU.
  "gpu",
  // The backend will use the CPU to do model inference.
  "cpu"
};

enum MLPowerPreference {
  // Let the backend select the most suitable behavior.
  "auto",
  // Prioritizes execution speed over power consumption.
  "high-performance",
  // Prioritizes power consumption over other considerations such as execution
  // speed.
  "low-power",
};

dictionary MLContextOptions {
  // Preferred kind of device to use.
  MLDevicePreference devicePreference = "auto";
  // Preference as related to power consumption.
  MLPowerPreference powerPreference = "auto";
  // Model format for the Model Loader API.
  MLModelFormat modelFormat = "tflite";
  // Number of threads to use.
  // 0 means the backend can determine it automatically.
  unsigned long numThreads = 0;
};

[Exposed=Window]
interface ML {
  Promise<MLContext> createContext(optional MLContextOptions options = {});
};

enum MLDataType {
  // "unknown" doesn't mean "unsupported". The backend can support more types
  // than those explicitly listed here (e.g., TFLite has complex numbers). We
  // treat them as "unknown" to avoid exposing too many details of the
  // backends from the beginning.
  "unknown",
  "int64",
  "uint64",
  "float64",
  "int32",
  "uint32",
  "float32",
  "int16",
  "uint16",
  "float16",
  "int8",
  "uint8",
  "bool",
};

dictionary MLTensor {
  required ArrayBufferView data;
  required sequence<unsigned long> dimensions;
};

dictionary MLTensorInfo {
  required DOMString name;
  required MLDataType type;
  required sequence<unsigned long> dimensions;
};

[SecureContext, Exposed=Window]
interface MLModel {
  Promise<record<DOMString, MLTensor>> compute(record<DOMString, MLTensor> inputs);
  sequence<MLTensorInfo> inputs();
  sequence<MLTensorInfo> outputs();
};

[Exposed=Window]
interface MLModelLoader {
  constructor(MLContext context);
  Promise<MLModel> load(ArrayBuffer modelBuffer);
};
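The inputs() and outputs() methods of MLModel expose the tensor metadata
(name, data type, dimensions) of a loaded model, which callers can use to
validate input shapes before calling compute(). The following is a
non-normative sketch; it assumes a variable named model holding an MLModel
obtained from MLModelLoader.load() (see the examples in Section 3), and the
names and shapes it prints depend entirely on the loaded model file.

// Non-normative sketch: enumerate the input and output tensor metadata of a
// loaded MLModel. `model` is assumed to come from MLModelLoader.load().
for (const info of model.inputs()) {
  console.log(`input "${info.name}": type=${info.type}, ` +
              `dimensions=[${info.dimensions.join(', ')}]`);
}
for (const info of model.outputs()) {
  console.log(`output "${info.name}": type=${info.type}, ` +
              `dimensions=[${info.dimensions.join(', ')}]`);
}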
3. Examples
// First, create an MLContext. This is consistent with the WebNN API, and we
// add two new fields, "numThreads" and "modelFormat".
const context = await navigator.ml.createContext({
  devicePreference: "cpu",
  powerPreference: "low-power",
  numThreads: 0,        // The default 0 means "decide automatically".
  modelFormat: "tflite"
});

// Then create the model loader using the ML context.
const loader = new MLModelLoader(context);

// In the first version, we only support loading models from ArrayBuffers. We
// believe this covers most of the use cases. Web developers can download the
// model, e.g., by the fetch API. We can add new "load" functions in the
// future if they are really needed.
const modelUrl = 'https://path/to/model/file';
const modelBuffer = await fetch(modelUrl)
    .then(response => response.arrayBuffer());

// Load the model.
const model = await loader.load(modelBuffer);

// Use the `model.compute` function to get the output of the model from some
// inputs. Example ways of using this function include:
//
// 1. When the model has only one input tensor, one can simply pass in the
// tensor without specifying its name (the user can still designate the input
// tensor by name if they like).
let z = await model.compute({ data: new Float32Array([10]),
                              dimensions: [1] });

// 2. When there are multiple input tensors, the user has to designate the
// input tensors by their names.
z = await model.compute({
  x: { data: new Float32Array([10]), dimensions: [1] },
  y: { data: new Float32Array([20]), dimensions: [1] }
});

// 3. The caller can also specify the output tensor. This is consistent with
// the WebNN API and can be useful, e.g., when the output tensor is a GPU
// buffer. In this case, the function returns an empty promise. The dimensions
// of the specified output tensor must match the dimensions of the model's
// output tensor.
const z_buffer = ml.tensor({ data: new Float64Array(1), dimensions: [1] });
await model.compute({ data: new Float32Array([10]), dimensions: [1] },
                    z_buffer);

// For the output tensor(s):
// Similar to the input arguments, if there is only one output tensor, the
// `compute` function returns a tensor in cases 1 and 2, and there is no need
// to specify the name of the output tensor in case 3. But if there are
// multiple output tensors, the output in cases 1 and 2 will be a map from
// tensor names to tensors, and in case 3, the output argument must be a map
// from tensor names to tensors too.
//
// For cases 1 and 2, where the actual output data are located depends on the
// context: in a CPU context, the output tensors' buffers will be RAM buffers,
// and in a GPU context, they will be GPU buffers.
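To make the multiple-output behavior described above concrete, here is a
non-normative sketch. The output tensor names "out1" and "out2" are
hypothetical and would be determined by the loaded model; the case-3 call
follows the two-argument form used in the examples above.

// Cases 1 and 2 with multiple outputs: `compute` resolves to a map from
// output-tensor names to tensors. The names "out1"/"out2" are hypothetical.
const outputs = await model.compute({
  x: { data: new Float32Array([10]), dimensions: [1] },
  y: { data: new Float32Array([20]), dimensions: [1] }
});
console.log(outputs['out1'].data, outputs['out2'].data);

// Case 3 with multiple outputs: the caller passes a map from output-tensor
// names to preallocated tensors, and the returned promise resolves empty.
await model.compute(
    { x: { data: new Float32Array([10]), dimensions: [1] },
      y: { data: new Float32Array([20]), dimensions: [1] } },
    { out1: { data: new Float32Array(1), dimensions: [1] },
      out2: { data: new Float32Array(1), dimensions: [1] } });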