/*
 * GStreamer gstreamer-onnxinference
 * Copyright (C) 2023-2025 Collabora Ltd.
 *
 * gstonnxinference.c
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

/**
 * SECTION:element-onnxinference
 * @short_description: Run ONNX inference model on video buffers
 *
 * This element can apply an ONNX model to video buffers. It attaches
 * the tensor output to the buffer as a @ref GstTensorMeta.
 *
 * To install ONNX on your system, follow the instructions in the
 * README.md shipped with this plugin.
 *
 * ## Example launch command:
 *
 * Test image file, model file (SSD) and label file can be found here:
 * https://gitlab.collabora.com/gstreamer/onnx-models
 *
 * GST_DEBUG=ssdobjectdetector:5 \
 * gst-launch-1.0 filesrc location=onnx-models/images/bus.jpg ! \
 * jpegdec ! videoconvert ! onnxinference execution-provider=cpu model-file=onnx-models/models/ssd_mobilenet_v1_coco.onnx ! \
 * ssdobjectdetector label-file=onnx-models/labels/COCO_classes.txt ! videoconvert ! imagefreeze ! autovideosink
 *
 *
 * Note: in order for downstream tensor decoders to correctly parse the tensor
 * data in the GstTensorMeta, meta data must be attached to the ONNX model
 * assigning a unique string id to each output layer.
These unique string ids
 * and corresponding GQuark ids are currently stored in the tensor decoder's
 * header file, in this case gstssdobjectdetector.h. If the meta data is absent,
 * the pipeline will fail.
 *
 * As a convenience, there is a python script
 * currently stored at
 * https://gitlab.collabora.com/gstreamer/onnx-models/-/blob/master/scripts/modify_onnx_metadata.py
 * to enable users to easily add and remove meta data from json files. It can also dump
 * the names of all output layers, which can then be used to craft the json meta data file.
 *
 * Since: 1.20
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "gstonnxinference.h"
/* NOTE(review): the targets of the following #include directives are missing
 * in this copy of the file (they look stripped by an extraction step);
 * restore them from the upstream source before building. */
#include
#include
#include

#ifdef HAVE_VSI_NPU
#include
#endif

/* Values of the "optimization-level" property. They are translated to the
 * ONNX Runtime graph optimization levels when the session is created. */
typedef enum
{
  GST_ONNX_OPTIMIZATION_LEVEL_DISABLE_ALL,
  GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_BASIC,
  GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_EXTENDED,
  GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_ALL,
} GstOnnxOptimizationLevel;

/* Values of the "execution-provider" property. */
typedef enum
{
  GST_ONNX_EXECUTION_PROVIDER_CPU,
  GST_ONNX_EXECUTION_PROVIDER_CUDA,
  GST_ONNX_EXECUTION_PROVIDER_VSI,
} GstOnnxExecutionProvider;

/* Instance structure of the onnxinference element. */
struct _GstOnnxInference
{
  GstBaseTransform basetransform;       /* parent instance */
  gchar *model_file;            /* owned copy of the "model-file" property */
  GstOnnxOptimizationLevel optimization_level;  /* "optimization-level" property */
  GstOnnxExecutionProvider execution_provider;  /* "execution-provider" property */
  GstVideoInfo video_info;
  /* Both caps are created as empty "video/x-raw" caps at instance init and
   * released in finalize; start() adds format/width/height to the input one. */
  GstCaps *input_tensors_caps;
  GstCaps *output_tensors_caps;
  /* ONNX Runtime handles, filled in by start() */
  OrtEnv *env;
  OrtSession *session;
  OrtMemoryInfo *memory_info;
  OrtAllocator *allocator;
  /* Geometry of model input 0, read from its dimensions in start() */
  int32_t width;
  int32_t height;
  int32_t channels;
  gboolean planar;              /* start() sets it when the channel axis is not the last one */
  /* Index of each axis inside the input shape, -1 when not identified */
  gint height_dim;
  gint width_dim;
  gint channels_dim;
  gint batch_dim;
  uint8_t *dest;
  size_t output_count;          /* number of model outputs */
  gchar **output_names;         /* output_count names returned by the session */
  GQuark *output_ids;           /* quark of the modelinfo id of each output */
  GstTensorDimOrder *output_dims_orders;
  GstTensorDataType input_data_type;    /* element type of model input 0 */
  bool fixedInputImageSize;     /* width > 0 && height > 0 */
  /* Per-channel normalization obtained from modelinfo; freed in finalize */
  double *scales;
  double *offsets;
  gsize num_channels;
};

/* ONNX Runtime API table, fetched once in class_init */
static const OrtApi *api = NULL;

GST_DEBUG_CATEGORY (onnx_inference_debug);
GST_DEBUG_CATEGORY (onnx_runtime_debug);
#define GST_CAT_DEFAULT onnx_inference_debug
GST_ELEMENT_REGISTER_DEFINE (onnx_inference, "onnxinference",
    GST_RANK_PRIMARY, GST_TYPE_ONNX_INFERENCE);

/* GstOnnxInference properties */
/* NOTE(review): PROP_INPUT_OFFSET and PROP_INPUT_SCALE are declared here but
 * are neither installed in class_init nor handled by set/get_property in the
 * visible code - confirm whether they are leftovers. */
enum
{
  PROP_0,
  PROP_MODEL_FILE,
  PROP_OPTIMIZATION_LEVEL,
  PROP_EXECUTION_PROVIDER,
  PROP_INPUT_OFFSET,
  PROP_INPUT_SCALE
};

#define GST_ONNX_INFERENCE_DEFAULT_EXECUTION_PROVIDER    GST_ONNX_EXECUTION_PROVIDER_CPU
#define GST_ONNX_INFERENCE_DEFAULT_OPTIMIZATION_LEVEL    GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_EXTENDED

/* Both pads accept the same packed RGB-family raw video formats. */
static GstStaticPadTemplate gst_onnx_inference_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("{ RGB,RGBA,BGR,BGRA }"))
    );

static GstStaticPadTemplate gst_onnx_inference_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("{ RGB,RGBA,BGR,BGRA }"))
    );

static void gst_onnx_inference_set_property (GObject * object,
    guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_onnx_inference_get_property (GObject * object,
    guint prop_id, GValue * value, GParamSpec * pspec);
static void gst_onnx_inference_finalize (GObject * object);
static GstFlowReturn gst_onnx_inference_transform_ip (GstBaseTransform *
    trans, GstBuffer * buf);
static GstCaps *gst_onnx_inference_transform_caps (GstBaseTransform *
    trans, GstPadDirection direction, GstCaps * caps, GstCaps * filter_caps);
static gboolean
gst_onnx_inference_set_caps (GstBaseTransform * trans, GstCaps * incaps,
    GstCaps * outcaps);
static gboolean gst_onnx_inference_start (GstBaseTransform * trans);
static gboolean gst_onnx_inference_stop (GstBaseTransform * trans);

G_DEFINE_TYPE (GstOnnxInference, gst_onnx_inference, GST_TYPE_BASE_TRANSFORM);

GType gst_onnx_optimization_level_get_type (void);
#define GST_TYPE_ONNX_OPTIMIZATION_LEVEL (gst_onnx_optimization_level_get_type ())

GType gst_onnx_execution_provider_get_type (void);
#define GST_TYPE_ONNX_EXECUTION_PROVIDER (gst_onnx_execution_provider_get_type ())

/* Registers, on first call, the GEnum type backing the "optimization-level"
 * property and returns it; later calls return the cached GType. */
GType
gst_onnx_optimization_level_get_type (void)
{
  static GType onnx_optimization_type = 0;

  if (g_once_init_enter (&onnx_optimization_type)) {
    static GEnumValue optimization_level_types[] = {
      {GST_ONNX_OPTIMIZATION_LEVEL_DISABLE_ALL, "Disable all optimization",
          "disable-all"},
      {GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_BASIC,
            "Enable basic optimizations (redundant node removals))",
          "enable-basic"},
      {GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_EXTENDED,
            "Enable extended optimizations (redundant node removals + node fusions)",
          "enable-extended"},
      {GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_ALL,
          "Enable all possible optimizations", "enable-all"},
      {0, NULL, NULL},
    };

    GType temp = g_enum_register_static ("GstOnnxOptimizationLevel",
        optimization_level_types);

    g_once_init_leave (&onnx_optimization_type, temp);
  }

  return onnx_optimization_type;
}

/* Registers, on first call, the GEnum type backing the "execution-provider"
 * property and returns it. The CUDA and VSI values are always registered;
 * only their description changes when the provider is compiled out. */
GType
gst_onnx_execution_provider_get_type (void)
{
  static GType onnx_execution_type = 0;

  if (g_once_init_enter (&onnx_execution_type)) {
    static GEnumValue execution_provider_types[] = {
      {GST_ONNX_EXECUTION_PROVIDER_CPU, "CPU execution provider",
          "cpu"},
#if HAVE_CUDA
      {GST_ONNX_EXECUTION_PROVIDER_CUDA,
            "CUDA execution provider",
          "cuda"},
#else
      {GST_ONNX_EXECUTION_PROVIDER_CUDA,
            "CUDA execution provider (compiled out, will use CPU)",
          "cuda"},
#endif
#ifdef HAVE_VSI_NPU
      {GST_ONNX_EXECUTION_PROVIDER_VSI,
            "VeriSilicon NPU execution provider",
          "vsi"},
#else
      {GST_ONNX_EXECUTION_PROVIDER_VSI,
            "VeriSilicon NPU execution provider (compiled out, will use CPU)",
          "vsi"},
#endif
      {0, NULL, NULL},
    };

    GType temp = g_enum_register_static ("GstOnnxExecutionProvider",
        execution_provider_types);

    g_once_init_leave (&onnx_execution_type, temp);
  }

  return onnx_execution_type;
}

/* Class initializer: sets up the debug categories, installs the three
 * properties, the element metadata and pad templates, hooks the
 * GstBaseTransform virtual methods and fetches the ONNX Runtime API table. */
static void
gst_onnx_inference_class_init (GstOnnxInferenceClass * klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstElementClass *element_class = (GstElementClass *) klass;
  GstBaseTransformClass *basetransform_class = (GstBaseTransformClass *) klass;

  GST_DEBUG_CATEGORY_INIT (onnx_inference_debug, "onnxinference",
      0, "ONNX Runtime Inference");
  GST_DEBUG_CATEGORY_INIT
      (onnx_runtime_debug, "onnxruntime", 0, "ONNX Runtime");

  gobject_class->set_property = gst_onnx_inference_set_property;
  gobject_class->get_property = gst_onnx_inference_get_property;
  gobject_class->finalize = gst_onnx_inference_finalize;

  /**
   * GstOnnxInference:model-file
   *
   * ONNX model file
   *
   * Since: 1.24
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_MODEL_FILE,
      g_param_spec_string ("model-file",
          "ONNX model file", "ONNX model file", NULL, (GParamFlags)
          (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstOnnxInference:optimization-level
   *
   * ONNX optimization level
   *
   * Since: 1.24
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_OPTIMIZATION_LEVEL,
      g_param_spec_enum ("optimization-level",
          "Optimization level",
          "ONNX optimization level",
          GST_TYPE_ONNX_OPTIMIZATION_LEVEL,
          GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_EXTENDED, (GParamFlags)
          (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  /**
   * GstOnnxInference:execution-provider
   *
   * ONNX execution provider
   *
   * Since: 1.24
   */
  g_object_class_install_property (G_OBJECT_CLASS (klass),
      PROP_EXECUTION_PROVIDER,
      g_param_spec_enum ("execution-provider",
          "Execution provider",
          "ONNX execution provider",
          GST_TYPE_ONNX_EXECUTION_PROVIDER,
          GST_ONNX_EXECUTION_PROVIDER_CPU, (GParamFlags)
          (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));

  gst_element_class_set_static_metadata (element_class, "onnxinference",
      "Filter/Video",
      "Apply neural network to video frames and create tensor output",
      "Aaron Boxer ");
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&gst_onnx_inference_sink_template));
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&gst_onnx_inference_src_template));
  basetransform_class->transform_ip =
      GST_DEBUG_FUNCPTR (gst_onnx_inference_transform_ip);
  basetransform_class->transform_caps =
      GST_DEBUG_FUNCPTR (gst_onnx_inference_transform_caps);
  basetransform_class->set_caps =
      GST_DEBUG_FUNCPTR (gst_onnx_inference_set_caps);
basetransform_class->start = GST_DEBUG_FUNCPTR (gst_onnx_inference_start); basetransform_class->stop = GST_DEBUG_FUNCPTR (gst_onnx_inference_stop); gst_type_mark_as_plugin_api (GST_TYPE_ONNX_OPTIMIZATION_LEVEL, (GstPluginAPIFlags) 0); gst_type_mark_as_plugin_api (GST_TYPE_ONNX_EXECUTION_PROVIDER, (GstPluginAPIFlags) 0); api = OrtGetApiBase ()->GetApi (ORT_API_VERSION); } static void gst_onnx_inference_init (GstOnnxInference * self) { /* TODO: at the moment onnx inference only support video output. We * should revisit this aspect once we generalize it */ self->input_tensors_caps = gst_caps_new_empty_simple ("video/x-raw"); self->output_tensors_caps = gst_caps_new_empty_simple ("video/x-raw"); self->execution_provider = GST_ONNX_EXECUTION_PROVIDER_CPU; self->scales = NULL; self->offsets = NULL; self->num_channels = 0; self->height_dim = -1; self->width_dim = -1; self->channels_dim = -1; self->batch_dim = -1; /* Passthrough would propagate tensors caps upstream */ gst_base_transform_set_prefer_passthrough (GST_BASE_TRANSFORM (self), FALSE); } static void gst_onnx_inference_finalize (GObject * object) { GstOnnxInference *self = GST_ONNX_INFERENCE (object); g_free (self->model_file); g_free (self->scales); g_free (self->offsets); gst_caps_unref (self->input_tensors_caps); gst_caps_unref (self->output_tensors_caps); G_OBJECT_CLASS (gst_onnx_inference_parent_class)->finalize (object); } static void gst_onnx_inference_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstOnnxInference *self = GST_ONNX_INFERENCE (object); const gchar *filename; switch (prop_id) { case PROP_MODEL_FILE: filename = g_value_get_string (value); if (filename && g_file_test (filename, (GFileTest) (G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR))) { if (self->model_file) g_free (self->model_file); self->model_file = g_strdup (filename); } else { GST_WARNING_OBJECT (self, "Model file '%s' not found!", filename); } break; case PROP_OPTIMIZATION_LEVEL: 
self->optimization_level = (GstOnnxOptimizationLevel) g_value_get_enum (value); break; case PROP_EXECUTION_PROVIDER: self->execution_provider = (GstOnnxExecutionProvider) g_value_get_enum (value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static void gst_onnx_inference_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { GstOnnxInference *self = GST_ONNX_INFERENCE (object); switch (prop_id) { case PROP_MODEL_FILE: g_value_set_string (value, self->model_file); break; case PROP_OPTIMIZATION_LEVEL: g_value_set_enum (value, self->optimization_level); break; case PROP_EXECUTION_PROVIDER: g_value_set_enum (value, self->execution_provider); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static gsize get_tensor_type_size (GstTensorDataType data_type) { switch (data_type) { case GST_TENSOR_DATA_TYPE_UINT8: return sizeof (uint8_t); case GST_TENSOR_DATA_TYPE_UINT16: return sizeof (uint16_t); case GST_TENSOR_DATA_TYPE_UINT32: return sizeof (uint32_t); case GST_TENSOR_DATA_TYPE_INT32: return sizeof (int32_t); case GST_TENSOR_DATA_TYPE_FLOAT16: return 2; case GST_TENSOR_DATA_TYPE_FLOAT32: return sizeof (float); default: g_error ("Data type %d not handled", data_type); return 0; }; } static GstCaps * gst_onnx_inference_transform_caps (GstBaseTransform * trans, GstPadDirection direction, GstCaps * caps, GstCaps * filter_caps) { GstOnnxInference *self = GST_ONNX_INFERENCE (trans); GstCaps *other_caps; GstCaps *restrictions; bool has_session; GST_OBJECT_LOCK (self); has_session = self->session != NULL; GST_OBJECT_UNLOCK (self); if (!has_session) { other_caps = gst_caps_ref (caps); goto done; } GST_LOG_OBJECT (self, "transforming caps %" GST_PTR_FORMAT, caps); GST_DEBUG_OBJECT (self, "Applying model input tensors caps restrictions: %" GST_PTR_FORMAT, self->input_tensors_caps); restrictions = gst_caps_ref (self->input_tensors_caps); if (direction == GST_PAD_SINK) { /* 
Create tensors_caps from output_tensor_caps and intersect with * restrictions */ GstCaps *tensors_caps = gst_caps_copy (self->output_tensors_caps); GstCaps *intersect = gst_caps_intersect_full (restrictions, tensors_caps, GST_CAPS_INTERSECT_FIRST); gst_caps_replace (&restrictions, intersect); gst_caps_unref (tensors_caps); gst_caps_unref (intersect); other_caps = gst_caps_intersect_full (caps, restrictions, GST_CAPS_INTERSECT_FIRST); } else if (direction == GST_PAD_SRC) { /* Remove tensors from caps to prevent upstream propagation. */ GstCaps *tmp_caps = gst_caps_copy (caps); if (!gst_caps_is_empty (tmp_caps)) { GstStructure *tstruct = gst_caps_get_structure (tmp_caps, 0); gst_structure_remove_field (tstruct, "tensors"); } other_caps = gst_caps_intersect_full (tmp_caps, restrictions, GST_CAPS_INTERSECT_FIRST); gst_caps_unref (tmp_caps); } else { other_caps = gst_caps_intersect_full (caps, restrictions, GST_CAPS_INTERSECT_FIRST); } gst_caps_unref (restrictions); done: if (filter_caps) { GstCaps *tmp = gst_caps_intersect_full (other_caps, filter_caps, GST_CAPS_INTERSECT_FIRST); gst_caps_replace (&other_caps, tmp); gst_caps_unref (tmp); } return other_caps; } static GstTensorDataType onnx_data_type_to_gst (ONNXTensorElementDataType dt) { const gint ONNX_TO_GST_TENSOR_DATATYPE[] = { -1, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED */ GST_TENSOR_DATA_TYPE_FLOAT32, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT */ GST_TENSOR_DATA_TYPE_UINT8, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8 */ GST_TENSOR_DATA_TYPE_INT8, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8 */ GST_TENSOR_DATA_TYPE_UINT16, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16 */ GST_TENSOR_DATA_TYPE_INT16, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16 */ GST_TENSOR_DATA_TYPE_INT32, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32 */ GST_TENSOR_DATA_TYPE_INT64, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64 */ GST_TENSOR_DATA_TYPE_STRING, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING */ GST_TENSOR_DATA_TYPE_BOOL, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL */ 
GST_TENSOR_DATA_TYPE_FLOAT16, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 */ GST_TENSOR_DATA_TYPE_FLOAT64, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE */ GST_TENSOR_DATA_TYPE_UINT32, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32 */ GST_TENSOR_DATA_TYPE_UINT64, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64 */ GST_TENSOR_DATA_TYPE_COMPLEX64, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64 */ GST_TENSOR_DATA_TYPE_COMPLEX128, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128 */ GST_TENSOR_DATA_TYPE_BFLOAT16, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16 */ GST_TENSOR_DATA_TYPE_FLOAT8E4M3FN, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FN */ GST_TENSOR_DATA_TYPE_FLOAT8E4M3FNUZ, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E4M3FNUZ */ GST_TENSOR_DATA_TYPE_FLOAT8E5M2, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2 */ GST_TENSOR_DATA_TYPE_FLOAT8E5M2FNUZ, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ */ GST_TENSOR_DATA_TYPE_UINT4, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT4 */ GST_TENSOR_DATA_TYPE_INT4, /* ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4 */ }; if (dt > ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED && dt <= ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4) { return ONNX_TO_GST_TENSOR_DATATYPE[dt]; } g_error ("Unexpected datatype: %d", dt); } static gboolean gst_onnx_inference_set_tensordec_datatype (GstOnnxInference * self, ONNXTensorElementDataType dt, GstStructure * tensor_desc) { GValue val = G_VALUE_INIT; GstTensorDataType gst_dt; g_value_init (&val, G_TYPE_STRING); if (dt > ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED && dt <= ONNX_TENSOR_ELEMENT_DATA_TYPE_INT4) { gst_dt = onnx_data_type_to_gst (dt); g_value_set_string (&val, gst_tensor_data_type_get_name (gst_dt)); } else { GST_ERROR_OBJECT (self, "Unexpected datatype: %d", dt); g_value_unset (&val); return FALSE; } gst_structure_take_value (tensor_desc, "type", &val); g_value_unset (&val); return TRUE; } static void gst_onnx_log_function (void *param, OrtLoggingLevel severity, const char *category, const char *logid, const char *code_location, const char *message) { GObject *obj = 
param; GstDebugLevel level = GST_LEVEL_ERROR; switch (severity) { case ORT_LOGGING_LEVEL_VERBOSE: level = GST_LEVEL_LOG; break; case ORT_LOGGING_LEVEL_INFO: level = GST_LEVEL_INFO; break; case ORT_LOGGING_LEVEL_WARNING: level = GST_LEVEL_WARNING; break; case ORT_LOGGING_LEVEL_ERROR: case ORT_LOGGING_LEVEL_FATAL: level = GST_LEVEL_ERROR; break; } gst_debug_log (onnx_runtime_debug, level, code_location, "gst_onnx_log_function", 0, obj, "%s", message); } /* FIXME: This is copied from Gsttfliteinference and we should create something * more generic */ static gboolean _guess_tensor_data_type (GstOnnxInference * self, gsize dims_count, gsize * dims, const gchar ** gst_format) { self->height_dim = -1; self->width_dim = -1; self->channels_dim = -1; self->batch_dim = -1; if (dims_count < 2 || dims_count > 4) { GST_ERROR_OBJECT (self, "Don't know how to interpret tensors with %zu dimensions", dims_count); return FALSE; } switch (dims_count) { case 2: *gst_format = "GRAY8"; self->height_dim = 0; self->width_dim = 1; break; case 3: if (dims[0] == 1 || dims[0] == 3) { self->channels_dim = 0; if (dims[0] == 1) { *gst_format = "GRAY8"; } else { *gst_format = "RGBP"; } self->height_dim = 1; self->width_dim = 2; } else if (dims[2] == 1 || dims[2] == 3) { self->channels_dim = 2; if (dims[2] == 1) *gst_format = "GRAY"; else *gst_format = "RGB"; self->height_dim = 0; self->width_dim = 1; } else { GST_ERROR_OBJECT (self, "Don't know how to interpret dims"); return FALSE; } break; case 4: /* Assuming dims[0] is a batch */ self->batch_dim = 0; if (dims[1] == 1 || dims[1] == 3) { self->channels_dim = 1; self->height_dim = 2; self->width_dim = 3; } else if (dims[3] == 1 || dims[3] == 3) { self->height_dim = 1; self->width_dim = 2; self->channels_dim = 3; } else { GST_ERROR_OBJECT (self, "Don't know how to interpret dims"); return FALSE; } if (dims[self->channels_dim] == 1) { *gst_format = "GRAY8"; } else if (dims[self->channels_dim] == 3) { if (self->planar) *gst_format = "RGBP"; else 
*gst_format = "RGB"; } else { g_assert_not_reached (); } break; } return TRUE; } static gchar * build_dims_str (gsize dims_count, gsize * dims) { GString *dims_gstr = g_string_new (""); gsize j; if (dims_count == 0) goto done; if (dims[0] == G_MAXSIZE) g_string_append (dims_gstr, "-1"); else g_string_append_printf (dims_gstr, "%zu", dims[0]); for (j = 1; j < dims_count; j++) if (dims[j] == G_MAXSIZE) g_string_append (dims_gstr, ",-1"); else g_string_append_printf (dims_gstr, ",%zu", dims[j]); done: return g_string_free (dims_gstr, FALSE); } static gboolean gst_onnx_inference_start (GstBaseTransform * trans) { GstOnnxInference *self = GST_ONNX_INFERENCE (trans); gboolean ret = FALSE; OrtStatus *status = NULL; OrtSessionOptions *session_options = NULL; OrtTypeInfo *input_type_info = NULL; const OrtTensorTypeAndShapeInfo *input_tensor_info = NULL; GraphOptimizationLevel onnx_optim; size_t num_input_dims; int64_t *input_dims; gsize *gst_input_dims; ONNXTensorElementDataType element_type; size_t i; const gchar *gst_format; GstAnalyticsModelInfo *modelinfo = NULL; const gchar *onnx_input_tensor_name = NULL; gchar *tensor_name = NULL; GST_OBJECT_LOCK (self); if (self->session) { ret = TRUE; goto done; } if (self->model_file == NULL) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("model-file property not set")); goto done; } modelinfo = gst_analytics_modelinfo_load (self->model_file); if (!modelinfo) { GST_ERROR_OBJECT (self, "Failed to load modelinfo for %s. 
" "This could be due to: file not found, unsupported version, " "or invalid file format.", self->model_file); goto error; } if (self->session) { ret = TRUE; goto done; } // Create environment OrtLoggingLevel ort_logging; switch (gst_debug_category_get_threshold (GST_CAT_DEFAULT)) { case GST_LEVEL_NONE: case GST_LEVEL_ERROR: ort_logging = ORT_LOGGING_LEVEL_ERROR; break; case GST_LEVEL_WARNING: case GST_LEVEL_FIXME: ort_logging = ORT_LOGGING_LEVEL_WARNING; break; case GST_LEVEL_INFO: ort_logging = ORT_LOGGING_LEVEL_INFO; break; case GST_LEVEL_DEBUG: case GST_LEVEL_LOG: case GST_LEVEL_TRACE: case GST_LEVEL_MEMDUMP: default: ort_logging = ORT_LOGGING_LEVEL_VERBOSE; break; } status = api->CreateEnvWithCustomLogger (gst_onnx_log_function, self, ort_logging, "GstOnnx", &self->env); if (status) { GST_ERROR_OBJECT (self, "Failed to create environment: %s", api->GetErrorMessage (status)); goto error; } // Create session options status = api->CreateSessionOptions (&session_options); if (status) { GST_ERROR_OBJECT (self, "Failed to create session options: %s", api->GetErrorMessage (status)); goto error; } // Set graph optimization level switch (self->optimization_level) { case GST_ONNX_OPTIMIZATION_LEVEL_DISABLE_ALL: onnx_optim = ORT_DISABLE_ALL; break; case GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_BASIC: onnx_optim = ORT_ENABLE_BASIC; break; case GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_EXTENDED: onnx_optim = ORT_ENABLE_EXTENDED; break; case GST_ONNX_OPTIMIZATION_LEVEL_ENABLE_ALL: onnx_optim = ORT_ENABLE_ALL; break; default: onnx_optim = ORT_ENABLE_EXTENDED; break; } status = api->SetSessionGraphOptimizationLevel (session_options, onnx_optim); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to set optimization level: %s", api->GetErrorMessage (status))); goto error; } // Set execution provider switch (self->execution_provider) { case GST_ONNX_EXECUTION_PROVIDER_CUDA: { OrtCUDAProviderOptionsV2 *cuda_options = NULL; status = api->CreateCUDAProviderOptions 
(&cuda_options); if (status) { GST_ERROR_OBJECT (self, "Failed to create CUDA provider %s", api->GetErrorMessage (status)); goto error; } status = api->SessionOptionsAppendExecutionProvider_CUDA_V2 (session_options, cuda_options); api->ReleaseCUDAProviderOptions (cuda_options); if (status) { GST_ERROR_OBJECT (self, "Failed to append CUDA provider: %s", api->GetErrorMessage (status)); goto error; } break; } case GST_ONNX_EXECUTION_PROVIDER_VSI: #ifdef HAVE_VSI_NPU status = OrtSessionOptionsAppendExecutionProvider_VSINPU (session_options); if (status) { GST_ERROR_OBJECT (self, "Failed to set VSINPU AI execution provider:" " %s", api->GetErrorMessage (status)); goto error; } api->DisableCpuMemArena (session_options); #else GST_ERROR_OBJECT (self, "Compiled without VSI support"); goto error; #endif break; default: break; } // Create session status = api->CreateSession (self->env, self->model_file, session_options, &self->session); if (status) { GST_ERROR_OBJECT (self, "Failed to create session: %s", api->GetErrorMessage (status)); self->session = NULL; goto error; } api->ReleaseSessionOptions (session_options); session_options = NULL; // Get allocator status = api->GetAllocatorWithDefaultOptions (&self->allocator); if (status) { GST_ERROR_OBJECT (self, "Failed to get allocator: %s", api->GetErrorMessage (status)); goto error; } // Get input info status = api->SessionGetInputTypeInfo (self->session, 0, &input_type_info); if (status) { GST_ERROR_OBJECT (self, "Failed to get input type info: %s", api->GetErrorMessage (status)); goto error; } status = api->CastTypeInfoToTensorInfo (input_type_info, &input_tensor_info); if (status) { GST_ERROR_OBJECT (self, "Failed to cast type info: %s", api->GetErrorMessage (status)); goto error; } status = api->GetDimensionsCount (input_tensor_info, &num_input_dims); if (status) { GST_ERROR_OBJECT (self, "Failed to get dimensions count: %s", api->GetErrorMessage (status)); goto error; } input_dims = (int64_t *) g_alloca (num_input_dims * 
sizeof (int64_t)); gst_input_dims = (gsize *) g_alloca (num_input_dims * sizeof (gsize)); status = api->GetDimensions (input_tensor_info, input_dims, num_input_dims); if (status) { GST_ERROR_OBJECT (self, "Failed to get dimensions: %s", api->GetErrorMessage (status)); goto error; } for (i = 0; i < num_input_dims; i++) { if (input_dims[i] < 0) gst_input_dims[i] = G_MAXSIZE; else gst_input_dims[i] = input_dims[i]; } gchar *dims = build_dims_str (num_input_dims, gst_input_dims); GST_DEBUG_OBJECT (self, "Input dimensions: %s", dims); g_free (dims); if (!_guess_tensor_data_type (self, num_input_dims, gst_input_dims, &gst_format)) goto error; self->height = gst_input_dims[self->height_dim]; self->width = gst_input_dims[self->width_dim]; if (self->channels_dim >= 0) { self->channels = gst_input_dims[self->channels_dim]; self->planar = (self->channels_dim != num_input_dims - 1); } else { self->channels = 1; } GST_DEBUG_OBJECT (self, "height dim[%d]=%d, width dim[%d]=%d," " channels dim[%d]=%d, batch_dim[%d]=%zu planar=%d", self->height_dim, self->height, self->width_dim, self->width, self->channels_dim, self->channels, self->batch_dim, self->batch_dim >= 0 ? 
gst_input_dims[self->batch_dim] : 0, self->planar); self->fixedInputImageSize = self->width > 0 && self->height > 0; status = api->SessionGetOutputCount (self->session, &self->output_count); if (status) { GST_ERROR_OBJECT (self, "Could to retrieve output count: %s", api->GetErrorMessage (status)); goto error; } GST_DEBUG_OBJECT (self, "Number of Output Nodes: %zu", self->output_count); if (self->output_count == 0) { GST_ERROR_OBJECT (self, "Model with 0 output nodes is not " "supported."); goto error; } status = api->GetTensorElementType (input_tensor_info, &element_type); if (status) { GST_ERROR_OBJECT (self, "Failed to get element type: %s", api->GetErrorMessage (status)); goto error; } api->ReleaseTypeInfo (input_type_info); input_type_info = NULL; self->input_data_type = onnx_data_type_to_gst (element_type); /* Get input tensor name from ONNX file */ status = api->SessionGetInputName (self->session, 0, self->allocator, (char **) &onnx_input_tensor_name); if (status) { GST_ERROR_OBJECT (self, "Failed to get input name: %s", api->GetErrorMessage (status)); goto error; } tensor_name = gst_analytics_modelinfo_find_tensor_name (modelinfo, MODELINFO_DIRECTION_INPUT, 0, onnx_input_tensor_name, self->input_data_type, num_input_dims, gst_input_dims); if (!tensor_name) { gchar *dims_str = build_dims_str (num_input_dims, gst_input_dims); GST_ERROR_OBJECT (self, "Model info file doesn't contain info for input_tensor[0]:%s matching the" " type %s and dims %s", onnx_input_tensor_name, gst_tensor_data_type_get_name (self->input_data_type), dims_str); g_free (dims_str); if (onnx_input_tensor_name) self->allocator->Free (self->allocator, (char *) onnx_input_tensor_name); goto error; } /* Validation: modelinfo successfully matched dims and datatype from ONNX */ GST_INFO_OBJECT (self, "Input tensor[0]:%s validated - modelinfo matches ONNX model (type: %s)", onnx_input_tensor_name, gst_tensor_data_type_get_name (self->input_data_type)); /* Get per-channel scales and offsets from 
modelinfo */ /* For video input, we assume uint8 pixel values in range [0, 255] */ { gdouble *input_mins = NULL; gdouble *input_maxs = NULL; gsize num_target_ranges; gsize j; /* First, get the number of target ranges from modelinfo to allocate input ranges */ if (!gst_analytics_modelinfo_get_target_ranges (modelinfo, tensor_name, &num_target_ranges, &input_mins, &input_maxs)) { GST_ERROR_OBJECT (self, "Failed to get target ranges from modelinfo for tensor %s", tensor_name); g_free (tensor_name); if (onnx_input_tensor_name) self->allocator->Free (self->allocator, (char *) onnx_input_tensor_name); goto error; } /* Free the target ranges - we only needed them to know the count */ g_free (input_mins); g_free (input_maxs); /* Prepare input ranges - for video uint8 input, range is [0, 255] for all channels */ input_mins = g_new (gdouble, num_target_ranges); input_maxs = g_new (gdouble, num_target_ranges); for (j = 0; j < num_target_ranges; j++) { input_mins[j] = 0.0; input_maxs[j] = 255.0; } if (!gst_analytics_modelinfo_get_input_scales_offsets (modelinfo, tensor_name, num_target_ranges, input_mins, input_maxs, &self->num_channels, &self->scales, &self->offsets)) { GST_ERROR_OBJECT (self, "Failed to get scales/offsets for tensor %s", tensor_name); g_free (input_mins); g_free (input_maxs); g_free (tensor_name); if (onnx_input_tensor_name) self->allocator->Free (self->allocator, (char *) onnx_input_tensor_name); goto error; } g_free (input_mins); g_free (input_maxs); } GST_INFO_OBJECT (self, "Input tensor normalization: %zu channel(s)", self->num_channels); for (i = 0; i < self->num_channels; i++) { GST_DEBUG_OBJECT (self, " Channel[%zu]: scale=%f, offset=%f", i, self->scales[i], self->offsets[i]); } g_free (tensor_name); if (onnx_input_tensor_name) self->allocator->Free (self->allocator, (char *) onnx_input_tensor_name); /* Setting input tensor caps */ self->input_tensors_caps = gst_caps_make_writable (self->input_tensors_caps); /* Check if all channels are passthrough 
(scale=1.0, offset=0.0) */ gboolean is_passthrough = TRUE; if (self->scales && self->offsets) { for (i = 0; i < self->num_channels; i++) { if (self->scales[i] != 1.0 || self->offsets[i] != 0.0) { is_passthrough = FALSE; break; } } } if (self->input_data_type == GST_TENSOR_DATA_TYPE_UINT8 && gst_format && is_passthrough) gst_caps_set_simple (self->input_tensors_caps, "format", G_TYPE_STRING, gst_format, NULL); if (self->fixedInputImageSize) gst_caps_set_simple (self->input_tensors_caps, "width", G_TYPE_INT, self->width, "height", G_TYPE_INT, self->height, NULL); // Get output names self->output_names = g_new0 (char *, self->output_count); for (i = 0; i < self->output_count; ++i) { status = api->SessionGetOutputName (self->session, i, self->allocator, &self->output_names[i]); if (status) { GST_ERROR_OBJECT (self, "Failed to get output name %zu: %s", i, api->GetErrorMessage (status)); goto error; } GST_DEBUG_OBJECT (self, "Output name %lu:%s", i, self->output_names[i]); } GValue v_tensors_set = G_VALUE_INIT; GstStructure *tensors_s = NULL; gchar *group_id = NULL; g_value_init (&v_tensors_set, GST_TYPE_UNIQUE_LIST); self->output_ids = g_new0 (GQuark, self->output_count); self->output_dims_orders = g_new0 (GstTensorDimOrder, self->output_count); for (i = 0; i < self->output_count; i++) { OrtTypeInfo *output_type_info = NULL; const OrtTensorTypeAndShapeInfo *output_tensor_info = NULL; size_t card; ONNXTensorElementDataType type; GstTensorDataType gst_data_type; size_t j; gchar *tensor_name = NULL; gchar *tensor_id = NULL; gsize *output_dims = NULL; status = api->SessionGetOutputTypeInfo (self->session, i, &output_type_info); if (status) { GST_ERROR_OBJECT (self, "Failed to get info for output tensor %zu: %s", i, api->GetErrorMessage (status)); goto error; } status = api->CastTypeInfoToTensorInfo (output_type_info, &output_tensor_info); if (status) { GST_ERROR_OBJECT (self, "Failed to get cast type for output tensor" " %zu: %s", i, api->GetErrorMessage (status)); 
api->ReleaseTypeInfo (output_type_info); goto error; } status = api->GetDimensionsCount (output_tensor_info, &card); if (status) { GST_ERROR_OBJECT (self, "Failed to get cardinality for output tensor" " %zu: %s", i, api->GetErrorMessage (status)); api->ReleaseTypeInfo (output_type_info); goto error; } status = api->GetTensorElementType (output_tensor_info, &type); if (status) { GST_ERROR_OBJECT (self, "Failed to get element type for output tensor" " %zu: %s", i, api->GetErrorMessage (status)); api->ReleaseTypeInfo (output_type_info); goto error; } gst_data_type = onnx_data_type_to_gst (type); /* Get dimensions from ONNX */ int64_t *shape = (int64_t *) g_alloca (card * sizeof (int64_t)); output_dims = (gsize *) g_malloc0 (card * sizeof (gsize)); status = api->GetDimensions (output_tensor_info, shape, card); if (status) { GST_ERROR_OBJECT (self, "Failed to get output tensor (%s) dimensions", self->output_names[i]); api->ReleaseStatus (status); status = NULL; g_free (output_dims); api->ReleaseTypeInfo (output_type_info); goto error; } for (j = 0; j < card; j++) { output_dims[j] = shape[j] > 0 ? 
shape[j] : G_MAXSIZE; } /* Look up tensor name in modelinfo */ tensor_name = gst_analytics_modelinfo_find_tensor_name (modelinfo, MODELINFO_DIRECTION_OUTPUT, i, self->output_names[i], gst_data_type, card, output_dims); if (!tensor_name) { gchar *dims_str = build_dims_str (card, output_dims); GST_ERROR_OBJECT (self, "Model info file doesn't contain info for output_tensor[%zu]:%s matching the" " type %s and dims %s", i, self->output_names[i], gst_tensor_data_type_get_name (gst_data_type), dims_str); g_free (dims_str); g_free (output_dims); api->ReleaseTypeInfo (output_type_info); goto error; } /* Validation: modelinfo successfully matched dims and datatype from ONNX */ GST_INFO_OBJECT (self, "Output tensor[%zu]:%s validated - modelinfo matches ONNX model " "(type: %s)", i, self->output_names[i], gst_tensor_data_type_get_name (gst_data_type)); /* Get tensor ID from modelinfo */ tensor_id = gst_analytics_modelinfo_get_id (modelinfo, tensor_name); if (!tensor_id) { GST_ERROR_OBJECT (self, "Model info doesn't have 'id' for tensor %s", tensor_name); g_free (tensor_name); g_free (output_dims); api->ReleaseTypeInfo (output_type_info); goto error; } GST_DEBUG_OBJECT (self, "Mapping output_tensor[%zu]:%s of type %s to id %s", i, self->output_names[i], gst_tensor_data_type_get_name (gst_data_type), tensor_id); self->output_ids[i] = g_quark_from_string (tensor_id); /* tensor description */ GstStructure *tensor_desc = gst_structure_new_empty ("tensor/strided"); /* Setting dims */ GValue val_dims = G_VALUE_INIT, val = G_VALUE_INIT; GValue val_caps = G_VALUE_INIT; gst_value_array_init (&val_dims, card); g_value_init (&val, G_TYPE_INT); g_value_init (&val_caps, GST_TYPE_CAPS); for (j = 0; j < card; j++) { g_value_set_int (&val, output_dims[j] != G_MAXSIZE ? 
output_dims[j] : 0); gst_value_array_append_value (&val_dims, &val); } /* Get dims-order from modelinfo (defaults to row-major if not specified) */ GstTensorDimOrder dims_order = gst_analytics_modelinfo_get_dims_order (modelinfo, tensor_name); self->output_dims_orders[i] = dims_order; const gchar *dims_order_str = dims_order == GST_TENSOR_DIM_ORDER_COL_MAJOR ? "col-major" : "row-major"; gst_structure_set (tensor_desc, "dims-order", G_TYPE_STRING, dims_order_str, "tensor-id", G_TYPE_STRING, g_quark_to_string (self->output_ids[i]), NULL); GST_INFO_OBJECT (self, "%s[dims-order]=%s", g_quark_to_string (self->output_ids[i]), dims_order_str); gst_structure_take_value (tensor_desc, "dims", &val_dims); g_value_unset (&val); /* Setting datatype */ if (!gst_onnx_inference_set_tensordec_datatype (self, type, tensor_desc)) { GST_ERROR_OBJECT (self, "Failed to datatype for output tensor (%s) dimensions", self->output_names[i]); gst_structure_free (tensor_desc); g_value_unset (&v_tensors_set); api->ReleaseTypeInfo (output_type_info); goto error; } /* tensor caps */ GstCaps *tensor_caps = gst_caps_new_full (tensor_desc, NULL); /* Append tensor caps to set */ gst_value_set_caps (&val_caps, tensor_caps); gst_caps_unref (tensor_caps); gst_value_unique_list_append_and_take_value (&v_tensors_set, &val_caps); /* Get group-id from modelinfo on last tensor */ if (i == (self->output_count - 1)) { group_id = gst_analytics_modelinfo_get_group_id (modelinfo); if (!group_id) { GST_ERROR_OBJECT (self, "Model info doesn't have 'group-id'"); g_free (tensor_name); g_free (tensor_id); g_free (output_dims); api->ReleaseTypeInfo (output_type_info); goto error; } } /* Cleanup */ g_free (tensor_name); g_free (tensor_id); g_free (output_dims); api->ReleaseTypeInfo (output_type_info); } if (!tensors_s) tensors_s = gst_structure_new_empty ("tensorgroups"); GstStructure *output_caps_struct; gst_structure_set_value (tensors_s, group_id, &v_tensors_set); output_caps_struct = gst_caps_get_structure 
(self->output_tensors_caps, 0); gst_structure_set (output_caps_struct, "tensors", GST_TYPE_STRUCTURE, tensors_s, NULL); gst_structure_free (tensors_s); g_value_unset (&v_tensors_set); if (group_id) g_free (group_id); // Create memory info for CPU status = api->CreateCpuMemoryInfo (OrtArenaAllocator, OrtMemTypeDefault, &self->memory_info); if (status) { GST_WARNING_OBJECT (self, "Failed to create memory info: %s", api->GetErrorMessage (status)); goto error; } ret = TRUE; done: if (modelinfo) gst_analytics_modelinfo_free (modelinfo); GST_OBJECT_UNLOCK (self); return ret; error: if (status) api->ReleaseStatus (status); if (input_type_info) api->ReleaseTypeInfo (input_type_info); if (session_options) api->ReleaseSessionOptions (session_options); if (modelinfo) gst_analytics_modelinfo_free (modelinfo); GST_OBJECT_UNLOCK (self); gst_onnx_inference_stop (trans); return ret; } static gboolean gst_onnx_inference_stop (GstBaseTransform * trans) { GstOnnxInference *self = GST_ONNX_INFERENCE (trans); size_t i; GST_OBJECT_LOCK (self); if (!self->session) goto done; // Clean up output names if (self->output_names) { for (i = 0; i < self->output_count; i++) { if (self->output_names[i]) self->allocator->Free (self->allocator, self->output_names[i]); } } g_free (self->output_names); self->output_names = NULL; g_free (self->output_ids); self->output_ids = NULL; g_free (self->output_dims_orders); self->output_dims_orders = NULL; self->output_count = 0; if (self->memory_info) api->ReleaseMemoryInfo (self->memory_info); self->memory_info = NULL; api->ReleaseSession (self->session); self->session = NULL; if (self->env) api->ReleaseEnv (self->env); self->env = NULL; done: GST_OBJECT_UNLOCK (self); return TRUE; } static gboolean gst_onnx_inference_set_caps (GstBaseTransform * trans, GstCaps * incaps, GstCaps * outcaps) { GstOnnxInference *self = GST_ONNX_INFERENCE (trans); if (!gst_video_info_from_caps (&self->video_info, incaps)) { GST_ERROR_OBJECT (self, "Failed to parse caps"); return 
FALSE; } if (self->fixedInputImageSize && (self->video_info.width != self->width || self->video_info.height != self->height)) { GST_ERROR_OBJECT (self, "Dimensions from caps %ux%u doesn't match model" " dimensions %dx%d", self->video_info.width, self->video_info.height, self->width, self->height); return FALSE; } if (self->dest == NULL || self->width * self->height != self->video_info.width * self->video_info.height) { gsize element_size = get_tensor_type_size (self->input_data_type); gsize alloc_size; /* Use GLib's checked multiplication to prevent overflow */ if (!g_size_checked_mul (&alloc_size, self->video_info.width, self->video_info.height) || !g_size_checked_mul (&alloc_size, alloc_size, self->channels) || !g_size_checked_mul (&alloc_size, alloc_size, element_size)) { GST_ERROR_OBJECT (self, "Integer overflow in buffer allocation: %dx%d pixels, %u channels, %zu bytes per element", self->video_info.width, self->video_info.height, self->channels, element_size); return FALSE; } g_free (self->dest); self->dest = g_malloc (alloc_size); } self->width = self->video_info.width; self->height = self->video_info.height; return TRUE; } #define _convert_image_scale_offset(Type) \ G_STMT_START { \ size_t destIndex = 0; \ Type tmp; \ \ if (!planar) { \ for (int32_t j = 0; j < dstHeight; ++j) { \ for (int32_t i = 0; i < dstWidth; ++i) { \ for (int32_t k = 0; k < dstChannels; ++k) { \ tmp = *srcPtr[k]; \ dst[destIndex++] = (Type)(tmp * scales[k] + offsets[k]); \ srcPtr[k] += pixel_stride; \ } \ } \ /* correct for stride */ \ for (uint32_t k = 0; k < 3; ++k) \ srcPtr[k] += stride - pixel_stride * dstWidth; \ } \ } else { \ size_t frameSize = dstWidth * dstHeight; \ Type *destPtr[3] = { dst, dst + frameSize, dst + 2 * frameSize }; \ for (int32_t j = 0; j < dstHeight; ++j) { \ for (int32_t i = 0; i < dstWidth; ++i) { \ for (int32_t k = 0; k < dstChannels; ++k) { \ tmp = *srcPtr[k]; \ destPtr[k][destIndex] = (Type)(tmp * scales[k] + offsets[k]); \ srcPtr[k] += pixel_stride; \ } 
\ destIndex++; \ } \ /* correct for stride */ \ for (uint32_t k = 0; k < 3; ++k) \ srcPtr[k] += stride - pixel_stride * dstWidth; \ } \ } \ } \ G_STMT_END; static void convert_image_scale_offset_u8 (guint8 * dst, gint dstWidth, gint dstHeight, gint dstChannels, gboolean planar, guint8 ** srcPtr, guint8 pixel_stride, guint32 stride, const gdouble * scales, const gdouble * offsets) { _convert_image_scale_offset (guint8); } static void convert_image_scale_offset_f32 (gfloat * dst, gint dstWidth, gint dstHeight, gint dstChannels, gboolean planar, guint8 ** srcPtr, guint8 pixel_stride, guint32 stride, const gdouble * scales, const gdouble * offsets) { _convert_image_scale_offset (gfloat); } static GstFlowReturn gst_onnx_inference_transform_ip (GstBaseTransform * trans, GstBuffer * buf) { GstOnnxInference *self = GST_ONNX_INFERENCE (trans); GstMapInfo info; OrtStatus *status = NULL; OrtTypeInfo *input_type_info = NULL; OrtValue *input_tensor = NULL; OrtValue **output_tensors = NULL; const OrtTensorTypeAndShapeInfo *input_tensor_info; size_t num_dims; int64_t *input_dims; uint8_t *srcPtr[3]; size_t inputTensorSize; char *input_names[1]; GstTensorMeta *tmeta = NULL; OrtTensorTypeAndShapeInfo *output_tensor_info = NULL; if (!gst_buffer_map (buf, &info, GST_MAP_READ)) { GST_ELEMENT_ERROR (trans, STREAM, FAILED, (NULL), ("Could not map input buffer")); return GST_FLOW_ERROR; } status = api->SessionGetInputName (self->session, 0, self->allocator, input_names); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get input name")); goto error; } status = api->SessionGetInputTypeInfo (self->session, 0, &input_type_info); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get input type info: %s", api->GetErrorMessage (status))); goto error; } status = api->CastTypeInfoToTensorInfo (input_type_info, &input_tensor_info); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to cast type info: %s", api->GetErrorMessage 
(status))); goto error; } status = api->GetDimensionsCount (input_tensor_info, &num_dims); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get dimensions count: %s", api->GetErrorMessage (status))); goto error; } input_dims = (int64_t *) g_alloca (num_dims * sizeof (int64_t)); status = api->GetDimensions (input_tensor_info, input_dims, num_dims); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get dimensions: %s", api->GetErrorMessage (status))); goto error; } api->ReleaseTypeInfo (input_type_info); input_type_info = NULL; if (self->batch_dim >= 0) input_dims[self->batch_dim] = 1; if (input_dims[self->height_dim] >= 0) { if (input_dims[self->height_dim] != self->height) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Buffer has height %d, but model expects %zu", self->height, input_dims[self->height_dim])); goto error; } } else { input_dims[self->height_dim] = self->height; } if (input_dims[self->width_dim] >= 0) { if (input_dims[self->width_dim] != self->width) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Buffer has width %d, but model expects %zu", self->width, input_dims[self->width_dim])); goto error; } } else { input_dims[self->width_dim] = self->width; } GST_LOG_OBJECT (self, "Input dimensions: %" G_GINT64_FORMAT ":%" G_GINT64_FORMAT ":%" G_GINT64_FORMAT ":%" G_GINT64_FORMAT, input_dims[0], input_dims[1], input_dims[2], input_dims[3]); // copy video frame switch (self->video_info.finfo->format) { case GST_VIDEO_FORMAT_RGBA: srcPtr[0] = info.data; srcPtr[1] = info.data + 1; srcPtr[2] = info.data + 2; break; case GST_VIDEO_FORMAT_BGRA: srcPtr[0] = info.data + 2; srcPtr[1] = info.data + 1; srcPtr[2] = info.data + 0; break; case GST_VIDEO_FORMAT_ARGB: srcPtr[0] = info.data + 1; srcPtr[1] = info.data + 2; srcPtr[2] = info.data + 3; break; case GST_VIDEO_FORMAT_ABGR: srcPtr[0] = info.data + 3; srcPtr[1] = info.data + 2; srcPtr[2] = info.data + 1; break; case GST_VIDEO_FORMAT_RGB: srcPtr[0] = 
info.data; srcPtr[1] = info.data + 1; srcPtr[2] = info.data + 2; break; case GST_VIDEO_FORMAT_BGR: srcPtr[0] = info.data + 2; srcPtr[1] = info.data + 1; srcPtr[2] = info.data + 0; break; default: g_assert_not_reached (); break; } inputTensorSize = self->width * self->height * self->channels * get_tensor_type_size (self->input_data_type); /* Check if all channels are passthrough (scale=1.0, offset=0.0) */ gboolean is_passthrough_transform = TRUE; if (self->scales && self->offsets) { for (gsize c = 0; c < self->num_channels; c++) { if (self->scales[c] != 1.0 || self->offsets[c] != 0.0) { is_passthrough_transform = FALSE; break; } } } switch (self->input_data_type) { case GST_TENSOR_DATA_TYPE_UINT8:{ uint8_t *src_data; if (is_passthrough_transform) { src_data = info.data; } else { convert_image_scale_offset_u8 (self->dest, self->width, self->height, self->channels, self->planar, srcPtr, GST_VIDEO_INFO_COMP_PSTRIDE (&self->video_info, 0), GST_VIDEO_INFO_PLANE_STRIDE (&self->video_info, 0), self->scales, self->offsets); src_data = self->dest; } status = api->CreateTensorWithDataAsOrtValue (self->memory_info, src_data, inputTensorSize, input_dims, num_dims, ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, &input_tensor); break; } case GST_TENSOR_DATA_TYPE_FLOAT32:{ convert_image_scale_offset_f32 ((float *) self->dest, self->width, self->height, self->channels, self->planar, srcPtr, GST_VIDEO_INFO_COMP_PSTRIDE (&self->video_info, 0), GST_VIDEO_INFO_PLANE_STRIDE (&self->video_info, 0), self->scales, self->offsets); status = api->CreateTensorWithDataAsOrtValue (self->memory_info, (float *) self->dest, inputTensorSize, input_dims, num_dims, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor); break; } default: GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Unsupported input datatype")); goto error; } if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to create input tensor: %s", api->GetErrorMessage (status))); goto error; } output_tensors = g_new0 (OrtValue *, 
self->output_count); status = api->Run (self->session, NULL, (const char *const *) input_names, (const OrtValue * const *) &input_tensor, 1, (const char *const *) self->output_names, self->output_count, output_tensors); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to run inference: %s", api->GetErrorMessage (status))); goto error; } self->allocator->Free (self->allocator, input_names[0]); api->ReleaseValue (input_tensor); if (!output_tensors || self->output_count == 0) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("ONNX inference failed to produce outputs")); goto error; } tmeta = gst_buffer_add_tensor_meta (buf); tmeta->num_tensors = self->output_count; tmeta->tensors = g_new0 (GstTensor *, self->output_count); for (size_t i = 0; i < self->output_count; i++) { size_t j; ONNXTensorElementDataType tensor_type; size_t num_dims; size_t num_elements; void *tensor_data; status = api->GetTensorTypeAndShape (output_tensors[i], &output_tensor_info); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get tensor info: %s", api->GetErrorMessage (status))); goto error; } status = api->GetTensorElementType (output_tensor_info, &tensor_type); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get tensor type: %s", api->GetErrorMessage (status))); goto error; } status = api->GetDimensionsCount (output_tensor_info, &num_dims); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get dimensions count: %s", api->GetErrorMessage (status))); api->ReleaseTensorTypeAndShapeInfo (output_tensor_info); goto error; } int64_t *shape = (int64_t *) g_alloca (num_dims * sizeof (int64_t)); status = api->GetDimensions (output_tensor_info, shape, num_dims); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get dimensions: %s", api->GetErrorMessage (status))); goto error; } GstTensor *tensor = gst_tensor_alloc (num_dims); tmeta->tensors[i] = tensor; tensor->id = 
self->output_ids[i]; tensor->dims_order = self->output_dims_orders[i]; for (j = 0; j < num_dims; ++j) tensor->dims[j] = shape[j]; status = api->GetTensorShapeElementCount (output_tensor_info, &num_elements); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Could not get the number of elements in the tensor: %s", api->GetErrorMessage (status))); goto error; } api->ReleaseTensorTypeAndShapeInfo (output_tensor_info); output_tensor_info = NULL; status = api->GetTensorMutableData (output_tensors[i], &tensor_data); if (status) { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Failed to get tensor data: %s", api->GetErrorMessage (status))); goto error; } if (tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { size_t buffer_size = num_elements * sizeof (float); tensor->data = gst_buffer_new_allocate (NULL, buffer_size, NULL); gst_buffer_fill (tensor->data, 0, tensor_data, buffer_size); tensor->data_type = GST_TENSOR_DATA_TYPE_FLOAT32; } else if (tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { size_t buffer_size = num_elements * sizeof (int); tensor->data = gst_buffer_new_allocate (NULL, buffer_size, NULL); gst_buffer_fill (tensor->data, 0, tensor_data, buffer_size); tensor->data_type = GST_TENSOR_DATA_TYPE_INT32; } else { GST_ELEMENT_ERROR (self, STREAM, FAILED, (NULL), ("Output tensor is not FLOAT32 or INT32, not supported")); goto error; } } // Clean up output tensors for (size_t i = 0; i < self->output_count; i++) { if (output_tensors[i]) api->ReleaseValue (output_tensors[i]); } g_free (output_tensors); GST_TRACE_OBJECT (trans, "Num tensors:%zu", self->output_count); gst_buffer_unmap (buf, &info); return GST_FLOW_OK; error: if (status) api->ReleaseStatus (status); if (input_names[0]) self->allocator->Free (self->allocator, input_names[0]); if (input_type_info) api->ReleaseTypeInfo (input_type_info); if (input_tensor) api->ReleaseValue (input_tensor); if (output_tensors) { for (size_t i = 0; i < self->output_count; i++) { if (output_tensors[i]) 
api->ReleaseValue (output_tensors[i]); } g_free (output_tensors); } if (output_tensor_info) api->ReleaseTensorTypeAndShapeInfo (output_tensor_info); if (tmeta) gst_buffer_remove_meta (buf, (GstMeta *) tmeta); gst_buffer_unmap (buf, &info); return GST_FLOW_ERROR; }