/* GStreamer * Copyright (C) 2010, 2013 Ole André Vadla Ravnås * Copyright (C) 2012-2016 Alessandro Decina * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin Street, Suite 500, * Boston, MA 02110-1335, USA. */ /** * SECTION:element-vtdec * @title: vtdec * * Apple VideoToolbox based decoder which might use a HW or a SW * implementation depending on the device. * * ## Example launch line * |[ * gst-launch-1.0 -v filesrc location=file.mov ! qtdemux ! queue ! h264parse ! vtdec ! videoconvert ! autovideosink * ]| * Decode h264 video from a mov file. * */ /** * SECTION:element-vtdec_hw * @title: vtdec_hw * * Apple VideoToolbox based HW-only decoder. * * ## Example launch line * |[ * gst-launch-1.0 -v filesrc location=file.mov ! qtdemux ! queue ! h264parse ! vtdec_hw ! videoconvert ! autovideosink * ]| * Decode h264 video from a mov file. * */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "vtdec.h" #include #include #include #include #include #if TARGET_OS_OSX || TARGET_OS_IOS || TARGET_OS_TV || TARGET_OS_VISION #define HAVE_SUPPLEMENTAL #if (TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 110000) || (TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED >= 260200) || (TARGET_OS_TV && __TV_OS_VERSION_MAX_ALLOWED >= 260200) || (TARGET_OS_VISION && __VISION_OS_VERSION_MAX_ALLOWED >= 260200) #define HAVE_SUPPLEMENTAL_DEFINITION #else #include #endif #endif #include "vtutil.h" #include "helpers.h" #include "corevideobuffer.h" #include "coremediabuffer.h" #include "videotexturecache-gl.h" #if defined(APPLEMEDIA_MOLTENVK) #include "videotexturecache-vulkan.h" #endif GST_DEBUG_CATEGORY_STATIC (gst_vtdec_debug_category); #define GST_CAT_DEFAULT gst_vtdec_debug_category enum { /* leave some headroom for new GstVideoCodecFrameFlags flags */ VTDEC_FRAME_FLAG_SKIP = (1 << 10), VTDEC_FRAME_FLAG_DROP = (1 << 11), VTDEC_FRAME_FLAG_ERROR = (1 << 12), }; #if (defined(__IPHONE_OS_VERSION_MAX_ALLOWED) && __IPHONE_OS_VERSION_MAX_ALLOWED < 140000) || (defined(MAC_OS_X_VERSION_MAX_ALLOWED) && MAC_OS_X_VERSION_MAX_ALLOWED < 110000) #define kCMVideoCodecType_VP9 'vp09' #endif static void gst_vtdec_finalize (GObject * object); static gboolean gst_vtdec_start (GstVideoDecoder * decoder); static gboolean gst_vtdec_stop (GstVideoDecoder * decoder); static void gst_vtdec_output_loop (GstVtdec * self); static gboolean gst_vtdec_negotiate (GstVideoDecoder * decoder); static gboolean gst_vtdec_set_format (GstVideoDecoder * decoder, GstVideoCodecState * state); static gboolean gst_vtdec_flush (GstVideoDecoder * decoder); static GstFlowReturn gst_vtdec_finish (GstVideoDecoder * decoder); static GstFlowReturn gst_vtdec_drain (GstVideoDecoder * decoder); static gboolean gst_vtdec_sink_event (GstVideoDecoder * decoder, GstEvent * event); static GstStateChangeReturn gst_vtdec_change_state (GstElement * element, GstStateChange transition); static GstFlowReturn gst_vtdec_handle_frame (GstVideoDecoder * decoder, GstVideoCodecFrame * frame); static OSStatus gst_vtdec_create_session (GstVtdec * vtdec, GstVideoFormat format, gboolean enable_hardware); static void gst_vtdec_invalidate_session (GstVtdec * vtdec); static CMSampleBufferRef cm_sample_buffer_from_gst_buffer (GstVtdec * vtdec, GstBuffer * buf); static GstFlowReturn gst_vtdec_drain_decoder (GstVideoDecoder * decoder, gboolean flush); static CMFormatDescriptionRef create_format_description (GstVtdec * vtdec, CMVideoCodecType cm_format); static CMFormatDescriptionRef create_format_description_from_codec_data (GstVtdec * vtdec, CMVideoCodecType cm_format, GstBuffer * codec_data); static void gst_vtdec_session_output_callback (void *decompression_output_ref_con, void *source_frame_ref_con, OSStatus status, VTDecodeInfoFlags info_flags, CVImageBufferRef image_buffer, CMTime pts, CMTime duration); static gboolean compute_h264_decode_picture_buffer_size (GstVtdec * vtdec, GstBuffer * codec_data, int *length); static gboolean compute_hevc_decode_picture_buffer_size (GstVtdec * vtdec, GstBuffer * codec_data, int *length); static gboolean gst_vtdec_compute_dpb_size (GstVtdec * vtdec, CMVideoCodecType cm_format, GstBuffer * codec_data); static gboolean gst_vtdec_check_vp9_support (GstVtdec * vtdec); static gboolean gst_vtdec_build_vp9_vpcc_from_caps (GstVtdec * vtdec, GstStructure * caps_struct); static gboolean gst_vtdec_check_av1_support (GstVtdec * vtdec); static gboolean gst_vtdec_handle_av1_sequence_header (GstVtdec * vtdec, GstVideoCodecFrame * frame); static void gst_vtdec_set_latency (GstVtdec * vtdec); static void gst_vtdec_set_context (GstElement * element, GstContext * context); static GstCaps *gst_vtdec_getcaps (GstVideoDecoder * decoder, GstCaps * filter); static GstStaticPadTemplate gst_vtdec_sink_template = GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS ("video/x-h264, stream-format=avc, alignment=au," " width=(int)[8, MAX], height=(int)[8, MAX];" "video/x-h265, stream-format=(string){ hev1, hvc1 }, alignment=au," " width=(int)[16, MAX], height=(int)[16, MAX];" "video/x-av1, stream-format=obu-stream, alignment=(string){ tu, frame }, " "width=(int)[64, MAX], height=(int)[64, MAX];" "video/mpeg, mpegversion=2, systemstream=false, parsed=true;" "image/jpeg;" "video/x-prores, variant = { (string)standard, (string)hq, (string)lt," " (string)proxy, (string)4444, (string)4444xq };" "video/x-vp9, profile=(string){ 0, 2 }, " " width=(int)[64, MAX], height=(int)[64, MAX];") ); /* define EnableHardwareAcceleratedVideoDecoder in < 10.9 */ #if defined(MAC_OS_X_VERSION_MAX_ALLOWED) && MAC_OS_X_VERSION_MAX_ALLOWED < 1090 const CFStringRef kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder = CFSTR ("EnableHardwareAcceleratedVideoDecoder"); const CFStringRef kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder = CFSTR ("RequireHardwareAcceleratedVideoDecoder"); #endif #define VIDEO_SRC_CAPS_FORMATS "{ NV12, AYUV64, ARGB64_BE, P010_10LE }" #define VIDEO_SRC_CAPS_NATIVE \ GST_VIDEO_CAPS_MAKE(VIDEO_SRC_CAPS_FORMATS) ";" \ GST_VIDEO_CAPS_MAKE_WITH_FEATURES(GST_CAPS_FEATURE_MEMORY_GL_MEMORY,\ VIDEO_SRC_CAPS_FORMATS) ", " \ "texture-target = (string) rectangle " #if defined(APPLEMEDIA_MOLTENVK) #define VIDEO_SRC_CAPS VIDEO_SRC_CAPS_NATIVE "; " \ GST_VIDEO_CAPS_MAKE_WITH_FEATURES(GST_CAPS_FEATURE_MEMORY_VULKAN_IMAGE, \ VIDEO_SRC_CAPS_FORMATS) #else #define VIDEO_SRC_CAPS VIDEO_SRC_CAPS_NATIVE #endif G_DEFINE_TYPE (GstVtdec, gst_vtdec, GST_TYPE_VIDEO_DECODER); static void gst_vtdec_class_init (GstVtdecClass * klass) { GObjectClass *gobject_class = G_OBJECT_CLASS (klass); GstElementClass *element_class = GST_ELEMENT_CLASS (klass); GstVideoDecoderClass *video_decoder_class = GST_VIDEO_DECODER_CLASS (klass); /* Setting up pads and setting metadata should be moved to base_class_init if you intend to subclass this class. */ gst_element_class_add_static_pad_template (element_class, &gst_vtdec_sink_template); { GstCaps *caps = gst_caps_from_string (VIDEO_SRC_CAPS); /* RGBA64_LE is kCVPixelFormatType_64RGBALE, only available on macOS 11.3+ */ if (GST_APPLEMEDIA_HAVE_64RGBALE) caps = gst_vtutil_caps_append_video_format (caps, "RGBA64_LE"); gst_element_class_add_pad_template (element_class, gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS, caps)); } gst_element_class_set_static_metadata (element_class, "Apple VideoToolbox decoder", "Codec/Decoder/Video/Hardware", "Apple VideoToolbox Decoder", "Ole André Vadla Ravnås ; " "Alessandro Decina "); gobject_class->finalize = gst_vtdec_finalize; element_class->set_context = gst_vtdec_set_context; element_class->change_state = gst_vtdec_change_state; video_decoder_class->start = GST_DEBUG_FUNCPTR (gst_vtdec_start); video_decoder_class->stop = GST_DEBUG_FUNCPTR (gst_vtdec_stop); video_decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_vtdec_negotiate); video_decoder_class->set_format = GST_DEBUG_FUNCPTR (gst_vtdec_set_format); video_decoder_class->flush = GST_DEBUG_FUNCPTR (gst_vtdec_flush); video_decoder_class->finish = GST_DEBUG_FUNCPTR (gst_vtdec_finish); video_decoder_class->drain = GST_DEBUG_FUNCPTR (gst_vtdec_drain); video_decoder_class->handle_frame = GST_DEBUG_FUNCPTR (gst_vtdec_handle_frame); video_decoder_class->sink_event = GST_DEBUG_FUNCPTR (gst_vtdec_sink_event); video_decoder_class->getcaps = GST_DEBUG_FUNCPTR (gst_vtdec_getcaps); } static void gst_vtdec_init (GstVtdec * vtdec) { g_mutex_init (&vtdec->queue_mutex); g_cond_init (&vtdec->queue_cond); } void gst_vtdec_finalize (GObject * object) { GstVtdec *vtdec = GST_VTDEC (object); GST_DEBUG_OBJECT (vtdec, "finalize"); g_mutex_clear (&vtdec->queue_mutex); g_cond_clear (&vtdec->queue_cond); G_OBJECT_CLASS (gst_vtdec_parent_class)->finalize (object); } static gboolean gst_vtdec_start (GstVideoDecoder * decoder) { GstVtdec *vtdec = GST_VTDEC (decoder); GST_DEBUG_OBJECT (vtdec, "start"); vtdec->is_flushing = FALSE; vtdec->is_draining = FALSE; vtdec->downstream_ret = GST_FLOW_OK; vtdec->reorder_queue = gst_vec_deque_new (0); /* Create the output task, but pause it immediately */ vtdec->pause_task = TRUE; if (!gst_pad_start_task (GST_VIDEO_DECODER_SRC_PAD (decoder), (GstTaskFunction) gst_vtdec_output_loop, vtdec, NULL)) { GST_ERROR_OBJECT (vtdec, "failed to start output thread"); return FALSE; } /* This blocks until the loop actually pauses */ gst_pad_pause_task (GST_VIDEO_DECODER_SRC_PAD (decoder)); vtdec->pause_task = FALSE; if (!vtdec->ctxh) vtdec->ctxh = gst_gl_context_helper_new (GST_ELEMENT (decoder)); return TRUE; } static gboolean gst_vtdec_stop (GstVideoDecoder * decoder) { GstVideoCodecFrame *frame; GstVtdec *vtdec = GST_VTDEC (decoder); GST_DEBUG_OBJECT (vtdec, "stop"); gst_vtdec_drain_decoder (GST_VIDEO_DECODER_CAST (vtdec), TRUE); vtdec->downstream_ret = GST_FLOW_FLUSHING; while ((frame = gst_vec_deque_pop_head (vtdec->reorder_queue))) { gst_video_decoder_release_frame (decoder, frame); } gst_vec_deque_free (vtdec->reorder_queue); vtdec->reorder_queue = NULL; gst_pad_stop_task (GST_VIDEO_DECODER_SRC_PAD (decoder)); if (vtdec->input_state) gst_video_codec_state_unref (vtdec->input_state); vtdec->input_state = NULL; if (vtdec->session) gst_vtdec_invalidate_session (vtdec); if (vtdec->texture_cache) g_object_unref (vtdec->texture_cache); vtdec->texture_cache = NULL; if (vtdec->ctxh) gst_gl_context_helper_free (vtdec->ctxh); vtdec->ctxh = NULL; if (vtdec->format_description) CFRelease (vtdec->format_description); vtdec->format_description = NULL; g_clear_pointer (&vtdec->vp9_vpcc, g_free); vtdec->vp9_vpcc_size = 0; if (vtdec->av1_sequence_header_obu) gst_buffer_unref (vtdec->av1_sequence_header_obu); vtdec->av1_sequence_header_obu = NULL; #if defined(APPLEMEDIA_MOLTENVK) gst_clear_object (&vtdec->device); gst_clear_object (&vtdec->instance); #endif return TRUE; } static void gst_vtdec_output_loop (GstVtdec * vtdec) { GstVideoCodecFrame *frame; GstFlowReturn ret = GST_FLOW_OK; GstVideoDecoder *decoder = GST_VIDEO_DECODER (vtdec); g_mutex_lock (&vtdec->queue_mutex); while (gst_vec_deque_get_length (vtdec->reorder_queue) <= vtdec->dbp_size && !vtdec->pause_task && !vtdec->is_flushing && !vtdec->is_draining) { g_cond_wait (&vtdec->queue_cond, &vtdec->queue_mutex); } /* If we're currently draining/flushing, make sure to not pause before we output all the frames */ if (vtdec->pause_task && ((!vtdec->is_flushing && !vtdec->is_draining) || gst_vec_deque_is_empty (vtdec->reorder_queue))) { GST_DEBUG_OBJECT (vtdec, "pausing output loop as requested"); g_mutex_unlock (&vtdec->queue_mutex); gst_pad_pause_task (GST_VIDEO_DECODER_SRC_PAD (decoder)); return; } /* push a buffer if there are enough frames to guarantee * that we push in PTS order, or if we're draining/flushing */ while ((gst_vec_deque_get_length (vtdec->reorder_queue) > vtdec->dbp_size) || vtdec->is_flushing || vtdec->is_draining) { gboolean is_flushing; frame = gst_vec_deque_pop_head (vtdec->reorder_queue); is_flushing = vtdec->is_flushing; g_cond_signal (&vtdec->queue_cond); g_mutex_unlock (&vtdec->queue_mutex); /* we need to check this in case dpb_size=0 (jpeg for * example) or we're draining/flushing */ if (frame) { GST_VIDEO_DECODER_STREAM_LOCK (vtdec); if (frame->flags & VTDEC_FRAME_FLAG_ERROR) { GST_VIDEO_DECODER_ERROR (vtdec, 1, STREAM, DECODE, ("Got frame %d with an error flag", frame->system_frame_number), (NULL), ret); gst_video_decoder_release_frame (decoder, frame); } else if (is_flushing || (frame->flags & VTDEC_FRAME_FLAG_SKIP)) { GST_LOG_OBJECT (vtdec, "flushing frame %d", frame->system_frame_number); gst_video_decoder_release_frame (decoder, frame); } else if (frame->flags & VTDEC_FRAME_FLAG_DROP) { GST_LOG_OBJECT (vtdec, "dropping frame %d", frame->system_frame_number); gst_video_decoder_drop_frame (decoder, frame); } else { guint32 frame_num = frame->system_frame_number; GST_TRACE_OBJECT (vtdec, "pushing frame %d", frame_num); ret = gst_video_decoder_finish_frame (decoder, frame); GST_TRACE_OBJECT (vtdec, "frame %d push ret %s", frame_num, gst_flow_get_name (ret)); } GST_VIDEO_DECODER_STREAM_UNLOCK (vtdec); } g_mutex_lock (&vtdec->queue_mutex); if (!frame || ret != GST_FLOW_OK) break; } g_mutex_unlock (&vtdec->queue_mutex); GST_VIDEO_DECODER_STREAM_LOCK (vtdec); /* We need to empty the queue immediately so that session_output_callback() * can push out the current buffer, otherwise it can deadlock */ if (ret != GST_FLOW_OK) { g_mutex_lock (&vtdec->queue_mutex); while ((frame = gst_vec_deque_pop_head (vtdec->reorder_queue))) { GST_LOG_OBJECT (vtdec, "flushing frame %d", frame->system_frame_number); gst_video_decoder_release_frame (decoder, frame); } /* Don't consider the FLUSHING ret an error if something flagged is_flushing in the meantime */ if (vtdec->is_flushing && ret == GST_FLOW_FLUSHING) { ret = GST_FLOW_OK; } g_cond_signal (&vtdec->queue_cond); g_mutex_unlock (&vtdec->queue_mutex); } vtdec->downstream_ret = ret; GST_VIDEO_DECODER_STREAM_UNLOCK (vtdec); if (ret != GST_FLOW_OK) { GST_DEBUG_OBJECT (vtdec, "pausing output task because of downstream: %s", gst_flow_get_name (ret)); gst_pad_pause_task (GST_VIDEO_DECODER_SRC_PAD (decoder)); } } static gboolean gst_vtdec_ensure_output_loop (GstVtdec * vtdec) { GstPad *pad = GST_VIDEO_DECODER_SRC_PAD (vtdec); GstTask *task = GST_PAD_TASK (pad); return gst_task_resume (task); } static void gst_vtdec_pause_output_loop (GstVtdec * vtdec) { g_mutex_lock (&vtdec->queue_mutex); vtdec->pause_task = TRUE; g_cond_signal (&vtdec->queue_cond); g_mutex_unlock (&vtdec->queue_mutex); gst_pad_pause_task (GST_VIDEO_DECODER_SRC_PAD (vtdec)); GST_DEBUG_OBJECT (vtdec, "paused output thread"); g_mutex_lock (&vtdec->queue_mutex); vtdec->pause_task = FALSE; g_mutex_unlock (&vtdec->queue_mutex); } static void setup_texture_cache (GstVtdec * vtdec, GstVideoFormat format) { GstVideoCodecState *output_state; GST_INFO_OBJECT (vtdec, "setting up texture cache"); output_state = gst_video_decoder_get_output_state (GST_VIDEO_DECODER (vtdec)); gst_video_texture_cache_set_format (vtdec->texture_cache, format, output_state->caps); gst_video_codec_state_unref (output_state); } /* * Unconditionally output a high bit-depth + alpha format when decoding Apple * ProRes video if downstream supports it. * TODO: read src_pix_fmt to get the preferred output format * https://wiki.multimedia.cx/index.php/Apple_ProRes#Frame_header */ static GstVideoFormat get_preferred_video_format (GstStructure * s, gboolean prores) { const GValue *list = gst_structure_get_value (s, "format"); guint i, size = gst_value_list_get_size (list); for (i = 0; i < size; i++) { const GValue *value = gst_value_list_get_value (list, i); const char *fmt = g_value_get_string (value); GstVideoFormat vfmt = gst_video_format_from_string (fmt); switch (vfmt) { case GST_VIDEO_FORMAT_NV12: case GST_VIDEO_FORMAT_P010_10LE: if (!prores) return vfmt; break; case GST_VIDEO_FORMAT_AYUV64: case GST_VIDEO_FORMAT_ARGB64_BE: if (prores) return vfmt; break; case GST_VIDEO_FORMAT_RGBA64_LE: if (GST_APPLEMEDIA_HAVE_64RGBALE) { if (prores) return vfmt; } else { /* Codepath will never be hit on macOS older than Big Sur (11.3) */ g_warn_if_reached (); } break; default: break; } } return GST_VIDEO_FORMAT_UNKNOWN; } static gboolean gst_vtdec_needs_new_session (GstCaps * old_caps, GstCaps * new_caps) { GstCaps *old_copy, *new_copy; gboolean ret; if (!old_caps) return TRUE; if (!new_caps) return FALSE; old_copy = gst_caps_copy (old_caps); new_copy = gst_caps_copy (new_caps); /* Just ignore the framerate for now, was causing decoding errors with some fmp4 files */ gst_structure_remove_field (gst_caps_get_structure (old_copy, 0), "framerate"); gst_structure_remove_field (gst_caps_get_structure (new_copy, 0), "framerate"); ret = !gst_caps_is_equal (old_copy, new_copy); gst_caps_unref (old_copy); gst_caps_unref (new_copy); return ret; } static gboolean gst_vtdec_negotiate (GstVideoDecoder * decoder) { GstVideoCodecState *output_state = NULL; GstCaps *peercaps = NULL, *caps = NULL, *templcaps = NULL, *prevcaps = NULL; GstVideoFormat format = GST_VIDEO_FORMAT_UNKNOWN; GstVtdec *vtdec; OSStatus err = noErr; GstCapsFeatures *features = NULL; gboolean output_textures = FALSE; #if defined(APPLEMEDIA_MOLTENVK) gboolean output_vulkan = FALSE; #endif GST_DEBUG_OBJECT (decoder, "negotiating"); vtdec = GST_VTDEC (decoder); if (vtdec->session) gst_vtdec_drain_decoder (GST_VIDEO_DECODER_CAST (vtdec), FALSE); output_state = gst_video_decoder_get_output_state (GST_VIDEO_DECODER (vtdec)); if (output_state) { prevcaps = gst_caps_ref (output_state->caps); gst_video_codec_state_unref (output_state); } templcaps = gst_pad_get_pad_template_caps (GST_VIDEO_DECODER_SRC_PAD (decoder)); peercaps = gst_pad_peer_query_caps (GST_VIDEO_DECODER_SRC_PAD (vtdec), templcaps); gst_caps_unref (templcaps); if (gst_caps_is_empty (peercaps)) { GST_INFO_OBJECT (vtdec, "empty peer caps, can't negotiate"); gst_caps_unref (peercaps); if (prevcaps) gst_caps_unref (prevcaps); return FALSE; } if (prevcaps && gst_caps_can_intersect (prevcaps, peercaps)) { /* The hardware decoder can become (temporarily) unavailable across * VTDecompressionSessionCreate/Destroy calls. So if the currently configured * caps are still accepted by downstream we keep them so we don't have to * destroy and recreate the session. */ GST_INFO_OBJECT (vtdec, "current and peer caps are compatible, keeping current caps"); caps = gst_caps_ref (prevcaps); gst_caps_unref (peercaps); } else { caps = peercaps; } caps = gst_caps_truncate (gst_caps_make_writable (caps)); /* Try to use whatever video format downstream prefers */ { GstStructure *s = gst_caps_get_structure (caps, 0); if (gst_structure_has_field_typed (s, "format", GST_TYPE_LIST)) { GstStructure *is = gst_caps_get_structure (vtdec->input_state->caps, 0); const char *name = gst_structure_get_name (is); format = get_preferred_video_format (s, g_strcmp0 (name, "video/x-prores") == 0); } if (format == GST_VIDEO_FORMAT_UNKNOWN) { const char *fmt; gst_structure_fixate_field (s, "format"); fmt = gst_structure_get_string (s, "format"); if (fmt) format = gst_video_format_from_string (fmt); else /* If all fails, just use NV12 */ format = GST_VIDEO_FORMAT_NV12; } } features = gst_caps_get_features (caps, 0); if (features) features = gst_caps_features_copy (features); output_state = gst_video_decoder_set_output_state (GST_VIDEO_DECODER (vtdec), format, vtdec->video_info.width, vtdec->video_info.height, vtdec->input_state); output_state->caps = gst_video_info_to_caps (&output_state->info); if (features) { gst_caps_set_features (output_state->caps, 0, features); #if TARGET_OS_OSX || TARGET_OS_IOS || TARGET_OS_TV output_textures = gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_GL_MEMORY); if (output_textures) gst_caps_set_simple (output_state->caps, "texture-target", G_TYPE_STRING, #if TARGET_OS_OSX GST_GL_TEXTURE_TARGET_RECTANGLE_STR, #else GST_GL_TEXTURE_TARGET_2D_STR, #endif NULL); #endif #if defined(APPLEMEDIA_MOLTENVK) output_vulkan = gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_VULKAN_IMAGE); #endif } gst_caps_unref (caps); if (!prevcaps || !gst_caps_is_equal (prevcaps, output_state->caps)) { gboolean renegotiating = vtdec->session != NULL; GST_INFO_OBJECT (vtdec, "negotiated output format %" GST_PTR_FORMAT " previous %" GST_PTR_FORMAT, output_state->caps, prevcaps); /* Only recreate session if something other than framerate changed */ if (gst_vtdec_needs_new_session (prevcaps, output_state->caps)) { if (vtdec->session) gst_vtdec_invalidate_session (vtdec); err = gst_vtdec_create_session (vtdec, format, TRUE); if (err == noErr) { GST_INFO_OBJECT (vtdec, "using hardware decoder"); } else if (err == kVTVideoDecoderNotAvailableNowErr && renegotiating) { GST_WARNING_OBJECT (vtdec, "hw decoder not available anymore"); err = gst_vtdec_create_session (vtdec, format, FALSE); } if (err != noErr) { GST_ELEMENT_ERROR (vtdec, RESOURCE, FAILED, (NULL), ("VTDecompressionSessionCreate returned %d", (int) err)); } } else { GST_INFO_OBJECT (vtdec, "no need to recreate VT session for this change"); } } gst_video_codec_state_unref (output_state); if (vtdec->texture_cache != NULL && ((GST_IS_VIDEO_TEXTURE_CACHE_GL (vtdec->texture_cache) && !output_textures) #if defined(APPLEMEDIA_MOLTENVK) || (GST_IS_VIDEO_TEXTURE_CACHE_VULKAN (vtdec->texture_cache) && !output_vulkan) #endif )) { g_object_unref (vtdec->texture_cache); vtdec->texture_cache = NULL; } if (err == noErr) { if (output_textures) { GstVideoTextureCacheGL *cache_gl = NULL; if (vtdec->texture_cache) cache_gl = GST_VIDEO_TEXTURE_CACHE_GL (vtdec->texture_cache); /* call this regardless of whether caps have changed or not since a new * local context could have become available */ if (!vtdec->ctxh) vtdec->ctxh = gst_gl_context_helper_new (GST_ELEMENT (vtdec)); gst_gl_context_helper_ensure_context (vtdec->ctxh); GST_INFO_OBJECT (vtdec, "pushing GL textures, context %p old context %p", vtdec->ctxh->context, cache_gl ? cache_gl->ctx : NULL); if (cache_gl && cache_gl->ctx != vtdec->ctxh->context) { g_object_unref (vtdec->texture_cache); vtdec->texture_cache = NULL; } if (!vtdec->texture_cache) { vtdec->texture_cache = gst_video_texture_cache_gl_new (vtdec->ctxh->context); setup_texture_cache (vtdec, format); } } #if defined(APPLEMEDIA_MOLTENVK) if (output_vulkan) { GstVideoTextureCacheVulkan *cache_vulkan = NULL; if (vtdec->texture_cache) cache_vulkan = GST_VIDEO_TEXTURE_CACHE_VULKAN (vtdec->texture_cache); gst_vulkan_ensure_element_data (GST_ELEMENT (vtdec), NULL, &vtdec->instance); if (!gst_vulkan_ensure_element_device (GST_ELEMENT (vtdec), vtdec->instance, &vtdec->device, 0)) { return FALSE; } GST_INFO_OBJECT (vtdec, "pushing vulkan images, device %" GST_PTR_FORMAT " old device %" GST_PTR_FORMAT, vtdec->device, cache_vulkan ? cache_vulkan->device : NULL); if (cache_vulkan && cache_vulkan->device != vtdec->device) { g_object_unref (vtdec->texture_cache); vtdec->texture_cache = NULL; } if (!vtdec->texture_cache) { vtdec->texture_cache = gst_video_texture_cache_vulkan_new (vtdec->device); setup_texture_cache (vtdec, format); } } #endif } if (prevcaps) gst_caps_unref (prevcaps); if (err != noErr) return FALSE; return GST_VIDEO_DECODER_CLASS (gst_vtdec_parent_class)->negotiate (decoder); } static gboolean gst_vtdec_set_format (GstVideoDecoder * decoder, GstVideoCodecState * state) { gboolean negotiate_now = TRUE; GstStructure *structure; CMVideoCodecType cm_format = 0; CMFormatDescriptionRef format_description = NULL; const char *caps_name; GstVtdec *vtdec = GST_VTDEC (decoder); GST_DEBUG_OBJECT (vtdec, "set_format"); structure = gst_caps_get_structure (state->caps, 0); caps_name = gst_structure_get_name (structure); if (!strcmp (caps_name, "video/x-h264")) { cm_format = kCMVideoCodecType_H264; } else if (!strcmp (caps_name, "video/x-h265")) { cm_format = kCMVideoCodecType_HEVC; } else if (!strcmp (caps_name, "video/mpeg")) { cm_format = kCMVideoCodecType_MPEG2Video; } else if (!strcmp (caps_name, "image/jpeg")) { cm_format = kCMVideoCodecType_JPEG; } else if (!strcmp (caps_name, "video/x-prores")) { const char *variant = gst_structure_get_string (structure, "variant"); if (variant) cm_format = gst_vtutil_codec_type_from_prores_variant (variant); if (cm_format == GST_kCMVideoCodecType_Some_AppleProRes) { GST_ERROR_OBJECT (vtdec, "Invalid ProRes variant %s", variant); return FALSE; } } else if (!strcmp (caps_name, "video/x-vp9")) { GST_INFO_OBJECT (vtdec, "cm_format is VP9"); cm_format = kCMVideoCodecType_VP9; } else if (!strcmp (caps_name, "video/x-av1")) { GST_INFO_OBJECT (vtdec, "Setting up for AV1 - will wait for sequence header"); cm_format = kCMVideoCodecType_AV1; vtdec->av1_needs_sequence_header = TRUE; /* Delay session creation until we get sequence header */ } if ((cm_format == kCMVideoCodecType_H264 || cm_format == kCMVideoCodecType_HEVC) && state->codec_data == NULL) { GST_INFO_OBJECT (vtdec, "waiting for codec_data before negotiation"); negotiate_now = FALSE; } else if (cm_format == kCMVideoCodecType_VP9) { negotiate_now = gst_vtdec_build_vp9_vpcc_from_caps (vtdec, structure); } if (cm_format == kCMVideoCodecType_AV1 && vtdec->av1_needs_sequence_header) { GST_INFO_OBJECT (vtdec, "waiting for AV1 sequence header before negotiation"); negotiate_now = FALSE; } gst_video_info_from_caps (&vtdec->video_info, state->caps); if (negotiate_now && !gst_vtdec_compute_dpb_size (vtdec, cm_format, state->codec_data)) { GST_INFO_OBJECT (vtdec, "Failed to compute DPB size"); return FALSE; } if (negotiate_now) gst_vtdec_set_latency (vtdec); if (state->codec_data) { format_description = create_format_description_from_codec_data (vtdec, cm_format, state->codec_data); } else { format_description = create_format_description (vtdec, cm_format); } if (vtdec->format_description) CFRelease (vtdec->format_description); vtdec->format_description = format_description; if (vtdec->input_state) gst_video_codec_state_unref (vtdec->input_state); vtdec->input_state = gst_video_codec_state_ref (state); return negotiate_now ? gst_vtdec_negotiate (decoder) : TRUE; } static gboolean gst_vtdec_flush (GstVideoDecoder * decoder) { GstVtdec *vtdec = GST_VTDEC (decoder); GST_DEBUG_OBJECT (vtdec, "flush"); gst_vtdec_drain_decoder (GST_VIDEO_DECODER_CAST (vtdec), TRUE); vtdec->downstream_ret = GST_FLOW_OK; return TRUE; } static GstFlowReturn gst_vtdec_finish (GstVideoDecoder * decoder) { GstVtdec *vtdec = GST_VTDEC (decoder); GST_DEBUG_OBJECT (vtdec, "finish"); return gst_vtdec_drain_decoder (GST_VIDEO_DECODER_CAST (vtdec), FALSE); } static GstFlowReturn gst_vtdec_drain (GstVideoDecoder * decoder) { GstVtdec *vtdec = GST_VTDEC (decoder); GST_DEBUG_OBJECT (vtdec, "drain"); gst_vtdec_finish (decoder); gst_vtdec_flush (decoder); return GST_FLOW_OK; } static gboolean gst_vtdec_sink_event (GstVideoDecoder * decoder, GstEvent * event) { GstVtdec *vtdec = GST_VTDEC (decoder); GstEventType type = GST_EVENT_TYPE (event); gboolean ret; switch (type) { case GST_EVENT_FLUSH_START: GST_DEBUG_OBJECT (vtdec, "flush start received, setting flushing flag"); g_mutex_lock (&vtdec->queue_mutex); vtdec->is_flushing = TRUE; g_cond_signal (&vtdec->queue_cond); g_mutex_unlock (&vtdec->queue_mutex); break; default: break; } ret = GST_VIDEO_DECODER_CLASS (gst_vtdec_parent_class)->sink_event (decoder, event); switch (type) { case GST_EVENT_FLUSH_STOP: /* The base class handles this event and calls _flush(). * We can then safely reset the flushing flag. */ GST_DEBUG_OBJECT (vtdec, "flush stop received, removing flushing flag"); g_mutex_lock (&vtdec->queue_mutex); vtdec->is_flushing = FALSE; g_mutex_unlock (&vtdec->queue_mutex); break; default: break; } return ret; } static GstStateChangeReturn gst_vtdec_change_state (GstElement * element, GstStateChange transition) { GstVtdec *self = GST_VTDEC (element); if (transition == GST_STATE_CHANGE_PAUSED_TO_READY) { GST_DEBUG_OBJECT (self, "pausing output loop on PAUSED->READY"); gst_vtdec_pause_output_loop (self); } return GST_ELEMENT_CLASS (gst_vtdec_parent_class)->change_state (element, transition); } static GstFlowReturn gst_vtdec_handle_frame (GstVideoDecoder * decoder, GstVideoCodecFrame * frame) { OSStatus status; CMSampleBufferRef cm_sample_buffer = NULL; VTDecodeFrameFlags input_flags; GstVtdec *vtdec = GST_VTDEC (decoder); GstFlowReturn ret = GST_FLOW_OK; int decode_frame_number = frame->decode_frame_number; GstTaskState task_state; if (vtdec->format_description == NULL) { ret = GST_FLOW_NOT_NEGOTIATED; goto drop; } /* Check if we need to extract AV1 sequence header for delayed initialization */ if (vtdec->av1_needs_sequence_header && vtdec->session == NULL) { if (gst_vtdec_handle_av1_sequence_header (vtdec, frame)) { GST_INFO_OBJECT (vtdec, "Successfully initialized AV1 decoder with sequence header"); vtdec->av1_needs_sequence_header = FALSE; /* Recreate the format description with the sequence header OBU */ if (vtdec->format_description) CFRelease (vtdec->format_description); vtdec->format_description = create_format_description_from_codec_data (vtdec, kCMVideoCodecType_AV1, vtdec->input_state->codec_data); if (!vtdec->format_description) { GST_ERROR_OBJECT (vtdec, "Failed to create format description with sequence header"); ret = GST_FLOW_NOT_NEGOTIATED; goto drop; } /* Compute DPB size and set latency for AV1 */ if (!gst_vtdec_compute_dpb_size (vtdec, kCMVideoCodecType_AV1, vtdec->input_state->codec_data)) { GST_ERROR_OBJECT (vtdec, "Failed to compute DPB size for AV1"); ret = GST_FLOW_NOT_NEGOTIATED; goto drop; } gst_vtdec_set_latency (vtdec); /* Now negotiate with the complete format description */ if (!gst_vtdec_negotiate (decoder)) { GST_ERROR_OBJECT (vtdec, "Failed to negotiate after AV1 sequence header"); ret = GST_FLOW_NOT_NEGOTIATED; goto drop; } } else { GST_DEBUG_OBJECT (vtdec, "Waiting for AV1 sequence header, dropping frame"); ret = GST_FLOW_OK; goto drop; } } /* Negotiate now so that we know whether we need to use the GL upload meta or not. * gst_vtenc_negotiate() will drain before attempting to negotiate. */ if (gst_pad_check_reconfigure (decoder->srcpad)) { if (!gst_vtdec_negotiate (decoder)) { gst_pad_mark_reconfigure (decoder->srcpad); if (GST_PAD_IS_FLUSHING (decoder->srcpad)) ret = GST_FLOW_FLUSHING; else ret = GST_FLOW_NOT_NEGOTIATED; goto drop; } } task_state = gst_pad_get_task_state (GST_VIDEO_DECODER_SRC_PAD (vtdec)); if (task_state == GST_TASK_STOPPED || task_state == GST_TASK_PAUSED) { /* Abort if our loop failed to push frames downstream... */ if (vtdec->downstream_ret != GST_FLOW_OK) { GST_DEBUG_OBJECT (vtdec, "Output loop stopped because of %s, ignoring frame", gst_flow_get_name (vtdec->downstream_ret)); ret = vtdec->downstream_ret; goto drop; } /* ...or if it stopped because of the flushing flag while the queue * was empty, in which case we didn't get GST_FLOW_FLUSHING... */ g_mutex_lock (&vtdec->queue_mutex); if (vtdec->is_flushing) { g_mutex_unlock (&vtdec->queue_mutex); GST_DEBUG_OBJECT (vtdec, "Flushing flag set, ignoring frame"); ret = GST_FLOW_FLUSHING; goto drop; } g_mutex_unlock (&vtdec->queue_mutex); /* .. or if it refuses to resume - e.g. it was stopped instead of paused */ if (!gst_vtdec_ensure_output_loop (vtdec)) { GST_ERROR_OBJECT (vtdec, "Output loop failed to resume"); ret = GST_FLOW_ERROR; goto drop; } } GST_LOG_OBJECT (vtdec, "got input frame %d", decode_frame_number); /* don't bother enabling kVTDecodeFrame_EnableTemporalProcessing at all since * it's not mandatory for the underlying VT codec to respect it. KISS and do * reordering ourselves. */ input_flags = kVTDecodeFrame_EnableAsynchronousDecompression; cm_sample_buffer = cm_sample_buffer_from_gst_buffer (vtdec, frame->input_buffer); /* We need to unlock the stream lock here because * the decode call can wait until gst_vtdec_session_output_callback() * is finished, which in turn can wait until there's space in the * output queue, which is being handled by the output loop, * which also uses the stream lock... */ GST_VIDEO_DECODER_STREAM_UNLOCK (vtdec); status = VTDecompressionSessionDecodeFrame (vtdec->session, cm_sample_buffer, input_flags, frame, NULL); GST_VIDEO_DECODER_STREAM_LOCK (vtdec); if (status != noErr) { GST_VIDEO_DECODER_ERROR (vtdec, 1, STREAM, DECODE, ("Failed to decode frame"), ("VTDecompressionSessionDecodeFrame returned %d", (int) status), ret); goto out; } GST_LOG_OBJECT (vtdec, "submitted input frame %d", decode_frame_number); frame = NULL; out: if (cm_sample_buffer) CFRelease (cm_sample_buffer); return ret; drop: gst_video_decoder_release_frame (decoder, frame); goto out; } static void gst_vtdec_invalidate_session (GstVtdec * vtdec) { g_return_if_fail (vtdec->session); VTDecompressionSessionInvalidate (vtdec->session); CFRelease (vtdec->session); vtdec->session = NULL; } static OSStatus gst_vtdec_create_session (GstVtdec * vtdec, GstVideoFormat format, gboolean enable_hardware) { CFMutableDictionaryRef output_image_buffer_attrs; VTDecompressionOutputCallbackRecord callback; CFMutableDictionaryRef videoDecoderSpecification; OSStatus status; guint32 cv_format = gst_video_format_to_cvpixelformat (format); g_return_val_if_fail (vtdec->session == NULL, FALSE); videoDecoderSpecification = CFDictionaryCreateMutable (NULL, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); #if TARGET_OS_OSX || TARGET_OS_VISION || TARGET_OS_IOS || TARGET_OS_TV if (__builtin_available (macOS 10.9, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) { gst_vtutil_dict_set_boolean (videoDecoderSpecification, kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder, enable_hardware); if (enable_hardware && vtdec->require_hardware) gst_vtutil_dict_set_boolean (videoDecoderSpecification, kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder, TRUE); } #endif output_image_buffer_attrs = CFDictionaryCreateMutable (NULL, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); gst_vtutil_dict_set_i32 (output_image_buffer_attrs, kCVPixelBufferPixelFormatTypeKey, cv_format); gst_vtutil_dict_set_i32 (output_image_buffer_attrs, kCVPixelBufferWidthKey, vtdec->video_info.width); gst_vtutil_dict_set_i32 (output_image_buffer_attrs, kCVPixelBufferHeightKey, vtdec->video_info.height); callback.decompressionOutputCallback = gst_vtdec_session_output_callback; callback.decompressionOutputRefCon = vtdec; status = VTDecompressionSessionCreate (NULL, vtdec->format_description, videoDecoderSpecification, output_image_buffer_attrs, &callback, &vtdec->session); if (videoDecoderSpecification) CFRelease (videoDecoderSpecification); CFRelease (output_image_buffer_attrs); return status; } /* https://www.webmproject.org/vp9/mp4/#vp-codec-configuration-box */ static gboolean gst_vtdec_build_vp9_vpcc_from_caps (GstVtdec * vtdec, GstStructure * caps_struct) { GST_INFO_OBJECT (vtdec, "gst_vtdec_build_vp9_vpcc_from_caps"); gint profile = 0; /* Undefined profile 0 is generally acceptable. */ guint bit_depth = 8; guint bit_depth_chroma = 8; /* Default to 4:2:0 */ guint8 chroma_subsampling = 1; const gchar *chroma_format = NULL; /* Default to BT.709 limited range */ gboolean video_full_range = FALSE; guint8 colour_primaries = 1; guint8 transfer_characteristics = 1; guint8 matrix_coefficients = 1; const gchar *colorimetry_str = NULL; guint8 color_info_field = 0; gboolean hdl = TRUE; GstByteWriter writer; if (!gst_structure_has_name (caps_struct, "video/x-vp9")) { return FALSE; } gst_byte_writer_init (&writer); /* version is always 1 */ hdl &= gst_byte_writer_put_uint8 (&writer, 1); hdl &= gst_byte_writer_put_uint8 (&writer, 0); hdl &= gst_byte_writer_put_uint8 (&writer, 0); hdl &= gst_byte_writer_put_uint8 (&writer, 0); gst_structure_get_int (caps_struct, "profile", &profile); hdl &= gst_byte_writer_put_uint8 (&writer, profile); /* level is not in caps for VP9; 0 is acceptable */ hdl &= gst_byte_writer_put_uint8 (&writer, 0); gst_structure_get_uint (caps_struct, "bit-depth-luma", &bit_depth); /* ensure chroma bit depth matches luma if present */ if (gst_structure_get_uint (caps_struct, "bit-depth-chroma", &bit_depth_chroma) && (bit_depth != bit_depth_chroma)) { GST_WARNING_OBJECT (vtdec, "bit-depth-luma and bit-depth-chroma in caps disagree"); } chroma_format = gst_structure_get_string (caps_struct, "chroma-format"); if (chroma_format) { if (g_strcmp0 (chroma_format, "4:2:0") == 0) { const gchar *chroma_site = gst_structure_get_string (caps_struct, "chroma-site"); if (chroma_site) { const GstVideoChromaSite site = gst_video_chroma_site_from_string (chroma_site); if (site == GST_VIDEO_CHROMA_SITE_V_COSITED) { chroma_subsampling = 0; } } } else if (g_strcmp0 (chroma_format, "4:2:2") == 0) { chroma_subsampling = 2; } else if (g_strcmp0 (chroma_format, "4:4:4") == 0) { chroma_subsampling = 3; } } colorimetry_str = gst_structure_get_string (caps_struct, "colorimetry"); if (colorimetry_str) { GstVideoColorimetry vid_col; if (gst_video_colorimetry_from_string (&vid_col, colorimetry_str)) { video_full_range = (vid_col.range == GST_VIDEO_COLOR_RANGE_0_255) ? TRUE : FALSE; colour_primaries = gst_video_color_primaries_to_iso (vid_col.primaries); transfer_characteristics = gst_video_transfer_function_to_iso (vid_col.transfer); matrix_coefficients = gst_video_color_matrix_to_iso (vid_col.matrix); } } color_info_field |= (bit_depth & 0xF) << 4; color_info_field |= (chroma_subsampling & 0x3) << 1; color_info_field |= !(!video_full_range); hdl &= gst_byte_writer_put_uint8 (&writer, color_info_field); hdl &= gst_byte_writer_put_uint8 (&writer, colour_primaries); hdl &= gst_byte_writer_put_uint8 (&writer, transfer_characteristics); hdl &= gst_byte_writer_put_uint8 (&writer, matrix_coefficients); /* codec initialization data, unused for VP9 */ hdl &= gst_byte_writer_put_uint16_le (&writer, 0); if (!hdl) { GST_ERROR_OBJECT (vtdec, "error creating vpcC header"); return FALSE; } guint vpcc_size = gst_byte_writer_get_size (&writer); vtdec->vp9_vpcc = gst_byte_writer_reset_and_get_data (&writer); if (vtdec->vp9_vpcc == NULL) { GST_ERROR_OBJECT (vtdec, "error acquiring vpcC header"); return FALSE; } vtdec->vp9_vpcc_size = vpcc_size; return TRUE; } static CMFormatDescriptionRef create_format_description (GstVtdec * vtdec, CMVideoCodecType cm_format) { OSStatus status; CMFormatDescriptionRef format_description = NULL; CFMutableDictionaryRef extensions = NULL; if (vtdec->vp9_vpcc) { CFMutableDictionaryRef atoms = CFDictionaryCreateMutable (NULL, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); gst_vtutil_dict_set_data (atoms, CFSTR ("vpcC"), vtdec->vp9_vpcc, vtdec->vp9_vpcc_size); extensions = CFDictionaryCreateMutable (NULL, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); gst_vtutil_dict_set_object (extensions, CFSTR ("SampleDescriptionExtensionAtoms"), (CFTypeRef *) atoms); } status = CMVideoFormatDescriptionCreate (NULL, cm_format, vtdec->video_info.width, vtdec->video_info.height, extensions, &format_description); if (extensions) CFRelease (extensions); if (status != noErr) return NULL; return format_description; } static CMFormatDescriptionRef create_format_description_from_codec_data (GstVtdec * vtdec, CMVideoCodecType cm_format, GstBuffer * codec_data) { CMFormatDescriptionRef fmt_desc; CFMutableDictionaryRef extensions, par, atoms; GstMapInfo map; OSStatus status; /* Extensions dict */ extensions = CFDictionaryCreateMutable (NULL, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); gst_vtutil_dict_set_string (extensions, CFSTR ("CVImageBufferChromaLocationBottomField"), "left"); gst_vtutil_dict_set_string (extensions, CFSTR ("CVImageBufferChromaLocationTopField"), "left"); gst_vtutil_dict_set_boolean (extensions, CFSTR ("FullRangeVideo"), FALSE); /* CVPixelAspectRatio dict */ par = CFDictionaryCreateMutable (NULL, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); gst_vtutil_dict_set_i32 (par, CFSTR ("HorizontalSpacing"), vtdec->video_info.par_n); gst_vtutil_dict_set_i32 (par, CFSTR ("VerticalSpacing"), vtdec->video_info.par_d); gst_vtutil_dict_set_object (extensions, CFSTR ("CVPixelAspectRatio"), (CFTypeRef *) par); /* SampleDescriptionExtensionAtoms dict */ gst_buffer_map (codec_data, &map, GST_MAP_READ); atoms = CFDictionaryCreateMutable (NULL, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); if (cm_format == kCMVideoCodecType_HEVC) gst_vtutil_dict_set_data (atoms, CFSTR ("hvcC"), map.data, map.size); else if (cm_format == kCMVideoCodecType_AV1) { GST_INFO_OBJECT (vtdec, "Creating av1C atom for VideoToolbox"); if (vtdec->av1_sequence_header_obu) { /* The av1C atom should contain the 4-byte header followed by the sequence header OBU */ GstMapInfo seq_map; if (gst_buffer_map (vtdec->av1_sequence_header_obu, &seq_map, GST_MAP_READ)) { gsize total_size = 4 + seq_map.size; /* 4-byte av1C header + sequence header OBU */ guint8 *av1c_with_obu = g_malloc (total_size); /* Copy the 4-byte av1C header */ memcpy (av1c_with_obu, map.data, 4); /* Append the sequence header OBU */ memcpy (av1c_with_obu + 4, seq_map.data, seq_map.size); GST_INFO_OBJECT (vtdec, "Creating av1C with sequence header OBU: %zu bytes total", total_size); gst_vtutil_dict_set_data (atoms, CFSTR ("av1C"), av1c_with_obu, total_size); g_free (av1c_with_obu); gst_buffer_unmap (vtdec->av1_sequence_header_obu, &seq_map); } else { GST_ERROR_OBJECT (vtdec, "Missing sequence header OBU"); return NULL; } } else { /* No sequence header OBU yet, just use the 4-byte header */ gst_vtutil_dict_set_data (atoms, CFSTR ("av1C"), map.data, MIN (map.size, 4)); } } else { gst_vtutil_dict_set_data (atoms, CFSTR ("avcC"), map.data, map.size); } gst_vtutil_dict_set_object (extensions, CFSTR ("SampleDescriptionExtensionAtoms"), (CFTypeRef *) atoms); gst_buffer_unmap (codec_data, &map); status = CMVideoFormatDescriptionCreate (NULL, cm_format, vtdec->video_info.width, vtdec->video_info.height, extensions, &fmt_desc); if (extensions) CFRelease (extensions); if (status == noErr) return fmt_desc; else return NULL; } /* Custom FreeBlock function for CMBlockBuffer */ static void cm_block_buffer_freeblock (void *refCon, void *doomedMemoryBlock, size_t sizeInBytes) { GstMapInfo *info = (GstMapInfo *) refCon; GstMemory *mem = info->memory; gst_memory_unmap (mem, info); gst_memory_unref (mem); g_slice_free (GstMapInfo, info); } static CMBlockBufferRef cm_block_buffer_from_gst_buffer (GstBuffer * buf, GstMapFlags flags) { OSStatus status; CMBlockBufferRef bbuf; CMBlockBufferCustomBlockSource blockSource; guint memcount, i; /* Initialize custom block source structure */ blockSource.version = kCMBlockBufferCustomBlockSourceVersion; blockSource.AllocateBlock = NULL; blockSource.FreeBlock = cm_block_buffer_freeblock; /* Determine number of memory blocks */ memcount = gst_buffer_n_memory (buf); status = CMBlockBufferCreateEmpty (NULL, memcount, 0, &bbuf); if (status != kCMBlockBufferNoErr) { GST_ERROR ("CMBlockBufferCreateEmpty returned %d", (int) status); return NULL; } /* Go over all GstMemory objects and add them to the CMBlockBuffer */ for (i = 0; i < memcount; ++i) { GstMemory *mem; GstMapInfo *info; mem = gst_buffer_get_memory (buf, i); info = g_slice_new (GstMapInfo); if (!gst_memory_map (mem, info, flags)) { GST_ERROR ("failed mapping memory"); g_slice_free (GstMapInfo, info); gst_memory_unref (mem); CFRelease (bbuf); return NULL; } blockSource.refCon = info; status = CMBlockBufferAppendMemoryBlock (bbuf, info->data, info->size, NULL, &blockSource, 0, info->size, 0); if (status != kCMBlockBufferNoErr) { GST_ERROR ("CMBlockBufferAppendMemoryBlock returned %d", (int) status); gst_memory_unmap (mem, info); g_slice_free (GstMapInfo, info); gst_memory_unref (mem); CFRelease (bbuf); return NULL; } } return bbuf; } static CMSampleBufferRef cm_sample_buffer_from_gst_buffer (GstVtdec * vtdec, GstBuffer * buf) { OSStatus status; CMBlockBufferRef bbuf = NULL; CMSampleBufferRef sbuf = NULL; CMSampleTimingInfo sample_timing; CMSampleTimingInfo time_array[1]; g_return_val_if_fail (vtdec->format_description, NULL); /* create a block buffer */ bbuf = cm_block_buffer_from_gst_buffer (buf, GST_MAP_READ); if (bbuf == NULL) { GST_ELEMENT_ERROR (vtdec, RESOURCE, FAILED, (NULL), ("failed creating CMBlockBuffer")); return NULL; } /* create a sample buffer */ if (GST_BUFFER_DURATION_IS_VALID (buf)) sample_timing.duration = CMTimeMake (GST_BUFFER_DURATION (buf), GST_SECOND); else sample_timing.duration = kCMTimeInvalid; if (GST_BUFFER_PTS_IS_VALID (buf)) sample_timing.presentationTimeStamp = CMTimeMake (GST_BUFFER_PTS (buf), GST_SECOND); else sample_timing.presentationTimeStamp = kCMTimeInvalid; if (GST_BUFFER_DTS_IS_VALID (buf)) sample_timing.decodeTimeStamp = CMTimeMake (GST_BUFFER_DTS (buf), GST_SECOND); else sample_timing.decodeTimeStamp = kCMTimeInvalid; time_array[0] = sample_timing; status = CMSampleBufferCreate (NULL, bbuf, TRUE, 0, 0, vtdec->format_description, 1, 1, time_array, 0, NULL, &sbuf); CFRelease (bbuf); if (status != noErr) { GST_ELEMENT_ERROR (vtdec, RESOURCE, FAILED, (NULL), ("CMSampleBufferCreate returned %d", (int) status)); return NULL; } return sbuf; } static gint sort_frames_by_pts (gconstpointer f1, gconstpointer f2, gpointer user_data) { GstVideoCodecFrame *frame1, *frame2; GstClockTime pts1, pts2; frame1 = (GstVideoCodecFrame *) f1; frame2 = (GstVideoCodecFrame *) f2; pts1 = pts2 = GST_CLOCK_TIME_NONE; if (frame1->output_buffer) pts1 = GST_BUFFER_PTS (frame1->output_buffer); if (frame2->output_buffer) pts2 = GST_BUFFER_PTS (frame2->output_buffer); if (!GST_CLOCK_TIME_IS_VALID (pts1) || !GST_CLOCK_TIME_IS_VALID (pts2)) return 0; if (pts1 < pts2) return -1; else if (pts1 == pts2) return 0; else return 1; } static void gst_vtdec_session_output_callback (void *decompression_output_ref_con, void *source_frame_ref_con, OSStatus status, VTDecodeInfoFlags info_flags, CVImageBufferRef image_buffer, CMTime pts, CMTime duration) { GstVtdec *vtdec = (GstVtdec *) decompression_output_ref_con; GstVideoCodecFrame *frame = (GstVideoCodecFrame *) source_frame_ref_con; GstVideoCodecState *state; gboolean push_anyway = FALSE; GST_LOG_OBJECT (vtdec, "got output frame %p %d and VT buffer %p", frame, frame->decode_frame_number, image_buffer); frame->output_buffer = NULL; if (status != noErr) { switch (status) { case kVTVideoDecoderReferenceMissingErr: /* ReferenceMissingErr is not critical, when it occurs the frame * usually has the kVTDecodeInfo_FrameDropped flag set. Log only for debugging purposes. */ GST_DEBUG_OBJECT (vtdec, "ReferenceMissingErr when decoding frame %d", frame->decode_frame_number); break; #if TARGET_OS_OSX case codecBadDataErr: /* SW decoder on macOS uses a different code from the hardware one... */ #endif case kVTVideoDecoderBadDataErr: /* BadDataErr also shouldn't cause an error to be displayed immediately. * Set the error flag so the output loop will log a warning * and only error out if this happens too many times. */ GST_DEBUG_OBJECT (vtdec, "BadDataErr when decoding frame %d", frame->decode_frame_number); frame->flags |= VTDEC_FRAME_FLAG_ERROR; break; default: GST_ERROR_OBJECT (vtdec, "Error decoding frame %d: %d", frame->decode_frame_number, (int) status); frame->flags |= VTDEC_FRAME_FLAG_ERROR; break; } } if (image_buffer) { GstBuffer *buf = NULL; /* FIXME: use gst_video_decoder_allocate_output_buffer */ state = gst_video_decoder_get_output_state (GST_VIDEO_DECODER (vtdec)); if (state == NULL) { GST_WARNING_OBJECT (vtdec, "Output state not configured, release buffer"); frame->flags &= VTDEC_FRAME_FLAG_SKIP; } else { buf = gst_core_video_buffer_new (image_buffer, &state->info, vtdec->texture_cache); gst_video_codec_state_unref (state); GST_BUFFER_PTS (buf) = pts.value; GST_BUFFER_DURATION (buf) = duration.value; frame->output_buffer = buf; } } else { if (info_flags & kVTDecodeInfo_FrameDropped) { GST_DEBUG_OBJECT (vtdec, "Frame %d dropped by VideoToolbox (%p)", frame->decode_frame_number, frame); frame->flags |= VTDEC_FRAME_FLAG_DROP; } else { GST_DEBUG_OBJECT (vtdec, "Decoded frame is NULL"); frame->flags |= VTDEC_FRAME_FLAG_SKIP; } } /* Limit the amount of frames in our output queue * to avoid processing too many frames ahead. * The DPB * 2 size limit is completely arbitrary. */ g_mutex_lock (&vtdec->queue_mutex); /* If negotiate() gets called from the output loop (via finish_frame()), * it can attempt to drain and call VTDecompressionSessionWaitForAsynchronousFrames, * which will lock up if we decide to wait in this callback, creating a deadlock. */ push_anyway = vtdec->is_flushing || vtdec->is_draining; while (!push_anyway && gst_vec_deque_get_length (vtdec->reorder_queue) > vtdec->dbp_size * 2 + 1) { g_cond_wait (&vtdec->queue_cond, &vtdec->queue_mutex); push_anyway = vtdec->is_flushing || vtdec->is_draining; } gst_vec_deque_push_sorted (vtdec->reorder_queue, frame, sort_frames_by_pts, NULL); GST_LOG ("pushed frame %d, queue length %" G_GSIZE_FORMAT, frame->decode_frame_number, gst_vec_deque_get_length (vtdec->reorder_queue)); g_cond_signal (&vtdec->queue_cond); g_mutex_unlock (&vtdec->queue_mutex); } static GstFlowReturn gst_vtdec_drain_decoder (GstVideoDecoder * decoder, gboolean flush) { GstVtdec *vtdec = GST_VTDEC (decoder); OSStatus vt_status; GST_DEBUG_OBJECT (vtdec, "drain_decoder, flushing: %d", flush); /* In case of EOS before the first buffer/caps */ if (vtdec->session == NULL) return GST_FLOW_OK; /* Only early-return here if we're draining (as that needs to output frames). * Flushing doesn't care about errors from downstream. */ if (!flush && vtdec->downstream_ret != GST_FLOW_OK) { /* Makes sure the output callback won't get stuck waiting for space in the queue */ g_mutex_lock (&vtdec->queue_mutex); vtdec->is_flushing = TRUE; g_cond_signal (&vtdec->queue_cond); g_mutex_unlock (&vtdec->queue_mutex); GST_WARNING_OBJECT (vtdec, "Output loop stopped with error (%s), leaving", gst_flow_get_name (vtdec->downstream_ret)); return vtdec->downstream_ret; } g_mutex_lock (&vtdec->queue_mutex); if (flush) { GST_DEBUG_OBJECT (vtdec, "setting flushing flag"); vtdec->is_flushing = TRUE; } else { GST_DEBUG_OBJECT (vtdec, "setting draining flag"); vtdec->is_draining = TRUE; } g_cond_signal (&vtdec->queue_cond); g_mutex_unlock (&vtdec->queue_mutex); if (!gst_vtdec_ensure_output_loop (vtdec)) { GST_ERROR_OBJECT (vtdec, "Output loop failed to resume"); return GST_FLOW_ERROR; } GST_DEBUG_OBJECT (vtdec, "draining VT session"); GST_VIDEO_DECODER_STREAM_UNLOCK (vtdec); vt_status = VTDecompressionSessionWaitForAsynchronousFrames (vtdec->session); if (vt_status != noErr) { GST_WARNING_OBJECT (vtdec, "VTDecompressionSessionWaitForAsynchronousFrames returned %d", (int) vt_status); } /* This will only pause after all frames are out because is_flushing/is_draining=TRUE */ gst_vtdec_pause_output_loop (vtdec); GST_VIDEO_DECODER_STREAM_LOCK (vtdec); if (flush) { GST_DEBUG_OBJECT (vtdec, "clearing flushing flag"); vtdec->is_flushing = FALSE; } else { GST_DEBUG_OBJECT (vtdec, "clearing draining flag"); vtdec->is_draining = FALSE; } if (vtdec->downstream_ret == GST_FLOW_OK) GST_DEBUG_OBJECT (vtdec, "buffer queue cleaned"); else GST_DEBUG_OBJECT (vtdec, "buffer queue not cleaned, output thread returned %s", gst_flow_get_name (vtdec->downstream_ret)); return vtdec->downstream_ret; } static int get_dpb_max_mb_s_from_level (GstVtdec * vtdec, int level) { switch (level) { case 10: /* 1b?? */ return 396; case 11: return 900; case 12: case 13: case 20: return 2376; case 21: return 4752; case 22: case 30: return 8100; case 31: return 18000; case 32: return 20480; case 40: case 41: return 32768; case 42: return 34816; case 50: return 110400; case 51: case 52: return 184320; case 60: case 61: case 62: return 696320; default: GST_ERROR_OBJECT (vtdec, "unknown level %d", level); return -1; } } static gboolean gst_vtdec_compute_dpb_size (GstVtdec * vtdec, CMVideoCodecType cm_format, GstBuffer * codec_data) { if (cm_format == kCMVideoCodecType_H264) { if (!compute_h264_decode_picture_buffer_size (vtdec, codec_data, &vtdec->dbp_size)) { return FALSE; } } else if (cm_format == kCMVideoCodecType_HEVC) { if (!compute_hevc_decode_picture_buffer_size (vtdec, codec_data, &vtdec->dbp_size)) { return FALSE; } } else if (cm_format == kCMVideoCodecType_AV1) { vtdec->dbp_size = GST_AV1_NUM_REF_FRAMES; } else { vtdec->dbp_size = 0; } GST_DEBUG_OBJECT (vtdec, "Calculated DPB size: %d", vtdec->dbp_size); return TRUE; } static gboolean parse_h264_decoder_config_record (GstVtdec * vtdec, GstBuffer * codec_data, GstH264DecoderConfigRecord ** config) { GstH264NalParser *parser = gst_h264_nal_parser_new (); GstMapInfo map; gboolean ret = TRUE; gst_buffer_map (codec_data, &map, GST_MAP_READ); if (gst_h264_parser_parse_decoder_config_record (parser, map.data, map.size, config) != GST_H264_PARSER_OK) { GST_WARNING_OBJECT (vtdec, "Failed to parse codec-data"); ret = FALSE; } gst_h264_nal_parser_free (parser); gst_buffer_unmap (codec_data, &map); return ret; } static gboolean get_h264_dpb_size_from_sps (GstVtdec * vtdec, GstH264NalUnit * nalu, gint * dpb_size) { GstH264ParserResult result; GstH264SPS sps; gint width_mb, height_mb; gint max_dpb_frames, max_dpb_size, max_dpb_mbs; result = gst_h264_parse_sps (nalu, &sps); if (result != GST_H264_PARSER_OK) { GST_WARNING_OBJECT (vtdec, "Failed to parse SPS, result %d", result); return FALSE; } max_dpb_mbs = get_dpb_max_mb_s_from_level (vtdec, sps.level_idc); if (max_dpb_mbs == -1) { GST_ELEMENT_ERROR (vtdec, STREAM, DECODE, (NULL), ("invalid level found in SPS, could not compute max_dpb_mbs")); gst_h264_sps_clear (&sps); return FALSE; } /* This formula is specified in sections A.3.1.h and A.3.2.f of the 2009 * edition of the standard */ width_mb = sps.width / 16; height_mb = sps.height / 16; max_dpb_frames = MIN (max_dpb_mbs / (width_mb * height_mb), GST_VTDEC_DPB_MAX_SIZE); if (sps.vui_parameters_present_flag && sps.vui_parameters.bitstream_restriction_flag) max_dpb_frames = MAX (1, sps.vui_parameters.max_dec_frame_buffering); /* Some non-conforming H264 streams may request a number of frames * larger than the calculated limit. * See https://chromium-review.googlesource.com/c/chromium/src/+/760276/ */ max_dpb_size = MAX (max_dpb_frames, sps.num_ref_frames); if (max_dpb_size > GST_VTDEC_DPB_MAX_SIZE) { GST_WARNING_OBJECT (vtdec, "Too large calculated DPB size %d", max_dpb_size); max_dpb_size = GST_VTDEC_DPB_MAX_SIZE; } *dpb_size = max_dpb_size; gst_h264_sps_clear (&sps); return TRUE; } static gboolean compute_h264_decode_picture_buffer_size (GstVtdec * vtdec, GstBuffer * codec_data, gint * length) { GstH264DecoderConfigRecord *config = NULL; GstH264NalUnit *nalu; guint8 profile, level; gboolean ret = TRUE; gint new_length; guint i; *length = 0; if (vtdec->video_info.width == 0 || vtdec->video_info.height == 0) return FALSE; if (!parse_h264_decoder_config_record (vtdec, codec_data, &config)) return FALSE; profile = config->profile_indication; level = config->level_indication; GST_INFO_OBJECT (vtdec, "parsed profile %d, level %d", profile, level); if (profile == 66) { /* baseline or constrained-baseline, we don't need to reorder */ goto out; } for (i = 0; i < config->sps->len; i++) { nalu = &g_array_index (config->sps, GstH264NalUnit, i); if (nalu->type != GST_H264_NAL_SPS) continue; if (!get_h264_dpb_size_from_sps (vtdec, nalu, &new_length)) GST_WARNING_OBJECT (vtdec, "Failed to get DPB size from SPS"); else *length = MAX (*length, new_length); } out: gst_h264_decoder_config_record_free (config); return ret; } static gboolean compute_hevc_decode_picture_buffer_size (GstVtdec * vtdec, GstBuffer * codec_data, int *length) { /* This value should be level dependent (table A.8) * but let's assume the maximum possible one for simplicity. */ const gint max_luma_ps = 35651584; const gint max_dpb_pic_buf = 6; gint max_dbp_size, pic_size_samples_y; if (vtdec->video_info.width == 0 || vtdec->video_info.height == 0) return FALSE; /* A.4.2 */ pic_size_samples_y = vtdec->video_info.width * vtdec->video_info.height; if (pic_size_samples_y <= (max_luma_ps >> 2)) max_dbp_size = max_dpb_pic_buf * 4; else if (pic_size_samples_y <= (max_luma_ps >> 1)) max_dbp_size = max_dpb_pic_buf * 2; else if (pic_size_samples_y <= ((3 * max_luma_ps) >> 2)) max_dbp_size = (max_dpb_pic_buf * 4) / 3; else max_dbp_size = max_dpb_pic_buf; *length = MIN (max_dbp_size, 16); return TRUE; } static void gst_vtdec_set_latency (GstVtdec * vtdec) { GstClockTime frame_duration; GstClockTime latency; if (vtdec->video_info.fps_n == 0) { GST_INFO_OBJECT (vtdec, "Framerate not known, can't set latency"); return; } frame_duration = gst_util_uint64_scale (GST_SECOND, vtdec->video_info.fps_d, vtdec->video_info.fps_n); latency = frame_duration * vtdec->dbp_size; GST_INFO_OBJECT (vtdec, "setting latency frames:%d time:%" GST_TIME_FORMAT, vtdec->dbp_size, GST_TIME_ARGS (latency)); gst_video_decoder_set_latency (GST_VIDEO_DECODER (vtdec), latency, latency); } typedef void (*VTRegisterSupplementalVideoDecoderIfAvailableFunc) (CMVideoCodecType codecType); static gboolean gst_vtdec_check_vp9_support (GstVtdec * vtdec) { gboolean vp9_supported = FALSE; GST_DEBUG_OBJECT (vtdec, "Checking VP9 VideoToolbox support"); #ifdef HAVE_SUPPLEMENTAL #ifdef HAVE_SUPPLEMENTAL_DEFINITION if (__builtin_available (macOS 11.0, iOS 26.2, tvOS 26.2, visionOS 26.2, *)) { VTRegisterSupplementalVideoDecoderIfAvailable (kCMVideoCodecType_VP9); } #else /* Needed temporarily till we can require a new-enough Xcode that has * VTRegisterSupplementalVideoDecoderIfAvailable on iOS, tvOS, visionOS 26.2 */ VTRegisterSupplementalVideoDecoderIfAvailableFunc func = (VTRegisterSupplementalVideoDecoderIfAvailableFunc) dlsym (RTLD_DEFAULT, "VTRegisterSupplementalVideoDecoderIfAvailable"); if (func != NULL) { func (kCMVideoCodecType_VP9); } #endif #endif vp9_supported = VTIsHardwareDecodeSupported (kCMVideoCodecType_VP9); if (vp9_supported) { GST_INFO_OBJECT (vtdec, "VP9 hardware decoding is supported"); } else { GST_INFO_OBJECT (vtdec, "VP9 hardware decoding is not supported on this system"); } return vp9_supported; } static gboolean gst_vtdec_check_av1_support (GstVtdec * vtdec) { gboolean av1_supported = FALSE; GST_DEBUG_OBJECT (vtdec, "Checking AV1 VideoToolbox support"); #ifdef HAVE_SUPPLEMENTAL #ifdef HAVE_SUPPLEMENTAL_DEFINITION if (__builtin_available (macOS 11.0, iOS 26.2, tvOS 26.2, visionOS 26.2, *)) { VTRegisterSupplementalVideoDecoderIfAvailable (kCMVideoCodecType_AV1); } #else /* Needed temporarily till we can require a new-enough Xcode that has * VTRegisterSupplementalVideoDecoderIfAvailable on iOS, tvOS, visionOS 26.2 */ VTRegisterSupplementalVideoDecoderIfAvailableFunc func = (VTRegisterSupplementalVideoDecoderIfAvailableFunc) dlsym (RTLD_DEFAULT, "VTRegisterSupplementalVideoDecoderIfAvailable"); if (func != NULL) { func (kCMVideoCodecType_AV1); } #endif #endif /* Check if hardware decode is supported for AV1 */ av1_supported = VTIsHardwareDecodeSupported (kCMVideoCodecType_AV1); if (av1_supported) { GST_INFO_OBJECT (vtdec, "AV1 hardware decoding is supported"); } else { GST_INFO_OBJECT (vtdec, "AV1 hardware decoding is not supported on this system"); } return av1_supported; } static GstCaps * gst_vtdec_getcaps (GstVideoDecoder * decoder, GstCaps * filter) { static gsize av1_once = 0; static gsize vp9_once = 0; GstVtdec *vtdec = GST_VTDEC (decoder); GstCaps *sinkcaps, *result; sinkcaps = gst_pad_get_pad_template_caps (GST_VIDEO_DECODER_SINK_PAD (decoder)); sinkcaps = gst_caps_make_writable (sinkcaps); guint n = gst_caps_get_size (sinkcaps); for (guint i = 0; i < n;) { GstStructure *s = gst_caps_get_structure (sinkcaps, i); if (gst_structure_has_name (s, "video/x-av1")) { if (g_once_init_enter (&av1_once)) { if (gst_vtdec_check_av1_support (vtdec)) vtdec->codec_support |= Av1Supported; g_once_init_leave (&av1_once, Av1Supported); } } else if (gst_structure_has_name (s, "video/x-vp9")) { if (g_once_init_enter (&vp9_once)) { if (gst_vtdec_check_vp9_support (vtdec)) vtdec->codec_support |= Vp9Supported; g_once_init_leave (&vp9_once, Vp9Supported); } } if ((gst_structure_has_name (s, "video/x-av1") && !(vtdec->codec_support & Av1Supported)) || (gst_structure_has_name (s, "video/x-vp9") && !(vtdec->codec_support & Vp9Supported))) { gst_caps_remove_structure (sinkcaps, i); n--; } else { i++; } } result = gst_video_decoder_proxy_getcaps (decoder, sinkcaps, filter); gst_caps_unref (sinkcaps); return result; } static gboolean gst_vtdec_handle_av1_sequence_header (GstVtdec * vtdec, GstVideoCodecFrame * frame) { GstMapInfo map_info; GstAV1Parser *parser; GstAV1OBU obu; GstAV1ParserResult result; guint32 consumed = 0; gboolean found_sequence_header = FALSE; if (!gst_buffer_map (frame->input_buffer, &map_info, GST_MAP_READ)) { GST_ERROR_OBJECT (vtdec, "Failed to map input buffer"); return FALSE; } GST_DEBUG_OBJECT (vtdec, "Checking for AV1 sequence header in %zu bytes", map_info.size); /* Create AV1 parser to identify and parse OBUs */ parser = gst_av1_parser_new (); if (!parser) { GST_ERROR_OBJECT (vtdec, "Failed to create AV1 parser"); gst_buffer_unmap (frame->input_buffer, &map_info); return FALSE; } /* Search for sequence header OBU */ while (consumed < map_info.size) { guint32 bytes_consumed = 0; result = gst_av1_parser_identify_one_obu (parser, map_info.data + consumed, map_info.size - consumed, &obu, &bytes_consumed); if (result != GST_AV1_PARSER_OK) { if (result == GST_AV1_PARSER_NO_MORE_DATA) break; GST_DEBUG_OBJECT (vtdec, "Failed to identify OBU: %d", result); consumed += bytes_consumed; continue; } GST_DEBUG_OBJECT (vtdec, "Found OBU type %d", obu.obu_type); if (obu.obu_type == GST_AV1_OBU_SEQUENCE_HEADER) { GST_INFO_OBJECT (vtdec, "Found AV1 sequence header OBU"); /* Store the sequence header OBU */ if (vtdec->av1_sequence_header_obu) gst_buffer_unref (vtdec->av1_sequence_header_obu); /* Calculate the complete OBU size including header */ gsize obu_offset = consumed; gsize obu_total_size = bytes_consumed; vtdec->av1_sequence_header_obu = gst_buffer_copy_region (frame->input_buffer, GST_BUFFER_COPY_MEMORY, obu_offset, obu_total_size); GST_INFO_OBJECT (vtdec, "Stored AV1 sequence header OBU (%zu bytes)", obu_total_size); found_sequence_header = TRUE; break; } consumed += bytes_consumed; } gst_av1_parser_free (parser); gst_buffer_unmap (frame->input_buffer, &map_info); return found_sequence_header; } static void gst_vtdec_set_context (GstElement * element, GstContext * context) { GstVtdec *vtdec = GST_VTDEC (element); GST_INFO_OBJECT (element, "setting context %s", gst_context_get_context_type (context)); if (!vtdec->ctxh) vtdec->ctxh = gst_gl_context_helper_new (element); gst_gl_handle_set_context (element, context, &vtdec->ctxh->display, &vtdec->ctxh->other_context); #if defined (APPLEMEDIA_MOLTENVK) gst_vulkan_handle_set_context (element, context, NULL, &vtdec->instance); #endif GST_ELEMENT_CLASS (gst_vtdec_parent_class)->set_context (element, context); } #define GST_TYPE_VTDEC_HW (gst_vtdec_hw_get_type()) #define GST_VTDEC_HW(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_VTDEC_HW,GstVtdecHw)) #define GST_VTDEC_HW_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_VTDEC_HW,GstVtdecHwClass)) #define GST_IS_VTDEC_HW(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_VTDEC_HW)) #define GST_IS_VTDEC_HW_CLASS(obj) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_VTDEC_HW)) typedef GstVtdec GstVtdecHw; typedef GstVtdecClass GstVtdecHwClass; GType gst_vtdec_hw_get_type (void); G_DEFINE_TYPE (GstVtdecHw, gst_vtdec_hw, GST_TYPE_VTDEC); static void gst_vtdec_hw_class_init (GstVtdecHwClass * klass) { gst_element_class_set_static_metadata (GST_ELEMENT_CLASS (klass), "Apple VideoToolbox decoder (hardware only)", "Codec/Decoder/Video/Hardware", "Apple VideoToolbox Decoder", "Ole André Vadla Ravnås ; " "Alessandro Decina "); } static void gst_vtdec_hw_init (GstVtdecHw * vtdec) { GST_VTDEC (vtdec)->require_hardware = TRUE; } static void gst_vtdec_init_once (void) { static gsize init_once = 0; if (g_once_init_enter (&init_once)) { gst_applemedia_init_once (); GST_DEBUG_CATEGORY_INIT (gst_vtdec_debug_category, "vtdec", 0, "debug category for vtdec element"); g_once_init_leave (&init_once, 1); } } static gboolean gst_vtdec_register_vtdec (GstPlugin * plugin) { gint rank = GST_RANK_PRIMARY; gst_vtdec_init_once (); #if !TARGET_OS_WATCH if (__builtin_available (macOS 10.9, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) rank = GST_RANK_SECONDARY; #endif return gst_element_register (plugin, "vtdec", rank, GST_TYPE_VTDEC); } #if !TARGET_OS_WATCH static gboolean gst_vtdec_register_vtdec_hw (GstPlugin * plugin) { gst_vtdec_init_once (); if (__builtin_available (macOS 10.9, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) { return gst_element_register (plugin, "vtdec_hw", GST_RANK_PRIMARY + 1, GST_TYPE_VTDEC_HW); } return TRUE; } #endif GST_ELEMENT_REGISTER_DEFINE_CUSTOM (vtdec, gst_vtdec_register_vtdec); #if !TARGET_OS_WATCH GST_ELEMENT_REGISTER_DEFINE_CUSTOM (vtdec_hw, gst_vtdec_register_vtdec_hw); #endif gboolean gst_vtdec_register_elements (GstPlugin * plugin) { gboolean ret = FALSE; #if !TARGET_OS_WATCH ret |= GST_ELEMENT_REGISTER (vtdec_hw, plugin); #endif ret |= GST_ELEMENT_REGISTER (vtdec, plugin); return ret; }