30#define MAX_SUPPORTED_WIDTH 1950
31#define MAX_SUPPORTED_HEIGHT 1100
34#include "libavutil/hwcontext_vaapi.h"
36typedef struct VAAPIDecodeContext {
38 VAEntrypoint va_entrypoint;
40 VAContextID va_context;
42#if FF_API_STRUCT_VAAPI_CONTEXT
45 struct vaapi_context *old_context;
46 AVBufferRef *device_ref;
50 AVHWDeviceContext *device;
51 AVVAAPIDeviceContext *hwctx;
53 AVHWFramesContext *frames;
54 AVVAAPIFramesContext *hwfc;
56 enum AVPixelFormat surface_format;
72 : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
73 path(
path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
74 seek_audio_frame_found(0), seek_video_frame_found(0),is_duration_known(false), largest_frame_processed(0),
76 video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
77 pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
85 pts_offset_seconds = NO_PTS_OFFSET;
86 video_pts_seconds = NO_PTS_OFFSET;
87 audio_pts_seconds = NO_PTS_OFFSET;
116 if (abs(diff) <= amount)
127static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx,
const enum AVPixelFormat *pix_fmts)
129 const enum AVPixelFormat *p;
131 for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
133#if defined(__linux__)
135 case AV_PIX_FMT_VAAPI:
140 case AV_PIX_FMT_VDPAU:
148 case AV_PIX_FMT_DXVA2_VLD:
153 case AV_PIX_FMT_D3D11:
159#if defined(__APPLE__)
161 case AV_PIX_FMT_VIDEOTOOLBOX:
168 case AV_PIX_FMT_CUDA:
184 return AV_PIX_FMT_NONE;
187int FFmpegReader::IsHardwareDecodeSupported(
int codecid)
191 case AV_CODEC_ID_H264:
192 case AV_CODEC_ID_MPEG2VIDEO:
193 case AV_CODEC_ID_VC1:
194 case AV_CODEC_ID_WMV1:
195 case AV_CODEC_ID_WMV2:
196 case AV_CODEC_ID_WMV3:
211 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
221 if (avformat_open_input(&pFormatCtx,
path.c_str(), NULL, NULL) != 0)
225 if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
232 packet_status.
reset(
true);
235 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
237 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
244 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
251 if (videoStream == -1 && audioStream == -1)
255 if (videoStream != -1) {
260 pStream = pFormatCtx->streams[videoStream];
266 const AVCodec *pCodec = avcodec_find_decoder(codecId);
267 AVDictionary *
opts = NULL;
268 int retry_decode_open = 2;
273 if (
hw_de_on && (retry_decode_open==2)) {
275 hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
278 retry_decode_open = 0;
283 if (pCodec == NULL) {
284 throw InvalidCodec(
"A valid video codec could not be found for this file.",
path);
288 av_dict_set(&
opts,
"strict",
"experimental", 0);
292 int i_decoder_hw = 0;
294 char *adapter_ptr = NULL;
297 fprintf(stderr,
"Hardware decoding device number: %d\n", adapter_num);
300 pCodecCtx->get_format = get_hw_dec_format;
302 if (adapter_num < 3 && adapter_num >=0) {
303#if defined(__linux__)
304 snprintf(adapter,
sizeof(adapter),
"/dev/dri/renderD%d", adapter_num+128);
305 adapter_ptr = adapter;
307 switch (i_decoder_hw) {
309 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
312 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
315 hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
318 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
321 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
328 switch (i_decoder_hw) {
330 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
333 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
336 hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
339 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
342 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
345#elif defined(__APPLE__)
348 switch (i_decoder_hw) {
350 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
353 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
356 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
366#if defined(__linux__)
367 if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
369 if( adapter_ptr != NULL ) {
370#elif defined(__APPLE__)
371 if( adapter_ptr != NULL ) {
380 hw_device_ctx = NULL;
382 if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
383 if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
425 pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
429 int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &
opts);
430 if (avcodec_return < 0) {
431 std::stringstream avcodec_error_msg;
432 avcodec_error_msg <<
"A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
438 AVHWFramesConstraints *constraints = NULL;
439 void *hwconfig = NULL;
440 hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);
444 ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
445 constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
448 if (pCodecCtx->coded_width < constraints->min_width ||
449 pCodecCtx->coded_height < constraints->min_height ||
450 pCodecCtx->coded_width > constraints->max_width ||
451 pCodecCtx->coded_height > constraints->max_height) {
454 retry_decode_open = 1;
457 av_buffer_unref(&hw_device_ctx);
458 hw_device_ctx = NULL;
463 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Min width :", constraints->min_width,
"Min Height :", constraints->min_height,
"MaxWidth :", constraints->max_width,
"MaxHeight :", constraints->max_height,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
464 retry_decode_open = 0;
466 av_hwframe_constraints_free(&constraints);
479 if (pCodecCtx->coded_width < 0 ||
480 pCodecCtx->coded_height < 0 ||
481 pCodecCtx->coded_width > max_w ||
482 pCodecCtx->coded_height > max_h ) {
483 ZmqLogger::Instance()->
AppendDebugMethod(
"DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
485 retry_decode_open = 1;
488 av_buffer_unref(&hw_device_ctx);
489 hw_device_ctx = NULL;
493 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
494 retry_decode_open = 0;
502 retry_decode_open = 0;
504 }
while (retry_decode_open);
513 if (audioStream != -1) {
518 aStream = pFormatCtx->streams[audioStream];
524 const AVCodec *aCodec = avcodec_find_decoder(codecId);
530 if (aCodec == NULL) {
531 throw InvalidCodec(
"A valid audio codec could not be found for this file.",
path);
535 AVDictionary *
opts = NULL;
536 av_dict_set(&
opts,
"strict",
"experimental", 0);
539 if (avcodec_open2(aCodecCtx, aCodec, &
opts) < 0)
540 throw InvalidCodec(
"An audio codec was found, but could not be opened.",
path);
550 AVDictionaryEntry *tag = NULL;
551 while ((tag = av_dict_get(pFormatCtx->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
552 QString str_key = tag->key;
553 QString str_value = tag->value;
554 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
558 previous_packet_location.
frame = -1;
590 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
596 AVPacket *recent_packet = packet;
601 int max_attempts = 128;
606 "attempts", attempts);
618 RemoveAVPacket(recent_packet);
623 if(avcodec_is_open(pCodecCtx)) {
624 avcodec_flush_buffers(pCodecCtx);
630 av_buffer_unref(&hw_device_ctx);
631 hw_device_ctx = NULL;
639 if(avcodec_is_open(aCodecCtx)) {
640 avcodec_flush_buffers(aCodecCtx);
647 working_cache.
Clear();
650 avformat_close_input(&pFormatCtx);
651 av_freep(&pFormatCtx);
656 largest_frame_processed = 0;
657 seek_audio_frame_found = 0;
658 seek_video_frame_found = 0;
659 current_video_frame = 0;
660 last_video_frame.reset();
664bool FFmpegReader::HasAlbumArt() {
668 return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
669 && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
672void FFmpegReader::UpdateAudioInfo() {
689 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
710 if (aStream->duration > 0 && aStream->duration >
info.
duration) {
713 }
else if (pFormatCtx->duration > 0 &&
info.
duration <= 0.0f) {
715 info.
duration = float(pFormatCtx->duration) / AV_TIME_BASE;
758 AVDictionaryEntry *tag = NULL;
759 while ((tag = av_dict_get(aStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
760 QString str_key = tag->key;
761 QString str_value = tag->value;
762 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
766void FFmpegReader::UpdateVideoInfo() {
774 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
781 AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
793 if (pStream->sample_aspect_ratio.num != 0) {
816 if (!check_interlace) {
817 check_interlace =
true;
819 switch(field_order) {
820 case AV_FIELD_PROGRESSIVE:
833 case AV_FIELD_UNKNOWN:
835 check_interlace =
false;
850 if (
info.
duration <= 0.0f && pFormatCtx->duration >= 0) {
852 info.
duration = float(pFormatCtx->duration) / AV_TIME_BASE;
862 if (
info.
duration <= 0.0f && pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
880 is_duration_known =
false;
883 is_duration_known =
true;
893 AVDictionaryEntry *tag = NULL;
894 while ((tag = av_dict_get(pStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
895 QString str_key = tag->key;
896 QString str_value = tag->value;
897 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
902 return this->is_duration_known;
908 throw ReaderClosed(
"The FFmpegReader is closed. Call Open() before calling this method.",
path);
911 if (requested_frame < 1)
917 throw InvalidFile(
"Could not detect the duration of the video or audio stream.",
path);
933 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
947 int64_t diff = requested_frame - last_frame;
948 if (diff >= 1 && diff <= 20) {
950 frame = ReadStream(requested_frame);
955 Seek(requested_frame);
964 frame = ReadStream(requested_frame);
972std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
974 bool check_seek =
false;
975 int packet_error = -1;
985 CheckWorkingFrames(requested_frame);
990 if (is_cache_found) {
994 if (!hold_packet || !packet) {
996 packet_error = GetNextPacket();
997 if (packet_error < 0 && !packet) {
1008 check_seek = CheckSeek(
false);
1020 if ((
info.
has_video && packet && packet->stream_index == videoStream) ||
1024 ProcessVideoPacket(requested_frame);
1027 if ((
info.
has_audio && packet && packet->stream_index == audioStream) ||
1031 ProcessAudioPacket(requested_frame);
1036 if ((!
info.
has_video && packet && packet->stream_index == videoStream) ||
1037 (!
info.
has_audio && packet && packet->stream_index == audioStream)) {
1039 if (packet->stream_index == videoStream) {
1041 }
else if (packet->stream_index == audioStream) {
1047 RemoveAVPacket(packet);
1057 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ReadStream (force EOF)",
"packets_read", packet_status.
packets_read(),
"packets_decoded", packet_status.
packets_decoded(),
"packets_eof", packet_status.
packets_eof,
"video_eof", packet_status.
video_eof,
"audio_eof", packet_status.
audio_eof,
"end_of_file", packet_status.
end_of_file);
1074 "largest_frame_processed", largest_frame_processed,
1075 "Working Cache Count", working_cache.
Count());
1084 CheckWorkingFrames(requested_frame);
1100 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1103 if (!frame->has_image_data) {
1108 frame->AddAudioSilence(samples_in_frame);
1113 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1115 f->AddAudioSilence(samples_in_frame);
1123int FFmpegReader::GetNextPacket() {
1124 int found_packet = 0;
1125 AVPacket *next_packet;
1126 next_packet =
new AVPacket();
1127 found_packet = av_read_frame(pFormatCtx, next_packet);
1131 RemoveAVPacket(packet);
1134 if (found_packet >= 0) {
1136 packet = next_packet;
1139 if (packet->stream_index == videoStream) {
1141 }
else if (packet->stream_index == audioStream) {
1150 return found_packet;
1154bool FFmpegReader::GetAVFrame() {
1155 int frameFinished = 0;
1161 int send_packet_err = 0;
1162 int64_t send_packet_pts = 0;
1163 if ((packet && packet->stream_index == videoStream) || !packet) {
1164 send_packet_err = avcodec_send_packet(pCodecCtx, packet);
1166 if (packet && send_packet_err >= 0) {
1167 send_packet_pts = GetPacketPTS();
1168 hold_packet =
false;
1178 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1179 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) +
"])",
"send_packet_err", send_packet_err,
"send_packet_pts", send_packet_pts);
1180 if (send_packet_err == AVERROR(EAGAIN)) {
1182 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()",
"send_packet_pts", send_packet_pts);
1184 if (send_packet_err == AVERROR(EINVAL)) {
1185 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush",
"send_packet_pts", send_packet_pts);
1187 if (send_packet_err == AVERROR(ENOMEM)) {
1188 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors",
"send_packet_pts", send_packet_pts);
1195 int receive_frame_err = 0;
1196 AVFrame *next_frame2;
1204 next_frame2 = next_frame;
1207 while (receive_frame_err >= 0) {
1208 receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
1210 if (receive_frame_err != 0) {
1211 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [\" + av_err2string(receive_frame_err) + \"])",
"receive_frame_err", receive_frame_err,
"send_packet_pts", send_packet_pts);
1213 if (receive_frame_err == AVERROR_EOF) {
1215 "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
"send_packet_pts", send_packet_pts);
1216 avcodec_flush_buffers(pCodecCtx);
1219 if (receive_frame_err == AVERROR(EINVAL)) {
1221 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
"send_packet_pts", send_packet_pts);
1222 avcodec_flush_buffers(pCodecCtx);
1224 if (receive_frame_err == AVERROR(EAGAIN)) {
1226 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
"send_packet_pts", send_packet_pts);
1228 if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1230 "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
"send_packet_pts", send_packet_pts);
1241 if (next_frame2->format == hw_de_av_pix_fmt) {
1242 next_frame->format = AV_PIX_FMT_YUV420P;
1243 if ((err = av_hwframe_transfer_data(next_frame,next_frame2,0)) < 0) {
1246 if ((err = av_frame_copy_props(next_frame,next_frame2)) < 0) {
1254 next_frame = next_frame2;
1262 av_image_alloc(pFrame->data, pFrame->linesize,
info.
width,
info.
height, (AVPixelFormat)(pStream->codecpar->format), 1);
1263 av_image_copy(pFrame->data, pFrame->linesize, (
const uint8_t**)next_frame->data, next_frame->linesize,
1270 if (next_frame->pts != AV_NOPTS_VALUE) {
1273 video_pts = next_frame->pts;
1274 }
else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
1276 video_pts = next_frame->pkt_dts;
1280 "FFmpegReader::GetAVFrame (Successful frame received)",
"video_pts", video_pts,
"send_packet_pts", send_packet_pts);
1291 avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1297 if (frameFinished) {
1301 av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt,
info.
width,
1310 return frameFinished;
1314bool FFmpegReader::CheckSeek(
bool is_video) {
1319 if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
1327 int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
1330 if (max_seeked_frame >= seeking_frame) {
1333 "is_video_seek", is_video_seek,
1334 "max_seeked_frame", max_seeked_frame,
1335 "seeking_frame", seeking_frame,
1336 "seeking_pts", seeking_pts,
1337 "seek_video_frame_found", seek_video_frame_found,
1338 "seek_audio_frame_found", seek_audio_frame_found);
1341 Seek(seeking_frame - (10 * seek_count * seek_count));
1345 "is_video_seek", is_video_seek,
1346 "packet->pts", GetPacketPTS(),
1347 "seeking_pts", seeking_pts,
1348 "seeking_frame", seeking_frame,
1349 "seek_video_frame_found", seek_video_frame_found,
1350 "seek_audio_frame_found", seek_audio_frame_found);
1364void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
1367 int frame_finished = GetAVFrame();
1370 if (!frame_finished) {
1373 RemoveAVFrame(pFrame);
1379 int64_t current_frame = ConvertVideoPTStoFrame(video_pts);
1382 if (!seek_video_frame_found && is_seeking)
1383 seek_video_frame_found = current_frame;
1389 working_cache.
Add(CreateFrame(requested_frame));
1401 AVFrame *pFrameRGB =
nullptr;
1402 uint8_t *buffer =
nullptr;
1406 if (pFrameRGB ==
nullptr)
1428 max_width = std::max(
float(max_width), max_width * max_scale_x);
1429 max_height = std::max(
float(max_height), max_height * max_scale_y);
1435 QSize width_size(max_width * max_scale_x,
1438 max_height * max_scale_y);
1440 if (width_size.width() >= max_width && width_size.height() >= max_height) {
1441 max_width = std::max(max_width, width_size.width());
1442 max_height = std::max(max_height, width_size.height());
1444 max_width = std::max(max_width, height_size.width());
1445 max_height = std::max(max_height, height_size.height());
1452 float preview_ratio = 1.0;
1459 max_width =
info.
width * max_scale_x * preview_ratio;
1460 max_height =
info.
height * max_scale_y * preview_ratio;
1465 int original_height = height;
1466 if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
1468 float ratio = float(width) / float(height);
1469 int possible_width = round(max_height * ratio);
1470 int possible_height = round(max_width / ratio);
1472 if (possible_width <= max_width) {
1474 width = possible_width;
1475 height = max_height;
1479 height = possible_height;
1484 const int bytes_per_pixel = 4;
1485 int buffer_size = (width * height * bytes_per_pixel) + 128;
1486 buffer =
new unsigned char[buffer_size]();
1491 int scale_mode = SWS_FAST_BILINEAR;
1493 scale_mode = SWS_BICUBIC;
1499 sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
1500 original_height, pFrameRGB->data, pFrameRGB->linesize);
1503 std::shared_ptr<Frame> f = CreateFrame(current_frame);
1508 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
1511 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
1515 working_cache.
Add(f);
1518 last_video_frame = f;
1524 RemoveAVFrame(pFrame);
1525 sws_freeContext(img_convert_ctx);
1531 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ProcessVideoPacket (After)",
"requested_frame", requested_frame,
"current_frame", current_frame,
"f->number", f->number,
"video_pts_seconds", video_pts_seconds);
1535void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
1538 if (packet && packet->pts != AV_NOPTS_VALUE) {
1540 location = GetAudioPTSLocation(packet->pts);
1543 if (!seek_audio_frame_found && is_seeking)
1544 seek_audio_frame_found = location.
frame;
1551 working_cache.
Add(CreateFrame(requested_frame));
1555 "requested_frame", requested_frame,
1556 "target_frame", location.
frame,
1560 int frame_finished = 0;
1564 int packet_samples = 0;
1568 int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
1569 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1573 int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
1574 if (receive_frame_err >= 0) {
1577 if (receive_frame_err == AVERROR_EOF) {
1581 if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
1583 avcodec_flush_buffers(aCodecCtx);
1585 if (receive_frame_err != 0) {
1590 int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
1593 if (frame_finished) {
1599 audio_pts = audio_frame->pts;
1602 location = GetAudioPTSLocation(audio_pts);
1605 int plane_size = -1;
1611 data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
1615 packet_samples = audio_frame->nb_samples * nb_channels;
1624 int pts_remaining_samples = packet_samples /
info.
channels;
1627 if (pts_remaining_samples == 0) {
1629 "packet_samples", packet_samples,
1631 "pts_remaining_samples", pts_remaining_samples);
1635 while (pts_remaining_samples) {
1640 int samples = samples_per_frame - previous_packet_location.
sample_start;
1641 if (samples > pts_remaining_samples)
1642 samples = pts_remaining_samples;
1645 pts_remaining_samples -= samples;
1647 if (pts_remaining_samples > 0) {
1649 previous_packet_location.
frame++;
1658 "packet_samples", packet_samples,
1666 audio_converted->nb_samples = audio_frame->nb_samples;
1667 av_samples_alloc(audio_converted->data, audio_converted->linesize,
info.
channels, audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
1683 av_opt_set_int(avr,
"out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
1690 audio_converted->data,
1691 audio_converted->linesize[0],
1692 audio_converted->nb_samples,
1694 audio_frame->linesize[0],
1695 audio_frame->nb_samples);
1702 int64_t starting_frame_number = -1;
1703 for (
int channel_filter = 0; channel_filter <
info.
channels; channel_filter++) {
1705 starting_frame_number = location.
frame;
1706 int channel_buffer_size = nb_samples;
1707 auto *channel_buffer = (
float *) (audio_converted->data[channel_filter]);
1711 int remaining_samples = channel_buffer_size;
1712 while (remaining_samples > 0) {
1717 int samples = std::fmin(samples_per_frame - start, remaining_samples);
1720 std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
1723 f->AddAudio(
true, channel_filter, start, channel_buffer, samples, 1.0f);
1727 "frame", starting_frame_number,
1730 "channel", channel_filter,
1731 "samples_per_frame", samples_per_frame);
1734 working_cache.
Add(f);
1737 remaining_samples -= samples;
1740 if (remaining_samples > 0)
1741 channel_buffer += samples;
1744 starting_frame_number++;
1752 av_free(audio_converted->data[0]);
1761 "requested_frame", requested_frame,
1762 "starting_frame", location.
frame,
1763 "end_frame", starting_frame_number - 1,
1764 "audio_pts_seconds", audio_pts_seconds);
1770void FFmpegReader::Seek(int64_t requested_frame) {
1772 if (requested_frame < 1)
1773 requested_frame = 1;
1776 if (requested_frame > largest_frame_processed && packet_status.
end_of_file) {
1783 "requested_frame", requested_frame,
1784 "seek_count", seek_count,
1785 "last_frame", last_frame);
1788 working_cache.
Clear();
1792 video_pts_seconds = NO_PTS_OFFSET;
1794 audio_pts_seconds = NO_PTS_OFFSET;
1795 hold_packet =
false;
1797 current_video_frame = 0;
1798 largest_frame_processed = 0;
1803 packet_status.
reset(
false);
1809 int buffer_amount = std::max(max_concurrent_frames, 8);
1810 if (requested_frame - buffer_amount < 20) {
1824 if (seek_count == 1) {
1827 seeking_pts = ConvertFrameToVideoPTS(1);
1829 seek_audio_frame_found = 0;
1830 seek_video_frame_found = 0;
1834 bool seek_worked =
false;
1835 int64_t seek_target = 0;
1839 seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
1841 fprintf(stderr,
"%s: error while seeking video stream\n", pFormatCtx->AV_FILENAME);
1844 is_video_seek =
true;
1851 seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
1853 fprintf(stderr,
"%s: error while seeking audio stream\n", pFormatCtx->AV_FILENAME);
1856 is_video_seek =
false;
1865 avcodec_flush_buffers(aCodecCtx);
1869 avcodec_flush_buffers(pCodecCtx);
1872 previous_packet_location.
frame = -1;
1877 if (seek_count == 1) {
1879 seeking_pts = seek_target;
1880 seeking_frame = requested_frame;
1882 seek_audio_frame_found = 0;
1883 seek_video_frame_found = 0;
1911int64_t FFmpegReader::GetPacketPTS() {
1913 int64_t current_pts = packet->pts;
1914 if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
1915 current_pts = packet->dts;
1921 return AV_NOPTS_VALUE;
1926void FFmpegReader::UpdatePTSOffset() {
1927 if (pts_offset_seconds != NO_PTS_OFFSET) {
1931 pts_offset_seconds = 0.0;
1932 double video_pts_offset_seconds = 0.0;
1933 double audio_pts_offset_seconds = 0.0;
1935 bool has_video_pts =
false;
1938 has_video_pts =
true;
1940 bool has_audio_pts =
false;
1943 has_audio_pts =
true;
1947 while (!has_video_pts || !has_audio_pts) {
1949 if (GetNextPacket() < 0)
1954 int64_t pts = GetPacketPTS();
1957 if (!has_video_pts && packet->stream_index == videoStream) {
1963 if (std::abs(video_pts_offset_seconds) <= 10.0) {
1964 has_video_pts =
true;
1967 else if (!has_audio_pts && packet->stream_index == audioStream) {
1973 if (std::abs(audio_pts_offset_seconds) <= 10.0) {
1974 has_audio_pts =
true;
1980 if (has_video_pts && has_audio_pts) {
1992 pts_offset_seconds = std::max(video_pts_offset_seconds, audio_pts_offset_seconds);
1997int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
1999 int64_t previous_video_frame = current_video_frame;
2008 if (current_video_frame == 0)
2009 current_video_frame = frame;
2013 if (frame == previous_video_frame) {
2018 current_video_frame++;
2027int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2029 double seconds = (double(frame_number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2039int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2041 double seconds = (double(frame_number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2051AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
2059 int64_t whole_frame = int64_t(frame);
2062 double sample_start_percentage = frame - double(whole_frame);
2068 int sample_start = round(
double(samples_per_frame) * sample_start_percentage);
2071 if (whole_frame < 1)
2073 if (sample_start < 0)
2080 if (previous_packet_location.
frame != -1) {
2081 if (location.
is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
2082 int64_t orig_frame = location.
frame;
2087 location.
frame = previous_packet_location.
frame;
2090 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
"Source Frame", orig_frame,
"Source Audio Sample", orig_start,
"Target Frame", location.
frame,
"Target Audio Sample", location.
sample_start,
"pts", pts);
2099 previous_packet_location = location;
2106std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
2108 std::shared_ptr<Frame> output = working_cache.
GetFrame(requested_frame);
2112 output = working_cache.
GetFrame(requested_frame);
2113 if(output)
return output;
2121 working_cache.
Add(output);
2124 if (requested_frame > largest_frame_processed)
2125 largest_frame_processed = requested_frame;
2132bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2135 bool seek_trash =
false;
2136 int64_t max_seeked_frame = seek_audio_frame_found;
2137 if (seek_video_frame_found > max_seeked_frame) {
2138 max_seeked_frame = seek_video_frame_found;
2140 if ((
info.
has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2141 (
info.
has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
2149void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
2152 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
2155 std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.
GetFrames();
2156 std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;
2159 for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
2162 std::shared_ptr<Frame> f = *working_itr;
2165 if (!f || f->number > requested_frame) {
2171 double frame_pts_seconds = (double(f->number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2172 double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);
2175 bool is_video_ready =
false;
2176 bool is_audio_ready =
false;
2177 double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
2178 if ((frame_pts_seconds <= video_pts_seconds)
2179 || (recent_pts_diff > 1.5)
2183 is_video_ready =
true;
2185 "frame_number", f->number,
2186 "frame_pts_seconds", frame_pts_seconds,
2187 "video_pts_seconds", video_pts_seconds,
2188 "recent_pts_diff", recent_pts_diff);
2192 for (int64_t previous_frame = requested_frame - 1; previous_frame > 0; previous_frame--) {
2194 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2196 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2201 if (last_video_frame && !f->has_image_data) {
2203 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2204 }
else if (!f->has_image_data) {
2205 f->AddColor(
"#000000");
2210 double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
2211 if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
2212 || (recent_pts_diff > 1.5)
2217 is_audio_ready =
true;
2219 "frame_number", f->number,
2220 "frame_pts_seconds", frame_pts_seconds,
2221 "audio_pts_seconds", audio_pts_seconds,
2222 "audio_pts_diff", audio_pts_diff,
2223 "recent_pts_diff", recent_pts_diff);
2225 bool is_seek_trash = IsPartialFrame(f->number);
2233 "frame_number", f->number,
2234 "is_video_ready", is_video_ready,
2235 "is_audio_ready", is_audio_ready,
2241 if ((!packet_status.
end_of_file && is_video_ready && is_audio_ready) || packet_status.
end_of_file || is_seek_trash) {
2244 "requested_frame", requested_frame,
2245 "f->number", f->number,
2246 "is_seek_trash", is_seek_trash,
2247 "Working Cache Count", working_cache.
Count(),
2251 if (!is_seek_trash) {
2256 working_cache.
Remove(f->number);
2259 last_frame = f->number;
2262 working_cache.
Remove(f->number);
2269 working_frames.clear();
2270 working_frames.shrink_to_fit();
2274void FFmpegReader::CheckFPS() {
2282 int frames_per_second[3] = {0,0,0};
2283 int max_fps_index =
sizeof(frames_per_second) /
sizeof(frames_per_second[0]);
2286 int all_frames_detected = 0;
2287 int starting_frames_detected = 0;
2292 if (GetNextPacket() < 0)
2297 if (packet->stream_index == videoStream) {
2300 fps_index = int(video_seconds);
2303 if (fps_index >= 0 && fps_index < max_fps_index) {
2305 starting_frames_detected++;
2306 frames_per_second[fps_index]++;
2310 all_frames_detected++;
2315 float avg_fps = 30.0;
2316 if (starting_frames_detected > 0 && fps_index > 0) {
2317 avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
2321 if (avg_fps < 8.0) {
2330 if (all_frames_detected > 0) {
2344void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
2348 av_freep(&remove_frame->data[0]);
2356void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
2361 delete remove_packet;
2376 root[
"type"] =
"FFmpegReader";
2377 root[
"path"] =
path;
2392 catch (
const std::exception& e) {
2394 throw InvalidJSON(
"JSON is invalid (missing keys or invalid data types)");
2405 if (!root[
"path"].isNull())
2406 path = root[
"path"].asString();
Header file for all Exception classes.
AVPixelFormat hw_de_av_pix_fmt_global
AVHWDeviceType hw_de_av_device_type_global
Header file for FFmpegReader class.
Header file for FFmpegUtilities.
#define AV_FREE_CONTEXT(av_context)
#define AV_FREE_FRAME(av_frame)
#define SWR_CONVERT(ctx, out, linesize, out_count, in, linesize2, in_count)
#define AV_GET_CODEC_TYPE(av_stream)
#define AV_GET_CODEC_PIXEL_FORMAT(av_stream, av_context)
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec)
#define AV_FIND_DECODER_CODEC_ID(av_stream)
#define AV_ALLOCATE_FRAME()
#define AV_COPY_PICTURE_DATA(av_frame, buffer, pix_fmt, width, height)
#define AV_FREE_PACKET(av_packet)
#define AVCODEC_REGISTER_ALL
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context)
#define AV_GET_SAMPLE_FORMAT(av_stream, av_context)
#define AV_RESET_FRAME(av_frame)
#define FF_NUM_PROCESSORS
#define OPEN_MP_NUM_PROCESSORS
Header file for Timeline class.
Header file for ZeroMQ-based Logger class.
void SetMaxBytesFromInfo(int64_t number_of_frames, int width, int height, int sample_rate, int channels)
Set maximum bytes to a different amount based on a ReaderInfo struct.
int64_t Count()
Count the frames in the queue.
void Add(std::shared_ptr< openshot::Frame > frame)
Add a Frame to the cache.
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number)
Get a frame from the cache.
std::vector< std::shared_ptr< openshot::Frame > > GetFrames()
Get an array of all Frames.
void Remove(int64_t frame_number)
Remove a specific frame.
void Clear()
Clear the cache of all frames.
This class represents a clip (used to arrange readers on the timeline)
openshot::Keyframe scale_x
Curve representing the horizontal scaling in percent (0 to 1)
openshot::TimelineBase * ParentTimeline() override
Get the associated Timeline pointer (if any)
openshot::Keyframe scale_y
Curve representing the vertical scaling in percent (0 to 1)
openshot::ScaleType scale
The scale determines how a clip should be resized to fit its parent.
double Y
The Y value of the coordinate (usually representing the value of the property being animated)
void Open() override
Open File - which is called by the constructor automatically.
FFmpegReader(const std::string &path, bool inspect_reader=true)
Constructor for FFmpegReader.
Json::Value JsonValue() const override
Generate Json::Value for this object.
bool GetIsDurationKnown()
Return true if frame can be read with GetFrame()
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
CacheMemory final_cache
Final cache object used to hold final frames.
virtual ~FFmpegReader()
Destructor.
std::string Json() const override
Generate JSON string of this object.
std::shared_ptr< openshot::Frame > GetFrame(int64_t requested_frame) override
void Close() override
Close File.
void SetJson(const std::string value) override
Load JSON string into this object.
This class represents a fraction.
int num
Numerator for the fraction.
float ToFloat()
Return this fraction as a float (i.e. 1/2 = 0.5)
double ToDouble() const
Return this fraction as a double (i.e. 1/2 = 0.5)
int den
Denominator for the fraction.
int GetSamplesPerFrame(openshot::Fraction fps, int sample_rate, int channels)
Calculate the # of samples per video frame (for the current frame number)
Exception when no valid codec is found for a file.
Exception for files that can not be found or opened.
Exception for invalid JSON.
Point GetMaxPoint() const
Get max point (by Y coordinate)
Exception when no streams are found in the file.
Exception when memory could not be allocated.
Coordinate co
This is the primary coordinate.
openshot::ReaderInfo info
Information about the current media file.
virtual void SetJsonValue(const Json::Value root)=0
Load Json::Value into this object.
virtual Json::Value JsonValue() const =0
Generate Json::Value for this object.
std::recursive_mutex getFrameMutex
Mutex for multiple threads.
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Exception when a reader is closed, and a frame is requested.
int DE_LIMIT_WIDTH_MAX
Maximum columns that hardware decode can handle.
int HW_DE_DEVICE_SET
Which GPU to use to decode (0 is the first)
int DE_LIMIT_HEIGHT_MAX
Maximum rows that hardware decode can handle.
static Settings * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
int HARDWARE_DECODER
Use video codec for faster video decoding (if supported)
int preview_height
Optional preview height of timeline image. If your preview window is smaller than the timeline,...
int preview_width
Optional preview width of timeline image. If your preview window is smaller than the timeline,...
This class represents a timeline.
void AppendDebugMethod(std::string method_name, std::string arg1_name="", float arg1_value=-1.0, std::string arg2_name="", float arg2_value=-1.0, std::string arg3_name="", float arg3_value=-1.0, std::string arg4_name="", float arg4_value=-1.0, std::string arg5_name="", float arg5_value=-1.0, std::string arg6_name="", float arg6_value=-1.0)
Append debug information.
static ZmqLogger * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
This namespace is the default namespace for all code in the openshot library.
@ SCALE_FIT
Scale the clip until either height or width fills the canvas (with no cropping)
@ SCALE_STRETCH
Scale the clip until both height and width fill the canvas (distort to fit)
@ SCALE_CROP
Scale the clip until both height and width fill the canvas (cropping the overlap)
ChannelLayout
This enumeration determines the audio channel layout (such as stereo, mono, 5 point surround,...
const Json::Value stringToJson(const std::string value)
This struct holds the associated video frame and starting sample # for an audio packet.
bool is_near(AudioLocation location, int samples_per_frame, int64_t amount)
int64_t packets_decoded()
int audio_bit_rate
The bit rate of the audio stream (in bits per second)
int video_bit_rate
The bit rate of the video stream (in bits per second)
bool has_single_image
Determines if this file only contains a single image.
float duration
Length of time (in seconds)
openshot::Fraction audio_timebase
The audio timebase determines how long each audio packet should be played.
int width
The width of the video (in pixels)
int channels
The number of audio channels used in the audio stream.
openshot::Fraction fps
Frames per second, as a fraction (i.e. 24/1 = 24 fps)
openshot::Fraction display_ratio
The ratio of width to height of the video stream (i.e. 640x480 has a ratio of 4/3)
int height
The height of the video (in pixels)
int pixel_format
The pixel format (i.e. YUV420P, RGB24, etc...)
int64_t video_length
The number of frames in the video stream.
std::string acodec
The name of the audio codec used to encode / decode the video stream.
std::map< std::string, std::string > metadata
An optional map/dictionary of metadata for this reader.
std::string vcodec
The name of the video codec used to encode / decode the video stream.
openshot::Fraction pixel_ratio
The pixel ratio of the video stream as a fraction (i.e. some pixels are not square)
openshot::ChannelLayout channel_layout
The channel layout (mono, stereo, 5 point surround, etc...)
bool has_video
Determines if this file has a video stream.
bool has_audio
Determines if this file has an audio stream.
openshot::Fraction video_timebase
The video timebase determines how long each frame stays on the screen.
int video_stream_index
The index of the video stream.
int sample_rate
The number of audio samples per second (44100 is a common sample rate)
int audio_stream_index
The index of the audio stream.
int64_t file_size
Size of file (in bytes)