OpenShot Library | libopenshot 0.3.3
Loading...
Searching...
No Matches
ObjectDetection.cpp
Go to the documentation of this file.
1
10// Copyright (c) 2008-2019 OpenShot Studios, LLC
11//
12// SPDX-License-Identifier: LGPL-3.0-or-later
13
14#include <fstream>
15#include <iostream>
16
18#include "effects/Tracker.h"
19#include "Exceptions.h"
20#include "Timeline.h"
21#include "objdetectdata.pb.h"
22
23#include <QImage>
24#include <QPainter>
25#include <QRectF>
26#include <QString>
27#include <QStringList>
28using namespace std;
29using namespace openshot;
30
31
33ObjectDetection::ObjectDetection(std::string clipObDetectDataPath) :
34display_box_text(1.0), display_boxes(1.0)
35{
36 // Init effect properties
37 init_effect_details();
38
39 // Tries to load the tracker data from protobuf
40 LoadObjDetectdData(clipObDetectDataPath);
41
42 // Initialize the selected object index as the first object index
43 selectedObjectIndex = trackedObjects.begin()->first;
44}
45
46// Default constructor
48 display_box_text(1.0), display_boxes(1.0)
49{
50 // Init effect properties
51 init_effect_details();
52
53 // Initialize the selected object index as the first object index
54 selectedObjectIndex = trackedObjects.begin()->first;
55}
56
57// Init effect settings
58void ObjectDetection::init_effect_details()
59{
62
64 info.class_name = "ObjectDetection";
65 info.name = "Object Detector";
66 info.description = "Detect objects through the video.";
67 info.has_audio = false;
68 info.has_video = true;
70}
71
// This method is required for all derived classes of EffectBase, and returns a
// modified openshot::Frame object
std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number) {
	// Get the frame's QImage
	std::shared_ptr<QImage> frame_image = frame->GetImage();

	// Check if frame isn't NULL
	if(!frame_image || frame_image->isNull()) {
		return frame;
	}

	// Paint directly onto the frame's image (modified in place)
	QPainter painter(frame_image.get());
	painter.setRenderHints(QPainter::Antialiasing | QPainter::SmoothPixmapTransform);

	// Only draw if we have detection data for this exact frame number
	if (detectionsData.find(frame_number) != detectionsData.end()) {
		DetectionData detections = detectionsData[frame_number];
		for (int i = 0; i < detections.boxes.size(); i++) {
			// Skip detections below the confidence threshold, and (when a class
			// filter is set) detections whose class name is not in display_classes
			if (detections.confidences.at(i) < confidence_threshold ||
				(!display_classes.empty() &&
				 std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end())) {
				continue;
			}

			// Look up the tracked object for this detection's object id
			int objectId = detections.objectIds.at(i);
			auto trackedObject_it = trackedObjects.find(objectId);

			if (trackedObject_it != trackedObjects.end()) {
				std::shared_ptr<TrackedObjectBBox> trackedObject = std::static_pointer_cast<TrackedObjectBBox>(trackedObject_it->second);

				// Only draw objects that are parented, contain this frame, and are visible
				Clip* parentClip = (Clip*) trackedObject->ParentClip();
				if (parentClip && trackedObject->Contains(frame_number) && trackedObject->visible.GetValue(frame_number) == 1) {
					// Box coordinates appear to be normalized (0..1) — they are
					// scaled by the image dimensions here. cx/cy is the box center.
					BBox trackedBox = trackedObject->GetBox(frame_number);
					QRectF boxRect((trackedBox.cx - trackedBox.width / 2) * frame_image->width(),
								   (trackedBox.cy - trackedBox.height / 2) * frame_image->height(),
								   trackedBox.width * frame_image->width(),
								   trackedBox.height * frame_image->height());

					// Get properties of tracked object (i.e. colors, stroke width, etc...)
					std::vector<int> stroke_rgba = trackedObject->stroke.GetColorRGBA(frame_number);
					std::vector<int> bg_rgba = trackedObject->background.GetColorRGBA(frame_number);
					int stroke_width = trackedObject->stroke_width.GetValue(frame_number);
					float stroke_alpha = trackedObject->stroke_alpha.GetValue(frame_number);
					float bg_alpha = trackedObject->background_alpha.GetValue(frame_number);
					float bg_corner = trackedObject->background_corner.GetValue(frame_number);

					// Set the pen for the border (alpha keyframes are 0..1, scaled to 0..255)
					QPen pen(QColor(stroke_rgba[0], stroke_rgba[1], stroke_rgba[2], 255 * stroke_alpha));
					pen.setWidth(stroke_width);
					painter.setPen(pen);

					// Set the brush for the background
					QBrush brush(QColor(bg_rgba[0], bg_rgba[1], bg_rgba[2], 255 * bg_alpha));
					painter.setBrush(brush);

					if (display_boxes.GetValue(frame_number) == 1 && trackedObject->draw_box.GetValue(frame_number) == 1) {
						// Only draw boxes if both properties are set to YES (draw all boxes, and draw box of the selected box)
						painter.drawRoundedRect(boxRect, bg_corner, bg_corner);
					}

					if(display_box_text.GetValue(frame_number) == 1) {
						// Draw text label above bounding box
						// Get the confidence and classId for the current detection
						int classId = detections.classIds.at(i);

						// Label is "<class name>:<object id>", or just the id if no class names loaded
						QString label = QString::number(objectId);
						if (!classNames.empty()) {
							label = QString::fromStdString(classNames[classId]) + ":" + label;
						}

						// Set up the painter, font, and pen
						QFont font;
						font.setPixelSize(14);
						painter.setFont(font);

						// Calculate the size of the text
						QFontMetrics fontMetrics(font);
						QSize labelSize = fontMetrics.size(Qt::TextSingleLine, label);

						// Center the label horizontally over the box; clamp the baseline
						// so it never goes above the top edge of the image
						double left = boxRect.center().x() - (labelSize.width() / 2.0);
						double top = std::max(static_cast<int>(boxRect.top()), labelSize.height()) - 4.0;

						// Draw the text
						painter.drawText(QPointF(left, top), label);
					}
				}
			}
		}
	}

	painter.end();

	// The frame's QImage has been modified in place, so we just return the original frame
	return frame;
}
168
169// Load protobuf data file
170bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
171 // Create tracker message
172 pb_objdetect::ObjDetect objMessage;
173
174 // Read the existing tracker message.
175 std::fstream input(inputFilePath, std::ios::in | std::ios::binary);
176 if (!objMessage.ParseFromIstream(&input)) {
177 std::cerr << "Failed to parse protobuf message." << std::endl;
178 return false;
179 }
180
181 // Make sure classNames, detectionsData and trackedObjects are empty
182 classNames.clear();
183 detectionsData.clear();
184 trackedObjects.clear();
185
186 // Seed to generate same random numbers
187 std::srand(1);
188 // Get all classes names and assign a color to them
189 for(int i = 0; i < objMessage.classnames_size(); i++)
190 {
191 classNames.push_back(objMessage.classnames(i));
192 classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50));
193 }
194
195 // Iterate over all frames of the saved message
196 for (size_t i = 0; i < objMessage.frame_size(); i++)
197 {
198 // Create protobuf message reader
199 const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
200
201 // Get frame Id
202 size_t id = pbFrameData.id();
203
204 // Load bounding box data
205 const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
206
207 // Construct data vectors related to detections in the current frame
208 std::vector<int> classIds;
209 std::vector<float> confidences;
210 std::vector<cv::Rect_<float>> boxes;
211 std::vector<int> objectIds;
212
213 // Iterate through the detected objects
214 for(int i = 0; i < pbFrameData.bounding_box_size(); i++)
215 {
216 // Get bounding box coordinates
217 float x = pBox.Get(i).x();
218 float y = pBox.Get(i).y();
219 float w = pBox.Get(i).w();
220 float h = pBox.Get(i).h();
221 // Get class Id (which will be assign to a class name)
222 int classId = pBox.Get(i).classid();
223 // Get prediction confidence
224 float confidence = pBox.Get(i).confidence();
225
226 // Get the object Id
227 int objectId = pBox.Get(i).objectid();
228
229 // Search for the object id on trackedObjects map
230 auto trackedObject = trackedObjects.find(objectId);
231 // Check if object already exists on the map
232 if (trackedObject != trackedObjects.end())
233 {
234 // Add a new BBox to it
235 trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
236 }
237 else
238 {
239 // There is no tracked object with that id, so insert a new one
240 TrackedObjectBBox trackedObj((int)classesColor[classId](0), (int)classesColor[classId](1), (int)classesColor[classId](2), (int)0);
241 trackedObj.stroke_alpha = Keyframe(1.0);
242 trackedObj.AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
243
244 std::shared_ptr<TrackedObjectBBox> trackedObjPtr = std::make_shared<TrackedObjectBBox>(trackedObj);
245 ClipBase* parentClip = this->ParentClip();
246 trackedObjPtr->ParentClip(parentClip);
247
248 // Create a temp ID. This ID is necessary to initialize the object_id Json list
249 // this Id will be replaced by the one created in the UI
250 trackedObjPtr->Id(std::to_string(objectId));
251 trackedObjects.insert({objectId, trackedObjPtr});
252 }
253
254 // Create OpenCV rectangle with the bouding box info
255 cv::Rect_<float> box(x, y, w, h);
256
257 // Push back data into vectors
258 boxes.push_back(box);
259 classIds.push_back(classId);
260 confidences.push_back(confidence);
261 objectIds.push_back(objectId);
262 }
263
264 // Assign data to object detector map
265 detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds);
266 }
267
268 // Delete all global objects allocated by libprotobuf.
269 google::protobuf::ShutdownProtobufLibrary();
270
271 return true;
272}
273
274// Get the indexes and IDs of all visible objects in the given frame
275std::string ObjectDetection::GetVisibleObjects(int64_t frame_number) const{
276
277 // Initialize the JSON objects
278 Json::Value root;
279 root["visible_objects_index"] = Json::Value(Json::arrayValue);
280 root["visible_objects_id"] = Json::Value(Json::arrayValue);
281 root["visible_class_names"] = Json::Value(Json::arrayValue);
282
283 // Check if track data exists for the requested frame
284 if (detectionsData.find(frame_number) == detectionsData.end()){
285 return root.toStyledString();
286 }
287 DetectionData detections = detectionsData.at(frame_number);
288
289 // Iterate through the tracked objects
290 for(int i = 0; i<detections.boxes.size(); i++){
291 // Does not show boxes with confidence below the threshold
292 if(detections.confidences.at(i) < confidence_threshold){
293 continue;
294 }
295
296 // Get class name of tracked object
297 auto className = classNames[detections.classIds.at(i)];
298
299 // If display_classes is not empty, check if className is in it
300 if (!display_classes.empty()) {
301 auto it = std::find(display_classes.begin(), display_classes.end(), className);
302 if (it == display_classes.end()) {
303 // If not in display_classes, skip this detection
304 continue;
305 }
306 root["visible_class_names"].append(className);
307 } else {
308 // include all class names
309 root["visible_class_names"].append(className);
310 }
311
312 int objectId = detections.objectIds.at(i);
313 // Search for the object in the trackedObjects map
314 auto trackedObject = trackedObjects.find(objectId);
315
316 // Get the tracked object JSON properties for this frame
317 Json::Value trackedObjectJSON = trackedObject->second->PropertiesJSON(frame_number);
318
319 if (trackedObjectJSON["visible"]["value"].asBool() &&
320 trackedObject->second->ExactlyContains(frame_number)){
321 // Save the object's index and ID if it's visible in this frame
322 root["visible_objects_index"].append(trackedObject->first);
323 root["visible_objects_id"].append(trackedObject->second->Id());
324 }
325 }
326
327 return root.toStyledString();
328}
329
330// Generate JSON string of this object
331std::string ObjectDetection::Json() const {
332
333 // Return formatted string
334 return JsonValue().toStyledString();
335}
336
337// Generate Json::Value for this object
338Json::Value ObjectDetection::JsonValue() const {
339
340 // Create root json object
341 Json::Value root = EffectBase::JsonValue(); // get parent properties
342 root["type"] = info.class_name;
343 root["protobuf_data_path"] = protobuf_data_path;
344 root["selected_object_index"] = selectedObjectIndex;
345 root["confidence_threshold"] = confidence_threshold;
346 root["display_box_text"] = display_box_text.JsonValue();
347 root["display_boxes"] = display_boxes.JsonValue();
348
349 // Add tracked object's IDs to root
350 Json::Value objects;
351 for (auto const& trackedObject : trackedObjects){
352 Json::Value trackedObjectJSON = trackedObject.second->JsonValue();
353 // add object json
354 objects[trackedObject.second->Id()] = trackedObjectJSON;
355 }
356 root["objects"] = objects;
357
358 // return JsonValue
359 return root;
360}
361
362// Load JSON string into this object
363void ObjectDetection::SetJson(const std::string value) {
364
365 // Parse JSON string into JSON objects
366 try
367 {
368 const Json::Value root = openshot::stringToJson(value);
369 // Set all values that match
370 SetJsonValue(root);
371 }
372 catch (const std::exception& e)
373 {
374 // Error parsing JSON (or missing keys)
375 throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
376 }
377}
378
379// Load Json::Value into this object
380void ObjectDetection::SetJsonValue(const Json::Value root) {
381 // Set parent data
383
384 // Set data from Json (if key is found)
385 if (!root["protobuf_data_path"].isNull() && protobuf_data_path.size() <= 1){
386 protobuf_data_path = root["protobuf_data_path"].asString();
387
388 if(!LoadObjDetectdData(protobuf_data_path)){
389 throw InvalidFile("Invalid protobuf data path", "");
390 protobuf_data_path = "";
391 }
392 }
393
394 // Set the selected object index
395 if (!root["selected_object_index"].isNull())
396 selectedObjectIndex = root["selected_object_index"].asInt();
397
398 if (!root["confidence_threshold"].isNull())
399 confidence_threshold = root["confidence_threshold"].asFloat();
400
401 if (!root["display_box_text"].isNull())
402 display_box_text.SetJsonValue(root["display_box_text"]);
403
404 if (!root["display_boxes"].isNull())
405 display_boxes.SetJsonValue(root["display_boxes"]);
406
407 if (!root["class_filter"].isNull()) {
408 class_filter = root["class_filter"].asString();
409
410 // Convert the class_filter to a QString
411 QString qClassFilter = QString::fromStdString(root["class_filter"].asString());
412
413 // Split the QString by commas and automatically trim each resulting string
414 QStringList classList = qClassFilter.split(',', QString::SkipEmptyParts);
415 display_classes.clear();
416
417 // Iterate over the QStringList and add each trimmed, non-empty string
418 for (const QString &classItem : classList) {
419 QString trimmedItem = classItem.trimmed().toLower();
420 if (!trimmedItem.isEmpty()) {
421 display_classes.push_back(trimmedItem.toStdString());
422 }
423 }
424 }
425
426 if (!root["objects"].isNull()){
427 for (auto const& trackedObject : trackedObjects){
428 std::string obj_id = std::to_string(trackedObject.first);
429 if(!root["objects"][obj_id].isNull()){
430 trackedObject.second->SetJsonValue(root["objects"][obj_id]);
431 }
432 }
433 }
434
435 // Set the tracked object's ids
436 if (!root["objects_id"].isNull()){
437 for (auto const& trackedObject : trackedObjects){
438 Json::Value trackedObjectJSON;
439 trackedObjectJSON["box_id"] = root["objects_id"][trackedObject.first].asString();
440 trackedObject.second->SetJsonValue(trackedObjectJSON);
441 }
442 }
443}
444
445// Get all properties for a specific frame
446std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {
447
448 // Generate JSON properties list
449 Json::Value root = BasePropertiesJSON(requested_frame);
450
451 Json::Value objects;
452 if(trackedObjects.count(selectedObjectIndex) != 0){
453 auto selectedObject = trackedObjects.at(selectedObjectIndex);
454 if (selectedObject){
455 Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame);
456 // add object json
457 objects[selectedObject->Id()] = trackedObjectJSON;
458 }
459 }
460 root["objects"] = objects;
461
462 root["selected_object_index"] = add_property_json("Selected Object", selectedObjectIndex, "int", "", NULL, 0, 200, false, requested_frame);
463 root["confidence_threshold"] = add_property_json("Confidence Theshold", confidence_threshold, "float", "", NULL, 0, 1, false, requested_frame);
464 root["class_filter"] = add_property_json("Class Filter", 0.0, "string", class_filter, NULL, -1, -1, false, requested_frame);
465
466 root["display_box_text"] = add_property_json("Draw All Text", display_box_text.GetValue(requested_frame), "int", "", &display_box_text, 0, 1, false, requested_frame);
467 root["display_box_text"]["choices"].append(add_property_choice_json("Yes", true, display_box_text.GetValue(requested_frame)));
468 root["display_box_text"]["choices"].append(add_property_choice_json("No", false, display_box_text.GetValue(requested_frame)));
469
470 root["display_boxes"] = add_property_json("Draw All Boxes", display_boxes.GetValue(requested_frame), "int", "", &display_boxes, 0, 1, false, requested_frame);
471 root["display_boxes"]["choices"].append(add_property_choice_json("Yes", true, display_boxes.GetValue(requested_frame)));
472 root["display_boxes"]["choices"].append(add_property_choice_json("No", false, display_boxes.GetValue(requested_frame)));
473
474 // Return formatted string
475 return root.toStyledString();
476}
Header file for all Exception classes.
Header file for Object Detection effect class.
Header file for Timeline class.
Header file for Tracker effect class.
This abstract class is the base class, used by all clips in libopenshot.
Definition ClipBase.h:33
Json::Value add_property_choice_json(std::string name, int value, int selected_value) const
Generate JSON choice for a property (dropdown properties)
Definition ClipBase.cpp:132
std::string id
ID Property for all derived Clip and Effect classes.
Definition ClipBase.h:35
Json::Value add_property_json(std::string name, float value, std::string type, std::string memo, const Keyframe *keyframe, float min_value, float max_value, bool readonly, int64_t requested_frame) const
Generate JSON for a property.
Definition ClipBase.cpp:96
This class represents a clip (used to arrange readers on the timeline)
Definition Clip.h:89
virtual Json::Value JsonValue() const
Generate Json::Value for this object.
openshot::ClipBase * ParentClip()
Parent clip object of this effect (which can be unparented and NULL)
Json::Value BasePropertiesJSON(int64_t requested_frame) const
Generate JSON object of base properties (recommended to be used by all effects)
virtual void SetJsonValue(const Json::Value root)
Load Json::Value into this object.
EffectInfoStruct info
Information about the current effect.
Definition EffectBase.h:69
std::map< int, std::shared_ptr< openshot::TrackedObjectBase > > trackedObjects
Map of Tracked Object's by their indices (used by Effects that track objects on clips)
Definition EffectBase.h:66
Exception for files that can not be found or opened.
Definition Exceptions.h:188
Exception for invalid JSON.
Definition Exceptions.h:218
A Keyframe is a collection of Point instances, which is used to vary a number or property over time.
Definition KeyFrame.h:53
void SetJsonValue(const Json::Value root)
Load Json::Value into this object.
Definition KeyFrame.cpp:372
double GetValue(int64_t index) const
Get the value at a specific index.
Definition KeyFrame.cpp:258
Json::Value JsonValue() const
Generate Json::Value for this object.
Definition KeyFrame.cpp:339
Json::Value JsonValue() const override
Generate Json::Value for this object.
int selectedObjectIndex
Index of the Tracked Object that was selected to modify its properties.
std::shared_ptr< Frame > GetFrame(std::shared_ptr< Frame > frame, int64_t frame_number) override
This method is required for all derived classes of EffectBase, and returns a modified openshot::Frame...
ObjectDetection()
Default constructor.
bool LoadObjDetectdData(std::string inputFilePath)
Load protobuf data file.
std::string GetVisibleObjects(int64_t frame_number) const override
Get the indexes and IDs of all visible objects in the given frame.
std::string Json() const override
Generate JSON string of this object.
std::string PropertiesJSON(int64_t requested_frame) const override
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
void SetJson(const std::string value) override
Load JSON string into this object.
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
This class contains the properties of a tracked object and functions to manipulate it.
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override
Add a BBox to the BoxVec map.
Keyframe stroke_alpha
Stroke box opacity.
This namespace is the default namespace for all code in the openshot library.
Definition Compressor.h:29
const Json::Value stringToJson(const std::string value)
Definition Json.cpp:16
std::vector< cv::Rect_< float > > boxes
std::vector< float > confidences
std::vector< int > classIds
std::vector< int > objectIds
This struct holds the information of a bounding-box.
float cy
y-coordinate of the bounding box center
float height
bounding box height
float cx
x-coordinate of the bounding box center
float width
bounding box width
bool has_video
Determines if this effect manipulates the image of a frame.
Definition EffectBase.h:40
bool has_audio
Determines if this effect manipulates the audio of a frame.
Definition EffectBase.h:41
std::string class_name
The class name of the effect.
Definition EffectBase.h:36
std::string name
The name of the effect.
Definition EffectBase.h:37
std::string description
The description of this effect and what it does.
Definition EffectBase.h:38
bool has_tracked_object
Determines if this effect track objects through the clip.
Definition EffectBase.h:42