OpenShot Library | libopenshot 0.3.3
CVObjectDetection.cpp
// Copyright (c) 2008-2019 OpenShot Studios, LLC
//
// SPDX-License-Identifier: LGPL-3.0-or-later

#include <fstream>
#include <iomanip>
#include <iostream>

#include "CVObjectDetection.h"
#include "Exceptions.h"

#include "objdetectdata.pb.h"
#include <google/protobuf/util/time_util.h>

using namespace std;
using namespace openshot;
using google::protobuf::util::TimeUtil;

CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
: processingController(&processingController), processingDevice("CPU"){
    SetJson(processInfoJson);
    confThreshold = 0.5;
    nmsThreshold = 0.1;
}

void CVObjectDetection::setProcessingDevice(){
    if(processingDevice == "GPU"){
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    }
    else if(processingDevice == "CPU"){
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
}

void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, size_t _end, bool process_interval)
{
    start = _start; end = _end;

    video.Open();

    if(error){
        return;
    }

    processingController->SetError(false, "");

    // Load names of classes
    std::ifstream ifs(classesFile.c_str());
    std::string line;
    while (std::getline(ifs, line)) classNames.push_back(line);

    // Load the network
    if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
        return;
    net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
    setProcessingDevice();

    size_t frame_number;
    if(!process_interval || end <= 1 || end-start == 0){
        // Get total number of frames in video
        start = (int)(video.Start() * video.Reader()->info.fps.ToFloat());
        end = (int)(video.End() * video.Reader()->info.fps.ToFloat());
    }

    for (frame_number = start; frame_number <= end; frame_number++)
    {
        // Stop the object detection process
        if(processingController->ShouldStop()){
            return;
        }

        std::shared_ptr<openshot::Frame> f = video.GetFrame(frame_number);

        // Grab OpenCV Mat image
        cv::Mat cvimage = f->GetImageCV();

        DetectObjects(cvimage, frame_number);

        // Update progress (guard against a zero-length interval)
        processingController->SetProgress(uint(100*(frame_number-start)/std::max<size_t>(1, end-start)));
    }
}

void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){
    cv::Mat blob;

    // Create a 4D blob from the frame
    int inpWidth, inpHeight;
    inpWidth = inpHeight = 416;

    cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false);

    // Set the input to the network
    net.setInput(blob);

    // Run the forward pass to get the output of the output layers
    std::vector<cv::Mat> outs;
    net.forward(outs, getOutputsNames(net));

    // Remove the bounding boxes with low confidence
    postprocess(frame.size(), outs, frameId);
}

// Remove the bounding boxes with low confidence using non-maxima suppression
void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector<cv::Mat>& outs, size_t frameId)
{
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> objectIds;

    for (size_t i = 0; i < outs.size(); ++i)
    {
        // Scan through all the bounding boxes output from the network and keep only the
        // ones with high confidence scores. Assign the box's class label as the class
        // with the highest score for the box.
        float* data = (float*)outs[i].data;
        for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
        {
            cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
            cv::Point classIdPoint;
            double confidence;
            // Get the value and location of the maximum score
            cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
            if (confidence > confThreshold)
            {
                int centerX = (int)(data[0] * frameDims.width);
                int centerY = (int)(data[1] * frameDims.height);
                int width = (int)(data[2] * frameDims.width);
                int height = (int)(data[3] * frameDims.height);
                int left = centerX - width / 2;
                int top = centerY - height / 2;

                classIds.push_back(classIdPoint.x);
                confidences.push_back((float)confidence);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
    }

    // Perform non-maximum suppression to eliminate redundant overlapping boxes with
    // lower confidences
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);

    // Pass boxes to the SORT algorithm
    std::vector<cv::Rect> sortBoxes;
    for(auto box : boxes)
        sortBoxes.push_back(box);
    sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);

    // Clear data vectors
    boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
    // Get SORT predicted boxes
    for(auto TBox : sort.frameTrackingResult){
        if(TBox.frame == frameId){
            boxes.push_back(TBox.box);
            confidences.push_back(TBox.confidence);
            classIds.push_back(TBox.classId);
            objectIds.push_back(TBox.id);
        }
    }

    // Remove boxes based on centroid distance
    for(uint i = 0; i<boxes.size(); i++){
        for(uint j = i+1; j<boxes.size(); j++){
            int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].height/2);
            int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].height/2);

            if(std::abs(xc_1 - xc_2) < 10 && std::abs(yc_1 - yc_2) < 10){
                if(classIds[i] == classIds[j]){
                    if(confidences[i] >= confidences[j]){
                        boxes.erase(boxes.begin() + j);
                        classIds.erase(classIds.begin() + j);
                        confidences.erase(confidences.begin() + j);
                        objectIds.erase(objectIds.begin() + j);
                        break;
                    }
                    else{
                        boxes.erase(boxes.begin() + i);
                        classIds.erase(classIds.begin() + i);
                        confidences.erase(confidences.begin() + i);
                        objectIds.erase(objectIds.begin() + i);
                        i = 0;
                        break;
                    }
                }
            }
        }
    }

    // Remove boxes based on IoU score
    for(uint i = 0; i<boxes.size(); i++){
        for(uint j = i+1; j<boxes.size(); j++){
            if(iou(boxes[i], boxes[j])){
                if(classIds[i] == classIds[j]){
                    if(confidences[i] >= confidences[j]){
                        boxes.erase(boxes.begin() + j);
                        classIds.erase(classIds.begin() + j);
                        confidences.erase(confidences.begin() + j);
                        objectIds.erase(objectIds.begin() + j);
                        break;
                    }
                    else{
                        boxes.erase(boxes.begin() + i);
                        classIds.erase(classIds.begin() + i);
                        confidences.erase(confidences.begin() + i);
                        objectIds.erase(objectIds.begin() + i);
                        i = 0;
                        break;
                    }
                }
            }
        }
    }

    // Normalize box coordinates
    std::vector<cv::Rect_<float>> normalized_boxes;
    for(auto box : boxes){
        cv::Rect_<float> normalized_box;
        normalized_box.x = (box.x)/(float)frameDims.width;
        normalized_box.y = (box.y)/(float)frameDims.height;
        normalized_box.width = (box.width)/(float)frameDims.width;
        normalized_box.height = (box.height)/(float)frameDims.height;
        normalized_boxes.push_back(normalized_box);
    }

    detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
}

// Compute IoU between 2 boxes
bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
    // Determine the (x, y)-coordinates of the intersection rectangle
    int xA = std::max(pred_box.x, sort_box.x);
    int yA = std::max(pred_box.y, sort_box.y);
    int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
    int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);

    // Compute the area of the intersection rectangle
    int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);

    // Compute the area of both rectangles
    int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
    int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);

    // Compute the intersection over union: intersection area divided by union area
    float iou = interArea / (float)(boxAArea + boxBArea - interArea);

    // If IoU is above this value the boxes are very close (probably a variation of the same bounding box)
    return iou > 0.5;
}
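
A quick numeric check of the threshold above (the two boxes are hypothetical values chosen for illustration; the +1 terms follow the pixel-inclusive convention used in this function):

// Example: pred_box = (x=0, y=0, w=10, h=10), sort_box = (x=5, y=5, w=10, h=10)
//   xA = 5, yA = 5, xB = 10, yB = 10
//   interArea = (10 - 5 + 1) * (10 - 5 + 1) = 36
//   boxAArea = boxBArea = (10 + 1) * (10 + 1) = 121
//   iou = 36 / (121 + 121 - 36) ≈ 0.17  ->  below 0.5, so iou() returns false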

// Get the names of the output layers
std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& net)
{
    static std::vector<cv::String> names;

    // Get the indices of the output layers, i.e. the layers with unconnected outputs
    std::vector<int> outLayers = net.getUnconnectedOutLayers();

    // Get the names of all the layers in the network
    std::vector<cv::String> layersNames = net.getLayerNames();

    // Store the names of the output layers in names
    names.resize(outLayers.size());
    for (size_t i = 0; i < outLayers.size(); ++i)
        names[i] = layersNames[outLayers[i] - 1];
    return names;
}

CVDetectionData CVObjectDetection::GetDetectionData(size_t frameId){
    // Check if the detection data for the requested frame exists
    if ( detectionsData.find(frameId) == detectionsData.end() ) {
        return CVDetectionData();
    } else {
        return detectionsData[frameId];
    }
}

// Save protobuf data file
bool CVObjectDetection::SaveObjDetectedData(){
    // Create the object detection protobuf message
    pb_objdetect::ObjDetect objMessage;

    // Save class names in protobuf message
    for(size_t i = 0; i < classNames.size(); i++){
        std::string* className = objMessage.add_classnames();
        className->assign(classNames.at(i));
    }

    // Iterate over all frames data and save in protobuf message
    for(std::map<size_t,CVDetectionData>::iterator it=detectionsData.begin(); it!=detectionsData.end(); ++it){
        CVDetectionData dData = it->second;
        AddFrameDataToProto(objMessage.add_frame(), dData);
    }

    // Add timestamp
    *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));

    {
        // Write the new message to disk.
        std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
        if (!objMessage.SerializeToOstream(&output)) {
            cerr << "Failed to write protobuf message." << endl;
            return false;
        }
    }

    // Delete all global objects allocated by libprotobuf.
    google::protobuf::ShutdownProtobufLibrary();

    return true;
}
336
337// Add frame object detection into protobuf message.
338void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CVDetectionData& dData) {
339
340 // Save frame number and rotation
341 pbFrameData->set_id(dData.frameId);
342
343 for(size_t i = 0; i < dData.boxes.size(); i++){
344 pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();
345
346 // Save bounding box data
347 box->set_x(dData.boxes.at(i).x);
348 box->set_y(dData.boxes.at(i).y);
349 box->set_w(dData.boxes.at(i).width);
350 box->set_h(dData.boxes.at(i).height);
351 box->set_classid(dData.classIds.at(i));
352 box->set_confidence(dData.confidences.at(i));
353 box->set_objectid(dData.objectIds.at(i));
354
355 }
356}

// Load JSON string into this object
void CVObjectDetection::SetJson(const std::string value) {
    // Parse JSON string into JSON objects
    try
    {
        const Json::Value root = openshot::stringToJson(value);
        // Set all values that match
        SetJsonValue(root);
    }
    catch (const std::exception& e)
    {
        // Error parsing JSON (or missing keys)
        // throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
        std::cout << "JSON is invalid (missing keys or invalid data types)" << std::endl;
    }
}

// Load Json::Value into this object
void CVObjectDetection::SetJsonValue(const Json::Value root) {

    // Set data from Json (if key is found)
    if (!root["protobuf_data_path"].isNull()){
        protobuf_data_path = (root["protobuf_data_path"].asString());
    }
    if (!root["processing-device"].isNull()){
        processingDevice = (root["processing-device"].asString());
    }
    if (!root["model-config"].isNull()){
        modelConfiguration = (root["model-config"].asString());
        std::ifstream infile(modelConfiguration);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to model config file");
            error = true;
        }
    }
    if (!root["model-weights"].isNull()){
        modelWeights = (root["model-weights"].asString());
        std::ifstream infile(modelWeights);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to model weight file");
            error = true;
        }
    }
    if (!root["class-names"].isNull()){
        classesFile = (root["class-names"].asString());
        std::ifstream infile(classesFile);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to class name file");
            error = true;
        }
    }
}
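
For reference, a minimal sketch of the JSON that SetJsonValue() consumes, using exactly the keys read above (all file paths are hypothetical placeholders):

{
  "protobuf_data_path": "/tmp/detections.data",
  "processing-device": "GPU",
  "model-config": "/path/to/model.cfg",
  "model-weights": "/path/to/model.weights",
  "class-names": "/path/to/classes.names"
}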

/*
||||||||||||||||||||||||||||||||||||||||||||||||||
    ONLY FOR UNIT TESTS (make test)
||||||||||||||||||||||||||||||||||||||||||||||||||
*/

// Load protobuf data file
bool CVObjectDetection::_LoadObjDetectdData(){
    // Create the object detection protobuf message
    pb_objdetect::ObjDetect objMessage;

    {
        // Read the existing object detection message.
        fstream input(protobuf_data_path, ios::in | ios::binary);
        if (!objMessage.ParseFromIstream(&input)) {
            cerr << "Failed to parse protobuf message." << endl;
            return false;
        }
    }

    // Make sure classNames and detectionsData are empty
    classNames.clear(); detectionsData.clear();

    // Get all class names
    for(int i = 0; i < objMessage.classnames_size(); i++){
        classNames.push_back(objMessage.classnames(i));
    }

    // Iterate over all frames of the saved message
    for (int i = 0; i < objMessage.frame_size(); i++) {
        // Create protobuf message reader
        const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);

        // Get frame Id
        size_t id = pbFrameData.id();

        // Load bounding box data
        const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box> &pBox = pbFrameData.bounding_box();

        // Construct data vectors related to detections in the current frame
        std::vector<int> classIds;
        std::vector<float> confidences;
        std::vector<cv::Rect_<float>> boxes;
        std::vector<int> objectIds;

        for(int j = 0; j < pbFrameData.bounding_box_size(); j++){
            // Get bounding box coordinates
            float x = pBox.Get(j).x(); float y = pBox.Get(j).y();
            float w = pBox.Get(j).w(); float h = pBox.Get(j).h();
            // Create OpenCV rectangle with the bounding box info
            cv::Rect_<float> box(x, y, w, h);

            // Get class Id (which will be assigned to a class name) and prediction confidence
            int classId = pBox.Get(j).classid(); float confidence = pBox.Get(j).confidence();
            // Get object Id
            int objectId = pBox.Get(j).objectid();

            // Push back data into vectors
            boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
            objectIds.push_back(objectId);
        }

        // Assign data to object detector map
        detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
    }

    // Delete all global objects allocated by libprotobuf.
    google::protobuf::ShutdownProtobufLibrary();

    return true;
}
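
Taken together, a minimal end-to-end sketch of how this class is typically driven, based only on the methods defined in this file (the clip path and JSON values are hypothetical, and ProcessingController is assumed to be constructible as shown):

#include "Clip.h"
#include "CVObjectDetection.h"
#include "ProcessingController.h"

int main() {
    // Hypothetical clip and settings (keys as read by SetJsonValue above)
    openshot::Clip video("input.mp4");
    std::string json = R"({
        "protobuf_data_path": "detections.data",
        "processing-device": "CPU",
        "model-config": "model.cfg",
        "model-weights": "model.weights",
        "class-names": "classes.names"
    })";

    ProcessingController controller;
    CVObjectDetection detector(json, controller);

    // Run detection over the whole clip (process_interval defaults to false),
    // then persist the per-frame results to protobuf_data_path
    detector.detectObjectsClip(video);
    detector.SaveObjDetectedData();

    // Query the normalized boxes stored for a given frame
    CVDetectionData d = detector.GetDetectionData(1);
    return 0;
}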