21#include "objdetectdata.pb.h"
22#include <google/protobuf/util/time_util.h>
26using google::protobuf::util::TimeUtil;
29: processingController(&processingController), processingDevice(
"CPU"){
35void CVObjectDetection::setProcessingDevice(){
36 if(processingDevice ==
"GPU"){
37 net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
38 net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
40 else if(processingDevice ==
"CPU"){
41 net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
42 net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
// Body of the clip-processing routine; its signature line is not part of this listing.
// Remember the requested frame interval in the members `start` and `end`.
49 start = _start; end = _end;
// Clear any error state on the processing controller before work begins.
57 processingController->
SetError(
false,
"");
// Read the class-names file line by line, appending each line to classNames.
// NOTE(review): classNames is only appended to in the visible lines, so a second run would duplicate entries — confirm.
60 std::ifstream ifs(classesFile.c_str());
62 while (std::getline(ifs, line)) classNames.push_back(line);
// Check whether any of the three required paths is empty.
// NOTE(review): the statement controlled by this condition is not visible in this listing.
65 if(classesFile ==
"" || modelConfiguration ==
"" || modelWeights ==
"")
// Build the network from the Darknet config/weights pair, then choose its backend and target.
67 net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
68 setProcessingDevice();
// Fall back to the clip's own range when no interval processing was requested,
// when `end` is at most 1, or when the requested interval is empty.
71 if(!process_interval || end <= 1 || end-start == 0){
// Clip start/end times (seconds) multiplied by the reader's frame rate give frame numbers.
73 start = (int)(video.
Start() * video.
Reader()->info.fps.ToFloat());
74 end = (int)(video.
End() * video.
Reader()->info.fps.ToFloat());
// Process every frame in [start, end], both ends included.
77 for (frame_number = start; frame_number <= end; frame_number++)
84 std::shared_ptr<openshot::Frame> f = video.
GetFrame(frame_number);
// Obtain the frame as an OpenCV matrix and run detection on it.
87 cv::Mat cvimage = f->GetImageCV();
89 DetectObjects(cvimage, frame_number);
// Report progress as an integer percentage of the interval covered so far.
// NOTE(review): (end-start) is the divisor; if the recomputed range has start == end this divides by zero — confirm.
92 processingController->
SetProgress(uint(100*(frame_number-start)/(end-start)));
97void CVObjectDetection::DetectObjects(
const cv::Mat &frame,
size_t frameId){
102 int inpWidth, inpHeight;
103 inpWidth = inpHeight = 416;
105 cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0),
true,
false);
111 std::vector<cv::Mat> outs;
112 net.forward(outs, getOutputsNames(net));
115 postprocess(frame.size(), outs, frameId);
// Turns raw network outputs into per-frame detections: keeps candidates above
// confThreshold, feeds them to the tracker `sort`, drops duplicates of the same
// class, and converts the remaining boxes to frame-relative coordinates.
//   frameDims - size of the analysed frame, used to scale and normalise boxes
//   outs      - output matrices produced by the network
//   frameId   - frame number passed to the tracker and used to select its results
121void CVObjectDetection::postprocess(
const cv::Size &frameDims,
const std::vector<cv::Mat>& outs,
size_t frameId)
123 std::vector<int> classIds;
124 std::vector<float> confidences;
125 std::vector<cv::Rect> boxes;
126 std::vector<int> objectIds;
// Walk every output matrix; each row is one candidate detection.
128 for (
size_t i = 0; i < outs.size(); ++i)
// `data` points at the current row and advances by one row width per iteration.
133 float* data = (
float*)outs[i].data;
134 for (
int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
// Columns 5 onward are searched for the highest score; its column index becomes the class id.
136 cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
137 cv::Point classIdPoint;
140 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
141 if (confidence > confThreshold)
// The first four row values are scaled by the frame size to get centre, width and height in pixels,
// then converted to a top-left anchored rectangle.
143 int centerX = (int)(data[0] * frameDims.width);
144 int centerY = (int)(data[1] * frameDims.height);
145 int width = (int)(data[2] * frameDims.width);
146 int height = (int)(data[3] * frameDims.height);
147 int left = centerX - width / 2;
148 int top = centerY - height / 2;
150 classIds.push_back(classIdPoint.x);
151 confidences.push_back((
float)confidence);
152 boxes.push_back(cv::Rect(left, top, width, height));
// Non-maximum suppression over the collected boxes.
// NOTE(review): `indices` is not read in any visible line; every box, not only the NMS survivors, goes to the tracker — confirm intent.
159 std::vector<int> indices;
160 cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
// Hand a copy of the boxes to the tracker together with the frame diagonal length.
163 std::vector<cv::Rect> sortBoxes;
164 for(
auto box : boxes)
165 sortBoxes.push_back(box);
166 sort.
update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);
// Discard the raw detections and rebuild all four vectors from the tracker's results for this frame.
169 boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
171 for(
auto TBox : sort.frameTrackingResult){
172 if(TBox.frame == frameId){
173 boxes.push_back(TBox.box);
174 confidences.push_back(TBox.confidence);
175 classIds.push_back(TBox.classId);
176 objectIds.push_back(TBox.id);
// First duplicate pass: two boxes of the same class whose centres differ by less than 10 on both axes
// are treated as duplicates and one of them is erased from all four vectors.
// NOTE(review): elements are erased while i/j index the same vectors; the loop-exit handling is not visible here — verify no element is skipped.
181 for(uint i = 0; i<boxes.size(); i++){
182 for(uint j = i+1; j<boxes.size(); j++){
// NOTE(review): yc_1 / yc_2 add width/2 to the y coordinate; height/2 looks intended — confirm.
183 int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].width/2);
184 int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].width/2);
186 if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
187 if(classIds[i] == classIds[j]){
// When i is at least as confident as j, entry j is erased.
188 if(confidences[i] >= confidences[j]){
189 boxes.erase(boxes.begin() + j);
190 classIds.erase(classIds.begin() + j);
191 confidences.erase(confidences.begin() + j);
192 objectIds.erase(objectIds.begin() + j);
// Entry i is erased (presumably the branch taken when j is more confident — the branch line is not visible).
196 boxes.erase(boxes.begin() + i);
197 classIds.erase(classIds.begin() + i);
198 confidences.erase(confidences.begin() + i);
199 objectIds.erase(objectIds.begin() + i);
// Second duplicate pass: same removal rule, but pairs are selected by the iou() overlap test.
209 for(uint i = 0; i<boxes.size(); i++){
210 for(uint j = i+1; j<boxes.size(); j++){
212 if( iou(boxes[i], boxes[j])){
213 if(classIds[i] == classIds[j]){
214 if(confidences[i] >= confidences[j]){
215 boxes.erase(boxes.begin() + j);
216 classIds.erase(classIds.begin() + j);
217 confidences.erase(confidences.begin() + j);
218 objectIds.erase(objectIds.begin() + j);
222 boxes.erase(boxes.begin() + i);
223 classIds.erase(classIds.begin() + i);
224 confidences.erase(confidences.begin() + i);
225 objectIds.erase(objectIds.begin() + i);
// Express each remaining box as fractions of the frame width and height.
235 std::vector<cv::Rect_<float>> normalized_boxes;
236 for(
auto box : boxes){
237 cv::Rect_<float> normalized_box;
238 normalized_box.x = (box.x)/(
float)frameDims.width;
239 normalized_box.y = (box.y)/(
float)frameDims.height;
240 normalized_box.width = (box.width)/(
float)frameDims.width;
241 normalized_box.height = (box.height)/(
float)frameDims.height;
242 normalized_boxes.push_back(normalized_box);
// Overlap test for two rectangles based on their intersection-over-union ratio.
//   pred_box, sort_box - the two rectangles to compare
// NOTE(review): the statement that converts the ratio into the bool result is not visible in this listing — confirm the threshold used.
249bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
// Corners of the intersection rectangle: the larger of the two top-left corners
// and the smaller of the two bottom-right corners.
251 int xA = std::max(pred_box.x, sort_box.x);
252 int yA = std::max(pred_box.y, sort_box.y);
253 int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
254 int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
// Intersection area; max(0, ...) clamps each side so disjoint boxes yield zero.
// The +1 presumably treats coordinates as inclusive pixel indices — confirm.
257 int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
// Area of each box, with the same +1 applied to both sides.
260 int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
261 int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
// Intersection divided by union (sum of both areas minus the shared part).
264 float iou = interArea / (float)(boxAArea + boxBArea - interArea);
273std::vector<cv::String> CVObjectDetection::getOutputsNames(
const cv::dnn::Net& net)
275 static std::vector<cv::String> names;
278 std::vector<int> outLayers = net.getUnconnectedOutLayers();
281 std::vector<cv::String> layersNames = net.getLayerNames();
284 names.resize(outLayers.size());
285 for (
size_t i = 0; i < outLayers.size(); ++i)
286 names[i] = layersNames[outLayers[i] - 1];
// Body of the protobuf save routine; its signature line is not part of this listing.
// Builds a pb_objdetect::ObjDetect message and writes it to the file at protobuf_data_path.
303 pb_objdetect::ObjDetect objMessage;
// Copy every entry of classNames into the message's repeated classnames field.
306 for(
int i = 0; i<classNames.size(); i++){
307 std::string* className = objMessage.add_classnames();
308 className->assign(classNames.at(i));
// NOTE(review): pbFrameData is declared without an initialiser; the code that assigns and fills it is not visible here.
314 pb_objdetect::Frame* pbFrameData;
// Stamp the message with the current time, in whole seconds.
319 *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));
// Open the target file for binary output, discarding existing content, and serialise the message into it.
323 std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
324 if (!objMessage.SerializeToOstream(&output)) {
325 cerr <<
"Failed to write protobuf message." << endl;
// NOTE(review): ShutdownProtobufLibrary() releases the library's global objects; confirm nothing in the process uses protobuf after this call.
331 google::protobuf::ShutdownProtobufLibrary();
// Body of the routine that copies one frame's detections (dData) into a protobuf frame message (pbFrameData);
// its signature line is not part of this listing.
// Record the frame number.
341 pbFrameData->set_id(dData.
frameId);
// Append one bounding_box entry per element of dData.boxes; classIds and objectIds are read at the same index.
// NOTE(review): no confidence value is written in the visible lines, although the loader reads one — confirm.
343 for(
size_t i = 0; i < dData.
boxes.size(); i++){
344 pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();
// Box position and size.
347 box->set_x(dData.
boxes.at(i).x);
348 box->set_y(dData.
boxes.at(i).y);
349 box->set_w(dData.
boxes.at(i).width);
350 box->set_h(dData.
boxes.at(i).height);
// Class id and tracker-assigned object id of this box.
351 box->set_classid(dData.
classIds.at(i));
353 box->set_objectid(dData.
objectIds.at(i));
// Exception handler for JSON processing: any std::exception results in a diagnostic line on stdout.
// NOTE(review): the matching try block is not visible in this listing, and the caught exception `e` is not used in the visible lines.
368 catch (
const std::exception& e)
372 std::cout<<
"JSON is invalid (missing keys or invalid data types)"<<std::endl;
// Body of the routine that copies settings out of the JSON object `root`; its signature line is not part of this listing.
// Each key is applied only when present (non-null) and is read as a string.
// Destination path of the protobuf data file.
380 if (!root[
"protobuf_data_path"].isNull()){
381 protobuf_data_path = (root[
"protobuf_data_path"].asString());
// Device name later compared against "GPU" / "CPU" by setProcessingDevice().
383 if (!root[
"processing-device"].isNull()){
384 processingDevice = (root[
"processing-device"].asString());
// Model configuration path; a stream is opened on it and an error is reported through the processing controller.
// NOTE(review): the condition guarding SetError is not visible here — presumably a failed open.
386 if (!root[
"model-config"].isNull()){
387 modelConfiguration = (root[
"model-config"].asString());
388 std::ifstream infile(modelConfiguration);
390 processingController->
SetError(
true,
"Incorrect path to model config file");
// Model weights path, checked the same way.
395 if (!root[
"model-weights"].isNull()){
396 modelWeights= (root[
"model-weights"].asString());
397 std::ifstream infile(modelWeights);
399 processingController->
SetError(
true,
"Incorrect path to model weight file");
// Class-names file path, checked the same way.
404 if (!root[
"class-names"].isNull()){
405 classesFile = (root[
"class-names"].asString());
407 std::ifstream infile(classesFile);
409 processingController->
SetError(
true,
"Incorrect path to class name file");
// Body of the protobuf load routine; its signature line is not part of this listing.
// Reads the file at protobuf_data_path and rebuilds class names and per-frame detection vectors from it.
425 pb_objdetect::ObjDetect objMessage;
// Parse the binary file into the message; a parse failure is reported on stderr.
429 fstream input(protobuf_data_path, ios::in | ios::binary);
430 if (!objMessage.ParseFromIstream(&input)) {
431 cerr <<
"Failed to parse protobuf message." << endl;
// Append every stored class name to classNames.
440 for(
int i = 0; i < objMessage.classnames_size(); i++){
441 classNames.push_back(objMessage.classnames(i));
// Walk every stored frame.
445 for (
size_t i = 0; i < objMessage.frame_size(); i++) {
447 const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
// Frame number of this entry.
450 size_t id = pbFrameData.id();
// Bounding boxes stored for this frame.
453 const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
// Fresh per-frame containers for the decoded boxes.
456 std::vector<int> classIds;
457 std::vector<float> confidences;
458 std::vector<cv::Rect_<float>> boxes;
459 std::vector<int> objectIds;
// NOTE(review): this inner index `i` shadows the outer frame index `i`.
461 for(
int i = 0; i < pbFrameData.bounding_box_size(); i++){
// Position and size of the box as stored.
463 float x = pBox.Get(i).x();
float y = pBox.Get(i).y();
464 float w = pBox.Get(i).w();
float h = pBox.Get(i).h();
466 cv::Rect_<float> box(x, y, w, h);
// Class id, confidence and object id of the box.
469 int classId = pBox.Get(i).classid();
float confidence = pBox.Get(i).confidence();
471 int objectId = pBox.Get(i).objectid();
// NOTE(review): objectId is read above but not appended to objectIds in the visible lines — confirm.
474 boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
// NOTE(review): ShutdownProtobufLibrary() releases the library's global objects; confirm nothing in the process uses protobuf after this call.
482 google::protobuf::ShutdownProtobufLibrary();
Header file for CVObjectDetection class.
Header file for all Exception classes.
void SetError(bool err, std::string message)
void update(std::vector< cv::Rect > detection, int frame_count, double image_diagonal, std::vector< float > confidences, std::vector< int > classIds)
void SetJsonValue(const Json::Value root)
Load Json::Value into this object.
CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
bool _LoadObjDetectdData()
void detectObjectsClip(openshot::Clip &video, size_t start=0, size_t end=0, bool process_interval=false)
void AddFrameDataToProto(pb_objdetect::Frame *pbFrameData, CVDetectionData &dData)
CVDetectionData GetDetectionData(size_t frameId)
std::map< size_t, CVDetectionData > detectionsData
void SetJson(const std::string value)
Load JSON string into this object.
bool SaveObjDetectedData()
Protobuf Save and Load methods.
float Start() const
Get start position (in seconds) of clip (trim start of video)
This class represents a clip (used to arrange readers on the timeline)
void Open() override
Open the internal reader.
float End() const override
Get end position (in seconds) of clip (trim end of video), which can be affected by the time curve.
std::shared_ptr< openshot::Frame > GetFrame(int64_t clip_frame_number) override
Get an openshot::Frame object for a specific frame number of this clip. The image size and number of ...
void Reader(openshot::ReaderBase *new_reader)
Set the current reader.
This namespace is the default namespace for all code in the openshot library.
const Json::Value stringToJson(const std::string value)
std::vector< int > objectIds
std::vector< cv::Rect_< float > > boxes
std::vector< int > classIds
std::vector< float > confidences