diff --git a/algorithm/veri/cuda/veri_det_cuda_impl.cpp b/algorithm/veri/cuda/veri_det_cuda_impl.cpp index d5225ec..bf1fcc4 100644 --- a/algorithm/veri/cuda/veri_det_cuda_impl.cpp +++ b/algorithm/veri/cuda/veri_det_cuda_impl.cpp @@ -74,10 +74,10 @@ namespace sv bool VeriDetectorCUDAImpl::cudaSetup() { #ifdef WITH_CUDA - std::string trt_model_fn = get_home() + SV_MODEL_DIR + "model.engine"; + std::string trt_model_fn = get_home() + SV_MODEL_DIR + "veri.engine"; if (!is_file_exist(trt_model_fn)) { - throw std::runtime_error("SpireCV (104) Error loading the VERI TensorRT model (File Not Exist)"); + throw std::runtime_error("SpireCV (104) Error loading the VeriDetector TensorRT model (File Not Exist)"); } char *trt_model_stream{nullptr}; size_t trt_model_size{0}; @@ -107,7 +107,7 @@ namespace sv delete[] trt_model_stream; const ICudaEngine &cu_engine = this->_trt_context->getEngine(); - assert(cu_engine.getNbBindings() == 2); + assert(cu_engine.getNbBindings() == 3); this->_input_index = cu_engine.getBindingIndex("input"); this->_output_index1 = cu_engine.getBindingIndex("output"); @@ -123,7 +123,6 @@ namespace sv this->_p_data = new float[2 * 3 * 224 * 224]; this->_p_prob1 = new float[2 * 576]; this->_p_prob2 = new float[2 * 1280]; - this->_p_prob3 = new float[2 * 1280]; // Input TRTCHECK(cudaMemcpyAsync(_p_buffers[this->_input_index], this->_p_data, 2 * 3 * 224 * 224 * sizeof(float), cudaMemcpyHostToDevice, this->_cu_stream)); // this->_trt_context->enqueue(1, _p_buffers, this->_cu_stream, nullptr); @@ -139,11 +138,12 @@ namespace sv void VeriDetectorCUDAImpl::cudaRoiCNN( std::vector &input_rois_, - std::vector &output_labels_) + std::vector &output_labels_) { #ifdef WITH_CUDA - for (int i = 0; i < 2; ++i) + + for (int i = 0; i < 2; i++) { for (int row = 0; row < 224; ++row) { @@ -151,14 +151,15 @@ namespace sv for (int col = 0; col < 224; ++col) { // mean=[136.20, 141.50, 145.41], std=[44.77, 44.20, 44.30] - this->_p_data[224 * 224 * 3 * i + col + row * 224] = ((float)uc_pixel[0] - 136.20f) / 44.77f; - this->_p_data[224 * 224 * 3 * i + col + row * 224 + 224 * 224] = ((float)uc_pixel[1] - 141.50f) / 44.20f; - this->_p_data[224 * 224 * 3 * i + col + row * 224 + 224 * 224 * 2] = ((float)uc_pixel[2] - 145.41f) / 44.30f; + this->_p_data[col + row * 224 + 224 * 224 * 3 * i] = ((float)uc_pixel[0] - 136.20f) / 44.77f; + this->_p_data[col + row * 224 + 224 * 224 + 224 * 224 * 3 * i] = ((float)uc_pixel[1] - 141.50f) / 44.20f; + this->_p_data[col + row * 224 + 224 * 224 * 2 + 224 * 224 * 3 * i] = ((float)uc_pixel[2] - 145.41f) / 44.30f; uc_pixel += 3; } } } + // Input TRTCHECK(cudaMemcpyAsync(_p_buffers[this->_input_index], this->_p_data, 2 * 3 * 224 * 224 * sizeof(float), cudaMemcpyHostToDevice, this->_cu_stream)); // this->_trt_context->enqueue(1, _p_buffers, this->_cu_stream, nullptr); @@ -180,10 +181,9 @@ namespace sv } } - // 计算两个数组的余弦相似性 - float similarity = cosineSimilarity(_p_prob2, _p_prob2 + 1280, 1280); - std::cout << "余弦相似性: " << similarity << std::endl; - std::cout << "VERI LABEL: " << label << std::endl; + float similarity = cosineSimilarity(this->_p_prob2, this->_p_prob2 + 1280, 1280); + output_labels_.push_back(label); + output_labels_.push_back(similarity); } #endif } diff --git a/algorithm/veri/cuda/veri_det_cuda_impl.h b/algorithm/veri/cuda/veri_det_cuda_impl.h index 559a67f..a77f6b8 100644 --- a/algorithm/veri/cuda/veri_det_cuda_impl.h +++ b/algorithm/veri/cuda/veri_det_cuda_impl.h @@ -29,14 +29,13 @@ public: bool cudaSetup(); void cudaRoiCNN( std::vector& input_rois_, - std::vector& output_labels_ + std::vector& output_labels_ ); #ifdef WITH_CUDA float *_p_data; float *_p_prob1; float *_p_prob2; - float *_p_prob3; nvinfer1::IExecutionContext *_trt_context; int _input_index; int _output_index1; diff --git a/algorithm/veri/sv_veri_det.cpp b/algorithm/veri/sv_veri_det.cpp index a7df6d7..bf12630 100644 --- a/algorithm/veri/sv_veri_det.cpp +++ b/algorithm/veri/sv_veri_det.cpp @@ -1,61 +1,150 @@ #include "sv_veri_det.h" #include #include +#include "gason.h" +#include "sv_util.h" + #ifdef WITH_CUDA #include #include #include "veri_det_cuda_impl.h" #endif +#define SV_ROOT_DIR "/SpireCV/" -namespace sv { - - -VeriDetector::VeriDetector() +namespace sv { - this->_cuda_impl = new VeriDetectorCUDAImpl; -} -VeriDetector::~VeriDetector() -{ -} -bool VeriDetector::setupImpl() -{ -#ifdef WITH_CUDA - return this->_cuda_impl->cudaSetup(); -#endif - return false; -} - -void VeriDetector::roiCNN( - std::vector& input_rois_, - std::vector& output_labels_ -) -{ -#ifdef WITH_CUDA - this->_cuda_impl->cudaRoiCNN( - input_rois_, - output_labels_ - ); -#endif -} - - - -void VeriDetector::detect(cv::Mat img1_, cv::Mat img2_, TargetsInFrame& tgts_) -{ - if (!_params_loaded) + VeriDetector::VeriDetector() + { + this->_cuda_impl = new VeriDetectorCUDAImpl; + } + VeriDetector::~VeriDetector() { - this->_load(); - this->_loadLabels(); - _params_loaded = true; } - std::vector e_roi = {img1_, img2_}; + void VeriDetector::_load() + { + JsonValue all_value; + JsonAllocator allocator; + _load_all_json(this->alg_params_fn, all_value, allocator); - std::vector output_labels; - roiCNN(e_roi, output_labels); -} + JsonValue veriliner_params_value; + _parser_algorithm_params("VeriDetector", all_value, veriliner_params_value); + + for (auto i : veriliner_params_value) + { + if ("vehicle_ID" == std::string(i->key)) + { + this->vehicle_id = i->value.toString(); + std::cout << "vehicle_ID Load Sucess!" << std::endl; + } + } + } + + bool VeriDetector::setupImpl() + { +#ifdef WITH_CUDA + return this->_cuda_impl->cudaSetup(); +#endif + + return false; + } + + + void VeriDetector::roiCNN( + std::vector &input_rois_, + std::vector &output_labels_) + { +#ifdef WITH_CUDA + this->_cuda_impl->cudaRoiCNN( + input_rois_, + output_labels_); +#endif + } + + void VeriDetector::detect(cv::Mat img_, const cv::Rect &bounding_box_, sv::Target &tgt) + { + if (!_params_loaded) + { + this->_load(); + this->_loadLabels(); + _params_loaded = true; + } + + // convert Rect2d from left-up to center. + targetPos[0] = float(bounding_box_.x) + float(bounding_box_.width) * 0.5f; + targetPos[1] = float(bounding_box_.y) + float(bounding_box_.height) * 0.5f; + + targetSz[0] = float(bounding_box_.width); + targetSz[1] = float(bounding_box_.height); + + // Extent the bounding box. + float sumSz = targetSz[0] + targetSz[1]; + float wExtent = targetSz[0] + 0.5 * (sumSz); + float hExtent = targetSz[1] + 0.5 * (sumSz); + int sz = int(cv::sqrt(wExtent * hExtent)); + + cv::Mat crop; + getSubwindow(crop, img_, sz, 224); + + std::string img_ground_dir = get_home() + SV_ROOT_DIR + this->vehicle_id; + cv::Mat img_ground = cv::imread(img_ground_dir); + cv::resize(img_ground, img_ground, cv::Size(224, 224)); + std::vector input_rois_ = {crop, img_ground}; + +#ifdef WITH_CUDA + std::vector output_labels; + roiCNN(input_rois_, output_labels); +#endif + // auto t1 = std::chrono::system_clock::now(); + // tgts_.setFPS(1000.0 / std::chrono::duration_cast(t1 - this->_t0).count()); + // this->_t0 = std::chrono::system_clock::now(); + // tgts_.setTimeNow(); + + if (output_labels.size() > 0) + { + // tgt.category_id = output_labels[0]; + tgt.sim_score = output_labels[1]; + // tgts_.targets.push_back(tgt); + } + } + + void VeriDetector::getSubwindow(cv::Mat &dstCrop, cv::Mat &srcImg, int originalSz, int resizeSz) + { + cv::Scalar avgChans = mean(srcImg); + cv::Size imgSz = srcImg.size(); + int c = (originalSz + 1) / 2; + + int context_xmin = (int)(targetPos[0]) - c; + int context_xmax = context_xmin + originalSz - 1; + int context_ymin = (int)(targetPos[1]) - c; + int context_ymax = context_ymin + originalSz - 1; + + int left_pad = std::max(0, -context_xmin); + int top_pad = std::max(0, -context_ymin); + int right_pad = std::max(0, context_xmax - imgSz.width + 1); + int bottom_pad = std::max(0, context_ymax - imgSz.height + 1); + + context_xmin += left_pad; + context_xmax += left_pad; + context_ymin += top_pad; + context_ymax += top_pad; + + cv::Mat cropImg; + if (left_pad == 0 && top_pad == 0 && right_pad == 0 && bottom_pad == 0) + { + // Crop image without padding. + cropImg = srcImg(cv::Rect(context_xmin, context_ymin, + context_xmax - context_xmin + 1, context_ymax - context_ymin + 1)); + } + else // Crop image with padding, and the padding value is avgChans + { + cv::Mat tmpMat; + cv::copyMakeBorder(srcImg, tmpMat, top_pad, bottom_pad, left_pad, right_pad, cv::BORDER_CONSTANT, avgChans); + cropImg = tmpMat(cv::Rect(context_xmin, context_ymin, context_xmax - context_xmin + 1, context_ymax - context_ymin + 1)); + } + resize(cropImg, dstCrop, cv::Size(resizeSz, resizeSz)); + } } - diff --git a/include/sv_veri_det.h b/include/sv_veri_det.h index d372919..08e3a00 100644 --- a/include/sv_veri_det.h +++ b/include/sv_veri_det.h @@ -8,29 +8,36 @@ #include #include - -namespace sv { - -class VeriDetectorCUDAImpl; - -class VeriDetector : public LandingMarkerDetectorBase +namespace sv { -public: - VeriDetector(); - ~VeriDetector(); - void detect(cv::Mat img1_, cv::Mat img2_, TargetsInFrame &tgts_); + class VeriDetectorCUDAImpl; -protected: - bool setupImpl(); - void roiCNN( - std::vector& input_rois_, - std::vector& output_labels_ - ); + class VeriDetector : public LandingMarkerDetectorBase + { + public: + VeriDetector(); + ~VeriDetector(); - VeriDetectorCUDAImpl* _cuda_impl; -}; + void detect(cv::Mat img_, const cv::Rect &bounding_box_, sv::Target &tgt); + protected: + void _load(); + bool setupImpl(); + void roiCNN( + std::vector &input_rois_, + std::vector &output_labels_); + void getSubwindow(cv::Mat &dstCrop, cv::Mat &srcImg, int originalSz, int resizeSz); + + std::string vehicle_id; + + // Save the target bounding box for each frame. + std::vector targetSz = {0, 0}; // H and W of bounding box + std::vector targetPos = {0, 0}; // center point of bounding box (x, y) + + + VeriDetectorCUDAImpl *_cuda_impl; + }; } #endif diff --git a/include/sv_video_base.h b/include/sv_video_base.h index f026ea3..5c20051 100644 --- a/include/sv_video_base.h +++ b/include/sv_video_base.h @@ -90,6 +90,9 @@ public: double los_ay; //! The angle of the target in the image coordinate system, (unit: degree) [-180, 180]. double yaw_a; + + //! Similarity, Confidence, (0, 1] + double sim_score; //! Whether the height&width of the target can be obtained. bool has_hw; diff --git a/samples/demo/udp_detection_info_receiver_by_airborne.cpp b/samples/demo/udp_detection_info_receiver_by_airborne.cpp new file mode 100644 index 0000000..9eb2ef1 --- /dev/null +++ b/samples/demo/udp_detection_info_receiver_by_airborne.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include +#include + +#define PORT 12346 // UDP端口号 + +// 枚举定义消息类型 +enum MessageType { + TIMESTAMP = 0x00, + CLICK_COORDINATES = 0x01, + TARGET_BOX_COORDINATES = 0x02, + CLICK_COORDINATES_FRAME_ID = 0x03, + TARGET_BOX_COORDINATES_FRAME_ID = 0x04 +}; + +// 定义消息结构体 +#pragma pack(1) // 使用1字节对齐方式 +struct Message { + unsigned char header[2]; // 帧头 + unsigned char type; // 消息类型 + unsigned char reserved; // 保留字段 + unsigned int requestID; // 请求ID + unsigned int dataLength; // 数据长度 + unsigned char* data; // 数据段 +}; +#pragma pack() // 恢复默认对齐方式 + +// 解析时间戳消息 +void parseTimestampMessage(unsigned char* data) { + unsigned short year = ntohs(*reinterpret_cast(data)); + unsigned char month = *(data + 2); + unsigned char day = *(data + 3); + unsigned char hour = *(data + 4); + unsigned char minute = *(data + 5); + unsigned char second = *(data + 6); + unsigned short millisecond = ntohs(*reinterpret_cast(data + 7)); + + std::cout << "Timestamp: " << static_cast(year) << "-" << static_cast(month) + << "-" << static_cast(day) << " " << static_cast(hour) << ":" + << static_cast(minute) << ":" << static_cast(second) << "." + << static_cast(millisecond) << std::endl; +} + +// 解析点击坐标消息 +void parseClickCoordinatesMessage(unsigned char* data) { + float x = *reinterpret_cast(data); + float y = *reinterpret_cast(data + 4); + + std::cout << "Click coordinates: (" << x << ", " << y << ")" << std::endl; +} + +// 解析目标框坐标消息 +void parseTargetBoxCoordinatesMessage(unsigned char* data) { + float x1 = *reinterpret_cast(data); + float y1 = *reinterpret_cast(data + 4); + float x2 = *reinterpret_cast(data + 8); + float y2 = *reinterpret_cast(data + 12); + + std::cout << "Target box coordinates: (" << x1 << ", " << y1 << ") - (" + << x2 << ", " << y2 << ")" << std::endl; +} + +// 解析UDP数据包 +void parseUDPData(unsigned char* buffer, int length) { + if (length < 13) { + std::cout << "Invalid UDP data format" << std::endl; + return; + } + + Message message; + memcpy(&message, buffer, sizeof(Message)); + + // 解析消息类型 + MessageType messageType = static_cast(message.type); + + // 根据消息类型处理数据 + switch (messageType) { + case TIMESTAMP: + if (length != 9) { + std::cout << "Invalid timestamp message length" << std::endl; + return; + } + parseTimestampMessage(message.data); + break; + case CLICK_COORDINATES: + if (length != 13) { + std::cout << "Invalid click coordinates message length" << std::endl; + return; + } + parseClickCoordinatesMessage(message.data); + break; + case TARGET_BOX_COORDINATES: + if (length != 21) { + std::cout << "Invalid target box coordinates message length" << std::endl; + return; + } + parseTargetBoxCoordinatesMessage(message.data); + break; + default: + std::cout << "Unsupported message type" << std::endl; + break; + } +} + +int main() { + int sockfd; + struct sockaddr_in servaddr, cliaddr; + unsigned char buffer[4096]; + + // 创建UDP套接字 + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + + memset(&servaddr, 0, sizeof(servaddr)); + memset(&cliaddr, 0, sizeof(cliaddr)); + + servaddr.sin_family = AF_INET; + servaddr.sin_addr.s_addr = htonl(INADDR_ANY); + servaddr.sin_port = htons(PORT); + + // 绑定UDP套接字到端口 + bind(sockfd, (const struct sockaddr*)&servaddr, sizeof(servaddr)); + + while (true) { + socklen_t len = sizeof(cliaddr); + + // 接收UDP数据包 + ssize_t n = recvfrom(sockfd, buffer, sizeof(buffer), MSG_WAITALL, + (struct sockaddr*)&cliaddr, &len); + + // 解析UDP数据包 + parseUDPData(buffer, n); + } + + close(sockfd); + + return 0; +} diff --git a/samples/demo/veri.cpp b/samples/demo/veri.cpp index cbe71b6..96caf74 100644 --- a/samples/demo/veri.cpp +++ b/samples/demo/veri.cpp @@ -5,41 +5,85 @@ using namespace std; -int main(int argc, char *argv[]) { - // 打开摄像头 +struct node +{ + double x, y; +}; + +node p1; + +// 框选到的矩形 +cv::Rect rect_sel; + +int main(int argc, char *argv[]) +{ + // 实例化 框选目标跟踪类 sv::VeriDetector veri; + // 手动导入相机参数,如果使用Amov的G1等吊舱或相机,则可以忽略该步骤,将自动下载相机参数文件 veri.loadCameraParams(sv::get_home() + "/SpireCV/calib_webcam_640x480.yaml"); - cv::VideoCapture cap1("/home/amov/Videos/com/FlyVideo_2023-09-02_11-36-00.avi"); - cv::VideoCapture cap2("/home/amov/Videos/com/FlyVideo_2023-09-02_11-41-55.avi"); + sv::CommonObjectDetector cod; + cod.loadCameraParams(sv::get_home() + "/SpireCV/calib_webcam_640x480.yaml"); + + // 打开摄像头 + sv::Camera cap; // cap.setWH(640, 480); // cap.setFps(30); - //cap.open(sv::CameraType::WEBCAM, 0); // CameraID 0 + cap.open(sv::CameraType::WEBCAM, 0); // CameraID 0 // 实例化OpenCV的Mat类,用于内存单帧图像 - - - cv::Mat img1,img2; + cv::Mat img; int frame_id = 0; while (1) { + // 实例化SpireCV的 单帧检测结果 接口类 TargetsInFrame sv::TargetsInFrame tgts(frame_id++); - // 读取一帧图像到img - cap1.read(img1); - cap2.read(img2); + cap.read(img); + cv::resize(img, img, cv::Size(cod.image_width, cod.image_height)); - cv::resize(img1, img1, cv::Size(224, 224)); - cv::resize(img2, img2, cv::Size(224, 224)); + // 执行通用目标检测 + cod.detect(img, tgts); - veri.detect(img1, img2, tgts); + // 可视化检测结果,叠加到img上 + sv::drawTargetsInFrame(img, tgts); - + // 控制台打印通用目标检测结果 + printf("Frame-[%d]\n", frame_id); + // 打印当前检测的FPS + printf(" FPS = %.2f\n", tgts.fps); + // 打印当前相机的视场角(degree) + printf(" FOV (fx, fy) = (%.2f, %.2f)\n", tgts.fov_x, tgts.fov_y); + for (int i = 0; i < tgts.targets.size(); i++) + { + printf("Frame-[%d], Object-[%d]\n", frame_id, i); + // 打印每个目标的中心位置,cx,cy的值域为[0, 1] + printf(" Object Center (cx, cy) = (%.3f, %.3f)\n", tgts.targets[i].cx, tgts.targets[i].cy); + // 打印每个目标的外接矩形框的宽度、高度,w,h的值域为(0, 1] + printf(" Object Size (w, h) = (%.3f, %.3f)\n", tgts.targets[i].w, tgts.targets[i].h); + // 打印每个目标的置信度 + printf(" Object Score = %.3f\n", tgts.targets[i].score); + // 打印每个目标的类别,字符串类型 + printf(" Object Category = %s, Category ID = [%d]\n", tgts.targets[i].category.c_str(), tgts.targets[i].category_id); + // 打印每个目标的视线角,跟相机视场相关 + printf(" Object Line-of-sight (ax, ay) = (%.3f, %.3f)\n", tgts.targets[i].los_ax, tgts.targets[i].los_ay); + // 打印每个目标的3D位置(在相机坐标系下),跟目标实际长宽、相机参数相关 + printf(" Object Position = (x, y, z) = (%.3f, %.3f, %.3f)\n", tgts.targets[i].px, tgts.targets[i].py, tgts.targets[i].pz); - // 显示img - // cv::imshow("img", img); - // cv::waitKey(10); + if (tgts.targets[i].category_id == 2) + { + p1.x = tgts.targets[i].cx * tgts.width - tgts.targets[i].w * tgts.width / 2; + p1.y = tgts.targets[i].cy * tgts.height - tgts.targets[i].h * tgts.height / 2; + rect_sel = cv::Rect(p1.x, p1.y, tgts.targets[i].w * tgts.width, tgts.targets[i].h * tgts.height); + veri.detect(img, rect_sel, tgts.targets[i]); + // 打印每个目标的CosineSimilarity + printf(" CosineSimilarity Score = %.3f\n", tgts.targets[i].sim_score); + } + } + + // 显示检测结果img + cv::imshow("img", img); + cv::waitKey(10); } - return 0; }