赞
踩
基本思想:sections 在 OpenMP 里面属于非迭代任务的分担(work-sharing),即把若干段彼此独立的代码分别交给不同的线程执行,总感觉这玩意像 FPGA 硬件逻辑代码中的分段模块
(1)分段执行代码逻辑sections
- #pragma omp parallel sections
- {
- #pragma omp section
- for (int i = 0; i < num/2; i++) {
- .....
- }
- #pragma omp section
- for (int i = num/2; i < num; i++) {
- ........
- }
- }
测试代码
- #include <iostream>
- #include <omp.h>
- #include<chrono>
- using namespace std;
- using namespace chrono;
-
- void sequentialProgram(int num)
- {
-
- for(int i=0;i<num;i++)
- {
- // std::cout<<"hello world"<<std::endl;
- printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
- }
- }
-
- void parallelProgram(int num) {
-
-
-
- #pragma omp parallel sections
- {
- #pragma omp section
- for (int i = 0; i < num/2; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
- #pragma omp section
- for (int i = num/2; i < num; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
- }
-
- }
-
- int main() {
-
-
- int num=omp_get_num_procs();
- auto start_time=std::chrono::steady_clock::now();
- sequentialProgram(num);
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
-
- start_time=std::chrono::steady_clock::now();
- parallelProgram(num);
- end_time=std::chrono::steady_clock::now();
- std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
- return 0;
- }
测试结果,可以看到 两个for 循环会被分配到各个线程里面独立处理
- F:\OpenMP\cmake-build-debug\OpenMP.exe
- i=0 the current thread id: 0
- i=1 the current thread id: 0
- i=2 the current thread id: 0
- i=3 the current thread id: 0
- i=4 the current thread id: 0
- i=5 the current thread id: 0
- i=6 the current thread id: 0
- i=7 the current thread id: 0
- i=8 the current thread id: 0
- i=9 the current thread id: 0
- i=10 the current thread id: 0
- i=11 the current thread id: 0
- sequentialProgram elapse time: 0.0373026 seconds
- i=0 the current thread id: 1
- i=1 the current thread id: 1
- i=2 the current thread id: 1
- i=3 the current thread id: 1
- i=4 the current thread id: 1
- i=5 the current thread id: 1
- i=6 the current thread id: 0
- i=7 the current thread id: 0
- i=8 the current thread id: 0
- i=9 the current thread id: 0
- i=10 the current thread id: 0
- i=11 the current thread id: 0
- parallelProgram elapse time: 0.0211751 seconds
-
- Process finished with exit code 0
(2)可以同时并行处理两个任务sections
- #pragma omp parallel
- {
- #pragma omp sections
- {
-
- #pragma omp section
- for (int i = 0; i < num/4; i++) {
- .....
- }
- #pragma omp section
- for (int i = num/4; i < num/2; i++) {
- .....
- }
- }
- #pragma omp sections
- {
-
- #pragma omp section
- for (int i = num/2; i < 3*num/4; i++) {
- .....
- }
- #pragma omp section
- for (int i = 3*num/4; i < num; i++) {
- .....
- }
- }
-
- }
测试代码
- #include <iostream>
- #include <omp.h>
- #include<chrono>
- using namespace std;
- using namespace chrono;
-
- void sequentialProgram(int num)
- {
-
- for(int i=0;i<num;i++)
- {
- // std::cout<<"hello world"<<std::endl;
- printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
- }
- }
-
- void parallelProgram(int num) {
-
-
-
- #pragma omp parallel
- {
- #pragma omp sections
- {
-
- #pragma omp section
- for (int i = 0; i < num/4; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
- #pragma omp section
- for (int i = num/4; i < num/2; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
- }
- #pragma omp sections
- {
-
- #pragma omp section
- for (int i = num/2; i < 3*num/4; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
- #pragma omp section
- for (int i = 3*num/4; i < num; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
- }
-
- }
-
- }
-
- int main() {
-
-
- int num=omp_get_num_procs();
- auto start_time=std::chrono::steady_clock::now();
- sequentialProgram(num);
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
-
- start_time=std::chrono::steady_clock::now();
- parallelProgram(num);
- end_time=std::chrono::steady_clock::now();
- std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
- return 0;
- }
测试结果,可以看到各个 section 由不同的线程独立运行。这里发现前后两个 sections 结构还是按照代码的先后顺序执行的(每个 sections 结构末尾有隐式同步点,除非加 nowait),只有同一个 sections 结构内部的各个 section 之间才是并行的;同时还需要让各个 section 的任务量尽量均衡~
- F:\OpenMP\cmake-build-debug\OpenMP.exe
- i=0 the current thread id: 0
- i=1 the current thread id: 0
- i=2 the current thread id: 0
- i=3 the current thread id: 0
- i=4 the current thread id: 0
- i=5 the current thread id: 0
- i=6 the current thread id: 0
- i=7 the current thread id: 0
- i=8 the current thread id: 0
- i=9 the current thread id: 0
- i=10 the current thread id: 0
- i=11 the current thread id: 0
- sequentialProgram elapse time: 0.0212041 seconds
- i=0 the current thread id: 2
- i=1 the current thread id: 2
- i=2 the current thread id: 2
- i=3 the current thread id: 8
- i=4 the current thread id: 8
- i=5 the current thread id: 8
- i=6 the current thread id: 11
- i=7 the current thread id: 11
- i=8 the current thread id: 11
- i=9 the current thread id: 9
- i=10 the current thread id: 9
- i=11 the current thread id: 9
- parallelProgram elapse time: 0.0202144 seconds
-
- Process finished with exit code 0
(3)单线程运行制导指令 single
- #pragma omp parallel
- {
- #pragma omp single
- ....
- for (int i = 0; i < num; i++) {
- ....
- }
-
- };
-
-
- #pragma omp parallel
- {
- #pragma omp single
- {
- ....
- }
- #pragma omp single nowait
- {
- ....
- }
- for (int i = 0; i < num; i++) {
- ....
- }
-
- };
其中 single 保证紧跟在它后面的那一条语句(或花括号代码块)只由一个线程执行,其余线程在 single 末尾的隐式同步点等待(除非加 nowait)
测试代码
- #include <iostream>
- #include <omp.h>
- #include<chrono>
- using namespace std;
- using namespace chrono;
-
- void sequentialProgram(int num)
- {
-
- for(int i=0;i<num;i++)
- {
- // std::cout<<"hello world"<<std::endl;
- printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
- }
- }
-
- void parallelProgram(int num) {
-
-
-
- #pragma omp parallel
- {
- #pragma omp single
- printf("i am students the current thread id: %d\n", omp_get_thread_num());
- for (int i = 0; i < num; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
-
- };
-
- printf("--------------------\n");
- #pragma omp parallel
- {
- #pragma omp single
- {
- printf("i am students the current thread id: %d\n", omp_get_thread_num());
- }
- #pragma omp single nowait
- {
- printf("i am college the current thread id: %d\n", omp_get_thread_num());
- }
- for (int i = 0; i < num; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
-
- };
-
- }
-
- int main() {
-
-
- int num=1;
- auto start_time=std::chrono::steady_clock::now();
- sequentialProgram(num);
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
-
- start_time=std::chrono::steady_clock::now();
- parallelProgram(num);
- end_time=std::chrono::steady_clock::now();
- std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
- return 0;
- }
其中 nowait 表示该 single 块仍然只由一个线程执行,但去掉了块末尾的隐式同步点:别的线程不用等待它执行结束,直接向下执行就行
- F:\OpenMP\cmake-build-debug\OpenMP.exe
- i=0 the current thread id: 0
- sequentialProgram elapse time: 0.0025314 seconds
- i am students the current thread id: 1
- i=0 the current thread id: 0
- i=0 the current thread id: 4
- i=0 the current thread id: 5
- i=0 the current thread id: 7
- i=0 the current thread id: 3
- i=0 the current thread id: 8
- i=0 the current thread id: 10
- i=0 the current thread id: 9
- i=0 the current thread id: 2
- i=0 the current thread id: 6
- i=0 the current thread id: 11
- i=0 the current thread id: 1
- --------------------
- i am students the current thread id: 5
- i am college the current thread id: 7
- i=0 the current thread id: 7
- i=0 the current thread id: 6
- i=0 the current thread id: 9
- i=0 the current thread id: 11
- i=0 the current thread id: 1
- i=0 the current thread id: 8
- i=0 the current thread id: 0
- i=0 the current thread id: 4
- i=0 the current thread id: 10
- i=0 the current thread id: 2
- i=0 the current thread id: 5
- i=0 the current thread id: 3
- parallelProgram elapse time: 0.0525391 seconds
-
- Process finished with exit code 0
执行结果中下面这两行的线程号相同(nowait):执行 single 块的线程 7 完成后直接继续执行后面的 for 循环
- i am college the current thread id: 7
- i=0 the current thread id: 7
另一种测试方法,在并行的 for 循环上、sections 段上都可以使用 nowait,这样线程不必再在末尾的同步点等待,可以直接向下执行~
- #include <iostream>
- #include <omp.h>
- #include<chrono>
- #include<vector>
- #include<thread>
- using namespace std;
- using namespace chrono;
-
- void sequentialProgram(int num)
- {
-
- for(int i=0;i<num;i++)
- {
- // std::cout<<"hello world"<<std::endl;
- printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
- }
- }
-
- void parallelProgram(int num) {
-
-
-
-
- #pragma omp parallel
- {
- #pragma omp for nowait
- for (int i = 0; i < num/2; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("A i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
- #pragma omp for
- for (int i = num/2; i < num; i++) {
- //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
- printf("B i=%d the current thread id: %d\n", i, omp_get_thread_num());
- }
- };
-
- }
-
- int main() {
-
-
- int num=omp_get_num_procs();
- auto start_time=std::chrono::steady_clock::now();
- sequentialProgram(num);
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
-
- start_time=std::chrono::steady_clock::now();
- parallelProgram(num);
- end_time=std::chrono::steady_clock::now();
- std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
- return 0;
- }
从测试结果中可以看出,加了 nowait 之后,两个并行 for 循环之间不再存在先后关系:先完成循环 A 的线程可以立即开始执行循环 B
- F:\OpenMP\cmake-build-debug\OpenMP.exe
- i=0 the current thread id: 0
- i=1 the current thread id: 0
- i=2 the current thread id: 0
- i=3 the current thread id: 0
- i=4 the current thread id: 0
- i=5 the current thread id: 0
- i=6 the current thread id: 0
- i=7 the current thread id: 0
- i=8 the current thread id: 0
- i=9 the current thread id: 0
- i=10 the current thread id: 0
- i=11 the current thread id: 0
- sequentialProgram elapse time: 0.0194814 seconds
- A i=1 the current thread id: 1
- B i=7 the current thread id: 1
- A i=2 the current thread id: 2
- B i=8 the current thread id: 2
- A i=0 the current thread id: 0
- B i=6 the current thread id: 0
- A i=3 the current thread id: 3
- B i=9 the current thread id: 3
- A i=4 the current thread id: 4
- A i=5 the current thread id: 5
- B i=11 the current thread id: 5
- B i=10 the current thread id: 4
- parallelProgram elapse time: 0.0217952 seconds
-
- Process finished with exit code 0
(4) 设置路障 barrier:所有线程都运行到该点之后,才能继续往下运行
- // barrier is a stand-alone directive: it has no associated block, so no braces
- // follow it. Every thread of the team waits here until all of them have arrived.
- #pragma omp barrier
- .....
测试代码
- #include <iostream>
- #include <omp.h>
- #include<chrono>
- #include<vector>
- #include<thread>
- using namespace std;
- using namespace chrono;
-
- void sequentialProgram(int num)
- {
-
- for(int i=0;i<num;i++)
- {
- // std::cout<<"hello world"<<std::endl;
- printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
- }
- }
-
- // Shows an explicit barrier between two work-shared loops inside one parallel region.
- // Loop A prints iterations [0, num/2) and loop B prints [num/2, num); every line
- // carries the id of the thread that printed it.
- void parallelProgram(int num) {
-
-     #pragma omp parallel
-     {
-         // nowait drops the implicit barrier at the end of loop A ...
-         #pragma omp for nowait
-         for (int i = 0; i < num/2; i++) {
-             printf("A i=%d the current thread id: %d\n", i, omp_get_thread_num());
-         }
-
-         // ... so this explicit barrier is what keeps any thread from starting loop B
-         // before every thread has finished its share of loop A. barrier is a
-         // stand-alone directive: it takes no block, so no braces follow it.
-         #pragma omp barrier
-
-         #pragma omp for
-         for (int i = num/2; i < num; i++) {
-             printf("B i=%d the current thread id: %d\n", i, omp_get_thread_num());
-         }
-     }
-
- }
-
- int main() {
-
-
- int num=omp_get_num_procs();
- auto start_time=std::chrono::steady_clock::now();
- sequentialProgram(num);
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
-
- start_time=std::chrono::steady_clock::now();
- parallelProgram(num);
- end_time=std::chrono::steady_clock::now();
- std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
- return 0;
- }
测试结果
- F:\OpenMP\cmake-build-debug\OpenMP.exe
- i=0 the current thread id: 0
- i=1 the current thread id: 0
- i=2 the current thread id: 0
- i=3 the current thread id: 0
- i=4 the current thread id: 0
- i=5 the current thread id: 0
- i=6 the current thread id: 0
- i=7 the current thread id: 0
- i=8 the current thread id: 0
- i=9 the current thread id: 0
- i=10 the current thread id: 0
- i=11 the current thread id: 0
- sequentialProgram elapse time: 0.0240967 seconds
- A i=1 the current thread id: 1
- A i=2 the current thread id: 2
- A i=4 the current thread id: 4
- A i=3 the current thread id: 3
- A i=5 the current thread id: 5
- A i=0 the current thread id: 0
- B i=7 the current thread id: 1
- B i=9 the current thread id: 3
- B i=11 the current thread id: 5
- B i=8 the current thread id: 2
- B i=10 the current thread id: 4
- B i=6 the current thread id: 0
- parallelProgram elapse time: 0.0256972 seconds
-
- Process finished with exit code 0
(5)设置只有主线程(线程号 0)去执行任务 master
- #pragma omp master
- for (int i = 0; i < num; i++) {
- ......
- }
测试代码
- #include <iostream>
- #include <omp.h>
- #include<chrono>
- #include<vector>
- #include<thread>
- using namespace std;
- using namespace chrono;
-
- void sequentialProgram(int num)
- {
-
- for(int i=0;i<num;i++)
- {
- // std::cout<<"hello world"<<std::endl;
- printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
- }
- }
-
- // Compares four ways of running a loop inside one parallel region. Every printed
- // line carries a letter (A-D) naming the loop and the id of the thread that printed it.
- void parallelProgram(int num) {
-
-     #pragma omp parallel
-     {
-         // A: the iterations are divided among the threads of the team.
-         #pragma omp for
-         for (int i = 0; i < num; i++) {
-             printf("A i=%d the current thread id: %d\n", i, omp_get_thread_num());
-         }
-
-         // B: the whole loop is run by one thread of the team (whichever one gets it);
-         // the other threads wait at the implicit barrier that ends the single construct.
-         #pragma omp single
-         for (int i = 0; i < num; i++) {
-             printf("B i=%d the current thread id: %d\n", i, omp_get_thread_num());
-         }
-
-         // C: the whole loop is run by the master thread (id 0). master implies no
-         // barrier, so the other threads go straight on to loop D.
-         #pragma omp master
-         for (int i = 0; i < num; i++) {
-             printf("C i=%d the current thread id: %d\n", i, omp_get_thread_num());
-         }
-
-         // D: the iterations are still divided among the threads, but the ordered
-         // region inside the body is executed in iteration order. The ordered clause
-         // on the loop does nothing by itself: without the region below, the lines
-         // come out in arbitrary order.
-         #pragma omp for ordered
-         for (int i = 0; i < num; i++) {
-             #pragma omp ordered
-             printf("D i=%d the current thread id: %d\n", i, omp_get_thread_num());
-         }
-     }
-
- }
-
- int main() {
-
-
- int num=omp_get_num_procs();
- auto start_time=std::chrono::steady_clock::now();
- sequentialProgram(num);
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
-
- start_time=std::chrono::steady_clock::now();
- parallelProgram(num);
- end_time=std::chrono::steady_clock::now();
- std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
- return 0;
- }
测试结果 抛开 single 的功能点说明,single 和 master 在某种意义上很类似,都是控制任务由单一线程去完成。区别在于:master 固定由 0 号线程执行,末尾没有隐式同步点;single 由任意一个线程执行,末尾默认有隐式同步点。
其中的 ordered 子句需要配合循环体内的 #pragma omp ordered 区域,才能保证该区域按迭代顺序依次执行;下面测试结果中的 D 输出是在循环体内没有 ordered 区域的情况下得到的,所以仍然是乱序的
- #pragma omp for ordered
- for (int i = 0; i < num; i++) {
-     // Only the code inside the ordered region runs in iteration order; the
-     // ordered clause on the loop merely permits such a region.
-     #pragma omp ordered
-     {
-         .......
-     }
- }
- F:\OpenMP\cmake-build-debug\OpenMP.exe
- i=0 the current thread id: 0
- i=1 the current thread id: 0
- i=2 the current thread id: 0
- i=3 the current thread id: 0
- i=4 the current thread id: 0
- i=5 the current thread id: 0
- i=6 the current thread id: 0
- i=7 the current thread id: 0
- i=8 the current thread id: 0
- i=9 the current thread id: 0
- i=10 the current thread id: 0
- i=11 the current thread id: 0
- sequentialProgram elapse time: 0.0232429 seconds
- A i=1 the current thread id: 1
- A i=2 the current thread id: 2
- A i=3 the current thread id: 3
- A i=5 the current thread id: 5
- A i=11 the current thread id: 11
- A i=0 the current thread id: 0
- A i=4 the current thread id: 4
- A i=8 the current thread id: 8
- A i=6 the current thread id: 6
- A i=7 the current thread id: 7
- A i=9 the current thread id: 9
- A i=10 the current thread id: 10
- B i=0 the current thread id: 1
- B i=1 the current thread id: 1
- B i=2 the current thread id: 1
- B i=3 the current thread id: 1
- B i=4 the current thread id: 1
- B i=5 the current thread id: 1
- B i=6 the current thread id: 1
- B i=7 the current thread id: 1
- B i=8 the current thread id: 1
- B i=9 the current thread id: 1
- B i=10 the current thread id: 1
- B i=11 the current thread id: 1
- C i=0 the current thread id: 0
- C i=1 the current thread id: 0
- C i=2 the current thread id: 0
- C i=3 the current thread id: 0
- C i=4 the current thread id: 0
- C i=5 the current thread id: 0
- C i=6 the current thread id: 0
- C i=7 the current thread id: 0
- C i=8 the current thread id: 0
- C i=9 the current thread id: 0
- C i=10 the current thread id: 0
- C i=11 the current thread id: 0
- D i=0 the current thread id: 0
- D i=8 the current thread id: 8
- D i=2 the current thread id: 2
- D i=1 the current thread id: 1
- D i=9 the current thread id: 9
- D i=3 the current thread id: 3
- D i=5 the current thread id: 5
- D i=4 the current thread id: 4
- D i=11 the current thread id: 11
- D i=7 the current thread id: 7
- D i=10 the current thread id: 10
- D i=6 the current thread id: 6
- parallelProgram elapse time: 0.112705 seconds
-
- Process finished with exit code 0
测试 ncnn 提供的 yolov5.cpp 源码:每次测试的耗时都不一样,好像大部分情况下修改后的版本快一点
https://github.com/Tencent/ncnn/blob/master/examples/yolov5.cpp
代码片段
- // anchor setting from yolov5/models/yolov5s.yaml
- auto start_time=std::chrono::steady_clock::now();
-
- //#pragma omp parallel sections firstprivate(ex)
- {
- //#pragma omp section
- // stride 8
- {
- ncnn::Mat out;
- ex.extract("output", out);
-
- ncnn::Mat anchors(6);
-
- anchors[0] = 10.f;
- anchors[1] = 13.f;
- anchors[2] = 16.f;
- anchors[3] = 30.f;
- anchors[4] = 33.f;
- anchors[5] = 23.f;
-
-
- generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects);
- proposals.insert(proposals.end(), objects.begin(), objects.end());
- printf("the current thread id: %d\n",omp_get_thread_num());
-
- }
- //#pragma omp section
- // stride 16
- {
- ncnn::Mat out;
- ex.extract("781", out);
-
- ncnn::Mat anchors(6);
- anchors[0] = 30.f;
- anchors[1] = 61.f;
- anchors[2] = 62.f;
- anchors[3] = 45.f;
- anchors[4] = 59.f;
- anchors[5] = 119.f;
-
-
- generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects);
- proposals.insert(proposals.end(), objects.begin(), objects.end());
- printf("the current thread id: %d\n",omp_get_thread_num());
-
- }
- //#pragma omp section
- // stride 32
- {
- ncnn::Mat out;
-
- ex.extract("801", out);
-
- ncnn::Mat anchors(6);
- anchors[0] = 116.f;
- anchors[1] = 90.f;
- anchors[2] = 156.f;
- anchors[3] = 198.f;
- anchors[4] = 373.f;
- anchors[5] = 326.f;
-
-
-
- generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects);
- proposals.insert(proposals.end(), objects.begin(), objects.end());
- printf("the current thread id: %d\n",omp_get_thread_num());
-
- }
- }
- // sort all proposals by score from highest to lowest
-
-
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"output elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
测试时间
- the current thread id: 0
- the current thread id: 0
- the current thread id: 0
- output elapse time: 0.278379 seconds
- yolov5s elapse time: 0.338553 seconds
- 15 = 0.54197 at 256.27 15.57 826.90 x 603.65
修改代码
- auto start_time=std::chrono::steady_clock::now();
-
- // All three extract() calls are made on the shared extractor before the parallel
- // region starts, so the sections below only read out0/out1/out2.
- ncnn::Mat out0;
- ex.extract("output", out0);
-
- ncnn::Mat out1;
- ex.extract("781", out1);
-
- ncnn::Mat out2;
- ex.extract("801", out2);
-
- // Each section collects its results in its own vector and appends them to the shared
- // `proposals` inside a named critical section. Passing the shared `objects` to every
- // section and calling proposals.insert() from three threads at once is a data race.
- // NOTE(review): assumes `Object` is the element type of `proposals`, as in the linked
- // ncnn yolov5.cpp, and that later code does not rely on the contents that
- // generate_proposals presumably left in `objects` -- confirm.
- #pragma omp parallel sections
- {
-     #pragma omp section
-     // stride 8
-     {
-         ncnn::Mat anchors(6);
-         anchors[0] = 10.f;
-         anchors[1] = 13.f;
-         anchors[2] = 16.f;
-         anchors[3] = 30.f;
-         anchors[4] = 33.f;
-         anchors[5] = 23.f;
-
-         std::vector<Object> objects8;
-         generate_proposals(anchors, 8, in_pad, out0, prob_threshold, objects8);
-         #pragma omp critical(merge_proposals)
-         proposals.insert(proposals.end(), objects8.begin(), objects8.end());
-         printf("the current thread id: %d\n",omp_get_thread_num());
-     }
-     #pragma omp section
-     // stride 16
-     {
-         ncnn::Mat anchors(6);
-         anchors[0] = 30.f;
-         anchors[1] = 61.f;
-         anchors[2] = 62.f;
-         anchors[3] = 45.f;
-         anchors[4] = 59.f;
-         anchors[5] = 119.f;
-
-         std::vector<Object> objects16;
-         generate_proposals(anchors, 16, in_pad, out1, prob_threshold, objects16);
-         #pragma omp critical(merge_proposals)
-         proposals.insert(proposals.end(), objects16.begin(), objects16.end());
-         printf("the current thread id: %d\n",omp_get_thread_num());
-     }
-     #pragma omp section
-     // stride 32
-     {
-         ncnn::Mat anchors(6);
-         anchors[0] = 116.f;
-         anchors[1] = 90.f;
-         anchors[2] = 156.f;
-         anchors[3] = 198.f;
-         anchors[4] = 373.f;
-         anchors[5] = 326.f;
-
-         std::vector<Object> objects32;
-         generate_proposals(anchors, 32, in_pad, out2, prob_threshold, objects32);
-         #pragma omp critical(merge_proposals)
-         proposals.insert(proposals.end(), objects32.begin(), objects32.end());
-         printf("the current thread id: %d\n",omp_get_thread_num());
-     }
- }
- // sort all proposals by score from highest to lowest
-
-
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"output elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
测试时间 好像大部分比原来的快 ~
- F:\window10_yolo5_mingw32\cmake-build-debug\window10_yolo5_mingw32.exe
- the current thread id: 5
- the current thread id: 3
- the current thread id: 11
- output elapse time: 0.244162 seconds
- yolov5s elapse time: 0.303863 seconds
- 15 = 0.54197 at 256.27 15.57 826.90 x 603.65
-
- Process finished with exit cod
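这样修改(使用 firstprivate(ex),每个 section 拿到各自的 Extractor 副本并在 section 内部调用 extract)耗时比较长,推测是因为各个副本之间无法共享已经计算好的中间结果,需要各自重新做一遍前向计算(待确认)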
- auto start_time=std::chrono::steady_clock::now();
-
-
-
-
- #pragma omp parallel sections firstprivate(ex)
- {
- #pragma omp section
- // stride 8
- {
- ncnn::Mat out;
- ex.extract("output", out);
-
- ncnn::Mat anchors(6);
-
- anchors[0] = 10.f;
- anchors[1] = 13.f;
- anchors[2] = 16.f;
- anchors[3] = 30.f;
- anchors[4] = 33.f;
- anchors[5] = 23.f;
-
-
- generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects);
- proposals.insert(proposals.end(), objects.begin(), objects.end());
- printf("the current thread id: %d\n",omp_get_thread_num());
-
- }
- #pragma omp section
- // stride 16
- {
- ncnn::Mat out;
- ex.extract("781", out);
-
- ncnn::Mat anchors(6);
- anchors[0] = 30.f;
- anchors[1] = 61.f;
- anchors[2] = 62.f;
- anchors[3] = 45.f;
- anchors[4] = 59.f;
- anchors[5] = 119.f;
-
-
- generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects);
- proposals.insert(proposals.end(), objects.begin(), objects.end());
- printf("the current thread id: %d\n",omp_get_thread_num());
-
- }
- #pragma omp section
- // stride 32
- {
-
- ncnn::Mat out;
- ex.extract("801", out);
-
- ncnn::Mat anchors(6);
- anchors[0] = 116.f;
- anchors[1] = 90.f;
- anchors[2] = 156.f;
- anchors[3] = 198.f;
- anchors[4] = 373.f;
- anchors[5] = 326.f;
-
-
-
- generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects);
- proposals.insert(proposals.end(), objects.begin(), objects.end());
- printf("the current thread id: %d\n",omp_get_thread_num());
-
- }
- }
- // sort all proposals by score from highest to lowest
-
-
- auto end_time=std::chrono::steady_clock::now();
- std::cout<<"output elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
-
测试时间
- F:\window10_yolo5_mingw32\cmake-build-debug\window10_yolo5_mingw32.exe
- the current thread id: 1
- the current thread id: 0
- the current thread id: 7
- output elapse time: 0.829006 seconds
- yolov5s elapse time: 0.895948 seconds
- 15 = 0.54197 at 256.27 15.57 826.90 x 603.65
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。