当前位置:   article > 正文

3、OpenMP的分段sections、single、nowait、barrier、master、ordered

pragma omp section

基本思想:sections在OpenMP里面属于非迭代任务的分担(work-sharing),总感觉这玩意像FPGA硬件逻辑代码中的分段模块

(1)分段执行代码逻辑sections

  1. #pragma omp parallel sections
  2. {
  3. #pragma omp section
  4. for (int i = 0; i < num/2; i++) {
  5. .....
  6. }
  7. #pragma omp section
  8. for (int i = num/2; i < num; i++) {
  9. ........
  10. }
  11. }

测试代码

  1. #include <iostream>
  2. #include <omp.h>
  3. #include<chrono>
  4. using namespace std;
  5. using namespace chrono;
  6. void sequentialProgram(int num)
  7. {
  8. for(int i=0;i<num;i++)
  9. {
  10. // std::cout<<"hello world"<<std::endl;
  11. printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
  12. }
  13. }
  14. void parallelProgram(int num) {
  15. #pragma omp parallel sections
  16. {
  17. #pragma omp section
  18. for (int i = 0; i < num/2; i++) {
  19. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  20. printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
  21. }
  22. #pragma omp section
  23. for (int i = num/2; i < num; i++) {
  24. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  25. printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
  26. }
  27. }
  28. }
  29. int main() {
  30. int num=omp_get_num_procs();
  31. auto start_time=std::chrono::steady_clock::now();
  32. sequentialProgram(num);
  33. auto end_time=std::chrono::steady_clock::now();
  34. std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  35. start_time=std::chrono::steady_clock::now();
  36. parallelProgram(num);
  37. end_time=std::chrono::steady_clock::now();
  38. std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  39. return 0;
  40. }

测试结果:可以看到两个for循环(两个section)分别被分配给不同的线程独立处理

  1. F:\OpenMP\cmake-build-debug\OpenMP.exe
  2. i=0 the current thread id: 0
  3. i=1 the current thread id: 0
  4. i=2 the current thread id: 0
  5. i=3 the current thread id: 0
  6. i=4 the current thread id: 0
  7. i=5 the current thread id: 0
  8. i=6 the current thread id: 0
  9. i=7 the current thread id: 0
  10. i=8 the current thread id: 0
  11. i=9 the current thread id: 0
  12. i=10 the current thread id: 0
  13. i=11 the current thread id: 0
  14. sequentialProgram elapse time: 0.0373026 seconds
  15. i=0 the current thread id: 1
  16. i=1 the current thread id: 1
  17. i=2 the current thread id: 1
  18. i=3 the current thread id: 1
  19. i=4 the current thread id: 1
  20. i=5 the current thread id: 1
  21. i=6 the current thread id: 0
  22. i=7 the current thread id: 0
  23. i=8 the current thread id: 0
  24. i=9 the current thread id: 0
  25. i=10 the current thread id: 0
  26. i=11 the current thread id: 0
  27. parallelProgram elapse time: 0.0211751 seconds
  28. Process finished with exit code 0

(2)可以同时并行处理两个任务sections

  1. #pragma omp parallel
  2. {
  3. #pragma omp sections
  4. {
  5. #pragma omp section
  6. for (int i = 0; i < num/4; i++) {
  7. .....
  8. }
  9. #pragma omp section
  10. for (int i = num/4; i < num/2; i++) {
  11. .....
  12. }
  13. }
  14. #pragma omp sections
  15. {
  16. #pragma omp section
  17. for (int i = num/2; i < 3*num/4; i++) {
  18. .....
  19. }
  20. #pragma omp section
  21. for (int i = 3*num/4; i < num; i++) {
  22. .....
  23. }
  24. }
  25. }

测试代码

  1. #include <iostream>
  2. #include <omp.h>
  3. #include<chrono>
  4. using namespace std;
  5. using namespace chrono;
  6. void sequentialProgram(int num)
  7. {
  8. for(int i=0;i<num;i++)
  9. {
  10. // std::cout<<"hello world"<<std::endl;
  11. printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
  12. }
  13. }
  14. void parallelProgram(int num) {
  15. #pragma omp parallel
  16. {
  17. #pragma omp sections
  18. {
  19. #pragma omp section
  20. for (int i = 0; i < num/4; i++) {
  21. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  22. printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
  23. }
  24. #pragma omp section
  25. for (int i = num/4; i < num/2; i++) {
  26. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  27. printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
  28. }
  29. }
  30. #pragma omp sections
  31. {
  32. #pragma omp section
  33. for (int i = num/2; i < 3*num/4; i++) {
  34. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  35. printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
  36. }
  37. #pragma omp section
  38. for (int i = 3*num/4; i < num; i++) {
  39. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  40. printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
  41. }
  42. }
  43. }
  44. }
  45. int main() {
  46. int num=omp_get_num_procs();
  47. auto start_time=std::chrono::steady_clock::now();
  48. sequentialProgram(num);
  49. auto end_time=std::chrono::steady_clock::now();
  50. std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  51. start_time=std::chrono::steady_clock::now();
  52. parallelProgram(num);
  53. end_time=std::chrono::steady_clock::now();
  54. std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  55. return 0;
  56. }

测试结果:可以看到各个section由不同的线程独立执行。由于每个sections结构的末尾有隐式同步(barrier),前后两个sections结构之间仍然按照代码的先后顺序执行;同时还需要让各个section的任务量尽量均衡~

  1. F:\OpenMP\cmake-build-debug\OpenMP.exe
  2. i=0 the current thread id: 0
  3. i=1 the current thread id: 0
  4. i=2 the current thread id: 0
  5. i=3 the current thread id: 0
  6. i=4 the current thread id: 0
  7. i=5 the current thread id: 0
  8. i=6 the current thread id: 0
  9. i=7 the current thread id: 0
  10. i=8 the current thread id: 0
  11. i=9 the current thread id: 0
  12. i=10 the current thread id: 0
  13. i=11 the current thread id: 0
  14. sequentialProgram elapse time: 0.0212041 seconds
  15. i=0 the current thread id: 2
  16. i=1 the current thread id: 2
  17. i=2 the current thread id: 2
  18. i=3 the current thread id: 8
  19. i=4 the current thread id: 8
  20. i=5 the current thread id: 8
  21. i=6 the current thread id: 11
  22. i=7 the current thread id: 11
  23. i=8 the current thread id: 11
  24. i=9 the current thread id: 9
  25. i=10 the current thread id: 9
  26. i=11 the current thread id: 9
  27. parallelProgram elapse time: 0.0202144 seconds
  28. Process finished with exit code 0

(3)单线程运行制导指令 single

  1. #pragma omp parallel
  2. {
  3. #pragma omp single
  4. ....
  5. for (int i = 0; i < num; i++) {
  6. ....
  7. }
  8. };
  9. #pragma omp parallel
  10. {
  11. #pragma omp single
  12. {
  13. ....
  14. }
  15. #pragma omp single nowait
  16. {
  17. ....
  18. }
  19. for (int i = 0; i < num; i++) {
  20. ....
  21. }
  22. };

其中single保证其限定的代码块只由一个线程执行(不一定是主线程);single结束处默认有隐式同步,其他线程会在此等待,除非加上nowait

测试代码

  1. #include <iostream>
  2. #include <omp.h>
  3. #include<chrono>
  4. using namespace std;
  5. using namespace chrono;
  6. void sequentialProgram(int num)
  7. {
  8. for(int i=0;i<num;i++)
  9. {
  10. // std::cout<<"hello world"<<std::endl;
  11. printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
  12. }
  13. }
  14. void parallelProgram(int num) {
  15. #pragma omp parallel
  16. {
  17. #pragma omp single
  18. printf("i am students the current thread id: %d\n", omp_get_thread_num());
  19. for (int i = 0; i < num; i++) {
  20. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  21. printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
  22. }
  23. };
  24. printf("--------------------\n");
  25. #pragma omp parallel
  26. {
  27. #pragma omp single
  28. {
  29. printf("i am students the current thread id: %d\n", omp_get_thread_num());
  30. }
  31. #pragma omp single nowait
  32. {
  33. printf("i am college the current thread id: %d\n", omp_get_thread_num());
  34. }
  35. for (int i = 0; i < num; i++) {
  36. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  37. printf("i=%d the current thread id: %d\n", i, omp_get_thread_num());
  38. }
  39. };
  40. }
  41. int main() {
  42. int num=1;
  43. auto start_time=std::chrono::steady_clock::now();
  44. sequentialProgram(num);
  45. auto end_time=std::chrono::steady_clock::now();
  46. std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  47. start_time=std::chrono::steady_clock::now();
  48. parallelProgram(num);
  49. end_time=std::chrono::steady_clock::now();
  50. std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  51. return 0;
  52. }

其中nowait用于去掉single结束处的隐式同步:该代码块仍然只由一个线程执行,但其他线程不用等待它执行结束,可以直接继续向下执行

  1. F:\OpenMP\cmake-build-debug\OpenMP.exe
  2. i=0 the current thread id: 0
  3. sequentialProgram elapse time: 0.0025314 seconds
  4. i am students the current thread id: 1
  5. i=0 the current thread id: 0
  6. i=0 the current thread id: 4
  7. i=0 the current thread id: 5
  8. i=0 the current thread id: 7
  9. i=0 the current thread id: 3
  10. i=0 the current thread id: 8
  11. i=0 the current thread id: 10
  12. i=0 the current thread id: 9
  13. i=0 the current thread id: 2
  14. i=0 the current thread id: 6
  15. i=0 the current thread id: 11
  16. i=0 the current thread id: 1
  17. --------------------
  18. i am students the current thread id: 5
  19. i am college the current thread id: 7
  20. i=0 the current thread id: 7
  21. i=0 the current thread id: 6
  22. i=0 the current thread id: 9
  23. i=0 the current thread id: 11
  24. i=0 the current thread id: 1
  25. i=0 the current thread id: 8
  26. i=0 the current thread id: 0
  27. i=0 the current thread id: 4
  28. i=0 the current thread id: 10
  29. i=0 the current thread id: 2
  30. i=0 the current thread id: 5
  31. i=0 the current thread id: 3
  32. parallelProgram elapse time: 0.0525391 seconds
  33. Process finished with exit code 0

执行结果中,执行single nowait代码块的线程(7号线程)对应下面这两行输出:

  1. i am college the current thread id: 7
  2. i=0 the current thread id: 7

另一种测试方法:在并行的for循环(omp for)、sections结构上都可以使用nowait,线程不必再在结构末尾的同步点等待~

  1. #include <iostream>
  2. #include <omp.h>
  3. #include<chrono>
  4. #include<vector>
  5. #include<thread>
  6. using namespace std;
  7. using namespace chrono;
  8. void sequentialProgram(int num)
  9. {
  10. for(int i=0;i<num;i++)
  11. {
  12. // std::cout<<"hello world"<<std::endl;
  13. printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
  14. }
  15. }
  16. void parallelProgram(int num) {
  17. #pragma omp parallel
  18. {
  19. #pragma omp for nowait
  20. for (int i = 0; i < num/2; i++) {
  21. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  22. printf("A i=%d the current thread id: %d\n", i, omp_get_thread_num());
  23. }
  24. #pragma omp for
  25. for (int i = num/2; i < num; i++) {
  26. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  27. printf("B i=%d the current thread id: %d\n", i, omp_get_thread_num());
  28. }
  29. };
  30. }
  31. int main() {
  32. int num=omp_get_num_procs();
  33. auto start_time=std::chrono::steady_clock::now();
  34. sequentialProgram(num);
  35. auto end_time=std::chrono::steady_clock::now();
  36. std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  37. start_time=std::chrono::steady_clock::now();
  38. parallelProgram(num);
  39. end_time=std::chrono::steady_clock::now();
  40. std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  41. return 0;
  42. }

从测试结果可以看出,加上nowait之后,先完成第一个for循环中自己那部分迭代的线程不必等待其他线程,可以直接进入第二个for循环,两个并行for循环之间不再有严格的先后关系

  1. F:\OpenMP\cmake-build-debug\OpenMP.exe
  2. i=0 the current thread id: 0
  3. i=1 the current thread id: 0
  4. i=2 the current thread id: 0
  5. i=3 the current thread id: 0
  6. i=4 the current thread id: 0
  7. i=5 the current thread id: 0
  8. i=6 the current thread id: 0
  9. i=7 the current thread id: 0
  10. i=8 the current thread id: 0
  11. i=9 the current thread id: 0
  12. i=10 the current thread id: 0
  13. i=11 the current thread id: 0
  14. sequentialProgram elapse time: 0.0194814 seconds
  15. A i=1 the current thread id: 1
  16. B i=7 the current thread id: 1
  17. A i=2 the current thread id: 2
  18. B i=8 the current thread id: 2
  19. A i=0 the current thread id: 0
  20. B i=6 the current thread id: 0
  21. A i=3 the current thread id: 3
  22. B i=9 the current thread id: 3
  23. A i=4 the current thread id: 4
  24. A i=5 the current thread id: 5
  25. B i=11 the current thread id: 5
  26. B i=10 the current thread id: 4
  27. parallelProgram elapse time: 0.0217952 seconds
  28. Process finished with exit code 0

(4)设置路障 barrier:所有线程都运行到该点之后,才能继续往下运行

  1. #pragma omp barrier
  2. {
  3. .....
  4. }

测试代码

  1. #include <iostream>
  2. #include <omp.h>
  3. #include<chrono>
  4. #include<vector>
  5. #include<thread>
  6. using namespace std;
  7. using namespace chrono;
  8. void sequentialProgram(int num)
  9. {
  10. for(int i=0;i<num;i++)
  11. {
  12. // std::cout<<"hello world"<<std::endl;
  13. printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
  14. }
  15. }
  16. void parallelProgram(int num) {
  17. #pragma omp parallel
  18. {
  19. #pragma omp for nowait
  20. for (int i = 0; i < num/2; i++) {
  21. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  22. printf("A i=%d the current thread id: %d\n", i, omp_get_thread_num());
  23. }
  24. #pragma omp barrier
  25. {
  26. #pragma omp for
  27. for (int i = num/2; i < num; i++) {
  28. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  29. printf("B i=%d the current thread id: %d\n", i, omp_get_thread_num());
  30. }
  31. }
  32. };
  33. }
  34. int main() {
  35. int num=omp_get_num_procs();
  36. auto start_time=std::chrono::steady_clock::now();
  37. sequentialProgram(num);
  38. auto end_time=std::chrono::steady_clock::now();
  39. std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  40. start_time=std::chrono::steady_clock::now();
  41. parallelProgram(num);
  42. end_time=std::chrono::steady_clock::now();
  43. std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  44. return 0;
  45. }

测试结果:可以看到所有A的输出都在B的输出之前。注意barrier是一条独立的指令,其后的花括号只是普通的代码块,并不是barrier的作用范围。

  1. F:\OpenMP\cmake-build-debug\OpenMP.exe
  2. i=0 the current thread id: 0
  3. i=1 the current thread id: 0
  4. i=2 the current thread id: 0
  5. i=3 the current thread id: 0
  6. i=4 the current thread id: 0
  7. i=5 the current thread id: 0
  8. i=6 the current thread id: 0
  9. i=7 the current thread id: 0
  10. i=8 the current thread id: 0
  11. i=9 the current thread id: 0
  12. i=10 the current thread id: 0
  13. i=11 the current thread id: 0
  14. sequentialProgram elapse time: 0.0240967 seconds
  15. A i=1 the current thread id: 1
  16. A i=2 the current thread id: 2
  17. A i=4 the current thread id: 4
  18. A i=3 the current thread id: 3
  19. A i=5 the current thread id: 5
  20. A i=0 the current thread id: 0
  21. B i=7 the current thread id: 1
  22. B i=9 the current thread id: 3
  23. B i=11 the current thread id: 5
  24. B i=8 the current thread id: 2
  25. B i=10 the current thread id: 4
  26. B i=6 the current thread id: 0
  27. parallelProgram elapse time: 0.0256972 seconds
  28. Process finished with exit code 0

(5)设置只由主线程(0号线程)去执行任务 master

  1. #pragma omp master
  2. for (int i = 0; i < num; i++) {
  3. ......
  4. }

测试代码

  1. #include <iostream>
  2. #include <omp.h>
  3. #include<chrono>
  4. #include<vector>
  5. #include<thread>
  6. using namespace std;
  7. using namespace chrono;
  8. void sequentialProgram(int num)
  9. {
  10. for(int i=0;i<num;i++)
  11. {
  12. // std::cout<<"hello world"<<std::endl;
  13. printf("i=%d the current thread id: %d\n",i,omp_get_thread_num());
  14. }
  15. }
  16. void parallelProgram(int num) {
  17. #pragma omp parallel
  18. {
  19. #pragma omp for
  20. for (int i = 0; i < num; i++) {
  21. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  22. printf("A i=%d the current thread id: %d\n", i, omp_get_thread_num());
  23. }
  24. #pragma omp single
  25. for (int i = 0; i < num; i++) {
  26. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  27. printf("B i=%d the current thread id: %d\n", i, omp_get_thread_num());
  28. }
  29. #pragma omp master
  30. for (int i = 0; i < num; i++) {
  31. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  32. printf("C i=%d the current thread id: %d\n", i, omp_get_thread_num());
  33. }
  34. #pragma omp for ordered
  35. for (int i = 0; i < num; i++) {
  36. //std::cout<<"hello world"<<"the current thread id: "<<omp_get_thread_num()<<std::endl;
  37. printf("D i=%d the current thread id: %d\n", i, omp_get_thread_num());
  38. }
  39. };
  40. }
  41. int main() {
  42. int num=omp_get_num_procs();
  43. auto start_time=std::chrono::steady_clock::now();
  44. sequentialProgram(num);
  45. auto end_time=std::chrono::steady_clock::now();
  46. std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  47. start_time=std::chrono::steady_clock::now();
  48. parallelProgram(num);
  49. end_time=std::chrono::steady_clock::now();
  50. std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
  51. return 0;
  52. }

测试结果:single和master在某种意义上很类似,都是控制任务由单一线程去完成;区别在于master固定由主线程(0号线程)执行,且结束处没有隐式同步,而single可以由任意一个线程执行(本例中是1号线程),结束处默认有隐式同步(除非加nowait)。

其中的ordered子句需要与循环体内的 #pragma omp ordered 区域配合使用,才能保证被标记的代码按迭代顺序依次执行;本例只写了 for ordered 子句,循环体内没有ordered区域,所以下面D的输出并不是按i的顺序排列的

  1. #pragma omp for ordered
  2. for (int i = 0; i < num; i++) {
  3. .......
  4. }
  1. F:\OpenMP\cmake-build-debug\OpenMP.exe
  2. i=0 the current thread id: 0
  3. i=1 the current thread id: 0
  4. i=2 the current thread id: 0
  5. i=3 the current thread id: 0
  6. i=4 the current thread id: 0
  7. i=5 the current thread id: 0
  8. i=6 the current thread id: 0
  9. i=7 the current thread id: 0
  10. i=8 the current thread id: 0
  11. i=9 the current thread id: 0
  12. i=10 the current thread id: 0
  13. i=11 the current thread id: 0
  14. sequentialProgram elapse time: 0.0232429 seconds
  15. A i=1 the current thread id: 1
  16. A i=2 the current thread id: 2
  17. A i=3 the current thread id: 3
  18. A i=5 the current thread id: 5
  19. A i=11 the current thread id: 11
  20. A i=0 the current thread id: 0
  21. A i=4 the current thread id: 4
  22. A i=8 the current thread id: 8
  23. A i=6 the current thread id: 6
  24. A i=7 the current thread id: 7
  25. A i=9 the current thread id: 9
  26. A i=10 the current thread id: 10
  27. B i=0 the current thread id: 1
  28. B i=1 the current thread id: 1
  29. B i=2 the current thread id: 1
  30. B i=3 the current thread id: 1
  31. B i=4 the current thread id: 1
  32. B i=5 the current thread id: 1
  33. B i=6 the current thread id: 1
  34. B i=7 the current thread id: 1
  35. B i=8 the current thread id: 1
  36. B i=9 the current thread id: 1
  37. B i=10 the current thread id: 1
  38. B i=11 the current thread id: 1
  39. C i=0 the current thread id: 0
  40. C i=1 the current thread id: 0
  41. C i=2 the current thread id: 0
  42. C i=3 the current thread id: 0
  43. C i=4 the current thread id: 0
  44. C i=5 the current thread id: 0
  45. C i=6 the current thread id: 0
  46. C i=7 the current thread id: 0
  47. C i=8 the current thread id: 0
  48. C i=9 the current thread id: 0
  49. C i=10 the current thread id: 0
  50. C i=11 the current thread id: 0
  51. D i=0 the current thread id: 0
  52. D i=8 the current thread id: 8
  53. D i=2 the current thread id: 2
  54. D i=1 the current thread id: 1
  55. D i=9 the current thread id: 9
  56. D i=3 the current thread id: 3
  57. D i=5 the current thread id: 5
  58. D i=4 the current thread id: 4
  59. D i=11 the current thread id: 11
  60. D i=7 the current thread id: 7
  61. D i=10 the current thread id: 10
  62. D i=6 the current thread id: 6
  63. parallelProgram elapse time: 0.112705 seconds
  64. Process finished with exit code 0

 测试ncnn提供的yolov5.cpp源码:每次测试的耗时都不一样,大部分情况下修改后的版本会快一点

https://github.com/Tencent/ncnn/blob/master/examples/yolov5.cpp

代码片段

  1. // anchor setting from yolov5/models/yolov5s.yaml
  2. auto start_time=std::chrono::steady_clock::now();
  3. //#pragma omp parallel sections firstprivate(ex)
  4. {
  5. //#pragma omp section
  6. // stride 8
  7. {
  8. ncnn::Mat out;
  9. ex.extract("output", out);
  10. ncnn::Mat anchors(6);
  11. anchors[0] = 10.f;
  12. anchors[1] = 13.f;
  13. anchors[2] = 16.f;
  14. anchors[3] = 30.f;
  15. anchors[4] = 33.f;
  16. anchors[5] = 23.f;
  17. generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects);
  18. proposals.insert(proposals.end(), objects.begin(), objects.end());
  19. printf("the current thread id: %d\n",omp_get_thread_num());
  20. }
  21. //#pragma omp section
  22. // stride 16
  23. {
  24. ncnn::Mat out;
  25. ex.extract("781", out);
  26. ncnn::Mat anchors(6);
  27. anchors[0] = 30.f;
  28. anchors[1] = 61.f;
  29. anchors[2] = 62.f;
  30. anchors[3] = 45.f;
  31. anchors[4] = 59.f;
  32. anchors[5] = 119.f;
  33. generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects);
  34. proposals.insert(proposals.end(), objects.begin(), objects.end());
  35. printf("the current thread id: %d\n",omp_get_thread_num());
  36. }
  37. //#pragma omp section
  38. // stride 32
  39. {
  40. ncnn::Mat out;
  41. ex.extract("801", out);
  42. ncnn::Mat anchors(6);
  43. anchors[0] = 116.f;
  44. anchors[1] = 90.f;
  45. anchors[2] = 156.f;
  46. anchors[3] = 198.f;
  47. anchors[4] = 373.f;
  48. anchors[5] = 326.f;
  49. generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects);
  50. proposals.insert(proposals.end(), objects.begin(), objects.end());
  51. printf("the current thread id: %d\n",omp_get_thread_num());
  52. }
  53. }
  54. // sort all proposals by score from highest to lowest
  55. auto end_time=std::chrono::steady_clock::now();
  56. std::cout<<"output elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;

测试时间

  1. the current thread id: 0
  2. the current thread id: 0
  3. the current thread id: 0
  4. output elapse time: 0.278379 seconds
  5. yolov5s elapse time: 0.338553 seconds
  6. 15 = 0.54197 at 256.27 15.57 826.90 x 603.65

修改代码

  1. auto start_time=std::chrono::steady_clock::now();
  2. ncnn::Mat out0;
  3. ex.extract("output", out0);
  4. ncnn::Mat out1;
  5. ex.extract("781", out1);
  6. ncnn::Mat out2;
  7. ex.extract("801", out2);
  8. #pragma omp parallel sections
  9. {
  10. #pragma omp section
  11. // stride 8
  12. {
  13. ncnn::Mat anchors(6);
  14. anchors[0] = 10.f;
  15. anchors[1] = 13.f;
  16. anchors[2] = 16.f;
  17. anchors[3] = 30.f;
  18. anchors[4] = 33.f;
  19. anchors[5] = 23.f;
  20. generate_proposals(anchors, 8, in_pad, out0, prob_threshold, objects);
  21. proposals.insert(proposals.end(), objects.begin(), objects.end());
  22. printf("the current thread id: %d\n",omp_get_thread_num());
  23. }
  24. #pragma omp section
  25. // stride 16
  26. {
  27. ncnn::Mat anchors(6);
  28. anchors[0] = 30.f;
  29. anchors[1] = 61.f;
  30. anchors[2] = 62.f;
  31. anchors[3] = 45.f;
  32. anchors[4] = 59.f;
  33. anchors[5] = 119.f;
  34. generate_proposals(anchors, 16, in_pad, out1, prob_threshold, objects);
  35. proposals.insert(proposals.end(), objects.begin(), objects.end());
  36. printf("the current thread id: %d\n",omp_get_thread_num());
  37. }
  38. #pragma omp section
  39. // stride 32
  40. {
  41. ncnn::Mat anchors(6);
  42. anchors[0] = 116.f;
  43. anchors[1] = 90.f;
  44. anchors[2] = 156.f;
  45. anchors[3] = 198.f;
  46. anchors[4] = 373.f;
  47. anchors[5] = 326.f;
  48. generate_proposals(anchors, 32, in_pad, out2, prob_threshold, objects);
  49. proposals.insert(proposals.end(), objects.begin(), objects.end());
  50. printf("the current thread id: %d\n",omp_get_thread_num());
  51. }
  52. }
  53. // sort all proposals by score from highest to lowest
  54. auto end_time=std::chrono::steady_clock::now();
  55. std::cout<<"output elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;

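测试时间 好像大部分比原来的快 ~(注意:上面三个section会同时对共享的proposals执行insert,objects看起来也是共享变量,这样存在数据竞争;应当让每个section使用各自的局部变量,或者用 #pragma omp critical 保护这部分写入,否则结果可能出错)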

  1. F:\window10_yolo5_mingw32\cmake-build-debug\window10_yolo5_mingw32.exe
  2. the current thread id: 5
  3. the current thread id: 3
  4. the current thread id: 11
  5. output elapse time: 0.244162 seconds
  6. yolov5s elapse time: 0.303863 seconds
  7. 15 = 0.54197 at 256.27 15.57 826.90 x 603.65
  8. Process finished with exit code 0

如果改成下面这样(使用firstprivate(ex),并在各个section内部分别调用extract),耗时反而比较长

  1. auto start_time=std::chrono::steady_clock::now();
  2. #pragma omp parallel sections firstprivate(ex)
  3. {
  4. #pragma omp section
  5. // stride 8
  6. {
  7. ncnn::Mat out;
  8. ex.extract("output", out);
  9. ncnn::Mat anchors(6);
  10. anchors[0] = 10.f;
  11. anchors[1] = 13.f;
  12. anchors[2] = 16.f;
  13. anchors[3] = 30.f;
  14. anchors[4] = 33.f;
  15. anchors[5] = 23.f;
  16. generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects);
  17. proposals.insert(proposals.end(), objects.begin(), objects.end());
  18. printf("the current thread id: %d\n",omp_get_thread_num());
  19. }
  20. #pragma omp section
  21. // stride 16
  22. {
  23. ncnn::Mat out;
  24. ex.extract("781", out);
  25. ncnn::Mat anchors(6);
  26. anchors[0] = 30.f;
  27. anchors[1] = 61.f;
  28. anchors[2] = 62.f;
  29. anchors[3] = 45.f;
  30. anchors[4] = 59.f;
  31. anchors[5] = 119.f;
  32. generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects);
  33. proposals.insert(proposals.end(), objects.begin(), objects.end());
  34. printf("the current thread id: %d\n",omp_get_thread_num());
  35. }
  36. #pragma omp section
  37. // stride 32
  38. {
  39. ncnn::Mat out;
  40. ex.extract("801", out);
  41. ncnn::Mat anchors(6);
  42. anchors[0] = 116.f;
  43. anchors[1] = 90.f;
  44. anchors[2] = 156.f;
  45. anchors[3] = 198.f;
  46. anchors[4] = 373.f;
  47. anchors[5] = 326.f;
  48. generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects);
  49. proposals.insert(proposals.end(), objects.begin(), objects.end());
  50. printf("the current thread id: %d\n",omp_get_thread_num());
  51. }
  52. }
  53. // sort all proposals by score from highest to lowest
  54. auto end_time=std::chrono::steady_clock::now();
  55. std::cout<<"output elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;

测试时间

  1. F:\window10_yolo5_mingw32\cmake-build-debug\window10_yolo5_mingw32.exe
  2. the current thread id: 1
  3. the current thread id: 0
  4. the current thread id: 7
  5. output elapse time: 0.829006 seconds
  6. yolov5s elapse time: 0.895948 seconds
  7. 15 = 0.54197 at 256.27 15.57 826.90 x 603.65

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/AllinToyou/article/detail/265594
推荐阅读
相关标签
  

闽ICP备14008679号