OpenMP(Open Multi-Processing)是一种用于共享内存并行系统的多线程程序设计方案,支持的编程语言包括C、C++和Fortran。OpenMP提供了对并行算法的高层抽象描述,通过线程实现并行化,特别适合在多核CPU机器上的并行程序设计。编译器根据程序中添加的pragma指令,自动将程序并行处理,使用OpenMP降低了并行编程的难度和复杂度。当编译器不支持OpenMP时,程序会退化成普通(串行)程序。程序中已有的OpenMP指令不会影响程序的正常编译运行。
Fork(派生):主线程(master thread)创建一组并行化执行的线程;
Join(合并):当线程完成工作后,它们会进行同步与终止,只剩下master thread。
在VS中启用OpenMP,在项目上右键 -> 属性 -> 配置属性 ->C/C++ -> 语言 -> OpenMP支持,选择“是”即可。
编译制导指令以#pragma omp开始,后面跟具体的功能指令,格式如:#pragma omp 指令[子句,[子句]…]。常用的功能指令如下:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/* Print a greeting tagged with the calling thread's id and the team size. */
void hello(void)
{
    /* Id of the calling thread within the current team. */
    int my_rank = omp_get_thread_num();
    /* Number of threads in the current parallel region. */
    int thread_count = omp_get_num_threads();

    printf("Hello from thread %d of %d. \n", my_rank, thread_count);
}

/*
 * Run hello() on a team of threads.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

    /* num_threads requests how many threads execute hello(). */
#pragma omp parallel num_threads(thread_count)
    {
        hello();
    }

    return 0;
}
#pragma omp parallel for它告诉编译器,接下来的for循环,将会使用并行的方式执行,使用并行的时候需要满足以下四个需求:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

enum { ELEMENT_COUNT = 10 };

/*
 * Add two small arrays element-wise with "omp parallel for" and print
 * which thread produced each element.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

    double a[ELEMENT_COUNT];
    double b[ELEMENT_COUNT];
    for (int i = 0; i < ELEMENT_COUNT; i++) {
        a[i] = i;
        b[i] = ELEMENT_COUNT - i;
    }

    double s[ELEMENT_COUNT];
    /* Each iteration writes a distinct s[i], so iterations are independent. */
#pragma omp parallel for num_threads(thread_count)
    for (int i = 0; i < ELEMENT_COUNT; i++) {
        s[i] = a[i] + b[i];
        printf("线程编号%d: 结果%lf\n", omp_get_thread_num(), s[i]);
    }

    return 0;
}
private在#pragma中作为一个可选的附加子句,它直接告诉编译器让每个线程都拥有该共享变量的一份私有副本。它的形式为 #pragma omp … private(< variable list >)
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Demonstrate the private clause: every thread assigns to its own copy of
 * tmp inside the loop, and the outer tmp is printed after the loop.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

    double tmp = 1.0;

#pragma omp parallel for num_threads(thread_count) private(tmp)
    for (int i = 0; i < 10; i++) {
        /* Writes go to the thread-private copy named in private(tmp). */
        tmp = 2.0;
        printf("线程编号%d: 结果%lf\n", omp_get_thread_num(), tmp);
    }

    printf("tmp: %lf\n", tmp);
    return 0;
}
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Demonstrate the firstprivate clause: each thread increments its own copy
 * of j inside the loop, and the outer j is printed after the loop.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

    int j = 4;

#pragma omp parallel for num_threads(thread_count) firstprivate(j)
    for (int i = 0; i < 10; i++) {
        j++;
        int my_rank = omp_get_thread_num();
        /* Named team_size so it does not shadow the outer thread_count. */
        int team_size = omp_get_num_threads();
        printf("thread %d of %d has j = %d.\n", my_rank, team_size, j);
    }

    printf("tmp: %d\n", j);
    return 0;
}
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Demonstrate the lastprivate clause: j is assigned inside every iteration
 * and the outer j is printed after the loop.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

    /* Deliberately not initialized here: it is first written inside the loop
     * and listed in lastprivate(j). */
    int j;

#pragma omp parallel for num_threads(thread_count) lastprivate(j)
    for (int i = 0; i < 10; i++) {
        j = 4;
        j++;
        int my_rank = omp_get_thread_num();
        /* Named team_size so it does not shadow the outer thread_count. */
        int team_size = omp_get_num_threads();
        printf("thread %d of %d has j = %d.\n", my_rank, team_size, j);
    }

    printf("tmp: %d\n", j);
    return 0;
}
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/* Print the calling thread's id and the size of its team. */
static void report_thread(void)
{
    int my_rank = omp_get_thread_num();
    int team_size = omp_get_num_threads();

    printf("thread %d of %d.\n", my_rank, team_size);
}

/*
 * Demonstrate the sections construct: four section blocks, each reporting
 * which thread executed it.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

#pragma omp parallel num_threads(thread_count)
    {
#pragma omp sections
        {
#pragma omp section
            {
                report_thread();
            }
#pragma omp section
            {
                report_thread();
            }
#pragma omp section
            {
                report_thread();
            }
#pragma omp section
            {
                report_thread();
            }
        }
    }

    return 0;
}
Operator | Initial Value |
+ | 0 |
* | 1 |
- | 0 |
^ | 0 |
& | ~0 |
\| | 0 |
&& | 1 |
\|\| | 0 |
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Demonstrate reduction(+:j) on a parallel region containing two sections:
 * each section adds to j and prints the value it sees, and j is printed
 * before and after the region.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

    int j = 10;
    printf("j = %d\n", j);

#pragma omp parallel num_threads(thread_count) reduction(+:j)
    {
#pragma omp sections
        {
#pragma omp section
            {
                int my_rank = omp_get_thread_num();
                /* Named team_size so it does not shadow the outer thread_count. */
                int team_size = omp_get_num_threads();
                j = j + 10;
                printf("Hello from thread %d of %d in section 1 and j = %d.\n", my_rank, team_size, j);
            }
#pragma omp section
            {
                int my_rank = omp_get_thread_num();
                int team_size = omp_get_num_threads();
                j = j + 20;
                printf("Hello from thread %d of %d in section 2 and j = %d.\n", my_rank, team_size, j);
            }
        }
    }

    printf("j = %d\n", j);
    return 0;
}
#pragma omp single nowait
nthreads = omp_get_num_threads();
printf("number of threads = %d\n",nthreads);
/* Hand-written variant: only the thread whose id is 0 queries and prints
 * the team size. tid and nthreads are declared outside this snippet. */
tid = omp_get_thread_num();
if (tid == 0) {
    nthreads = omp_get_num_threads();
    printf("number of threads = %d\n", nthreads);
}
#pragma omp master
nthreads = omp_get_num_threads();
printf("number of threads = %d\n",nthreads);
/* Hand-written variant: only the thread whose id is 0 queries and prints
 * the team size. tid and nthreads are declared outside this snippet. */
tid = omp_get_thread_num();
if (tid == 0) {
    nthreads = omp_get_num_threads();
    printf("number of threads = %d\n", nthreads);
}
它是用于实现同步的一种手段,会在代码的某个点,令线程停下直到所有的线程都到达该地方。使用的语法如下:#pragma omp barrier。许多情况下,它已经能够自动的插入到工作区结尾,比如在for,single中,但是它能够被nowait禁用。
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Compare two parallel regions: in the first, an explicit barrier separates
 * the per-thread greeting from thread 0's summary line; in the second the
 * barrier is omitted.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

    printf("使用barrier输出的结果:\n");
#pragma omp parallel num_threads(thread_count)
    {
        int my_rank = omp_get_thread_num();
        /* Named team_size so it does not shadow the outer thread_count. */
        int team_size = omp_get_num_threads();

        printf("hi from %d\n", my_rank);
        /* Every thread must reach this point before any may continue. */
#pragma omp barrier
        if (my_rank == 0) {
            printf("%d threads say hi\n", team_size);
        }
    }

    printf("不使用barrier输出的结果:\n");
#pragma omp parallel num_threads(thread_count)
    {
        int my_rank = omp_get_thread_num();
        int team_size = omp_get_num_threads();

        printf("hi from %d\n", my_rank);
        /* No barrier here: thread 0 does not wait for the other greetings. */
        if (my_rank == 0) {
            printf("%d threads say hi\n", team_size);
        }
    }

    return 0;
}
nowait在OpenMP中用于取消工作共享结构末尾自动添加的隐式barrier,例如parallel区域中的for以及single,用法如下:#pragma omp for nowait 、#pragma omp single nowait。
#include<stdio.h> #include<stdlib.h> #include<omp.h> #include<windows.h> int main(int argc, char** argv) { int thread_count = strtol(argv[1],NULL,10); printf("使用nowait输出的结果:\n"); # pragma omp parallel num_threads(thread_count) { int my_rank = omp_get_thread_num(); int thread_count = omp_get_num_threads(); # pragma omp for nowait for (int i = 0; i < 10; i++) { if (my_rank == 0) Sleep(3000); printf("Hello from thread %d of %d i = %d.\n", my_rank, thread_count, i); } # pragma omp single nowait { printf("thread %d process single\n", my_rank); Sleep(3000); } printf("thread %d ok\n", my_rank); } printf("不使用nowait输出的结果:\n"); # pragma omp parallel num_threads(thread_count) { int my_rank = omp_get_thread_num(); int thread_count = omp_get_num_threads(); # pragma omp for for (int i = 0; i < 10; i++) { if (my_rank == 0) Sleep(3000); printf("Hello from thread %d of %d i = %d.\n", my_rank, thread_count, i); } # pragma omp single printf("thread %d process single\n", my_rank); Sleep(3000); printf("thread %d ok\n", my_rank); } return 0; }
OpenMP提供了一个实现互斥的接口:critical,它告诉编译器接下来的一段代码在同一个时间段将会只由一个线程执行,使用方法如下:#pragma omp critical。好处是解决了竞争现象;坏处是使用critical会降低程序的并行化程度,而且必须要写代码的人手动判断哪些部分需要用critical。
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Demonstrate the critical construct: a shared counter j is incremented and
 * printed inside a critical section in every loop iteration.
 *
 * argv[1] is the requested thread count and must be a positive integer.
 * Returns 0 on success, EXIT_FAILURE when the argument is missing or invalid.
 */
int main(int argc, char **argv)
{
    /* Guard argv[1]: passing NULL to strtol is undefined behavior, and
     * num_threads requires a positive value. */
    long requested = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
    if (requested < 1 || requested > INT_MAX) {
        fputs("usage: <program> <thread_count>  (thread_count must be a positive integer)\n", stderr);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;

    int j = 0;

#pragma omp parallel for num_threads(thread_count) shared(j)
    for (int i = 0; i < 10; i++) {
        int my_rank = omp_get_thread_num();
        /* Named team_size so it does not shadow the outer thread_count. */
        int team_size = omp_get_num_threads();

        /* The update of the shared j and the print that reads it are kept
         * together so only one thread at a time executes them. */
#pragma omp critical
        {
            j = j + 1;
            printf("Hello from thread %d of %d i = %d,j = %d\n", my_rank, team_size, i, j);
        }
    }

    return 0;
}
在特殊的情况下,除了使用critical指令控制临界区以外,我们还可以使用其他选项去保证内存的控制是原子的,OpenMP提供了一个选项:atomic(原子),它只在特殊的情况下使用:在自增或自减的情况下使用;在二元操作数的情况下使用。并且其只会应用于一条指令。使用方法如下:#pragma omp atomic。
#include<omp.h> #include<windows.h> #include<time.h> void delay() { int t = 50; int my_rank = omp_get_thread_num(); t = t * (my_rank + 1); Sleep(t); } int main(int argc, char** argv) { int thread_count = strtol(argv[1],NULL,10); int j = 0; srand(time(NULL)); clock_t start, end; start = clock(); # pragma omp parallel for num_threads(thread_count) schedule(static,2) for (int i = 0; i < 100; i++) { delay(); } end = clock(); printf("first loop finish,time: %f\n", (double)end - start); start = clock(); # pragma omp parallel for num_threads(thread_count) schedule(dynamic,2) for (int i = 0; i < 100; i++) { delay(); } end = clock(); printf("second loop finish,time: %f\n", (double)end - start); start = clock(); # pragma omp parallel for num_threads(thread_count) schedule(guided,2) for (int i = 0; i < 100; i++) { delay(); } end = clock(); printf("third loop finish,time: %f\n", (double)end - start); return 0; }
函数名 | 函数作用 |
omp_in_parallel | 判断当前是否在并行域中 |
omp_get_thread_num | 返回线程号 |
omp_set_num_threads | 设置后续并行域中的线程个数 |
omp_get_num_threads | 返回当前并行区域中的线程数 |
omp_get_max_threads | 获取并行域可用的最大线程数目 |
omp_get_num_procs | 返回系统中处理器的个数 |
omp_get_dynamic | 判断是否支持动态改变线程数目 |
omp_set_dynamic | 启用或关闭线程数目的动态改变 |
omp_get_nested | 判断系统是否支持并行嵌套 |
omp_set_nested | 启用或关闭并行嵌套 |
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。