并行计算实验(2)——使用OpenMP
April 12, 2014
使用第一次实验代码
- 当 $x\_dim = y\_dim = z\_dim = 500$,$t\_steps = 3$ 时
- 使 $x\_dim = y\_dim = z\_dim = 1000$
- 使 $x\_dim = y\_dim = z\_dim = 1500$
- 使 $t\_steps = 30$
- 使 $t\_steps = 300$
初步使用 OpenMP
先直接使用 OpenMP 进行测试:
	/*
	 * Variant 1: OpenMP applied directly to the innermost (z) loop.
	 *
	 * Each of the t_steps iterations fills `out` from `in` with a 7-point
	 * weighted sum (0.4 * centre + 0.1 * each of the six face neighbours),
	 * then swaps the two array pointers so the freshly written array becomes
	 * the input of the next step.
	 *
	 * The loops start at 1 and stop before *_dim_add_1 while reading
	 * index - 1 and index + 1, so the arrays are presumably padded with a
	 * one-cell border -- confirm against the allocation code.
	 */
	for (int t = 0; t < t_steps; t++) {
		for (int x = 1; x < x_dim_add_1; x++) {
			for (int y = 1; y < y_dim_add_1; y++) {
				/*
				 * `ivdep` is a compiler-specific vectorisation hint.
				 * NOTE(review): an OpenMP directive sits between it and the
				 * loop; whether the hint still applies is compiler-dependent
				 * -- confirm.
				 *
				 * The combined parallel/worksharing construct below is
				 * encountered once for every (x, y) pair, and only the z
				 * iterations of that single row are shared among threads.
				 */
				#pragma ivdep
				#pragma omp parallel for
				for (int z = 1; z < z_dim_add_1; z++) {
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}
		/* Swap the buffers: this step's output is the next step's input. */
		temp = out;
		out = in;
		in = temp;
	}
再进行修改
	/*
	 * Variant 2: the OpenMP directive is moved out to the y loop.
	 *
	 * The x loop stays serial; for each x the y iterations are shared among
	 * the threads, and every thread runs the whole z loop for its y values.
	 * Each y iteration writes only out[x][y][...] and `in` is only read.
	 */
	for (int t = 0; t < t_steps; t++) {
		for (int x = 1; x < x_dim_add_1; x++) {
			#pragma omp parallel for
			for (int y = 1; y < y_dim_add_1; y++) {
				/* Compiler-specific vectorisation hint for the z loop. */
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}
		/* Swap the buffers: this step's output is the next step's input. */
		temp = out;
		out = in;
		in = temp;
	}

	/*
	 * Variant 3: the OpenMP directive is moved out to the outermost spatial
	 * (x) loop.
	 *
	 * The parallel construct is now encountered once per time step and whole
	 * x slices are shared among the threads. Each x iteration writes only
	 * out[x][...][...]; the pointer swap runs after the parallel loop ends.
	 */
	for (int t = 0; t < t_steps; t++) {
		#pragma omp parallel for
		for (int x = 1; x < x_dim_add_1; x++) {
			for (int y = 1; y < y_dim_add_1; y++) {
				/* Compiler-specific vectorisation hint for the z loop. */
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}
		/* Swap the buffers: this step's output is the next step's input. */
		temp = out;
		out = in;
		in = temp;
	}
可以看出,初始化反而占了大多数时间,修改初始化double数组操作
- 当 $x\_dim = y\_dim = z\_dim = 500$,$t\_steps = 3$ 时

- 当 $x\_dim = y\_dim = z\_dim = 1000$,$t\_steps = 3$ 时

- 当 $x\_dim = y\_dim = z\_dim = 1000$,$t\_steps = 30$ 时

- 当 $x\_dim = y\_dim = z\_dim = 1500$,$t\_steps = 30$ 时
  
     
修改 OpenMP 调度
		/*
		 * Schedule variant A: x loop parallelised with schedule(static, 1).
		 * Under OpenMP's static schedule with chunk size 1, single x
		 * iterations are dealt to the threads in round-robin order.
		 * Only the spatial sweep is shown; the enclosing time loop and the
		 * buffer swap are outside this excerpt.
		 */
		#pragma omp parallel for schedule(static, 1)
		for (int x = 1; x < x_dim_add_1; x++) {
			for (int y = 1; y < y_dim_add_1; y++) {
				/* Compiler-specific vectorisation hint for the z loop. */
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}

		/*
		 * Schedule variant B: same loop nest with schedule(static, 3), i.e.
		 * chunks of three consecutive x iterations dealt round-robin.
		 * The directive must start its own line: a preprocessing directive
		 * is only recognised when `#` is the first token on the line.
		 */
		#pragma omp parallel for schedule(static, 3)
		for (int x = 1; x < x_dim_add_1; x++) {
			for (int y = 1; y < y_dim_add_1; y++) {
				/* Compiler-specific vectorisation hint for the z loop. */
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}

		/*
		 * Schedule variant C: x loop serial, y loop parallelised with
		 * schedule(static, 1). The parallel construct is encountered once
		 * per x, and single y iterations are dealt round-robin.
		 */
		for (int x = 1; x < x_dim_add_1; x++) {
			#pragma omp parallel for schedule(static, 1)
			for (int y = 1; y < y_dim_add_1; y++) {
				/* Compiler-specific vectorisation hint for the z loop. */
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}
进行动态调度
		/*
		 * Dynamic-schedule variant: the x loop is parallelised with
		 * schedule(dynamic, 1), so under OpenMP's dynamic schedule each
		 * thread takes one x iteration at a time and requests another when
		 * it finishes. Each x iteration writes only out[x][...][...] and
		 * `in` is only read. Only the spatial sweep is shown; the enclosing
		 * time loop and buffer swap are outside this excerpt.
		 */
		#pragma omp parallel for schedule(dynamic, 1)
		for (int x = 1; x < x_dim_add_1; x++) {	
			for (int y = 1; y < y_dim_add_1; y++) {
				/* Compiler-specific vectorisation hint for the z loop. */
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					/* 7-point weighted sum: 0.4 * centre + 0.1 * each face neighbour. */
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}