Twistronics Blog

并行计算实验(2)——使用OpenMP

April 12, 2014

使用第一次实验代码

  • x_dim y_dim z_dim = 500,t_steps = 3 时

3

  • 使 x_dim y_dim z_dim = 1000

5

  • 使 x_dim y_dim z_dim = 1500

6

  • 使 t_steps = 30

7

  • 使 t_steps = 300

8

初步使用 OpenMP

先直接使用 OpenMP 进行测试

	// Repeat the sweep t_steps times; each sweep fills out[][][] from in[][][].
	for (int t = 0; t < t_steps; t++) {

		for (int x = 1; x < x_dim_add_1; x++) {
			for (int y = 1; y < y_dim_add_1; y++) {
				// ivdep is a compiler-specific hint about loop-carried dependences
				// (NOTE(review): exact semantics depend on the compiler in use).
				// The OpenMP work-sharing construct is placed on the innermost z loop,
				// so it is entered once for every (x, y) pair.
				#pragma ivdep
				#pragma omp parallel for
				for (int z = 1; z < z_dim_add_1; z++) {
					// New value = 0.4 * centre cell + 0.1 * sum of the six face neighbours.
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}

		// Exchange in and out through temp so the next step reads this step's result.
		temp = out;
		out = in;
		in = temp;
	}
  • x_dim y_dim z_dim = 500,t_steps = 3 时 2 1 可以看到,速度大幅下降,暴力并行不可取

再进行修改

	// Repeat the sweep t_steps times; each sweep fills out[][][] from in[][][].
	for (int t = 0; t < t_steps; t++) {

		for (int x = 1; x < x_dim_add_1; x++) {
			// The OpenMP work-sharing construct is on the y loop,
			// so it is entered once per x iteration.
			#pragma omp parallel for
			for (int y = 1; y < y_dim_add_1; y++) {
				// ivdep is a compiler-specific hint about loop-carried dependences
				// (NOTE(review): exact semantics depend on the compiler in use).
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					// New value = 0.4 * centre cell + 0.1 * sum of the six face neighbours.
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}

		// Exchange in and out through temp so the next step reads this step's result.
		temp = out;
		out = in;
		in = temp;
	}
  • x_dim y_dim z_dim = 500,t_steps = 3 时 2 2 速度有所提升
	// Repeat the sweep t_steps times; each sweep fills out[][][] from in[][][].
	for (int t = 0; t < t_steps; t++) {
	// The OpenMP work-sharing construct is on the outermost spatial (x) loop,
	// so it is entered once per time step.
	#pragma omp parallel for
		for (int x = 1; x < x_dim_add_1; x++) {
			for (int y = 1; y < y_dim_add_1; y++) {
				// ivdep is a compiler-specific hint about loop-carried dependences
				// (NOTE(review): exact semantics depend on the compiler in use).
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					// New value = 0.4 * centre cell + 0.1 * sum of the six face neighbours.
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}

		// Runs after the parallel x loop has finished: exchange in and out through temp.
		temp = out;
		out = in;
		in = temp;
	}
  • x_dim y_dim z_dim = 500,t_steps = 3 时 2 3 速度变化不大

可以看出,初始化反而占用了大部分时间,因此修改 double 数组的初始化操作

  • x_dim y_dim z_dim = 500,t_steps = 3 时 2 4
  • x_dim y_dim z_dim = 1000,t_steps = 3 时 2 5
  • x_dim y_dim z_dim = 1000,t_steps = 30 时 2 6
  • x_dim y_dim z_dim = 1500,t_steps = 30 时 2 7

修改 OpenMP 调度方式

		// Static schedule with chunk size 1 on the x loop: iterations are
		// pre-assigned to threads one at a time in round-robin order.
		#pragma omp parallel for schedule(static, 1)
		for (int x = 1; x < x_dim_add_1; x++) {	

			for (int y = 1; y < y_dim_add_1; y++) {
				// ivdep is a compiler-specific hint about loop-carried dependences
				// (NOTE(review): exact semantics depend on the compiler in use).
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					// New value = 0.4 * centre cell + 0.1 * sum of the six face neighbours.
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}
  • x_dim y_dim z_dim = 1000,t_steps = 30 时 2 8 速度有所提升
		// Static schedule with chunk size 3 on the x loop: iterations are
		// pre-assigned to threads in blocks of three, in round-robin order.
		#pragma omp parallel for schedule(static, 3)
		for (int x = 1; x < x_dim_add_1; x++) {	

			for (int y = 1; y < y_dim_add_1; y++) {
				// ivdep is a compiler-specific hint about loop-carried dependences
				// (NOTE(review): exact semantics depend on the compiler in use).
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					// New value = 0.4 * centre cell + 0.1 * sum of the six face neighbours.
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}
  • 此时 2 9 速度下降,或许是存在空闲 CPU 的原因
		// The x loop stays serial here; only the y loop below is work-shared.
		for (int x = 1; x < x_dim_add_1; x++) {	

		// Static schedule with chunk size 1 on the y loop; the construct is
		// entered once per x iteration.
		#pragma omp parallel for schedule(static, 1)			
		for (int y = 1; y < y_dim_add_1; y++) {
				// ivdep is a compiler-specific hint about loop-carried dependences
				// (NOTE(review): exact semantics depend on the compiler in use).
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					// New value = 0.4 * centre cell + 0.1 * sum of the six face neighbours.
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}
  • 此时 2 10 线程空闲时间减少,但是调度时间增加,反而速度下降

进行动态调度

		// Dynamic schedule with chunk size 1 on the x loop: each thread requests
		// the next single x iteration at run time when it finishes its current one.
		#pragma omp parallel for schedule(dynamic, 1)
		for (int x = 1; x < x_dim_add_1; x++) {	

			for (int y = 1; y < y_dim_add_1; y++) {
				// ivdep is a compiler-specific hint about loop-carried dependences
				// (NOTE(review): exact semantics depend on the compiler in use).
				#pragma ivdep
				for (int z = 1; z < z_dim_add_1; z++) {
					// New value = 0.4 * centre cell + 0.1 * sum of the six face neighbours.
					out[x][y][z] = 0.4 * in[x][y][z]
						+ 0.1 * (in[x - 1][y][z] + in[x + 1][y][z]
							+ in[x][y - 1][z] + in[x][y + 1][z]
							+ in[x][y][z - 1] + in[x][y][z + 1]);
				}
			}
		}
  • 此时 2 11 速度又有所提升
  • x_dim y_dim z_dim = 1500,t_steps = 30 时 2 12