Mirror of https://github.com/smyalygames/FiniteVolumeGPU.git
	Adjusting order of substep ops and benchmark scripts
This commit is contained in:
parent acb7d2ab39
commit c7e6f17445
@@ -27,7 +27,7 @@ from mpi4py import MPI
import time

import pycuda.driver as cuda
import nvtx
#import nvtx

@@ -319,31 +319,30 @@ class MPISimulator(Simulator.BaseSimulator):
        self.upload_for_exchange(self.sim.u0)

    def substep(self, dt, step_number):
        nvtx.mark("substep start", color="red")
        #nvtx.mark("substep start", color="red")

        self.profiling_data_mpi["start"]["t_step_mpi"] += time.time()
        nvtx.mark("substep internal", color="red")
        self.sim.substep(dt, step_number, internal=True, external=False) # "internal ghost cells" excluded
        self.profiling_data_mpi["end"]["t_step_mpi"] += time.time()

        self.profiling_data_mpi["start"]["t_step_mpi"] += time.time()
        nvtx.mark("substep external", color="blue")
        #nvtx.mark("substep external", color="blue")
        self.sim.substep(dt, step_number, external=True, internal=False) # only "internal ghost cells"
        self.profiling_data_mpi["end"]["t_step_mpi"] += time.time()

        # NOTE: Need to download from u1, as u0<->u1 switch is not performed yet
        nvtx.mark("download", color="red")
        #nvtx.mark("substep internal", color="red")
        self.sim.substep(dt, step_number, internal=True, external=False) # "internal ghost cells" excluded

        #nvtx.mark("download", color="red")
        self.sim.swapBuffers()
        self.download_for_exchange(self.sim.u0)

        nvtx.mark("sync", color="red")
        #nvtx.mark("sync", color="red")
        self.sim.stream.synchronize()
        nvtx.mark("MPI", color="green")
        #nvtx.mark("MPI", color="green")
        self.profiling_data_mpi["end"]["t_step_mpi"] += time.time()
        self.exchange()
        nvtx.mark("upload", color="red")
        self.profiling_data_mpi["start"]["t_step_mpi"] += time.time()
        #nvtx.mark("upload", color="red")
        self.upload_for_exchange(self.sim.u0)

        self.sim.internal_stream.synchronize()
        self.profiling_data_mpi["end"]["t_step_mpi"] += time.time()

        self.profiling_data_mpi["n_time_steps"] += 1

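Note: the nvtx calls above are commented out rather than removed. A minimal alternative (a sketch only, not part of this commit) is to fall back to a no-op shim when nvtx is unavailable or profiling is disabled, so the nvtx.mark(...) lines can stay in place; the _NvtxStub class below is hypothetical, while nvtx.mark("...", color="...") matches the calls used in this file.

# Sketch: optional nvtx import with a no-op fallback.
try:
    import nvtx
except ImportError:
    class _NvtxStub(object):
        def mark(self, *args, **kwargs):
            pass  # profiling disabled: marks become no-ops
    nvtx = _NvtxStub()

# Usage stays identical to the instrumented code path:
nvtx.mark("substep start", color="red")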
@@ -15,6 +15,8 @@ module load openmpi4-cuda11.2-ofed50-gcc8/4.1.0
# Check how many gpu's your job got
#nvidia-smi

mkdir -p output_dgx-2/$NOW

## Copy input files to the work directory:
mkdir -p /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
cp -r . /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
@@ -23,14 +25,16 @@ cp -r . /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
# (Assumes Miniconda is installed in user root dir.)
cd /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
#mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
#nsys profile -t nvtx,cuda mpirun -np  $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
mpirun -np  $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
cd $HOME/src/ShallowWaterGPU

## Copy files from work directory:
# (NOTE: Copying is not performed if job fails!)
mkdir -p output/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.log ./output/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.nc ./output/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.json .
mkdir -p output_dgx-2/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.log ./output_dgx-2/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.nc ./output_dgx-2/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.json ./output_dgx-2/$NOW
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.qdrep ./output_dgx-2/$NOW

rm -rf /work/$USER/$SLURM_JOB_ID
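Note: the job script now collects the per-run profiling *.json files (and nsys *.qdrep reports) under output_dgx-2/$NOW, with logs and NetCDF output one level deeper per job ID. As a hedged sketch, assuming the JSON files are plain dumps of the profiling_data_mpi dictionary seen in the simulator diff above (an assumption, not verified here), a timestamped run directory could be gathered for post-processing like this; the directory name is hypothetical.

# Sketch: collect benchmark JSON output from one timestamped run directory.
import glob
import json

run_dir = "output_dgx-2/2025-10-31T120000"  # hypothetical $NOW value
for path in sorted(glob.glob(run_dir + "/*.json")):
    with open(path) as f:
        profile = json.load(f)
    # assumed layout: one dict per rank with accumulated timings and counters
    print(path, "time steps:", profile.get("n_time_steps", 0))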
@@ -1,11 +1,22 @@
#!/bin/bash

TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")

# one node: 1-8 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=4096,NY=4096 dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=4096,NY=2048 dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=4096,NY=1365 dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=4096,NY=1024 dgx-2_strong_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=4096,NY=819 dgx-2_strong_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=4096,NY=683 dgx-2_strong_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=4096,NY=585 dgx-2_strong_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=4096,NY=512 dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=4096,NY=4096,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=4096,NY=2048,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=4096,NY=1365,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=4096,NY=1024,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=4096,NY=819,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=4096,NY=683,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=4096,NY=585,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job

sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=4096,NY=512,NOW=$TIMESTAMP dgx-2_strong_scaling_benchmark.job
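Note: for the re-enabled 1-8 GPU runs, NY is roughly 4096 divided by the GPU count (4096, 2048, 1365, ..., 512), which is consistent with a fixed 4096x4096 global domain split along y, i.e. a strong-scaling setup; the 9-16 GPU jobs added here keep NY=512 instead of continuing that division. A short sketch of how such a submission loop could be generated (purely illustrative; the committed script lists the commands explicitly):

# Sketch: generate the 1-8 GPU sbatch lines used above.
ny_total = 4096
for n in range(1, 9):
    ny = int(round(ny_total / float(n)))  # 4096, 2048, 1365, 1024, 819, 683, 585, 512
    print("sbatch --nodes=1 --gpus-per-node={n} --ntasks-per-node={n} "
          "--export=ALL,NX=4096,NY={ny},NOW=$TIMESTAMP "
          "dgx-2_strong_scaling_benchmark.job".format(n=n, ny=ny))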
@@ -119,7 +119,8 @@ dt = 0.00001
gamma = 1.4
#save_times = np.linspace(0, 0.000009, 2)
#save_times = np.linspace(0, 0.000099, 11)
save_times = np.linspace(0, 0.000099, 2)
#save_times = np.linspace(0, 0.000099, 2)
save_times = np.linspace(0, 0.000999, 2)
outfile = "mpi_out_" + str(MPI.COMM_WORLD.rank) + ".nc"
save_var_names = ['rho', 'rho_u', 'rho_v', 'E']
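Note: with only two linspace points, the simulation still writes just two snapshots, but the final save time moves from 0.000099 to 0.000999, so each benchmark run simulates roughly ten times as long an interval. A tiny check, illustrative only:

import numpy as np

# Saves only the initial state and the state at t = 0.000999
save_times = np.linspace(0, 0.000999, 2)
print(save_times)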
Martin Lilleeng Sætra