diff --git a/dgx-2-shmem-test.job b/dgx-2-shmem-test.job
index 57e8e88..39e368a 100644
--- a/dgx-2-shmem-test.job
+++ b/dgx-2-shmem-test.job
@@ -3,14 +3,13 @@
 #SBATCH -N 1                # number of nodes
 #SBATCH -n 1                # number of cores
 #SBATCH -w g001             # DGX-2 node
-#SBATCH --gres=gpu:2        # number of V100's
+#SBATCH --gres=gpu:1        # number of V100's
 #SBATCH -t 0-00:10          # time (D-HH:MM)
 #SBATCH -o slurm.%N.%j.out  # STDOUT
 #SBATCH -e slurm.%N.%j.err  # STDERR
 
 ulimit -s 10240
 
 module load slurm
-module load openmpi/4.0.1
 module load cuda10.1/toolkit/10.1.243
 
 # Check how many gpu's your job got
@@ -24,11 +23,11 @@ cp -r . /work/$USER/ShallowWaterGPU
 # Run job
 # (Assumes Miniconda is installed in user root dir.)
 cd /work/$USER/ShallowWaterGPU
-mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 shmemTesting.py
+nvprof -o profiler_output $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 shmemTesting.py
 cd $HOME/src/ShallowWaterGPU
 
 ## Copy files from work directory:
 # (NOTE: Copying is not performed if job fails!)
 cp /work/$USER/ShallowWaterGPU/*.log .
 cp /work/$USER/ShallowWaterGPU/*.nc .
-
+cp /work/$USER/ShallowWaterGPU/profiler_output .
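
Note: the exported profile copied back as profiler_output can be inspected after the job completes. A minimal sketch, assuming nvprof from the same CUDA 10.1 toolkit is available wherever the file ends up:

# Load the same toolkit the job used (module name taken from the script above)
module load cuda10.1/toolkit/10.1.243
# Print a kernel/API summary from the exported profile
nvprof --import-profile profiler_output
# Alternatively, open profiler_output in the NVIDIA Visual Profiler (nvvp) for a timeline view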