mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
Merge pull request #9 from setmar/master
Conda env and job scripts for DGX-2 (Simula) and Saga (Sigma2)
This commit is contained in:
commit
402e2c6f9f
22
conda_environment_hpc.yml
Normal file
22
conda_environment_hpc.yml
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# Assumes that conda, pip, build-essentials and cuda are installed
|
||||||
|
---
|
||||||
|
name: ShallowWaterGPU_HPC
|
||||||
|
channels:
|
||||||
|
- conda-forge
|
||||||
|
|
||||||
|
dependencies:
|
||||||
|
- python=3.7
|
||||||
|
- numpy
|
||||||
|
- mpi4py
|
||||||
|
- six
|
||||||
|
- pytools
|
||||||
|
- netcdf4
|
||||||
|
- scipy
|
||||||
|
|
||||||
|
# Install conda environment (one-time operation):
|
||||||
|
# $ conda env create -f conda_environment_hpc.yml
|
||||||
|
# Activate environment and install the following packages using pip:
|
||||||
|
# $ conda activate ShallowWaterGPU_HPC
|
||||||
|
# - pycuda: $ pip3 install --no-deps -U pycuda
|
||||||
|
# on Windows: make sure your visual studio c++ compiler is available in PATH
|
||||||
|
# PATH should have something like C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\
|
34
dgx-2-test.job
Normal file
34
dgx-2-test.job
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#!/bin/bash
# Slurm batch script: stages the submit directory into /work, runs
# mpiTesting.py under mpirun with the ShallowWaterGPU_HPC conda environment,
# and copies the resulting *.log and *.nc files back.
#SBATCH -p dgx2q # partition (GPU queue)
#SBATCH -N 1 # number of nodes
#SBATCH -n 4 # number of cores
#SBATCH -w g001 # DGX-2 node
#SBATCH --gres=gpu:4 # number of V100's
#SBATCH --mem 10G # memory pool for all cores
#SBATCH -t 0-00:10 # time (D-HH:MM)
#SBATCH -o slurm.%N.%j.out # STDOUT
#SBATCH -e slurm.%N.%j.err # STDERR

ulimit -s 10240
module load slurm
module load openmpi/4.0.1
module load cuda10.1/toolkit/10.1.243

# Check how many gpu's your job got
#nvidia-smi

# Per-user staging directory on the work file system.
work_dir="/work/$USER/ShallowWaterGPU"
# Directory the results are copied back into.
# (Assumes the repository is checked out in $HOME/src/ShallowWaterGPU.)
result_dir="$HOME/src/ShallowWaterGPU"
# (Assumes Miniconda is installed in user root dir.)
python_bin="$HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3"

## Copy input files to the work directory:
mkdir -p "$work_dir"
cp -r . "$work_dir"

# Run job
# Abort if the work directory is unreachable, so that mpirun is never
# started from an unintended directory.
cd "$work_dir" || exit 1
mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 "$python_bin" mpiTesting.py

# Abort if the result directory is unreachable; otherwise the copies below
# would target the work directory itself.
cd "$result_dir" || exit 1

## Copy files from work directory:
# (NOTE: errexit is not enabled in this script, so these copies are attempted
# even if the run above failed; cp reports an error if no files match.)
cp "$work_dir"/*.log .
cp "$work_dir"/*.nc .
|
||||||
|
|
50
saga-test.job
Normal file
50
saga-test.job
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/bin/bash
# Slurm batch script: stages the submit directory into $SCRATCH, registers
# the *.log and *.nc results with savefile, and runs mpiTesting.py through
# srun with the ShallowWaterGPU_HPC conda environment.
#
# Job name:
#SBATCH --job-name=saga-test
#
# Project:
#SBATCH --account=nn9550k
#
# Wall clock limit:
#SBATCH --time=00:10:00
#
# Ask for 1 GPU (max is 2)
# Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
# device(s) to use. It will have values '0', '1' or '0,1' corresponding to
# /dev/nvidia0, /dev/nvidia1 or both, respectively.
#SBATCH --partition=accel --gres=gpu:1
#
# Max memory usage per task (core) - increasing this will cost more core hours:
#SBATCH --mem-per-cpu=4G
#
# Number of tasks:
#SBATCH --nodes=2 --ntasks-per-node=1

## Set up job environment: (this is done automatically behind the scenes)
## (make sure to comment '#' or remove the following line 'source ...')
# source /cluster/bin/jobsetup

module restore system # instead of 'module purge' rather set module environment to the system default
module load CUDA/10.2.89

# It is also recommended to list loaded modules, for easier debugging:
module list

# Strict mode is enabled only after the module commands above have run.
set -o errexit # exit on errors
set -o nounset # Treat unset variables as errors (added for more easily discovering issues in your batch script)

# Staging directory inside the job's scratch area.
work_dir="$SCRATCH/ShallowWaterGPU"
# Interpreter of the conda environment. $HOME is already an absolute path,
# so it must not be prefixed with /cluster/home/.
python_bin="$HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3"

## Copy input files to the work directory:
# -p keeps errexit from aborting the job if the directory already exists.
mkdir -p "$work_dir"
cp -r . "$work_dir"

## Make sure the results are copied back to the submit directory (see Work Directory below):
# chkfile MyResultFile
# chkfile is replaced by 'savefile' on Saga
# The glob patterns are passed quoted, i.e. unexpanded, to savefile.
savefile "$SCRATCH/ShallowWaterGPU/*.log"
savefile "$SCRATCH/ShallowWaterGPU/*.nc"

## Do some work:
cd "$work_dir"
srun "$python_bin" --version
srun "$python_bin" mpiTesting.py
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user