Mirror of https://github.com/smyalygames/FiniteVolumeGPU.git
Merge pull request #9 from setmar/master
Conda env and job scripts for DGX-2 (Simula) and Saga (Sigma2)
Commit 402e2c6f9f
conda_environment_hpc.yml (new file, 22 lines)
@@ -0,0 +1,22 @@
# Assumes that conda, pip, build-essentials and cuda are installed
---
name: ShallowWaterGPU_HPC
channels:
- conda-forge

dependencies:
- python=3.7
- numpy
- mpi4py
- six
- pytools
- netcdf4
- scipy

# Install conda environment (one-time operation):
# $ conda env create -f conda_environment_hpc.yml
# Activate the environment and install the following packages using pip:
# $ conda activate ShallowWaterGPU_HPC
# - pycuda: $ pip3 install --no-deps -U pycuda
#   on Windows: make sure your Visual Studio C++ compiler is available in PATH
#   PATH should have something like C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\
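For reference, the one-time setup described in the comments above amounts to the following shell session; a minimal sketch, assuming conda and pip3 are already on your PATH:

# Create the environment from the YAML file (one-time operation)
conda env create -f conda_environment_hpc.yml

# Activate it, then install pycuda with pip as the comments prescribe
conda activate ShallowWaterGPU_HPC
pip3 install --no-deps -U pycuda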
dgx-2-test.job (new file, 34 lines)
@@ -0,0 +1,34 @@
#!/bin/bash
#SBATCH -p dgx2q               # partition (GPU queue)
#SBATCH -N 1                   # number of nodes
#SBATCH -n 4                   # number of cores
#SBATCH -w g001                # DGX-2 node
#SBATCH --gres=gpu:4           # number of V100s
#SBATCH --mem 10G              # memory pool for all cores
#SBATCH -t 0-00:10             # time (D-HH:MM)
#SBATCH -o slurm.%N.%j.out     # STDOUT
#SBATCH -e slurm.%N.%j.err     # STDERR

ulimit -s 10240
module load slurm
module load openmpi/4.0.1
module load cuda10.1/toolkit/10.1.243

# Check how many GPUs your job got
#nvidia-smi

## Copy input files to the work directory:
mkdir -p /work/$USER/ShallowWaterGPU
cp -r . /work/$USER/ShallowWaterGPU

# Run job
# (Assumes Miniconda is installed in the user's home directory.)
cd /work/$USER/ShallowWaterGPU
mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py
cd $HOME/src/ShallowWaterGPU

## Copy files from the work directory:
# (NOTE: Copying is not performed if the job fails!)
cp /work/$USER/ShallowWaterGPU/*.log .
cp /work/$USER/ShallowWaterGPU/*.nc .
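To use the script, submit it with sbatch and watch the queue; a minimal sketch, assuming it is launched from the repository root (so that 'cp -r .' stages the sources) and that the standard SLURM client tools are available:

# Submit the job script to the dgx2q partition
sbatch dgx-2-test.job

# Check queue status; the job is pinned to node g001 via '-w g001'
squeue -u $USER

# stdout/stderr are written as slurm.<node>.<jobid>.out/.err
tail -f slurm.g001.*.out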
saga-test.job (new file, 50 lines)
@@ -0,0 +1,50 @@
#!/bin/bash
# Job name:
#SBATCH --job-name=saga-test
#
# Project:
#SBATCH --account=nn9550k
#
# Wall clock limit:
#SBATCH --time=00:10:00
#
# Ask for 1 GPU (max is 2)
# Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
# device(s) to use. It will have values '0', '1' or '0,1' corresponding to
# /dev/nvidia0, /dev/nvidia1 or both, respectively.
#SBATCH --partition=accel --gres=gpu:1
#
# Max memory usage per task (core) - increasing this will cost more core hours:
#SBATCH --mem-per-cpu=4G
#
# Number of tasks:
#SBATCH --nodes=2 --ntasks-per-node=1

## Set up the job environment (this is done automatically behind the scenes),
## so keep the following 'source ...' line commented out or remove it:
# source /cluster/bin/jobsetup

module restore system   # instead of 'module purge': reset the module environment to the system default
module load CUDA/10.2.89

# It is also recommended to list the loaded modules, for easier debugging:
module list

set -o errexit  # exit on errors
set -o nounset  # treat unset variables as errors (helps catch issues in the batch script)

## Copy input files to the work directory:
mkdir $SCRATCH/ShallowWaterGPU
cp -r . $SCRATCH/ShallowWaterGPU

## Make sure the results are copied back to the submit directory (see Work Directory below):
# chkfile MyResultFile
# chkfile is replaced by 'savefile' on Saga
savefile "$SCRATCH/ShallowWaterGPU/*.log"
savefile "$SCRATCH/ShallowWaterGPU/*.nc"

## Do some work:
cd $SCRATCH/ShallowWaterGPU
srun /cluster/home/$USER/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
srun /cluster/home/$USER/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py
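As with the DGX-2 script, submission is a plain sbatch call; a minimal sketch, where <jobid> is a placeholder for the ID that sbatch prints:

# Submit from the directory containing mpiTesting.py, since 'cp -r .'
# stages the current directory into $SCRATCH
sbatch saga-test.job

# After completion, inspect accounting for the two tasks
sacct -j <jobid> --format=JobID,State,Elapsed,MaxRSS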