diff --git a/Figures.ipynb b/Figures.ipynb index 904b155..ec2d441 100644 --- a/Figures.ipynb +++ b/Figures.ipynb @@ -65,91 +65,148 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " t_init t_total outfile \\\n", - "0 1.248385 20.045867 /work/martinls/230527/ShallowWaterGPU/mpi_out_... \n", - "1 1.687006 21.810200 /work/martinls/230528/ShallowWaterGPU/mpi_out_... \n", - "2 2.178354 24.593490 /work/martinls/230530/ShallowWaterGPU/mpi_out_... \n", - "3 2.690906 25.624513 /work/martinls/230531/ShallowWaterGPU/mpi_out_... \n", - "4 3.629718 26.697773 /work/martinls/230532/ShallowWaterGPU/mpi_out_... \n", - "5 4.364927 27.958164 /work/martinls/230533/ShallowWaterGPU/mpi_out_... \n", - "6 5.628270 29.105025 /work/martinls/230534/ShallowWaterGPU/mpi_out_... \n", - "7 6.777608 30.504384 /work/martinls/230535/ShallowWaterGPU/mpi_out_... \n", + " t_init t_total outfile \\\n", + "0 14.511714 181.754037 /work/martinls/232557/ShallowWaterGPU/mpi_out_... \n", + "1 15.153404 188.838794 /work/martinls/232558/ShallowWaterGPU/mpi_out_... \n", + "2 15.607471 190.535054 /work/martinls/232589/ShallowWaterGPU/mpi_out_... \n", + "3 15.332916 188.146165 /work/martinls/232590/ShallowWaterGPU/mpi_out_... \n", + "4 15.941363 193.263406 /work/martinls/232591/ShallowWaterGPU/mpi_out_... \n", + "5 16.805506 194.776481 /work/martinls/232592/ShallowWaterGPU/mpi_out_... \n", + "6 18.009921 198.615131 /work/martinls/232593/ShallowWaterGPU/mpi_out_... \n", + "7 17.990572 199.018155 /work/martinls/232594/ShallowWaterGPU/mpi_out_... \n", + "8 19.366701 202.898836 /work/martinls/232595/ShallowWaterGPU/mpi_out_... \n", + "9 19.890607 205.122811 /work/martinls/232596/ShallowWaterGPU/mpi_out_... \n", + "10 20.974516 207.287065 /work/martinls/232597/ShallowWaterGPU/mpi_out_... \n", + "11 21.358601 209.105944 /work/martinls/232598/ShallowWaterGPU/mpi_out_... 
\n", + "12 22.813077 211.172879 /work/martinls/232599/ShallowWaterGPU/mpi_out_... \n", + "13 23.636758 212.722331 /work/martinls/232600/ShallowWaterGPU/mpi_out_... \n", + "14 23.983026 214.176335 /work/martinls/232601/ShallowWaterGPU/mpi_out_... \n", + "15 24.996966 216.951382 /work/martinls/232602/ShallowWaterGPU/mpi_out_... \n", "\n", - " t_sim_init t_nc_write t_full_step t_mpi_halo_exchange \\\n", - "0 1.880793 12.403532 4.007889 0.0 \n", - "1 2.117109 12.576457 4.909249 0.0 \n", - "2 2.050483 12.774502 7.045701 0.0 \n", - "3 2.216515 13.096246 7.060501 0.0 \n", - "4 2.259021 13.178762 7.057118 0.0 \n", - "5 2.455840 13.353797 7.218295 0.0 \n", - "6 2.354878 13.576300 6.985424 0.0 \n", - "7 2.688699 13.492631 6.996821 0.0 \n", + " t_sim_init t_nc_write t_full_step t_mpi_halo_exchange \\\n", + "0 10.661480 113.172576 42.137838 0.0 \n", + "1 11.083883 118.234985 43.038861 0.0 \n", + "2 11.338173 118.849141 43.378157 0.0 \n", + "3 11.166394 116.970772 43.362903 0.0 \n", + "4 11.167876 121.090511 43.696337 0.0 \n", + "5 11.125732 122.019746 43.435468 0.0 \n", + "6 11.410769 124.265493 43.508696 0.0 \n", + "7 11.951049 123.907622 43.785883 0.0 \n", + "8 11.861801 126.177618 44.059032 0.0 \n", + "9 12.045421 127.249941 44.542234 0.0 \n", + "10 12.357193 128.412160 44.133266 0.0 \n", + "11 12.668238 129.337771 44.327086 0.0 \n", + "12 12.733378 129.754346 44.384927 0.0 \n", + "13 12.836023 130.674045 44.157766 0.0 \n", + "14 13.105231 131.080429 44.535530 0.0 \n", + "15 13.106097 133.506058 43.892579 0.0 \n", "\n", - " t_mpi_halo_exchange_download t_mpi_halo_exchange_upload \\\n", - "0 3.883057 0.025879 \n", - "1 4.622559 0.027954 \n", - "2 3.596680 0.027832 \n", - "3 6.201660 0.028931 \n", - "4 3.875732 0.027222 \n", - "5 4.124268 0.028076 \n", - "6 4.145630 0.028564 \n", - "7 5.710327 0.030151 \n", + " t_mpi_halo_exchange_download t_mpi_halo_exchange_upload \\\n", + "0 41.482056 0.042358 \n", + "1 41.775146 0.042603 \n", + "2 41.762573 0.041992 \n", + "3 41.740112 
0.041138 \n", + "4 41.728638 0.043213 \n", + "5 41.725586 0.044678 \n", + "6 41.731934 0.044067 \n", + "7 41.630493 0.043823 \n", + "8 41.810547 0.044678 \n", + "9 41.643677 0.044678 \n", + "10 41.851196 0.045288 \n", + "11 41.774414 0.046509 \n", + "12 41.790405 0.046509 \n", + "13 41.642212 0.046387 \n", + "14 41.643066 0.045044 \n", + "15 41.756714 0.047485 \n", "\n", - " t_mpi_halo_exchange_sendreceive t_mpi_step nx ny dt \\\n", - "0 0.015381 0.017944 8192.0 8192.0 0.000001 \n", - "1 0.122803 0.018860 8192.0 8192.0 0.000001 \n", - "2 3.337158 0.019775 8192.0 8192.0 0.000001 \n", - "3 0.542480 0.019165 8192.0 8192.0 0.000001 \n", - "4 0.423584 0.020264 8192.0 8192.0 0.000001 \n", - "5 2.685059 0.019531 8192.0 8192.0 0.000001 \n", - "6 0.510254 0.019775 8192.0 8192.0 0.000001 \n", - "7 0.753418 0.018982 8192.0 8192.0 0.000001 \n", + " t_mpi_halo_exchange_sendreceive t_mpi_step nx ny dt \\\n", + "0 0.059082 0.025330 22528.0 22528.0 0.000001 \n", + "1 0.402832 0.026062 22528.0 22528.0 0.000001 \n", + "2 0.779541 0.026123 22528.0 22528.0 0.000001 \n", + "3 1.217041 0.025879 22528.0 22528.0 0.000001 \n", + "4 1.111328 0.026855 22528.0 22528.0 0.000001 \n", + "5 0.885742 0.027466 22528.0 22528.0 0.000001 \n", + "6 0.954346 0.027405 22528.0 22528.0 0.000001 \n", + "7 1.984375 0.028320 22528.0 22528.0 0.000001 \n", + "8 1.729980 0.027954 22528.0 22528.0 0.000001 \n", + "9 1.878174 0.028931 22528.0 22528.0 0.000001 \n", + "10 1.613525 0.029053 22528.0 22528.0 0.000001 \n", + "11 1.831299 0.028137 22528.0 22528.0 0.000001 \n", + "12 1.806152 0.029480 22528.0 22528.0 0.000001 \n", + "13 1.662354 0.030518 22528.0 22528.0 0.000001 \n", + "14 1.943604 0.029297 22528.0 22528.0 0.000001 \n", + "15 0.937256 0.030579 22528.0 22528.0 0.000001 \n", "\n", - " n_time_steps slurm_job_id n_cuda_devices n_processes \\\n", - "0 200.0 230527.0 1 1 \n", - "1 200.0 230528.0 2 2 \n", - "2 200.0 230530.0 3 3 \n", - "3 200.0 230531.0 4 4 \n", - "4 200.0 230532.0 5 5 \n", - "5 200.0 230533.0 6 
6 \n", - "6 200.0 230534.0 7 7 \n", - "7 200.0 230535.0 8 8 \n", + " n_time_steps slurm_job_id n_cuda_devices n_processes \\\n", + "0 200.0 232557.0 1 1 \n", + "1 200.0 232558.0 2 2 \n", + "2 200.0 232589.0 3 3 \n", + "3 200.0 232590.0 4 4 \n", + "4 200.0 232591.0 5 5 \n", + "5 200.0 232592.0 6 6 \n", + "6 200.0 232593.0 7 7 \n", + "7 200.0 232594.0 8 8 \n", + "8 200.0 232595.0 9 9 \n", + "9 200.0 232596.0 10 10 \n", + "10 200.0 232597.0 11 11 \n", + "11 200.0 232598.0 12 12 \n", + "12 200.0 232599.0 13 13 \n", + "13 200.0 232600.0 14 14 \n", + "14 200.0 232601.0 15 15 \n", + "15 200.0 232602.0 16 16 \n", "\n", - " git_hash \\\n", - "0 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "1 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "2 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "3 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "4 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "5 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "6 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "7 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", + " git_hash \\\n", + "0 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "1 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "2 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "3 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "4 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "5 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "6 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "7 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "8 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "9 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "10 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "11 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "12 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "13 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "14 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "15 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", "\n", - " git_status \n", - "0 M 
Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "1 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "2 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "3 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "4 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "5 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "6 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "7 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n" + " git_status \n", + "0 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "1 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "2 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "3 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "4 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "5 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "6 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "7 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "8 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "9 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "10 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "11 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "12 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "13 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "14 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "15 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... 
\n" ] } ], "source": [ "# DGX-2\n", "#weak_scaling_profiling_data = read_profiling_files(\"output_dgx-2/weak_scaling/2022-06-09T134809/\")\n", + "weak_scaling_profiling_data = read_profiling_files(\"output_dgx-2/weak_scaling/2022-06-23T154025/\")\n", "\n", "# HGX\n", - "weak_scaling_profiling_data = read_profiling_files(\"output_hgx/weak_scaling/2022-06-16T162931/\")\n", + "#weak_scaling_profiling_data = read_profiling_files(\"output_hgx/weak_scaling/2022-06-16T162931/\")\n", "##weak_scaling_profiling_data = read_profiling_files(\"output_hgx/weak_scaling/2022-06-16T170630/\")\n", "\n", "print(weak_scaling_profiling_data)" @@ -186,70 +243,127 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " t_init t_total outfile \\\n", - "0 8.105802 127.329448 /work/martinls/230507/ShallowWaterGPU/mpi_out_... \n", - "1 8.391940 106.173041 /work/martinls/230508/ShallowWaterGPU/mpi_out_... \n", - "2 8.316061 89.259504 /work/martinls/230509/ShallowWaterGPU/mpi_out_... \n", - "3 9.480870 82.180610 /work/martinls/230510/ShallowWaterGPU/mpi_out_... \n", - "4 9.948056 74.482449 /work/martinls/230511/ShallowWaterGPU/mpi_out_... \n", + " t_init t_total outfile \\\n", + "0 15.227155 189.004926 /work/martinls/232634/ShallowWaterGPU/mpi_out_... \n", + "1 12.726498 145.335962 /work/martinls/232635/ShallowWaterGPU/mpi_out_... \n", + "2 11.482033 123.139408 /work/martinls/232636/ShallowWaterGPU/mpi_out_... \n", + "3 10.548483 100.839853 /work/martinls/232637/ShallowWaterGPU/mpi_out_... \n", + "4 10.746949 95.866956 /work/martinls/232638/ShallowWaterGPU/mpi_out_... \n", + "5 10.345715 87.113081 /work/martinls/232639/ShallowWaterGPU/mpi_out_... \n", + "6 9.915406 75.785243 /work/martinls/232640/ShallowWaterGPU/mpi_out_... \n", + "7 10.107682 69.963608 /work/martinls/232641/ShallowWaterGPU/mpi_out_... \n", + "8 10.620777 66.039795 /work/martinls/232642/ShallowWaterGPU/mpi_out_... 
\n", + "9 11.305829 63.000684 /work/martinls/232643/ShallowWaterGPU/mpi_out_... \n", + "10 11.614343 60.330283 /work/martinls/232644/ShallowWaterGPU/mpi_out_... \n", + "11 12.639043 60.506280 /work/martinls/232645/ShallowWaterGPU/mpi_out_... \n", + "12 13.312508 57.034760 /work/martinls/232646/ShallowWaterGPU/mpi_out_... \n", "\n", - " t_sim_init t_nc_write t_full_step t_mpi_halo_exchange \\\n", - "0 5.656313 88.769145 23.461966 0.0 \n", - "1 5.297291 72.174575 19.195057 0.0 \n", - "2 5.045456 58.199751 16.024106 0.0 \n", - "3 5.172412 52.463597 13.905023 0.0 \n", - "4 4.827947 46.293962 12.370357 0.0 \n", + " t_sim_init t_nc_write t_full_step t_mpi_halo_exchange \\\n", + "0 11.414952 118.087399 42.976445 0.0 \n", + "1 8.819334 90.167300 32.226894 0.0 \n", + "2 7.751206 75.901292 26.841162 0.0 \n", + "3 6.980121 58.661561 23.018016 0.0 \n", + "4 6.335883 57.172819 20.371334 0.0 \n", + "5 5.870950 51.546669 18.195306 0.0 \n", + "6 5.579971 41.915547 16.725574 0.0 \n", + "7 5.234274 38.144568 14.960167 0.0 \n", + "8 4.945005 35.074090 13.968068 0.0 \n", + "9 4.773231 32.496020 13.152020 0.0 \n", + "10 4.734492 30.088176 11.919627 0.0 \n", + "11 4.422556 30.348880 11.168828 0.0 \n", + "12 4.536324 26.665879 10.616396 0.0 \n", "\n", - " t_mpi_halo_exchange_download t_mpi_halo_exchange_upload \\\n", - "0 21.429688 0.028931 \n", - "1 15.628418 0.031372 \n", - "2 13.573486 0.030273 \n", - "3 11.412964 0.030151 \n", - "4 10.445801 0.030762 \n", + " t_mpi_halo_exchange_download t_mpi_halo_exchange_upload \\\n", + "0 41.536133 0.042114 \n", + "1 31.025757 0.041748 \n", + "2 25.926025 0.039062 \n", + "3 22.155762 0.040649 \n", + "4 19.375610 0.040161 \n", + "5 17.366577 0.039062 \n", + "6 15.636230 0.040527 \n", + "7 14.279663 0.040649 \n", + "8 13.050293 0.039307 \n", + "9 11.995850 0.039917 \n", + "10 11.195801 0.039429 \n", + "11 10.509277 0.039307 \n", + "12 9.817139 0.040283 \n", "\n", - " t_mpi_halo_exchange_sendreceive t_mpi_step nx ny dt \\\n", - "0 1.946533 0.019531 
41984.0 10496.0 0.000001 \n", - "1 2.726074 0.021606 41984.0 8396.0 0.000001 \n", - "2 1.489014 0.020386 41984.0 6997.0 0.000001 \n", - "3 1.407959 0.019775 41984.0 5997.0 0.000001 \n", - "4 1.264648 0.021240 41984.0 5248.0 0.000001 \n", + " t_mpi_halo_exchange_sendreceive t_mpi_step nx ny dt \\\n", + "0 1.334229 0.025146 45056.0 11264.0 0.000001 \n", + "1 0.792480 0.026306 45056.0 8396.0 0.000001 \n", + "2 0.567139 0.025024 45056.0 6997.0 0.000001 \n", + "3 0.596924 0.025452 45056.0 5997.0 0.000001 \n", + "4 0.803955 0.024841 45056.0 5248.0 0.000001 \n", + "5 0.732422 0.025330 45056.0 4664.0 0.000001 \n", + "6 0.979492 0.026062 45056.0 4198.0 0.000001 \n", + "7 0.487793 0.025635 45056.0 3816.0 0.000001 \n", + "8 0.795654 0.024780 45056.0 3498.0 0.000001 \n", + "9 0.995605 0.025330 45056.0 3229.0 0.000001 \n", + "10 0.691406 0.025452 45056.0 2998.0 0.000001 \n", + "11 0.388672 0.025757 45056.0 2798.0 0.000001 \n", + "12 0.655518 0.025146 45056.0 2624.0 0.000001 \n", "\n", - " n_time_steps slurm_job_id n_cuda_devices n_processes \\\n", - "0 200.0 230507.0 4 4 \n", - "1 200.0 230508.0 5 5 \n", - "2 200.0 230509.0 6 6 \n", - "3 200.0 230510.0 7 7 \n", - "4 200.0 230511.0 8 8 \n", + " n_time_steps slurm_job_id n_cuda_devices n_processes \\\n", + "0 200.0 232634.0 4 4 \n", + "1 200.0 232635.0 5 5 \n", + "2 200.0 232636.0 6 6 \n", + "3 200.0 232637.0 7 7 \n", + "4 200.0 232638.0 8 8 \n", + "5 200.0 232639.0 9 9 \n", + "6 200.0 232640.0 10 10 \n", + "7 200.0 232641.0 11 11 \n", + "8 200.0 232642.0 12 12 \n", + "9 200.0 232643.0 13 13 \n", + "10 200.0 232644.0 14 14 \n", + "11 200.0 232645.0 15 15 \n", + "12 200.0 232646.0 16 16 \n", "\n", - " git_hash \\\n", - "0 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "1 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "2 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "3 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", - "4 0f0cbad2dd661c59f9a2c43740eda12d90cca413\\n \n", + " git_hash \\\n", + "0 
aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "1 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "2 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "3 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "4 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "5 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "6 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "7 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "8 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "9 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "10 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "11 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", + "12 aa693a9a468e3d591417342d96128d90c9df7884\\n \n", "\n", - " git_status \n", - "0 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "1 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "2 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "3 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n", - "4 M Figures.ipynb\\n M dgx-2_strong_scaling_benc... \n" + " git_status \n", + "0 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "1 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "2 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "3 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "4 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "5 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "6 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "7 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "8 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "9 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "10 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "11 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... \n", + "12 M Figures.ipynb\\n M dgx-2_scaling_benchmark.j... 
\n" ] } ], "source": [ "# DGX-2\n", "#strong_scaling_profiling_data = read_profiling_files(\"output_dgx-2/strong_scaling/2022-06-09T160712/\")\n", + "strong_scaling_profiling_data = read_profiling_files(\"output_dgx-2/strong_scaling/2022-06-23T172838/\")\n", "\n", "# HGX\n", - "strong_scaling_profiling_data = read_profiling_files(\"output_hgx/strong_scaling/2022-06-16T152945/\")\n", + "#strong_scaling_profiling_data = read_profiling_files(\"output_hgx/strong_scaling/2022-06-16T152945/\")\n", "\n", "print(strong_scaling_profiling_data)" ] @@ -263,12 +377,12 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 43, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -280,6 +394,11 @@ } ], "source": [ + "plt.rcParams['font.size'] = 16\n", + "plt.rcParams['legend.fontsize'] = 14\n", + "plt.rcParams['axes.linewidth'] = 2\n", + "plt.rcParams['lines.linewidth'] = 2\n", + "\n", "fig, (ax_weak, ax_strong) = plt.subplots(1, 2, figsize=(16,6))\n", "\n", "t_total_no_init_or_file_io = weak_scaling_profiling_data[\"t_total\"] \\\n", @@ -294,13 +413,14 @@ "#ax_weak.plot(weak_scaling_profiling_data[\"n_processes\"].to_numpy(dtype=\"int\"), \n", "# speedup(t_total_no_init_or_file_io[0], t_total_no_init_or_file_io), label=\"Total (no init or file I/O)\")\n", "\n", - "ax_weak.plot(weak_scaling_profiling_data[\"n_processes\"][0:].to_numpy(dtype=\"int\"), \n", - " speedup(weak_scaling_profiling_data[\"t_full_step\"][0], weak_scaling_profiling_data[\"t_full_step\"][0:]), label=\"Total (no init or file I/O)\")\n", + "ax_weak.plot(weak_scaling_profiling_data[\"n_processes\"].to_numpy(dtype=\"int\"), \n", + " speedup(weak_scaling_profiling_data[\"t_full_step\"][0], weak_scaling_profiling_data[\"t_full_step\"]), label=\"Total runtime (except init and file I/O)\")\n", + "ax_weak.locator_params(axis=\"x\", nbins=16)\n", "\n", + "\"\"\"\n", "ax_weak.plot(weak_scaling_profiling_data[\"n_processes\"][0:].to_numpy(dtype=\"int\"), \n", " speedup(t_total_halo_exchange[0], t_total_halo_exchange[0:]), label=\"Halo exchange (D/E/U)\", linestyle=\"dashed\")\n", "\n", - "\"\"\"\n", "ax_weak.plot(weak_scaling_profiling_data[\"n_processes\"].to_numpy(dtype=\"int\"), \n", " speedup(weak_scaling_profiling_data[\"t_total\"][0], weak_scaling_profiling_data[\"t_total\"]), label=\"Total\")\n", "\n", @@ -324,7 +444,7 @@ "\n", "ax_weak.set_xlabel(\"Number of ranks/GPUs\")\n", "ax_weak.set_ylabel(\"Efficiency\")\n", - "ax_weak.legend(loc=\"lower right\")\n", + "ax_weak.legend(loc=\"upper right\", bbox_to_anchor=[1.0, 0.95])\n", "#fig.show()\n", "\n", "##############################################\n", @@ -344,12 +464,12 @@ "# speedup(t_total_no_init_or_file_io[0], 
t_total_no_init_or_file_io)*4, label=\"Total (no init or file I/O)\")\n", "\n", "ax_strong.plot(strong_scaling_profiling_data[\"n_processes\"].to_numpy(dtype=\"int\"), \n", - " speedup(strong_scaling_profiling_data[\"t_full_step\"][0], strong_scaling_profiling_data[\"t_full_step\"])*4, label=\"Total (no init or file I/O)\")\n", + " speedup(strong_scaling_profiling_data[\"t_full_step\"][0], strong_scaling_profiling_data[\"t_full_step\"])*4, label=\"Total runtime (except init and file I/O)\")\n", "\n", + "\"\"\"\n", "ax_strong.plot(strong_scaling_profiling_data[\"n_processes\"].to_numpy(dtype=\"int\"), \n", " speedup(t_total_halo_exchange[0], t_total_halo_exchange)*4, label=\"Halo exchange (D/E/U)\", linestyle=\"dashed\")\n", "\n", - "\"\"\"\n", "ax_strong.plot(strong_scaling_profiling_data[\"n_processes\"].to_numpy(dtype=\"int\"), \n", " speedup(strong_scaling_profiling_data[\"t_total\"][0], strong_scaling_profiling_data[\"t_total\"])*4, label=\"Total\")\n", " \n", @@ -382,7 +502,9 @@ "ax_strong.set_xlabel(\"Number of ranks/GPUs\")\n", "ax_strong.set_ylabel(\"Speedup\")\n", "ax_strong.legend(loc=\"upper left\")\n", - "fig.show()" + "fig.show()\n", + "\n", + "fig.savefig(\"dgx-2-scaling.pdf\", bbox_inches='tight')" ] }, { diff --git a/dgx-2_scaling_benchmark.job b/dgx-2_scaling_benchmark.job index fce0443..d4c7cb5 100644 --- a/dgx-2_scaling_benchmark.job +++ b/dgx-2_scaling_benchmark.job @@ -6,7 +6,7 @@ #SBATCH -t 0-00:10 # time (D-HH:MM) #SBATCH -o slurm.%N.%j.out # STDOUT #SBATCH -e slurm.%N.%j.err # STDERR -#SBATCH --reservation=martinls_8 +#SBATCH --reservation=martinls_17 # For Linux 64, Open MPI is built with CUDA awareness but this support is disabled by default. 
diff --git a/dgx-2_strong_scaling_benchmark.sh b/dgx-2_strong_scaling_benchmark.sh index 7414a7b..cf6121d 100644 --- a/dgx-2_strong_scaling_benchmark.sh +++ b/dgx-2_strong_scaling_benchmark.sh @@ -22,17 +22,52 @@ TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S") #sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=8192,NY=512,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job # one node: 4-16 GPUs -sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=41984,NY=10496,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=41984,NY=8396,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=41984,NY=6997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=41984,NY=5997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=41984,NY=5248,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=41984,NY=10496,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=41984,NY=8396,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=41984,NY=6997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=41984,NY=5997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=41984,NY=5248,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +# +#sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=41984,NY=4664,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=41984,NY=4198,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 
--gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=41984,NY=3816,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=41984,NY=3498,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=41984,NY=3229,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=41984,NY=2998,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=41984,NY=2798,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +#sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=41984,NY=2624,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=41984,NY=4664,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=41984,NY=4198,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=41984,NY=3816,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=41984,NY=3498,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=41984,NY=3229,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=41984,NY=2998,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=41984,NY=2798,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=41984,NY=2624,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +# one node: 1-16 GPUs +sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 
--gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=22528,NY=11264,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=22528,NY=7509,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=22528,NY=5632,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=22528,NY=4505,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=22528,NY=3754,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=22528,NY=3218,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=22528,NY=2816,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job + +sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=22528,NY=2503,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=22528,NY=2252,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=22528,NY=2048,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=22528,NY=1877,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=22528,NY=1732,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=22528,NY=1609,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=22528,NY=1501,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=22528,NY=1408,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job + +# one node: 4-16 GPUs (NY = floor(45056 / n_gpus), so n_gpus * NY stays at ~45056 for every run) +sbatch --nodes=1 --gpus-per-node=4
--ntasks-per-node=4 --export=ALL,NX=45056,NY=11264,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=45056,NY=9011,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=45056,NY=7509,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=45056,NY=6436,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=45056,NY=5632,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job + +sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=45056,NY=5006,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=45056,NY=4505,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=45056,NY=4096,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=45056,NY=3754,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=45056,NY=3465,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=45056,NY=3218,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=45056,NY=3003,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=45056,NY=2816,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job diff --git a/dgx-2_weak_scaling_benchmark.sh b/dgx-2_weak_scaling_benchmark.sh index a24ee65..fddabf9 100644 --- a/dgx-2_weak_scaling_benchmark.sh +++ b/dgx-2_weak_scaling_benchmark.sh @@ -22,20 +22,20 @@ TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S") #sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16
--export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job # one node: 1-16 GPUs -sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 
--export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job -sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job \ No newline at end of file +sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=14 
--ntasks-per-node=14 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job +sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job \ No newline at end of file