User Tools

Site Tools


tech:slurm

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
Last revisionBoth sides next revision
tech:slurm [2021/03/19 12:14] kohofertech:slurm [2022/09/09 17:01] – [Modules] kohofer
Line 14: Line 14:
  
 ===== Installation ===== ===== Installation =====
 +
  
 ===== Controller name: slurm-ctrl ===== ===== Controller name: slurm-ctrl =====
Line 241: Line 242:
   debug*       up   infinite      1   idle linux1   debug*       up   infinite      1   idle linux1
  
-If computer node is **<color #ed1c24>down</color>** or **<color #ed1c24>drain</color>**+If computer node is <color #ed1c24>down</color> or <color #ed1c24>drain</color>
  
 <code> <code>
Line 263: Line 264:
 PARTITION AVAIL  TIMELIMIT  NODES  STATE NODELIST PARTITION AVAIL  TIMELIMIT  NODES  STATE NODELIST
 debug*       up   infinite      2   idle gpu[02-03] debug*       up   infinite      2   idle gpu[02-03]
 +</code>
 +
 +<code>
 +sinfo -o "%20N  %10c  %10m  %25f  %10G "
 +NODELIST              CPUS        MEMORY      AVAIL_FEATURES             GRES       
 +gpu[02-03]            32          190000      (null)                     gpu:     
 +gpu04                 64          1000000     (null)                     gpu:4(S:0) 
 +hpcmoi01,hpcwrk01     32+         190000+     (null)                     (null)
 </code> </code>
  
Line 342: Line 351:
      
 ===== Modify user accounts ===== ===== Modify user accounts =====
 +
 +Display the accounts created:
 +
 +  # Show also associations in the accounts
 +  sacctmgr show account -s
 +  # Show all columns separated by pipe | symbol
 +  sacctmgr show account -s -P
 +  # Show users together with their associations
 +  sacctmgr show user -s
  
 Add user Add user
  
-  sacctmgr add user <usernme> Account=gpu-users Partition=gpu+  sacctmgr add user <username> Account=gpu-users Partition=gpu
  
 Modify user, give 12000 minutes/200 hours for usage Modify user, give 12000 minutes/200 hours for usage
  
-  sacctmgr modify user misegata set GrpTRESMin=cpu=12000,gres/gpu=12000+  sacctmgr modify user <username> set GrpTRESMin=cpu=12000,gres/gpu=12000 
 + 
 +Remove a user from a specific account 
 + 
 +  sacctmgr remove user where user=<username> and account=<account> 
 + 
 +Delete user 
 + 
 +  sacctmgr delete user ivmilan 
 +  Deleting users... 
 +  ivmilan 
 +  Would you like to commit changes? (You have 30 seconds to decide) 
 +  (N/y): y 
  
 Restart the services: Restart the services:
Line 361: Line 392:
   systemctl status slurmdbd.service   systemctl status slurmdbd.service
  
 +==== Submit a job to a specific node using Slurm's sbatch command ====
 +
 +To run a job on a specific node, use this option in the job script:
 +
 +  #SBATCH --nodelist=gpu03
  
  
Line 385: Line 421:
  
 ====== Modules ====== ====== Modules ======
 +
 +The Environment Modules package provides for the dynamic modification of a user's environment via modulefiles.
 +
 +Installing Modules on Unix
 +
 +Log in to slurm-ctrl and become root
 +
 +  ssh slurm-ctrl
 +  sudo -i
 +
 +Download modules
 +
 +  curl -LJO https://github.com/cea-hpc/modules/releases/download/v4.6.0/modules-4.6.0.tar.gz
 +  tar xfz modules-4.6.0.tar.gz
 +  cd modules-4.6.0
 +
 +
 +  $ ./configure --prefix=/opt/modules
 +  $ make
 +  $ make install
 +
 +
 +
 +https://modules.readthedocs.io/en/stable/index.html
 +
 +
 +----
 +
 +===== SPACK =====
 +
  
 Add different python versions using spack! Add different python versions using spack!
Line 390: Line 456:
 1. First see which python versions are available: 1. First see which python versions are available:
  
-  spack versions python+  root@slurm-ctrl:~# spack versions python
   ==> Safe versions (already checksummed):   ==> Safe versions (already checksummed):
   3.8.2  3.7.7  3.7.4  3.7.1  3.6.7  3.6.4  3.6.1  3.5.2  3.4.10  3.2.6   2.7.17  2.7.14  2.7.11  2.7.8   3.8.2  3.7.7  3.7.4  3.7.1  3.6.7  3.6.4  3.6.1  3.5.2  3.4.10  3.2.6   2.7.17  2.7.14  2.7.11  2.7.8
Line 398: Line 464:
   3.10.0a6  3.8.7rc1  3.7.6rc1   3.6.8rc1   3.5.7rc1   3.4.9     3.4.0     3.1.2rc1   2.7.9rc1  2.6.6     2.4.5   3.10.0a6  3.8.7rc1  3.7.6rc1   3.6.8rc1   3.5.7rc1   3.4.9     3.4.0     3.1.2rc1   2.7.9rc1  2.6.6     2.4.5
   3.10.0a5  3.8.7 ....   3.10.0a5  3.8.7 ....
-  .....+  ..
 +  ...
  
 2. now select the python version you would like to install: 2. now select the python version you would like to install:
  
-  spack install python@3.8.2+  root@slurm-ctrl:~# spack install python@3.8.2 
 +  ==> 23834: Installing libiconv 
 +  ==> Using cached archive: /opt/packages/spack/var/spack/cache/_source-cache/archive/e6/e6a1b1b589654277ee790cce3734f07876ac4ccfaecbee8afa0b649cf529cc04.tar.gz 
 +  ==> Staging archive: /tmp/root/spack-stage/spack-stage-libiconv-1.16-b2wenwxf2widzewcvnhsxtjyisz3bcmc/libiconv-1.16.tar.gz 
 +  ==> Created stage in /tmp/root/spack-stage/spack-stage-libiconv-1.16-b2wenwxf2widzewcvnhsxtjyisz3bcmc 
 +  ==> No patches needed for libiconv 
 +  ==> 23834: libiconv: Building libiconv [AutotoolsPackage] 
 +  ==> 23834: libiconv: Executing phase: 'autoreconf' 
 +  ==> 23834: libiconv: Executing phase: 'configure' 
 +  ==> 23834: libiconv: Executing phase: 'build' 
 +  ==> 23834: libiconv: Executing phase: 'install' 
 +  ==> 23834: libiconv: Successfully installed libiconv 
 +  Fetch: 0.04s.  Build: 24.36s.  Total: 24.40s. 
 +  [+] /opt/packages/spack/opt/spack/linux-ubuntu18.04-skylake_avx512/gcc-9.3.0/libiconv-1.16-b2wenwxf2widzewcvnhsxtjyisz3bcmc 
 +  ==> 23834: Installing libbsd 
 +  ... 
 +  ... 
 +  ... 
 +  ==> 23834: Installing python 
 +  ==> Fetching https://www.python.org/ftp/python/3.8.2/Python-3.8.2.tgz 
 +  ############################################################################################################ 100.0% 
 +  ==> Staging archive: /tmp/root/spack-stage/spack-stage-python-3.8.2-vmyztzplzddt2arrsx7d7koebyuzvk6s/Python-3.8.2.tgz 
 +  ==> Created stage in /tmp/root/spack-stage/spack-stage-python-3.8.2-vmyztzplzddt2arrsx7d7koebyuzvk6s 
 +  ==> Ran patch() for python 
 +  ==> 23834: python: Building python [AutotoolsPackage] 
 +  ==> 23834: python: Executing phase: 'autoreconf' 
 +  ==> 23834: python: Executing phase: 'configure' 
 +  ==> 23834: python: Executing phase: 'build' 
 +  ==> 23834: python: Executing phase: 'install' 
 +  ==> 23834: python: Successfully installed python 
 +  Fetch: 1.81s.  Build: 1m 42.11s.  Total: 1m 43.91s. 
 +  [+] /opt/packages/spack/opt/spack/linux-ubuntu18.04-skylake_avx512/gcc-9.3.0/python-3.8.2-vmyztzplzddt2arrsx7d7koebyuzvk6s 
 + 
 + 
 +This will take a few minutes, depending on the version being installed. 
  
 3. Now you need to add a modules file 3. Now you need to add a modules file
  
 +  root@slurm-ctrl:~# vi /opt/modules/modulefiles/python-3.8.2
 +
 +<code>
 +#%Module1.0
 +proc ModulesHelp { } {
 +global dotversion
 +  
 +puts stderr "\tPython 3.8.2"
 +}
 +
 +module-whatis "Python 3.8.2"
 +
 +set     main_root       /opt/packages/spack/opt/spack/linux-ubuntu18.04-skylake_avx512/gcc-9.3.0/python-3.8.2-vmyztzplzddt2arrsx7d7koebyuzvk6s
 +set-alias       python3.8       /opt/packages/spack/opt/spack/linux-ubuntu18.04-skylake_avx512/gcc-9.3.0/python-3.8.2-vmyztzplzddt2arrsx7d7koebyuzvk6s/bin/python3.8
 +
 +prepend-path    PATH    $main_root/bin
 +prepend-path    LIBRARY_PATH    $main_root/lib
 +
 +</code>
 +
 +4. New module should now be available:
 +
 +  root@slurm-ctrl:~# module avail
 +  -------------------------------------------- /opt/modules/modulefiles -----------------------------------------
 +  anaconda3  cuda-11.2.1  intel-mpi             module-info  py-mpi4py      python-3.7.7       use.own
 +  bzip       dot          intel-mpi-benchmarks  modules      python-2.7.18  python-3.8.2
 +  cuda-10.2  gcc-6.5.0    miniconda3            null         python-3.5.7   python-3.9.2
 +  cuda-11.0  go-1.15.3    module-git            openmpi      python-3.6.10  singularity-3.6.4
 +
 +5. Load the new module
 +
 +  root@slurm-ctrl:~# module load python-3.8.2
 +
 +6. Verify it works
 +
 +  root@slurm-ctrl:~# python3.8
 +  Python 3.8.2 (default, Mar 19 2021, 11:05:37)
 +  [GCC 9.3.0] on linux
 +  Type "help", "copyright", "credits" or "license" for more information.
 +  >>> exit()
 +
 +7. Unload the new module
  
 +  module unload python-3.8.2
  
  
Line 945: Line 1090:
  
 ===== Links ===== ===== Links =====
 +
 +[[https://developer.nvidia.com/cuda-toolkit|CUDA Toolkit]]
 +
 +[[https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html|NVIDIA CUDA Installation Guide for Linux]]
 +
  
 https://www.admin-magazine.com/HPC/Articles/Warewulf-Cluster-Manager-Development-and-Run-Time/Warewulf-3-Code/MPICH2 https://www.admin-magazine.com/HPC/Articles/Warewulf-Cluster-Manager-Development-and-Run-Time/Warewulf-3-Code/MPICH2
/data/www/wiki.inf.unibz.it/data/pages/tech/slurm.txt · Last modified: 2022/11/24 16:17 by kohofer