Wellcome Sanger Institute Configuration

To use, run the pipeline with -profile sanger. This will download and launch the sanger.config, which has been pre-configured with a setup suitable for the Wellcome Sanger Institute LSF cluster.
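For instance, a launch command might look like the sketch below; the pipeline name, input sheet and output directory are placeholders, not part of this profile.

# Minimal sketch: launch an nf-core pipeline with the Sanger profile.
# nf-core/rnaseq, samplesheet.csv and results/ are placeholders; substitute
# the pipeline and parameters you actually run.
nextflow run nf-core/rnaseq \
    -profile sanger \
    --input samplesheet.csv \
    --outdir results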

Running the workflow on the Wellcome Sanger Institute cluster

The latest version of Nextflow is not installed by default on the cluster. You will need to install it into a directory you have write access to.

  • Install Nextflow: follow the instructions in the Nextflow documentation.

A recommended place to move the nextflow executable to is ~/bin so that it is on your PATH, as in the sketch below.
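A minimal install sketch, assuming you want the launcher in ~/bin (the target directory is only a suggestion):

# Download the Nextflow launcher into the current directory
curl -s https://get.nextflow.io | bash

# Move it onto your PATH, e.g. ~/bin
mkdir -p ~/bin
mv nextflow ~/bin/
chmod +x ~/bin/nextflow

# Confirm it runs
nextflow -version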

Nextflow manages each process as a separate job that is submitted to the cluster using the bsub command.

If you ask Nextflow to use Singularity to run the individual jobs, make sure that your account has access to the Singularity binary by adding this line to your .bashrc file:

[[ -f /software/modules/ISG/singularity ]] && module load ISG/singularity
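To check that this took effect, open a fresh login shell and confirm that the binary is available; the sketch below assumes the ISG/singularity module from the line above.

# Load the module as .bashrc would, then confirm singularity is on the PATH
module load ISG/singularity
which singularity
singularity --version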

Nextflow should not run directly on the submission node but on a compute node. To do so, create a shell script with a structure similar to the following and submit it with bsub < $PWD/my_script.sh:

#!/bin/bash
#BSUB -o /path/to/a/log/dir/%J.o
#BSUB -e /path/to/a/log/dir/%J.e
#BSUB -M 8000
#BSUB -q oversubscribed
#BSUB -n 2
 
export HTTP_PROXY='http://wwwcache.sanger.ac.uk:3128'
export HTTPS_PROXY='http://wwwcache.sanger.ac.uk:3128'
export NXF_ANSI_LOG=false
export NXF_OPTS="-Xms8G -Xmx8G -Dnxf.pool.maxThreads=2000"
export NXF_VER=22.04.0-5697
 
 
nextflow run \
/path/to/nf-core/pipeline/main.nf \
-w /path/to/some/dir/work \
-profile sanger \
-c my_specific.config \
-qs 1000 \
-resume
 
## clean up on exit 0 - delete this if you want to keep the work dir
status=$?
if [[ $status -eq 0 ]]; then
  rm -r /path/to/some/dir/work
fi
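Once the wrapper script is in place, a typical submit-and-monitor sequence looks like the sketch below; the log locations depend on the #BSUB -o/-e directives above, and .nextflow.log is written in the directory the script runs from.

# Submit the wrapper script to LSF
bsub < $PWD/my_script.sh

# Watch the driver job and the jobs Nextflow submits on your behalf
bjobs -w

# Follow the Nextflow log in the launch directory
tail -f .nextflow.log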

Config file

See the config file in the nf-core/configs repository on GitHub.

sanger.config
 
// Extract the name of the cluster to tune the parameters below
def clustername = "farm22"
try {
    clustername = ['/bin/bash', '-c', 'lsid | awk \'$0 ~ /^My cluster name is/ {print $5}\''].execute().text.trim()
} catch (java.io.IOException e) {
    System.err.println("WARNING: Could not run lsid to determine current cluster, defaulting to farm22")
}
 
// Profile details
params {
    config_profile_description = "The Wellcome Sanger Institute HPC cluster (${clustername}) profile"
    config_profile_contact = 'Priyanka Surana (@priyanka-surana)'
    config_profile_url = 'https://www.sanger.ac.uk'
}
 
 
// Queue and LSF submission options
process {
    executor = 'lsf'
 
    // Currently a single set of rules for all clusters, but we could use $clustername to apply
    // different rules to different clusters.
    queue = {
        if ( task.time >= 15.day ) {
            if ( task.memory > 680.GB ) {
                error "There is no queue for jobs that need >680 GB and >15 days"
            } else {
                "basement"
            }
        } else if ( task.memory > 720.GB ) {
            "teramem"
        } else if ( task.memory > 350.GB ) {
            "hugemem"
        } else if ( task.time > 7.day ) {
            "basement"
        } else if ( task.time > 2.day ) {
            "week"
        } else if ( task.time > 12.hour ) {
            "long"
        } else if ( task.time > 1.min ) {
            "normal"
        } else {
            "small"
        }
    }
 
    withLabel: gpu {
        clusterOptions = { "-M "+task.memory.toMega()+" -R 'select[mem>="+task.memory.toMega()+"] rusage[mem="+task.memory.toMega()+"] span[ptile=1]' -gpu 'num=1:j_exclusive=yes'" }
        queue = { task.time > 48.h ? 'gpu-basement' : task.time > 12.h ? 'gpu-huge' : 'gpu-normal' }
        containerOptions = {
            workflow.containerEngine == "singularity" ? '--containall --cleanenv --nv':
            ( workflow.containerEngine == "docker" ? '--gpus all': null )
        }
    }
}
 
 
// Executor details
executor {
    name = 'lsf'
    perJobMemLimit = true
    poolSize = 4
    submitRateLimit = '5 sec'
    killBatchSize = 50
}
 
 
// Max resources
if (clustername.startsWith("tol")) {
    // tol cluster
    params.max_memory = 1.4.TB
    params.max_cpus = 64
    params.max_time = 89280.min // 62 days
    // Unlike the farm settings below, we don't mount any filesystem by default.
    // Pipelines that need to see certain filesystems have to set singularity.runOptions themselves
 
    process {
        resourceLimits = [
            memory: 1.4.TB,
            cpus: 64,
            time: 89280.min
        ]
    }
 
} else {
    // defaults for the main farm
    params.max_memory = 2.9.TB
    params.max_cpus = 256
    params.max_time = 43200.min // 30 days
 
    process {
        resourceLimits = [
            memory: 2.9.TB,
            cpus: 256,
            time: 43200.min
        ]
    }
 
    // Mount all filesystems by default
    singularity.runOptions = '--bind /lustre --bind /nfs --bind /data --bind /software'
}