# Runs the salmon_quant task once per input BAM (scattered) and gathers the
# per-sample outputs into arrays, in the same order as `bams`.
workflow salmon_quant_array {
    File transcriptome_fasta
    File transcript_to_gene_map
    Array[File] bams
    # text removed from each BAM's base name to derive the per-sample output prefix
    String bam_suffix = 'Aligned.toTranscriptome.out.bam' # default for STAR BAMs
    String library = 'A'

    # resource usage monitoring code
    # seconds to sleep between two consecutive resource usage samples
    Int resource_log_interval = 15
    # shell snippet injected at the top of the task command: defines runtimeInfo() and
    # starts a background loop appending CPU/memory/disk usage to resource_usage.log
    String resource_log_code = "function runtimeInfo() { echo [$(date)]; echo CPU usage: $(top -bn1 | grep -i '^cpu' | tail -n 1)%; echo Memory usage: $(free -m | grep Mem | awk '{ OFMT=" + '"%.0f"' + "; print ($3/$2)*100; }')%; echo Disk usage: $(df | grep cromwell_root | awk '{print $4}'); }\nwhile true; do runtimeInfo >> resource_usage.log; sleep " + resource_log_interval + "; done &"

    Int runtime_preemptible = 3
    # memory in GB; when left unset, a per-BAM default is derived from the BAM size (see call below)
    Int? runtime_memory
    Int runtime_cpu = 6
    String runtime_docker = 'quay.io/biocontainers/salmon:0.14.1--h86b0361_1'

    # zones in which compute can run
    # FireCloud stores output in the US multi-region, so there should be no network costs with US regions
    # zones filtered to exclude regions with older generation default CPU platforms
    # set value to be more restrictive if input data is using regional storage
    String runtime_zones = 'us-west1-a us-west1-b us-west1-c'

    parameter_meta {
        library: 'Salmon library type: https://salmon.readthedocs.io/en/latest/salmon.html#what-s-this-libtype; by default, automatically infer'
        runtime_zones: 'Google zones in which to start VMs. Make sure you have sufficient quota.'
    }

    scatter (bam in bams) {
        call salmon_quant {
            input:
                transcriptome_fasta=transcriptome_fasta,
                transcript_to_gene_map=transcript_to_gene_map,
                bam=bam,
                bam_suffix=bam_suffix,
                library=library,
                resource_log_code=resource_log_code,
                runtime_preemptible=runtime_preemptible,
                # explicit override wins; otherwise 8 GB plus half the BAM size in GB, rounded up.
                # select_first yields a plain Int, matching the task's non-optional input.
                runtime_memory=select_first([runtime_memory, 8 + ceil(size(bam, 'G') / 2)]),
                runtime_cpu=runtime_cpu,
                runtime_docker=runtime_docker,
                runtime_zones=runtime_zones
        }
    }

    # one entry per input BAM; the task's resource_log output is not re-exported here
    output {
        Array[File] transcript = salmon_quant.transcript
        Array[File] gene = salmon_quant.gene
        Array[File] aux_info = salmon_quant.aux_info
        Array[File] meta_json = salmon_quant.meta_json
        Array[File] salmon_log = salmon_quant.salmon_log
    }
}

# Runs `salmon quant` on one BAM (passed via -a) and packages the results.
#
# Outputs are named after `name`, which is the BAM's base name with `bam_suffix` removed.
# A salmon exit code of 1 accompanied by a minAssignedFrags message in its log is
# tolerated: empty placeholder files are produced for the gene-level and aux_info
# outputs. Any other non-zero exit code fails the task.
task salmon_quant {
    File transcriptome_fasta
    File transcript_to_gene_map
    File bam
    String bam_suffix
    # NOTE: sub() interprets bam_suffix as a regular expression and is not anchored to the end of the name
    String name = sub(basename(bam), bam_suffix, '')
    String library

    # shell code executed before anything else in the command (expected to start resource logging
    # into resource_usage.log, which is a required output of this task)
    String resource_log_code
    Int runtime_preemptible
    # memory in GB
    Int runtime_memory
    Int runtime_cpu
    # disk in GB: transcriptome + 1.2x the BAM size, rounded up, plus 1 GB of headroom
    Int runtime_disk_space = ceil(size(transcriptome_fasta, 'G') + 1.2 * size(bam, 'G')) + 1
    String runtime_docker
    String runtime_zones

    command <<<
        ${resource_log_code}

        set -euxo pipefail

        mkdir salmon_quant

        # permit salmon to return non-zero, as this may indicate there are too few reads;
        # the exit code is inspected explicitly below
        set +e
        salmon quant \
            -t "${transcriptome_fasta}" \
            -g "${transcript_to_gene_map}" \
            -l ${library} \
            -a "${bam}" \
            -p ${runtime_cpu} \
            -o salmon_quant \
            --incompatPrior 0.0 \
            --seqBias \
            --gcBias \
            --reduceGCMemory \
            --posBias
        salmon_return=$?
        set -e

        ls -lhR salmon_quant

        case "$salmon_return" in
            0)
                gzip -c salmon_quant/quant.genes.sf > "${name}.quant.genes.sf.gz"
                # compress the archive so that its content matches the .tar.gz file name
                tar -c salmon_quant/aux_info/*.gz | gzip -c > "${name}.aux_info.tar.gz"
                ;;
            1)
                # if salmon returned 1, check if we have message about too few reads; else die, as other error happened
                if ! grep minAssignedFrags salmon_quant/logs/salmon_quant.log; then
                    echo "salmon quant exited with code 1 and no minAssignedFrags message was found in its log" >&2
                    exit 1
                fi
                # some outputs are not created in this case, so create empty placeholder files to prevent delocalization errors
                touch "${name}.quant.genes.sf.gz"
                touch "${name}.aux_info.tar.gz"
                ;;
            *)
                # any other exit code (crash, kill by signal, ...) is a hard failure; do not package partial outputs
                echo "salmon quant failed with exit code $salmon_return" >&2
                exit "$salmon_return"
                ;;
        esac

        gzip -c salmon_quant/quant.sf > "${name}.quant.sf.gz"
        mv salmon_quant/aux_info/meta_info.json "${name}.meta_info.json"
        mv salmon_quant/logs/salmon_quant.log "${name}.salmon_quant.log"
    >>>

    output {
        File transcript = '${name}.quant.sf.gz'
        File gene = '${name}.quant.genes.sf.gz'
        File aux_info = '${name}.aux_info.tar.gz'
        File meta_json = '${name}.meta_info.json'
        File resource_log = 'resource_usage.log'
        File salmon_log = '${name}.salmon_quant.log'
    }

    runtime {
        disks: 'local-disk ${runtime_disk_space} HDD'
        cpu: '${runtime_cpu}'
        memory: '${runtime_memory} GB'
        docker: '${runtime_docker}'
        preemptible: '${runtime_preemptible}'
        zones: runtime_zones
    }
}