# Quantify a single transcriptome-aligned BAM with `salmon quant`.
#
# Inputs:
#   transcriptome_fasta     - transcript sequences, passed to salmon via -t
#   transcript_to_gene_map  - transcript-to-gene mapping, passed to salmon via -g
#   bam                     - alignments, passed to salmon via -a
#   bam_suffix              - pattern removed from basename(bam) to derive the output name prefix
#   library                 - salmon library type, passed via -l
#   runtime_memory          - optional memory in GB; when unset, 8 + ceil(BAM size in GB / 2) is used
#
# Outputs: gzipped transcript- and gene-level quantifications, the aux_info archive,
# meta_info.json and the salmon log produced by the salmon_quant task.
workflow salmon_quant_singlebam {
    File transcriptome_fasta
    File transcript_to_gene_map
    File bam
    String bam_suffix = 'Aligned.toTranscriptome.out.bam' # default for STAR BAMs
    String library = 'A'

    # resource usage monitoring code
    Int resource_log_interval = 15
    # log resource usage for debugging purposes
    String resource_log_code = "function runtimeInfo() { echo [$(date)]; echo CPU usage: $(top -bn1 | grep -i '^cpu' | tail -n 1)%; echo Memory usage: $(free -m | grep Mem | awk '{ OFMT=" + '"%.0f"' + "; print ($3/$2)*100; }')%; echo Disk usage: $(df | grep cromwell_root | awk '{print $4}'); }\nwhile true; do runtimeInfo >> resource_usage.log; sleep " + resource_log_interval + "; done &"

    Int runtime_preemptible = 3
    Int? runtime_memory
    Int runtime_cpu = 6
    String runtime_docker = 'quay.io/biocontainers/salmon:0.14.1--h86b0361_1'

    # zones in which compute can run
    # FireCloud stores output in the US multi-region, so there should be no network costs with US regions
    # zones filtered to exclude regions with older generation default CPU platforms
    # set value to be more restrictive if input data is using regional storage
    String runtime_zones = 'us-west1-a us-west1-b us-west1-c'

    parameter_meta {
        library: 'Salmon library type: https://salmon.readthedocs.io/en/latest/salmon.html#what-s-this-libtype; by default, automatically infer'
        runtime_zones: 'Google zones in which to start VMs. Make sure you have sufficient quota.'
    }

    call salmon_quant {
        input:
            transcriptome_fasta=transcriptome_fasta,
            transcript_to_gene_map=transcript_to_gene_map,
            bam=bam,
            bam_suffix=bam_suffix,
            library=library,
            resource_log_code=resource_log_code,
            runtime_preemptible=runtime_preemptible,
            # select_first yields a plain Int: the caller's value when given,
            # otherwise a default that scales with the BAM size
            runtime_memory=select_first([runtime_memory, 8 + ceil(size(bam, 'G') / 2)]),
            runtime_cpu=runtime_cpu,
            runtime_docker=runtime_docker,
            runtime_zones=runtime_zones
    }

    output {
        File transcript = salmon_quant.transcript
        File gene = salmon_quant.gene
        File aux_info = salmon_quant.aux_info
        File meta_json = salmon_quant.meta_json
        File salmon_log = salmon_quant.salmon_log
    }
}

# Run `salmon quant` on one BAM and collect its outputs under a common name prefix.
#
# The prefix (`name`) is basename(bam) with `bam_suffix` removed via sub(), so
# `bam_suffix` is interpreted as a pattern rather than a literal string.
#
# Exit-status handling in the command:
#   0      - all outputs are packaged normally
#   1      - tolerated only if the salmon log mentions minAssignedFrags (too few reads);
#            empty placeholder files are created for the gene-level and aux_info outputs
#   other  - the task fails with salmon's own exit status
task salmon_quant {
    File transcriptome_fasta
    File transcript_to_gene_map
    File bam
    String bam_suffix
    String name = sub(basename(bam), bam_suffix, '')
    String library

    String resource_log_code
    Int runtime_preemptible
    Int runtime_memory
    Int runtime_cpu
    # disk in GB: transcriptome + 1.2x the BAM, rounded up, plus 1 GB of headroom
    Int runtime_disk_space = ceil(size(transcriptome_fasta, 'G') + 1.2 * size(bam, 'G')) + 1
    String runtime_docker
    String runtime_zones

    command <<<
        ${resource_log_code}
        set -euxo pipefail

        mkdir salmon_quant

        # permit salmon to return non-zero, as this may indicate there are too few reads
        set +e
        salmon quant \
            -t ${transcriptome_fasta} \
            -g ${transcript_to_gene_map} \
            -l ${library} \
            -a ${bam} \
            -p ${runtime_cpu} \
            -o salmon_quant \
            --incompatPrior 0.0 \
            --seqBias \
            --gcBias \
            --reduceGCMemory \
            --posBias
        salmon_return=$?
        set -e

        ls -lhR salmon_quant

        if [ $salmon_return -eq 1 ]; then
            # if salmon returned 1, check if we have message about too few reads; else die, as other error happened
            # (with set -e active, a failed grep aborts the script here)
            grep minAssignedFrags salmon_quant/logs/salmon_quant.log

            # some outputs are not created in this case, so create empty placeholder files to prevent delocalization errors
            touch ${name}.quant.genes.sf.gz
            touch ${name}.aux_info.tar.gz
        elif [ $salmon_return -ne 0 ]; then
            # any other non-zero status is a genuine failure: stop now and propagate it
            # instead of attempting to package incomplete output
            echo "salmon quant exited with unexpected status $salmon_return" >&2
            exit $salmon_return
        else
            gzip -c salmon_quant/quant.genes.sf > ${name}.quant.genes.sf.gz
            # NOTE(review): tar is invoked without -z, so this archive is not
            # gzip-compressed despite its .tar.gz name; kept as-is for downstream compatibility
            tar -c salmon_quant/aux_info/*.gz > ${name}.aux_info.tar.gz
        fi

        gzip -c salmon_quant/quant.sf > ${name}.quant.sf.gz
        mv salmon_quant/aux_info/meta_info.json ${name}.meta_info.json
        mv salmon_quant/logs/salmon_quant.log ${name}.salmon_quant.log
    >>>

    output {
        File transcript = '${name}.quant.sf.gz'
        File gene = '${name}.quant.genes.sf.gz'
        File aux_info = '${name}.aux_info.tar.gz'
        File meta_json = '${name}.meta_info.json'
        File resource_log = 'resource_usage.log'
        File salmon_log = '${name}.salmon_quant.log'
    }

    runtime {
        disks: 'local-disk ${runtime_disk_space} HDD'
        cpu: '${runtime_cpu}'
        memory: '${runtime_memory} GB'
        docker: '${runtime_docker}'
        preemptible: '${runtime_preemptible}'
        zones: runtime_zones
    }
}