# test.smk
# =============================================================================
# Snakefile — comprehensive feature test for Snakemake highlighting
# Minimal comments; lines marked `# ERROR:` are intentionally invalid.
# =============================================================================
# ---------- Top-level Python ----------
# Deliberately malformed if/else fixture: each block opener is followed by
# stray text on the same line, which the ERROR markers flag as invalid.
# Keep the lines exactly as written; they exist to exercise error highlighting.
if True: forbidden # ERROR: no text allowed after block opener
a = 1
else: forbidden # ERROR: same
a = 0
# Inline workflow configuration; keys are read by the directives and rules below.
config = dict(
    method="B",
    samples=["S1", "S2"],
    ref="ref/genome.fa",
    gtf="ref/genes.gtf",
    threads_default=4,
    work="work",
    adapters=dict(fwd="AGATCGGAAGAGC", rev="AGATCGGAAGAGC"),
)

# Sample identifiers, taken straight from the configuration (same list object).
SAMPLES = config["samples"]
def fq(sample, mate):
    """Build the raw FASTQ path for one mate of a sample.

    Args:
        sample: Sample identifier placed in the file name.
        mate: Mate label placed after ``_R`` (this file calls it with 1 and 2).

    Returns:
        The string ``raw/<sample>_R<mate>.fastq.gz``.
    """
    return "raw/{}_R{}.fastq.gz".format(sample, mate)
# ---------- Global Snakemake directives ----------
# Working directory comes from the inline configuration.
workdir:
    config["work"]

# Rules named here are declared as local rules.
localrules: all, qc_fastqc

# Rule precedence declaration between sort_index and align.
ruleorder: sort_index > align

report:
    "report/report.html"

envvars:
    "LD_LIBRARY_PATH",
    "OMP_NUM_THREADS"

# Global constraint on the `sample` wildcard: letters, digits and underscores.
wildcard_constraints: sample = r"[A-Za-z0-9_]+"

include:
    "extras.smk"

container:
    "docker://ubuntu:22.04"

conda:
    "envs/global.yml"
# NOTE(review): `containerized:` is given no value here — presumably a
# deliberate fixture for an empty directive; confirm.
containerized:
# Legacy/compat directives still recognized in old workflows
# NOTE(review): `moduleinclude` is not used anywhere else in this file —
# verify it is a keyword the highlighter is expected to accept.
moduleinclude: "legacy/tools.smk" # legacy include for modules
# deprecated (kept for legacy)
# Subworkflow fixture named `oldwf`. Two lines are intentionally invalid, as
# their ERROR markers state: the trailing `something` after the header colon
# and the nested `input:` directive. `workdir` and `snakefile` carry no marker.
subworkflow oldwf: something #ERROR: nothing allowed after colon
workdir: "oldwf"
snakefile: "workflow/Snakefile"
input: # ERROR: `input` directive not allowed in subworkflows
# ---------- Modules and use rule ----------
# Module fixture named `asm`, pointing at a module Snakefile and a config file.
# The trailing `something` on the header and the nested `input:` directive are
# intentionally invalid, as their ERROR markers state.
module asm: something #ERROR: nothing allowed after colon
snakefile: "modules/assembly.smk"
config: "modules/assembly.yaml"
input: "test" # ERROR: `input` directive not allowed in modules
# Reuse a rule from the module; inside `with:` we use regular rule directives
# `use rule ... as ... with:` fixture that renames `assemble` to `assemble_mod`
# and overrides threads, params and message. Three lines are intentionally
# invalid, as their ERROR markers state: the trailing `something` on the
# header, the `snakefile:` directive, and the misspelled `outpt:` directive.
use rule assemble as assemble_mod with: something #ERROR: nothing allowed after colon
snakefile: # ERROR `snakefile` directive not allowed in rules
threads: 8
params:
mode = "quick"
message:
"Assembling {wildcards.sample} (module override)"
# wrong directive (kept to test error handling inside a use-body)
outpt: "SHOULD-NOT-BE-HERE" # ERROR: typo
# ---------- INTENTIONAL TOP-LEVEL ERRORS ----------
# Both lines below are intentionally invalid at top level (see their ERROR
# markers); they are kept verbatim so error styling can be checked.
workdirr: "typo/dir" # ERROR: unknown top-level keyword
snakefile: "top/level.smk" # ERROR: only valid inside module/subworkflow
# ---------- Pipeline ----------
rule all:
    # Aggregate target: requests the summary file of every entry in SAMPLES.
    input:
        expand(
            "results/{sample}/summary.txt",
            sample=SAMPLES,
        )
rule qc_fastqc:
    # Runs FastQC on both raw mates of a sample and redirects the tool's
    # stdout/stderr into the rule log.
    input:
        # Raw mate paths are resolved per sample through fq().
        r1 = lambda wc: fq(wc.sample, 1),
        r2 = lambda wc: fq(wc.sample, 2),
    output:
        html = "qc/{sample}_fastqc.html",
        zip = "qc/{sample}_fastqc.zip",
    threads: 2
    resources:
        mem_mb = 1024
    log:
        "log/fastqc_{sample}.log"
    params:
        extra = "--nogroup"
    # Every placeholder is separated by a space: adjacent placeholders such as
    # {threads}{params.extra} would expand into one fused shell word.
    shell:
        """
        fastqc -t {threads} {params.extra} -o qc {input.r1} {input.r2} > {log} 2>&1
        """
rule trim_cutadapt:
    # Adapter-trims both mates of a sample with cutadapt and redirects the
    # tool's stdout/stderr into the rule log.
    input:
        # Raw mate paths are resolved per sample through fq().
        r1 = lambda wc: fq(wc.sample, 1),
        r2 = lambda wc: fq(wc.sample, 2),
    output:
        r1 = "trim/{sample}_R1.fastq.gz",  # `sample`: wildcard
        r2 = "trim/{sample}_R2.fastq.gz",
        # NOTE(review): `report_dir` is not defined in the visible part of this
        # file — presumably provided elsewhere (e.g. an included file); confirm.
        report = f"{report_dir}/{{sample}}"  # `report_dir`: f-string interpolation, `sample`: wildcard
    params:
        # Forward / reverse adapter sequences from the configuration.
        a = config["adapters"]["fwd"],
        A = config["adapters"]["rev"],
    threads: 8
    conda:
        "envs/cutadapt.yml"
    log:
        "log/cutadapt_{sample}.log"
    # Output and input placeholders are space-separated so each path reaches
    # cutadapt as its own argument instead of one fused word.
    shell:
        """
        cutadapt -j {threads} -a {params.a} -A {params.A} \
            -o {output.r1} -p {output.r2} {input.r1} {input.r2} > {log} 2>&1
        """
# Example of wrapper usage (version string illustrative)
rule align:
    # Aligns the trimmed mates against the configured reference and writes an
    # unsorted BAM; stderr of the pipeline's last command goes to the rule log.
    input:
        r1 = "trim/{sample}_R1.fastq.gz",
        r2 = "trim/{sample}_R2.fastq.gz",
        ref = config["ref"],
    output:
        bam = "map/{sample}.unsorted.bam",
    threads: 12
    resources:
        mem_mb = 8000
    params:
        # BWA-MEM2 example options
        extra = "-M"
    log:
        "log/align_{sample}.log"
    # NOTE(review): this rule declares both `wrapper` and `shell`; Snakemake
    # accepts only one execution directive per rule. Both are kept for
    # highlighting coverage — confirm whether this should carry an ERROR marker.
    wrapper:
        "0.90.0/bio/bwa/mem2"
    # Placeholders are space-separated so thread count, options, reference and
    # reads are passed as distinct arguments rather than one fused word.
    shell:
        "bwa-mem2 mem -t {threads} {params.extra} {input.ref} {input.r1} {input.r2} | samtools view -bS - > {output.bam} 2> {log}"
rule sort_index:
    # Sorts the unsorted BAM of a sample and then indexes the sorted file.
    input:
        "map/{sample}.unsorted.bam"
    output:
        bam = "map/{sample}.bam",
        bai = "map/{sample}.bam.bai",
    threads: 6
    resources:
        mem_mb = 4000
    envmodules:
        "samtools/1.16"
    shadow:
        "minimal"
    # The output path and the following argument are separated by a space;
    # fused placeholders ({output.bam}{input}, {threads}{output.bam}) would
    # hand samtools a single nonsensical word.
    shell:
        """
        samtools sort -@ {threads} -o {output.bam} {input}
        samtools index -@ {threads} {output.bam}
        """
rule quantify:
    # Counts reads per feature with featureCounts, using the sorted BAM and
    # the configured GTF annotation.
    input:
        bam = "map/{sample}.bam",
        bai = "map/{sample}.bam.bai",
        gtf = config["gtf"],
    output:
        counts = "counts/{sample}.txt",
    threads: 4
    group:
        "counting"
    priority:
        50
    params:
        feature_type = "exon",
        id_attr = "gene_id",
    # The counts path and the BAM path are space-separated so the BAM is a
    # separate positional argument instead of being glued onto the -o value.
    shell:
        """
        featureCounts -T {threads} -a {input.gtf} -t {params.feature_type} -g {params.id_attr} \
            -o {output.counts} {input.bam}
        """
# Example of script & notebook directives
rule plot_qc:
    # Delegates to an external script; maps the FastQC zip to a PNG plot.
    input: "qc/{sample}_fastqc.zip"
    output: "plots/{sample}_qc.png"
    script: "scripts/plot_qc.py"  # not executed; present to test directive
rule explore_notebook:
    # Notebook-directive example: counts table in, per-sample notebook out.
    input: "counts/{sample}.txt"
    output: "notebooks/{sample}_eda.ipynb"
    notebook: "notebooks/template.ipynb"
# Example of per-rule container / cache / benchmark / message / name / version (legacy)
rule summarize:
    # Writes a small JSON summary (sample, BAM path, counts path, tag) for one
    # sample; also exercises several per-rule directives.
    input:
        bam = "map/{sample}.bam",
        counts = "counts/{sample}.txt"
    output:
        txt = "results/{sample}/summary.txt"
    params:
        tag = "{sample}"  # wildcard should highlight distinctly
    message: "Summarizing {wildcards.sample}"
    name: "summarize_{sample}"
    benchmark: "benchmark/summarize_{sample}.tsv"
    cache: "permissive"
    container: "docker://python:3.11"
    version: "1.0"  # legacy directive
    threads: 2
    resources:
        mem_mb = 512
    log: "log/summarize_{sample}.log"
    run:
        # Plain Python body: assemble the record, ensure the target directory
        # exists, then serialise the record followed by a newline.
        import json
        import os

        record = dict(
            sample=wildcards.sample,
            bam=input.bam,
            counts=input.counts,
            tag=params.tag,
        )

        summary_path = output.txt
        os.makedirs(os.path.dirname(summary_path), exist_ok=True)
        with open(summary_path, "w") as handle:
            json.dump(record, handle, indent=2)
            handle.write("\n")
# ---------- More intentional errors inside a rule body ----------
# Conditional rule definition: which of the two fixture rules gets declared
# depends on config["method"]. Both bodies contain directives that are marked
# as intentionally invalid for error-highlighting purposes.
if config["method"] == "A":
rule bad_header_examples_A:
input:
"map/{sample}.bam"
outpt: # ERROR: unknown directive
"nowhere.txt"
foo: # ERROR: unknown directive
"bar"
shell:
"true"
else:
rule bad_header_examples_B:
input:
"map/{sample}.bam"
output: # NOTE(review): `output` is a valid directive (unlike `outpt` in the A branch); the original "unknown directive" marker looked copy-pasted — confirm intent
"nowhere.txt"
foo: # ERROR: unknown directive
"bar"
shell:
"true"
# ---------- Using the module rule ----------
rule assemble_via_module:
    # Placeholder assembly step: takes both trimmed mates and writes a fixed
    # word into the contigs file via echo.
    input:
        "trim/{sample}_R1.fastq.gz",
        "trim/{sample}_R2.fastq.gz",
    output: "assembly/{sample}/contigs.fa"
    threads: 8
    shell: "echo assembly> {output}"
# ---------- Default target redirection ----------
rule final_default:
    # Flagged as the default target; creates an empty marker file.
    output: "FINAL.marker"
    default_target: True
    shell: "touch {output}"
# Anonymous-rule fixture: the text after the `rule:` opener is intentionally
# invalid (see the ERROR marker); the `input:` line that follows checks that
# highlighting resumes normally afterwards.
rule: no text allowed here # ERROR: no text allowed after block opener
input: "back_to_normal.txt"