# a list of all the books we are analyzing
DATA = glob_wildcards('data/{book}.txt').book

# this is for running on HPC resources
localrules: all, clean, make_archive
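
# A hedged sketch of cluster usage: when jobs are dispatched to a scheduler,
# the localrules above run directly on the login node instead. The exact
# flags depend on your Snakemake version and scheduler, but an invocation
# might look like:
#   snakemake --snakefile Snakefile_all --jobs 10 --cluster 'sbatch -c {threads}'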

# the default rule
rule all:
    input:
        'zipf_analysis.tar.gz'
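
# To build everything (i.e. the default target above), one would typically
# run something like the following; --cores caps the CPU cores Snakemake
# may use in total:
#   snakemake --snakefile Snakefile_all --cores 4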

# delete everything so we can re-run things
rule clean:
    shell:
        '''
        rm -rf source/__pycache__
        rm -f zipf_analysis.tar.gz processed_data/* results/*
        '''
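
# A single rule can also be invoked by name, e.g. to wipe generated files:
#   snakemake --snakefile Snakefile_all --cores 1 clean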

# count words in one of our books
# logfiles from each run are put in .log files
rule count_words:
    input:
        wc='source/wordcount.py',
        book='data/{file}.txt'
    output: 'processed_data/{file}.dat'
    threads: 4
    log: 'processed_data/{file}.log'
    shell:
        '''
        echo "Running {input.wc} with {threads} cores on {input.book}." &> {log} &&
        python {input.wc} {input.book} {output} >> {log} 2>&1
        '''
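
# Note: {threads} is capped by the --cores value given on the command line,
# so with 'snakemake --cores 2' this rule receives 2 threads, not 4.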

# create a plot for each book
# shows example usage of the resources keyword
rule make_plot:
    input:
        plotcount='source/plotcount.py',
        book='processed_data/{file}.dat'
    output: 'results/{file}.png'
    resources: gpu=1
    shell: 'python {input.plotcount} {input.book} {output}'
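
# The gpu resource above only constrains scheduling when a global limit is
# supplied on the command line; with e.g.
#   snakemake --snakefile Snakefile_all --cores 4 --resources gpu=1
# at most one make_plot job runs at a time.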

# generate summary table
rule zipf_test:
    input:
        zipf='source/zipf_test.py',
        books=expand('processed_data/{book}.dat', book=DATA)
    output: 'results/results.txt'
    shell: 'python {input.zipf} {input.books} > {output}'
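
# expand() builds one path per entry in DATA; with hypothetical books
# 'abyss' and 'isles' it would yield:
#   ['processed_data/abyss.dat', 'processed_data/isles.dat']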

# create an archive with all of our results
rule make_archive:
    input:
        expand('results/{book}.png', book=DATA),
        expand('processed_data/{book}.dat', book=DATA),
        'results/results.txt'
    output: 'zipf_analysis.tar.gz'
    shell: 'tar -czvf {output} {input}'
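
# To preview which jobs would run without executing anything:
#   snakemake --snakefile Snakefile_all --cores 1 --dry-run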