Skip to content

Commit

Permalink
0.4.1 (#23)
Browse files Browse the repository at this point in the history
* 📝 Add more examples regarding #20

* 🐛 Fix devpars defaulting to a Namespace rather than a dict (#21, #22)

* 🔖 0.4.1

Co-authored-by: osdaf <[email protected]>
  • Loading branch information
pwwang and osdaf authored Jun 27, 2022
1 parent 38a3451 commit 887638b
Show file tree
Hide file tree
Showing 9 changed files with 352 additions and 248 deletions.
5 changes: 5 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 0.4.1

- 📝 Add more examples regarding #20
- 🐛 Fix devpars defaulting to a Namespace rather than a dict (#21, #22)

# 0.4.0

- ⬆️ Drop support for python 3.8 (brentp/cyvcf2#181)
Expand Down
84 changes: 84 additions & 0 deletions examples/multiallelic.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
##fileformat=VCFv4.1
##samtoolsVersion=0.1.18-r572
##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads">
##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same">
##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele frequency (assuming HWE)">
##INFO=<ID=AC1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele count (no HWE assumption)">
##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies">
##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3">
##INFO=<ID=CLR,Number=1,Type=Integer,Description="Log ratio of genotype likelihoods with and without the constraint">
##INFO=<ID=UGT,Number=1,Type=String,Description="The most probable unconstrained genotype configuration in the trio">
##INFO=<ID=CGT,Number=1,Type=String,Description="The most probable constrained genotype configuration in the trio">
##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias">
##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">
##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.">
##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.">
##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2.">
##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2.">
##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases">
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality non-reference bases">
##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
##source_20120424.1=vcf-annotate(r735) --fill-AC-AN -f +
##FILTER=<ID=StrandBias,Description="Min P-value for strand bias (INFO/PV4) [0.0001]">
##FILTER=<ID=EndDistBias,Description="Min P-value for end distance bias (INFO/PV4) [0.0001]">
##FILTER=<ID=MaxDP,Description="Maximum read depth (INFO/DP or INFO/DP4) [10000000]">
##FILTER=<ID=BaseQualBias,Description="Min P-value for baseQ bias (INFO/PV4) [0]">
##FILTER=<ID=MinAB,Description="Minimum number of alternate bases (INFO/DP4) [2]">
##FILTER=<ID=Qual,Description="Minimum value of the QUAL field [10]">
##FILTER=<ID=VDB,Description="Minimum Variant Distance Bias (INFO/VDB) [0]">
##FILTER=<ID=GapWin,Description="Window size for filtering adjacent gaps [3]">
##FILTER=<ID=MapQualBias,Description="Min P-value for mapQ bias (INFO/PV4) [0]">
##FILTER=<ID=SnpGap,Description="SNP within INT bp around a gap to be filtered [10]">
##FILTER=<ID=RefN,Description="Reference base is N []">
##FILTER=<ID=MinDP,Description="Minimum read depth (INFO/DP or INFO/DP4) [2]">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##source_20120424.2=vcf-annotate(r735) --fill-AC-AN -f +
##FILTER=<ID=MinMQ,Description="Minimum RMS mapping quality for SNPs (INFO/MQ) [30]">
##source_20120710.1=vcf-annotate(r761) -f q=30 mpileup-v1/merged.filt.vcf.gz
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C D
1 10492 . C T 999 PASS DP=213;VDB=0.0102;AF1=0.375;AC1=3;DP4=84,74,34,19;MQ=32;FQ=999;PV4=0.2,0.11,0.057,0.13;AN=8;AC=3 GT:PL:DP:SP:GQ 0/1:85,0,255:57:3:86 0/0:0,123,255:41:0:99 0/1:255,0,255:47:0:99 0/1:114,0,255:66:4:99
1 10583 . G A,C 20 PASS DP=134;VDB=0.0071;AF1=0.1242;AC1=1;DP4=78,41,6,6;MQ=32;FQ=20;PV4=0.35,0.29,0.052,1;AN=8;AC=1 GT:PL:DP:SP:GQ 0/1:26,0,227:40:8:21 2/2:0,35,255:21:0:40 0/0:0,108,255:36:0:99 0/0:0,21,255:34:0:26
1 10821 . T A,C 49.7 PASS DP=9;VDB=0.0091;AF1=1;AC1=5;DP4=1,3,0,4;MQ=12;FQ=7.75;PV4=1,1,1,1;AN=8;AC=6 GT:PL:DP:SP:GQ 1/1:42,9,0:33:0:20 0/1:0,3,4:1:0:2 1/1:12,1,0:2:0:4 0/1:0,6,8:2:0:2
2 16103 . T G,C,A 24.3 PASS DP=110;VDB=0.0122;AF1=0.2119;AC1=2;DP4=49,26,23,2;MQ=31;FQ=24.3;PV4=0.01,1,6.9e-13,0.33;AN=8;AC=1 GT:PL:DP:SP:GQ 0/0:0,2,228:39:11:5 0/0:0,7,189:15:6:10 0/0:0,0,218:25:0:4 0/1:26,0,192:21:5:24
3 30923 . G T,A,C 999 PASS DP=107;VDB=0.0022;AF1=1;AC1=8;DP4=0,0,47,50;MQ=37;FQ=-36;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:255,75,0:25:0:99 1/1:221,30,0:10:0:72 1/1:255,117,0:39:0:99 1/1:255,69,0:23:0:99
4 51898 . C A,T 22.2 PASS DP=128;VDB=0.0230;AF1=0.1272;AC1=1;DP4=56,50,14,2;MQ=41;FQ=22.2;PV4=0.013,0.069,9.8e-17,1;AN=8;AC=1 GT:PL:DP:SP:GQ 0/1:28,0,255:23:13:23 0/0:0,35,255:19:3:40 0/0:0,59,255:44:0:64 0/0:0,11,255:36:9:16
4 51928 . G A 54.1 PASS DP=149;VDB=0.0311;AF1=0.1269;AC1=1;DP4=67,52,22,5;MQ=41;FQ=54.1;PV4=0.017,0.0073,7.3e-34,0.37;AN=8;AC=1 GT:PL:DP:SP:GQ 0/1:60,0,255:29:13:55 0/0:0,27,255:19:0:32 0/0:0,30,255:51:2:35 0/0:0,13,255:47:11:18
4 52058 . G C,T 17.5 PASS DP=132;VDB=0.0277;AF1=0.2308;AC1=2;DP4=55,57,15,2;MQ=35;FQ=17.5;PV4=0.0031,0.036,0.00091,0.03;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:13,0,178:20:7:11 0/0:0,60,255:20:0:62 0/1:12,0,255:51:8:10 0/0:0,15,255:38:9:17
4 52238 . T G,A 999 PASS DP=138;VDB=0.0125;AF1=1;AC1=8;DP4=0,0,65,60;MQ=37;FQ=-42;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:255,63,0:21:0:99 1/1:255,36,0:12:0:84 1/1:255,166,0:55:0:99 1/1:255,111,0:37:0:99
4 54586 . T C,A,G 51.1 PASS DP=116;VDB=0.0136;AF1=0.236;AC1=2;DP4=47,45,14,7;MQ=36;FQ=51.1;PV4=0.23,1,6.1e-11,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:48,0,186:24:0:46 0/0:0,26,252:14:0:28 0/1:11,0,255:32:7:9 0/0:0,40,255:43:9:42
4 54676 . C T 999 PASS DP=143;VDB=0.0244;AF1=0.4969;G3=1.224e-08,1,4.851e-96;HWE=0.0191;AC1=4;DP4=54,48,19,21;MQ=40;FQ=999;PV4=0.58,0.47,8.6e-13,0.2;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:82,0,233:23:7:85 0/1:121,0,237:20:2:99 0/1:173,0,255:49:6:99 0/1:13,0,255:50:2:16
4 54753 . T G,A 61.5 PASS DP=177;VDB=0.0130;AF1=0.2019;AC1=2;DP4=48,82,1,5;MQ=40;FQ=61.5;PV4=0.42,1,0.27,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:5,0,237:20:3:4 0/1:63,0,193:16:0:60 0/0:0,160,255:53:0:99 0/0:0,141,255:47:0:99
5 61442 . A G,T 999 PASS DP=96;VDB=0.0348;AF1=1;AC1=8;DP4=0,0,41,47;MQ=30;FQ=-27;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:197,42,0:14:0:75 1/1:108,21,0:7:0:54 1/1:255,99,0:33:0:99 1/1:255,102,0:34:0:99
6 61499 . G A 87.1 PASS DP=140;VDB=0.0120;AF1=0.3006;AC1=2;DP4=54,55,18,12;MQ=35;FQ=87.1;PV4=0.41,0.3,2.4e-12,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:61,0,243:24:2:60 0/0:0,1,231:14:7:4 0/1:32,0,255:43:3:31 0/0:0,48,255:58:6:49
6 61987 . A G,T,C 999 PASS DP=206;VDB=0.0287;AF1=0.5;G3=1.244e-38,1,7.859e-46;HWE=0.0185;AC1=4;DP4=46,65,42,48;MQ=39;FQ=999;PV4=0.48,0.072,0.00033,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:255,0,255:58:0:99 0/1:182,0,218:22:2:99 0/1:255,0,255:62:3:99 0/1:220,0,255:59:6:99
6 61989 . G C 999 PASS DP=208;VDB=0.0311;AF1=0.5;G3=3.141e-40,1,8.15e-49;HWE=0.0185;AC1=4;DP4=47,65,42,49;MQ=39;FQ=999;PV4=0.57,0.058,8.7e-05,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:255,0,255:59:1:99 0/1:190,0,233:22:2:99 0/1:255,0,255:62:3:99 0/1:216,0,255:60:6:99
6 62203 . T C 999 PASS DP=258;VDB=0.0354;AF1=0.5;G3=3.125e-31,1,5e-52;HWE=0.0185;AC1=4;DP4=77,73,47,52;MQ=40;FQ=999;PV4=0.61,1,2.6e-25,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:255,0,255:66:5:99 0/1:145,0,255:38:1:99 0/1:252,0,255:84:1:99 0/1:210,0,255:61:0:99
7 62271 . A G,T 134 PASS DP=187;VDB=0.0101;AF1=0.498;G3=2.233e-09,1,4.959e-103;HWE=0.0189;AC1=4;DP4=92,56,14,20;MQ=41;FQ=134;PV4=0.033,0.013,1.9e-22,0.0028;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:65,0,255:35:1:68 0/1:19,0,255:28:2:22 0/1:17,0,255:60:11:20 0/1:39,0,255:59:14:42
7 66162 . A T,G 999 PASS DP=215;VDB=0.0231;AF1=0.4998;G3=1.238e-10,1,1.58e-77;HWE=0.0186;AC1=4;DP4=62,67,26,36;MQ=39;FQ=999;PV4=0.44,1,3.3e-21,1;AN=8;AC=4 GT:PL:DP:SP:GQ 0/1:170,0,255:45:3:99 0/1:77,0,255:30:0:80 0/1:183,0,255:69:3:99 0/1:26,0,255:47:0:29
7 66507 . T A,C 999 PASS DP=202;VDB=0.0385;AF1=0.626;AC1=5;DP4=25,14,63,82;MQ=42;FQ=999;PV4=0.03,0.023,1,0.0014;AN=8;AC=5 GT:PL:DP:SP:GQ 0/1:255,0,205:42:7:99 0/1:255,0,20:37:12:21 0/1:255,0,155:57:4:99 1/1:255,72,0:48:0:71
8 73841 . C T 999 PASS DP=182;VDB=0.0366;AF1=0.3748;AC1=3;DP4=50,64,12,26;MQ=30;FQ=999;PV4=0.25,1.6e-10,0.084,1;AN=8;AC=3 GT:PL:DP:SP:GQ 0/1:95,0,255:33:3:96 0/1:174,0,204:27:9:99 0/1:28,0,255:53:17:29 0/0:0,64,255:39:6:63
8 79418 . G C 28.5 PASS DP=99;VDB=0.0139;AF1=0.2016;AC1=2;DP4=31,59,1,5;MQ=39;FQ=28.5;PV4=0.66,0.015,0.045,0.00068;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:5,0,236:21:3:4 0/1:30,0,229:18:0:27 0/0:0,78,255:26:0:81 0/0:0,93,255:31:0:96
8 79772 . C G 999 PASS DP=138;VDB=0.0342;AF1=0.25;AC1=2;DP4=68,47,11,9;MQ=30;FQ=999;PV4=0.81,1,0.41,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:132,0,127:29:6:99 0/1:84,0,219:27:2:82 0/0:0,117,255:39:0:99 0/0:0,120,255:40:0:99
9 82303 . T C 21 PASS DP=111;VDB=0.0241;AF1=0.1243;AC1=1;DP4=47,50,3,8;MQ=38;FQ=21;PV4=0.22,1,1.5e-14,1;AN=8;AC=1 GT:PL:DP:SP:GQ 0/1:27,0,255:20:5:22 0/0:0,24,255:21:0:29 0/0:0,96,255:32:0:99 0/0:0,75,255:35:0:80
9 82676 . T G,A 999 PASS DP=152;VDB=0.0213;AF1=0.25;AC1=2;DP4=70,59,9,11;MQ=34;FQ=999;PV4=0.48,0.37,0.0004,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:124,0,255:34:0:99 0/1:89,0,255:31:6:87 0/0:0,138,255:46:0:99 0/0:0,114,255:38:0:99
9 83084 . T A,G 999 PASS DP=84;VDB=0.0128;AF1=1;AC1=8;DP4=0,0,38,37;MQ=37;FQ=-33;AN=8;AC=8 GT:PL:DP:SP:GQ 1/1:255,48,0:16:0:87 1/1:203,27,0:9:0:66 1/1:255,72,0:24:0:99 1/1:255,78,0:26:0:99
9 84010 . G A,T 37 PASS DP=190;VDB=0.0033;AF1=0.125;AC1=1;DP4=85,71,6,3;MQ=38;FQ=37;PV4=0.73,0.2,1,0.14;AN=8;AC=1 GT:PL:DP:SP:GQ 0/0:0,126,255:42:0:99 0/0:0,81,255:27:0:86 0/1:43,0,255:38:2:38 0/0:0,64,255:58:2:69
9 84014 . G A,T 38 PASS DP=192;VDB=0.0055;AF1=0.125;AC1=1;DP4=89,67,3,4;MQ=39;FQ=38;PV4=0.47,0.21,1,0.021;AN=8;AC=1 GT:PL:DP:SP:GQ 0/1:44,0,255:42:0:39 0/0:0,84,255:28:0:89 0/0:0,72,255:36:0:77 0/0:0,172,255:57:0:99
10 84018 . G A,T 79.6 PASS DP=188;VDB=0.0107;AF1=0.2497;AC1=2;DP4=77,64,8,3;MQ=41;FQ=79.6;PV4=0.35,0.17,1,0.0032;AN=8;AC=2 GT:PL:DP:SP:GQ 0/0:0,120,255:40:0:99 0/1:28,0,255:24:0:26 0/0:0,108,255:36:0:99 0/1:60,0,255:52:6:58
10 84244 . A C 999 PASS DP=213;VDB=0.0276;AF1=0.25;AC1=2;DP4=83,93,14,22;MQ=41;FQ=999;PV4=0.46,0.29,0.019,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:255,0,255:47:6:99 0/1:208,0,255:49:1:99 0/0:0,178,255:59:0:99 0/0:0,172,255:57:0:99
10 85597 . A C 999 PASS DP=139;VDB=0.0342;AF1=0.25;AC1=2;DP4=47,60,16,14;MQ=30;FQ=999;PV4=0.41,0.057,1,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:198,0,135:26:0:99 0/1:184,0,221:35:5:99 0/0:0,114,255:38:0:99 0/0:0,114,255:38:0:99
10 86018 . C G 999 PASS DP=181;VDB=0.0399;AF1=0.25;AC1=2;DP4=70,69,13,26;MQ=42;FQ=999;PV4=0.07,1,7e-14,0.12;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:200,0,255:36:11:99 0/1:240,0,255:46:2:99 0/0:0,129,255:43:0:99 0/0:0,160,255:53:0:99
10 86303 . G T 999 PASS DP=182;VDB=0.0329;AF1=0.25;AC1=2;DP4=76,66,17,22;MQ=40;FQ=999;PV4=0.28,1,3.7e-11,0.31;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:255,0,255:44:3:99 0/1:207,0,255:31:1:99 0/0:0,138,255:46:0:99 0/0:0,181,255:60:0:99
10 86331 . A G 999 PASS DP=187;VDB=0.0331;AF1=0.25;AC1=2;DP4=69,74,18,23;MQ=40;FQ=999;PV4=0.72,1,2.4e-05,1;AN=8;AC=2 GT:PL:DP:SP:GQ 0/1:255,0,255:51:1:99 0/1:216,0,255:34:0:99 0/0:0,120,255:40:0:99 0/0:0,178,255:59:0:99
10 101686 . A G,T,C 89 PASS DP=304;VDB=0.0327;AF1=0.125;AC1=1;DP4=99,171,10,15;MQ=30;FQ=89;PV4=0.83,0.3,0.035,1;AN=8;AC=1 GT:PL:DP:SP:GQ 0/1:95,0,255:58:0:90 0/0:0,72,255:24:0:77 0/0:0,101,255:102:0:99 0/0:0,255,255:111:0:99
X 1 . A G 89 PASS DP=304;VDB=0.0327;AF1=0.125;AC1=1;DP4=99,171,10,15;MQ=30;FQ=89;PV4=0.83,0.3,0.035,1;AN=8;AC=1 GT:PL:DP 0/1:95,0,255:11 0/0:0,72,255:11 0/0:0,101,255:11 0:0,255:11
X 2 . A G 89 PASS DP=304;VDB=0.0327;AF1=0.125;AC1=1;DP4=99,171,10,15;MQ=30;FQ=89;PV4=0.83,0.3,0.035,1;AN=8;AC=1 GT:PL:DP 0/1:95,0,255:11 0/0:0,72,255:11 0/0:0,101,255:11 1:255,0:11
X 3 . A G 89 PASS DP=304;VDB=0.0327;AF1=0.125;AC1=1;DP4=99,171,10,15;MQ=30;FQ=89;PV4=0.83,0.3,0.035,1;AN=8;AC=1 GT:PL:DP 0/0:0,95,255:11 0/0:0,72,255:11 0/0:0,101,255:11 1:255,0:11
14 changes: 13 additions & 1 deletion examples/mymacros.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from vcfstats.macros import continuous
from vcfstats.macros import continuous, categorical


@continuous
Expand Down Expand Up @@ -27,3 +27,15 @@ def N_MISSING(variant):
def Percent_HETs(variant):
    """Return the share of genotype entries at this locus that are HET.

    The value is ``variant.num_het`` divided by the number of entries in
    ``variant.gt_types``; it is a fraction and is not scaled to 0-100.
    """
    het_count = variant.num_het
    n_genotypes = len(variant.gt_types)
    return het_count / float(n_genotypes)


@categorical
def Allelic_Type(variant):
    """Label a variant by how many ALT alleles it lists.

    Returns "biallelic" when ``variant.ALT`` holds exactly one allele and
    "multi-allelic" for every other ALT count.
    """
    n_alt = len(variant.ALT)
    if n_alt == 1:
        return "biallelic"
    return "multi-allelic"


@categorical
def N_Allelic(variant):
    """Return the allele count of a variant.

    Computed as the number of entries in ``variant.ALT`` plus one
    (presumably accounting for the reference allele).
    """
    alt_alleles = variant.ALT
    return 1 + len(alt_alleles)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 887638b

Please sign in to comment.