1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
use std::path::Path;
use std::error::Error;
use bio::stats::{LogProb, PHREDProb, bayesian};
use itertools::Itertools;
use rust_htslib::bcf;
use Event;
use BCFError;
pub fn annotate<R, W, E>(inbcf: &R, outbcf: &W, events: &[E]) -> Result<(), Box<Error>> where
R: AsRef<Path>,
W: AsRef<Path>,
E: Event
{
let reader = try!(bcf::Reader::new(&inbcf));
let mut event_peps = Vec::new();
for _ in 0..events.len() {
event_peps.push(Vec::new());
}
let mut record = bcf::Record::new();
loop {
if let Err(e) = reader.read(&mut record) {
if e.is_eof() {
break;
} else {
return Err(Box::new(e));
}
}
for (event, peps) in events.iter().zip(event_peps.iter_mut()) {
let tag_name = event.tag_name("PROB");
let rec_peps = try!(record.info(tag_name.as_bytes()).float());
match rec_peps {
Some(rec_peps) => {
for &p in rec_peps.iter() {
let pep = LogProb::from(PHREDProb(p as f64)).ln_one_minus_exp();
peps.push(pep);
}
},
None => {
return Err(Box::new(BCFError::MissingTag(tag_name.clone())));
}
}
}
}
let event_fdrs = event_peps.iter().map(|peps| {
bayesian::expected_fdr(peps).iter().map(|&p| *PHREDProb::from(p) as f32).collect_vec()
}).collect_vec();
let reader = try!(bcf::Reader::new(&inbcf));
let mut outbcf = {
let mut header = bcf::Header::with_template(&reader.header);
for event in events {
header.push_record(
event.header_entry("FDR", "PHRED-scaled FDR when considering this and all better").as_bytes()
);
}
try!(bcf::Writer::new(outbcf, &header, false, false))
};
let mut i = 0;
loop {
if let Err(e) = reader.read(&mut record) {
if e.is_eof() {
break;
} else {
return Err(Box::new(e));
}
}
outbcf.translate(&mut record);
let allele_count = record.alleles().len() - 1;
for (event, fdrs) in events.iter().zip(event_fdrs.iter()) {
try!(record.push_info_float(event.tag_name("FDR").as_bytes(), &fdrs[i..i + allele_count]));
}
i += allele_count;
}
Ok(())
}