Skip to content

Commit

Permalink
upd
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinyWu committed Dec 17, 2023
2 parents 02186b2 + c35fcd1 commit 0b9372a
Show file tree
Hide file tree
Showing 86 changed files with 337,061 additions and 1,062 deletions.
65 changes: 57 additions & 8 deletions build/lib/emergenet/emergenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ def __init__(self, seq, seq_trunc_length, seq_metadata=None, random_state=None):
self.seq = seq.upper()
self.seq_metadata = seq_metadata

l=len(self.seq)
if seq_trunc_length > len(self.seq):
raise ValueError('Length to truncate sequences must not be greater than target sequence length!')
self.seq=self.seq+''.join((seq_trunc_length-l)*['A'])

self.seq_trunc_length = seq_trunc_length

if random_state < 0:
raise ValueError('Seed must be between 0 and 2**32 - 1!')
self.random_state = random_state

def __repr__(self):
return "emergenet.Emergenet"
return "emergenet.Emergenet"

def __str__(self):
return self.__repr__()
Expand Down Expand Up @@ -213,7 +213,10 @@ def sequence_membership(self, seq_df, enet, sample_size=None):
membership_degrees = np.array([membership_degree(seq[:self.seq_trunc_length], enet) for seq in seqs])
return membership_degrees

def emergence_risk(self, seq_df, enet, sample_size=None):



def emergence_risk(self, seq_df, enet, sample_size=None, minimum=False):
"""Computes emergence risk score.
Parameters
Expand All @@ -226,11 +229,13 @@ def emergence_risk(self, seq_df, enet, sample_size=None):
sample_size : int
Number of strains to compute emergence risk with, sampled randomly
minimum : bool
if True return minimum instead of average
Returns
-------
emergence_risk_score : float
Emergence risk score
Emergence risk score
variance : float
Variance of emergence risk score
Expand All @@ -245,10 +250,22 @@ def emergence_risk(self, seq_df, enet, sample_size=None):
if np.isnan(qdist):
continue
qdist_list.append(qdist)
emergence_risk_score = np.average(qdist_list)
variance = np.var(qdist_list)
if not minimum:
emergence_risk_score = np.average(qdist_list)
variance = np.var(qdist_list)
else:
qdist_list=np.array(qdist_list)
qdist_list=qdist_list[qdist_list>0]
emergence_risk_score = np.min(qdist_list)
lb,ub=bootstrap_min_confidence_interval(qdist_list,confidence_level=0.95)
variance = (ub-lb)/2

return emergence_risk_score, variance





def emergence_risk_qsampling(self, seq_df, enet, sample_size=None, qsamples=None, steps=None):
"""Computes emergence risk score using qsampling.
Expand Down Expand Up @@ -378,3 +395,35 @@ def load_model(filepath):
"""
enet = load_qnet(filepath)
return enet




def bootstrap_min_confidence_interval(sample, confidence_level=0.95):
"""
Calculate the confidence interval for the minimum value of a population
based on a sample using bootstrap method with m = N-1.
Args:
sample (array-like): The sample data.
confidence_level (float): The confidence level for the interval.
Returns:
tuple: Lower and upper bounds of the confidence interval.
"""
n = len(sample)
m = n - 5 # Optimal number of bootstrap samples
bootstrap_mins = []

# Generate bootstrap samples and calculate their minima
for _ in range(m):
bootstrap_sample = np.random.choice(sample, size=m, replace=True)
bootstrap_min = np.min(bootstrap_sample)
bootstrap_mins.append(bootstrap_min)

# Calculate the confidence interval
lower_bound = np.percentile(bootstrap_mins, (1 - confidence_level) / 2 * 100)
upper_bound = np.percentile(bootstrap_mins, (1 + confidence_level) / 2 * 100)

return lower_bound, upper_bound

Binary file removed dist/emergenet-0.0.40-py3-none-any.whl
Binary file not shown.
Binary file removed dist/emergenet-0.0.40.tar.gz
Binary file not shown.
Loading

0 comments on commit 0b9372a

Please sign in to comment.