diff --git a/README.md b/README.md index 9e9d2bb..6107442 100644 --- a/README.md +++ b/README.md @@ -2,22 +2,52 @@ Requires modules `networkx`, `numpy`, `scikit-learn`, and `argparse`. -For a description of the Random Walk with Restart (RWR) algorithm, see the paper by Kohler et al. at [http://www.sciencedirect.com/science/article/pii/S0002929708001729](http://www.sciencedirect.com/science/article/pii/S0002929708001729). +For a description of the Random Walk with Restart (RWR) algorithm, which +this module implements, see the paper by Kohler et al. at +[http://www.sciencedirect.com/science/article/pii/S0002929708001729](http://www.sciencedirect.com/science/article/pii/S0002929708001729). -This module was initially created to run node removal experiments with two separate graphs, but the code in matrix\_main.py can easily be used to run the standard RWR algorithm on a single graph of any sort. +## Overview + +This module can be used to run two types of experiments: + +- A standard random walk with restart from a set of seed nodes, as in the + Kohler et al. paper referenced above. +- A random walk with restart, from a set of seed nodes, on a "tissue-specific" + network. The network is defined by a "low list" of nodes (i.e. genes) that + are not expressed in the tissue of interest. This is described in more + detail in our paper, which is currently in review. + +Examples of both experiments are described in more detail below. ## Running a random walk -The matrix\_main.py script can be used to run a random walk. The syntax looks like: +The run\_walker.py script can be used to run a random walk. 
The syntax looks like: -`python matrix_main.py [-l ] [-r ]` +`python run_walker.py <input_graph> <seed> [-l <low_list>] [-r <remove_nodes>]` where the input graph is in edge list format, the seed is a list of nodes to start the random walk at, the optional low list is a list of nodes to down-weight -for node removal experiments (as in the tissue-specific networks paper), and the -optional node removal list is a list of nodes to remove completely from the graph. +for node removal experiments, and the optional node removal list is a list of nodes +to remove completely from the network. + +The script will write a tab-separated list of nodes and probabilities to stdout, +where the probability number represents the probability that a random walk +starting at the seed nodes will terminate at the given node. + +For more detail about the expected arguments, run `python run_walker.py -h`. + +## Examples + +To help you get up and running, a few simple examples are included in the `testdata` +folder. To run a standard random walk experiment on a simple example network, run +this command: + +`python run_walker.py testdata/test_network.ppi testdata/test_seed.txt` + +Or, to run a "tissue-specific" random walk experiment using the same +simple example network, try: -More thorough documentation to come. +`python run_walker.py testdata/test_network.ppi testdata/test_seed.txt -l testdata/test_low_list.txt` ## Using the module diff --git a/runSH.py b/runSH.py deleted file mode 100644 index 1ecdd0c..0000000 --- a/runSH.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -import sys -import os - -def get_node_list(node_file): - node_list = [] - try: - fp = open(node_file, 'r') - except IOError: - sys.exit('Could not open file: {}'.format(node_file)) - - # read the first (i.e. 
largest) connected component - cur_line = fp.readline() - while cur_line and not cur_line.isspace(): - if cur_line: - node_list.append(cur_line.rstrip()) - cur_line = fp.readline() - - fp.close() - return node_list - -ppi = sys.argv[1] -seed_dir = sys.argv[2] -nodelist_file = sys.argv[3] -node_list = get_node_list(sys.argv[3]) -output_script = 'run_rwr' - -# open script file to write - -script_no = 0 -script_fp = '' -for idx, gene in enumerate(node_list): - - # for each step, write a command - if idx % 1000 == 0: - if script_fp: script_fp.close() - try: - script_fp = open(output_script + '{}.sh'.format(script_no), "w") - script_no += 1 - except IOError: - sys.exit("Error opening file {}".format(output_script)) - - seed_file = '{}/seed_{}.txt'.format(seed_dir, idx) - output_file = "/r/bcb/TissueSpecificBRAF/string_results/seed.{}.rwr".format(idx) - command = "python matrix_main.py {} {} -n {} > {}\n".format( - ppi, seed_file, nodelist_file, output_file) - script_fp.write(command) - -script_fp.close() - diff --git a/run_walker.py b/run_walker.py index 76cd639..0abd951 100644 --- a/run_walker.py +++ b/run_walker.py @@ -73,7 +73,7 @@ def main(argv): # run the experiments, and write a rank list to stdout wk = Walker(opts.input_graph, opts.low_list, remove_list) - wk.run_exp(seed_list, opts.restart_prob, + wk.run_exp(seed_list, opts.restart_prob, opts.original_graph_prob, node_list) diff --git a/testdata/test_low_list.txt b/testdata/test_low_list.txt new file mode 100644 index 0000000..7a1aafb --- /dev/null +++ b/testdata/test_low_list.txt @@ -0,0 +1,5 @@ +0 1.5 +1 2.0 +2 NA +3 1.0 +4 1.8 diff --git a/walker.py b/walker.py index 0d3a014..4b31990 100644 --- a/walker.py +++ b/walker.py @@ -98,7 +98,7 @@ def _generate_rank_list(self, p_t): def _calculate_next_p(self, p_t, p_0): """ Calculate the next probability vector. 
""" - if self.tsg_matrix: + if self.tsg_matrix is not None: no_epsilon = np.squeeze(np.asarray(np.dot(self.tsg_matrix, p_t) * (1 - self.og_prob))) epsilon = np.squeeze(np.asarray(np.dot(self.og_matrix, p_t) * @@ -151,7 +151,7 @@ def _build_matrices(self, original_ppi, low_list, remove_nodes): og_not_normalized, low_list) self.tsg_matrix = self._normalize_cols(tsg_not_normalized) else: - self.tsg_matrix = [] + self.tsg_matrix = None def _tsg_matrix(self, original_graph, og_matrix, low_list):