Merge pull request #7 from pokt-scan/6-test-taxonomy-script-plotted-matrix-different-from-csv-data

fixed wrong graph generation
pokt-scan · Oct 23, 2024 · c5fcffb · c5fcffb
2 parents a147300 + d34bd7e
commit c5fcffb
Showing 1 changed file with 18 additions and 28 deletions.
diff --git a/apps/python/taxonomy_analyzer/test_taxonomy.py b/apps/python/taxonomy_analyzer/test_taxonomy.py
@@ -15,7 +15,6 @@
 def main():
     from taxonomy_tools import helm_data as txm_helm_data
     from taxonomy_tools import utils as txm_utils
-    from taxonomy_tools import metrics as txm_metrics
 
     # Create an ArgumentParser object
     parser = argparse.ArgumentParser(
@@ -143,13 +142,13 @@ def main():
     )
     metric_dict_list = list()
     names_list = list()
-    for metric_use in METRICS_USE:
-        print('Analyzing metric: "%s"' % metric_use)
+    for metric in METRICS_USE:
+        print('Analyzing metric: "%s"' % metric)
         metric_matrix, metric_matrix_filtered, metric_dict = (
             txm_utils.get_taxonomy_nodes_metric(
                 nodes_data_df,
                 taxonomy_graph,
-                method=metric_use,
+                method=metric,
                 verbose=True,
                 print_prefix="\t",
             )
@@ -158,15 +157,15 @@ def main():
         pd.DataFrame(metric_matrix).to_csv(
             os.path.join(
                 OUTPUT_PATH,
-                "%s" % taxonomy_name + "_full_metric_%s.csv" % metric_use,
+                "%s" % taxonomy_name + "_full_metric_%s.csv" % metric,
             ),
             index=False,
             header=False,
         )
         pd.DataFrame(metric_matrix_filtered).to_csv(
             os.path.join(
                 OUTPUT_PATH,
-                "%s" % taxonomy_name + "_filtered_metric_%s.csv" % metric_use,
+                "%s" % taxonomy_name + "_filtered_metric_%s.csv" % metric,
             ),
             index=False,
             header=False,
@@ -177,7 +176,7 @@ def main():
             txm_utils.get_taxonomy_per_edge_metric(
                 taxonomy_graph,
                 helm_samples_dict,
-                method=metric_use,
+                method=metric,
                 verbose=True,
                 print_prefix="\t",
             )
@@ -186,7 +185,7 @@ def main():
         pd.DataFrame(metric_matrix_imbalanced).to_csv(
             os.path.join(
                 OUTPUT_PATH,
-                "%s" % taxonomy_name + "_imbalanced_metric_%s.csv" % metric_use,
+                "%s" % taxonomy_name + "_imbalanced_metric_%s.csv" % metric,
             ),
             index=False,
             header=False,
@@ -195,25 +194,16 @@ def main():
         # Track names and metrics for compilation
         metric_dict_list.append(metric_dict)
         metric_dict_list.append(metric_dict_imbalanced)
-        names_list.append(metric_use)
-        names_list.append("imabalanced_" + metric_use)
+        names_list.append(metric)
+        names_list.append("imabalanced_" + metric)
 
-        # Plot compacto de todos los nodos contra todos
-        if metric_use == "mutual_information":
-            method_use = txm_metrics.node_pair_mutual_info_regression
-        elif metric_use == "success_association":
-            method_use = txm_metrics.node_pair_mutual_info_regression
-        else:
-            method_use = metric_use
-
-        # calculate metric.
-        use_data = nodes_data_df.loc[:, (nodes_data_df != 0).any()]
-        if metric_use not in txm_metrics.permutation_methods:
-            # We abuse the pandas "corr" method here.
-            metric_matrix = use_data.corr(method=method_use)
-        else:
-            # Do the calculation on each permutation
-            metric_matrix = txm_metrics.apply_to_pairs(use_data, method_use)
+        # Remove columns that hold no data to make plot more compact
+        columns_keep = list()
+        for idx, column in enumerate(nodes_data_df.columns):
+            if nodes_data_df[column].abs().sum() != 0:
+                columns_keep.append(column)
+        metric_matrix = metric_matrix[columns_keep]
+        metric_matrix = metric_matrix.loc[columns_keep]
 
         # Create a heatmap for visualization
         im = plt.matshow(metric_matrix, cmap="coolwarm")
@@ -228,12 +218,12 @@ def main():
         )
         plt.yticks(range(len(metric_matrix.columns)), metric_matrix.columns)
         # Set title
-        plt.title("%s" % TAXONOMY_PATH.split("/")[-1])
+        plt.title(f"{TAXONOMY_PATH.split('/')[-1]} - {metric}")
         plt.draw()
         plt.savefig(
             os.path.join(
                 OUTPUT_PATH,
-                "%s" % taxonomy_name + "_taxonomy_metric_%s_matrix.png" % metric_use,
+                "%s" % taxonomy_name + "_taxonomy_metric_%s_matrix.png" % metric,
             ),
             bbox_inches="tight",
         )