clip, kernel pca

jacobgil · Oct 7, 2024 · a165a40 · a165a40
1 parent 3018ba3
commit a165a40
Show file tree

Hide file tree

Showing 7 changed files with 21 additions and 183 deletions.
diff --git a/README.md b/README.md
@@ -44,8 +44,8 @@ The aim is also to serve as a benchmark of algorithms and metrics for research o
 | EigenGradCAM        | Like EigenCAM but with class discrimination: First principal component of Activations*Grad. Looks like GradCAM, but cleaner |
 | LayerCAM            | Spatially weight the activations by positive gradients. Works better especially in lower layers                             |
 | FullGrad            | Computes the gradients of the biases from all over the network, and then sums them                                          |
-| Deep Feature Factorizations           | Non Negative Matrix Factorization on the 2D activations                                                 |
-
+| Deep Feature Factorizations           | Non Negative Matrix Factorization on the 2D activations                                                   |
+| KPCA-CAM            | Like EigenCAM but with Kernel PCA instead of PCA |
 ## Visual Examples
 
 | What makes the network think the image label is 'pug, pug-dog' | What makes the network think the image label is 'tabby, tabby cat' | Combining Grad-CAM with Guided Backpropagation for the 'pug, pug-dog' class |
@@ -68,6 +68,11 @@ The aim is also to serve as a benchmark of algorithms and metrics for research o
 <img src="./examples/dff1.png">
 <img src="./examples/dff2.png">
 
+## CLIP
+| Explaining the text prompt "a dog" | "a cat" |
+| ---------------------------------------------------------------|--------------------|
+| <img src="https://github.com/jacobgil/pytorch-grad-cam/blob/master/examples/clip_dog.jpg?raw=true" width="256" height="256"> | <img src="https://github.com/jacobgil/pytorch-grad-cam/blob/master/examples/clip_cat.jpg?raw=true" width="256" height="256"> |
+
 ## Classification
 
 #### Resnet50:
@@ -348,3 +353,8 @@ Suraj Srinivas, Francois Fleuret`
 https://arxiv.org/abs/1806.10206 <br>
 `Deep Feature Factorization For Concept Discovery
 Edo Collins, Radhakrishna Achanta, Sabine Süsstrunk`
+
+https://arxiv.org/abs/2410.00267 <br>
+`KPCA-CAM: Visual Explainability of Deep Computer Vision Models using Kernel PCA`
+
+`Sachin Karmani, Thanushon Sivakaran, Gaurav Prasad, Mehmet Ali, Wenbo Yang, Sheyang Tang`
diff --git a/examples/clip_cat.jpg b/examples/clip_cat.jpg
diff --git a/examples/clip_dog.jpg b/examples/clip_dog.jpg
diff --git a/requirements.txt b/requirements.txt
@@ -6,5 +6,4 @@ ttach
 tqdm
 opencv-python
 matplotlib
-scikit-learn
-transformers
+scikit-learn
diff --git a/setup.py b/setup.py
@@ -8,7 +8,7 @@
 
 setuptools.setup(
     name='grad-cam',
-    version='1.5.2',
+    version='1.5.4',
     author='Jacob Gildenblat',
     author_email='[email protected]',
     description='Many Class Activation Map methods implemented in Pytorch for classification, segmentation, object detection and more',

diff --git a/usage_examples/clip_example b/usage_examples/clip_example
diff --git a/usage_examples/vit_example.py b/usage_examples/vit_example.py
@@ -21,8 +21,9 @@
 
 def get_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--use-cuda', action='store_true', default=False,
-                        help='Use NVIDIA GPU acceleration')
+    parser.add_argument('--device', type=str, default='cpu',
+                        help='Torch device to use')
+
     parser.add_argument(
         '--image-path',
         type=str,
@@ -43,9 +44,8 @@ def get_args():
         help='Can be gradcam/gradcam++/scorecam/xgradcam/ablationcam')
 
     args = parser.parse_args()
-    args.use_cuda = args.use_cuda and torch.cuda.is_available()
-    if args.use_cuda:
-        print('Using GPU for acceleration')
+    if args.device:
+        print(f'Using device "{args.device}" for acceleration')
     else:
         print('Using CPU for computation')
 
@@ -84,11 +84,8 @@ def reshape_transform(tensor, height=14, width=14):
         raise Exception(f"method should be one of {list(methods.keys())}")
 
     model = torch.hub.load('facebookresearch/deit:main',
-                           'deit_tiny_patch16_224', pretrained=True)
-    model.eval()
+                           'deit_tiny_patch16_224', pretrained=True).to(torch.device(args.device)).eval()
 
-    if args.use_cuda:
-        model = model.cuda()
 
     target_layers = [model.blocks[-1].norm1]
 
@@ -109,7 +106,7 @@ def reshape_transform(tensor, height=14, width=14):
     rgb_img = cv2.resize(rgb_img, (224, 224))
     rgb_img = np.float32(rgb_img) / 255
     input_tensor = preprocess_image(rgb_img, mean=[0.5, 0.5, 0.5],
-                                    std=[0.5, 0.5, 0.5])
+                                    std=[0.5, 0.5, 0.5]).to(args.device)
 
     # If None, returns the map for the highest scoring category.
     # Otherwise, targets the requested category.