add new ppl and project

uwsampl · Feb 13, 2024 · 169bb4f · 169bb4f
1 parent 17a4c84
commit 169bb4f
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 0 deletions.
diff --git a/_data/people.yml b/_data/people.yml
@@ -181,6 +181,15 @@ kanzhu:
     webpage: "https://kanzhu.netlify.app/"
     role: grad
 
+kamahori:
+    display_name: "Keisuke Kamahori"
+    webpage: "https://kamahori.org/"
+    role: grad
+
+yilegu:
+    display_name: "Yile (Michael) Gu"
+    webpage: "https://ikace.github.io/"
+    role: grad
 
 # Alums
 # The order is: PhD/postdoc by year; MS by year; BS by year

diff --git a/_projects/fiddler.md b/_projects/fiddler.md
@@ -0,0 +1,20 @@
+---
+title: Fiddler
+
+description: |
+  CPU-GPU Orchestration for Fast Inference of MoE Models
+
+people:
+  - kamahori
+  - yilegu
+  - kanzhu
+  - baris
+
+layout: project
+last-updated: 2024-02-12
+---
+
+Fiddler is a fast inference system for running LLMs based on the Mixture-of-Experts (MoE) architecture on local devices.
+
+- Preprint: [Fiddler: CPU-GPU Orchestration for Fast Inference of Mixture-of-Experts Models](https://arxiv.org/abs/2402.07033)
+- GitHub: [Fiddler](https://github.com/efeslab/fiddler)
diff --git a/bib/pubs.bib b/bib/pubs.bib
@@ -1,3 +1,30 @@
+@misc{kamahori2024fiddler,
+      title={Fiddler: CPU-GPU Orchestration for Fast Inference of Mixture-of-Experts Models},
+      author={Keisuke Kamahori and Yile Gu and Kan Zhu and Baris Kasikci},
+      year={2024},
+      eprint={2402.07033},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG}
+}
+
+@misc{chen2023punica,
+      title={Punica: Multi-Tenant LoRA Serving},
+      author={Lequn Chen and Zihao Ye and Yongji Wu and Danyang Zhuo and Luis Ceze and Arvind Krishnamurthy},
+      year={2023},
+      eprint={2310.18547},
+      archivePrefix={arXiv},
+      primaryClass={cs.DC}
+}
+
+@misc{zhao2023atom,
+      title={Atom: Low-bit Quantization for Efficient and Accurate LLM Serving},
+      author={Yilong Zhao and Chien-Yu Lin and Kan Zhu and Zihao Ye and Lequn Chen and Size Zheng and Luis Ceze and Arvind Krishnamurthy and Tianqi Chen and Baris Kasikci},
+      year={2023},
+      eprint={2310.19102},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG}
+}
+
 @inproceedings{sparsetir,
   author    = {Zihao Ye and
                Ruihang Lai and