#0: Modify llama demo to use blocking reads
tt-asaigal committed Nov 2, 2024
1 parent dec1dc2 commit 9cc7436
Showing 1 changed file with 1 addition and 1 deletion.
models/demos/llama3/demo/demo.py: 1 addition & 1 deletion
@@ -398,7 +398,7 @@ def run_llama3_demo(user_input, batch_size, mesh_device, instruct_mode, is_ci_en
     # Write to host
     ttnn.wait_for_event(1, op_event)
     tt_output_torch = ttnn.to_torch(
-        tt_out_tok.cpu(blocking=False, cq_id=1), mesh_composer=ttnn.ConcatMeshToTensor(mesh_device, dim=1)
+        tt_out_tok.cpu(blocking=True, cq_id=1), mesh_composer=ttnn.ConcatMeshToTensor(mesh_device, dim=1)
     )[0, 0, 0, :batch_size]
     ttnn.record_event(1, write_event)
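
For readers skimming the diff, here is a hedged, annotated sketch of the same readback step as it stands after this commit. It assumes the surrounding state from run_llama3_demo (ttnn imported, and mesh_device, tt_out_tok, op_event, write_event, and batch_size already set up); the comments are explanatory and are not part of the demo source.

    ttnn.wait_for_event(1, op_event)  # command queue 1 waits until op_event has been recorded by the producing op
    tt_output_torch = ttnn.to_torch(
        # blocking=True: the .cpu() readback on command queue 1 completes before
        # to_torch converts the host tensor, so the token data is valid when sliced below
        tt_out_tok.cpu(blocking=True, cq_id=1),
        mesh_composer=ttnn.ConcatMeshToTensor(mesh_device, dim=1),  # concatenate per-device shards along dim 1
    )[0, 0, 0, :batch_size]
    ttnn.record_event(1, write_event)  # record write_event on command queue 1 so later work can wait on this readback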
