Skip to content

Commit

Permalink
Update NCCL report strategy and tests to enforce correct data types (#…
Browse files (browse the repository at this point in the history)
  • Loading branch information
TaekyungHeo authored Oct 21, 2024
1 parent 13ac421 commit b763407
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 9 deletions.
24 changes: 19 additions & 5 deletions src/cloudai/report_generator/util.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,13 +21,24 @@

bokeh_size_unit_js_tick_formatter = """
function tick_formatter(tick) {
var units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB'];
var i = 0;
const units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB'];
let i = 0;
// Handle negative ticks and large values safely
if (tick < 0) {
return '0B'; // Handle negative numbers by returning 0B as a fallback
}
// Loop through units until tick is smaller than 1024 or max unit is reached
while (tick >= 1024 && i < units.length - 1) {
tick /= 1024;
i++;
}
return tick.toFixed(1) + units[i];
// Use Number.isInteger() to check if tick is an integer (ES6 feature)
return Number.isInteger(tick)
? `${Math.floor(tick)}${units[i]}` // If integer, no decimal
: `${tick.toFixed(1)}${units[i]}`; // Else, one decimal point
}
return tick_formatter(tick);
"""
Expand Down Expand Up @@ -62,9 +73,12 @@ def bytes_to_human_readable(num_bytes: float) -> str:
"""
for unit in ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"]:
if abs(num_bytes) < 1024.0:
return f"{num_bytes:3.1f}{unit}"
if num_bytes == int(num_bytes):
return f"{int(num_bytes)}{unit}"
else:
return f"{num_bytes:3.1f}{unit}"
num_bytes /= 1024.0
return f"{num_bytes:.1f}YB"
return f"{num_bytes}YB"


def add_human_readable_sizes(
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -69,12 +69,15 @@ def generate_report(self, test_name: str, directory_path: Path, sol: Optional[fl
"#Wrong In-place",
],
)
df["Size (B)"] = df["Size (B)"].astype(float)
df["Size (B)"] = df["Size (B)"].astype(int)
df["Time (us) Out-of-place"] = df["Time (us) Out-of-place"].astype(float).round(1)
df["Time (us) In-place"] = df["Time (us) In-place"].astype(float).round(1)
df["Algbw (GB/s) Out-of-place"] = df["Algbw (GB/s) Out-of-place"].astype(float)
df["Busbw (GB/s) Out-of-place"] = df["Busbw (GB/s) Out-of-place"].astype(float)
df["Algbw (GB/s) In-place"] = df["Algbw (GB/s) In-place"].astype(float)
df["Busbw (GB/s) In-place"] = df["Busbw (GB/s) In-place"].astype(float)
df = add_human_readable_sizes(df, "Size (B)", "Size Human-readable")

self._generate_bokeh_report(test_name, df, directory_path, sol)
self._generate_csv_report(df, directory_path)

Expand Down
14 changes: 11 additions & 3 deletions tests/test_slurm_report_generation_strategy.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -85,13 +85,21 @@ def test_nccl_report_generation(setup_test_environment):
df = pd.read_csv(csv_report_path)
assert not df.empty, "CSV report is empty."

# Validate specific values if needed
# Validate data types
assert df["Size (B)"].dtype == int, "Size (B) is not an integer."
assert df["Time (us) Out-of-place"].dtype == float, "Time (us) Out-of-place is not a float."
assert df["Time (us) In-place"].dtype == float, "Time (us) In-place is not a float."

# Validate human-readable sizes
assert df.iloc[0]["Size Human-readable"] == "976.6KB", "First row Size Human-readable does not match."
assert df.iloc[-1]["Size Human-readable"] == "11.4MB", "Last row Size Human-readable does not match."

# Example: Checking that the first entry matches the expected value
assert df.iloc[0]["Size (B)"] == 1000000.0, "First row Size (B) does not match."
assert df.iloc[0]["Size (B)"] == 1000000, "First row Size (B) does not match."
assert df.iloc[0]["Algbw (GB/s) Out-of-place"] == 10.10, "First row Algbw (GB/s) Out-of-place does not match."
assert df.iloc[0]["Busbw (GB/s) Out-of-place"] == 20.20, "First row Busbw (GB/s) Out-of-place does not match."

# Checking that the last entry matches the expected value
assert df.iloc[-1]["Size (B)"] == 12000000.0, "Last row Size (B) does not match."
assert df.iloc[-1]["Size (B)"] == 12000000, "Last row Size (B) does not match."
assert df.iloc[-1]["Algbw (GB/s) Out-of-place"] == 120.30, "Last row Algbw (GB/s) Out-of-place does not match."
assert df.iloc[-1]["Busbw (GB/s) Out-of-place"] == 130.40, "Last row Busbw (GB/s) Out-of-place does not match."

0 comments on commit b763407

Please sign in to comment.