Skip to content

Commit

Permalink
skip old architecture version GPU settings time slice
Browse files Browse the repository at this point in the history
Signed-off-by: wawa0210 <[email protected]>
  • Loading branch information
wawa0210 committed Jan 29, 2024
1 parent 58ae162 commit f08aeb2
Showing 1 changed file with 50 additions and 2 deletions.
52 changes: 50 additions & 2 deletions cmd/nvidia-dra-plugin/sharing.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,32 @@ func (t *TimeSlicingManager) SetTimeSlice(devices *PreparedDevices, config *nasc
return fmt.Errorf("setting a TimeSlice duration on MIG devices is unsupported")
}

noSupportTimeSliceIDs := []string{}
for _, gpu := range devices.Gpu.Devices {
if !detectSupportTimeSliceByArch(gpu.architecture) {
klog.InfoS("the current card does not support setting time slices and will be ignored.", "arch", gpu.architecture, "uuid", gpu)
noSupportTimeSliceIDs = append(noSupportTimeSliceIDs, gpu.uuid)
continue
}
}

supportTimeSliceIDs := difference(devices.UUIDs(), noSupportTimeSliceIDs)

if len(supportTimeSliceIDs) == 0 {
klog.InfoS("all card does not support setting time slices and will be ignored.", "uuids", noSupportTimeSliceIDs)
}

timeSlice := nascrd.DefaultTimeSlice
if config != nil && config.TimeSlice != nil {
timeSlice = *config.TimeSlice
}

err := t.nvdevlib.setComputeMode(devices.UUIDs(), "DEFAULT")
err := t.nvdevlib.setComputeMode(supportTimeSliceIDs, "DEFAULT")
if err != nil {
return fmt.Errorf("error setting compute mode: %w", err)
}

err = t.nvdevlib.setTimeSlice(devices.UUIDs(), timeSlice.Int())
err = t.nvdevlib.setTimeSlice(supportTimeSliceIDs, timeSlice.Int())
if err != nil {
return fmt.Errorf("error setting time slice: %w", err)
}
Expand Down Expand Up @@ -389,3 +404,36 @@ func (m *MpsControlDaemon) Stop(ctx context.Context) error {

return nil
}

// detectSupportTimeSliceByArch reports whether GPUs of the given architecture
// series support setting a time slice.
//
// Only "Pascal" is currently reported as unsupported. Every other value,
// including unrecognized or empty names, is treated as supported. The match
// is an exact, case-sensitive string comparison.
func detectSupportTimeSliceByArch(arch string) bool {
	// TODO: more information is needed to determine the support of various architectures.
	switch arch {
	case "Pascal":
		return false
	case "Ada", "Ampere", "Turing", "Hopper":
		// Go switch cases do not fall through, so the known-good
		// architectures are listed in a single case clause.
		return true
	default:
		// Unknown architectures are treated as supported.
		return true
	}
}

// difference returns the elements of `a` that do not occur in `b`.
// The relative order of `a` is preserved, duplicates in `a` are kept,
// and the result is nil when no element of `a` survives.
func difference(a, b []string) []string {
	// Index b as a set for constant-time membership checks.
	excluded := make(map[string]struct{}, len(b))
	for i := range b {
		excluded[b[i]] = struct{}{}
	}

	var remaining []string
	for i := range a {
		if _, skip := excluded[a[i]]; skip {
			continue
		}
		remaining = append(remaining, a[i])
	}
	return remaining
}

0 comments on commit f08aeb2

Please sign in to comment.