Skip to content

Commit

Permalink
feat: auto resync when session expires or host reboots
Browse files Browse the repository at this point in the history
  • Loading branch information
HJ-Fan committed Sep 10, 2024
1 parent 643f744 commit b816540
Show file tree
Hide file tree
Showing 9 changed files with 104 additions and 24 deletions.
1 change: 1 addition & 0 deletions pkg/backend/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

type BackendOperations interface {
CreateSession(context.Context, *ConfigurationSettings, *CreateSessionRequest) (*CreateSessionResponse, error)
CheckSession(context.Context) bool
DeleteSession(context.Context, *ConfigurationSettings, *DeleteSessionRequest) (*DeleteSessionResponse, error)
GetMemoryResourceBlocks(context.Context, *ConfigurationSettings, *MemoryResourceBlocksRequest) (*MemoryResourceBlocksResponse, error)
GetMemoryResourceBlockById(context.Context, *ConfigurationSettings, *MemoryResourceBlockByIdRequest) (*MemoryResourceBlockByIdResponse, error)
Expand Down
25 changes: 12 additions & 13 deletions pkg/backend/httpfish.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,19 +321,6 @@ func (session *Session) queryWithJSON(operation HTTPOperationType, path string,
return response
}

// check for error due to session timeout ( service would return error code 401)
if session.xToken != "" && response.StatusCode == http.StatusUnauthorized {
// Re-authenticate
fmt.Print("Redfish session might have timed out. Re-authenticate. Warning! infinite loop might occur if the issue is from the redfish server.\n")
session.client = nil
session.xToken = ""
err := session.auth()
if err == nil {
path = session.buildPath(SessionServiceKey, session.RedfishSessionId)
response = session.queryWithJSON(operation, path, jsonData)
}
}

return response
}

Expand Down Expand Up @@ -544,6 +531,18 @@ func (service *httpfishService) CreateSession(ctx context.Context, settings *Con
return &CreateSessionResponse{SessionId: session.SessionId, Status: "Success", ServiceError: nil, ChassisSN: session.BladeSN, EnclosureSN: session.ApplianceSN}, nil
}

// CheckSession reports whether the redfish session held by this service is still alive.
//
// It issues a GET against the session resource identified by the stored
// redfish session id and returns true when that query completes without an
// error. When no *Session is attached to the service it returns false
// instead of panicking on the type assertion.
func (service *httpfishService) CheckSession(ctx context.Context) bool {
	logger := klog.FromContext(ctx)
	logger.V(4).Info("====== CheckSession ======")

	// Use the comma-ok form: a missing or foreign session value must be
	// reported as "not alive", not crash the caller.
	session, ok := service.service.session.(*Session)
	if !ok || session == nil {
		logger.V(2).Info("check session: no active redfish session")
		return false
	}
	logger.V(4).Info("check session", "session id", session.SessionId, "redfish session id", session.RedfishSessionId)

	response := session.query(HTTPOperation.GET, session.buildPath(SessionServiceKey, session.RedfishSessionId))

	return response.err == nil
}

// DeleteSession: Delete a session previously established with an endpoint service
func (service *httpfishService) DeleteSession(ctx context.Context, settings *ConfigurationSettings, req *DeleteSessionRequest) (*DeleteSessionResponse, error) {
logger := klog.FromContext(ctx)
Expand Down
3 changes: 2 additions & 1 deletion pkg/common/parameters.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import (
)

const (
	NumUuidCharsForId      = 4    // Number of chars to strip from an internally generated uuid (starting from the right) for use in the internally generated IDs for appliance, blade and host
	SyncChekTimeoutSeconds = 30.0 // Number of seconds since the last recorded sync after which a blade or host is treated as possibly out of sync and its backend session is re-checked
)
const (
DefaultBackend = "httpfish" // Default backend interface
Expand Down
21 changes: 20 additions & 1 deletion pkg/manager/appliance.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ func (a *Appliance) DeleteBladeById(ctx context.Context, bladeId string) (*Blade
logger.V(2).Info("force blade deletion after backend session failure", "bladeId", blade.Id, "applianceId", a.Id)
delete(a.Blades, blade.Id)

return nil, &common.RequestError{StatusCode: common.StatusBladeDeleteSessionFailure, Err: newErr}
return blade, &common.RequestError{StatusCode: common.StatusBladeDeleteSessionFailure, Err: newErr} // Still return the blade for recovery
}

// delete blade
Expand All @@ -224,6 +224,7 @@ func (a *Appliance) GetAllBladeIds() []string {
func (a *Appliance) GetBladeById(ctx context.Context, bladeId string) (*Blade, error) {
logger := klog.FromContext(ctx)
logger.V(4).Info(">>>>>> GetBladeById: ", "bladeId", bladeId, "applianceId", a.Id)
var err error

blade, ok := a.Blades[bladeId]
if !ok {
Expand All @@ -232,6 +233,24 @@ func (a *Appliance) GetBladeById(ctx context.Context, bladeId string) (*Blade, e
return nil, &common.RequestError{StatusCode: common.StatusBladeIdDoesNotExist, Err: newErr}
}

// Check for resync
if !blade.CheckSync(ctx) {
logger.V(2).Info("GetBladeById: blade might be out of sync", "bladeId", bladeId)
ok := blade.backendOps.CheckSession(ctx)
if !ok {
blade, err = a.ResyncBladeById(ctx, bladeId)
if err != nil {
newErr := fmt.Errorf("failed to resync host(add): host [%s]: %w", bladeId, err)
logger.Error(newErr, "failure: resync host")
return nil, &common.RequestError{StatusCode: err.(*common.RequestError).StatusCode, Err: newErr}
} else {
logger.V(2).Info("success: auto resync host", "bladeId", bladeId)
}
} else {
blade.SetSync(ctx)
}
}

logger.V(2).Info("success: get blade by id", "bladeId", blade.Id, "applianceId", a.Id)

return blade, nil
Expand Down
26 changes: 24 additions & 2 deletions pkg/manager/blade.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"math"
"sort"
"strings"
"time"

"cfm/pkg/backend"
"cfm/pkg/common"
Expand All @@ -33,8 +34,9 @@ type Blade struct {
ResourceSizeMib int32

// Backend access data
backendOps backend.BackendOperations
creds *openapi.Credentials // Used during resync
backendOps backend.BackendOperations
creds *openapi.Credentials // Used during resync
lastSyncTimeStamp time.Time
}

type RequestNewBlade struct {
Expand Down Expand Up @@ -69,11 +71,31 @@ func NewBlade(ctx context.Context, r *RequestNewBlade) (*Blade, error) {
return nil, newErr
}

b.SetSync(ctx)

logger.V(2).Info("success: new blade", "bladeId", b.Id, "applianceId", b.ApplianceId)

return &b, nil
}

// SetSync records the current time as the blade's last sync timestamp.
func (b *Blade) SetSync(ctx context.Context) {
	klog.FromContext(ctx).V(3).Info(">>>>>> SetSyncFlag(Blade): ", "bladeId", b.Id)

	now := time.Now()
	b.lastSyncTimeStamp = now
}

// CheckSync reports whether the blade's last sync timestamp is still within
// common.SyncChekTimeoutSeconds of now.
//
// When the timestamp is still fresh it is renewed via SetSync and true is
// returned; otherwise the timestamp is left untouched and false is returned.
func (b *Blade) CheckSync(ctx context.Context) bool {
	klog.FromContext(ctx).V(2).Info(">>>>>> CheckSyncFlag(Blade): ", "bladeId", b.Id)

	elapsedSeconds := time.Since(b.lastSyncTimeStamp).Seconds()
	if elapsedSeconds <= common.SyncChekTimeoutSeconds {
		b.SetSync(ctx) // renew the timestamp
		return true
	}

	return false
}

type RequestAssignMemory struct {
MemoryId string
PortId string
Expand Down
26 changes: 24 additions & 2 deletions pkg/manager/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"fmt"
"strings"
"time"

"cfm/pkg/backend"
"cfm/pkg/common"
Expand All @@ -26,8 +27,9 @@ type Host struct {
Memory map[string]*HostMemory

// Backend access data
backendOps backend.BackendOperations
creds *openapi.Credentials // Used during resync
backendOps backend.BackendOperations
creds *openapi.Credentials // Used during resync
lastSyncTimeStamp time.Time
}

var HostMemoryDomain = map[string]openapi.MemoryType{
Expand Down Expand Up @@ -65,11 +67,31 @@ func NewHost(ctx context.Context, r *RequestNewHost) (*Host, error) {
return nil, newErr
}

h.SetSync(ctx)

logger.V(2).Info("success: new host", "hostId", h.Id)

return &h, nil
}

// SetSync records the current time as the host's last sync timestamp.
func (h *Host) SetSync(ctx context.Context) {
	klog.FromContext(ctx).V(3).Info(">>>>>> SetSyncFlag(Host): ", "hostId", h.Id)

	now := time.Now()
	h.lastSyncTimeStamp = now
}

// CheckSync reports whether the host's last sync timestamp is still within
// common.SyncChekTimeoutSeconds of now.
//
// When the timestamp is still fresh it is renewed via SetSync and true is
// returned; otherwise the timestamp is left untouched and false is returned.
func (h *Host) CheckSync(ctx context.Context) bool {
	klog.FromContext(ctx).V(2).Info(">>>>>> CheckSyncFlag(Host): ", "hostId", h.Id)

	elapsedSeconds := time.Since(h.lastSyncTimeStamp).Seconds()
	if elapsedSeconds <= common.SyncChekTimeoutSeconds {
		h.SetSync(ctx) // renew the timestamp
		return true
	}

	return false
}

func (h *Host) ComposeMemory(ctx context.Context, r *RequestComposeMemory) (*openapi.MemoryRegion, error) {
logger := klog.FromContext(ctx)
logger.V(4).Info(">>>>>> ComposeMemory(Host): ", "request", r, "hostId", h.Id)
Expand Down
20 changes: 19 additions & 1 deletion pkg/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ func DeleteHostById(ctx context.Context, hostId string) (*Host, error) {
logger.V(2).Info("force host deletion after backend session failure", "hostId", host.Id)
deviceCache.DeleteHostById(host.Id)

return nil, &common.RequestError{StatusCode: common.StatusHostDeleteSessionFailure, Err: newErr}
return host, &common.RequestError{StatusCode: common.StatusHostDeleteSessionFailure, Err: newErr} // Still return the host for recovery
}

// delete host from cache
Expand Down Expand Up @@ -326,6 +326,24 @@ func GetHostById(ctx context.Context, hostId string) (*Host, error) {
return nil, &common.RequestError{StatusCode: err.(*common.RequestError).StatusCode, Err: newErr}
}

// Check for resync
if !host.CheckSync(ctx) {
logger.V(2).Info("GetHostById: host might be out of sync", "hostId", hostId)
ok := host.backendOps.CheckSession(ctx)
if !ok {
host, err = ResyncHostById(ctx, hostId)
if err != nil {
newErr := fmt.Errorf("failed to resync host(add): host [%s]: %w", hostId, err)
logger.Error(newErr, "failure: resync host")
return nil, &common.RequestError{StatusCode: err.(*common.RequestError).StatusCode, Err: newErr}
} else {
logger.V(2).Info("success: auto resync host", "hostId", hostId)
}
} else {
host.SetSync(ctx)
}
}

logger.V(2).Info("success: get host by id", "hostId", hostId)

return host, nil
Expand Down
3 changes: 1 addition & 2 deletions pkg/manager/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ func (m *HostMemory) InvalidateCache() {
}

// ValidateCache marks this host memory's cache as valid by setting
// cacheUpdated to true.
func (m *HostMemory) ValidateCache() {
	m.cacheUpdated = true
}

func (m *HostMemory) init(ctx context.Context) error {
Expand Down
3 changes: 1 addition & 2 deletions pkg/manager/memory_device.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,7 @@ func (d *HostMemoryDevice) InvalidateCache() {
}

// ValidateCache marks this host memory device's cache as valid by setting
// cacheUpdated to true.
func (d *HostMemoryDevice) ValidateCache() {
	d.cacheUpdated = true
}

func (d *HostMemoryDevice) init(ctx context.Context) error {
Expand Down

0 comments on commit b816540

Please sign in to comment.