Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Track application restart generations #2564

Open
wants to merge 25 commits into
base: stable-6.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
20df215
add generation field to process
CamJN Sep 9, 2024
744c2b2
use process generation to inform routing
CamJN Sep 9, 2024
b5dd97a
missed changes
CamJN Sep 9, 2024
df6f2ac
address feedback
CamJN Sep 22, 2024
38187b2
add a test to process test suite
CamJN Sep 22, 2024
a376e8e
add another test
CamJN Sep 23, 2024
824166d
test stub for proper testing of routing algo
CamJN Sep 23, 2024
60d254b
impl new algo
CamJN Sep 27, 2024
6b739f3
update changelog
CamJN Sep 30, 2024
fd21774
Fix indentation
FooBarWidget Oct 3, 2024
c0d99a6
Remove the new test in ProcessTest.cpp because it's pointless
FooBarWidget Oct 3, 2024
f8481e1
Fix typo, set test name
FooBarWidget Oct 3, 2024
6ab1b52
Use spawnStartTime instead of spawnerCreationTime
FooBarWidget Oct 3, 2024
fc86370
Merge branch 'stable-6.0' into feature/app_generations
FooBarWidget Oct 4, 2024
ee8e35c
Merge branch 'stable-6.0' into feature/app_generations
FooBarWidget Oct 12, 2024
cec7638
remove unused headers
CamJN Oct 14, 2024
a45c981
change test condition per hongli’s request
CamJN Oct 14, 2024
8cff85c
don’t select a totally busy process
CamJN Oct 14, 2024
b40c7d2
fallback to best possible process in case all are totally busy
CamJN Oct 14, 2024
506dccd
Merge remote-tracking branch 'origin/stable-6.0' into feature/app_gen…
CamJN Oct 16, 2024
04fb08b
Merge remote-tracking branch 'origin/stable-6.0' into feature/app_gen…
CamJN Oct 17, 2024
dec5f73
fix pool test for process generation incrementing
CamJN Oct 17, 2024
6de82a7
try to make test 8 match new expectations of process selection
CamJN Oct 18, 2024
3b0605a
test doesn’t process requests on totallybusy processes obviously
CamJN Oct 21, 2024
815e8e3
Merge remote-tracking branch 'origin/stable-6.0' into feature/app_gen…
CamJN Oct 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Release 6.0.24 (Not yet released)
-------------
*
* Update the order Passenger routes requests to app processes. Processes are now chosen based on being in the latest generation (Enterprise), then by newest process, then by oldest, then by
busyness. Closes GH-2551.


Release 6.0.23
Expand Down
6 changes: 3 additions & 3 deletions src/agent/Core/ApplicationPool/Group.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,9 @@ class Group: public boost::enable_shared_from_this<Group> {
/****** Process list management ******/

Process *findProcessWithStickySessionId(unsigned int id) const;
Process *findProcessWithStickySessionIdOrLowestBusyness(unsigned int id) const;
Process *findProcessWithLowestBusyness(const ProcessList &processes) const;
Process *findEnabledProcessWithLowestBusyness() const;
Process *findBestProcessPreferringStickySessionId(unsigned int id) const;
Process *findBestProcess(const ProcessList &processes) const;
Process *findBestEnabledProcess() const;

void addProcessToList(const ProcessPtr &process, ProcessList &destination);
void removeProcessFromList(const ProcessPtr &process, ProcessList &source);
Expand Down
4 changes: 2 additions & 2 deletions src/agent/Core/ApplicationPool/Group/InternalUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ Group::createNullProcessObject() {
LockGuard l(context->memoryManagementSyncher);
Process *process = context->processObjectPool.malloc();
Guard guard(context, process);
process = new (process) Process(&info, args);
process = new (process) Process(&info, info.group->restartsInitiated, args);
process->shutdownNotRequired();
guard.clear();
return ProcessPtr(process, false);
Expand Down Expand Up @@ -215,7 +215,7 @@ Group::createProcessObject(const SpawningKit::Spawner &spawner,
LockGuard l(context->memoryManagementSyncher);
Process *process = context->processObjectPool.malloc();
Guard guard(context, process);
process = new (process) Process(&info, spawnResult, args);
process = new (process) Process(&info, info.group->restartsInitiated, spawnResult, args);
guard.clear();
return ProcessPtr(process, false);
}
Expand Down
84 changes: 47 additions & 37 deletions src/agent/Core/ApplicationPool/Group/ProcessListManagement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,71 +58,81 @@ Group::findProcessWithStickySessionId(unsigned int id) const {
}

Process *
Group::findProcessWithStickySessionIdOrLowestBusyness(unsigned int id) const {
int leastBusyProcessIndex = -1;
int lowestBusyness = 0;
unsigned int i, size = enabledProcessBusynessLevels.size();
const int *enabledProcessBusynessLevels = &this->enabledProcessBusynessLevels[0];

for (i = 0; i < size; i++) {
Process *process = enabledProcesses[i].get();
Group::findBestProcessPreferringStickySessionId(unsigned int id) const {
Process *bestProcess = nullptr;
ProcessList::const_iterator it;
ProcessList::const_iterator end = enabledProcesses.end();
for (it = enabledProcesses.begin(); it != end; it++) {
Process *process = (*it).get();
if (process->getStickySessionId() == id) {
return process;
} else if (leastBusyProcessIndex == -1 || enabledProcessBusynessLevels[i] < lowestBusyness) {
leastBusyProcessIndex = i;
lowestBusyness = enabledProcessBusynessLevels[i];
} else if (bestProcess == nullptr ||
FooBarWidget marked this conversation as resolved.
Show resolved Hide resolved
process->generation > bestProcess->generation ||
(process->generation == bestProcess->generation && process->spawnerCreationTime < bestProcess->spawnerCreationTime) ||
FooBarWidget marked this conversation as resolved.
Show resolved Hide resolved
(process->generation == bestProcess->generation && process->spawnerCreationTime == bestProcess->spawnerCreationTime && process->busyness() < bestProcess->busyness())
) {
bestProcess = process;
}
}

if (leastBusyProcessIndex == -1) {
return NULL;
} else {
return enabledProcesses[leastBusyProcessIndex].get();
}
return bestProcess;
}

Process *
Group::findProcessWithLowestBusyness(const ProcessList &processes) const {
Group::findBestProcess(const ProcessList &processes) const {
if (processes.empty()) {
return NULL;
return nullptr;
}

int lowestBusyness = -1;
Process *leastBusyProcess = NULL;
Process *bestProcess = nullptr;
ProcessList::const_iterator it;
ProcessList::const_iterator end = processes.end();
for (it = processes.begin(); it != end; it++) {
Process *process = (*it).get();
int busyness = process->busyness();
if (lowestBusyness == -1 || lowestBusyness > busyness) {
lowestBusyness = busyness;
leastBusyProcess = process;

if (bestProcess == nullptr ||
process->generation > bestProcess->generation ||
(process->generation == bestProcess->generation && process->spawnerCreationTime < bestProcess->spawnerCreationTime) ||
(process->generation == bestProcess->generation && process->spawnerCreationTime == bestProcess->spawnerCreationTime && process->busyness() < bestProcess->busyness())
) {
bestProcess = process;
}
}
return leastBusyProcess;
return bestProcess;
}

/**
* Cache-optimized version of findProcessWithLowestBusyness() for the common case.
* Cache-optimized version of findBestProcess() for the common case.
*/
Process *
Group::findEnabledProcessWithLowestBusyness() const {
Group::findBestEnabledProcess() const {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My head is clearer today and I thought of a problem with the routing algorithm. Recall that the algorithm has now turned into essentially this:

Select the first process according to the order generation DESC, startTime ASC, busyness ASC.

This means that during a restart, new requests are only routed to next-generation processes, while previous-generation processes are only allowed to finish their existing requests. That's pretty bad.

I suggest a two-phase routing algorithm:

  1. First, we select the "best process" according to the algorithm as you have already coded in this branch.
  2. If it turns out that this process isTotallyBusy(), then we fallback to the old algorithm where we select the least busy process amongst all generations. This way we won't waste the old processes.

What do you think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes more sense to me than the last algorithm.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also since we know we're going to punt if the process isTotallyBusy() there's no need to loop twice, just reject if isTotallyBusy() is true as we iterate.

It does raise another question though, what do we do if all processes are totally busy?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I pushed a possible solution. I keep track of the best process which is totally busy, until I find one that is not totally busy, at the end of the loop I return either the best process which is not totally busy or if all processes are totally busy, then the best totally busy process.

The mechanism for choosing which process is best is decided as above for both processes, but tracked separately.

if (enabledProcesses.empty()) {
return NULL;
return nullptr;
}

int leastBusyProcessIndex = -1;
int lowestBusyness = 0;
unsigned int i, size = enabledProcessBusynessLevels.size();
Process* bestProcess = nullptr;
unsigned long long bestProcessStartTime = 0;
unsigned int bestProcessGen = 0;
int bestProcessBusyness = 0;
unsigned int size = enabledProcessBusynessLevels.size();
const int *enabledProcessBusynessLevels = &this->enabledProcessBusynessLevels[0];

for (i = 0; i < size; i++) {
if (leastBusyProcessIndex == -1 || enabledProcessBusynessLevels[i] < lowestBusyness) {
leastBusyProcessIndex = i;
lowestBusyness = enabledProcessBusynessLevels[i];
for (unsigned int i = 0; i < size; i++) {
Process *process = enabledProcesses.at(i).get();
unsigned int gen = process->generation;
unsigned long long startTime = process->spawnerCreationTime;
int busyness = enabledProcessBusynessLevels[i];
if (bestProcess == nullptr ||
gen > bestProcess->generation ||
(gen == bestProcessGen && startTime < bestProcessStartTime) ||
(gen == bestProcessGen && startTime == bestProcessStartTime && busyness < bestProcessBusyness)
) {
bestProcess = process;
bestProcessGen = gen;
bestProcessBusyness = busyness;
bestProcessStartTime = startTime;
}
}
return enabledProcesses[leastBusyProcessIndex].get();
return bestProcess;
}

/**
Expand Down
8 changes: 4 additions & 4 deletions src/agent/Core/ApplicationPool/Group/SessionManagement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ Group::RouteResult
Group::route(const Options &options) const {
if (OXT_LIKELY(enabledCount > 0)) {
if (options.stickySessionId == 0) {
Process *process = findEnabledProcessWithLowestBusyness();
Process *process = findBestEnabledProcess();
if (process->canBeRoutedTo()) {
return RouteResult(process);
} else {
return RouteResult(NULL, true);
}
} else {
Process *process = findProcessWithStickySessionIdOrLowestBusyness(
Process *process = findBestProcessPreferringStickySessionId(
options.stickySessionId);
if (process != NULL) {
if (process->canBeRoutedTo()) {
Expand All @@ -78,7 +78,7 @@ Group::route(const Options &options) const {
}
}
} else {
Process *process = findProcessWithLowestBusyness(disablingProcesses);
Process *process = findBestProcess(disablingProcesses);
if (process->canBeRoutedTo()) {
return RouteResult(process);
} else {
Expand Down Expand Up @@ -304,7 +304,7 @@ Group::get(const Options &newOptions, const GetCallback &callback,
assert(m_spawning || restarting() || poolAtFullCapacity());

if (disablingCount > 0 && !restarting()) {
Process *process = findProcessWithLowestBusyness(disablingProcesses);
Process *process = findBestProcess(disablingProcesses);
assert(process != NULL);
if (!process->isTotallyBusy()) {
return newSession(process, newOptions.currentTime);
Expand Down
32 changes: 18 additions & 14 deletions src/agent/Core/ApplicationPool/Process.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

#include <string>
#include <vector>
#include <algorithm>
#include <boost/intrusive_ptr.hpp>
#include <boost/move/core.hpp>
#include <boost/container/vector.hpp>
Expand Down Expand Up @@ -102,6 +101,12 @@ class Process {
public:
static const unsigned int MAX_SOCKETS_ACCEPTING_HTTP_REQUESTS = 3;

/**
FooBarWidget marked this conversation as resolved.
Show resolved Hide resolved
* Time at which the Spawner that created this process was created.
* Microseconds resolution.
*/
unsigned long long spawnerCreationTime;

private:
/*************************************************************
* Read-only fields, set once during initialization and never
Expand Down Expand Up @@ -141,12 +146,6 @@ class Process {
*/
StaticString codeRevision;

/**
* Time at which the Spawner that created this process was created.
* Microseconds resolution.
*/
unsigned long long spawnerCreationTime;

/** Time at which we started spawning this process. Microseconds resolution. */
unsigned long long spawnStartTime;

Expand Down Expand Up @@ -384,6 +383,10 @@ class Process {

/** Last time when a session was opened for this Process. */
unsigned long long lastUsed;
/** Which gereration of app processes this one belongs to,
FooBarWidget marked this conversation as resolved.
Show resolved Hide resolved
inherited from the app group, incremented when a restart
is initiated*/
const unsigned int generation;
/** Number of sessions currently open.
* @invariant session >= 0
*/
Expand Down Expand Up @@ -446,18 +449,18 @@ class Process {
/** Collected by Pool::collectAnalytics(). */
ProcessMetrics metrics;


Process(const BasicGroupInfo *groupInfo, const Json::Value &args)
: info(this, groupInfo, args),
Process(const BasicGroupInfo *groupInfo, const unsigned int gen, const Json::Value &args)
: spawnerCreationTime(getJsonUint64Field(args, "spawner_creation_time")),
info(this, groupInfo, args),
socketsAcceptingHttpRequestsCount(0),
spawnerCreationTime(getJsonUint64Field(args, "spawner_creation_time")),
spawnStartTime(getJsonUint64Field(args, "spawn_start_time")),
spawnEndTime(SystemTime::getUsec()),
type(args["type"] == "dummy" ? SpawningKit::Result::DUMMY : SpawningKit::Result::UNKNOWN),
requiresShutdown(false),
refcount(1),
index(-1),
lastUsed(spawnEndTime),
generation(gen),
sessions(0),
processed(0),
lifeStatus(ALIVE),
Expand All @@ -471,18 +474,19 @@ class Process {
indexSocketsAcceptingHttpRequests();
}

Process(const BasicGroupInfo *groupInfo, const SpawningKit::Result &skResult,
Process(const BasicGroupInfo *groupInfo, const unsigned int gen, const SpawningKit::Result &skResult,
const Json::Value &args)
: info(this, groupInfo, skResult),
: spawnerCreationTime(getJsonUint64Field(args, "spawner_creation_time")),
info(this, groupInfo, skResult),
socketsAcceptingHttpRequestsCount(0),
spawnerCreationTime(getJsonUint64Field(args, "spawner_creation_time")),
spawnStartTime(skResult.spawnStartTime),
spawnEndTime(skResult.spawnEndTime),
type(skResult.type),
requiresShutdown(false),
refcount(1),
index(-1),
lastUsed(spawnEndTime),
generation(gen),
sessions(0),
processed(0),
lifeStatus(ALIVE),
Expand Down
61 changes: 60 additions & 1 deletion test/cxx/Core/ApplicationPool/PoolTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include <FileTools/FileManip.h>
#include <StrIntTools/StrIntUtils.h>
#include <IOTools/MessageSerialization.h>
#include <map>
#include <vector>
#include <cerrno>
#include <signal.h>
Expand Down Expand Up @@ -632,6 +631,66 @@ namespace tut {
ensure_equals(pool->getGroupCount(), 0u);
}

TEST_METHOD(15) {
// Test that the process generation increments when the group restarts
FooBarWidget marked this conversation as resolved.
Show resolved Hide resolved
Options options = createOptions();

// Spawn a process and opens a session with it.
pool->setMax(1);
pool->asyncGet(options, callback);
EVENTUALLY(5,
result = number == 1;
);

// Close the session so that the process is now idle.
ProcessPtr process = currentSession->getProcess()->shared_from_this();
currentSession.reset();
unsigned int gen1 = process->generation;

ensure(pool->restartGroupByName(options.appRoot));
EVENTUALLY(5,
CamJN marked this conversation as resolved.
Show resolved Hide resolved
result = pool->getProcessCount() == 1;
);
pool->asyncGet(options, callback);
EVENTUALLY(5,
result = number == 2;
);

process = currentSession->getProcess()->shared_from_this();
currentSession.reset();
unsigned int gen2 = process->generation;
ensure_equals(gen1+1,gen2);
}

TEST_METHOD(16) {
// Test that the correct process from the pool is routed
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is incomplete.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know, I asked for help with this one.

Options options = createOptions();
ensureMinProcesses(2);

// async restart the group
ensure(pool->restartGroupByName(options.appRoot));
ensureMinProcesses(1);

/*
Imagine we have these processes (ordered from oldest to newest):

#. PID 1 (generation A, busyness 5)
#. PID 2 (generation A, busyness 3)
#. PID 3 (generation B, busyness 1)

The algorithm should select PID 3
*/

/*
Imagine we have these processes (ordered from oldest to newest):

#. PID 1 (generation A, busyness 1)
#. PID 2 (generation B, busyness 5)

The algorithm should select PID 1
*/
}

TEST_METHOD(17) {
// Test that restartGroupByName() spawns more processes to ensure
// that minProcesses and other constraints are met.
Expand Down
Loading