Skip to content

Commit d64a56c

Browse files
bnoordhuisrvagg
authored andcommitted
cluster: remove handles when disconnecting worker
Due to the race window between the master's "disconnect" message and the worker's "handle received" message, connections sometimes got stuck in the pending handles queue when calling `worker.disconnect()` in the master process. The observable effect from the client's perspective was a TCP or HTTP connection that simply stalled. This commit fixes that by closing open handles in the master when the "disconnect" message is sent. Fixes: #3551 PR-URL: #3677 Reviewed-By: Colin Ihrig <[email protected]> Reviewed-By: Fedor Indutny <[email protected]> Reviewed-By: James M Snell <[email protected]>
1 parent b48dbf9 commit d64a56c

File tree

4 files changed

+96
-23
lines changed

4 files changed

+96
-23
lines changed

‎lib/cluster.js‎

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ function masterInit(){
217217
// Keyed on address:port:etc. When a worker dies, we walk over the handles
218218
// and remove() the worker from each one. remove() may do a linear scan
219219
// itself so we might end up with an O(n*m) operation. Ergo, FIXME.
220-
varhandles={};
220+
consthandles=require('internal/cluster').handles;
221221

222222
varinitialized=false;
223223
cluster.setupMaster=function(options){
@@ -308,6 +308,26 @@ function masterInit(){
308308

309309
varids=0;
310310

311+
functionremoveWorker(worker){
312+
assert(worker);
313+
314+
deletecluster.workers[worker.id];
315+
316+
if(Object.keys(cluster.workers).length===0){
317+
assert(Object.keys(handles).length===0,'Resource leak detected.');
318+
intercom.emit('disconnect');
319+
}
320+
}
321+
322+
functionremoveHandlesForWorker(worker){
323+
assert(worker);
324+
325+
for(varkeyinhandles){
326+
varhandle=handles[key];
327+
if(handle.remove(worker))deletehandles[key];
328+
}
329+
}
330+
311331
cluster.fork=function(env){
312332
cluster.setupMaster();
313333
constid=++ids;
@@ -319,26 +339,6 @@ function masterInit(){
319339

320340
worker.on('message',this.emit.bind(this,'message'));
321341

322-
functionremoveWorker(worker){
323-
assert(worker);
324-
325-
deletecluster.workers[worker.id];
326-
327-
if(Object.keys(cluster.workers).length===0){
328-
assert(Object.keys(handles).length===0,'Resource leak detected.');
329-
intercom.emit('disconnect');
330-
}
331-
}
332-
333-
functionremoveHandlesForWorker(worker){
334-
assert(worker);
335-
336-
for(varkeyinhandles){
337-
varhandle=handles[key];
338-
if(handle.remove(worker))deletehandles[key];
339-
}
340-
}
341-
342342
worker.process.once('exit',function(exitCode,signalCode){
343343
/*
344344
* Remove the worker from the workers list only
@@ -404,6 +404,8 @@ function masterInit(){
404404
Worker.prototype.disconnect=function(){
405405
this.suicide=true;
406406
send(this,{act: 'disconnect'});
407+
removeHandlesForWorker(this);
408+
removeWorker(this);
407409
};
408410

409411
Worker.prototype.destroy=function(signo){
@@ -490,11 +492,12 @@ function masterInit(){
490492
cluster.emit('listening',worker,info);
491493
}
492494

493-
// Server in worker is closing, remove from list.
495+
// Server in worker is closing, remove from list. The handle may have been
496+
// removed by a prior call to removeHandlesForWorker() so guard against that.
494497
functionclose(worker,message){
495498
varkey=message.key;
496499
varhandle=handles[key];
497-
if(handle.remove(worker))deletehandles[key];
500+
if(handle&&handle.remove(worker))deletehandles[key];
498501
}
499502

500503
functionsend(worker,message,handle,cb){

‎lib/internal/cluster.js‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
'use strict';
2+
3+
// Used in tests.
4+
exports.handles={};

‎node.gyp‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
'lib/vm.js',
7171
'lib/zlib.js',
7272
'lib/internal/child_process.js',
73+
'lib/internal/cluster.js',
7374
'lib/internal/freelist.js',
7475
'lib/internal/linkedlist.js',
7576
'lib/internal/module.js',
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/* eslint-disable no-debugger */
2+
// Flags: --expose_internals
3+
'use strict';
4+
5+
constcommon=require('../common');
6+
constassert=require('assert');
7+
constcluster=require('cluster');
8+
constnet=require('net');
9+
10+
constProtocol=require('_debugger').Protocol;
11+
12+
if(common.isWindows){
13+
console.log('1..0 # Skipped: SCHED_RR not reliable on Windows');
14+
return;
15+
}
16+
17+
cluster.schedulingPolicy=cluster.SCHED_RR;
18+
19+
// Worker sends back a "I'm here" message, then immediately suspends
20+
// inside the debugger. The master connects to the debug agent first,
21+
// connects to the TCP server second, then disconnects the worker and
22+
// unsuspends it again. The ultimate goal of this tortured exercise
23+
// is to make sure the connection is still sitting in the master's
24+
// pending handle queue.
25+
if(cluster.isMaster){
26+
consthandles=require('internal/cluster').handles;
27+
// FIXME(bnoordhuis) lib/cluster.js scans the execArgv arguments for
28+
// debugger flags and renumbers any port numbers it sees starting
29+
// from the default port 5858. Add a '.' that circumvents the
30+
// scanner but is ignored by atoi(3). Heinous hack.
31+
cluster.setupMaster({execArgv: [`--debug=${common.PORT}.`]});
32+
constworker=cluster.fork();
33+
worker.on('message',common.mustCall(message=>{
34+
assert.strictEqual(Array.isArray(message),true);
35+
assert.strictEqual(message[0],'listening');
36+
constaddress=message[1];
37+
consthost=address.address;
38+
constdebugClient=net.connect({ host,port: common.PORT});
39+
constprotocol=newProtocol();
40+
debugClient.setEncoding('utf8');
41+
debugClient.on('data',data=>protocol.execute(data));
42+
debugClient.once('connect',common.mustCall(()=>{
43+
protocol.onResponse=common.mustCall(res=>{
44+
protocol.onResponse=()=>{};
45+
constconn=net.connect({ host,port: address.port});
46+
conn.once('connect',common.mustCall(()=>{
47+
conn.destroy();
48+
assert.notDeepStrictEqual(handles,{});
49+
worker.disconnect();
50+
assert.deepStrictEqual(handles,{});
51+
constreq=protocol.serialize({command: 'continue'});
52+
debugClient.write(req);
53+
}));
54+
});
55+
}));
56+
}));
57+
process.on('exit',()=>assert.deepStrictEqual(handles,{}));
58+
}else{
59+
constserver=net.createServer(socket=>socket.pipe(socket));
60+
server.listen(()=>{
61+
process.send(['listening',server.address()]);
62+
debugger;
63+
});
64+
process.on('disconnect',process.exit);
65+
}

0 commit comments

Comments
(0)