VirtualBox

source: vbox/trunk/src/VBox/Main/src-server/ClientWatcher.cpp@ 48366

Last change on this file since 48366 was 48068, checked in by vboxsync, 11 years ago

Log details on VM processes which don't exit cleanly.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 24.9 KB
Line 
1/** @file
2 *
3 * VirtualBox API client crash watcher
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18#include <iprt/asm.h>
19#include <iprt/assert.h>
20#include <iprt/log.h>
21#include <iprt/semaphore.h>
22#include <iprt/process.h>
23
24#include <VBox/com/defs.h>
25
26#include <vector>
27
28#include "VirtualBoxBase.h"
29#include "AutoCaller.h"
30#include "ClientWatcher.h"
31#include "ClientToken.h"
32#include "VirtualBoxImpl.h"
33#include "MachineImpl.h"
34
#ifdef VBOX_WITH_SYS_V_IPC_SESSION_WATCHER
/**
 * Adaptive wait timeouts (in milliseconds) for the watcher thread, indexed by
 * the update counter.
 *
 * An update request sets the counter to the last index and the watcher then
 * counts it down to 0, so the short timeouts are used first and the longer
 * ones afterwards. This keeps the detection latency low right after a change
 * was announced (e.g. when a process crashes straight away).
 */
static const RTMSINTERVAL s_aUpdateTimeoutSteps[] =
{
    500, 200, 100, 50, 20, 10, 5
};
#endif
42
43
44
45VirtualBox::ClientWatcher::ClientWatcher() :
46 mLock(LOCKCLASS_OBJECTSTATE)
47{
48 AssertReleaseFailed();
49}
50
51VirtualBox::ClientWatcher::~ClientWatcher()
52{
53 if (mThread != NIL_RTTHREAD)
54 {
55 /* signal the client watcher thread, should be exiting now */
56 update();
57 /* wait for termination */
58 RTThreadWait(mThread, RT_INDEFINITE_WAIT, NULL);
59 mThread = NIL_RTTHREAD;
60 }
61 mProcesses.clear();
62#if defined(RT_OS_WINDOWS)
63 if (mUpdateReq != NULL)
64 {
65 ::CloseHandle(mUpdateReq);
66 mUpdateReq = NULL;
67 }
68#elif defined(RT_OS_OS2) || defined(VBOX_WITH_SYS_V_IPC_SESSION_WATCHER)
69 if (mUpdateReq != NIL_RTSEMEVENT)
70 {
71 RTSemEventDestroy(mUpdateReq);
72 mUpdateReq = NIL_RTSEMEVENT;
73 }
74#else
75# error "Port me!"
76#endif
77}
78
79VirtualBox::ClientWatcher::ClientWatcher(const ComObjPtr<VirtualBox> &pVirtualBox) :
80 mVirtualBox(pVirtualBox),
81 mThread(NIL_RTTHREAD),
82 mUpdateReq(CWUPDATEREQARG),
83 mLock(LOCKCLASS_OBJECTSTATE)
84{
85#if defined(RT_OS_WINDOWS)
86 mUpdateReq = ::CreateEvent(NULL, FALSE, FALSE, NULL);
87#elif defined(RT_OS_OS2)
88 RTSemEventCreate(&mUpdateReq);
89#elif defined(VBOX_WITH_SYS_V_IPC_SESSION_WATCHER)
90 RTSemEventCreate(&mUpdateReq);
91 /* start with high timeouts, nothing to do */
92 ASMAtomicUoWriteU8(&mUpdateAdaptCtr, 0);
93#else
94# error "Port me!"
95#endif
96
97 int vrc = RTThreadCreate(&mThread,
98 worker,
99 (void *)this,
100 0,
101 RTTHREADTYPE_MAIN_WORKER,
102 RTTHREADFLAGS_WAITABLE,
103 "Watcher");
104 AssertRC(vrc);
105}
106
107bool VirtualBox::ClientWatcher::isReady()
108{
109 return mThread != NIL_RTTHREAD;
110}
111
112/**
113 * Sends a signal to the thread to rescan the clients/VMs having open sessions.
114 */
115void VirtualBox::ClientWatcher::update()
116{
117 AssertReturnVoid(mThread != NIL_RTTHREAD);
118
119 /* sent an update request */
120#if defined(RT_OS_WINDOWS)
121 ::SetEvent(mUpdateReq);
122#elif defined(RT_OS_OS2)
123 RTSemEventSignal(mUpdateReq);
124#elif defined(VBOX_WITH_SYS_V_IPC_SESSION_WATCHER)
125 /* use short timeouts, as we expect changes */
126 ASMAtomicUoWriteU8(&mUpdateAdaptCtr, RT_ELEMENTS(s_aUpdateTimeoutSteps) - 1);
127 RTSemEventSignal(mUpdateReq);
128#else
129# error "Port me!"
130#endif
131}
132
133/**
134 * Adds a process to the list of processes to be reaped. This call should be
135 * followed by a call to update() to cause the necessary actions immediately,
136 * in case the process crashes straight away.
137 */
138void VirtualBox::ClientWatcher::addProcess(RTPROCESS pid)
139{
140 AssertReturnVoid(mThread != NIL_RTTHREAD);
141 /* @todo r=klaus, do the reaping on all platforms! */
142#ifndef RT_OS_WINDOWS
143 AutoWriteLock alock(mLock COMMA_LOCKVAL_SRC_POS);
144 mProcesses.push_back(pid);
145#endif
146}
147
148/**
149 * Thread worker function that watches the termination of all client processes
150 * that have open sessions using IMachine::LockMachine()
151 */
152/*static*/
153DECLCALLBACK(int) VirtualBox::ClientWatcher::worker(RTTHREAD /* thread */, void *pvUser)
154{
155 LogFlowFuncEnter();
156
157 VirtualBox::ClientWatcher *that = (VirtualBox::ClientWatcher *)pvUser;
158 Assert(that);
159
160 typedef std::vector<ComObjPtr<Machine> > MachineVector;
161 typedef std::vector<ComObjPtr<SessionMachine> > SessionMachineVector;
162
163 SessionMachineVector machines;
164 MachineVector spawnedMachines;
165
166 size_t cnt = 0;
167 size_t cntSpawned = 0;
168
169 VirtualBoxBase::initializeComForThread();
170
171#if defined(RT_OS_WINDOWS)
172
173 /// @todo (dmik) processes reaping!
174
175 HANDLE handles[MAXIMUM_WAIT_OBJECTS];
176 handles[0] = that->mUpdateReq;
177
178 do
179 {
180 AutoCaller autoCaller(that->mVirtualBox);
181 /* VirtualBox has been early uninitialized, terminate */
182 if (!autoCaller.isOk())
183 break;
184
185 do
186 {
187 /* release the caller to let uninit() ever proceed */
188 autoCaller.release();
189
190 DWORD rc = ::WaitForMultipleObjects((DWORD)(1 + cnt + cntSpawned),
191 handles,
192 FALSE,
193 INFINITE);
194
195 /* Restore the caller before using VirtualBox. If it fails, this
196 * means VirtualBox is being uninitialized and we must terminate. */
197 autoCaller.add();
198 if (!autoCaller.isOk())
199 break;
200
201 bool update = false;
202
203 if (rc == WAIT_OBJECT_0)
204 {
205 /* update event is signaled */
206 update = true;
207 }
208 else if (rc > WAIT_OBJECT_0 && rc <= (WAIT_OBJECT_0 + cnt))
209 {
210 /* machine mutex is released */
211 (machines[rc - WAIT_OBJECT_0 - 1])->checkForDeath();
212 update = true;
213 }
214 else if (rc > WAIT_ABANDONED_0 && rc <= (WAIT_ABANDONED_0 + cnt))
215 {
216 /* machine mutex is abandoned due to client process termination */
217 (machines[rc - WAIT_ABANDONED_0 - 1])->checkForDeath();
218 update = true;
219 }
220 else if (rc > WAIT_OBJECT_0 + cnt && rc <= (WAIT_OBJECT_0 + cntSpawned))
221 {
222 /* spawned VM process has terminated (normally or abnormally) */
223 (spawnedMachines[rc - WAIT_OBJECT_0 - cnt - 1])->
224 checkForSpawnFailure();
225 update = true;
226 }
227
228 if (update)
229 {
230 /* close old process handles */
231 for (size_t i = 1 + cnt; i < 1 + cnt + cntSpawned; ++i)
232 CloseHandle(handles[i]);
233
234 // get reference to the machines list in VirtualBox
235 VirtualBox::MachinesOList &allMachines = that->mVirtualBox->getMachinesList();
236
237 // lock the machines list for reading
238 AutoReadLock thatLock(allMachines.getLockHandle() COMMA_LOCKVAL_SRC_POS);
239
240 /* obtain a new set of opened machines */
241 cnt = 0;
242 machines.clear();
243
244 for (MachinesOList::iterator it = allMachines.begin();
245 it != allMachines.end();
246 ++it)
247 {
248 /// @todo handle situations with more than 64 objects
249 AssertMsgBreak((1 + cnt) <= MAXIMUM_WAIT_OBJECTS,
250 ("MAXIMUM_WAIT_OBJECTS reached"));
251
252 ComObjPtr<SessionMachine> sm;
253 if ((*it)->isSessionOpenOrClosing(sm))
254 {
255 AutoCaller smCaller(sm);
256 if (smCaller.isOk())
257 {
258 AutoReadLock smLock(sm COMMA_LOCKVAL_SRC_POS);
259 Machine::ClientToken *ct = sm->getClientToken();
260 if (ct)
261 {
262 HANDLE ipcSem = ct->getToken();
263 machines.push_back(sm);
264 handles[1 + cnt] = ipcSem;
265 ++cnt;
266 }
267 }
268 }
269 }
270
271 LogFlowFunc(("UPDATE: direct session count = %d\n", cnt));
272
273 /* obtain a new set of spawned machines */
274 cntSpawned = 0;
275 spawnedMachines.clear();
276
277 for (MachinesOList::iterator it = allMachines.begin();
278 it != allMachines.end();
279 ++it)
280 {
281 /// @todo handle situations with more than 64 objects
282 AssertMsgBreak((1 + cnt + cntSpawned) <= MAXIMUM_WAIT_OBJECTS,
283 ("MAXIMUM_WAIT_OBJECTS reached"));
284
285 if ((*it)->isSessionSpawning())
286 {
287 ULONG pid;
288 HRESULT hrc = (*it)->COMGETTER(SessionPID)(&pid);
289 if (SUCCEEDED(hrc))
290 {
291 HANDLE ph = OpenProcess(SYNCHRONIZE, FALSE, pid);
292 AssertMsg(ph != NULL, ("OpenProcess (pid=%d) failed with %d\n",
293 pid, GetLastError()));
294 if (ph != NULL)
295 {
296 spawnedMachines.push_back(*it);
297 handles[1 + cnt + cntSpawned] = ph;
298 ++cntSpawned;
299 }
300 }
301 }
302 }
303
304 LogFlowFunc(("UPDATE: spawned session count = %d\n", cntSpawned));
305
306 // machines lock unwinds here
307 }
308 }
309 while (true);
310 }
311 while (0);
312
313 /* close old process handles */
314 for (size_t i = 1 + cnt; i < 1 + cnt + cntSpawned; ++i)
315 CloseHandle(handles[i]);
316
317 /* release sets of machines if any */
318 machines.clear();
319 spawnedMachines.clear();
320
321 ::CoUninitialize();
322
323#elif defined(RT_OS_OS2)
324
325 /// @todo (dmik) processes reaping!
326
327 /* according to PMREF, 64 is the maximum for the muxwait list */
328 SEMRECORD handles[64];
329
330 HMUX muxSem = NULLHANDLE;
331
332 do
333 {
334 AutoCaller autoCaller(that->mVirtualBox);
335 /* VirtualBox has been early uninitialized, terminate */
336 if (!autoCaller.isOk())
337 break;
338
339 do
340 {
341 /* release the caller to let uninit() ever proceed */
342 autoCaller.release();
343
344 int vrc = RTSemEventWait(that->mUpdateReq, 500);
345
346 /* Restore the caller before using VirtualBox. If it fails, this
347 * means VirtualBox is being uninitialized and we must terminate. */
348 autoCaller.add();
349 if (!autoCaller.isOk())
350 break;
351
352 bool update = false;
353 bool updateSpawned = false;
354
355 if (RT_SUCCESS(vrc))
356 {
357 /* update event is signaled */
358 update = true;
359 updateSpawned = true;
360 }
361 else
362 {
363 AssertMsg(vrc == VERR_TIMEOUT || vrc == VERR_INTERRUPTED,
364 ("RTSemEventWait returned %Rrc\n", vrc));
365
366 /* are there any mutexes? */
367 if (cnt > 0)
368 {
369 /* figure out what's going on with machines */
370
371 unsigned long semId = 0;
372 APIRET arc = ::DosWaitMuxWaitSem(muxSem,
373 SEM_IMMEDIATE_RETURN, &semId);
374
375 if (arc == NO_ERROR)
376 {
377 /* machine mutex is normally released */
378 Assert(semId >= 0 && semId < cnt);
379 if (semId >= 0 && semId < cnt)
380 {
381#if 0//def DEBUG
382 {
383 AutoReadLock machineLock(machines[semId] COMMA_LOCKVAL_SRC_POS);
384 LogFlowFunc(("released mutex: machine='%ls'\n",
385 machines[semId]->name().raw()));
386 }
387#endif
388 machines[semId]->checkForDeath();
389 }
390 update = true;
391 }
392 else if (arc == ERROR_SEM_OWNER_DIED)
393 {
394 /* machine mutex is abandoned due to client process
395 * termination; find which mutex is in the Owner Died
396 * state */
397 for (size_t i = 0; i < cnt; ++i)
398 {
399 PID pid; TID tid;
400 unsigned long reqCnt;
401 arc = DosQueryMutexSem((HMTX)handles[i].hsemCur, &pid, &tid, &reqCnt);
402 if (arc == ERROR_SEM_OWNER_DIED)
403 {
404 /* close the dead mutex as asked by PMREF */
405 ::DosCloseMutexSem((HMTX)handles[i].hsemCur);
406
407 Assert(i >= 0 && i < cnt);
408 if (i >= 0 && i < cnt)
409 {
410#if 0//def DEBUG
411 {
412 AutoReadLock machineLock(machines[semId] COMMA_LOCKVAL_SRC_POS);
413 LogFlowFunc(("mutex owner dead: machine='%ls'\n",
414 machines[i]->name().raw()));
415 }
416#endif
417 machines[i]->checkForDeath();
418 }
419 }
420 }
421 update = true;
422 }
423 else
424 AssertMsg(arc == ERROR_INTERRUPT || arc == ERROR_TIMEOUT,
425 ("DosWaitMuxWaitSem returned %d\n", arc));
426 }
427
428 /* are there any spawning sessions? */
429 if (cntSpawned > 0)
430 {
431 for (size_t i = 0; i < cntSpawned; ++i)
432 updateSpawned |= (spawnedMachines[i])->
433 checkForSpawnFailure();
434 }
435 }
436
437 if (update || updateSpawned)
438 {
439 // get reference to the machines list in VirtualBox
440 VirtualBox::MachinesOList &allMachines = that->mVirtualBox->getMachinesList();
441
442 // lock the machines list for reading
443 AutoReadLock thatLock(allMachines.getLockHandle() COMMA_LOCKVAL_SRC_POS);
444
445 if (update)
446 {
447 /* close the old muxsem */
448 if (muxSem != NULLHANDLE)
449 ::DosCloseMuxWaitSem(muxSem);
450
451 /* obtain a new set of opened machines */
452 cnt = 0;
453 machines.clear();
454
455 for (MachinesOList::iterator it = allMachines.begin();
456 it != allMachines.end(); ++it)
457 {
458 /// @todo handle situations with more than 64 objects
459 AssertMsg(cnt <= 64 /* according to PMREF */,
460 ("maximum of 64 mutex semaphores reached (%d)",
461 cnt));
462
463 ComObjPtr<SessionMachine> sm;
464 if ((*it)->isSessionOpenOrClosing(sm))
465 {
466 AutoCaller smCaller(sm);
467 if (smCaller.isOk())
468 {
469 AutoReadLock smLock(sm COMMA_LOCKVAL_SRC_POS);
470 ClientToken *ct = sm->getClientToken();
471 if (ct)
472 {
473 HMTX ipcSem = ct->getToken();
474 machines.push_back(sm);
475 handles[cnt].hsemCur = (HSEM)ipcSem;
476 handles[cnt].ulUser = cnt;
477 ++cnt;
478 }
479 }
480 }
481 }
482
483 LogFlowFunc(("UPDATE: direct session count = %d\n", cnt));
484
485 if (cnt > 0)
486 {
487 /* create a new muxsem */
488 APIRET arc = ::DosCreateMuxWaitSem(NULL, &muxSem, cnt,
489 handles,
490 DCMW_WAIT_ANY);
491 AssertMsg(arc == NO_ERROR,
492 ("DosCreateMuxWaitSem returned %d\n", arc));
493 NOREF(arc);
494 }
495 }
496
497 if (updateSpawned)
498 {
499 /* obtain a new set of spawned machines */
500 spawnedMachines.clear();
501
502 for (MachinesOList::iterator it = allMachines.begin();
503 it != allMachines.end(); ++it)
504 {
505 if ((*it)->isSessionSpawning())
506 spawnedMachines.push_back(*it);
507 }
508
509 cntSpawned = spawnedMachines.size();
510 LogFlowFunc(("UPDATE: spawned session count = %d\n", cntSpawned));
511 }
512 }
513 }
514 while (true);
515 }
516 while (0);
517
518 /* close the muxsem */
519 if (muxSem != NULLHANDLE)
520 ::DosCloseMuxWaitSem(muxSem);
521
522 /* release sets of machines if any */
523 machines.clear();
524 spawnedMachines.clear();
525
526#elif defined(VBOX_WITH_SYS_V_IPC_SESSION_WATCHER)
527
528 bool update = false;
529 bool updateSpawned = false;
530
531 do
532 {
533 AutoCaller autoCaller(that->mVirtualBox);
534 if (!autoCaller.isOk())
535 break;
536
537 do
538 {
539 /* release the caller to let uninit() ever proceed */
540 autoCaller.release();
541
542 /* determine wait timeout adaptively: after updating information
543 * relevant to the client watcher, check a few times more
544 * frequently. This ensures good reaction time when the signalling
545 * has to be done a bit before the actual change for technical
546 * reasons, and saves CPU cycles when no activities are expected. */
547 RTMSINTERVAL cMillies;
548 {
549 uint8_t uOld, uNew;
550 do
551 {
552 uOld = ASMAtomicUoReadU8(&that->mUpdateAdaptCtr);
553 uNew = uOld ? uOld - 1 : uOld;
554 } while (!ASMAtomicCmpXchgU8(&that->mUpdateAdaptCtr, uNew, uOld));
555 Assert(uOld <= RT_ELEMENTS(s_aUpdateTimeoutSteps) - 1);
556 cMillies = s_aUpdateTimeoutSteps[uOld];
557 }
558
559 int rc = RTSemEventWait(that->mUpdateReq, cMillies);
560
561 /*
562 * Restore the caller before using VirtualBox. If it fails, this
563 * means VirtualBox is being uninitialized and we must terminate.
564 */
565 autoCaller.add();
566 if (!autoCaller.isOk())
567 break;
568
569 if (RT_SUCCESS(rc) || update || updateSpawned)
570 {
571 /* RT_SUCCESS(rc) means an update event is signaled */
572
573 // get reference to the machines list in VirtualBox
574 VirtualBox::MachinesOList &allMachines = that->mVirtualBox->getMachinesList();
575
576 // lock the machines list for reading
577 AutoReadLock thatLock(allMachines.getLockHandle() COMMA_LOCKVAL_SRC_POS);
578
579 if (RT_SUCCESS(rc) || update)
580 {
581 /* obtain a new set of opened machines */
582 machines.clear();
583
584 for (MachinesOList::iterator it = allMachines.begin();
585 it != allMachines.end();
586 ++it)
587 {
588 ComObjPtr<SessionMachine> sm;
589 if ((*it)->isSessionOpenOrClosing(sm))
590 machines.push_back(sm);
591 }
592
593 cnt = machines.size();
594 LogFlowFunc(("UPDATE: direct session count = %d\n", cnt));
595 }
596
597 if (RT_SUCCESS(rc) || updateSpawned)
598 {
599 /* obtain a new set of spawned machines */
600 spawnedMachines.clear();
601
602 for (MachinesOList::iterator it = allMachines.begin();
603 it != allMachines.end();
604 ++it)
605 {
606 if ((*it)->isSessionSpawning())
607 spawnedMachines.push_back(*it);
608 }
609
610 cntSpawned = spawnedMachines.size();
611 LogFlowFunc(("UPDATE: spawned session count = %d\n", cntSpawned));
612 }
613
614 // machines lock unwinds here
615 }
616
617 update = false;
618 for (size_t i = 0; i < cnt; ++i)
619 update |= (machines[i])->checkForDeath();
620
621 updateSpawned = false;
622 for (size_t i = 0; i < cntSpawned; ++i)
623 updateSpawned |= (spawnedMachines[i])->checkForSpawnFailure();
624
625 /* reap child processes */
626 {
627 AutoWriteLock alock(that->mLock COMMA_LOCKVAL_SRC_POS);
628 if (that->mProcesses.size())
629 {
630 LogFlowFunc(("UPDATE: child process count = %d\n",
631 that->mProcesses.size()));
632 VirtualBox::ClientWatcher::ProcessList::iterator it = that->mProcesses.begin();
633 while (it != that->mProcesses.end())
634 {
635 RTPROCESS pid = *it;
636 RTPROCSTATUS status;
637 int vrc = ::RTProcWait(pid, RTPROCWAIT_FLAGS_NOBLOCK, &status);
638 if (vrc == VINF_SUCCESS)
639 {
640 if ( status.enmReason != RTPROCEXITREASON_NORMAL
641 || status.iStatus != RTEXITCODE_SUCCESS)
642 {
643 switch (status.enmReason)
644 {
645 default:
646 case RTPROCEXITREASON_NORMAL:
647 LogRel(("Reaper: Pid %d (%x) exited normally: %d (%#x)\n",
648 pid, pid, status.iStatus, status.iStatus));
649 break;
650 case RTPROCEXITREASON_ABEND:
651 LogRel(("Reaper: Pid %d (%x) abended: %d (%#x)\n",
652 pid, pid, status.iStatus, status.iStatus));
653 break;
654 case RTPROCEXITREASON_SIGNAL:
655 LogRel(("Reaper: Pid %d (%x) was signalled: %d (%#x)\n",
656 pid, pid, status.iStatus, status.iStatus));
657 break;
658 }
659 }
660 else
661 LogFlowFunc(("pid %d (%x) was reaped, status=%d, reason=%d\n",
662 pid, pid, status.iStatus,
663 status.enmReason));
664 it = that->mProcesses.erase(it);
665 }
666 else
667 {
668 LogFlowFunc(("pid %d (%x) was NOT reaped, vrc=%Rrc\n",
669 pid, pid, vrc));
670 if (vrc != VERR_PROCESS_RUNNING)
671 {
672 /* remove the process if it is not already running */
673 it = that->mProcesses.erase(it);
674 }
675 else
676 ++it;
677 }
678 }
679 }
680 }
681 }
682 while (true);
683 }
684 while (0);
685
686 /* release sets of machines if any */
687 machines.clear();
688 spawnedMachines.clear();
689
690#else
691# error "Port me!"
692#endif
693
694 VirtualBoxBase::uninitializeComForThread();
695
696 LogFlowFuncLeave();
697 return 0;
698}
699/* vi: set tabstop=4 shiftwidth=4 expandtab: */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette