VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/docs/testbox-maintenance.sh@ 64580

Last change on this file since 64580 was 64580, checked in by vboxsync, 8 years ago

updates

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.7 KB
Line 
1#!/bin/bash
2# $Id: testbox-maintenance.sh 64580 2016-11-04 15:46:01Z vboxsync $
3## @file
4# VirtualBox Validation Kit - testbox maintenance service
5#
6
7#
8# Copyright (C) 2006-2016 Oracle Corporation
9#
10# This file is part of VirtualBox Open Source Edition (OSE), as
11# available from http://www.virtualbox.org. This file is free software;
12# you can redistribute it and/or modify it under the terms of the GNU
13# General Public License (GPL) as published by the Free Software
14# Foundation, in version 2 as it comes in the "COPYING" file of the
15# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17#
18# The contents of this file may alternatively be used under the terms
19# of the Common Development and Distribution License Version 1.0
20# (CDDL) only, as it comes in the "COPYING.CDDL" file of the
21# VirtualBox OSE distribution, in which case the provisions of the
22# CDDL are applicable instead of those of the GPL.
23#
24# You may elect to license modified versions of this file under the
25# terms and conditions of either the GPL or the CDDL or both.
26#
27
28
#
# Global Variables (config first).
#

# Non-empty: reboot when the script is done (also after an error).
# Empty: just exit, which is what you want while debugging the script.
MY_REBOOT_WHEN_DONE="yes"
#MY_REBOOT_WHEN_DONE="" # enable this for debugging the script

MY_TFTP_ROOT="/mnt/testbox-tftp"
MY_BACKUP_ROOT="/mnt/testbox-backup"
# The script treats the backup share as mounted only when this file exists.
# Derived from MY_BACKUP_ROOT so the two cannot drift apart.
MY_BACKUP_MNT_TEST_FILE="${MY_BACKUP_ROOT}/testbox-backup"
MY_GLOBAL_LOG_FILE="${MY_BACKUP_ROOT}/maintenance.log"

# Filled in further down as the script works things out.
MY_IP=""
MY_BACKUP_DIR=""
MY_LOG_FILE=""
MY_PXELINUX_CFG_FILE=""
44
45
##
# Info message.
#
# Prints the arguments (joined by single spaces) on stdout.  When
# MY_LOG_FILE is non-empty, a timestamped copy tagged with MY_IP is also
# appended to that file.
#
InfoMsg()
{
    # "$*" is quoted so the text is neither re-split nor glob-expanded.
    printf '%s\n' "$*";
    if test -n "${MY_LOG_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: info: $*" >> "${MY_LOG_FILE}";
    fi
}
56
57
##
# Error message and reboot+exit.  First argument is exit code.
#
# The remaining arguments form the message.  It is written to stderr, to
# the testbox log when MY_LOG_FILE is non-empty, and to the global log when
# the backup mount test file exists.
#
ErrorMsgExit()
{
    local MY_RET="$1"
    shift
    # "$*" is quoted so the text is neither re-split nor glob-expanded.
    printf '%s\n' "testbox-maintenance.sh: error: $*" >&2;
    # Append to the testbox log.
    if test -n "${MY_LOG_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: error: $*" >> "${MY_LOG_FILE}";
    fi
    # Append to the global log.
    if test -f "${MY_BACKUP_MNT_TEST_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: error: $*" >> "${MY_GLOBAL_LOG_FILE}";
    fi

    #
    # On error we normally wait 5min before rebooting to avoid repeating the
    # same error too many times before the admin finds out.  We choose NOT to
    # remove the PXE config file here because (a) the admin might otherwise
    # not notice something went wrong, (b) the system could easily be in a
    # weird unbootable state, (c) the problem might be temporary.
    #
    # While debugging, we just exit here.
    #
    if test -n "${MY_REBOOT_WHEN_DONE}"; then
        sleep 5m
        echo "testbox-maintenance.sh: rebooting (after error)" >&2;
        reboot
    fi
    exit "${MY_RET}"
}
91
#
# Try figure out the IP address of the box and the hostname from it again.
#
# Only the first address printed by 'hostname -I' is considered.  An empty
# result, more than one word, or the loopback address is a fatal error.
#
MY_IP=$(hostname -I | cut -f1 -d' ' | head -1)
if [[ -z "${MY_IP}" || "$(echo "${MY_IP}" | wc -w)" -ne 1 || "${MY_IP}" = "127.0.0.1" ]]; then
    ErrorMsgExit 10 "Failed to get a good IP! (MY_IP=${MY_IP})"
fi

# Squeeze runs of whitespace in the 'getent hosts' output down to a single
# space so that 'cut' can pick the second field.
MY_HOSTNAME=$(getent hosts "${MY_IP}" | sed -e 's/[[:space:]][[:space:]]*/ /g' | cut -d' ' -f2)
if [[ -z "${MY_HOSTNAME}" ]]; then
    MY_HOSTNAME="unknown";
fi
103
# Derive the backup dir and log file name from it.
# Without the mount test file the backup share is considered not mounted,
# which is fatal.
if [[ ! -f "${MY_BACKUP_MNT_TEST_FILE}" ]]; then
    ErrorMsgExit 11 "Backup directory is not mounted."
fi
MY_BACKUP_DIR="${MY_BACKUP_ROOT}/${MY_IP}"
MY_LOG_FILE="${MY_BACKUP_DIR}/maintenance.log"
mkdir -p "${MY_BACKUP_DIR}"

# Announce the new session in the testbox log and say hi in the global log.
printf '%s\n' "================ $(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} starts a new session ================" \
    >> "${MY_LOG_FILE}"
printf '%s\n' "$(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} says hi." \
    >> "${MY_GLOBAL_LOG_FILE}"
InfoMsg "MY_IP=${MY_IP}<eol>"
114
#
# Redirect stderr+stdout thru tee and to a log file on the server.
#
MY_OUTPUT_LOG_FILE="${MY_BACKUP_DIR}/maintenance-output.log"
# Blank separator line followed by the session banner.
{
    echo ""
    echo "================ $(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} starts a new session ================"
} >> "${MY_OUTPUT_LOG_FILE}"
# From here on everything the script prints is also appended to that file.
exec > >(tee -a "${MY_OUTPUT_LOG_FILE}") 2>&1
122
#
# Convert the IP address to PXELINUX hex format, then check that we've got
# a config file on the TFTP share that we later can remove.  We consider it a
# fatal failure if we don't because we've probably got the wrong IP and we'll
# be stuck doing the same stuff over and over again.
#
# Dots become spaces so that printf receives the four octets as separate
# arguments; the expansion below is deliberately left unquoted.
MY_TMP="${MY_IP//./ }"
# shellcheck disable=SC2086
MY_IP_HEX=$(printf "%02X%02X%02X%02X" ${MY_TMP})
InfoMsg "MY_IP_HEX=${MY_IP_HEX}<eol>"

if [[ ! -f "${MY_TFTP_ROOT}/pxelinux.0" ]]; then
    ErrorMsgExit 12 "TFTP share not mounted or missing pxelinux.0 in the root."
fi

MY_PXELINUX_CFG_FILE="${MY_TFTP_ROOT}/pxelinux.cfg/${MY_IP_HEX}"
if [[ ! -f "${MY_PXELINUX_CFG_FILE}" ]]; then
    ErrorMsgExit 13 "No pxelinux.cfg file found (${MY_PXELINUX_CFG_FILE}) - wrong IP?"
fi
141
#
# Dig the action out of the kernel command line.
#
# In debug mode (MY_REBOOT_WHEN_DONE empty) the script's own arguments are
# scanned instead.
#
if [[ -n "${MY_REBOOT_WHEN_DONE}" ]]; then
    InfoMsg "/proc/cmdline: $(cat /proc/cmdline)"
    # Word splitting is intended here.  The '--' makes sure an empty command
    # line clears the positional parameters (a bare 'set' would list all
    # variables instead) and that a leading '-' isn't taken as a shell option.
    # shellcheck disable=SC2046
    set -- $(cat /proc/cmdline)
else
    InfoMsg "Using script command line: $*"
fi

# The last testbox-action-* word wins if there are several.
MY_ACTION=not-found
while [[ $# -ge 1 ]]; do
    case "$1" in
        testbox-action-*)
            MY_ACTION="$1"
            ;;
    esac
    shift
done
if [[ "${MY_ACTION}" = "not-found" ]]; then
    ErrorMsgExit 14 "No action given. Expected testbox-action-backup, testbox-action-backup-again, testbox-action-restore," \
                    "testbox-action-refresh-info, or testbox-action-rescue on the kernel command line.";
fi
164
# Validate and shorten the action.
# Known actions lose their "testbox-action-" prefix, anything else is fatal.
case "${MY_ACTION}" in
    testbox-action-backup \
    | testbox-action-backup-again \
    | testbox-action-restore \
    | testbox-action-refresh-info \
    | testbox-action-rescue)
        MY_ACTION="${MY_ACTION#testbox-action-}"
        ;;
    *)
        ErrorMsgExit 15 "Invalid action '${MY_ACTION}'"
        ;;
esac

# Record the action in the global log.
printf '%s\n' "$(date -uIsec): ${MY_IP}: info: Executing '${MY_ACTION}'." >> "${MY_GLOBAL_LOG_FILE}"
188
#
# Generate missing info for this testbox if backing up.
#
# The info file is (re)written when it does not exist yet or when the action
# is backup, backup-again or refresh-info.
#
MY_INFO_FILE="${MY_BACKUP_DIR}/testbox-info.txt"
if [[ ! -f "${MY_INFO_FILE}" \
      || "${MY_ACTION}" = "backup" \
      || "${MY_ACTION}" = "backup-again" \
      || "${MY_ACTION}" = "refresh-info" ]]; then
    # Everything in the group goes into the info file via one quoted
    # redirection.  Tools run with 2>&1 have their stderr captured as well.
    {
        echo "IP:       ${MY_IP}"
        echo "HEX-IP:   ${MY_IP_HEX}"
        echo "Hostname: ${MY_HOSTNAME}"
        echo ""
        echo "**** cat /proc/cpuinfo ****"
        echo "**** cat /proc/cpuinfo ****"
        echo "**** cat /proc/cpuinfo ****"
        cat /proc/cpuinfo
        echo ""
        echo "**** lspci -vvv ****"
        echo "**** lspci -vvv ****"
        echo "**** lspci -vvv ****"
        lspci -vvv 2>&1
        echo ""
        echo "**** biosdecode ****"
        echo "**** biosdecode ****"
        echo "**** biosdecode ****"
        biosdecode 2>&1
        echo ""
        echo "**** dmidecode ****"
        echo "**** dmidecode ****"
        echo "**** dmidecode ****"
        dmidecode 2>&1
        echo ""
        echo "**** fdisk -l ****"
        echo "**** fdisk -l ****"
        echo "**** fdisk -l ****"
        fdisk -l 2>&1
        echo ""
        echo "**** dmesg ****"
        echo "**** dmesg ****"
        echo "**** dmesg ****"
        dmesg 2>&1
    } > "${MY_INFO_FILE}"

    #
    # Get the raw ACPI tables and whatnot since we can.  Use zip as tar will
    # zero pad virtual files due to wrong misleading size returned by stat (4K).
    #
    # Note! /sys/firmware/dmi/entries/15-0/system_event_log/raw_event_log has been
    #       seen causing fatal I/O errors, so skip all raw_event_log files.
    #
    zip -qr9 "${MY_BACKUP_DIR}/testbox-info.zip" \
        /proc/cpuinfo \
        /sys/firmware/ \
        -x "*/raw_event_log"
fi
244
# Drop a file named after the hostname (holding the hostname) into the backup
# dir, unless the hostname is unknown or the file is already there.
if [[ "${MY_HOSTNAME}" != "unknown" && ! -f "${MY_BACKUP_DIR}/${MY_HOSTNAME}" ]]; then
    echo "${MY_HOSTNAME}" > "${MY_BACKUP_DIR}/${MY_HOSTNAME}"
fi

# Likewise a file named after the hex IP, holding the dotted IP.
if [[ ! -f "${MY_BACKUP_DIR}/${MY_IP_HEX}" ]]; then
    echo "${MY_IP}" > "${MY_BACKUP_DIR}/${MY_IP_HEX}"
fi
252
#
# Assemble a list of block devices.  A disk-devices.lst file in the backup
# dir takes precedence; otherwise /sys/block/* is scanned with some filtering.
#
if [[ -f "${MY_BACKUP_DIR}/disk-devices.lst" ]]; then
    # Squeeze whitespace runs into one space and trim each line.
    MY_BLOCK_DEVS=$(sed -e 's/[[:space:]][[:space:]]*/ /g' \
                        -e 's/^[[:space:]]*//' \
                        -e 's/[[:space:]]*$//' \
                        "${MY_BACKUP_DIR}/disk-devices.lst");
    if [[ -z "${MY_BLOCK_DEVS}" ]]; then
        ErrorMsgExit 17 "No block devices listed in ${MY_BACKUP_DIR}/disk-devices.lst."
    fi
    InfoMsg "disk-devices.lst: MY_BLOCK_DEVS=${MY_BLOCK_DEVS}";
else
    MY_BLOCK_DEVS="";
    # Glob instead of parsing 'ls' output.
    for MY_DEV_PATH in /sys/block/*; do
        # An unmatched glob stays literal; skip it.
        [[ -e "${MY_DEV_PATH}" ]] || continue
        MY_DEV="${MY_DEV_PATH##*/}"
        case "${MY_DEV}" in
            # Only names starting with "sd" or "hd" are picked up.
            [sh]d*)
                MY_BLOCK_DEVS="${MY_BLOCK_DEVS} ${MY_DEV}"
                ;;
            *)  InfoMsg "Ignoring /sys/block/${MY_DEV}";
                ;;
        esac
    done
    if [[ -z "${MY_BLOCK_DEVS}" ]]; then
        ErrorMsgExit 17 "No block devices found via /sys/block."
    fi
    InfoMsg "/sys/block: MY_BLOCK_DEVS=${MY_BLOCK_DEVS}";
fi
279
#
# Take action
#
case "${MY_ACTION}" in
    #
    # Create a backup.  The 'backup' action refuses to overwrite an
    # existing backup, but is otherwise identical to 'backup-again'.
    #
    backup|backup-again)
        # Pass 1: look for existing images before touching anything.
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_DST="${MY_BACKUP_DIR}/${MY_DEV}.gz"
            [[ -f "${MY_DST}" ]] || continue
            if [[ "${MY_ACTION}" != 'backup-again' ]]; then
                ErrorMsgExit 18 "${MY_DST} already exists"
            fi
            InfoMsg "${MY_DST} already exists"
        done

        # Pass 2: do the backing up.
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_SRC="/dev/${MY_DEV}"
            MY_DST="${MY_BACKUP_DIR}/${MY_DEV}.gz"

            # Keep the previous image around as .old.
            if [[ -f "${MY_DST}" ]]; then
                mv -f "${MY_DST}" "${MY_DST}.old"
            fi

            if [[ ! -b "${MY_SRC}" ]]; then
                InfoMsg "Skipping ${MY_SRC} as it either doesn't exist or isn't a block device"
                continue
            fi

            InfoMsg "Backing up ${MY_SRC} to ${MY_DST}..."
            dd if="${MY_SRC}" bs=2M | gzip -c > "${MY_DST}"
            # Grab both pipeline exit codes before anything overwrites them.
            MY_RCS=("${PIPESTATUS[@]}")
            if [[ "${MY_RCS[0]}" -ne 0 || "${MY_RCS[1]}" -ne 0 ]]; then
                # Don't leave a partial image behind.
                rm -f "${MY_DST}"
                ErrorMsgExit 19 "There was a problem backing up ${MY_SRC} to ${MY_DST}: dd => ${MY_RCS[0]}; gzip => ${MY_RCS[1]}"
            fi
            InfoMsg "Successfully backed up ${MY_SRC} to ${MY_DST}"
        done
        ;;

    #
    # Restore existing.
    #
    restore)
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_SRC="${MY_BACKUP_DIR}/${MY_DEV}.gz"
            MY_DST="/dev/${MY_DEV}"

            if [[ ! -b "${MY_DST}" ]]; then
                InfoMsg "Skipping ${MY_DST} as it either doesn't exist or isn't a block device."
                continue
            fi
            if [[ ! -f "${MY_SRC}" ]]; then
                InfoMsg "Skipping ${MY_DST} because ${MY_SRC} does not exist."
                continue
            fi

            InfoMsg "Restoring ${MY_SRC} onto ${MY_DST}..."
            gunzip -c "${MY_SRC}" | dd of="${MY_DST}" bs=64K
            # Index 0 is gunzip, index 1 is dd.
            MY_RCS=("${PIPESTATUS[@]}")
            if [[ "${MY_RCS[0]}" -ne 0 || "${MY_RCS[1]}" -ne 0 ]]; then
                ErrorMsgExit 20 "There was a problem restoring ${MY_SRC} onto ${MY_DST}: dd => ${MY_RCS[1]}; gunzip => ${MY_RCS[0]}"
            fi
            InfoMsg "Successfully restored ${MY_SRC} onto ${MY_DST}"
        done
        ;;

    #
    # Nothing else to do for refresh-info.
    #
    refresh-info)
        ;;

    #
    # For the rescue action, we just quit without removing the PXE config or
    # rebooting the box.  The admin will do that once the system has been rescued.
    #
    rescue)
        InfoMsg "rescue: exiting. Admin must remove PXE config and reboot manually when done."
        exit 0
        ;;

    *)
        ErrorMsgExit 98 "Huh? MY_ACTION='${MY_ACTION}'"
        ;;
esac
366
#
# If we get here, remove the PXE config and reboot immediately.
#
InfoMsg "'${MY_ACTION}' - done"

# Debug mode: leave the PXE config alone and don't reboot.
if [[ -z "${MY_REBOOT_WHEN_DONE}" ]]; then
    exit 0
fi

sync
if ! rm -f "${MY_PXELINUX_CFG_FILE}"; then
    ErrorMsgExit 99 "failed to remove ${MY_PXELINUX_CFG_FILE}"
fi
InfoMsg "removed ${MY_PXELINUX_CFG_FILE}"
sync
InfoMsg "rebooting"
reboot
exit 0
383
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette