I recently presented a bash script which schedules computational tasks on multi-core machines. In the meantime, I fixed a bug in the display, made the program more flexible, and started to use local variables instead of only global variables. The new version is also more intelligent: it tries to balance the running times of the processes it controls, so that their accumulated running times do not drift far apart.
Here is the newest version:
#!/bin/bash
#
# Schedules instances of $COMMAND on a multi-core machine.
#
# Usage: <script> [PROFILE [LOADMODIFIER]]
#   PROFILE       selects the file bigprimerunner-PROFILE.profile in the
#                 current directory (default: output of `hostname`).
#   LOADMODIFIER  integer added to the measured load average (default: 0).
#
# Profile file format, as read by this script:
#   CORES <n>            number of cores of the machine
#   STARTUP <expr>       number of instances to keep launched
#   <d> <v0> ... <v23>   for weekday <d> (date +%w, 0-6): one value per hour
#                        (0-23) giving the number of cores to leave free
# STARTUP and the per-hour values are passed through `eval`, so they may be
# shell expressions such as $CORES (without spaces).
#
# Every minute the script continues or stops (SIGCONT/SIGSTOP) its jobs so
# that about CORES - FREECORES - load of them run, launches or kills jobs to
# reach STARTUP instances, and swaps a long-running job with a stopped one
# when their CPU times are more than five minutes apart.

# The command to execute
COMMAND=primefinder

# Prints the remainder of the first profile line that starts with "KEY ".
# Arguments: $1 - key.  Globals: PROFILEFN (read).
# Returns non-zero (and prints nothing) if there is no such line.
readProfileValue() {
  local key=$1 line
  line=$(grep -m 1 -- "^${key} " "$PROFILEFN" 2>/dev/null) || return 1
  printf '%s\n' "${line#"$key "}"
}

# Re-reads the STARTUP entry of the profile into the global STARTUP.
loadStartup() {
  local raw
  raw=$(readProfileValue STARTUP)
  # NOTE(review): eval executes whatever the profile contains; it is kept so
  # that existing profiles using shell expressions keep working. Only use
  # trusted profile files.
  eval "STARTUP=$raw"
}

# Sets PROFILEFN from PROFILE and loads CORES and STARTUP from that file.
# CORES is left empty when the profile cannot be read.
initProfile() {
  PROFILEFN=bigprimerunner-$PROFILE.profile
  CORES=$(readProfileValue CORES)
  loadStartup
}

# Sets FREECORES to the profile value for the current weekday and hour
# (0 if the profile has no such entry) and refreshes STARTUP.
computeFreecores() {
  local now day hour line
  local -a fields=()
  FREECORES=0
  # One date call, so day and hour cannot straddle midnight.
  now=$(date '+%w %H')
  day=${now% *}
  hour=$((10#${now#* })) # force base 10: "08" would be invalid octal
  line=$(readProfileValue "$day")
  read -r -a fields <<<"$line"
  # NOTE(review): eval on profile data, see loadStartup.
  eval "FREECORES=${fields[hour]:-0}"
  # Also determine how many jobs should be started
  loadStartup
}

# Terminates a process and removes the "primedatabase" file(s) it has open.
# Arguments: $1 - PID of the process to kill.
killProcess() {
  local pid=$1 entry path
  local -a files=()
  # lsof -F n prints one "n<name>" line per open file.
  while IFS= read -r entry; do
    path=${entry#n}
    path=${path%% \(nfs*} # drop a trailing " (nfs...)" annotation
    if [[ -n $path ]]; then
      files+=("$path")
    fi
  done < <(lsof -p "$pid" -F n 2>/dev/null | grep primedatabase | grep -v '\.nfs')
  kill "$pid" 2>/dev/null
  # A stopped process only acts on SIGTERM once it is continued.
  kill -CONT "$pid" 2>/dev/null
  if ((${#files[@]} > 0)); then
    rm -f -- "${files[@]}"
    echo "Killed $pid with open file ${files[*]}"
  else
    echo "Killed $pid with no open file"
  fi
}

# SIGINT handler: kills all jobs (removing their open files) and exits.
stopsignal() {
  local pid
  local -a pids
  pids=($(jobs -p))
  echo
  echo
  echo "Terminating..."
  echo "Killing: ${pids[*]}"
  for pid in "${pids[@]}"; do
    killProcess "$pid"
  done
  echo "done."
  exit
}

# Prints those of the given PIDs whose process state is R (running).
# Prints nothing when called without PIDs (ps would otherwise list
# unrelated processes).
filterRunning() {
  (($# > 0)) || return 0
  ps -o pid= -o s= "$@" | awk '$2 == "R" { print $1 }'
}

# Prints those of the given PIDs whose process state is T (stopped).
# Prints nothing when called without PIDs.
filterStopped() {
  (($# > 0)) || return 0
  ps -o pid= -o s= "$@" | awk '$2 == "T" { print $1 }'
}

# Prints the first of the given PIDs after sorting by CPU time.
# Arguments: $1 - ps sort key ("+time" ascending, "-time" descending)
#            $2.. - candidate PIDs
pickJob() {
  local order=$1
  shift
  (($# > 0)) || return 0
  ps -o pid= --sort "$order" "$@" | awk 'NR == 1 { print $1 }'
}

# Sets ADD to the number of jobs to continue (positive) or stop (negative):
# CORES - FREECORES - (integer part of the load average + LOADMODIFIER).
determineToAdd() {
  local load
  local -a started running
  computeFreecores
  load=$(uptime)
  load=${load##*load average*: } # also matches "load averages: "
  load=${load%%[,. ]*}           # integer part of the first value
  [[ $load =~ ^[0-9]+$ ]] || load=0
  ADD=$((CORES - FREECORES - (load + LOADMODIFIER)))
  started=($(jobs -p))
  running=($(filterRunning "${started[@]}"))
  echo "Load: $((load + LOADMODIFIER)), Intended number of free cores: $FREECORES, Running: ${#running[@]}, Started: ${#started[@]} (should be $STARTUP)"
}

# Continues the stopped job with the least CPU time, if there is one.
continueOne() {
  local pid
  local -a all stopped
  all=($(jobs -p))
  stopped=($(filterStopped "${all[@]}"))
  pid=$(pickJob +time "${stopped[@]}")
  if [[ -n $pid ]]; then
    echo "Continuing $pid..."
    kill -CONT "$pid"
  fi
}

# Stops the running job with the most CPU time, if there is one.
stopOne() {
  local pid
  local -a all running
  all=($(jobs -p))
  running=($(filterRunning "${all[@]}"))
  pid=$(pickJob -time "${running[@]}")
  if [[ -n $pid ]]; then
    echo "Stopping $pid..."
    kill -STOP "$pid"
  fi
}

# Kills the job with the most CPU time, if there is one.
killOne() {
  local pid
  local -a all
  all=($(jobs -p))
  pid=$(pickJob -time "${all[@]}")
  if [[ -n $pid ]]; then
    killProcess "$pid"
  fi
}

# Starts one instance of $COMMAND in the background and stores its PID in
# the global LAUNCHED_PID.
launchOne() {
  echo "Launching \"$COMMAND\"..."
  $COMMAND &
  LAUNCHED_PID=$!
  sleep 1.5
}

# Converts a ps TIME value to seconds.
# Arguments: $1 - time as [D-][[HH:]MM:]SS (surrounding blanks are ignored)
# Outputs:   the total number of seconds; "0" for malformed input
# Returns:   1 for malformed input, 0 otherwise
computeTotaltimeInSecs() {
  local t=${1//[[:space:]]/}
  local re='^([0-9]+-)?[0-9]+(:[0-9]+){0,2}$'
  local days=0 hours=0 mins=0 secs=0
  local -a parts=()
  if [[ ! $t =~ $re ]]; then
    echo 0
    return 1
  fi
  if [[ $t == *-* ]]; then
    days=${t%%-*}
    t=${t#*-}
  fi
  IFS=: read -r -a parts <<<"$t"
  case ${#parts[@]} in
    3)
      hours=${parts[0]}
      mins=${parts[1]}
      secs=${parts[2]}
      ;;
    2)
      mins=${parts[0]}
      secs=${parts[1]}
      ;;
    1)
      secs=${parts[0]}
      ;;
  esac
  # 10# forces base 10 so that fields such as "08" are not read as octal.
  echo $((((10#$days * 24 + 10#$hours) * 60 + 10#$mins) * 60 + 10#$secs))
}

# If the running job with the most CPU time is more than five minutes ahead
# of the stopped job with the least CPU time, stops the former and continues
# the latter.
adjustProcesses() {
  local stoppid contpid stopsec contsec
  local -a all running stopped
  all=($(jobs -p))
  running=($(filterRunning "${all[@]}"))
  stopped=($(filterStopped "${all[@]}"))
  if ((${#running[@]} == 0 || ${#stopped[@]} == 0)); then
    return 0
  fi
  stoppid=$(pickJob -time "${running[@]}")
  contpid=$(pickJob +time "${stopped[@]}")
  if [[ -z $stoppid || -z $contpid ]]; then
    return 0
  fi
  # Compute times
  stopsec=$(computeTotaltimeInSecs "$(ps -o time= "$stoppid")")
  contsec=$(computeTotaltimeInSecs "$(ps -o time= "$contpid")")
  # Compare times
  if ((stopsec > contsec + 60 * 5)); then
    echo "Stopping $stoppid and continuing $contpid"
    kill -STOP "$stoppid"
    kill -CONT "$contpid"
  fi
}

# Entry point: loads the profile, launches the jobs and runs the scheduling
# loop. Returns 1 when the profile cannot be loaded; otherwise it only ends
# through the SIGINT handler.
main() {
  local i rem
  local -a launched

  # Startup
  LOADMODIFIER=${2:-0}
  PROFILE=${1:-$(hostname 2>/dev/null || printf '%s' "$HOSTNAME")}
  initProfile
  if [[ -z $CORES ]]; then
    echo "Cannot load profile $PROFILEFN!" >&2
    return 1
  fi
  echo "Cores: $CORES"
  echo "Load modifier: $LOADMODIFIER"

  trap stopsignal INT

  computeFreecores

  # NOTE(review): BINDIR is never assigned in this script; presumably it is
  # meant to come from the environment - confirm.
  echo "Starting $STARTUP instances (in ${BINDIR:-})"

  # Start programs in the background; those beyond the first ADD are stopped
  # right away.
  determineToAdd
  for ((i = 1; i <= STARTUP; ++i)); do
    launchOne
    if ((i > ADD)); then
      sleep 1
      kill -STOP "$LAUNCHED_PID"
    fi
  done

  # Start mainloop
  while true; do
    sleep 60

    # Determine how many processes should be added/removed
    determineToAdd

    # Stop/continue processes
    if ((ADD > 0)); then
      # Add processes
      echo "ADD:$ADD"
      for ((i = 0; i < ADD; ++i)); do
        continueOne
      done
    fi
    if ((ADD < 0)); then
      rem=$((-ADD))
      # Remove processes
      echo "REMOVE:$rem"
      for ((i = 0; i < rem; ++i)); do
        stopOne
      done
    fi

    # Launch new processes or kill running ones
    launched=($(jobs -p))
    CURRLAUNCHED=${#launched[@]}
    if ((STARTUP < CURRLAUNCHED)); then
      echo "kill: $STARTUP $CURRLAUNCHED"
      for ((i = STARTUP; i < CURRLAUNCHED; ++i)); do
        killOne
      done
    elif ((STARTUP > CURRLAUNCHED)); then
      echo "add: $CURRLAUNCHED $STARTUP"
      for ((i = CURRLAUNCHED; i < STARTUP; ++i)); do
        launchOne
      done
    fi
    sleep 2

    # Adjust
    adjustProcesses
  done
}

main "$@"