skip to main content.

i recently presented a bash script that schedules computational tasks on multi-core machines. in the meantime, i have fixed a bug in the display, made the program more flexible, and started to use local variables instead of only global ones. the new version is also more intelligent: it tries to balance the running times of the processes it controls so that they do not drift far apart.
here is the newest version:

  #!/bin/bash
  2
  # Load the static settings of the active profile.
  # Globals read:    PROFILE   - profile name; the file read is
  #                              bigprimerunner-$PROFILE.profile
  # Globals written: PROFILEFN - name of the profile file
  #                  CORES     - text following "CORES " on its first matching line
  #                  STARTUP   - text following "STARTUP ", passed through eval so
  #                              the profile may contain a shell expression
  initProfile() {
      PROFILEFN="bigprimerunner-$PROFILE.profile"
      # The file name is quoted so profile names containing spaces work;
      # -m 1 keeps a duplicated key from producing a multi-line value.
      CORES=$(grep -m 1 "^CORES " "$PROFILEFN")
      CORES=${CORES#CORES }
      STARTUP=$(grep -m 1 "^STARTUP " "$PROFILEFN")
      STARTUP=${STARTUP#STARTUP }
      # NOTE(review): eval executes whatever the profile line contains; only
      # use profile files you trust.
      eval "STARTUP=$STARTUP"
  }
 11
 # Startup: take the profile name and the load modifier from the command line.
 #   $1 - profile name (defaults to the output of `hostname`)
 #   $2 - value added to the measured load (defaults to 0)
 PROFILE=${1:-$(hostname)}
 LOADMODIFIER=${2:-0}
 initProfile
 if [ -z "$CORES" ]; then
     # A profile without a CORES entry is unusable: report it on stderr and
     # exit non-zero so callers can tell the run did not start.
     echo "Cannot load profile $PROFILEFN!" >&2
     exit 1
 fi
 echo "Cores: $CORES"
 echo "Load modifier: $LOADMODIFIER"

 # The command to execute
 COMMAND=primefinder
 35
 # Refresh the schedule-dependent settings from the profile file.
 # The profile is searched for the line starting with today's `date +%w`
 # value; after that number the line holds space-separated entries, and the
 # entry at index `date +%k` (counted from 0) is eval'ed into FREECORES.
 # Globals read:    PROFILEFN
 # Globals written: FREECORES (stays 0 when no entry is found), STARTUP
 computeFreecores() {
     FREECORES=0
     local day hour line i
     day=$(date +%w)
     line=$(grep -m 1 "^$day " "$PROFILEFN")
     line=${line#"$day "}
     hour=$(date +%k)
     # Drop one leading entry per elapsed hour. The counter is local so that
     # a caller's own loop variable named i is not clobbered.
     for ((i = 0; i < hour; ++i)); do
         line=${line#* }
     done
     line=${line%% *}
     # Only overwrite the default when the profile really has an entry.
     if [ -n "$line" ]; then
         eval "FREECORES=$line"
     fi
     # Also determine how many jobs should be started
     STARTUP=$(grep -m 1 "^STARTUP " "$PROFILEFN")
     STARTUP=${STARTUP#STARTUP }
     eval "STARTUP=$STARTUP"
 }
 53
 # Terminate one worker and delete the primedatabase file(s) it had open.
 # Arguments: $1 - PID of the process to kill
 # Outputs:   one "Killed ..." line per removed file, or a single line when
 #            the process had no matching open file
 # Returns:   1 when no PID is given
 killProcess() { # One argument: PID of process to kill
     local pid=$1 files name removed=
     if [ -z "$pid" ]; then
         echo "killProcess: no PID given" >&2
         return 1
     fi
     # The open files have to be recorded before the process goes away.
     # Entries containing ".nfs" are skipped, as in the original filter.
     files=$(lsof -p "$pid" -F n 2>/dev/null | grep primedatabase | grep -v "\.nfs")
     kill "$pid" 2>/dev/null
     # A process that is currently stopped (SIGSTOP) does not act on SIGTERM
     # until it is continued, so wake it up to let the signal take effect.
     kill -SIGCONT "$pid" 2>/dev/null
     while IFS= read -r name; do
         name=${name#n}        # lsof -F n prefixes every name with "n"
         name=${name%% (nfs*}  # drop a trailing " (nfs..." annotation
         [ -n "$name" ] || continue
         rm -f -- "$name"
         removed=yes
         echo "Killed $pid with open file $name"
     done <<< "$files"
     if [ -z "$removed" ]; then
         echo "Killed $pid with no open file"
     fi
 }
 68
 # Signal handler: kill every background job of this shell through
 # killProcess, then exit the script.
 stopsignal() {
     # Both names are local so the handler does not leave a global PID behind.
     local pids pid
     pids=$(jobs -p)
     echo
     echo
     echo Terminating...
     # Left unquoted on purpose: word splitting puts all PIDs on one line.
     echo Killing: $pids
     for pid in $pids; do
         killProcess "$pid"
     done
     echo done.
     exit
 }
 82
 # Run stopsignal when the script receives SIGINT (signal 2, e.g. Ctrl-C).
 trap 'stopsignal' INT

 computeFreecores

 # BINDIR is not assigned anywhere in this script; when it is not provided
 # by the environment, show the current directory instead of "(in )".
 echo "Starting $STARTUP instances (in ${BINDIR:-$PWD})"
 88
 # Print those of the given PIDs whose ps state column is "R".
 # Arguments: the PIDs to examine
 # Outputs:   the matching PIDs, one per line
 filterRunning() {
     # Without PID arguments ps falls back to its default process selection
     # and would report unrelated processes, so print nothing instead.
     [ "$#" -gt 0 ] || return 0
     ps -o pid= -o s= "$@" | awk '$2 == "R" { print $1 }'
 }
 92
 # Print those of the given PIDs whose ps state column is "T".
 # Arguments: the PIDs to examine
 # Outputs:   the matching PIDs, one per line
 filterStopped() {
     # Without PID arguments ps falls back to its default process selection
     # and would report unrelated processes, so print nothing instead.
     [ "$#" -gt 0 ] || return 0
     ps -o pid= -o s= "$@" | awk '$2 == "T" { print $1 }'
 }
 96
 # Work out how many more (ADD > 0) or fewer (ADD < 0) jobs should be running
 # and print a one-line status report.
 # Globals read:    CORES, LOADMODIFIER, FREECORES and STARTUP (both refreshed
 #                  by computeFreecores)
 # Globals written: ADD = CORES - FREECORES - (load + LOADMODIFIER), where
 #                  load is the integer part of the first load average
 determineToAdd() {
     computeFreecores
     local load started running
     # LC_ALL=C keeps the "load average: " label that is parsed below.
     load=$(LC_ALL=C uptime)
     load=${load#*average: }
     load=${load%%,*}
     load=${load%%.*}
     # Treat an unparsable uptime line as load 0 instead of failing with an
     # arithmetic syntax error.
     [[ $load =~ ^[0-9]+$ ]] || load=0
     ADD=$(( CORES - FREECORES - (10#$load + LOADMODIFIER) ))
     started=$(jobs -p)
     running=$(filterRunning $started)
     # Unquoted on purpose: one array element per PID.
     local -a started_pids=($started) running_pids=($running)
     echo "Load: $(( 10#$load + LOADMODIFIER )), Intended number of free cores: $FREECORES, Running: ${#running_pids[@]}, Started: ${#started_pids[@]} (should be $STARTUP)"
 }
108
# Send SIGCONT to one stopped job: the first PID ps prints when the stopped
# jobs are sorted with "--sort +time". Does nothing when no job is stopped.
continueOne() {
    local stopped pid
    stopped=$(filterStopped $(jobs -p))
    [ "$stopped" != "" ] || return 0
    pid=$(ps -o pid= --sort +time $stopped | head -1)
    # $pid stays unquoted so the padding ps puts in front of the PID is dropped.
    echo Continuing $pid...
    kill -SIGCONT $pid
}
119
# Send SIGSTOP to one running job: the first PID ps prints when the running
# jobs are sorted with "--sort -time". Does nothing when no job is running.
stopOne() {
    local running pid
    running=$(filterRunning $(jobs -p))
    [ "$running" != "" ] || return 0
    pid=$(ps -o pid= --sort -time $running | head -1)
    # $pid stays unquoted so the padding ps puts in front of the PID is dropped.
    echo Stopping $pid...
    kill -SIGSTOP $pid
}
130
# Hand one background job to killProcess: the first PID ps prints when all
# jobs are sorted with "--sort -time". Does nothing when there are no jobs.
killOne() {
    local all pid
    all=$(jobs -p)
    [ "$all" != "" ] || return 0
    pid=$(ps -o pid= --sort -time $all | head -1)
    # Unquoted so killProcess receives the bare PID without ps padding.
    killProcess $pid
}
139
# Announce and start one more instance of $COMMAND in the background, then
# pause for 1.5 seconds before returning.
launchOne() {
    printf 'Launching "%s"...\n' "$COMMAND"
    $COMMAND &
    sleep 1.5
}
145
# Convert a time stamp of the form [DD-][[HH:]MM:]SS into seconds.
# Arguments: $1 - the time stamp (surrounding whitespace is ignored)
# Outputs:   the total number of seconds on stdout
# Returns:   1 when the stamp has more than three colon-separated fields
computeTotaltimeInSecs() {
    local stamp=${1//[[:space:]]/}
    local days=0 hours=0 mins=0 secs=0
    local -a fields=()
    if [[ $stamp == *-* ]]; then
        days=${stamp%%-*}
        stamp=${stamp#*-}
    fi
    IFS=: read -r -a fields <<< "$stamp"
    # Assign from the right so that a short stamp such as MM:SS is not
    # counted as hours as well.
    case ${#fields[@]} in
        0) ;;
        1) secs=${fields[0]} ;;
        2) mins=${fields[0]} secs=${fields[1]} ;;
        3) hours=${fields[0]} mins=${fields[1]} secs=${fields[2]} ;;
        *)
            echo "computeTotaltimeInSecs: cannot parse '$1'" >&2
            return 1
            ;;
    esac
    # Whole seconds only; callers compare the result with integer tests.
    secs=${secs%%.*}
    # The 10# prefix forces base 10, so fields like "08" are not read as octal.
    echo $(( ((10#${days:-0} * 24 + 10#${hours:-0}) * 60 + 10#${mins:-0}) * 60 + 10#${secs:-0} ))
}
171
# Even out CPU time between jobs. Picks the running job ps lists first under
# "--sort -time" and the stopped job ps lists first under "--sort +time";
# when the running job's time exceeds the stopped job's time by more than
# 60*5 seconds, the running job is stopped and the stopped one continued.
# Does nothing unless there is at least one running and one stopped job.
adjustProcesses() {
    local running stopped stop_pid cont_pid raw stop_secs cont_secs limit
    running=$(filterRunning $(jobs -p))
    [ "$running" != "" ] || return 0
    stop_pid=$(ps -o pid= --sort -time $running | head -1)
    stopped=$(filterStopped $(jobs -p))
    [ "$stopped" != "" ] || return 0
    cont_pid=$(ps -o pid= --sort +time $stopped | head -1)
    # Convert both CPU times to seconds
    raw=$(ps -o time= $stop_pid)
    stop_secs=$(computeTotaltimeInSecs $raw)
    raw=$(ps -o time= $cont_pid)
    cont_secs=$(computeTotaltimeInSecs $raw)
    # Swap only when the gap is larger than the allowance
    limit=$(echo $cont_secs+60*5 | bc)
    if [ $stop_secs -gt $limit ]; then
        echo Stopping $stop_pid and continuing $cont_pid
        kill -SIGSTOP $stop_pid
        kill -SIGCONT $cont_pid
    fi
}
199
# Start programs in the background: STARTUP instances are launched, and every
# instance beyond the first ADD is stopped again right away.
determineToAdd
for ((i = 1; i <= STARTUP; ++i)); do
    launchOne
    if [ "$i" -gt "$ADD" ]; then
        sleep 1
        # $! is the PID of the job launchOne just put in the background.
        # The job spec %$i is only right while job numbers equal the launch
        # index, which no longer holds once an earlier job has exited.
        kill -SIGSTOP "$!"
    fi
done
211
# Main loop: once per cycle re-read the schedule, continue or stop jobs to
# follow ADD, launch or kill jobs until STARTUP of them exist, and finally
# even out the CPU time between running and stopped jobs.
while true; do
    sleep 60

    # Positive ADD: that many jobs may be continued.
    # Negative ADD: that many jobs have to be stopped.
    determineToAdd

    if [ $ADD -gt 0 ]; then
        echo ADD:$ADD
        for ((i = 0; i < ADD; ++i)); do
            continueOne
        done
    elif [ $ADD -lt 0 ]; then
        REM=$(( -ADD ))
        echo REMOVE:$REM
        for ((i = 0; i < REM; ++i)); do
            stopOne
        done
    fi

    # Bring the number of launched jobs in line with STARTUP.
    # The operands stay unquoted, exactly as the tests were written originally.
    CURRLAUNCHED=$(jobs -p | wc -l)
    if [ $STARTUP != $CURRLAUNCHED ]; then
        if [ $STARTUP -lt $CURRLAUNCHED ]; then
            echo kill: $STARTUP $CURRLAUNCHED
            for ((i = STARTUP; i < CURRLAUNCHED; ++i)); do
                killOne
            done
        else
            echo add: $CURRLAUNCHED $STARTUP
            for ((i = CURRLAUNCHED; i < STARTUP; ++i)); do
                launchOne
            done
        fi
    fi
    sleep 2

    # Swap a long-running job for a stopped one when their times drift apart.
    adjustProcesses
done
posted in: computer
tags:
places:

comments.

no comments.