0

"polite" shutdown - restart jenkins whenever there is a break in activity?

We would like to restart Jenkins in a non-intrusive way.

Although we can "prepare for shutdown" and close out all currently running tasks, teams inevitably are upset whenever this happens.

Instead, I would like to have a trigger which watches for a moment when no jobs are running, and then jumps in and restarts the instance.

Teams won't mind waiting ~5 minutes for the instance to restart, but they do mind waiting for longer-running jobs to complete.

I'm thinking of a script that: 

  • in a loop, monitors the instance to determine when there are no jobs running
  • if no jobs are running, prepare for shutdown and run the clean shutdown script
  • restart the instance

Probably best to run this as a cluster operation.  Does anything like this already exist?

 

1 comment

  • 0
    Avatar
    Daniel Ritchie

    Here is the code (not pretty, will clean up and submit a PR if I have a chance)...





    /*** BEGIN META {
    "name" : "Polite Shutdown",
    "comment" : "Waits until there are no active executors before shutting down",
    "parameters" : [ ],
    "core": "1.350",
    "authors" : [
    { name : "Daniel Ritchie" }
    ]
    } END META**/


    // NOTE: A lot of this was borrowed from: https://github.com/cloudbees/jenkins-scripts/blob/c36ac4818fa20d0f2255d95c237f3aff9f6d4231/count-executors.groovy (also includes Cloud Slave Executors)

    import jenkins.model.Jenkins
    import groovy.time.*;


    // Polite shutdown: poll until no executor is busy, put Jenkins into shutdown (quiet-down) mode,
    // stop any Pipeline builds that are still in progress, then restart the instance.

    // Pause between idle checks while executors are still in use.
    def POLL_INTERVAL_MS = 100000
    // Grace period after the "stop" and "terminate" rounds before escalating to the next one.
    def ESCALATION_WAIT_MS = 30000

    // Sums countBusy() over the given nodes and prints one detail line per busy node.
    // countBusy() is read once per node so the printed figure matches the counted one.
    def reportAndCountBusy = { nodes ->
        def used = 0
        nodes.each { node ->
            def busy = node.countBusy()
            used += busy
            // report details if busy
            if (busy != 0) {
                println "| ${node.displayName} | ${node.class.simpleName} | ${node.numExecutors} | ${busy}"
            }
        }
        return used
    }

    // Returns the number of busy executors on the master, the regular (non-cloud) slaves and the
    // CJOC shared slaves.
    def countBusyExecutors = { ->
        // Jenkins Master and slaves
        def regularSlaves = Jenkins.instance.computers.grep {
            it.class.superclass?.simpleName != 'AbstractCloudComputer' &&
            it.class.superclass?.simpleName != 'AbstractCloudSlave' &&
            it.class.simpleName != 'EC2AbstractSlave'
        }
        //TODO perhaps filter other known cloud slaves; shame there isn't a cleaner way to know them

        // CJOC Shared Slaves
        def sharedSlaves = Jenkins.instance.allItems.grep {
            it.class.name == 'com.cloudbees.opscenter.server.model.SharedSlave'
        }

        return reportAndCountBusy(regularSlaves) + reportAndCountBusy(sharedSlaves)
    }

    // Applies `action` to every listed build whose job still reports that it is building.
    // Jobs or builds that have disappeared since the list was built are skipped instead of
    // failing with a NullPointerException.
    def signalBuilds = { targets, verb, action ->
        targets.each { target ->
            def item = Jenkins.instance.getItemByFullName(target.name)
            if (item == null || !item.isBuilding() || !target.buildNum) {
                return // closure-level return: move on to the next target
            }
            def build = item.getBuildByNumber(target.buildNum)
            if (build == null) {
                return
            }
            println(verb + " " + target.name + " Build Number " + target.buildNum);
            action(build)
        }
    }

    def JOBS_RUNNING = true

    while (JOBS_RUNNING) {

        def EXECUTORS_USED = countBusyExecutors()

        if (EXECUTORS_USED != 0) {
            println "Waiting... There are " + EXECUTORS_USED + " EXECUTORS IN USE"
            println "******************************************************************"
            sleep(POLL_INTERVAL_MS)
            continue
        }

        println "PREPARING JENKINS FOR SHUTDOWN!"

        // FYI - this is a good place for notifications

        // Put Jenkins into shutdown mode
        Jenkins.instance.doQuietDown();
        println "Jenkins has been placed into shutdown mode"

        // A build may have started between the idle check above and the switch to shutdown mode.
        // Re-check now; if something is running, back out and keep waiting rather than killing it
        // with the restart.
        EXECUTORS_USED = countBusyExecutors()
        if (EXECUTORS_USED != 0) {
            Jenkins.instance.doCancelQuietDown()
            println "Activity resumed before shutdown mode took effect; shutdown mode cancelled"
            println "Waiting... There are " + EXECUTORS_USED + " EXECUTORS IN USE"
            println "******************************************************************"
            sleep(POLL_INTERVAL_MS)
            continue
        }

        JOBS_RUNNING = false

        println "Ensuring that no pipeline parts are running (1.5 minutes minimum)"
        // Stop / terminate / kill sequence adapted from:
        // https://github.com/cloudbees/jenkins-scripts/blob/c36ac4818fa20d0f2255d95c237f3aff9f6d4231/ProperlyStopRunningPipelines.groovy
        // (Author: Alex Taylor, July 2018, scope: CloudBees Jenkins Platform). That script stops
        // all, or a chosen set of, running Pipeline builds, pausing between rounds so each
        // command has time to take effect.
        //
        // Leave jobMap empty to target every running Pipeline build, or list specific builds.
        // Use "buildNum: 0" to target all running builds of a job, e.g.:
        //   def jobMap = [
        //       [name: '$JOB_NAME',  buildNum: $BUILD_NUM],
        //       [name: '$JOB_NAME2', buildNum: 0]
        //   ]
        def jobMap = []

        // Registers a build once; duplicates would only repeat the same stop request.
        def addTarget = { name, number ->
            def entry = [name: name, buildNum: number]
            if (!jobMap.contains(entry)) {
                println("Adding: " + name + " build number " + number)
                jobMap.add(entry)
            }
        }

        // Created for if the map is empty to add all the jobs.
        // Only builds that are still in progress are collected. The earlier version also added
        // every build started in the last day, finished or not, which produced stop requests and
        // log lines for builds that had already completed.
        if (jobMap.isEmpty()) {
            Jenkins.instance.getAllItems(org.jenkinsci.plugins.workflow.job.WorkflowJob).each { job ->
                job.builds.findAll { it.isBuilding() }.each { build ->
                    addTarget(job.fullName, build.getNumber().toInteger())
                }
            }
        }

        // Expand "buildNum: 0" entries into the job's in-progress builds. findAll returns a
        // separate list, so appending to jobMap while walking it is safe.
        jobMap.findAll { it.buildNum.equals(0) }.each { wildcard ->
            def item = Jenkins.instance.getItemByFullName(wildcard.name)
            if (item != null && item.isBuilding()) {
                item.builds.findAll { it.isInProgress() }.each { build ->
                    addTarget(wildcard.name, build.getNumber().toInteger())
                }
            }
        }

        println "Removing the running builds for the following jobs: "

        //Calling the same as the `X` in the UI
        signalBuilds(jobMap, "Stopping", { build -> build.doStop() })
        Thread.sleep(ESCALATION_WAIT_MS)

        //Calling the same as the Terminate running build command in the console log
        signalBuilds(jobMap, "Terminating", { build -> build.doTerm() })
        Thread.sleep(ESCALATION_WAIT_MS)

        //Calling the same as the Kill running build command in the console log
        signalBuilds(jobMap, "Killing", { build -> build.doKill() })

        println "Restarting instance..."
        // Restart Instance
        Jenkins.instance.restart()

        // Would be nice to watch it come back up, but need to exit here because we lose the
        // connection to the master at this point.
    }

    return null






Please sign in to leave a comment.