M buildsrht-shell => buildsrht-shell +9 -5
@@ 59,14 59,16 @@ deadline = datetime.utcfromtimestamp(info["deadline"])
manifest = Manifest(yaml.safe_load(info["manifest"]))
def connect(job_id, info):
- print("Your VM will be terminated "
- + naturaltime(datetime.utcnow() - deadline))
+ """Opens a shell on the build VM"""
+ limit = naturaltime(datetime.utcnow() - deadline)
+ print(f"Your VM will be terminated {limit}, or when you log out.")
print()
+ requests.post(f"http://localhost:8080/job/{job_id}/claim")
sys.stdout.flush()
sys.stderr.flush()
tty = os.open("/dev/tty", os.O_RDWR)
os.dup2(0, tty)
- os.execvp("ssh", [
+ subprocess.call([
"ssh", "-qt",
"-p", str(info["port"]),
"-o", "UserKnownHostsFile=/dev/null",
@@ 74,8 76,10 @@ def connect(job_id, info):
"-o", "LogLevel=quiet",
"build@localhost", "bash"
])
+ requests.post(f"http://localhost:8080/job/{job_id}/terminate")
def tail(job_id, info):
+ """Tails the build logs to stdout"""
logs = os.path.join(cfg("builds.sr.ht::worker", "buildlogs"), str(job_id))
p = subprocess.Popen(["tail", "-f", os.path.join(logs, "log")])
tasks = set()
@@ 88,7 92,7 @@ def tail(job_id, info):
path = os.path.join(logs, task.name, "log")
if os.path.exists(path):
procs.append(subprocess.Popen(
- f"tail -c +1 -f {shlex.quote(path)} | " +
+ f"tail -f {shlex.quote(path)} | " +
"awk '{ print \"[" + shlex.quote(task.name) + "] \" $0 }'",
shell=True))
tasks.update({ task.name })
@@ 102,7 106,7 @@ def tail(job_id, info):
time.sleep(3)
if op == "connect":
- if info["task"] != info["tasks"]:
+ if info["task"] != info["tasks"] and info["status"] == "running":
tail(job_id, info)
connect(job_id, info)
elif op == "tail":
M buildsrht/app.py => buildsrht/app.py +6 -1
@@ 1,4 1,5 @@
from buildsrht.types import JobStatus, OAuthToken, User
+from datetime import datetime, timedelta
from flask import session
from srht.config import cfg
from srht.database import DbSession
@@ 32,6 33,10 @@ class BuildApp(SrhtFlask):
@self.context_processor
def inject():
- return { "JobStatus": JobStatus }
+ # Besides JobStatus, hand datetime and timedelta to the templates:
+ # job.html compares datetime.utcnow() against
+ # job.updated + timedelta(minutes=10) to decide whether to show the
+ # "log into the failed build" notice.
+ return {
+ "datetime": datetime,
+ "timedelta": timedelta,
+ "JobStatus": JobStatus,
+ }
app = BuildApp()
M buildsrht/templates/job.html => buildsrht/templates/job.html +14 -1
@@ 1,4 1,4 @@
-{% extends "layout.html" %}
+{% extends "layout-full.html" %}
{% block title %}
<title>
builds #{{ job.id }} - {{ job.status.value }}
@@ 103,6 103,19 @@
<a href="#bottom">go to bottom »</a>
</div>
<div class="col-md-9">
+ {#
+ Owner-only notice, shown while the job's status is "failed" and less
+ than 10 minutes have passed since job.updated. The command uses
+ "ssh -t" so it matches the one the worker prints in the build log and
+ so the remote "connect" handler, which opens /dev/tty, gets a terminal.
+ #}
+ {% if current_user and current_user.id == job.owner_id %}
+ {% if (job.status.value == "failed" and
+ datetime.utcnow() < job.updated + timedelta(minutes=10)) %}
+ <div class="alert alert-danger">
+ <strong>This build job failed.</strong> You may log into the failed
+ build environment within 10 minutes to examine the results with the
+ following command:
+ <pre
+ style="margin-bottom: 0; margin-top: 1rem;"
+ >ssh -t builds@{{job.runner}} connect {{job.id}}</pre>
+ </div>
+ {% endif %}
+ {% endif %}
{% for log in logs %}
<details open>
{% if log["name"] %}
M worker/context.go => worker/context.go +39 -3
@@ 56,16 56,18 @@ type WorkerContext struct {
type JobContext struct {
Cancel context.CancelFunc
+ Claimed bool
Conf func(section, key string) string
Context context.Context
Db *sql.DB
Deadline time.Time
Job *Job
+ Log *log.Logger
LogDir string
LogFile *os.File
- Log *log.Logger
Manifest *Manifest
Port int
+ Settled bool
NTasks int
Task int
@@ 80,6 82,8 @@ func (wctx *WorkerContext) RunBuild(
err error
job *Job
ctx *JobContext
+
+ cleanup func()
)
timer := prometheus.NewTimer(buildDuration)
@@ 120,6 124,9 @@ func (wctx *WorkerContext) RunBuild(
job.SetStatus("failed")
}
ctx.ProcessTriggers()
+ if ctx.Settled {
+ ctx.Standby()
+ }
if ctx.Log != nil {
ctx.Log.Printf("Error: %v\n", err)
ctx.LogFile.Close()
@@ 129,6 136,9 @@ func (wctx *WorkerContext) RunBuild(
}
failedBuilds.Inc()
}
+ if cleanup != nil {
+ cleanup()
+ }
}()
timeout, _ := time.ParseDuration(conf("builds.sr.ht::worker", "timeout"))
@@ 160,8 170,7 @@ func (wctx *WorkerContext) RunBuild(
ctx.Log = log.New(io.MultiWriter(ctx.LogFile, os.Stdout),
"[#"+strconv.Itoa(job.Id)+"] ", log.LstdFlags)
- cleanup := ctx.Boot(wctx.Redis)
- defer cleanup()
+ cleanup = ctx.Boot(wctx.Redis)
tasks := []func() error{
ctx.Settle,
@@ 204,6 213,33 @@ func (wctx *WorkerContext) RunBuild(
successfulBuilds.Inc()
}
+// Standby holds a failed job open so that someone can log in and inspect it.
+//
+// It writes the login instructions to the job log and then blocks for up to
+// 10 minutes. If ctx.Context is cancelled during that window it logs the
+// cancellation and returns at once. When the window ends and ctx.Claimed is
+// set (the worker's HTTP "claim" endpoint sets it), it keeps blocking until
+// ctx.Deadline or until ctx.Context is cancelled; otherwise it logs that the
+// deadline elapsed and returns.
+//
+// RunBuild's deferred handler calls this only when ctx.Settled is true, and
+// runs the Boot cleanup function after Standby returns.
+func (ctx *JobContext) Standby() {
+ ctx.Log.Println("\x1B[1m\x1B[91mBuild failed.\x1B[0m")
+ ctx.Log.Println("The build environment will be kept alive for 10 minutes.")
+ ctx.Log.Println("To log in with SSH and examine it, use the following command:")
+ ctx.Log.Println()
+ ctx.Log.Printf("\tssh -t builds@%s connect %d", *ctx.Job.Runner, ctx.Job.Id)
+ ctx.Log.Println()
+ ctx.Log.Println("After logging in, the deadline is increased to your remaining build time.")
+ // Initial 10-minute window; it always runs to completion unless the
+ // context is cancelled, even if the job is claimed part-way through.
+ select {
+ case <-time.After(10*time.Minute):
+ break
+ case <-ctx.Context.Done():
+ ctx.Log.Println("Build cancelled. Terminating build environment.")
+ return
+ }
+ // NOTE(review): Claimed is read here without jobsMutex, while the HTTP
+ // "claim" handler writes it with jobsMutex held - confirm whether this
+ // read needs the same lock.
+ if ctx.Claimed {
+ // Claimed: wait out the rest of the job's deadline, or stop as soon as
+ // the context is cancelled. Nothing is logged on this path.
+ select {
+ case <-time.After(time.Until(ctx.Deadline)):
+ break
+ case <-ctx.Context.Done():
+ break
+ }
+ } else {
+ ctx.Log.Println("Deadline elapsed. Terminating build environment.")
+ }
+}
+
func (ctx *JobContext) Control(
context context.Context, args ...string) *exec.Cmd {
M worker/http.go => worker/http.go +21 -1
@@ 56,6 56,8 @@ func HttpServer() {
w.Write([]byte("404 not found"))
}
case "cancel":
+ fallthrough
+ case "terminate":
if r.Method != "POST" {
w.WriteHeader(405)
w.Write([]byte("405 method not allowed"))
@@ 65,7 67,9 @@ func HttpServer() {
defer jobsMutex.Unlock()
if job, ok := jobs[jobId]; ok {
job.Cancel()
- job.Job.SetStatus("cancelled")
+ if op == "cancel" {
+ job.Job.SetStatus("cancelled")
+ }
} else {
w.WriteHeader(404)
w.Write([]byte("404 not found"))
@@ 73,6 77,22 @@ func HttpServer() {
}
w.WriteHeader(200)
w.Write([]byte("cancelled"))
+ case "claim":
+ if r.Method != "POST" {
+ w.WriteHeader(405)
+ w.Write([]byte("405 method not allowed"))
+ return
+ }
+ jobsMutex.Lock()
+ defer jobsMutex.Unlock()
+ if job, ok := jobs[jobId]; ok {
+ job.Claimed = true
+ w.WriteHeader(200)
+ w.Write([]byte("claimed"))
+ } else {
+ w.WriteHeader(404)
+ w.Write([]byte("404 not found"))
+ }
default:
w.WriteHeader(404)
w.Write([]byte("404 not found"))
M worker/tasks.go => worker/tasks.go +1 -0
@@ 110,6 110,7 @@ func (ctx *JobContext) Settle() error {
stdout, _ := ioutil.ReadAll(pipe)
if err := check.Wait(); err == nil {
if string(stdout) == "hello world" {
+ ctx.Settled = true
done <- nil
return
} else {