xmrig-auto-pause: fix
This commit is contained in:
@@ -35,6 +35,11 @@ CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "5"))
|
||||
# RandomX dataset initialization complete (~4s on the target hardware)
|
||||
# without retriggering a stop.
|
||||
STARTUP_COOLDOWN = float(os.environ.get("STARTUP_COOLDOWN", "10"))
|
||||
# Directory for persisting pause state across script restarts. Without
|
||||
# this, a restart while xmrig is paused loses the paused_by_us flag and
|
||||
# xmrig stays stopped permanently.
|
||||
STATE_DIR = os.environ.get("STATE_DIR", "")
|
||||
_PAUSE_FILE = os.path.join(STATE_DIR, "paused") if STATE_DIR else ""
|
||||
|
||||
|
||||
def log(msg):
|
||||
@@ -78,13 +83,36 @@ def systemctl(action, unit):
|
||||
return result.returncode == 0
|
||||
|
||||
|
||||
def _save_paused(paused):
|
||||
"""Persist pause flag so a script restart can resume where we left off."""
|
||||
if not _PAUSE_FILE:
|
||||
return
|
||||
try:
|
||||
if paused:
|
||||
open(_PAUSE_FILE, "w").close()
|
||||
else:
|
||||
os.remove(_PAUSE_FILE)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _load_paused():
|
||||
"""Check if a previous instance left xmrig paused."""
|
||||
if not _PAUSE_FILE:
|
||||
return False
|
||||
return os.path.isfile(_PAUSE_FILE)
|
||||
|
||||
|
||||
def main():
|
||||
paused_by_us = False
|
||||
paused_by_us = _load_paused()
|
||||
idle_since = None
|
||||
started_at = None # monotonic time when we last started xmrig
|
||||
prev_total = None
|
||||
prev_work = None
|
||||
|
||||
if paused_by_us:
|
||||
log("Recovered pause state from previous instance")
|
||||
|
||||
log(
|
||||
f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s "
|
||||
f"threshold={CPU_THRESHOLD}% cooldown={STARTUP_COOLDOWN}s"
|
||||
@@ -116,6 +144,14 @@ def main():
|
||||
if time.monotonic() - started_at < STARTUP_COOLDOWN:
|
||||
time.sleep(POLL_INTERVAL)
|
||||
continue
|
||||
# Cooldown expired — verify xmrig survived startup. If it
|
||||
# crashed during init (hugepage failure, pool unreachable, etc.),
|
||||
# re-enter the pause/retry cycle rather than silently leaving
|
||||
# xmrig dead.
|
||||
if not is_active("xmrig.service"):
|
||||
log("xmrig died during startup cooldown — will retry")
|
||||
paused_by_us = True
|
||||
_save_paused(True)
|
||||
started_at = None
|
||||
|
||||
busy = real_work_pct > CPU_THRESHOLD
|
||||
@@ -128,6 +164,7 @@ def main():
|
||||
# manage it again.
|
||||
log("xmrig was restarted externally while paused — reclaiming")
|
||||
paused_by_us = False
|
||||
_save_paused(False)
|
||||
if not paused_by_us:
|
||||
# Only claim ownership if xmrig is actually running.
|
||||
# If something else stopped it (e.g. UPS battery hook),
|
||||
@@ -136,6 +173,7 @@ def main():
|
||||
log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig")
|
||||
if systemctl("stop", "xmrig.service"):
|
||||
paused_by_us = True
|
||||
_save_paused(True)
|
||||
else:
|
||||
if paused_by_us:
|
||||
if idle_since is None:
|
||||
@@ -144,6 +182,7 @@ def main():
|
||||
log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig")
|
||||
if systemctl("start", "xmrig.service"):
|
||||
paused_by_us = False
|
||||
_save_paused(False)
|
||||
started_at = time.monotonic()
|
||||
idle_since = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user