#!/usr/bin/env python3
"""Export one ``intel_gpu_top`` sample as Prometheus textfile metrics.

Runs ``intel_gpu_top -J``, parses the first JSON object it prints and writes
the per-engine busy percentages, the actual frequency and the RC6 percentage
to ``TEXTFILE``. The output path can be overridden with the ``TEXTFILE``
environment variable.
"""
import json
import os
import subprocess
import sys
import threading
import time

TEXTFILE = os.environ.get(
    "TEXTFILE",
    "/var/lib/prometheus-node-exporter-textfiles/intel-gpu.prom",
)

# Default sampler invocation (JSON output, ``-s 1000``).
INTEL_GPU_TOP_CMD = ("intel_gpu_top", "-J", "-s", "1000")

_JSON_DECODER = json.JSONDecoder()


def _first_json_object(raw):
    """Return the first complete JSON object found in *raw*, or ``None``.

    Anything before the first ``{`` (for example the ``[`` that opens a JSON
    array) is skipped. ``None`` is returned while the object is still
    incomplete, so the caller can keep reading and retry.
    """
    # A multi-byte character split at the end of the buffer lies beyond any
    # complete object, so replacing it cannot corrupt a successful parse.
    text = raw.decode("utf-8", errors="replace")
    start = text.find("{")
    if start == -1:
        return None
    try:
        obj, _end = _JSON_DECODER.raw_decode(text, start)
    except json.JSONDecodeError:
        return None
    return obj


def read_one_sample(command=None, timeout=5.0):
    """Run the sampler and return the first JSON object it prints.

    Args:
        command: Argument vector to execute; defaults to
            ``INTEL_GPU_TOP_CMD``.
        timeout: Maximum number of seconds to wait for a complete object.

    Returns:
        The decoded object as a ``dict``, or ``None`` when the process
        exits or the timeout expires before a complete object was read.

    Raises:
        OSError: If the command cannot be started.
    """
    argv = list(INTEL_GPU_TOP_CMD if command is None else command)
    deadline = time.monotonic() + timeout
    buf = bytearray()
    with subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    ) as proc:
        # Reads block, so a loop-level deadline check alone cannot stop a
        # silent child. The watchdog kills it, which makes the pending read
        # return EOF.
        watchdog = threading.Timer(timeout, proc.kill)
        watchdog.daemon = True
        watchdog.start()
        try:
            while time.monotonic() < deadline:
                chunk = proc.stdout.read1(4096)
                if not chunk:
                    break  # EOF: child exited or was killed by the watchdog
                buf += chunk
                # An object can only become complete when a "}" arrives.
                if b"}" not in chunk:
                    continue
                sample = _first_json_object(bytes(buf))
                if sample is not None:
                    return sample
            return None
        finally:
            watchdog.cancel()
            # The sampler streams indefinitely; stop it before the context
            # manager closes the pipe and waits for the process.
            proc.terminate()


def _escape_label_value(value):
    """Escape *value* for use inside a double-quoted Prometheus label."""
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


def _render_metrics(sample):
    """Return the Prometheus text exposition for *sample* as one string."""
    lines = [
        "# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage",
        "# TYPE intel_gpu_engine_busy_percent gauge",
    ]
    for engine, data in sample.get("engines", {}).items():
        label = _escape_label_value(engine)
        busy = data.get("busy", 0)
        lines.append(f'intel_gpu_engine_busy_percent{{engine="{label}"}} {busy}')
    actual_freq = sample.get("frequency", {}).get("actual", 0)
    rc6 = sample.get("rc6", {}).get("value", 0)
    lines += [
        "# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz",
        "# TYPE intel_gpu_frequency_mhz gauge",
        f"intel_gpu_frequency_mhz {actual_freq}",
        "# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage",
        "# TYPE intel_gpu_rc6_percent gauge",
        f"intel_gpu_rc6_percent {rc6}",
    ]
    return "\n".join(lines) + "\n"


def write_metrics(sample):
    """Write the metrics derived from *sample* to ``TEXTFILE``.

    The text is written to ``TEXTFILE + ".tmp"`` first and then moved into
    place with ``os.replace`` so a reader never sees a half-written file.
    Missing keys in *sample* are reported as ``0``.
    """
    tmp = TEXTFILE + ".tmp"
    with open(tmp, "w", encoding="utf-8") as f:
        f.write(_render_metrics(sample))
    os.replace(tmp, TEXTFILE)


def main():
    """Collect one sample and publish it; exit with status 1 on failure."""
    try:
        sample = read_one_sample()
    except OSError as err:
        # E.g. intel_gpu_top is not installed or not executable.
        print(f"Failed to run intel_gpu_top: {err}", file=sys.stderr)
        sys.exit(1)
    if sample is None:
        print("Failed to read intel_gpu_top sample", file=sys.stderr)
        sys.exit(1)
    write_metrics(sample)


if __name__ == "__main__":
    main()