This repository was archived on 2026-04-18. You can view and clone its files, but you cannot open issues, create pull requests, or push commits.
Files
server-config/services/arr/torrent-audit.py
2026-03-27 18:13:21 -07:00

334 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Audit qBittorrent torrents against Radarr/Sonarr.
Reports two categories:
UNMANAGED -- torrents in qBittorrent that no *arr service has ever touched.
These were added manually or by some other tool.
ABANDONED -- torrents that *arr grabbed but later replaced with a better
version. The old torrent is still seeding while the library
points to the new one.
Abandoned detection uses API cross-referencing (not filesystem hardlinks) and
verifies against the *arr's current file state:
1. HISTORY -- group imports by content unit (movieId / episodeId); the
most recent import is the keeper, older ones are candidates.
2. CURRENT -- verify against the *arr's active file mapping.
"""
import logging
import os
import sys
from collections import defaultdict
from xml.etree import ElementTree
import qbittorrentapi
from pyarr import RadarrAPI, SonarrAPI
# Log to stderr so the audit report printed on stdout stays clean for piping.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    stream=sys.stderr,
)
log = logging.getLogger(__name__)
def get_api_key(config_path: str) -> str:
    """Read the API key out of an *arr service's ``config.xml``.

    Args:
        config_path: Path to the service's XML config file.

    Returns:
        The text content of the first ``<ApiKey>`` element.

    Raises:
        ValueError: If the file contains no (non-empty) ``<ApiKey>`` element.
            The original code raised an opaque ``AttributeError`` on
            ``None.text`` in that case.
        ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ElementTree.parse(config_path)
    node = tree.find(".//ApiKey")
    if node is None or not node.text:
        raise ValueError(f"No ApiKey element found in {config_path}")
    return node.text
def paginate(arr_client, endpoint: str, page_size: int = 1000):
    """Lazily yield every record from a paged *arr API endpoint.

    Resolves ``get_<endpoint>`` on the client (e.g. ``get_history``) and
    walks pages until ``totalRecords`` is exhausted.
    """
    fetch = getattr(arr_client, f"get_{endpoint}")
    page_no = 0
    while True:
        page_no += 1
        payload = fetch(page=page_no, page_size=page_size)
        yield from payload["records"]
        # Stop once the pages requested so far cover the reported total.
        if page_no * page_size >= payload["totalRecords"]:
            return
def get_qbit_torrents(qbit_client, category: str) -> dict[str, dict]:
    """Map upper-cased info-hash -> torrent dict for one qBittorrent category."""
    by_hash: dict[str, dict] = {}
    for torrent in qbit_client.torrents_info(category=category):
        by_hash[torrent["hash"].upper()] = torrent
    return by_hash
def gib(size_bytes: int) -> str:
    """Format a byte count as GiB with one decimal place (no unit suffix)."""
    return "%.1f" % (size_bytes / 2**30)
# ---------------------------------------------------------------------------
# Collect all known hashes from *arr history + queue
# ---------------------------------------------------------------------------
def collect_all_known_hashes(arr_client, page_size: int = 1000) -> set[str]:
    """Return every upper-cased downloadId seen in the *arr's queue or history.

    Records without a downloadId (e.g. non-torrent events) are skipped.
    """
    return {
        (record.get("downloadId") or "").upper()
        for endpoint in ("queue", "history")
        for record in paginate(arr_client, endpoint, page_size)
        if record.get("downloadId")
    }
# ---------------------------------------------------------------------------
# Unmanaged: torrents with hashes not in any *arr history/queue
# ---------------------------------------------------------------------------
def find_unmanaged(qbit_torrents: dict, known_hashes: set) -> list[dict]:
    """Return torrents whose hash no *arr service knows about, oldest first."""
    strays = [
        torrent
        for info_hash, torrent in qbit_torrents.items()
        if info_hash not in known_hashes
    ]
    strays.sort(key=lambda torrent: torrent["added_on"])
    return strays
# ---------------------------------------------------------------------------
# Abandoned movies: group imports by movieId, older = abandoned
# ---------------------------------------------------------------------------
def find_movie_abandoned(radarr, qbit_movies) -> list[dict]:
    """Find movie torrents whose import was later superseded by an upgrade.

    Groups Radarr "downloadFolderImported" history events by movieId; for
    each movie the most recent import is the keeper and every older import
    hash is an abandoned candidate. Candidates are then cross-checked against
    Radarr's current movie/file state to flag anything needing manual review.

    Args:
        radarr: pyarr RadarrAPI client.
        qbit_movies: upper-cased info-hash -> torrent dict (movies category).

    Returns:
        Report rows (name/size/state/hash/added_on/status/notes/current_quality)
        sorted by torrent add time. status is "SAFE" or "REVIEW".
    """
    log.info("Analysing Radarr import history ...")
    imports_by_movie = defaultdict(list)
    for rec in paginate(radarr, "history"):
        # Only completed imports matter; grabs/failures don't claim a file.
        if rec.get("eventType") != "downloadFolderImported":
            continue
        did = (rec.get("downloadId") or "").upper()
        if not did:
            continue
        mid = rec.get("movieId")
        if not mid:
            continue
        imports_by_movie[mid].append(
            {"downloadId": did, "date": rec["date"]}
        )
    # Identify keeper (latest) and abandoned (older) hashes per movie.
    # NOTE: dates are compared as strings — assumes ISO-8601 timestamps from
    # the API so lexicographic order matches chronological order.
    abandoned_hashes: set[str] = set()
    keeper_hashes: set[str] = set()
    hash_to_movie: dict[str, int] = {}
    for mid, events in imports_by_movie.items():
        ordered = sorted(events, key=lambda e: e["date"])
        keeper_hashes.add(ordered[-1]["downloadId"])
        for e in ordered[:-1]:
            abandoned_hashes.add(e["downloadId"])
            hash_to_movie[e["downloadId"]] = mid
    # A hash that is a keeper for *any* movie must not be deleted.
    abandoned_hashes -= keeper_hashes
    log.info("Fetching Radarr current movie state ...")
    radarr_movies = {m["id"]: m for m in radarr.get_movie()}
    results = []
    for ahash in abandoned_hashes:
        # Only report hashes still present in qBittorrent.
        torrent = qbit_movies.get(ahash)
        if torrent is None:
            continue
        mid = hash_to_movie.get(ahash)
        movie = radarr_movies.get(mid) if mid else None
        # Chained defaults: missing movie/file collapses to quality "?" / size 0.
        mf = (movie or {}).get("movieFile") or {}
        current_quality = (mf.get("quality") or {}).get("quality", {}).get("name", "?")
        current_size = mf.get("size", 0)
        status = "SAFE"
        notes = []
        if not movie or not movie.get("hasFile"):
            notes.append("movie removed or has no file in Radarr")
            status = "REVIEW"
        # 5% slack so container/remux size jitter doesn't trigger review.
        elif torrent["size"] > current_size * 1.05:
            notes.append(
                f"abandoned is larger than current "
                f"({gib(torrent['size'])} > {gib(current_size)} GiB)"
            )
            status = "REVIEW"
        results.append(
            {
                "name": torrent["name"],
                "size": torrent["size"],
                "state": torrent["state"],
                "hash": torrent["hash"],
                "added_on": torrent["added_on"],
                "status": status,
                "notes": notes,
                "current_quality": current_quality,
            }
        )
    return sorted(results, key=lambda r: r["added_on"])
# ---------------------------------------------------------------------------
# Abandoned TV: group imports by episodeId, a hash is abandoned only when
# it is NOT the latest import for ANY episode it covers.
# ---------------------------------------------------------------------------
def find_tv_abandoned(sonarr, qbit_tvshows):
    """Find TV torrents whose every episode was re-imported from a newer grab.

    A hash is abandoned only when it is NOT the latest import for any episode
    it ever provided (season packs stay active while one episode still uses
    them). Hashes whose series was deleted from Sonarr are flagged REVIEW.
    """
    log.info("Analysing Sonarr import history ...")
    imports_per_episode = defaultdict(list)
    seen_hashes: set[str] = set()
    series_of_hash: dict[str, int] = {}
    for event in paginate(sonarr, "history"):
        # Only completed imports count; a grab alone never owns a file.
        if event.get("eventType") != "downloadFolderImported":
            continue
        dl_hash = (event.get("downloadId") or "").upper()
        episode_id = event.get("episodeId")
        if not dl_hash or not episode_id:
            continue
        imports_per_episode[episode_id].append(
            {"downloadId": dl_hash, "date": event["date"]}
        )
        seen_hashes.add(dl_hash)
        series_id = event.get("seriesId")
        if series_id:
            series_of_hash[dl_hash] = series_id
    # A hash is "active" if it is the latest import for *any* episode.
    still_active = {
        max(imports, key=lambda imp: imp["date"])["downloadId"]
        for imports in imports_per_episode.values()
    }
    leftovers = seen_hashes - still_active
    log.info("Fetching Sonarr current series state ...")
    live_series_ids = {series["id"] for series in sonarr.get_series()}
    report = []
    for dl_hash in leftovers:
        # Only report hashes still present in qBittorrent.
        torrent = qbit_tvshows.get(dl_hash)
        if torrent is None:
            continue
        verdict = "SAFE"
        remarks = []
        series_id = series_of_hash.get(dl_hash)
        if series_id and series_id not in live_series_ids:
            remarks.append("series removed from Sonarr")
            verdict = "REVIEW"
        report.append(
            {
                "name": torrent["name"],
                "size": torrent["size"],
                "state": torrent["state"],
                "hash": torrent["hash"],
                "added_on": torrent["added_on"],
                "status": verdict,
                "notes": remarks,
            }
        )
    return sorted(report, key=lambda row: row["added_on"])
# ---------------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------------
def print_section(torrents, show_status=False):
    """Pretty-print one report section to stdout.

    Each torrent gets a name line, a size/state line, and any notes; the
    section ends with a totals line. With show_status=True each entry is
    prefixed with its SAFE/REVIEW status and the totals line includes the
    per-status breakdown.
    """
    if not torrents:
        print(" (none)\n")
        return
    combined_size = sum(entry["size"] for entry in torrents)
    for entry in torrents:
        lead = f"[{entry['status']:6s}] " if show_status else " "
        pad = " " * len(lead)
        print(f" {lead}{entry['name']}")
        detail = f"{gib(entry['size'])} GiB | {entry['state']}"
        print(f" {pad}{detail}")
        for remark in entry.get("notes", []):
            print(f" {pad}** {remark}")
        print()
    if show_status:
        n_safe = sum(1 for entry in torrents if entry["status"] == "SAFE")
        n_review = sum(1 for entry in torrents if entry["status"] == "REVIEW")
        print(
            f" total={len(torrents)} ({gib(combined_size)} GiB) | "
            f"safe={n_safe} | review={n_review}"
        )
    else:
        print(f" total={len(torrents)} ({gib(combined_size)} GiB)")
    print()
def main():
    """Run the full audit: unmanaged report, then abandoned-upgrade report.

    Connection settings come from the environment; missing required variables
    fail fast with KeyError. The report is written to stdout (logs go to
    stderr via the module logger).
    """
    # Required service URLs and *arr config.xml paths.
    qbit_url = os.environ["QBITTORRENT_URL"]
    radarr_url = os.environ["RADARR_URL"]
    radarr_config = os.environ["RADARR_CONFIG"]
    sonarr_url = os.environ["SONARR_URL"]
    sonarr_config = os.environ["SONARR_CONFIG"]
    # qBittorrent categories to audit; override with CATEGORIES=a,b,c.
    categories = os.environ.get("CATEGORIES", "tvshows,movies,anime").split(",")
    # API keys are read straight out of each service's config.xml.
    radarr_key = get_api_key(radarr_config)
    sonarr_key = get_api_key(sonarr_config)
    radarr = RadarrAPI(radarr_url, radarr_key)
    sonarr = SonarrAPI(sonarr_url, sonarr_key)
    # NOTE(review): no credentials passed — presumably qBittorrent auth is
    # disabled / whitelisted for this host; confirm deployment config.
    qbit = qbittorrentapi.Client(host=qbit_url)
    log.info("Getting qBittorrent state ...")
    qbit_torrents = {cat: get_qbit_torrents(qbit, cat) for cat in categories}
    for cat, torrents in qbit_torrents.items():
        log.info("  %s: %d torrents", cat, len(torrents))
    log.info("Collecting known hashes from Sonarr ...")
    sonarr_hashes = collect_all_known_hashes(sonarr)
    log.info("  %d unique hashes", len(sonarr_hashes))
    log.info("Collecting known hashes from Radarr ...")
    radarr_hashes = collect_all_known_hashes(radarr)
    log.info("  %d unique hashes", len(radarr_hashes))
    # Union: a torrent is "managed" if either service has ever seen its hash.
    all_known = sonarr_hashes | radarr_hashes
    # -- Unmanaged --
    print("\n========== UNMANAGED TORRENTS ==========\n")
    for cat in categories:
        unmanaged = find_unmanaged(qbit_torrents[cat], all_known)
        print(f"--- {cat} ({len(unmanaged)} unmanaged / {len(qbit_torrents[cat])} total) ---\n")
        print_section(unmanaged)
    # -- Abandoned --
    # Abandoned detection only runs for the canonical "movies"/"tvshows"
    # categories; others (e.g. anime) get the unmanaged report only.
    print("========== ABANDONED UPGRADE LEFTOVERS ==========\n")
    movie_abandoned = find_movie_abandoned(
        radarr, qbit_torrents.get("movies", {})
    )
    print(f"--- movies ({len(movie_abandoned)} abandoned) ---\n")
    print_section(movie_abandoned, show_status=True)
    tv_abandoned = find_tv_abandoned(
        sonarr, qbit_torrents.get("tvshows", {})
    )
    print(f"--- tvshows ({len(tv_abandoned)} abandoned) ---\n")
    print_section(tv_abandoned, show_status=True)
    # -- Summary --
    all_abandoned = movie_abandoned + tv_abandoned
    safe = [t for t in all_abandoned if t["status"] == "SAFE"]
    print("=" * 50)
    print(
        f"ABANDONED: {len(all_abandoned)} total ({len(safe)} safe to delete)"
    )
    print(f"SAFE TO RECLAIM: {gib(sum(t['size'] for t in safe))} GiB")
if __name__ == "__main__":
    main()