This repository was archived on 2026-04-18. You can view and clone its files, but you cannot open issues, create pull requests, or push commits.
Files
server-config/services/arr/torrent-audit.py
2026-03-27 18:13:21 -07:00

334 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Audit qBittorrent torrents against Radarr/Sonarr.
Reports two categories:
UNMANAGED -- torrents in qBittorrent that no *arr service has ever touched.
These were added manually or by some other tool.
ABANDONED -- torrents that *arr grabbed but later replaced with a better
version. The old torrent is still seeding while the library
points to the new one.
Abandoned detection uses API cross-referencing (not filesystem hardlinks) and
verifies against the *arr's current file state:
1. HISTORY -- group imports by content unit (movieId / episodeId); the
most recent import is the keeper, older ones are candidates.
2. CURRENT -- verify against the *arr's active file mapping.
"""
import logging
import os
import sys
from collections import defaultdict
from xml.etree import ElementTree
import qbittorrentapi
from pyarr import RadarrAPI, SonarrAPI
# Log to stderr so the audit report printed on stdout stays clean for piping.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    stream=sys.stderr,
)
log = logging.getLogger(__name__)
def get_api_key(config_path: str) -> str:
    """Read the API key out of an *arr service's ``config.xml``.

    Args:
        config_path: Path to the service's XML config file.

    Returns:
        The text content of the first ``<ApiKey>`` element.

    Raises:
        ValueError: If the file contains no (non-empty) ``<ApiKey>`` element.
            The original code raised an opaque ``AttributeError`` on
            ``None.text`` in that case.
        ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ElementTree.parse(config_path)
    node = tree.find(".//ApiKey")
    if node is None or not node.text:
        raise ValueError(f"No ApiKey element found in {config_path}")
    return node.text
def paginate(arr_client, endpoint: str, page_size: int = 1000):
    """Lazily yield every record from a paged *arr API endpoint.

    Resolves ``get_<endpoint>`` on the client (e.g. ``get_history``) and
    walks pages until ``totalRecords`` is exhausted.
    """
    fetch = getattr(arr_client, f"get_{endpoint}")
    page_no = 0
    while True:
        page_no += 1
        payload = fetch(page=page_no, page_size=page_size)
        yield from payload["records"]
        # Stop once the pages requested so far cover the reported total.
        if page_no * page_size >= payload["totalRecords"]:
            return
def get_qbit_torrents(qbit_client, category: str) -> dict[str, dict]:
    """Map upper-cased info-hash -> torrent dict for one qBittorrent category."""
    by_hash: dict[str, dict] = {}
    for torrent in qbit_client.torrents_info(category=category):
        by_hash[torrent["hash"].upper()] = torrent
    return by_hash
def gib(size_bytes: int) -> str:
    """Format a byte count as GiB with one decimal place (no unit suffix)."""
    return "%.1f" % (size_bytes / 2**30)
# ---------------------------------------------------------------------------
# Collect all known hashes from *arr history + queue
# ---------------------------------------------------------------------------
def collect_all_known_hashes(arr_client, page_size: int = 1000) -> set[str]:
    """Return every upper-cased downloadId seen in the *arr's queue or history.

    Records without a downloadId (e.g. non-torrent events) are skipped.
    """
    return {
        (record.get("downloadId") or "").upper()
        for endpoint in ("queue", "history")
        for record in paginate(arr_client, endpoint, page_size)
        if record.get("downloadId")
    }
# ---------------------------------------------------------------------------
# Unmanaged: torrents with hashes not in any *arr history/queue
# ---------------------------------------------------------------------------
def find_unmanaged(qbit_torrents: dict, known_hashes: set) -> list[dict]:
    """Return torrents whose hash no *arr service knows about, oldest first."""
    strays = [
        torrent
        for info_hash, torrent in qbit_torrents.items()
        if info_hash not in known_hashes
    ]
    strays.sort(key=lambda torrent: torrent["added_on"])
    return strays
# ---------------------------------------------------------------------------
# Abandoned movies: group imports by movieId, older = abandoned
# ---------------------------------------------------------------------------
def find_movie_abandoned(radarr, qbit_movies) -> list[dict]:
    """Find movie torrents whose import was later superseded by an upgrade.

    Groups Radarr "downloadFolderImported" history events by movieId; for
    each movie the most recent import is the keeper and every older import
    hash is an abandoned candidate. Candidates are then cross-checked against
    Radarr's current movie/file state to flag anything needing manual review.

    Args:
        radarr: pyarr RadarrAPI client.
        qbit_movies: upper-cased info-hash -> torrent dict (movies category).

    Returns:
        Report rows (name/size/state/hash/added_on/status/notes/current_quality)
        sorted by torrent add time. status is "SAFE" or "REVIEW".
    """
    log.info("Analysing Radarr import history ...")
    imports_by_movie = defaultdict(list)
    for rec in paginate(radarr, "history"):
        # Only completed imports matter; grabs/failures don't claim a file.
        if rec.get("eventType") != "downloadFolderImported":
            continue
        did = (rec.get("downloadId") or "").upper()
        if not did:
            continue
        mid = rec.get("movieId")
        if not mid:
            continue
        imports_by_movie[mid].append(
            {"downloadId": did, "date": rec["date"]}
        )
    # Identify keeper (latest) and abandoned (older) hashes per movie.
    # NOTE: dates are compared as strings — assumes ISO-8601 timestamps from
    # the API so lexicographic order matches chronological order.
    abandoned_hashes: set[str] = set()
    keeper_hashes: set[str] = set()
    hash_to_movie: dict[str, int] = {}
    for mid, events in imports_by_movie.items():
        ordered = sorted(events, key=lambda e: e["date"])
        keeper_hashes.add(ordered[-1]["downloadId"])
        for e in ordered[:-1]:
            abandoned_hashes.add(e["downloadId"])
            hash_to_movie[e["downloadId"]] = mid
    # A hash that is a keeper for *any* movie must not be deleted.
    abandoned_hashes -= keeper_hashes
    log.info("Fetching Radarr current movie state ...")
    radarr_movies = {m["id"]: m for m in radarr.get_movie()}
    results = []
    for ahash in abandoned_hashes:
        # Only report hashes still present in qBittorrent.
        torrent = qbit_movies.get(ahash)
        if torrent is None:
            continue
        mid = hash_to_movie.get(ahash)
        movie = radarr_movies.get(mid) if mid else None
        # Chained defaults: missing movie/file collapses to quality "?" / size 0.
        mf = (movie or {}).get("movieFile") or {}
        current_quality = (mf.get("quality") or {}).get("quality", {}).get("name", "?")
        current_size = mf.get("size", 0)
        status = "SAFE"
        notes = []
        if not movie or not movie.get("hasFile"):
            notes.append("movie removed or has no file in Radarr")
            status = "REVIEW"
        # 5% slack so container/remux size jitter doesn't trigger review.
        elif torrent["size"] > current_size * 1.05:
            notes.append(
                f"abandoned is larger than current "
                f"({gib(torrent['size'])} > {gib(current_size)} GiB)"
            )
            status = "REVIEW"
        results.append(
            {
                "name": torrent["name"],
                "size": torrent["size"],
                "state": torrent["state"],
                "hash": torrent["hash"],
                "added_on": torrent["added_on"],
                "status": status,
                "notes": notes,
                "current_quality": current_quality,
            }
        )
    return sorted(results, key=lambda r: r["added_on"])
# ---------------------------------------------------------------------------
# Abandoned TV: group imports by episodeId, a hash is abandoned only when
# it is NOT the latest import for ANY episode it covers.
# ---------------------------------------------------------------------------
def find_tv_abandoned(sonarr, qbit_tvshows):
    """Find TV torrents whose every episode was re-imported from a newer grab.

    A hash is abandoned only when it is NOT the latest import for any episode
    it ever provided (season packs stay active while one episode still uses
    them). Hashes whose series was deleted from Sonarr are flagged REVIEW.
    """
    log.info("Analysing Sonarr import history ...")
    imports_per_episode = defaultdict(list)
    seen_hashes: set[str] = set()
    series_of_hash: dict[str, int] = {}
    for event in paginate(sonarr, "history"):
        # Only completed imports count; a grab alone never owns a file.
        if event.get("eventType") != "downloadFolderImported":
            continue
        dl_hash = (event.get("downloadId") or "").upper()
        episode_id = event.get("episodeId")
        if not dl_hash or not episode_id:
            continue
        imports_per_episode[episode_id].append(
            {"downloadId": dl_hash, "date": event["date"]}
        )
        seen_hashes.add(dl_hash)
        series_id = event.get("seriesId")
        if series_id:
            series_of_hash[dl_hash] = series_id
    # A hash is "active" if it is the latest import for *any* episode.
    still_active = {
        max(imports, key=lambda imp: imp["date"])["downloadId"]
        for imports in imports_per_episode.values()
    }
    leftovers = seen_hashes - still_active
    log.info("Fetching Sonarr current series state ...")
    live_series_ids = {series["id"] for series in sonarr.get_series()}
    report = []
    for dl_hash in leftovers:
        # Only report hashes still present in qBittorrent.
        torrent = qbit_tvshows.get(dl_hash)
        if torrent is None:
            continue
        verdict = "SAFE"
        remarks = []
        series_id = series_of_hash.get(dl_hash)
        if series_id and series_id not in live_series_ids:
            remarks.append("series removed from Sonarr")
            verdict = "REVIEW"
        report.append(
            {
                "name": torrent["name"],
                "size": torrent["size"],
                "state": torrent["state"],
                "hash": torrent["hash"],
                "added_on": torrent["added_on"],
                "status": verdict,
                "notes": remarks,
            }
        )
    return sorted(report, key=lambda row: row["added_on"])
# ---------------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------------
def print_section(torrents, show_status=False):
    """Pretty-print one report section to stdout.

    Each torrent gets a name line, a size/state line, and any notes; the
    section ends with a totals line. With show_status=True each entry is
    prefixed with its SAFE/REVIEW status and the totals line includes the
    per-status breakdown.
    """
    if not torrents:
        print(" (none)\n")
        return
    combined_size = sum(entry["size"] for entry in torrents)
    for entry in torrents:
        lead = f"[{entry['status']:6s}] " if show_status else " "
        pad = " " * len(lead)
        print(f" {lead}{entry['name']}")
        detail = f"{gib(entry['size'])} GiB | {entry['state']}"
        print(f" {pad}{detail}")
        for remark in entry.get("notes", []):
            print(f" {pad}** {remark}")
        print()
    if show_status:
        n_safe = sum(1 for entry in torrents if entry["status"] == "SAFE")
        n_review = sum(1 for entry in torrents if entry["status"] == "REVIEW")
        print(
            f" total={len(torrents)} ({gib(combined_size)} GiB) | "
            f"safe={n_safe} | review={n_review}"
        )
    else:
        print(f" total={len(torrents)} ({gib(combined_size)} GiB)")
    print()
def main():
    """Run the full audit: unmanaged report, then abandoned-upgrade report.

    Connection settings come from the environment; missing required variables
    fail fast with KeyError. The report is written to stdout (logs go to
    stderr via the module logger).
    """
    # Required service URLs and *arr config.xml paths.
    qbit_url = os.environ["QBITTORRENT_URL"]
    radarr_url = os.environ["RADARR_URL"]
    radarr_config = os.environ["RADARR_CONFIG"]
    sonarr_url = os.environ["SONARR_URL"]
    sonarr_config = os.environ["SONARR_CONFIG"]
    # qBittorrent categories to audit; override with CATEGORIES=a,b,c.
    categories = os.environ.get("CATEGORIES", "tvshows,movies,anime").split(",")
    # API keys are read straight out of each service's config.xml.
    radarr_key = get_api_key(radarr_config)
    sonarr_key = get_api_key(sonarr_config)
    radarr = RadarrAPI(radarr_url, radarr_key)
    sonarr = SonarrAPI(sonarr_url, sonarr_key)
    # NOTE(review): no credentials passed — presumably qBittorrent auth is
    # disabled / whitelisted for this host; confirm deployment config.
    qbit = qbittorrentapi.Client(host=qbit_url)
    log.info("Getting qBittorrent state ...")
    qbit_torrents = {cat: get_qbit_torrents(qbit, cat) for cat in categories}
    for cat, torrents in qbit_torrents.items():
        log.info("  %s: %d torrents", cat, len(torrents))
    log.info("Collecting known hashes from Sonarr ...")
    sonarr_hashes = collect_all_known_hashes(sonarr)
    log.info("  %d unique hashes", len(sonarr_hashes))
    log.info("Collecting known hashes from Radarr ...")
    radarr_hashes = collect_all_known_hashes(radarr)
    log.info("  %d unique hashes", len(radarr_hashes))
    # Union: a torrent is "managed" if either service has ever seen its hash.
    all_known = sonarr_hashes | radarr_hashes
    # -- Unmanaged --
    print("\n========== UNMANAGED TORRENTS ==========\n")
    for cat in categories:
        unmanaged = find_unmanaged(qbit_torrents[cat], all_known)
        print(f"--- {cat} ({len(unmanaged)} unmanaged / {len(qbit_torrents[cat])} total) ---\n")
        print_section(unmanaged)
    # -- Abandoned --
    # Abandoned detection only runs for the canonical "movies"/"tvshows"
    # categories; others (e.g. anime) get the unmanaged report only.
    print("========== ABANDONED UPGRADE LEFTOVERS ==========\n")
    movie_abandoned = find_movie_abandoned(
        radarr, qbit_torrents.get("movies", {})
    )
    print(f"--- movies ({len(movie_abandoned)} abandoned) ---\n")
    print_section(movie_abandoned, show_status=True)
    tv_abandoned = find_tv_abandoned(
        sonarr, qbit_torrents.get("tvshows", {})
    )
    print(f"--- tvshows ({len(tv_abandoned)} abandoned) ---\n")
    print_section(tv_abandoned, show_status=True)
    # -- Summary --
    all_abandoned = movie_abandoned + tv_abandoned
    safe = [t for t in all_abandoned if t["status"] == "SAFE"]
    print("=" * 50)
    print(
        f"ABANDONED: {len(all_abandoned)} total ({len(safe)} safe to delete)"
    )
    print(f"SAFE TO RECLAIM: {gib(sum(t['size'] for t in safe))} GiB")
if __name__ == "__main__":
    main()