torrent-audit: init
This commit is contained in:
333
services/arr/torrent-audit.py
Normal file
333
services/arr/torrent-audit.py
Normal file
@@ -0,0 +1,333 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Audit qBittorrent torrents against Radarr/Sonarr.
|
||||
|
||||
Reports two categories:
|
||||
|
||||
UNMANAGED -- torrents in qBittorrent that no *arr service has ever touched.
|
||||
These were added manually or by some other tool.
|
||||
|
||||
ABANDONED -- torrents that *arr grabbed but later replaced with a better
|
||||
version. The old torrent is still seeding while the library
|
||||
points to the new one.
|
||||
|
||||
Abandoned detection uses API cross-referencing (not filesystem hardlinks) and
|
||||
verifies against the *arr's current file state:
|
||||
|
||||
1. HISTORY -- group imports by content unit (movieId / episodeId); the
|
||||
most recent import is the keeper, older ones are candidates.
|
||||
2. CURRENT -- verify against the *arr's active file mapping.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from xml.etree import ElementTree
|
||||
|
||||
import qbittorrentapi
|
||||
from pyarr import RadarrAPI, SonarrAPI
|
||||
|
||||
# Log to stderr so the report printed on stdout stays clean and pipeable.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    stream=sys.stderr,
)
# Module-level logger, named after this module per stdlib convention.
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_api_key(config_path: str) -> str:
    """Read the *arr API key from its XML config file.

    Radarr/Sonarr persist their key in an ``<ApiKey>`` element of
    ``config.xml``.

    Args:
        config_path: path to the service's ``config.xml``.

    Returns:
        The API key string.

    Raises:
        ValueError: if the file has no non-empty ``<ApiKey>`` element.
    """
    tree = ElementTree.parse(config_path)
    node = tree.find(".//ApiKey")
    # Fail loudly with a useful message instead of an AttributeError on
    # ``None.text`` when the element is missing or empty.
    if node is None or not node.text:
        raise ValueError(f"no ApiKey element found in {config_path}")
    return node.text
|
||||
|
||||
|
||||
def paginate(arr_client, endpoint: str, page_size: int = 1000):
    """Yield every record from a paged *arr endpoint.

    Repeatedly calls ``arr_client.get_<endpoint>(page=..., page_size=...)``
    until the reported ``totalRecords`` count has been covered.
    """
    fetch_page = getattr(arr_client, f"get_{endpoint}")
    page_number = 0
    more_pages = True
    while more_pages:
        page_number += 1
        payload = fetch_page(page=page_number, page_size=page_size)
        yield from payload["records"]
        # Stop once the pages requested so far span the whole result set.
        more_pages = page_number * page_size < payload["totalRecords"]
|
||||
|
||||
|
||||
def get_qbit_torrents(qbit_client, category: str) -> dict[str, dict]:
    """Map upper-cased info-hash -> torrent dict for one qBittorrent category."""
    by_hash: dict[str, dict] = {}
    for torrent in qbit_client.torrents_info(category=category):
        # *arr download IDs are upper-case; normalise for set intersection.
        by_hash[torrent["hash"].upper()] = torrent
    return by_hash
|
||||
|
||||
|
||||
def gib(size_bytes: int) -> str:
    """Format a byte count as GiB with one decimal place (no unit suffix)."""
    gibibytes = size_bytes / (1024 ** 3)
    return f"{gibibytes:.1f}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Collect all known hashes from *arr history + queue
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def collect_all_known_hashes(arr_client, page_size: int = 1000) -> set[str]:
    """Every info-hash the *arr has ever seen, from both queue and history.

    Hashes are upper-cased to match qBittorrent's canonical form; records
    without a ``downloadId`` are skipped.
    """
    return {
        download_id
        for endpoint in ("queue", "history")
        for record in paginate(arr_client, endpoint, page_size)
        if (download_id := (record.get("downloadId") or "").upper())
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unmanaged: torrents with hashes not in any *arr history/queue
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def find_unmanaged(qbit_torrents: dict, known_hashes: set) -> list[dict]:
    """Torrents in qBittorrent that no *arr has ever referenced.

    Args:
        qbit_torrents: upper-cased info-hash -> torrent dict.
        known_hashes: every hash seen in any *arr queue/history.

    Returns:
        The unmatched torrent dicts, ordered oldest-added first.
    """
    orphans = [
        torrent
        for torrent_hash, torrent in qbit_torrents.items()
        if torrent_hash not in known_hashes
    ]
    orphans.sort(key=lambda torrent: torrent["added_on"])
    return orphans
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Abandoned movies: group imports by movieId, older = abandoned
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def find_movie_abandoned(radarr, qbit_movies):
    """Find Radarr upgrade leftovers among the qBittorrent movie torrents.

    Groups Radarr import-history events by ``movieId``: for each movie the
    most recent import is the keeper, every older import is an abandoned
    candidate.  Candidates are then cross-checked against Radarr's current
    movie/file state and tagged SAFE or REVIEW.

    Args:
        radarr: pyarr RadarrAPI client.
        qbit_movies: upper-cased info-hash -> qBittorrent torrent dict
            (the "movies" category).

    Returns:
        Result dicts (name/size/state/hash/added_on/status/notes/
        current_quality), sorted by torrent added time.
    """
    log.info("Analysing Radarr import history ...")
    # movieId -> list of {"downloadId", "date"} import events.
    imports_by_movie = defaultdict(list)
    for rec in paginate(radarr, "history"):
        # Only completed imports matter; grabs/failures are irrelevant here.
        if rec.get("eventType") != "downloadFolderImported":
            continue
        did = (rec.get("downloadId") or "").upper()
        if not did:
            continue
        mid = rec.get("movieId")
        if not mid:
            continue
        imports_by_movie[mid].append(
            {"downloadId": did, "date": rec["date"]}
        )

    # Identify keeper (latest) and abandoned (older) hashes per movie.
    abandoned_hashes: set[str] = set()
    keeper_hashes: set[str] = set()
    hash_to_movie: dict[str, int] = {}

    for mid, events in imports_by_movie.items():
        # NOTE(review): assumes "date" is an ISO-8601 string so lexicographic
        # sort equals chronological order -- confirm against the pyarr payload.
        # Stable sort means the keeper among equal dates is the last-seen event.
        ordered = sorted(events, key=lambda e: e["date"])
        keeper_hashes.add(ordered[-1]["downloadId"])
        for e in ordered[:-1]:
            abandoned_hashes.add(e["downloadId"])
            hash_to_movie[e["downloadId"]] = mid

    # A hash that is a keeper for *any* movie must not be deleted.
    abandoned_hashes -= keeper_hashes

    log.info("Fetching Radarr current movie state ...")
    radarr_movies = {m["id"]: m for m in radarr.get_movie()}

    results = []
    for ahash in abandoned_hashes:
        # Only report hashes that still exist as torrents in qBittorrent.
        torrent = qbit_movies.get(ahash)
        if torrent is None:
            continue

        mid = hash_to_movie.get(ahash)
        movie = radarr_movies.get(mid) if mid else None
        # movieFile describes the file Radarr currently holds in the library.
        mf = (movie or {}).get("movieFile") or {}

        current_quality = (mf.get("quality") or {}).get("quality", {}).get("name", "?")
        current_size = mf.get("size", 0)

        status = "SAFE"
        notes = []

        if not movie or not movie.get("hasFile"):
            # History says "replaced", but Radarr no longer has a file (or the
            # movie itself is gone) -- deleting could lose the only copy.
            notes.append("movie removed or has no file in Radarr")
            status = "REVIEW"
        elif torrent["size"] > current_size * 1.05:
            # Leftover is noticeably bigger than the current file: the
            # "upgrade" may actually be a downgrade, so ask a human.
            # The 5% tolerance absorbs container/metadata differences.
            notes.append(
                f"abandoned is larger than current "
                f"({gib(torrent['size'])} > {gib(current_size)} GiB)"
            )
            status = "REVIEW"

        results.append(
            {
                "name": torrent["name"],
                "size": torrent["size"],
                "state": torrent["state"],
                "hash": torrent["hash"],
                "added_on": torrent["added_on"],
                "status": status,
                "notes": notes,
                "current_quality": current_quality,
            }
        )

    return sorted(results, key=lambda r: r["added_on"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Abandoned TV: group imports by episodeId, a hash is abandoned only when
|
||||
# it is NOT the latest import for ANY episode it covers.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def find_tv_abandoned(sonarr, qbit_tvshows):
    """Find Sonarr upgrade leftovers among the qBittorrent TV torrents.

    A download is abandoned only when it is NOT the latest import for ANY
    episode it covers (season packs stay while any of their episodes is
    still current).  Leftovers whose series was removed from Sonarr are
    flagged REVIEW instead of SAFE.
    """
    log.info("Analysing Sonarr import history ...")
    by_episode = defaultdict(list)
    seen_hashes: set[str] = set()
    series_of: dict[str, int] = {}

    for event in paginate(sonarr, "history"):
        if event.get("eventType") != "downloadFolderImported":
            continue
        torrent_hash = (event.get("downloadId") or "").upper()
        episode_id = event.get("episodeId")
        if not torrent_hash or not episode_id:
            continue
        by_episode[episode_id].append(
            {"downloadId": torrent_hash, "date": event["date"]}
        )
        seen_hashes.add(torrent_hash)
        series_id = event.get("seriesId")
        if series_id:
            series_of[torrent_hash] = series_id

    # A hash is "active" if it is the latest import for *any* episode.
    active = {
        max(events, key=lambda e: e["date"])["downloadId"]
        for events in by_episode.values()
    }
    leftovers = seen_hashes - active

    log.info("Fetching Sonarr current series state ...")
    live_series_ids = {s["id"] for s in sonarr.get_series()}

    report = []
    for torrent_hash in leftovers:
        # Only report hashes still present as torrents in qBittorrent.
        torrent = qbit_tvshows.get(torrent_hash)
        if torrent is None:
            continue

        verdict = "SAFE"
        notes = []
        series_id = series_of.get(torrent_hash)
        if series_id and series_id not in live_series_ids:
            notes.append("series removed from Sonarr")
            verdict = "REVIEW"

        report.append(
            {
                "name": torrent["name"],
                "size": torrent["size"],
                "state": torrent["state"],
                "hash": torrent["hash"],
                "added_on": torrent["added_on"],
                "status": verdict,
                "notes": notes,
            }
        )

    return sorted(report, key=lambda entry: entry["added_on"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def print_section(torrents, show_status=False):
    """Print one report section: one entry per torrent, then a totals line.

    Args:
        torrents: result dicts; each needs "name"/"size"/"state", plus
            "status" when show_status is set, and optionally "notes".
        show_status: prefix each entry with its SAFE/REVIEW tag and add
            safe/review counts to the summary line.
    """
    if not torrents:
        print("  (none)\n")
        return

    total_size = sum(t["size"] for t in torrents)
    for t in torrents:
        # Fixed-width status tag keeps torrent names aligned across entries.
        prefix = f"[{t['status']:6s}] " if show_status else "        "
        print(f"  {prefix}{t['name']}")
        extra = f"{gib(t['size'])} GiB | {t['state']}"
        # Continuation lines are padded to line up under the name.
        print(f"  {' ' * len(prefix)}{extra}")
        for note in t.get("notes", []):
            print(f"  {' ' * len(prefix)}** {note}")
        print()

    if show_status:
        safe = [t for t in torrents if t["status"] == "SAFE"]
        review = [t for t in torrents if t["status"] == "REVIEW"]
        print(
            f"  total={len(torrents)} ({gib(total_size)} GiB) | "
            f"safe={len(safe)} | review={len(review)}"
        )
    else:
        print(f"  total={len(torrents)} ({gib(total_size)} GiB)")
    print()
|
||||
|
||||
|
||||
def main():
    """Entry point: connect to all services and print the audit report.

    Configuration comes entirely from environment variables:
    QBITTORRENT_URL, RADARR_URL, RADARR_CONFIG, SONARR_URL and
    SONARR_CONFIG are required (KeyError if missing); CATEGORIES is an
    optional comma-separated list of qBittorrent categories
    (default "tvshows,movies,anime").
    """
    qbit_url = os.environ["QBITTORRENT_URL"]
    radarr_url = os.environ["RADARR_URL"]
    radarr_config = os.environ["RADARR_CONFIG"]
    sonarr_url = os.environ["SONARR_URL"]
    sonarr_config = os.environ["SONARR_CONFIG"]
    categories = os.environ.get("CATEGORIES", "tvshows,movies,anime").split(",")

    # API keys are read straight from each service's config.xml on disk.
    radarr_key = get_api_key(radarr_config)
    sonarr_key = get_api_key(sonarr_config)

    radarr = RadarrAPI(radarr_url, radarr_key)
    sonarr = SonarrAPI(sonarr_url, sonarr_key)
    # NOTE(review): no credentials passed -- presumably qBittorrent permits
    # unauthenticated access from this host (e.g. localhost whitelist); confirm.
    qbit = qbittorrentapi.Client(host=qbit_url)

    log.info("Getting qBittorrent state ...")
    # category -> {upper-cased hash -> torrent dict}
    qbit_torrents = {cat: get_qbit_torrents(qbit, cat) for cat in categories}
    for cat, torrents in qbit_torrents.items():
        log.info("  %s: %d torrents", cat, len(torrents))

    log.info("Collecting known hashes from Sonarr ...")
    sonarr_hashes = collect_all_known_hashes(sonarr)
    log.info("  %d unique hashes", len(sonarr_hashes))

    log.info("Collecting known hashes from Radarr ...")
    radarr_hashes = collect_all_known_hashes(radarr)
    log.info("  %d unique hashes", len(radarr_hashes))

    # A torrent counts as "managed" if *either* service knows its hash.
    all_known = sonarr_hashes | radarr_hashes

    # -- Unmanaged --
    print("\n========== UNMANAGED TORRENTS ==========\n")
    for cat in categories:
        unmanaged = find_unmanaged(qbit_torrents[cat], all_known)
        print(f"--- {cat} ({len(unmanaged)} unmanaged / {len(qbit_torrents[cat])} total) ---\n")
        print_section(unmanaged)

    # -- Abandoned --
    print("========== ABANDONED UPGRADE LEFTOVERS ==========\n")

    # Abandoned detection only covers each service's primary category
    # ("movies" for Radarr, "tvshows" for Sonarr); other categories are
    # audited for unmanaged torrents only.
    movie_abandoned = find_movie_abandoned(
        radarr, qbit_torrents.get("movies", {})
    )
    print(f"--- movies ({len(movie_abandoned)} abandoned) ---\n")
    print_section(movie_abandoned, show_status=True)

    tv_abandoned = find_tv_abandoned(
        sonarr, qbit_torrents.get("tvshows", {})
    )
    print(f"--- tvshows ({len(tv_abandoned)} abandoned) ---\n")
    print_section(tv_abandoned, show_status=True)

    # -- Summary --
    all_abandoned = movie_abandoned + tv_abandoned
    safe = [t for t in all_abandoned if t["status"] == "SAFE"]

    print("=" * 50)
    print(
        f"ABANDONED: {len(all_abandoned)} total ({len(safe)} safe to delete)"
    )
    print(f"SAFE TO RECLAIM: {gib(sum(t['size'] for t in safe))} GiB")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user