torrent-audit: make more robust

This commit is contained in:
2026-05-04 02:27:52 -04:00
parent ce42ccdcc0
commit 09175cd0dc
2 changed files with 128 additions and 21 deletions

View File

@@ -74,6 +74,26 @@ def gib(size_bytes: int) -> str:
return f"{size_bytes / 1073741824:.1f}" return f"{size_bytes / 1073741824:.1f}"
def _is_keeper_viable(
    keeper_hash: str,
    all_qbit: dict[str, dict],
    file_path: str | None,
) -> bool:
    """Decide whether the keeper (the newer import) can be relied upon.

    Two sources of evidence are consulted, in order:

    1. If ``keeper_hash`` has an entry in ``all_qbit``, the answer is
       whatever ``is_complete`` reports for that torrent. The filesystem
       is not consulted in this case.
    2. Otherwise the keeper has no torrent entry, so the answer is
       whether ``file_path`` exists on disk.

    Args:
        keeper_hash: Download hash of the keeper import.
        all_qbit: Mapping of torrent hash to torrent record, used for
            the lookup in step 1.
        file_path: Path of the imported file, or ``None`` (or empty)
            when no path is known.

    Returns:
        The result of ``is_complete`` when the keeper torrent is known;
        otherwise ``True`` only if ``file_path`` is non-empty and exists
        on disk.
    """
    torrent = all_qbit.get(keeper_hash)
    if torrent is None:
        # No torrent entry for the keeper: the file mapping is only
        # trusted when the file is really present on disk. A missing
        # or empty path can never be verified, so it is not viable.
        return bool(file_path) and os.path.exists(file_path)
    # The torrent is known, so its completeness alone decides viability.
    return is_complete(torrent)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Collect all known hashes from *arr history + queue # Collect all known hashes from *arr history + queue
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -107,7 +127,7 @@ def find_unmanaged(qbit_torrents: dict, known_hashes: set) -> list[dict]:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def find_movie_abandoned(radarr, qbit_movies): def find_movie_abandoned(radarr, qbit_movies, all_qbit=None):
log.info("Analysing Radarr import history ...") log.info("Analysing Radarr import history ...")
imports_by_movie = defaultdict(list) imports_by_movie = defaultdict(list)
for rec in paginate(radarr, "history"): for rec in paginate(radarr, "history"):
@@ -126,11 +146,14 @@ def find_movie_abandoned(radarr, qbit_movies):
# Identify keeper (latest) and abandoned (older) hashes per movie. # Identify keeper (latest) and abandoned (older) hashes per movie.
abandoned_hashes: set[str] = set() abandoned_hashes: set[str] = set()
keeper_hashes: set[str] = set() keeper_hashes: set[str] = set()
hash_to_movie: dict[str, int] = {} hash_to_movie: dict[str, int] = {} # abandoned hash -> movieId
movie_to_keeper: dict[int, str] = {} # movieId -> keeper hash
for mid, events in imports_by_movie.items(): for mid, events in imports_by_movie.items():
ordered = sorted(events, key=lambda e: e["date"]) ordered = sorted(events, key=lambda e: e["date"])
keeper_hashes.add(ordered[-1]["downloadId"]) keeper = ordered[-1]["downloadId"]
keeper_hashes.add(keeper)
movie_to_keeper[mid] = keeper
for e in ordered[:-1]: for e in ordered[:-1]:
abandoned_hashes.add(e["downloadId"]) abandoned_hashes.add(e["downloadId"])
hash_to_movie[e["downloadId"]] = mid hash_to_movie[e["downloadId"]] = mid
@@ -152,7 +175,6 @@ def find_movie_abandoned(radarr, qbit_movies):
# re-download in progress. # re-download in progress.
if not is_complete(torrent): if not is_complete(torrent):
continue continue
mid = hash_to_movie.get(ahash) mid = hash_to_movie.get(ahash)
movie = radarr_movies.get(mid) if mid else None movie = radarr_movies.get(mid) if mid else None
mf = (movie or {}).get("movieFile") or {} mf = (movie or {}).get("movieFile") or {}
@@ -163,16 +185,27 @@ def find_movie_abandoned(radarr, qbit_movies):
status = "SAFE" status = "SAFE"
notes = [] notes = []
# Verify the keeper (newer import) is actually viable before
# declaring the abandoned torrent safe to delete.
keeper_hash = movie_to_keeper.get(mid) if mid else None
if keeper_hash and all_qbit is not None:
file_path = mf.get("path")
if not _is_keeper_viable(keeper_hash, all_qbit, file_path):
notes.append(
"keeper torrent incomplete or missing, "
"file not on disk"
)
status = "REVIEW"
if not movie or not movie.get("hasFile"): if not movie or not movie.get("hasFile"):
notes.append("movie removed or has no file in Radarr") notes.append("movie removed or has no file in Radarr")
status = "REVIEW" status = "REVIEW"
elif torrent["size"] > current_size * 1.05: if current_size > 0 and torrent["size"] > current_size * 1.05:
notes.append( notes.append(
f"abandoned is larger than current " f"abandoned is larger than current "
f"({gib(torrent['size'])} > {gib(current_size)} GiB)" f"({gib(torrent['size'])} > {gib(current_size)} GiB)"
) )
status = "REVIEW" status = "REVIEW"
results.append( results.append(
{ {
"name": torrent["name"], "name": torrent["name"],
@@ -195,7 +228,7 @@ def find_movie_abandoned(radarr, qbit_movies):
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def find_tv_abandoned(sonarr, qbit_tvshows): def find_tv_abandoned(sonarr, qbit_tvshows, all_qbit=None):
log.info("Analysing Sonarr import history ...") log.info("Analysing Sonarr import history ...")
episode_imports = defaultdict(list) episode_imports = defaultdict(list)
all_download_ids: set[str] = set() all_download_ids: set[str] = set()
@@ -222,6 +255,15 @@ def find_tv_abandoned(sonarr, qbit_tvshows):
abandoned_hashes = all_download_ids - active_hashes abandoned_hashes = all_download_ids - active_hashes
# For keeper-viability checks: find the active hash for each
# episode an abandoned hash once covered.
hash_to_active: dict[str, list[str]] = defaultdict(list)
for eid, events in episode_imports.items():
active = max(events, key=lambda e: e["date"])["downloadId"]
for e in events:
if e["downloadId"] in abandoned_hashes:
hash_to_active[e["downloadId"]].append(active)
log.info("Fetching Sonarr current series state ...") log.info("Fetching Sonarr current series state ...")
current_series = {s["id"] for s in sonarr.get_series()} current_series = {s["id"] for s in sonarr.get_series()}
@@ -235,6 +277,17 @@ def find_tv_abandoned(sonarr, qbit_tvshows):
status = "SAFE" status = "SAFE"
notes = [] notes = []
# Verify every keeper that replaced this hash is viable.
if all_qbit is not None:
for keeper_h in hash_to_active.get(ahash, []):
if not _is_keeper_viable(keeper_h, all_qbit, None):
notes.append(
"replacement torrent incomplete or missing"
)
status = "REVIEW"
break
sid = hash_to_series.get(ahash) sid = hash_to_series.get(ahash)
if sid and sid not in current_series: if sid and sid not in current_series:
notes.append("series removed from Sonarr") notes.append("series removed from Sonarr")
@@ -369,17 +422,23 @@ def main():
print(f"--- {cat} ({len(unmanaged)} unmanaged / {len(qbit_torrents[cat])} total) ---\n") print(f"--- {cat} ({len(unmanaged)} unmanaged / {len(qbit_torrents[cat])} total) ---\n")
print_section(unmanaged) print_section(unmanaged)
# Flatten all qBittorrent torrents into one lookup for keeper
# viability checks across categories.
all_qbit = {}
for torrents in qbit_torrents.values():
all_qbit.update(torrents)
# -- Abandoned -- # -- Abandoned --
print("========== ABANDONED UPGRADE LEFTOVERS ==========\n") print("========== ABANDONED UPGRADE LEFTOVERS ==========\n")
movie_abandoned = find_movie_abandoned( movie_abandoned = find_movie_abandoned(
radarr, qbit_torrents.get("movies", {}) radarr, qbit_torrents.get("movies", {}), all_qbit
) )
print(f"--- movies ({len(movie_abandoned)} abandoned) ---\n") print(f"--- movies ({len(movie_abandoned)} abandoned) ---\n")
print_section(movie_abandoned, show_status=True) print_section(movie_abandoned, show_status=True)
tv_abandoned = find_tv_abandoned( tv_abandoned = find_tv_abandoned(
sonarr, qbit_torrents.get("tvshows", {}) sonarr, qbit_torrents.get("tvshows", {}), all_qbit
) )
print(f"--- tvshows ({len(tv_abandoned)} abandoned) ---\n") print(f"--- tvshows ({len(tv_abandoned)} abandoned) ---\n")
print_section(tv_abandoned, show_status=True) print_section(tv_abandoned, show_status=True)

View File

@@ -53,6 +53,10 @@ let
SINGLE8_NEW = "A" * 38 + "0D" # movieId=8, newer import keeper (not in qBit) SINGLE8_NEW = "A" * 38 + "0D" # movieId=8, newer import keeper (not in qBit)
QUEUED_MOV = "A" * 38 + "0E" # in Radarr queue, not in history QUEUED_MOV = "A" * 38 + "0E" # in Radarr queue, not in history
INPROGRESS_MOV = "A" * 38 + "0F" # movieId=10, older import, currently re-downloading INPROGRESS_MOV = "A" * 38 + "0F" # movieId=10, older import, currently re-downloading
KEEPER_GONE_OLD = "A" * 38 + "11" # movieId=11, older import, keeper incomplete in qBit
KEEPER_GONE_NEW = "A" * 38 + "12" # movieId=11, newer import (keeper), incomplete in qBit
INPROGRESS_MOV_NEW = "A" * 38 + "10" # movieId=10, newer import (not in qBit)
# TV # TV
UNMANAGED_TV = "B" * 38 + "01" UNMANAGED_TV = "B" * 38 + "01"
@@ -65,7 +69,9 @@ let
REMOVED_TV_NEW = "B" * 38 + "08" # episodeId=400, newer import (not in qBit) REMOVED_TV_NEW = "B" * 38 + "08" # episodeId=400, newer import (not in qBit)
INPROGRESS_TV = "B" * 38 + "09" # episodeId=500, older import, currently re-downloading INPROGRESS_TV = "B" * 38 + "09" # episodeId=500, older import, currently re-downloading
INPROGRESS_TV_NEW = "B" * 38 + "0A" # episodeId=500, newer import (not in qBit) INPROGRESS_TV_NEW = "B" * 38 + "0A" # episodeId=500, newer import (not in qBit)
INPROGRESS_MOV_NEW = "A" * 38 + "10" # movieId=10, newer import (not in qBit) KEEPER_GONE_TV = "B" * 38 + "0B" # episodeId=600, older import, keeper incomplete in qBit
KEEPER_GONE_TV_NEW = "B" * 38 + "0C" # episodeId=600, newer import (active), incomplete in qBit
def make_torrent(h, name, size, added_on, state="uploading", progress=1.0): def make_torrent(h, name, size, added_on, state="uploading", progress=1.0):
return { return {
@@ -92,6 +98,10 @@ let
# In-progress re-download: hash matches an old import, but data is # In-progress re-download: hash matches an old import, but data is
# not yet on disk. Must NOT be flagged as abandoned (regression). # not yet on disk. Must NOT be flagged as abandoned (regression).
make_torrent(INPROGRESS_MOV, "InProgress.Movie.2024", 8_000_000_000, 1704067209, state="downloading", progress=0.05), make_torrent(INPROGRESS_MOV, "InProgress.Movie.2024", 8_000_000_000, 1704067209, state="downloading", progress=0.05),
# Keeper-incomplete regression: old torrent is complete, keeper is
# incomplete in qBittorrent. Must be REVIEW, not SAFE.
make_torrent(KEEPER_GONE_OLD, "KeeperGone.Movie.2024", 2_000_000_000, 1704067210),
make_torrent(KEEPER_GONE_NEW, "KeeperGone.Movie.2024.Upgr", 3_000_000_000, 1704067211, state="downloading", progress=0.10),
], ],
"tvshows": [ "tvshows": [
make_torrent(UNMANAGED_TV, "Unmanaged.Show.S01E01", 1_000_000_000, 1704067200), make_torrent(UNMANAGED_TV, "Unmanaged.Show.S01E01", 1_000_000_000, 1704067200),
@@ -101,6 +111,9 @@ let
make_torrent(SEASON_PACK, "Season.Pack.S02", 5_000_000_000, 1704067204), make_torrent(SEASON_PACK, "Season.Pack.S02", 5_000_000_000, 1704067204),
make_torrent(REMOVED_TV, "Removed.Show.S01E01", 900_000_000, 1704067205), make_torrent(REMOVED_TV, "Removed.Show.S01E01", 900_000_000, 1704067205),
make_torrent(INPROGRESS_TV, "InProgress.Show.S01E01", 1_500_000_000, 1704067209, state="downloading", progress=0.05), make_torrent(INPROGRESS_TV, "InProgress.Show.S01E01", 1_500_000_000, 1704067209, state="downloading", progress=0.05),
# Keeper-incomplete regression for TV
make_torrent(KEEPER_GONE_TV, "KeeperGone.Show.S01E01", 500_000_000, 1704067210),
make_torrent(KEEPER_GONE_TV_NEW, "KeeperGone.Show.S01E01.Upgr", 800_000_000, 1704067211, state="downloading", progress=0.10),
], ],
} }
@@ -127,6 +140,10 @@ let
# In-progress re-download regression case for movies # In-progress re-download regression case for movies
{"movieId": 10, "downloadId": INPROGRESS_MOV, "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"}, {"movieId": 10, "downloadId": INPROGRESS_MOV, "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
{"movieId": 10, "downloadId": INPROGRESS_MOV_NEW,"eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"}, {"movieId": 10, "downloadId": INPROGRESS_MOV_NEW,"eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
# Keeper-incomplete regression: old import + newer import (keeper) that
# is incomplete in qBittorrent. The old torrent must be REVIEW.
{"movieId": 11, "downloadId": KEEPER_GONE_OLD, "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
{"movieId": 11, "downloadId": KEEPER_GONE_NEW, "eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
] ]
RADARR_MOVIES = [ RADARR_MOVIES = [
@@ -139,6 +156,7 @@ let
{"id": 7, "hasFile": True, "movieFile": {"size": 4_000_000_000, "quality": {"quality": {"name": "Bluray-1080p"}}}}, {"id": 7, "hasFile": True, "movieFile": {"size": 4_000_000_000, "quality": {"quality": {"name": "Bluray-1080p"}}}},
{"id": 8, "hasFile": True, "movieFile": {"size": 5_000_000_000, "quality": {"quality": {"name": "Remux-1080p"}}}}, {"id": 8, "hasFile": True, "movieFile": {"size": 5_000_000_000, "quality": {"quality": {"name": "Remux-1080p"}}}},
{"id": 10, "hasFile": True, "movieFile": {"size": 8_000_000_000, "quality": {"quality": {"name": "Remux-2160p"}}}}, {"id": 10, "hasFile": True, "movieFile": {"size": 8_000_000_000, "quality": {"quality": {"name": "Remux-2160p"}}}},
{"id": 11, "hasFile": True, "movieFile": {"size": 3_000_000_000, "quality": {"quality": {"name": "Remux-1080p"}}}},
] ]
# Sonarr mock data # Sonarr mock data
@@ -164,6 +182,9 @@ let
# In-progress re-download regression case for TV # In-progress re-download regression case for TV
{"episodeId": 500, "seriesId": 1, "downloadId": INPROGRESS_TV, "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"}, {"episodeId": 500, "seriesId": 1, "downloadId": INPROGRESS_TV, "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
{"episodeId": 500, "seriesId": 1, "downloadId": INPROGRESS_TV_NEW,"eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"}, {"episodeId": 500, "seriesId": 1, "downloadId": INPROGRESS_TV_NEW,"eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
# Keeper-incomplete regression for TV
{"episodeId": 600, "seriesId": 1, "downloadId": KEEPER_GONE_TV, "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
{"episodeId": 600, "seriesId": 1, "downloadId": KEEPER_GONE_TV_NEW,"eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
] ]
SONARR_HISTORY_ALL = SONARR_HISTORY_PAGE1 + SONARR_HISTORY_PAGE2 SONARR_HISTORY_ALL = SONARR_HISTORY_PAGE1 + SONARR_HISTORY_PAGE2
@@ -335,14 +356,14 @@ pkgs.testers.runNixOSTest {
with subtest("Detects unmanaged movie torrent"): with subtest("Detects unmanaged movie torrent"):
assert "Unmanaged.Movie.2024" in unmanaged_section, \ assert "Unmanaged.Movie.2024" in unmanaged_section, \
"Should detect unmanaged movie" "Should detect unmanaged movie"
assert "1 unmanaged / 10 total" in unmanaged_section, \ assert "1 unmanaged / 12 total" in unmanaged_section, \
"Should show 1 unmanaged movie out of 10" "Should show 1 unmanaged movie out of 12"
with subtest("Detects unmanaged TV torrent"): with subtest("Detects unmanaged TV torrent"):
assert "Unmanaged.Show.S01E01" in unmanaged_section, \ assert "Unmanaged.Show.S01E01" in unmanaged_section, \
"Should detect unmanaged TV show" "Should detect unmanaged TV show"
assert "1 unmanaged / 7 total" in unmanaged_section, \ assert "1 unmanaged / 9 total" in unmanaged_section, \
"Should show 1 unmanaged TV show out of 7" "Should show 1 unmanaged TV show out of 9"
with subtest("Empty category shows zero counts"): with subtest("Empty category shows zero counts"):
assert "0 unmanaged / 0 total" in unmanaged_section, \ assert "0 unmanaged / 0 total" in unmanaged_section, \
@@ -434,15 +455,42 @@ pkgs.testers.runNixOSTest {
break break
with subtest("Correct abandoned counts per category"): with subtest("Correct abandoned counts per category"):
assert "movies (3 abandoned)" in abandoned_section, \ assert "movies (4 abandoned)" in abandoned_section, \
"Should show 3 abandoned movies" "Should show 4 abandoned movies"
assert "tvshows (2 abandoned)" in abandoned_section, \ assert "tvshows (3 abandoned)" in abandoned_section, \
"Should show 2 abandoned TV shows" "Should show 3 abandoned TV shows"
with subtest("Correct summary totals"): assert "ABANDONED: 7 total (2 safe to delete)" in output, \
assert "ABANDONED: 5 total (2 safe to delete)" in output, \ "Summary should show 7 total abandoned, 2 safe to delete"
"Summary should show 5 total abandoned, 2 safe to delete"
assert "SAFE TO RECLAIM: 3.4 GiB" in output, \ assert "SAFE TO RECLAIM: 3.4 GiB" in output, \
"Should report 3.4 GiB reclaimable (2.8 GiB movie + 0.7 GiB TV)" "Should report 3.4 GiB reclaimable (2.8 GiB movie + 0.7 GiB TV)"
with subtest("Keeper-incomplete movie triggers REVIEW"):
assert "KeeperGone.Movie.2024" in abandoned_section, \
"Should detect abandoned torrent with incomplete keeper"
assert_note_near(abandoned_section, "KeeperGone.Movie.2024", "keeper torrent incomplete")
for line in abandoned_section.splitlines():
if "KeeperGone.Movie.2024" in line:
assert "REVIEW" in line, f"Keeper-incomplete movie should be REVIEW, got: {line}"
break
with subtest("Keeper-incomplete TV triggers REVIEW"):
assert "KeeperGone.Show.S01E01" in abandoned_section, \
"Should detect abandoned TV torrent with incomplete keeper"
assert_note_near(abandoned_section, "KeeperGone.Show.S01E01", "replacement torrent incomplete")
for line in abandoned_section.splitlines():
if "KeeperGone.Show.S01E01" in line:
assert "REVIEW" in line, f"Keeper-incomplete TV should be REVIEW, got: {line}"
break
with subtest("Keeper-incomplete does not affect existing SAFE verdicts"):
for line in abandoned_section.splitlines():
if "Old.Movie.Quality.2024" in line:
assert "SAFE" in line, f"Old movie should still be SAFE, got: {line}"
break
for line in abandoned_section.splitlines():
if "Old.Show.S01E01" in line:
assert "SAFE" in line, f"Old TV should still be SAFE, got: {line}"
break
''; '';
} }