Files
dotfiles/patches/omp-fix-auth.patch
Simon Gardling 3627cb19c6
Some checks failed
Build and Deploy / deploy (push) Failing after 12m21s
omp: add fix auth patch (to test)
2026-04-08 13:08:30 -04:00

294 lines
12 KiB
Diff

commit 31d537735d3c5e25a2b620d63ebbfea4cf84f57e
Author: Simon Gardling <titaniumtown@proton.me>
Date: Wed Apr 8 13:05:09 2026 -0400
Fix key reauth with parallel sessions
diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md
index 3f50b7bd4..248dacbff 100644
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@@ -5,6 +5,10 @@
- Removed `coerceNullStrings` function and its automatic null-string coercion behavior from JSON parsing
+### Fixed
+
+- Fixed concurrent OAuth refresh token rotation race: when multiple instances share the same credential DB, one instance refreshing a token no longer causes other instances to permanently disable the credential on `invalid_grant` ([#607](https://github.com/can1357/oh-my-pi/issues/607))
+
## [13.19.0] - 2026-04-05
### Fixed
diff --git a/packages/ai/src/auth-storage.ts b/packages/ai/src/auth-storage.ts
index 9fdb4473b..fac936dad 100644
--- a/packages/ai/src/auth-storage.ts
+++ b/packages/ai/src/auth-storage.ts
@@ -1865,7 +1865,30 @@ export class AuthStorage {
});
if (isDefinitiveFailure) {
- // Permanently disable invalid credentials with an explicit cause for inspection/debugging
+ // Before permanently disabling, check if another instance already refreshed
+ // the token in the shared DB. Concurrent instances hold stale in-memory
+ // copies; refresh token rotation by one instance invalidates the token
+ // that other instances still hold. Re-read from DB before giving up.
+ if (/invalid_grant/i.test(errorMsg)) {
+ const entries = this.#getStoredCredentials(provider);
+ const entry = entries[selection.index];
+ if (entry) {
+ const dbCredentials = this.#store.listAuthCredentials(provider);
+ const dbEntry = dbCredentials.find(row => row.id === entry.id);
+ if (
+ dbEntry?.credential.type === "oauth" &&
+ dbEntry.credential.refresh !== selection.credential.refresh
+ ) {
+ // DB has a newer refresh token — another instance refreshed.
+ // Update in-memory state and retry with the fresh credential.
+ const updated = [...entries];
+ updated[selection.index] = { id: entry.id, credential: dbEntry.credential };
+ this.#setStoredCredentials(provider, updated);
+ return this.getApiKey(provider, sessionId, options);
+ }
+ }
+ }
+ // Genuinely invalid — disable the credential
this.#disableCredentialAt(provider, selection.index, `oauth refresh failed: ${errorMsg}`);
if (this.#getCredentialsForProvider(provider).some(credential => credential.type === "oauth")) {
return this.getApiKey(provider, sessionId, options);
diff --git a/packages/ai/test/auth-storage-refresh-race.test.ts b/packages/ai/test/auth-storage-refresh-race.test.ts
new file mode 100644
index 000000000..5a8098a18
--- /dev/null
+++ b/packages/ai/test/auth-storage-refresh-race.test.ts
@@ -0,0 +1,230 @@
+import { Database } from "bun:sqlite";
+import { afterEach, beforeEach, describe, expect, test, vi } from "bun:test";
+import * as fs from "node:fs/promises";
+import * as os from "node:os";
+import * as path from "node:path";
+import { AuthCredentialStore, AuthStorage, type OAuthCredential } from "../src/auth-storage";
+import * as oauthUtils from "../src/utils/oauth";
+import type { OAuthCredentials } from "../src/utils/oauth/types";
+
+/**
+ * Tests for the concurrent OAuth refresh token rotation race condition.
+ *
+ * When multiple omp instances share the same SQLite credential DB but hold
+ * independent in-memory caches, refresh token rotation by one instance
+ * invalidates the token that other instances still hold. Without the fix,
+ * the stale instance permanently disables the credential on `invalid_grant`
+ * even though a valid refresh token exists in the DB.
+ */
+
+function readDisabledCauses(dbPath: string, provider: string): string[] {
+ const db = new Database(dbPath, { readonly: true });
+ try {
+ const rows = db
+ .prepare(
+ "SELECT disabled_cause FROM auth_credentials WHERE provider = ? AND disabled_cause IS NOT NULL ORDER BY id ASC",
+ )
+ .all(provider) as Array<{ disabled_cause?: string | null }>;
+ return rows.flatMap(row => (typeof row.disabled_cause === "string" ? [row.disabled_cause] : []));
+ } finally {
+ db.close();
+ }
+}
+
+function countActiveCredentials(dbPath: string, provider: string): number {
+ const db = new Database(dbPath, { readonly: true });
+ try {
+ const row = db
+ .prepare("SELECT COUNT(*) AS count FROM auth_credentials WHERE provider = ? AND disabled_cause IS NULL")
+ .get(provider) as { count?: number } | undefined;
+ return row?.count ?? 0;
+ } finally {
+ db.close();
+ }
+}
+
+const EXPIRED_TOKEN_EXPIRES = Date.now() - 60_000;
+const FRESH_TOKEN_EXPIRES = Date.now() + 3_600_000;
+
+function makeOAuthCredential(suffix: string, opts?: { expires?: number }): OAuthCredential {
+ return {
+ type: "oauth",
+ access: `access-${suffix}`,
+ refresh: `refresh-${suffix}`,
+ expires: opts?.expires ?? FRESH_TOKEN_EXPIRES,
+ accountId: "acct-shared",
+ email: "user@example.com",
+ };
+}
+
+describe("AuthStorage concurrent OAuth refresh token rotation race", () => {
+ let tempDir = "";
+ let dbPath = "";
+ let storeA: AuthCredentialStore | null = null;
+ let storeB: AuthCredentialStore | null = null;
+ let authStorageA: AuthStorage | null = null;
+ let authStorageB: AuthStorage | null = null;
+
+ beforeEach(async () => {
+ tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "pi-ai-auth-refresh-race-"));
+ dbPath = path.join(tempDir, "agent.db");
+
+ // Both stores point at the same SQLite DB — simulates two omp instances
+ storeA = await AuthCredentialStore.open(dbPath);
+ storeB = await AuthCredentialStore.open(dbPath);
+ });
+
+ afterEach(async () => {
+ vi.restoreAllMocks();
+ storeA?.close();
+ storeB?.close();
+ storeA = null;
+ storeB = null;
+ authStorageA = null;
+ authStorageB = null;
+ if (tempDir) {
+ await fs.rm(tempDir, { recursive: true, force: true });
+ tempDir = "";
+ }
+ });
+
+ test("instance B recovers from invalid_grant when instance A already refreshed", async () => {
+ if (!storeA || !storeB) throw new Error("test setup failed");
+
+ // Store an expired OAuth credential — both instances will need to refresh
+ const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES });
+ authStorageA = new AuthStorage(storeA);
+ await authStorageA.set("anthropic", [staleCredential]);
+
+ // Instance B loads same credential from DB into its own in-memory cache
+ authStorageB = new AuthStorage(storeB);
+ await authStorageB.reload();
+
+ // The refreshed credential that instance A will produce
+ const refreshedCredential: OAuthCredentials = {
+ access: "access-v2",
+ refresh: "refresh-v2",
+ expires: FRESH_TOKEN_EXPIRES,
+ accountId: "acct-shared",
+ email: "user@example.com",
+ };
+
+ // Mock both refresh paths:
+ // - refreshOAuthToken: called by pre-refresh in #resolveOAuthApiKey for expired tokens
+ // - getOAuthApiKey: called by #tryOAuthCredential for the actual token exchange
+ const refreshSpy = vi.spyOn(oauthUtils, "refreshOAuthToken");
+ const getApiKeySpy = vi.spyOn(oauthUtils, "getOAuthApiKey");
+
+ // Step 1: Instance A refreshes successfully
+ refreshSpy.mockImplementation(async (_provider, credential) => {
+ return { ...credential, ...refreshedCredential };
+ });
+ getApiKeySpy.mockImplementation(async (_provider, credentials) => {
+ const cred = credentials.anthropic as OAuthCredentials | undefined;
+ if (!cred) return null;
+ return { newCredentials: refreshedCredential, apiKey: "sk-ant-new-key" };
+ });
+
+ const keyA = await authStorageA.getApiKey("anthropic", "session-a");
+ expect(keyA).toBe("sk-ant-new-key");
+
+ // DB now has refreshed credential from instance A.
+ // Instance B still has stale refresh-v1 in memory.
+
+ // Step 2: Instance B tries to refresh with stale token.
+ // The pre-refresh (refreshOAuthToken) runs first, then #tryOAuthCredential calls getOAuthApiKey.
+ // Both throw invalid_grant for stale tokens. After DB re-read, retry with fresh token succeeds.
+ let getApiKeyCallCount = 0;
+ refreshSpy.mockImplementation(async (_provider, credential) => {
+ if (credential.refresh === "refresh-v1") {
+ throw new Error("invalid_grant: Refresh token not found or invalid");
+ }
+ return { ...credential, ...refreshedCredential };
+ });
+ getApiKeySpy.mockImplementation(async (_provider, credentials) => {
+ const cred = credentials.anthropic as OAuthCredentials | undefined;
+ if (!cred) return null;
+ getApiKeyCallCount++;
+
+ if (cred.refresh === "refresh-v1") {
+ // Stale token — Anthropic would return invalid_grant
+ throw new Error("invalid_grant: Refresh token not found or invalid");
+ }
+ if (cred.refresh === "refresh-v2") {
+ // Fresh token from instance A — success
+ return { newCredentials: refreshedCredential, apiKey: "sk-ant-new-key-b" };
+ }
+ return null;
+ });
+
+ const keyB = await authStorageB.getApiKey("anthropic", "session-b");
+
+ // The credential must NOT be disabled — instance B should have recovered
+ expect(keyB).toBeDefined();
+ expect(typeof keyB).toBe("string");
+
+ // DB should still have exactly 1 active credential, none disabled
+ expect(countActiveCredentials(dbPath, "anthropic")).toBe(1);
+ expect(readDisabledCauses(dbPath, "anthropic")).toEqual([]);
+ // The getOAuthApiKey mock must have been called at least twice: once with stale, once with fresh
+ expect(getApiKeyCallCount).toBeGreaterThanOrEqual(2);
+ });
+
+ test("credential is still disabled when DB has same stale token (genuine failure)", async () => {
+ if (!storeA || !storeB) throw new Error("test setup failed");
+
+ // Store an expired credential — only one instance, no concurrent refresh
+ const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES });
+ authStorageA = new AuthStorage(storeA);
+ await authStorageA.set("anthropic", [staleCredential]);
+
+ // Mock: always fail with invalid_grant (genuinely revoked token)
+ vi.spyOn(oauthUtils, "refreshOAuthToken").mockImplementation(async () => {
+ throw new Error("invalid_grant: Refresh token not found or invalid");
+ });
+ vi.spyOn(oauthUtils, "getOAuthApiKey").mockImplementation(async () => {
+ throw new Error("invalid_grant: Refresh token not found or invalid");
+ });
+
+ const key = await authStorageA.getApiKey("anthropic", "session-a");
+
+ // Should return undefined — no valid credentials
+ expect(key).toBeUndefined();
+
+ // Credential should be disabled in DB
+ const causes = readDisabledCauses(dbPath, "anthropic");
+ expect(causes.length).toBe(1);
+ expect(causes[0]).toContain("invalid_grant");
+ });
+
+ test("terminates when DB token matches stale token (no concurrent refresh)", async () => {
+ if (!storeA || !storeB) throw new Error("test setup failed");
+
+ // Both instances share the same stale credential — nobody refreshed
+ const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES });
+ authStorageA = new AuthStorage(storeA);
+ await authStorageA.set("anthropic", [staleCredential]);
+
+ // Instance B loads same stale credential
+ authStorageB = new AuthStorage(storeB);
+ await authStorageB.reload();
+
+ // Mock: always fail — the token is genuinely revoked, nobody refreshed
+ vi.spyOn(oauthUtils, "refreshOAuthToken").mockImplementation(async () => {
+ throw new Error("invalid_grant: Refresh token not found or invalid");
+ });
+ vi.spyOn(oauthUtils, "getOAuthApiKey").mockImplementation(async () => {
+ throw new Error("invalid_grant: Refresh token not found or invalid");
+ });
+
+ // Instance B fails. DB has the same token (nobody refreshed), so the
+ // fix correctly falls through to disable instead of retrying forever.
+ const keyB = await authStorageB.getApiKey("anthropic", "session-b");
+ expect(keyB).toBeUndefined();
+
+ // Credential should be disabled — the fix did not prevent a genuine failure
+ const causes = readDisabledCauses(dbPath, "anthropic");
+ expect(causes.length).toBe(1);
+ expect(causes[0]).toContain("invalid_grant");
+ });
+});