commit 02b80dd74e2f962fffc9b0076bb7966eea4e45ff Author: Simon Gardling Date: Wed Apr 8 13:05:09 2026 -0400 Fix key reauth with parallel sessions diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 3f50b7bd4..248dacbff 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -5,6 +5,10 @@ - Removed `coerceNullStrings` function and its automatic null-string coercion behavior from JSON parsing +### Fixed + +- Fixed concurrent OAuth refresh token rotation race: when multiple instances share the same credential DB, one instance refreshing a token no longer causes other instances to permanently disable the credential on `invalid_grant` ([#607](https://github.com/can1357/oh-my-pi/issues/607)) + ## [13.19.0] - 2026-04-05 ### Fixed diff --git a/packages/ai/src/auth-storage.ts b/packages/ai/src/auth-storage.ts index 9fdb4473b..fc81a6f3b 100644 --- a/packages/ai/src/auth-storage.ts +++ b/packages/ai/src/auth-storage.ts @@ -1865,7 +1865,35 @@ export class AuthStorage { }); if (isDefinitiveFailure) { - // Permanently disable invalid credentials with an explicit cause for inspection/debugging + // Before permanently disabling, check if another instance already refreshed + // the token in the shared DB. Concurrent instances hold stale in-memory + // copies; refresh token rotation by one instance invalidates the token + // that other instances still hold. Re-read from DB before giving up. + if (/invalid_grant/i.test(errorMsg)) { + const entries = this.#getStoredCredentials(provider); + const entry = entries[selection.index]; + if (entry) { + const dbCredentials = this.#store.listAuthCredentials(provider); + const dbEntry = dbCredentials.find(row => row.id === entry.id); + if ( + dbEntry?.credential.type === "oauth" && + dbEntry.credential.refresh !== selection.credential.refresh + ) { + // DB has a newer refresh token — another instance refreshed. + // Update in-memory state and retry with the fresh credential. + logger.warn("Concurrent refresh detected, syncing from DB and retrying", { + provider, + index: selection.index, + rowId: entry.id, + }); + const updated = [...entries]; + updated[selection.index] = { id: entry.id, credential: dbEntry.credential }; + this.#setStoredCredentials(provider, updated); + return this.getApiKey(provider, sessionId, options); + } + } + } + // Genuinely invalid — disable the credential this.#disableCredentialAt(provider, selection.index, `oauth refresh failed: ${errorMsg}`); if (this.#getCredentialsForProvider(provider).some(credential => credential.type === "oauth")) { return this.getApiKey(provider, sessionId, options); diff --git a/packages/ai/test/auth-storage-refresh-race.test.ts b/packages/ai/test/auth-storage-refresh-race.test.ts new file mode 100644 index 000000000..5a8098a18 --- /dev/null +++ b/packages/ai/test/auth-storage-refresh-race.test.ts @@ -0,0 +1,230 @@ +import { Database } from "bun:sqlite"; +import { afterEach, beforeEach, describe, expect, test, vi } from "bun:test"; +import * as fs from "node:fs/promises"; +import * as os from "node:os"; +import * as path from "node:path"; +import { AuthCredentialStore, AuthStorage, type OAuthCredential } from "../src/auth-storage"; +import * as oauthUtils from "../src/utils/oauth"; +import type { OAuthCredentials } from "../src/utils/oauth/types"; + +/** + * Tests for the concurrent OAuth refresh token rotation race condition. + * + * When multiple omp instances share the same SQLite credential DB but hold + * independent in-memory caches, refresh token rotation by one instance + * invalidates the token that other instances still hold. Without the fix, + * the stale instance permanently disables the credential on `invalid_grant` + * even though a valid refresh token exists in the DB. + */ + +function readDisabledCauses(dbPath: string, provider: string): string[] { + const db = new Database(dbPath, { readonly: true }); + try { + const rows = db + .prepare( + "SELECT disabled_cause FROM auth_credentials WHERE provider = ? AND disabled_cause IS NOT NULL ORDER BY id ASC", + ) + .all(provider) as Array<{ disabled_cause?: string | null }>; + return rows.flatMap(row => (typeof row.disabled_cause === "string" ? [row.disabled_cause] : [])); + } finally { + db.close(); + } +} + +function countActiveCredentials(dbPath: string, provider: string): number { + const db = new Database(dbPath, { readonly: true }); + try { + const row = db + .prepare("SELECT COUNT(*) AS count FROM auth_credentials WHERE provider = ? AND disabled_cause IS NULL") + .get(provider) as { count?: number } | undefined; + return row?.count ?? 0; + } finally { + db.close(); + } +} + +const EXPIRED_TOKEN_EXPIRES = Date.now() - 60_000; +const FRESH_TOKEN_EXPIRES = Date.now() + 3_600_000; + +function makeOAuthCredential(suffix: string, opts?: { expires?: number }): OAuthCredential { + return { + type: "oauth", + access: `access-${suffix}`, + refresh: `refresh-${suffix}`, + expires: opts?.expires ?? FRESH_TOKEN_EXPIRES, + accountId: "acct-shared", + email: "user@example.com", + }; +} + +describe("AuthStorage concurrent OAuth refresh token rotation race", () => { + let tempDir = ""; + let dbPath = ""; + let storeA: AuthCredentialStore | null = null; + let storeB: AuthCredentialStore | null = null; + let authStorageA: AuthStorage | null = null; + let authStorageB: AuthStorage | null = null; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "pi-ai-auth-refresh-race-")); + dbPath = path.join(tempDir, "agent.db"); + + // Both stores point at the same SQLite DB — simulates two omp instances + storeA = await AuthCredentialStore.open(dbPath); + storeB = await AuthCredentialStore.open(dbPath); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + storeA?.close(); + storeB?.close(); + storeA = null; + storeB = null; + authStorageA = null; + authStorageB = null; + if (tempDir) { + await fs.rm(tempDir, { recursive: true, force: true }); + tempDir = ""; + } + }); + + test("instance B recovers from invalid_grant when instance A already refreshed", async () => { + if (!storeA || !storeB) throw new Error("test setup failed"); + + // Store an expired OAuth credential — both instances will need to refresh + const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES }); + authStorageA = new AuthStorage(storeA); + await authStorageA.set("anthropic", [staleCredential]); + + // Instance B loads same credential from DB into its own in-memory cache + authStorageB = new AuthStorage(storeB); + await authStorageB.reload(); + + // The refreshed credential that instance A will produce + const refreshedCredential: OAuthCredentials = { + access: "access-v2", + refresh: "refresh-v2", + expires: FRESH_TOKEN_EXPIRES, + accountId: "acct-shared", + email: "user@example.com", + }; + + // Mock both refresh paths: + // - refreshOAuthToken: called by pre-refresh in #resolveOAuthApiKey for expired tokens + // - getOAuthApiKey: called by #tryOAuthCredential for the actual token exchange + const refreshSpy = vi.spyOn(oauthUtils, "refreshOAuthToken"); + const getApiKeySpy = vi.spyOn(oauthUtils, "getOAuthApiKey"); + + // Step 1: Instance A refreshes successfully + refreshSpy.mockImplementation(async (_provider, credential) => { + return { ...credential, ...refreshedCredential }; + }); + getApiKeySpy.mockImplementation(async (_provider, credentials) => { + const cred = credentials.anthropic as OAuthCredentials | undefined; + if (!cred) return null; + return { newCredentials: refreshedCredential, apiKey: "sk-ant-new-key" }; + }); + + const keyA = await authStorageA.getApiKey("anthropic", "session-a"); + expect(keyA).toBe("sk-ant-new-key"); + + // DB now has refreshed credential from instance A. + // Instance B still has stale refresh-v1 in memory. + + // Step 2: Instance B tries to refresh with stale token. + // The pre-refresh (refreshOAuthToken) runs first, then #tryOAuthCredential calls getOAuthApiKey. + // Both throw invalid_grant for stale tokens. After DB re-read, retry with fresh token succeeds. + let getApiKeyCallCount = 0; + refreshSpy.mockImplementation(async (_provider, credential) => { + if (credential.refresh === "refresh-v1") { + throw new Error("invalid_grant: Refresh token not found or invalid"); + } + return { ...credential, ...refreshedCredential }; + }); + getApiKeySpy.mockImplementation(async (_provider, credentials) => { + const cred = credentials.anthropic as OAuthCredentials | undefined; + if (!cred) return null; + getApiKeyCallCount++; + + if (cred.refresh === "refresh-v1") { + // Stale token — Anthropic would return invalid_grant + throw new Error("invalid_grant: Refresh token not found or invalid"); + } + if (cred.refresh === "refresh-v2") { + // Fresh token from instance A — success + return { newCredentials: refreshedCredential, apiKey: "sk-ant-new-key-b" }; + } + return null; + }); + + const keyB = await authStorageB.getApiKey("anthropic", "session-b"); + + // The credential must NOT be disabled — instance B should have recovered + expect(keyB).toBeDefined(); + expect(typeof keyB).toBe("string"); + + // DB should still have exactly 1 active credential, none disabled + expect(countActiveCredentials(dbPath, "anthropic")).toBe(1); + expect(readDisabledCauses(dbPath, "anthropic")).toEqual([]); + // The getOAuthApiKey mock must have been called at least twice: once with stale, once with fresh + expect(getApiKeyCallCount).toBeGreaterThanOrEqual(2); + }); + + test("credential is still disabled when DB has same stale token (genuine failure)", async () => { + if (!storeA || !storeB) throw new Error("test setup failed"); + + // Store an expired credential — only one instance, no concurrent refresh + const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES }); + authStorageA = new AuthStorage(storeA); + await authStorageA.set("anthropic", [staleCredential]); + + // Mock: always fail with invalid_grant (genuinely revoked token) + vi.spyOn(oauthUtils, "refreshOAuthToken").mockImplementation(async () => { + throw new Error("invalid_grant: Refresh token not found or invalid"); + }); + vi.spyOn(oauthUtils, "getOAuthApiKey").mockImplementation(async () => { + throw new Error("invalid_grant: Refresh token not found or invalid"); + }); + + const key = await authStorageA.getApiKey("anthropic", "session-a"); + + // Should return undefined — no valid credentials + expect(key).toBeUndefined(); + + // Credential should be disabled in DB + const causes = readDisabledCauses(dbPath, "anthropic"); + expect(causes.length).toBe(1); + expect(causes[0]).toContain("invalid_grant"); + }); + + test("terminates when DB token matches stale token (no concurrent refresh)", async () => { + if (!storeA || !storeB) throw new Error("test setup failed"); + + // Both instances share the same stale credential — nobody refreshed + const staleCredential = makeOAuthCredential("v1", { expires: EXPIRED_TOKEN_EXPIRES }); + authStorageA = new AuthStorage(storeA); + await authStorageA.set("anthropic", [staleCredential]); + + // Instance B loads same stale credential + authStorageB = new AuthStorage(storeB); + await authStorageB.reload(); + + // Mock: always fail — the token is genuinely revoked, nobody refreshed + vi.spyOn(oauthUtils, "refreshOAuthToken").mockImplementation(async () => { + throw new Error("invalid_grant: Refresh token not found or invalid"); + }); + vi.spyOn(oauthUtils, "getOAuthApiKey").mockImplementation(async () => { + throw new Error("invalid_grant: Refresh token not found or invalid"); + }); + + // Instance B fails. DB has the same token (nobody refreshed), so the + // fix correctly falls through to disable instead of retrying forever. + const keyB = await authStorageB.getApiKey("anthropic", "session-b"); + expect(keyB).toBeUndefined(); + + // Credential should be disabled — the fix did not prevent a genuine failure + const causes = readDisabledCauses(dbPath, "anthropic"); + expect(causes.length).toBe(1); + expect(causes[0]).toContain("invalid_grant"); + }); +});