diff --git a/home-manager/no-gui.nix b/home-manager/no-gui.nix index 3805bea..a7211e8 100644 --- a/home-manager/no-gui.nix +++ b/home-manager/no-gui.nix @@ -119,7 +119,7 @@ in imports = [ ./progs/fish.nix ./progs/helix.nix - ./progs/opencode.nix + ./progs/pi.nix ( { ... }: { diff --git a/home-manager/progs/opencode.nix b/home-manager/progs/opencode.nix deleted file mode 100644 index 1a10479..0000000 --- a/home-manager/progs/opencode.nix +++ /dev/null @@ -1,240 +0,0 @@ -{ - config, - lib, - pkgs, - inputs, - ... -}: -let - # what model should be used in place of haiku? - haiku-model = "anthropic/claude-haiku-4-5"; - - opus-model = "anthropic/claude-opus-4-6"; - - opencode-claude-bridge = pkgs.buildNpmPackage { - pname = "opencode-claude-bridge"; - version = "1.8.0"; - src = inputs.opencode-claude-bridge; - npmDepsHash = "sha256-jH/UweuHqfeLxICxNRsBODWOBfVdE+ZgIinfW/ITSSc="; - buildPhase = '' - runHook preBuild - npx tsc - runHook postBuild - ''; - # the plugin entry point is dist/index.js - installPhase = '' - runHook preInstall - mkdir -p $out/lib/opencode-claude-bridge - cp -r dist $out/lib/opencode-claude-bridge/ - runHook postInstall - ''; - }; - - ohMyOpencodeConfig = { - "$schema" = - "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json"; - git_master = { - commit_footer = false; - include_co_authored_by = false; - }; - agents = { - sisyphus.model = opus-model; - sisyphus-junior.model = opus-model; - oracle.model = opus-model; - librarian.model = haiku-model; - explore.model = haiku-model; - multimodal-looker.model = "anthropic/claude-opus-4-6"; - - prometheus.model = opus-model; - metis.model = opus-model; - momus.model = opus-model; - atlas.model = opus-model; - }; - categories = { - visual-engineering.model = "openrouter/google/gemini-3-pro"; - ultrabrain.model = opus-model; - artistry = { - model = "openrouter/google/gemini-3-pro"; - variant = "max"; - }; - quick.model = haiku-model; - deep.model = 
opus-model; - writing.model = "openrouter/google/gemini-3-flash-preview"; - }; - }; - oh-my-opencode-pkg = inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.oh-my-opencode; -in -{ - home.packages = [ - oh-my-opencode-pkg - pkgs.playwright-driver.browsers - inputs.claude-code.packages.${pkgs.stdenv.hostPlatform.system}.claude-code - ]; - - home.sessionVariables = { - PLAYWRIGHT_BROWSERS_PATH = "${pkgs.playwright-driver.browsers}"; - PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD = "1"; - }; - - xdg.configFile."opencode/oh-my-opencode.json".text = builtins.toJSON ohMyOpencodeConfig; - - # Plugins are placed directly in the plugins directory so opencode - # auto-discovers them instead of downloading them from npm at runtime. - xdg.configFile."opencode/plugins/oh-my-opencode.js".source = - "${oh-my-opencode-pkg}/lib/oh-my-opencode/dist/index.js"; - - xdg.configFile."opencode/plugins/opencode-claude-bridge.js".source = - "${opencode-claude-bridge}/lib/opencode-claude-bridge/dist/index.js"; - - xdg.configFile."opencode/skills/android-ui.md".text = '' - --- - name: android-ui - description: "Android UI automation via ADB - use for any Android device interaction, UI testing, screenshot analysis, element coordinate lookup, and gesture automation." - --- - - # Android UI Interaction Workflow - - ## 1. Taking Screenshots - ``` - adb exec-out screencap -p > /tmp/screen.png - ``` - Captures the current screen state as a PNG image. - - ## 2. Analyzing Screenshots - Delegate screenshot analysis to an explore agent rather than analyzing images directly: - ``` - mcp_task(subagent_type="explore", prompt="Analyze /tmp/screen.png. What screen is this? What elements are visible?") - ``` - The agent describes the UI, identifies elements, and estimates Y coordinates. - - ## 3. 
Getting Precise Element Coordinates - UI Automator dump - extracts the full UI hierarchy as XML: - ``` - adb shell uiautomator dump /sdcard/ui.xml && adb pull /sdcard/ui.xml /tmp/ui.xml - ``` - Then grep for specific elements: - ```sh - # Find by text - grep -oP 'text="Login".*?bounds="[^"]*"' /tmp/ui.xml - # Find by class - grep -oP 'class="android.widget.EditText".*?bounds="[^"]*"' /tmp/ui.xml - ``` - Bounds format: `[left,top][right,bottom]` — tap center: `((left+right)/2, (top+bottom)/2)` - - ## 4. Tapping Elements - ``` - adb shell input tap X Y - ``` - Where X, Y are pixel coordinates from the bounds. - - ## 5. Text Input - ``` - adb shell input text "some_text" - ``` - Note: Special characters need escaping (`\!`, `\;`, etc.) - - ## 6. Other Gestures - ```sh - # Swipe/scroll - adb shell input swipe startX startY endX endY duration_ms - # Key events - adb shell input keyevent KEYCODE_BACK - adb shell input keyevent KEYCODE_ENTER - ``` - - ## 7. WebView Limitation - - UI Automator can see WebView content if accessibility is enabled - - Touch events on iframe content (like Cloudflare Turnstile) often fail due to cross-origin isolation - - Form fields in WebViews work if you get exact bounds from the UI dump - - ## Typical Flow - 1. Take screenshot → analyze with explore agent (get rough layout) - 2. Dump UI hierarchy → grep for exact element bounds - - NEVER ASSUME COORDINATES. You must ALWAYS check first. - - Do this before ANY tap action as elements on the screen may have changed. - 3. Calculate center coordinates from bounds - 4. Tap/interact - 5. 
Wait → screenshot → verify result - ''; - - xdg.configFile."opencode/skills/playwright.md".text = - let - browsers = pkgs.playwright-driver.browsers; - chromiumDir = builtins.head ( - builtins.filter (n: builtins.match "chromium-[0-9]+" n != null) ( - builtins.attrNames browsers.passthru.entries - ) - ); - chromiumPath = "${browsers}/${chromiumDir}/chrome-linux64/chrome"; - in - '' - --- - name: playwright - description: "MUST USE for any browser-related tasks. Browser automation via Playwright MCP - verification, browsing, information gathering, web scraping, testing, screenshots, and all browser interactions." - mcp: - playwright: - command: npx - args: - - "@playwright/mcp@latest" - - "--executable-path" - - "${chromiumPath}" - - "--user-data-dir" - - "${config.home.homeDirectory}/.cache/playwright-mcp" - --- - - # Playwright Browser Automation - - This skill provides browser automation capabilities via the Playwright MCP server. - ''; - - programs.opencode = { - package = inputs.llm-agents.packages.${pkgs.stdenv.targetPlatform.system}.opencode; - enable = true; - rules = '' - You are an intelligent and observant agent. - If instructed to commit, disable gpg signing. - You are on nixOS, if you don't have access to a tool, you can access it via the `nix-shell` command. - - ## Think deeply about everything. - When given a problem, break it down, abstract it out, understand the fundamentals, then solve it in the real world. - - ## Misc - For long-running commands, make sure you set the timeout of the Bash tool provided to a larger value. - Do NOT read secret files. Do not directly read files that are api keys or are contextually sensitive. - Do NOT run `skill_mcp [mcp_name=playwright, tool_name=browser_install]` as browsers are provided by NixOS via PLAYWRIGHT_BROWSERS_PATH. - - ## Behavior - Do not be sycophantic in your responses. - Do not use emojis unless explicitly asked to. This includes in code. - Use Test Driven Development methodology. 
- - ## Nix - For using `nix build` append `-L` to get better visibility into the logs. - If you get an error that a file can't be found, always try to `git add` the file before trying other troubleshooting steps. - ''; - settings = { - theme = "opencode"; - - model = opus-model; - # small model used for titles - small_model = "openrouter/openai/gpt-oss-20b:free"; - - autoshare = false; - autoupdate = false; - agent = { }; - plugin = [ ]; - provider = { - openrouter = { - models = { - "openai/gpt-oss-20b:free" = { }; - "qwen/qwen3-vl-30b-a3b-thinking" = { }; - }; - options = { - # TODO! use agenix here instead - apiKey = "{file:${../secrets/openrouter_api_key}}"; - }; - }; - }; - }; - }; -} diff --git a/home-manager/progs/pi.nix b/home-manager/progs/pi.nix new file mode 100644 index 0000000..9b529b8 --- /dev/null +++ b/home-manager/progs/pi.nix @@ -0,0 +1,169 @@ +{ + config, + lib, + pkgs, + inputs, + ... +}: +let + # model roles, mapped from the old oh-my-opencode agent names: sisyphus/oracle/prometheus → default/slow/plan = opus + # librarian/explore/quick → smol/commit = haiku + ompSettings = { + modelRoles = { + default = "claude-opus-4-6:high"; + smol = "claude-haiku-4-5:low"; + slow = "claude-opus-4-6:xhigh"; + plan = "claude-opus-4-6:high"; + commit = "claude-haiku-4-5:low"; + }; + }; + + # provider config — the "!cat" value defers reading the openrouter API key to runtime (presumably omp runs it as a shell command; confirm), but interpolating the path copies the secret file into the world-readable Nix store at evaluation time. TODO: use agenix here instead + ompModels = { + providers = { + openrouter = { + apiKey = "!cat ${../secrets/openrouter_api_key}"; + }; + }; + }; +in +{ + home.packages = [ + inputs.llm-agents.packages.${pkgs.stdenv.hostPlatform.system}.omp + ]; + + # main settings: ~/.omp/agent/config.yml (JSON is valid YAML) + home.file.".omp/agent/config.yml".text = builtins.toJSON ompSettings; + + # model/provider config: ~/.omp/agent/models.yml + home.file.".omp/agent/models.yml".text = builtins.toJSON ompModels; + + # global instructions loaded at startup + home.file.".omp/agent/AGENTS.md".text = '' + You are an intelligent and observant agent. + If instructed to commit, disable gpg signing. 
+ You are on nixOS, if you don't have access to a tool, you can access it via the `nix-shell` command. + + ## Think deeply about everything. + When given a problem, break it down, abstract it out, understand the fundamentals, then solve it in the real world. + + ## Misc + For long-running commands, make sure you set the timeout of the Bash tool provided to a larger value. + Do NOT read secret files. Do not directly read files that are api keys or are contextually sensitive. + + ## Behavior + Do not be sycophantic in your responses. + Do not use emojis unless explicitly asked to. This includes in code. + Use Test Driven Development methodology. + + ## Nix + For using `nix build` append `-L` to get better visibility into the logs. + If you get an error that a file can't be found, always try to `git add` the file before trying other troubleshooting steps. + ''; + + home.file.".omp/agent/skills/android-ui/SKILL.md".text = '' + --- + name: android-ui + description: Android UI automation via ADB. Use for any Android device interaction, UI testing, screenshot analysis, element coordinate lookup, and gesture automation. + --- + + # Android UI + + ## 1. Taking Screenshots + ``` + adb exec-out screencap -p > /tmp/screen.png + ``` + Captures the current screen state as a PNG image. + + ## 2. Analyzing Screenshots + Read the screenshot file to understand the current screen state and identify UI elements. + + ## 3. Getting Precise Element Coordinates + UI Automator dump - extracts the full UI hierarchy as XML: + ``` + adb shell uiautomator dump /sdcard/ui.xml && adb pull /sdcard/ui.xml /tmp/ui.xml + ``` + Then grep for specific elements: + ```sh + # Find by text + grep -oP 'text="Login".*?bounds="[^"]*"' /tmp/ui.xml + # Find by class + grep -oP 'class="android.widget.EditText".*?bounds="[^"]*"' /tmp/ui.xml + ``` + Bounds format: `[left,top][right,bottom]` — tap center: `((left+right)/2, (top+bottom)/2)` + + ## 4. 
Tapping Elements + ``` + adb shell input tap X Y + ``` + Where X, Y are pixel coordinates from the bounds. + + ## 5. Text Input + ``` + adb shell input text "some_text" + ``` + Note: Special characters need escaping (`\!`, `\;`, etc.) + + ## 6. Other Gestures + ```sh + # Swipe/scroll + adb shell input swipe startX startY endX endY duration_ms + # Key events + adb shell input keyevent KEYCODE_BACK + adb shell input keyevent KEYCODE_ENTER + ``` + + ## 7. WebView Limitation + - UI Automator can see WebView content if accessibility is enabled + - Touch events on iframe content (like Cloudflare Turnstile) often fail due to cross-origin isolation + - Form fields in WebViews work if you get exact bounds from the UI dump + + ## Typical Flow + 1. Take screenshot → analyze it (get rough layout) + 2. Dump UI hierarchy → grep for exact element bounds + - NEVER ASSUME COORDINATES. You must ALWAYS check first. + - Do this before ANY tap action as elements on the screen may have changed. + 3. Calculate center coordinates from bounds + 4. Tap/interact + 5. Wait → screenshot → verify result + ''; + + # omp has a built-in browser tool with NixOS auto-detection, + # but this skill provides playwright MCP as a supplementary option; chromiumPath is resolved at evaluation time from the first playwright-driver.browsers entry whose name matches chromium-[0-9]+ (NOTE(review): the chrome-linux64 subdirectory is hard-coded, presumably only valid on x86_64 Linux; confirm) + home.file.".omp/agent/skills/playwright/SKILL.md".text = + let + browsers = pkgs.playwright-driver.browsers; + chromiumDir = builtins.head ( + builtins.filter (n: builtins.match "chromium-[0-9]+" n != null) ( + builtins.attrNames browsers.passthru.entries + ) + ); + chromiumPath = "${browsers}/${chromiumDir}/chrome-linux64/chrome"; + in + '' + --- + name: playwright + description: Browser automation via Playwright MCP. Use as an alternative to the built-in browser tool for Playwright-specific workflows, testing, and web scraping. Chromium is provided by NixOS. + --- + + # Playwright + + ## Browser Setup + Chromium is provided by NixOS. Do NOT attempt to download browsers. 
+ + - Chromium path: `${chromiumPath}` + - Browsers path: `${browsers}` + + ## Usage + Launch the Playwright MCP server for browser automation: + ```bash + npx @playwright/mcp@latest --executable-path "${chromiumPath}" --user-data-dir "${config.home.homeDirectory}/.cache/playwright-mcp" + ``` + + Set these environment variables if not already set: + ```bash + export PLAYWRIGHT_BROWSERS_PATH="${browsers}" + export PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 + ``` + ''; +}