seed-skills/agent-browser/SKILL.md
Fast Rust-based headless browser automation CLI with Node.js fallback for AI agents, featuring navigation, clicking, typing, snapshots, and structured commands optimized for agent workflows.
npx skillsauth add PramodDutta/qaskills Agent Browser AutomationInstall this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
You are an expert in browser automation using agent-browser, a fast Rust-based CLI tool designed specifically for AI coding agents. When the user asks you to automate browser interactions, perform web scraping, or test web applications, follow these detailed instructions.
# Install globally via npm
npm install -g agent-browser
# Or use npx without installation
npx agent-browser --version
# Verify installation
agent-browser --help
tests/
browser-automation/
scenarios/
login-flow.json
checkout-flow.json
search-navigation.json
snapshots/
login-success.png
cart-state.png
scripts/
run-scenario.sh
batch-test.sh
config/
browser-config.json
viewport-sizes.json
# Navigate to URL
agent-browser navigate --url "https://example.com"
# Navigate with custom viewport
agent-browser navigate --url "https://example.com" --viewport "1920x1080"
# Navigate with custom user agent
agent-browser navigate --url "https://example.com" \
--user-agent "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X)"
# Navigate and wait for network idle
agent-browser navigate --url "https://example.com" --wait-until "networkidle"
Output:
{
"success": true,
"url": "https://example.com",
"title": "Example Domain",
"loadTime": 234,
"timestamp": "2024-02-12T10:30:00Z"
}
# Click by selector
agent-browser click --selector "button#submit"
# Click by text content
agent-browser click --text "Sign In"
# Click with coordinate offset
agent-browser click --selector ".dropdown" --offset "10,20"
# Wait for element before clicking
agent-browser click --selector "button.load-more" --wait 5000
Output:
{
"success": true,
"element": "button#submit",
"action": "click",
"timestamp": "2024-02-12T10:30:01Z"
}
# Type into input field
agent-browser type --selector "input#email" --text "[email protected]"
# Type with delay between keystrokes (human-like)
agent-browser type --selector "input#search" --text "automation" --delay 50
# Type and press Enter
agent-browser type --selector "input#search" --text "query" --enter
# Clear field before typing
agent-browser type --selector "input#username" --text "newuser" --clear
Output:
{
"success": true,
"element": "input#email",
"action": "type",
"length": 17,
"timestamp": "2024-02-12T10:30:02Z"
}
# Full page screenshot
agent-browser snapshot --output "screenshot.png"
# Snapshot specific element
agent-browser snapshot --selector "#content" --output "content.png"
# Snapshot with custom dimensions
agent-browser snapshot --viewport "1920x1080" --output "desktop.png"
# Get page HTML
agent-browser snapshot --format "html" --output "page.html"
# Get page as PDF
agent-browser snapshot --format "pdf" --output "document.pdf"
Output:
{
"success": true,
"format": "png",
"path": "screenshot.png",
"size": 245678,
"dimensions": {
"width": 1280,
"height": 720
},
"timestamp": "2024-02-12T10:30:03Z"
}
# Extract text from element
agent-browser extract --selector "h1.title" --attribute "text"
# Extract attribute value
agent-browser extract --selector "a.link" --attribute "href"
# Extract multiple elements
agent-browser extract --selector "li.item" --all
# Extract as JSON
agent-browser extract --selector ".product-card" --json \
--fields "title:.title,price:.price,image:img@src"
Output:
{
"success": true,
"selector": "h1.title",
"results": [
{
"text": "Welcome to Our Site",
"visible": true
}
],
"count": 1,
"timestamp": "2024-02-12T10:30:04Z"
}
# Wait for element to appear
agent-browser wait --selector ".loading-complete" --timeout 10000
# Wait for element to disappear
agent-browser wait --selector ".spinner" --hidden --timeout 5000
# Wait for text to appear
agent-browser wait --text "Success" --timeout 3000
# Wait for custom condition
agent-browser wait --script "document.readyState === 'complete'"
Output:
{
"success": true,
"condition": "element_visible",
"selector": ".loading-complete",
"waited": 1234,
"timestamp": "2024-02-12T10:30:05Z"
}
#!/bin/bash
# login-test.sh
# Navigate to login page
agent-browser navigate --url "https://app.example.com/login"
# Fill email
agent-browser type --selector "input#email" --text "[email protected]"
# Fill password
agent-browser type --selector "input#password" --text "$PASSWORD"
# Click submit
agent-browser click --selector "button[type='submit']"
# Wait for dashboard
agent-browser wait --selector ".dashboard" --timeout 5000
# Take success snapshot
agent-browser snapshot --output "login-success.png"
# Extract user info
agent-browser extract --selector ".user-name" --attribute "text"
#!/bin/bash
# checkout-flow.sh
set -e # Exit on any error
echo "Starting checkout flow test..."
# 1. Navigate to product page
agent-browser navigate --url "https://shop.example.com/products/widget-123"
agent-browser wait --selector ".product-details" --timeout 5000
# 2. Add to cart
agent-browser click --selector "button.add-to-cart"
agent-browser wait --text "Added to cart" --timeout 3000
# 3. Open cart
agent-browser click --selector ".cart-icon"
agent-browser wait --selector ".cart-items" --timeout 2000
# 4. Take cart snapshot
agent-browser snapshot --selector ".cart-summary" --output "cart-state.png"
# 5. Proceed to checkout
agent-browser click --text "Proceed to Checkout"
agent-browser wait --selector ".checkout-form" --timeout 5000
# 6. Fill shipping info
agent-browser type --selector "#shipping-name" --text "John Doe"
agent-browser type --selector "#shipping-address" --text "123 Main St"
agent-browser type --selector "#shipping-city" --text "San Francisco"
# 7. Take final snapshot
agent-browser snapshot --output "checkout-complete.png"
echo "Checkout flow completed successfully"
import { execSync } from 'child_process';
interface BrowserResult {
success: boolean;
[key: string]: any;
}
class AgentBrowser {
private baseCommand = 'agent-browser';
async navigate(url: string, options: { viewport?: string; waitUntil?: string } = {}): Promise<BrowserResult> {
const args = [`navigate`, `--url`, `"${url}"`];
if (options.viewport) args.push(`--viewport`, options.viewport);
if (options.waitUntil) args.push(`--wait-until`, options.waitUntil);
return this.exec(args);
}
async click(selector: string, options: { wait?: number; offset?: string } = {}): Promise<BrowserResult> {
const args = [`click`, `--selector`, `"${selector}"`];
if (options.wait) args.push(`--wait`, options.wait.toString());
if (options.offset) args.push(`--offset`, options.offset);
return this.exec(args);
}
async type(selector: string, text: string, options: { delay?: number; enter?: boolean; clear?: boolean } = {}): Promise<BrowserResult> {
const args = [`type`, `--selector`, `"${selector}"`, `--text`, `"${text}"`];
if (options.delay) args.push(`--delay`, options.delay.toString());
if (options.enter) args.push(`--enter`);
if (options.clear) args.push(`--clear`);
return this.exec(args);
}
async snapshot(output: string, options: { format?: string; selector?: string; viewport?: string } = {}): Promise<BrowserResult> {
const args = [`snapshot`, `--output`, `"${output}"`];
if (options.format) args.push(`--format`, options.format);
if (options.selector) args.push(`--selector`, `"${options.selector}"`);
if (options.viewport) args.push(`--viewport`, options.viewport);
return this.exec(args);
}
async extract(selector: string, options: { attribute?: string; all?: boolean; json?: boolean } = {}): Promise<BrowserResult> {
const args = [`extract`, `--selector`, `"${selector}"`];
if (options.attribute) args.push(`--attribute`, options.attribute);
if (options.all) args.push(`--all`);
if (options.json) args.push(`--json`);
return this.exec(args);
}
private exec(args: string[]): BrowserResult {
const command = `${this.baseCommand} ${args.join(' ')}`;
try {
const output = execSync(command, { encoding: 'utf-8' });
return JSON.parse(output);
} catch (error: any) {
return {
success: false,
error: error.message,
command,
};
}
}
}
// Usage example
async function testLoginFlow() {
const browser = new AgentBrowser();
// Navigate
const navResult = await browser.navigate('https://app.example.com/login', {
waitUntil: 'networkidle',
});
if (!navResult.success) {
throw new Error(`Navigation failed: ${navResult.error}`);
}
// Login
await browser.type('#email', '[email protected]');
await browser.type('#password', process.env.PASSWORD || '', { enter: true });
// Wait for dashboard
await browser.wait('.dashboard', { timeout: 5000 });
// Capture success state
await browser.snapshot('login-success.png');
console.log('Login test completed successfully');
}
import json
import subprocess
from typing import Dict, Any, Optional
class AgentBrowser:
def __init__(self, browser_path: str = "agent-browser"):
self.browser_path = browser_path
def navigate(self, url: str, viewport: Optional[str] = None, wait_until: Optional[str] = None) -> Dict[str, Any]:
args = [self.browser_path, "navigate", "--url", url]
if viewport:
args.extend(["--viewport", viewport])
if wait_until:
args.extend(["--wait-until", wait_until])
return self._exec(args)
def click(self, selector: str, wait: Optional[int] = None, offset: Optional[str] = None) -> Dict[str, Any]:
args = [self.browser_path, "click", "--selector", selector]
if wait:
args.extend(["--wait", str(wait)])
if offset:
args.extend(["--offset", offset])
return self._exec(args)
def type(self, selector: str, text: str, delay: Optional[int] = None, enter: bool = False, clear: bool = False) -> Dict[str, Any]:
args = [self.browser_path, "type", "--selector", selector, "--text", text]
if delay:
args.extend(["--delay", str(delay)])
if enter:
args.append("--enter")
if clear:
args.append("--clear")
return self._exec(args)
def snapshot(self, output: str, format: Optional[str] = None, selector: Optional[str] = None) -> Dict[str, Any]:
args = [self.browser_path, "snapshot", "--output", output]
if format:
args.extend(["--format", format])
if selector:
args.extend(["--selector", selector])
return self._exec(args)
def extract(self, selector: str, attribute: Optional[str] = None, all: bool = False) -> Dict[str, Any]:
args = [self.browser_path, "extract", "--selector", selector]
if attribute:
args.extend(["--attribute", attribute])
if all:
args.append("--all")
return self._exec(args)
def _exec(self, args: list) -> Dict[str, Any]:
try:
result = subprocess.run(args, capture_output=True, text=True, check=True)
return json.loads(result.stdout)
except subprocess.CalledProcessError as e:
return {
"success": False,
"error": e.stderr,
"command": " ".join(args)
}
# Usage example
def test_search_flow():
browser = AgentBrowser()
# Navigate to search page
nav = browser.navigate("https://example.com/search")
assert nav["success"], f"Navigation failed: {nav.get('error')}"
# Enter search query
browser.type("input#search", "automation testing", enter=True)
# Wait for results
browser.wait(".search-results", timeout=5000)
# Extract result titles
results = browser.extract(".result-item h3", attribute="text", all=True)
print(f"Found {len(results.get('results', []))} search results")
# Take snapshot
browser.snapshot("search-results.png")
{
"viewport": {
"width": 1920,
"height": 1080
},
"userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"timeout": {
"navigation": 30000,
"element": 10000,
"network": 5000
},
"screenshot": {
"format": "png",
"quality": 80,
"fullPage": true
},
"headless": true,
"slowMo": 0,
"devtools": false
}
# Use config file
agent-browser --config ./browser-config.json navigate --url "https://example.com"
# Override config values
agent-browser --config ./browser-config.json --viewport "1280x720" navigate --url "https://example.com"
Rust Implementation Benefits:
When Node.js Fallback Activates:
# Chain commands for efficiency
agent-browser chain \
"navigate --url https://example.com" \
"type --selector #search --text query" \
"click --selector button" \
"wait --selector .results" \
"snapshot --output results.png"
# Output includes timing for each step
# Registration form
agent-browser navigate --url "https://app.example.com/register"
agent-browser type --selector "#firstName" --text "John"
agent-browser type --selector "#lastName" --text "Doe"
agent-browser type --selector "#email" --text "[email protected]"
agent-browser type --selector "#password" --text "SecurePass123!"
agent-browser click --selector "#terms-checkbox"
agent-browser click --selector "button[type='submit']"
agent-browser wait --text "Registration successful" --timeout 5000
agent-browser snapshot --output "registration-success.png"
# Extract product information
agent-browser navigate --url "https://shop.example.com/products"
agent-browser wait --selector ".product-grid" --timeout 5000
agent-browser extract --selector ".product-card" --json \
--fields "name:.product-name,price:.product-price,rating:.rating" \
--all > products.json
# Complex dashboard interaction
agent-browser navigate --url "https://dashboard.example.com"
agent-browser wait --selector ".dashboard-loaded" --timeout 10000
agent-browser click --selector "a[href='/analytics']"
agent-browser wait --selector ".chart-container" --timeout 5000
agent-browser click --selector "button.date-filter"
agent-browser click --text "Last 30 days"
agent-browser wait --selector ".chart-updated" --timeout 3000
agent-browser snapshot --selector ".analytics-panel" --output "analytics.png"
# Verbose output
agent-browser --debug navigate --url "https://example.com"
# Show browser window (non-headless)
agent-browser --headless false navigate --url "https://example.com"
# Save DevTools logs
agent-browser --console-log ./console.log navigate --url "https://example.com"
async function robustBrowserAction(action: () => Promise<BrowserResult>): Promise<BrowserResult> {
const maxRetries = 3;
let lastError: any;
for (let i = 0; i < maxRetries; i++) {
try {
const result = await action();
if (result.success) {
return result;
}
lastError = result.error;
} catch (error) {
lastError = error;
}
// Exponential backoff
await new Promise(resolve => setTimeout(resolve, Math.pow(2, i) * 1000));
}
throw new Error(`Action failed after ${maxRetries} retries: ${lastError}`);
}
When AI agents use agent-browser, they should:
This tool empowers AI agents to interact with web applications at high speed with structured, parseable output optimized for autonomous decision-making.
development
Build WebdriverIO E2E suites — wdio.conf.ts setup, $ and $$ selectors, auto-wait and waitUntil, Mocha framework structure, page objects, parallel capabilities, and services for visual testing and Appium mobile.
testing
Test Vue 3 components with Vue Test Utils and Vitest — mount vs shallowMount, finding and triggering DOM, asserting props and emitted events, awaiting async updates, and mocking Pinia stores and Vue Router.
testing
Write fast unit and integration tests with Vitest — vitest.config.ts setup, vi.fn and vi.mock module mocking, fake timers, snapshots, V8 coverage with thresholds, workspaces for monorepos, and in-source testing.
development
Practice strict red-green-refactor test-driven development — write one failing test first, make it pass with the minimum code, then refactor under green, with worked cycles in Jest and pytest, AAA structure, and behavior-based test naming.