AI Guided Reader

This extension demonstrates an AI-powered reading assistant with a chat interface in a side panel. The user can ask it to guide them through a page: it highlights content blocks one at a time, auto-scrolls to each, and shows reading statistics when finished. It uses a background script that calls an external LLM API for conversational responses, a content script that implements the guided reading tool, and two-way messaging between all three components.

  • Directory: ai_agent_extension
    • manifest.json
    • background.js
    • content.js
    • sidepanel.html
    • sidepanel.js
    • sidepanel.css
{
  "manifest_version": 3,
  "name": "AI Guided Reader",
  "version": "1.0",
  "background": {
    "service_worker": "background.js"
  },
  "content_scripts": [
    {
      "js": ["content.js"],
      "matches": ["<all_urls>"]
    }
  ],
  "side_panel": {
    "default_path": "sidepanel.html"
  },
  "host_permissions": [
    "https://your-llm-api.example/*"
  ],
  "env": [
    {
      "key": "AGENT_API_URL",
      "value": "",
      "description": "Base URL of the LLM API (e.g. https://your-llm-api.example/v1/messages)"
    }
  ]
}

The content script implements a guided reading tool. It locates the main content area on the page, then highlights text blocks one at a time with auto-scroll — giving the user a focused, distraction-free reading experience. When finished, it sends reading statistics back to the side panel via runtime.sendMessage().

let guidedReadState = null;

// Find the main content area, skipping nav/header/footer/sidebar
function findContentRoot() {
  // Try semantic elements — pick the one with the most text
  const semantic = [...document.querySelectorAll('article, [role="main"], main')];
  if (semantic.length) {
    return semantic.sort((a, b) => b.textContent.length - a.textContent.length)[0];
  }
  // Fall back to the largest div/section that looks like prose
  let best = null;
  let bestLen = 0;
  for (const el of document.querySelectorAll('div, section')) {
    const tag = (el.getAttribute('role') || el.tagName).toLowerCase();
    if (['nav', 'navigation', 'banner', 'complementary', 'contentinfo'].includes(tag)) continue;
    const id = (el.id + ' ' + el.className).toLowerCase();
    if (/nav|menu|header|footer|sidebar|cookie|banner|advert|comment/.test(id)) continue;
    const text = el.textContent.trim();
    const hasProse = el.querySelectorAll('p').length >= 2 || text.length > 500;
    if (hasProse && text.length > bestLen) {
      bestLen = text.length;
      best = el;
    }
  }
  return best || document.body;
}

// Collect text-heavy elements within the content area
function collectBlocks() {
  const root = findContentRoot();
  const blocks = [];
  const seen = new Set();
  for (const el of root.querySelectorAll('h1, h2, h3, h4, p, li, blockquote, pre')) {
    const text = el.textContent.trim();
    if (text.length < 20) continue;
    if (el.offsetHeight === 0) continue;
    // Skip elements inside nav/footer/aside nested within content root
    const parent = el.closest(
      'nav, header, footer, aside, [role="navigation"], [role="banner"], [role="contentinfo"]'
    );
    if (parent && root.contains(parent)) continue;
    // Skip if a parent block already covers this text
    let dominated = false;
    for (const s of seen) {
      if (s.contains(el)) { dominated = true; break; }
    }
    if (dominated) continue;
    seen.add(el);
    blocks.push(el);
  }
  return blocks;
}

function injectStyles() {
  if (document.getElementById('wf-gr-style')) return;
  const style = document.createElement('style');
  style.id = 'wf-gr-style';
  style.textContent = `
    .wf-gr-highlight {
      outline: 3px solid #6366f1 !important;
      outline-offset: 4px;
      border-radius: 4px;
      background: rgba(99, 102, 241, 0.08) !important;
      transition: outline-color 0.3s, background 0.3s;
    }`;
  document.head.appendChild(style);
}

function clearHighlights() {
  document.querySelectorAll('.wf-gr-highlight')
    .forEach(el => el.classList.remove('wf-gr-highlight'));
}

function startGuidedRead(speed) {
  if (guidedReadState) stopGuidedRead();
  const blocks = collectBlocks();
  if (!blocks.length) return { active: false, error: 'No readable content found' };
  injectStyles();
  const charsPerSec = (speed || 1500) / 60;
  const MIN_PAUSE = 1.0;
  let index = 0;
  const startTime = Date.now();
  let totalChars = 0;
  let totalWords = 0;
  let headingsRead = 0;

  function step() {
    if (!guidedReadState || index >= blocks.length) {
      const elapsed = Math.round((Date.now() - startTime) / 1000);
      const stats = {
        totalBlocks: blocks.length,
        totalChars,
        totalWords,
        headingsRead,
        elapsedSec: elapsed,
        avgWordsPerMin: elapsed > 0 ? Math.round(totalWords / (elapsed / 60)) : 0,
        pageTitle: document.title,
      };
      stopGuidedRead();
      // Notify the side panel that reading is complete
      browser.runtime.sendMessage({ type: 'GUIDED_READ_DONE', stats });
      return;
    }
    clearHighlights();
    const el = blocks[index];
    el.classList.add('wf-gr-highlight');
    el.scrollIntoView({ behavior: 'smooth', block: 'center' });
    const text = el.textContent.trim();
    totalChars += text.length;
    totalWords += text.split(/\s+/).length;
    if (/^H[1-4]$/.test(el.tagName)) headingsRead++;
    const pause = Math.max(MIN_PAUSE, text.length / charsPerSec);
    index++;
    guidedReadState.timer = setTimeout(step, pause * 1000);
  }

  guidedReadState = { blocks, timer: null };
  step();
  return { active: true, totalBlocks: blocks.length, charsPerMin: speed || 1500 };
}

function stopGuidedRead() {
  if (guidedReadState) {
    clearTimeout(guidedReadState.timer);
    guidedReadState = null;
  }
  clearHighlights();
  const style = document.getElementById('wf-gr-style');
  if (style) style.remove();
  return { active: false };
}

// Listen for tool execution requests from the background script
browser.runtime.onMessage.addListener((message) => {
  if (message.type === 'TOOL_EXEC' && message.tool === 'guidedRead') {
    const { action, speed } = message.input || {};
    if (action === 'start') return startGuidedRead(speed);
    if (action === 'stop') return stopGuidedRead();
    return { error: 'Unknown action. Use "start" or "stop".' };
  }
});

The background script acts as the central message router. It receives chat requests from the side panel, calls the LLM API, and forwards tool execution requests to the content script.

const API_URL = browser.webfuseSession.env.AGENT_API_URL;

// Call the LLM API.
// Sends { messages } and expects a JSON response with: { text: string }
// Adapt the request body and response parsing to match your LLM provider.
async function callLLM(messages) {
  const response = await fetch(API_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ messages }),
  });
  if (!response.ok) {
    throw new Error(`API error: ${response.status}`);
  }
  return response.json();
}

// Route messages between side panel and content script
browser.runtime.onMessage.addListener((message, sender) => {
  switch (message.type) {
    case 'CHAT_REQUEST': {
      const { requestId, messages } = message;
      callLLM(messages)
        .then(result => {
          browser.runtime.sendMessage({ type: 'CHAT_RESPONSE', requestId, result });
        })
        .catch(err => {
          browser.runtime.sendMessage({
            type: 'CHAT_RESPONSE',
            requestId,
            result: { error: err.message },
          });
        });
      break;
    }
    case 'TOOL_EXEC': {
      // Forward to content script via tabs.sendMessage (supports async return),
      // then relay the result back to the side panel via runtime.sendMessage
      const { requestId } = message;
      browser.webfuseSession.getTabs().then(tabs => {
        const activeTab = tabs.find(t => t.active);
        if (activeTab) {
          return browser.tabs.sendMessage(activeTab.id, message);
        }
        return { error: 'No active tab' };
      }).then(result => {
        browser.runtime.sendMessage({ type: 'TOOL_RESULT', requestId, result });
      }).catch(err => {
        browser.runtime.sendMessage({ type: 'TOOL_RESULT', requestId, result: { error: err.message } });
      });
      break;
    }
  }
});

// Auto-open the side panel
browser.sidePanel.setPanelBehavior({ openPanelOnActionClick: true });
browser.sidePanel.open();
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>AI Guided Reader</title>
  <link rel="stylesheet" href="sidepanel.css">
</head>
<body>
  <div class="header">
    <span class="status" id="status">ready</span>
  </div>
  <div class="messages" id="messages"></div>
  <div class="input-row">
    <input id="input" type="text" placeholder="Ask me anything..." />
    <button id="send">Go</button>
    <button id="stop" style="display:none">Stop</button>
  </div>
  <script src="sidepanel.js"></script>
</body>
</html>

The side panel manages the chat UI, detects reading intent from user messages, and dispatches tool calls to the content script via the background. Conversational messages are handled by the LLM; the guided reading tool is triggered locally via keyword detection, which works reliably regardless of model quality.

const SYSTEM_PROMPT = `You are "Reader" — a cheerful reading assistant inside a browser session.
You help users read web pages by highlighting text blocks one at a time and auto-scrolling through them.
Keep responses short — 1-2 sentences. Be friendly and a little nerdy about reading.`;

const history = [];
const pendingRequests = {};
let isReading = false;

// --- UI helpers ---
const messagesEl = document.getElementById('messages');
const inputEl = document.getElementById('input');
const sendBtn = document.getElementById('send');
const stopBtn = document.getElementById('stop');
const statusEl = document.getElementById('status');

function addMessage(role, text) {
  const div = document.createElement('div');
  div.className = `msg ${role}`;
  div.textContent = text;
  messagesEl.appendChild(div);
  messagesEl.scrollTop = messagesEl.scrollHeight;
}

function addAction(text) {
  const div = document.createElement('div');
  div.className = 'msg action';
  div.textContent = text;
  messagesEl.appendChild(div);
  messagesEl.scrollTop = messagesEl.scrollHeight;
}

function setActive(on) {
  if (!on && isReading) return; // don't override "reading..." state
  statusEl.textContent = on ? 'working...' : 'ready';
  statusEl.className = `status ${on ? 'active' : ''}`;
  inputEl.disabled = on;
  sendBtn.disabled = on;
}

function setReading(on) {
  isReading = on;
  statusEl.textContent = on ? 'reading...' : 'ready';
  statusEl.className = `status ${on ? 'reading' : ''}`;
  sendBtn.style.display = on ? 'none' : '';
  stopBtn.style.display = on ? '' : 'none';
  if (!on) {
    inputEl.disabled = false;
    sendBtn.disabled = false;
  }
}

// --- Messaging ---
// Send a request to the background and wait for a matching response.
// runtime.sendMessage is fire-and-forget, so we track pending requests by requestId.
function sendRequest(payload, timeoutMs) {
  return new Promise((resolve) => {
    const requestId = Date.now().toString() + Math.random().toString(36).slice(2);
    pendingRequests[requestId] = resolve;
    browser.runtime.sendMessage({ ...payload, requestId });
    setTimeout(() => {
      if (pendingRequests[requestId]) {
        delete pendingRequests[requestId];
        resolve({ error: 'Request timed out' });
      }
    }, timeoutMs);
  });
}

function callAgent(messages) {
  return sendRequest({ type: 'CHAT_REQUEST', messages }, 60000);
}

function execToolOnPage(toolName, input) {
  return sendRequest({ type: 'TOOL_EXEC', tool: toolName, input }, 10000);
}

// Listen for responses and events
browser.runtime.onMessage.addListener((message) => {
  if (message.requestId && pendingRequests[message.requestId]) {
    const resolve = pendingRequests[message.requestId];
    delete pendingRequests[message.requestId];
    resolve(message.result);
    return;
  }
  // Content script signals that guided reading finished
  if (message.type === 'GUIDED_READ_DONE') {
    setReading(false);
    const s = message.stats;
    const mins = Math.floor(s.elapsedSec / 60);
    const secs = s.elapsedSec % 60;
    const time = mins > 0 ? `${mins}m ${secs}s` : `${secs}s`;
    const lines = [
      `Done! Read ${s.totalBlocks} blocks in ${time}.`,
      `${s.totalWords} words · ${s.totalChars} chars · ${s.avgWordsPerMin} wpm`,
    ];
    if (s.headingsRead > 0) lines[0] += ` (${s.headingsRead} headings)`;
    addMessage('assistant', lines.join('\n'));
  }
});

// --- Intent detection ---
// Detect reading intent locally — small models are unreliable with tool-use decisions
function detectIntent(text) {
  const lower = text.toLowerCase();
  if (/\b(stop|end|pause|quit|cancel)\b/.test(lower)) {
    return { action: 'stop' };
  }
  if (/\b(start|begin|read|go|scan)\b/.test(lower)) {
    let speed = 1500;
    // Explicit number: "read at 2000", "speed 800"
    const numMatch = lower.match(/\b(\d{3,})\b/);
    if (numMatch) speed = parseInt(numMatch[1], 10);
    // Words: "very slow" < "slow" < default < "fast" < "very fast"
    if (/very\s*(slow|careful)/.test(lower)) speed = 500;
    else if (/\b(slow|careful|casual)\b/.test(lower)) speed = 800;
    else if (/very\s*fast|skim/.test(lower)) speed = 3500;
    else if (/\b(fast|quick|rapid)\b/.test(lower)) speed = 2500;
    return { action: 'start', speed };
  }
  return null;
}

// --- Chat handler ---
async function sendMessage(text) {
  setActive(true);
  history.push({ role: 'user', content: text });
  addMessage('user', text);
  const intent = detectIntent(text);
  // Start reading
  if (intent?.action === 'start' && !isReading) {
    addAction(`Starting guided read (${intent.speed} chars/min)...`);
    const result = await execToolOnPage('guidedRead', intent);
    if (result.active) {
      setReading(true);
      const msg = `Reading through ${result.totalBlocks} blocks. I'll highlight each one as we go!`;
      history.push({ role: 'assistant', content: msg });
      addMessage('assistant', msg);
    } else {
      const msg = result.error || 'Could not start reading on this page.';
      history.push({ role: 'assistant', content: msg });
      addMessage('assistant', msg);
    }
    setActive(false);
    return;
  }
  // Stop reading
  if (intent?.action === 'stop' && isReading) {
    addAction('Stopping guided read...');
    await execToolOnPage('guidedRead', { action: 'stop' });
    setReading(false);
    setActive(false);
    return;
  }
  // Everything else — chat with LLM (no tools, just conversation)
  const response = await callAgent(
    [{ role: 'system', content: SYSTEM_PROMPT }, ...history],
  );
  if (response.error) {
    addMessage('assistant', `Error: ${response.error}`);
    setActive(false);
    return;
  }
  const reply = response.text
    || "I'm Reader! I can help you read through any page — just say \"start reading\"."
      + " I'll highlight the important parts one by one and scroll through them for you.";
  history.push({ role: 'assistant', content: reply });
  addMessage('assistant', reply);
  setActive(false);
}

// --- Input handling ---
stopBtn.addEventListener('click', async () => {
  addAction('Stopping guided read...');
  await execToolOnPage('guidedRead', { action: 'stop' });
  setReading(false);
});

sendBtn.addEventListener('click', () => {
  const text = inputEl.value.trim();
  if (!text) return;
  inputEl.value = '';
  sendMessage(text);
});

inputEl.addEventListener('keydown', (e) => {
  if (e.key === 'Enter') sendBtn.click();
});
The stylesheet gives the side panel a dark, chat-style layout, with pulsing status badges for the working and reading states.
* { margin: 0; padding: 0; box-sizing: border-box; }

body {
  font-family: system-ui, -apple-system, sans-serif;
  background: #1a1a2e;
  color: #e0e0e0;
  height: 100vh;
  display: flex;
  flex-direction: column;
}

.header {
  display: flex;
  align-items: center;
  justify-content: flex-end;
  padding: 8px 16px;
  border-bottom: 1px solid #2a2a4a;
}

.status {
  font-size: 11px;
  padding: 2px 8px;
  border-radius: 12px;
  background: #2a2a4a;
  color: #9ca3af;
}

.status.active {
  background: #6366f1;
  color: white;
  animation: pulse 1.5s infinite;
}

.status.reading {
  background: #22c55e;
  color: white;
  animation: pulse 1.5s infinite;
}

@keyframes pulse {
  0%, 100% { opacity: 1; }
  50% { opacity: 0.6; }
}

.messages {
  flex: 1;
  overflow-y: auto;
  padding: 12px 16px;
  display: flex;
  flex-direction: column;
  gap: 8px;
}

.msg {
  padding: 8px 12px;
  border-radius: 12px;
  font-size: 13px;
  line-height: 1.5;
  max-width: 90%;
  word-wrap: break-word;
  white-space: pre-line;
}

.msg.user {
  background: #4f46e5;
  color: white;
  align-self: flex-end;
  border-bottom-right-radius: 4px;
}

.msg.assistant {
  background: #2a2a4a;
  border: 1px solid #3a3a5a;
  align-self: flex-start;
  border-bottom-left-radius: 4px;
}

.msg.action {
  background: transparent;
  border: 1px solid #6366f1;
  color: #a78bfa;
  font-size: 12px;
  align-self: flex-start;
  padding: 4px 10px;
}

.input-row {
  display: flex;
  gap: 8px;
  padding: 12px 16px;
  border-top: 1px solid #2a2a4a;
}

.input-row input {
  flex: 1;
  padding: 10px 14px;
  border-radius: 10px;
  border: 1px solid #3a3a5a;
  background: #2a2a4a;
  color: #e0e0e0;
  font-size: 13px;
  outline: none;
}

.input-row input:focus {
  border-color: #6366f1;
}

.input-row input:disabled {
  opacity: 0.5;
}

.input-row button {
  padding: 10px 18px;
  border: none;
  border-radius: 10px;
  background: linear-gradient(135deg, #6366f1, #8b5cf6);
  color: white;
  font-size: 13px;
  font-weight: 500;
  cursor: pointer;
}

.input-row button:hover { opacity: 0.9; }
.input-row button:disabled { opacity: 0.5; cursor: not-allowed; }

#stop {
  padding: 10px 18px;
  border: none;
  border-radius: 10px;
  background: #ef4444;
  color: white;
  font-size: 13px;
  font-weight: 500;
  cursor: pointer;
}

#stop:hover { opacity: 0.9; }
  • Environment Variables: The LLM API endpoint is stored in the manifest env section and accessed via browser.webfuseSession.env.AGENT_API_URL. This makes the extension shareable — each user provides their own endpoint in the Space’s extension settings.

  • host_permissions Must Match the API Domain: The background script runs from the extension's origin, so the API domain must be declared in host_permissions to update the CSP connect-src directive. Without it, the fetch request is blocked.

  • Generic LLM Contract: The background script sends { messages } to the API and expects { text } back. This keeps the extension API-agnostic — adapt the callLLM() function’s request body and response parsing to match your LLM provider. Most LLM providers (OpenAI, Anthropic, etc.) use a different request/response format, so you will typically need a lightweight backend adapter — a small HTTP server that accepts the extension’s generic format, translates it to your provider’s API, and normalizes the response. This also keeps API keys on the server side rather than exposing them in the extension environment variables.

  • Two-Way Messaging via Background: runtime.sendMessage() is fire-and-forget, so the side panel and background use a requestId pattern to match requests with responses. The background forwards tool requests to the content script via tabs.sendMessage() (which supports async return values), then relays the result back to the side panel.

  • Content Script as Custom Tool Provider: The content script runs inside the Webfuse sandbox and provides the guidedRead tool — a custom page interaction that goes beyond what the built-in Automation API offers. This is the key value of content scripts in AI agent extensions: they can implement domain-specific tools that interact with the DOM in ways the standard API does not cover.

  • Local Intent Detection: Reading commands (start, stop, speed modifiers) are detected locally via keyword matching, keeping the tool reliable regardless of LLM model quality. Only conversational messages are sent to the LLM. The side panel also listens for a GUIDED_READ_DONE event from the content script to display reading statistics when the tool finishes.
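The environment-variable note above can be hardened a little: rather than reading `AGENT_API_URL` directly, the background script could fail fast when the Space has not configured it yet. This is a sketch under stated assumptions: `resolveApiUrl` is a hypothetical helper, not part of the extension code above.

```javascript
// Hypothetical helper (not in the extension above): resolve the endpoint
// from the session env object, failing loudly when it is unset.
function resolveApiUrl(env) {
  const url = env && env.AGENT_API_URL;
  if (!url) {
    throw new Error('AGENT_API_URL is not set; configure it in the extension settings');
  }
  return url;
}

// In background.js this would replace the direct property access:
// const API_URL = resolveApiUrl(browser.webfuseSession.env);
```

Failing at startup with a clear message is easier to debug than a generic fetch error on the first chat request.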
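The backend adapter described in the Generic LLM Contract note can be sketched as two pure translation functions. This is a hypothetical sketch assuming an Anthropic-style Messages API on the provider side; `toProviderRequest`, `extractText`, and the model id are illustrative names, not part of the extension.

```javascript
// Adapter-side translation between the extension's generic { messages }
// contract and an Anthropic-style Messages API (an assumption; adapt to
// your provider).

// Extension format: [{ role: 'system'|'user'|'assistant', content: string }]
// Provider format: the system prompt is a separate top-level field, and
// messages may only contain user/assistant turns.
function toProviderRequest(messages) {
  const system = messages
    .filter(m => m.role === 'system')
    .map(m => m.content)
    .join('\n');
  return {
    model: 'your-model-id',   // assumption: substitute your provider's model
    max_tokens: 1024,
    system,
    messages: messages.filter(m => m.role !== 'system'),
  };
}

// Normalize the provider response back to the extension's { text } contract.
// Anthropic-style responses carry an array of content blocks.
function extractText(providerJson) {
  return (providerJson.content || [])
    .filter(block => block.type === 'text')
    .map(block => block.text)
    .join('');
}

// Wiring (not started here): an HTTP handler would parse the extension's
// { messages } body, POST toProviderRequest(messages) to the provider with
// the server-side API key, and reply with { text: extractText(json) }.
```

Keeping these translations server-side means the API key never appears in the extension's environment variables, matching the note above.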