Donobu LogoDonobu Logo
  • Blog
  • Documentation
  • FAQ
  • Contact
Sign Up
Download
Donobu LogoDonobu Logo

AI QA. Self-healing. Zero friction.

© Copyright 2025 Donobu. All Rights Reserved.

About
  • Blog
  • Contact
Product
  • Documentation
  • Context Buffet
Legal
  • Terms of Service
  • Privacy Policy
  • Cookie Policy
  • Get Started with Donobu
    • Creating Your First Flow
    • Using a Logged-In Browser State for Autonomous Runs
  • Donobu API
    • API Configuration
    • Working with Flows
  • Donobu Features
  • Step-by-Step Tutorials
  • Best Practices & Tips
  • Integrations & Ecosystem
    • Testing Framework Integrations
    • CI/CD Pipeline Integrations
    • Monitoring & Alerting
  • Troubleshooting Guide
  • Security and Trust
    • Subprocessors

Monitoring & Alerting

Set up comprehensive monitoring and alerting for your Donobu automation flows with webhooks, Slack, Datadog, and other monitoring platforms.

Monitoring & Alerting Overview

Implement robust monitoring and alerting systems to track your Donobu automation flows, detect issues early, and maintain high reliability.

Webhook Integration

Basic Webhook Handler

Configure webhooks to receive real-time notifications when flows complete:

// webhook-handler.js
const express = require('express');
const app = express();

// Parse JSON request bodies so the webhook handler can read `req.body.id`.
app.use(express.json());

/**
 * POST /donobu-webhook
 *
 * Receives a flow notification, fetches the full flow record from the local
 * Donobu API and hands it to `processFlowCompletion`.
 *
 * Responses:
 *   200 'OK'                        - the flow was fetched and processed
 *   400 'Missing flow id'           - the body carried no usable `id`
 *   500 'Error processing webhook'  - the lookup or the processing failed
 */
app.post('/donobu-webhook', async (req, res) => {
  // `req.body` is absent when the request was not parsed as JSON, so read the
  // id defensively instead of destructuring (which would throw outside the
  // try block and leave the request without a response).
  const id = req.body?.id;
  const isUsableId =
    (typeof id === 'string' && id.length > 0) || typeof id === 'number';

  if (!isUsableId) {
    return res.status(400).send('Missing flow id');
  }

  try {
    // Fetch complete flow details. The id comes from the request body, so it
    // is encoded to keep it confined to a single path segment.
    const response = await fetch(
      `http://localhost:31000/api/flows/${encodeURIComponent(id)}`
    );

    // A non-2xx reply does not describe a flow; do not process it as one.
    if (!response.ok) {
      throw new Error(`Flow lookup failed with HTTP ${response.status}`);
    }

    const flow = await response.json();

    // Process based on flow state
    await processFlowCompletion(flow);

    res.status(200).send('OK');
  } catch (error) {
    console.error('Webhook processing error:', error);
    res.status(500).send('Error processing webhook');
  }
});

/**
 * Routes a fetched flow to the state-specific handler, then records its
 * execution metrics.
 *
 * @param {object} flow - Flow record; only `flow.state` is inspected here.
 * @returns {Promise<void>} Resolves once every awaited handler has finished.
 */
async function processFlowCompletion(flow) {
  switch (flow.state) {
    case 'FAILED':
      await sendCriticalAlert(flow);
      break;
    case 'SUCCESS':
      await logSuccessMetrics(flow);
      break;
    default:
      // Any other state has no dedicated handler.
      break;
  }

  // Execution metrics are logged regardless of the final state.
  await logFlowMetrics(flow);
}

// Logs a confirmation once the HTTP server is accepting connections.
function announceWebhookServer() {
  console.log('Webhook server running on port 4000');
}

// Start the webhook receiver on port 4000.
app.listen(4000, announceWebhookServer);

Advanced Webhook Processing

// advanced-webhook-handler.js
const express = require('express');
const crypto = require('crypto');
const app = express();

// Webhook signature verification
/**
 * Checks a webhook signature against the HMAC-SHA256 hex digest of the payload.
 *
 * @param {string|Buffer} payload - Raw request body exactly as received.
 * @param {string|Buffer|undefined} signature - Signature supplied by the sender.
 * @param {string|Buffer|undefined} secret - Shared secret used to key the HMAC.
 * @returns {boolean} `true` only when the signature matches; `false` for a
 *   missing signature, a missing secret, or any mismatch (never throws for
 *   those cases).
 */
function verifyWebhookSignature(payload, signature, secret) {
  const hasSignature = typeof signature === 'string' || Buffer.isBuffer(signature);

  // A missing header or an unset secret can never authenticate a request.
  if (!hasSignature || !secret) {
    return false;
  }

  const expectedSignature = crypto
    .createHmac('sha256', secret)
    .update(payload)
    .digest('hex');

  const received = Buffer.from(signature);
  const expected = Buffer.from(expectedSignature);

  // timingSafeEqual throws a RangeError on inputs of different length, so a
  // wrong-sized signature is rejected up front. The expected length is fixed
  // by the digest, so this check reveals nothing about the secret.
  if (received.length !== expected.length) {
    return false;
  }

  return crypto.timingSafeEqual(received, expected);
}

/**
 * Signature-checked webhook endpoint.
 *
 * The body is read with `express.raw` so the HMAC is computed over the exact
 * bytes that were sent. The request is acknowledged with 200 as soon as the
 * payload has been authenticated and parsed; event handling continues in the
 * background and its failures are logged rather than left unhandled.
 *
 * Responses:
 *   200 'OK'            - authenticated and parsed
 *   400 'Bad Request'   - no raw JSON body was captured
 *   400 'Invalid JSON'  - the body is not valid JSON
 *   401 'Unauthorized'  - signature missing or not matching
 */
app.use('/donobu-webhook', express.raw({type: 'application/json'}), (req, res) => {
  const signature = req.headers['x-donobu-signature'];
  const secret = process.env.WEBHOOK_SECRET;

  // express.raw only produces a Buffer for bodies whose content type matched;
  // anything else cannot be verified.
  if (!Buffer.isBuffer(req.body)) {
    return res.status(400).send('Bad Request');
  }

  // Treat a verification error (e.g. malformed signature) as a failed check
  // instead of letting it escape the handler.
  let authentic = false;
  try {
    authentic = verifyWebhookSignature(req.body, signature, secret);
  } catch (error) {
    console.error('Webhook signature verification error:', error);
  }

  if (!authentic) {
    return res.status(401).send('Unauthorized');
  }

  let payload;
  try {
    payload = JSON.parse(req.body.toString('utf8'));
  } catch (error) {
    return res.status(400).send('Invalid JSON');
  }

  // Fire-and-forget by design, but never leave the promise without a
  // rejection handler.
  processWebhookPayload(payload).catch((error) => {
    console.error('Webhook processing error:', error);
  });

  res.status(200).send('OK');
});

/**
 * Dispatches a webhook payload to the handler registered for its event type.
 *
 * @param {{id: *, event: string}} payload - Parsed webhook body.
 * @returns {Promise<void>} Resolves after the matching handler completes, or
 *   immediately after logging when the event type is not recognised.
 */
async function processWebhookPayload(payload) {
  const { id, event } = payload;

  // Handlers are wrapped in thunks so each one is only looked up when its
  // event actually arrives.
  const dispatch = new Map([
    ['flow.completed', () => handleFlowCompletion(id)],
    ['flow.failed', () => handleFlowFailure(id)],
    ['flow.started', () => handleFlowStart(id)],
  ]);

  const run = dispatch.get(event);
  if (run === undefined) {
    console.log('Unknown webhook event:', event);
    return;
  }

  await run();
}

Slack Integration

Slack Notification Service

// slack-notifier.js
const { WebClient } = require('@slack/web-api');

/**
 * Posts flow notifications and daily summaries to the `#qa-automation`
 * Slack channel through the Slack Web API client.
 */
class SlackNotifier {
  /**
   * @param {string} token - Token handed to the Slack `WebClient`.
   */
  constructor(token) {
    this.slack = new WebClient(token);
  }

  /**
   * Posts a single-flow status message with ID, duration, token usage and
   * target website. Failed flows additionally get detail/video buttons.
   *
   * @param {object} flow - Flow record; reads `state`, `name`, `id`,
   *   `startedAt`, `completedAt`, `inputTokensUsed`, `completionTokensUsed`
   *   and `targetWebsite`.
   * @returns {Promise<void>}
   */
  async sendFlowNotification(flow) {
    const emoji = this.getStatusEmoji(flow.state);
    const color = this.getStatusColor(flow.state);

    const message = {
      channel: '#qa-automation',
      text: `${emoji} Flow ${flow.state}: ${flow.name}`,
      attachments: [{
        color: color,
        fields: [
          {
            title: 'Flow ID',
            value: flow.id,
            short: true
          },
          {
            title: 'Duration',
            value: this.formatDuration(flow.completedAt - flow.startedAt),
            short: true
          },
          {
            title: 'Token Usage',
            value: `${flow.inputTokensUsed + flow.completionTokensUsed}`,
            short: true
          },
          {
            title: 'Website',
            value: flow.targetWebsite,
            short: true
          }
        ],
        // Link buttons are only attached to failures.
        actions: flow.state === 'FAILED' ? [
          {
            type: 'button',
            text: 'View Flow Details',
            url: `http://localhost:3000/flows/${flow.id}`
          },
          {
            type: 'button',
            text: 'Download Video',
            url: `http://localhost:31000/api/flows/${flow.id}/video`
          }
        ] : [],
        footer: 'Donobu Automation',
        // completedAt is divided by 1000 to convert milliseconds to seconds.
        ts: Math.floor(flow.completedAt / 1000)
      }]
    };

    await this.slack.chat.postMessage(message);
  }

  /**
   * @param {string} state - Flow state name.
   * @returns {string} Emoji for the state, or '❓' for an unmapped state.
   */
  getStatusEmoji(state) {
    const emojis = {
      SUCCESS: '✅',
      FAILED: '❌',
      RUNNING: '🔄',
      PAUSED: '⏸️'
    };
    return emojis[state] || '❓';
  }

  /**
   * @param {string} state - Flow state name.
   * @returns {string} Attachment color for the state, or '#cccccc' for an
   *   unmapped state.
   */
  getStatusColor(state) {
    const colors = {
      SUCCESS: 'good',
      FAILED: 'danger',
      RUNNING: '#439FE0',
      PAUSED: 'warning'
    };
    return colors[state] || '#cccccc';
  }

  /**
   * Formats a millisecond duration as `Ns` (under a minute) or `Mm Ss`.
   *
   * @param {number} milliseconds
   * @returns {string}
   */
  formatDuration(milliseconds) {
    const seconds = Math.round(milliseconds / 1000);
    if (seconds < 60) return `${seconds}s`;
    const minutes = Math.floor(seconds / 60);
    const remainingSeconds = seconds % 60;
    return `${minutes}m ${remainingSeconds}s`;
  }

  /**
   * Posts a summary of the given flows: total, successful, failed and the
   * success rate.
   *
   * @param {Array<{state: string}>} flows - Flows to summarise; may be empty.
   * @returns {Promise<void>}
   */
  async sendDailyReport(flows) {
    const successful = flows.filter(f => f.state === 'SUCCESS').length;
    const failed = flows.filter(f => f.state === 'FAILED').length;
    const total = flows.length;

    // With no flows the ratio is 0/0; report 'N/A' instead of 'NaN%'.
    const successRate = total > 0
      ? `${Math.round((successful / total) * 100)}%`
      : 'N/A';

    const message = {
      channel: '#qa-automation',
      text: '📊 Daily Automation Report',
      attachments: [{
        color: failed > 0 ? 'warning' : 'good',
        fields: [
          {
            title: 'Total Flows',
            value: total.toString(),
            short: true
          },
          {
            title: 'Successful',
            value: `✅ ${successful}`,
            short: true
          },
          {
            title: 'Failed',
            value: `❌ ${failed}`,
            short: true
          },
          {
            title: 'Success Rate',
            value: successRate,
            short: true
          }
        ]
      }]
    };

    await this.slack.chat.postMessage(message);
  }
}

module.exports = SlackNotifier;

Datadog Integration

Datadog Metrics Collection

// datadog-metrics.js
const StatsD = require('hot-shots');

/**
 * Emits flow metrics through a StatsD client under the `donobu.` prefix,
 * tagged with the current environment and service name.
 */
class DatadogMetrics {
  constructor() {
    this.dogstatsd = new StatsD({
      host: 'localhost',
      port: 8125,
      prefix: 'donobu.',
      globalTags: {
        environment: process.env.NODE_ENV || 'development',
        service: 'donobu-automation'
      }
    });
  }

  /**
   * Increments `flow.completed`, tagged with name, state, run mode and website.
   *
   * @param {object} flow - Reads `name`, `state`, `runMode`, `targetWebsite`.
   */
  trackFlowCompletion(flow) {
    // Track flow completion with status
    this.dogstatsd.increment('flow.completed', 1, {
      flow_name: flow.name,
      state: flow.state,
      run_mode: flow.runMode,
      website: this.sanitizeTag(flow.targetWebsite)
    });
  }

  /**
   * Records `completedAt - startedAt` in the `flow.duration` histogram.
   *
   * @param {object} flow - Reads `startedAt`, `completedAt`, `name`,
   *   `runMode`, `state`.
   */
  trackFlowDuration(flow) {
    const duration = flow.completedAt - flow.startedAt;

    this.dogstatsd.histogram('flow.duration', duration, {
      flow_name: flow.name,
      run_mode: flow.runMode,
      state: flow.state
    });
  }

  /**
   * Reports input, completion and combined token counts as gauges.
   *
   * @param {object} flow - Reads `inputTokensUsed`, `completionTokensUsed`,
   *   `name`.
   */
  trackTokenUsage(flow) {
    this.dogstatsd.gauge('flow.tokens.input', flow.inputTokensUsed, {
      flow_name: flow.name
    });

    this.dogstatsd.gauge('flow.tokens.completion', flow.completionTokensUsed, {
      flow_name: flow.name
    });

    this.dogstatsd.gauge('flow.tokens.total',
      flow.inputTokensUsed + flow.completionTokensUsed, {
      flow_name: flow.name
    });
  }

  /**
   * Fetches the flow's tool calls and reports the total count plus a
   * per-tool breakdown. Failures are logged, never thrown.
   *
   * @param {object} flow - Reads `id` and `name`.
   * @returns {Promise<void>} Resolves when the metrics have been emitted (or
   *   the failure has been logged), so callers may await completion.
   */
  async trackToolCalls(flow) {
    try {
      // Get tool calls count from API
      const response = await fetch(`http://localhost:31000/api/flows/${flow.id}/tool-calls`);

      // An error status carries no tool-call list; do not report it as data.
      if (!response.ok) {
        throw new Error(`Tool-call lookup failed with HTTP ${response.status}`);
      }

      const toolCalls = await response.json();

      this.dogstatsd.gauge('flow.tool_calls', toolCalls.length, {
        flow_name: flow.name
      });

      // Track tool usage breakdown
      const toolUsage = {};
      toolCalls.forEach(call => {
        toolUsage[call.toolName] = (toolUsage[call.toolName] || 0) + 1;
      });

      Object.entries(toolUsage).forEach(([toolName, count]) => {
        this.dogstatsd.gauge('flow.tool_usage', count, {
          flow_name: flow.name,
          tool_name: toolName
        });
      });
    } catch (error) {
      console.error('Error fetching tool calls:', error);
    }
  }

  /**
   * Reports the share of FAILED flows among the fetched flows as the
   * `flow.error_rate` gauge (percent). Nothing is emitted when no flows are
   * returned. Failures are logged, never thrown.
   *
   * @returns {Promise<void>} Resolves when the gauge has been emitted (or the
   *   failure has been logged).
   */
  async trackErrorRate() {
    try {
      // This would typically be called periodically
      const response = await fetch('http://localhost:31000/api/flows?limit=100');

      if (!response.ok) {
        throw new Error(`Flow listing failed with HTTP ${response.status}`);
      }

      const data = await response.json();
      const flows = data.items || [];
      const failed = flows.filter(f => f.state === 'FAILED').length;
      const total = flows.length;

      if (total > 0) {
        const errorRate = (failed / total) * 100;
        this.dogstatsd.gauge('flow.error_rate', errorRate);
      }
    } catch (error) {
      console.error('Error calculating error rate:', error);
    }
  }

  /**
   * Normalises a value for use as a tag: every character outside
   * `[a-zA-Z0-9_.-]` becomes `_` and the result is lower-cased.
   *
   * @param {*} value - Raw tag value; non-strings are stringified.
   * @returns {string} Sanitised tag, or 'unknown' when the value is
   *   null/undefined (e.g. a flow without a target website).
   */
  sanitizeTag(value) {
    if (value == null) {
      return 'unknown';
    }

    // Remove or replace invalid characters for Datadog tags
    return String(value).replace(/[^a-zA-Z0-9_.-]/g, '_').toLowerCase();
  }

  /**
   * Example business counters keyed off the flow name: attempts are counted
   * for every matching flow, completions only for SUCCESS.
   *
   * @param {object} flow - Reads `name` and `state`.
   */
  trackBusinessMetrics(flow) {
    // Example: Track conversion funnel steps
    if (flow.name.includes('Checkout')) {
      this.dogstatsd.increment('business.checkout_attempts');

      if (flow.state === 'SUCCESS') {
        this.dogstatsd.increment('business.checkout_completed');
      }
    }

    // Example: Track user onboarding
    if (flow.name.includes('Onboarding')) {
      this.dogstatsd.increment('business.onboarding_attempts');

      if (flow.state === 'SUCCESS') {
        this.dogstatsd.increment('business.onboarding_completed');
      }
    }
  }
}

module.exports = DatadogMetrics;

Datadog Dashboard Configuration

{
  "title": "Donobu Automation Dashboard",
  "widgets": [
    {
      "id": 1,
      "definition": {
        "type": "timeseries",
        "title": "Flow Completion Rate",
        "requests": [
          {
            "q": "sum:donobu.flow.completed{state:success}.as_rate()",
            "display_type": "line",
            "style": {
              "palette": "green"
            }
          },
          {
            "q": "sum:donobu.flow.completed{state:failed}.as_rate()",
            "display_type": "line", 
            "style": {
              "palette": "red"
            }
          }
        ]
      }
    },
    {
      "id": 2,
      "definition": {
        "type": "query_value",
        "title": "Current Error Rate",
        "requests": [
          {
            "q": "avg:donobu.flow.error_rate",
            "aggregator": "last"
          }
        ],
        "precision": 2
      }
    }
  ]
}

Prometheus & Grafana Integration

Prometheus Metrics Exporter

// prometheus-metrics.js
const client = require('prom-client');
const express = require('express');

// Create metrics
// Counter of completed flows, labelled by status, flow name and run mode.
// Incremented once per flow in PrometheusMetrics.recordFlowCompletion.
const flowCompletionCounter = new client.Counter({
  name: 'donobu_flows_completed_total',
  help: 'Total number of completed flows',
  labelNames: ['status', 'flow_name', 'run_mode']
});

// Histogram of flow execution time in seconds, labelled by flow name and
// status. The largest explicit bucket boundary is 300 seconds.
const flowDurationHistogram = new client.Histogram({
  name: 'donobu_flow_duration_seconds',
  help: 'Duration of flow execution in seconds',
  labelNames: ['flow_name', 'status'],
  buckets: [0.5, 1, 2, 5, 10, 30, 60, 120, 300]
});

// Gauge of token counts per flow name and token type ('input' / 'completion'
// are the values written by recordFlowCompletion).
// NOTE(review): this is written with set(), so each label set presumably holds
// the latest flow's count rather than a running total, despite the `_total`
// suffix in the metric name - confirm the intended semantics.
const tokenUsageGauge = new client.Gauge({
  name: 'donobu_tokens_used_total',
  help: 'Total tokens used by flows',
  labelNames: ['flow_name', 'token_type']
});

// Gauge of currently running flows; updated via
// PrometheusMetrics.updateActiveFlows.
const activeFlowsGauge = new client.Gauge({
  name: 'donobu_active_flows',
  help: 'Number of currently running flows'
});

/**
 * Records flow results into the module-level Prometheus metrics and can
 * expose them over HTTP for scraping.
 */
class PrometheusMetrics {
  /**
   * Applies the default `app`/`environment` labels to the shared registry and
   * turns on collection of the default metrics.
   */
  constructor() {
    const environment = process.env.NODE_ENV || 'development';

    client.register.setDefaultLabels({
      app: 'donobu-automation',
      environment: environment
    });

    client.collectDefaultMetrics();
  }

  /**
   * Counts the completed flow, observes its duration in seconds and sets the
   * input/completion token gauges.
   *
   * @param {object} flow - Reads `state`, `name`, `runMode`, `startedAt`,
   *   `completedAt`, `inputTokensUsed`, `completionTokensUsed`.
   */
  recordFlowCompletion(flow) {
    const status = flow.state.toLowerCase();
    const flowName = flow.name;

    flowCompletionCounter.inc({
      status: status,
      flow_name: flowName,
      run_mode: flow.runMode
    });

    // Timestamps are divided by 1000 to turn milliseconds into seconds.
    const elapsedSeconds = (flow.completedAt - flow.startedAt) / 1000;
    flowDurationHistogram.observe(
      { flow_name: flowName, status: status },
      elapsedSeconds
    );

    const tokenSamples = [
      ['input', flow.inputTokensUsed],
      ['completion', flow.completionTokensUsed]
    ];
    for (const [tokenType, amount] of tokenSamples) {
      tokenUsageGauge.set({ flow_name: flowName, token_type: tokenType }, amount);
    }
  }

  /**
   * @param {number} count - Number of flows currently running.
   */
  updateActiveFlows(count) {
    activeFlowsGauge.set(count);
  }

  /**
   * Starts an Express server that serves the registry at `GET /metrics`.
   *
   * @param {number} [port=9090] - Port to listen on.
   */
  startMetricsServer(port = 9090) {
    const metricsApp = express();

    const serveMetrics = async (req, res) => {
      res.set('Content-Type', client.register.contentType);
      const body = await client.register.metrics();
      res.end(body);
    };
    metricsApp.get('/metrics', serveMetrics);

    metricsApp.listen(port, () => {
      console.log(`Metrics server listening on port ${port}`);
    });
  }
}

module.exports = PrometheusMetrics;

Grafana Dashboard JSON

{
  "dashboard": {
    "id": null,
    "title": "Donobu Automation Metrics",
    "tags": ["donobu", "automation"],
    "timezone": "browser",
    "panels": [
      {
        "id": 1,
        "title": "Flow Success Rate",
        "type": "stat",
        "targets": [
          {
            "expr": "rate(donobu_flows_completed_total{status=\"success\"}[5m]) / rate(donobu_flows_completed_total[5m]) * 100",
            "refId": "A"
          }
        ],
        "fieldConfig": {
          "defaults": {
            "unit": "percent",
            "min": 0,
            "max": 100
          }
        }
      },
      {
        "id": 2,
        "title": "Flow Duration Distribution",
        "type": "heatmap",
        "targets": [
          {
            "expr": "increase(donobu_flow_duration_seconds_bucket[5m])",
            "refId": "A",
            "format": "heatmap"
          }
        ]
      },
      {
        "id": 3,
        "title": "Active Flows",
        "type": "graph",
        "targets": [
          {
            "expr": "donobu_active_flows",
            "refId": "A"
          }
        ]
      }
    ],
    "time": {
      "from": "now-1h",
      "to": "now"
    },
    "refresh": "30s"
  }
}

PagerDuty Integration

PagerDuty Alert Service

// pagerduty-alerts.js
const pdClient = require('node-pagerduty');

/**
 * Triggers and resolves PagerDuty events for flows. Each flow maps to one
 * incident through the dedup key `donobu-flow-<flowId>`.
 */
class PagerDutyAlerts {
  /**
   * @param {string} integrationKey - Integration key, sent as `routing_key`
   *   on every event.
   */
  constructor(integrationKey) {
    // Kept on the instance because both event payloads read it; without this
    // assignment `routing_key` would be undefined on every event.
    this.integrationKey = integrationKey;

    this.pd = new pdClient({
      integrationKey: integrationKey
    });
  }

  /**
   * Sends a `trigger` event with critical severity describing the flow.
   * Send failures are logged, not thrown.
   *
   * @param {object} flow - Reads `id`, `name`, `targetWebsite`, `result`,
   *   `startedAt`, `completedAt`, `inputTokensUsed`, `completionTokensUsed`.
   * @returns {Promise<void>}
   */
  async sendCriticalAlert(flow) {
    const payload = {
      routing_key: this.integrationKey,
      event_action: 'trigger',
      dedup_key: `donobu-flow-${flow.id}`,
      payload: {
        summary: `Critical Flow Failure: ${flow.name}`,
        source: 'donobu-automation',
        severity: 'critical',
        component: 'automation-flow',
        group: 'e2e-testing',
        class: 'flow-execution',
        custom_details: {
          flow_id: flow.id,
          flow_name: flow.name,
          target_website: flow.targetWebsite,
          // `result` may be absent, hence the optional chaining.
          error: flow.result?.error,
          duration: flow.completedAt - flow.startedAt,
          token_usage: flow.inputTokensUsed + flow.completionTokensUsed
        }
      },
      links: [
        {
          href: `http://localhost:3000/flows/${flow.id}`,
          text: 'View Flow Details'
        }
      ]
    };

    try {
      const response = await this.pd.events.sendEvent(payload);
      console.log('PagerDuty alert sent:', response.dedup_key);
    } catch (error) {
      console.error('Failed to send PagerDuty alert:', error);
    }
  }

  /**
   * Sends a `resolve` event for the incident tied to the given flow.
   * Send failures are logged, not thrown.
   *
   * @param {string} flowId - Flow whose alert should be resolved.
   * @returns {Promise<void>}
   */
  async resolveAlert(flowId) {
    const payload = {
      routing_key: this.integrationKey,
      event_action: 'resolve',
      // Must match the dedup key used when the alert was triggered.
      dedup_key: `donobu-flow-${flowId}`
    };

    try {
      await this.pd.events.sendEvent(payload);
      console.log('PagerDuty alert resolved for flow:', flowId);
    } catch (error) {
      console.error('Failed to resolve PagerDuty alert:', error);
    }
  }
}

module.exports = PagerDutyAlerts;

Custom Monitoring Dashboard

Health Check Service

// health-monitor.js
/**
 * Aggregates health checks for the local Donobu API, recent flow results and
 * the current Node.js process into a single report.
 */
class HealthMonitor {
  constructor() {
    this.healthChecks = new Map();
    this.alertThresholds = {
      errorRate: 10, // Alert if error rate > 10%
      avgDuration: 300000, // Alert if avg duration > 5 minutes
      failureStreak: 3 // Alert after 3 consecutive failures
    };
  }

  /**
   * Runs every check in sequence and combines the results.
   *
   * @returns {Promise<{timestamp: number, status: string, checks: object,
   *   error?: string}>} `status` is 'healthy' only when every check is
   *   healthy; `error` is set when a check threw.
   */
  async performHealthCheck() {
    const health = {
      timestamp: Date.now(),
      status: 'healthy',
      checks: {}
    };

    try {
      // Check API availability
      health.checks.api = await this.checkApiHealth();

      // Check recent flow performance
      health.checks.flows = await this.checkFlowHealth();

      // Check system resources
      health.checks.resources = await this.checkSystemHealth();

      // Determine overall status
      health.status = this.calculateOverallStatus(health.checks);

    } catch (error) {
      health.status = 'unhealthy';
      health.error = error.message;
    }

    return health;
  }

  /**
   * Pings the API and measures how long the request took.
   *
   * @returns {Promise<{status: string, responseTime?: number, error?: string}>}
   *   `responseTime` is in milliseconds; `error` is set when the request
   *   itself failed.
   */
  async checkApiHealth() {
    // Captured before the request so the elapsed time can be computed; the
    // original referenced this variable without ever declaring it, which made
    // every successful ping fall into the catch branch.
    const startTime = Date.now();

    try {
      const response = await fetch('http://localhost:31000/api/ping');
      return {
        status: response.ok ? 'healthy' : 'unhealthy',
        responseTime: Date.now() - startTime
      };
    } catch (error) {
      return {
        status: 'unhealthy',
        error: error.message
      };
    }
  }

  /**
   * Computes the failure rate of flows started within the last hour, out of
   * the flows returned by the listing request.
   *
   * @returns {Promise<{status: string, recentFlows?: number,
   *   failedFlows?: number, errorRate?: number, error?: string}>}
   *   'unhealthy' when the error rate exceeds `alertThresholds.errorRate`
   *   or the request failed. `errorRate` is a percentage rounded to two
   *   decimals.
   */
  async checkFlowHealth() {
    try {
      const response = await fetch('http://localhost:31000/api/flows?limit=50');
      const data = await response.json();
      const flows = data.items || [];

      // Last hour
      const cutoff = Date.now() - 3600000;
      const recent = flows.filter(f => f.startedAt > cutoff);

      const failed = recent.filter(f => f.state === 'FAILED');
      // No recent flows counts as a 0% error rate rather than dividing by zero.
      const errorRate = recent.length > 0 ? (failed.length / recent.length) * 100 : 0;

      return {
        status: errorRate > this.alertThresholds.errorRate ? 'unhealthy' : 'healthy',
        recentFlows: recent.length,
        failedFlows: failed.length,
        errorRate: Math.round(errorRate * 100) / 100
      };
    } catch (error) {
      return {
        status: 'unhealthy',
        error: error.message
      };
    }
  }

  /**
   * Reports heap usage (in MiB, rounded) and uptime (in seconds, rounded) of
   * the current process. Always reports 'healthy'.
   *
   * @returns {Promise<{status: string, memory: {used: number, total: number},
   *   uptime: number}>}
   */
  async checkSystemHealth() {
    const used = process.memoryUsage();

    return {
      status: 'healthy',
      memory: {
        used: Math.round(used.heapUsed / 1024 / 1024),
        total: Math.round(used.heapTotal / 1024 / 1024)
      },
      uptime: Math.round(process.uptime())
    };
  }

  /**
   * @param {Object<string, {status: string}>} checks - Individual results.
   * @returns {string} 'healthy' when every check is healthy, else 'unhealthy'.
   */
  calculateOverallStatus(checks) {
    const statuses = Object.values(checks).map(check => check.status);
    return statuses.every(status => status === 'healthy') ? 'healthy' : 'unhealthy';
  }
}

module.exports = HealthMonitor;

This comprehensive monitoring and alerting setup ensures you have full visibility into your Donobu automation flows, with real-time notifications and detailed metrics to maintain high reliability and performance.

  1. Webhook Integration
    1. Basic Webhook Handler
    2. Advanced Webhook Processing
  2. Slack Integration
  3. Datadog Integration
  4. Prometheus & Grafana Integration
  5. PagerDuty Integration
  6. Custom Monitoring Dashboard