Usage Examples

Complete code examples in Python, Node.js, and cURL showing how to integrate the Scraper API into your applications.

Python Examples

Basic Request

import requests

API_KEY = "YOUR_API_KEY"
BASE_URL = "https://scrape.evomi.com/api/v1/scraper/realtime"

def scrape_url(url):
    # Passing the target URL via params ensures it is properly URL-encoded
    response = requests.get(
        BASE_URL,
        params={"url": url, "api_key": API_KEY}
    )
    
    if response.status_code == 200:
        return response.text
    else:
        print(f"Error: {response.status_code}")
        return None

# Use it
html = scrape_url("https://example.com")
if html:
    print(html[:200])
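
The key can also be sent in the x-api-key header instead of the query string, as the examples below do:

def scrape_url_header_auth(url):
    # Same request, authenticating via the x-api-key header
    response = requests.get(
        BASE_URL,
        params={"url": url},
        headers={"x-api-key": API_KEY}
    )
    return response.text if response.status_code == 200 else None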

JSON Response with Metadata

import requests

def scrape_with_metadata(url):
    response = requests.get(
        BASE_URL,
        params={
            "url": url,
            "delivery": "json",
            "include_content": True
        },
        headers={"x-api-key": API_KEY}
    )
    
    data = response.json()
    
    print(f"Title: {data['title']}")
    print(f"Status: {data['status_code']}")
    print(f"Credits Used: {data['credits_used']}")
    print(f"Mode: {data['mode_used']}")
    
    return data

result = scrape_with_metadata("https://example.com")

Browser Mode with Screenshot

import requests

def scrape_with_screenshot(url):
    response = requests.post(
        BASE_URL,
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json"
        },
        json={
            "url": url,
            "mode": "browser",
            "screenshot": True,
            "delivery": "json"
        }
    )
    
    data = response.json()
    screenshot_url = data.get("screenshot_uri")
    
    print(f"Screenshot available at: {screenshot_url}")
    
    # Download the screenshot if one was returned
    if screenshot_url:
        img_response = requests.get(screenshot_url)
        with open("screenshot.png", "wb") as f:
            f.write(img_response.content)
    
    return data

scrape_with_screenshot("https://example.com")

Markdown Output for AI Processing

import requests

def get_markdown(url):
    response = requests.get(
        BASE_URL,
        params={"url": url, "content": "markdown", "delivery": "raw", "api_key": API_KEY}
    )
    
    return response.text

# Get clean text
markdown = get_markdown("https://blog.example.com/post")

# Feed to your AI model
# summary = openai.complete(f"Summarize: {markdown}")
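
The commented-out line above is only pseudocode. As one possible concrete version, here is a sketch using OpenAI's Python SDK (the SDK choice and model name are assumptions; any LLM client works the same way):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

markdown = get_markdown("https://blog.example.com/post")

completion = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model name
    messages=[{"role": "user", "content": f"Summarize this article:\n\n{markdown}"}]
)
print(completion.choices[0].message.content)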

JavaScript Automation

import requests

def automated_search(search_term):
    response = requests.post(
        BASE_URL,
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json"
        },
        json={
            "url": "https://example.com",
            "mode": "browser",
            "js_instructions": [
                {"wait_for": "#search-input"},
                {"fill": ["#search-input", search_term]},
                {"click": "#search-button"},
                {"wait": 2000},
                {"wait_for": ".search-results"}
            ],
            "delivery": "json",
            "include_content": True
        }
    )
    
    data = response.json()
    return data['content']

results = automated_search("web scraping")
print(results[:500])
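
The content field returned above is the page's HTML, so you can post-process it with any parser. A small follow-up sketch using BeautifulSoup (the .search-results a selector is hypothetical and depends on the target page):

from bs4 import BeautifulSoup

html = automated_search("web scraping")
soup = BeautifulSoup(html, "html.parser")

# Hypothetical selector - adjust to the structure of the page you scrape
for link in soup.select(".search-results a"):
    print(link.get_text(strip=True), link.get("href"))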

AI Enhancement

import requests
import json

def extract_article_data(url):
    response = requests.post(
        BASE_URL,
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json"
        },
        json={
            "url": url,
            "ai_enhance": True,
            "ai_source": "markdown",
            "ai_prompt": "Extract headline, author, date, and create 2-sentence summary as JSON",
            "delivery": "json"
        }
    )
    
    data = response.json()
    return data['ai_response']

article_data = extract_article_data("https://news.example.com/article")
print(json.dumps(article_data, indent=2))
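
Depending on how the model responds, ai_response may arrive as a JSON string rather than a parsed object; a small defensive step (this is an assumption, adjust to what the API actually returns for your prompts):

# If the AI response is returned as a string, parse it first (assumption)
if isinstance(article_data, str):
    article_data = json.loads(article_data)
print(article_data.get("headline"))  # 'headline' comes from the prompt above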

Async Processing with Polling

import requests
import time

def submit_async_scrape(url):
    response = requests.post(
        BASE_URL,
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json"
        },
        json={
            "url": url,
            "async": True
        }
    )
    
    data = response.json()
    return data['task_id']

def check_task_status(task_id):
    url = f"https://scrape.evomi.com/api/v1/scraper/tasks/{task_id}"
    response = requests.get(url, headers={"x-api-key": API_KEY})
    return response.json()

def wait_for_result(task_id, max_wait=120):
    start_time = time.time()
    
    while time.time() - start_time < max_wait:
        status_data = check_task_status(task_id)
        
        if status_data['status'] == 'completed':
            return status_data['result']
        elif status_data['status'] == 'failed':
            raise Exception(f"Task failed: {status_data.get('error')}")
        
        time.sleep(2)  # Poll every 2 seconds
    
    raise TimeoutError("Task did not complete in time")

# Use it
task_id = submit_async_scrape("https://example.com")
print(f"Task submitted: {task_id}")

result = wait_for_result(task_id)
print(f"Content length: {len(result['content'])}")

Retry with Exponential Backoff

import requests
import time

def scrape_with_retry(url, max_retries=3):
    for attempt in range(max_retries):
        try:
            response = requests.get(
                BASE_URL,
                params={"url": url, "api_key": API_KEY},
                timeout=60
            )
            
            if response.status_code == 200:
                return response.text
            
            elif response.status_code == 429:
                # Rate limited
                data = response.json()
                wait_time = data.get("reset", 60)
                print(f"Rate limited. Waiting {wait_time}s...")
                time.sleep(wait_time)
                continue
            
            elif response.status_code >= 500:
                # Server error - retry with backoff
                if attempt < max_retries - 1:
                    wait_time = 2 ** attempt
                    print(f"Server error. Retrying in {wait_time}s...")
                    time.sleep(wait_time)
                    continue
            
            # Other errors - don't retry
            response.raise_for_status()
            
        except requests.exceptions.Timeout:
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt
                print(f"Timeout. Retrying in {wait_time}s...")
                time.sleep(wait_time)
            else:
                raise
    
    return None

content = scrape_with_retry("https://example.com")

Node.js Examples

Basic Request

const axios = require('axios');

const API_KEY = 'YOUR_API_KEY';
const BASE_URL = 'https://scrape.evomi.com/api/v1/scraper/realtime';

async function scrapeUrl(url) {
  try {
    const response = await axios.get(
      `${BASE_URL}?url=${encodeURIComponent(url)}&api_key=${API_KEY}`
    );
    
    console.log('Credits used:', response.headers['x-credits-used']);
    return response.data;
  } catch (error) {
    console.error('Error:', error.response?.status, error.message);
    return null;
  }
}

// Use it
scrapeUrl('https://example.com').then(html => {
  if (html) {
    console.log(html.substring(0, 200));
  }
});

POST with JSON Configuration

const axios = require('axios');

async function scrapeWithConfig(url) {
  try {
    const response = await axios.post(BASE_URL, {
      url: url,
      mode: 'browser',
      proxy_type: 'residential',
      proxy_country: 'US',
      wait_until: 'networkidle',
      delivery: 'json'
    }, {
      headers: {
        'x-api-key': API_KEY,
        'Content-Type': 'application/json'
      }
    });
    
    const data = response.data;
    console.log(`Title: ${data.title}`);
    console.log(`Credits used: ${data.credits_used}`);
    console.log(`Mode used: ${data.mode_used}`);
    
    return data;
  } catch (error) {
    console.error('Error:', error.response?.data);
    throw error;
  }
}

scrapeWithConfig('https://example.com');

Batch Scraping with Promise.all

const axios = require('axios');

async function scrapeBatch(urls) {
  const promises = urls.map(url =>
    axios.get(
      `${BASE_URL}?url=${encodeURIComponent(url)}&delivery=json&api_key=${API_KEY}`
    ).then(res => ({
      url,
      title: res.data.title,
      credits: res.data.credits_used
    })).catch(err => ({
      url,
      error: err.message
    }))
  );
  
  return Promise.all(promises);
}

const urls = [
  'https://example.com/page1',
  'https://example.com/page2',
  'https://example.com/page3'
];

scrapeBatch(urls).then(results => {
  results.forEach(result => {
    if (result.error) {
      console.log(`❌ ${result.url}: ${result.error}`);
    } else {
      console.log(`✅ ${result.url}: ${result.title} (${result.credits} credits)`);
    }
  });
});

Async with Polling

const axios = require('axios');

async function submitAsyncScrape(url) {
  const response = await axios.post(BASE_URL, {
    url,
    async: true
  }, {
    headers: {
      'x-api-key': API_KEY,
      'Content-Type': 'application/json'
    }
  });
  
  return response.data.task_id;
}

async function waitForResult(taskId, maxWait = 120000) {
  const startTime = Date.now();
  const taskUrl = `https://scrape.evomi.com/api/v1/scraper/tasks/${taskId}`;
  
  while (Date.now() - startTime < maxWait) {
    const response = await axios.get(taskUrl, {
      headers: { 'x-api-key': API_KEY }
    });
    
    const data = response.data;
    
    if (data.status === 'completed') {
      return data.result;
    } else if (data.status === 'failed') {
      throw new Error(`Task failed: ${data.error}`);
    }
    
    // Wait 2 seconds before next check
    await new Promise(resolve => setTimeout(resolve, 2000));
  }
  
  throw new Error('Task did not complete in time');
}

// Use it
(async () => {
  const taskId = await submitAsyncScrape('https://example.com');
  console.log(`Task submitted: ${taskId}`);
  
  const result = await waitForResult(taskId);
  console.log(`Content length: ${result.content.length}`);
})();

cURL Examples

Simple GET

curl "https://scrape.evomi.com/api/v1/scraper/realtime?url=https://example.com&api_key=YOUR_API_KEY"

Save to File

curl "https://scrape.evomi.com/api/v1/scraper/realtime?url=https://example.com&api_key=YOUR_API_KEY" \
  -o page.html

JSON Delivery

curl "https://scrape.evomi.com/api/v1/scraper/realtime?url=https://example.com&delivery=json&api_key=YOUR_API_KEY" \
  | jq '.'

POST with Configuration

curl -X POST "https://scrape.evomi.com/api/v1/scraper/realtime" \
  -H "x-api-key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "url": "https://example.com",
    "mode": "browser",
    "proxy_type": "residential",
    "proxy_country": "US",
    "screenshot": true
  }' \
  -o screenshot.png

Markdown Output

curl "https://scrape.evomi.com/api/v1/scraper/realtime?url=https://blog.example.com/post&content=markdown&api_key=YOUR_API_KEY" \
  -o article.md

With JavaScript Automation

curl -X POST "https://scrape.evomi.com/api/v1/scraper/realtime" \
  -H "x-api-key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "url": "https://example.com",
    "mode": "browser",
    "js_instructions": [
      {"wait_for": "#button"},
      {"click": "#button"},
      {"wait": 2000}
    ]
  }'

Show Response Headers

curl -v "https://scrape.evomi.com/api/v1/scraper/realtime?url=https://example.com&api_key=YOUR_API_KEY" \
  2>&1 | grep -i "x-"
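
Response headers worth watching include x-credits-used (also read in the Node.js example above) and X-Credits-Remaining (used under Best Practices below).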

Best Practices

1. Use Environment Variables

import os

API_KEY = os.environ.get('EVOMI_API_KEY')
if not API_KEY:
    raise ValueError("EVOMI_API_KEY environment variable not set")
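
The Node.js examples can read the same variable via process.env.EVOMI_API_KEY.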

2. Implement Error Handling

try:
    content = scrape(url)  # the helper sketched below raises on HTTP errors
except requests.exceptions.HTTPError as e:
    if e.response.status_code == 402:
        alert("Insufficient credits!")  # your own notification hook
    elif e.response.status_code == 429:
        time.sleep(60)  # Rate limited - back off before retrying
except requests.exceptions.Timeout:
    log("Request timed out")  # your own logging hook

3. Monitor Credits

credits_remaining = float(response.headers.get('X-Credits-Remaining', 0))
if credits_remaining < 100:
    send_alert(f"Low credits: {credits_remaining}")

4. Use Appropriate Modes

# For static sites
scrape(url, mode='request', proxy_type='datacenter')  # 1 credit

# For JS-heavy sites
scrape(url, mode='browser', proxy_type='residential')  # 25 credits

# Let API decide
scrape(url, mode='auto')  # 1-27.5 credits depending on needs
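
The snippets in this section call a generic scrape() helper. A minimal sketch, assuming the GET endpoint from the earlier examples accepts these options as query parameters (the POST examples above show the canonical JSON form):

def scrape(url, **params):
    # Hypothetical helper assumed by the snippets above: forwards any extra
    # options (mode, proxy_type, content, ...) as query parameters and
    # returns the response body, raising on HTTP errors.
    response = requests.get(
        BASE_URL,
        params={"url": url, "api_key": API_KEY, **params},
        timeout=60
    )
    response.raise_for_status()
    return response.text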

5. Cache Results

import redis
import hashlib

cache = redis.Redis()

def scrape_cached(url, ttl=3600):
    cache_key = hashlib.md5(url.encode()).hexdigest()
    cached = cache.get(cache_key)
    
    if cached:
        return cached.decode()
    
    content = scrape(url)
    cache.setex(cache_key, ttl, content)
    return content
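
If you vary request options (mode, content, proxy_country, and so on), include them in the cache key alongside the URL so different configurations do not overwrite each other.
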
ℹ️ For more examples and language-specific SDKs, check our GitHub repository or contact support for assistance with your specific use case.