Skip to main content

Overview

The WhizoAI Python SDK provides a production-ready Python client for accessing WhizoAI’s web scraping, crawling, and AI-powered data extraction capabilities. Perfect for data science, automation, and backend applications.

Type Hints

Full type annotations for better IDE support and code quality

Async Support

Async/await support for concurrent operations

Simple API

Pythonic interface with intuitive method naming

Installation

pip install whizoai

Quick Start

from whizoai import WhizoAI

# Initialize the client
client = WhizoAI(api_key='wai_your_api_key_here')

# Scrape a webpage
result = client.scrape(
    url='https://example.com',
    options={
        'format': 'markdown',
        'onlyMainContent': True
    }
)

print(result['data']['content'])
print(f"Credits used: {result['creditsUsed']}")

Authentication

Get your API key from the WhizoAI Dashboard.
export WHIZOAI_API_KEY="wai_your_api_key_here"
import os
from whizoai import WhizoAI

client = WhizoAI(api_key=os.getenv('WHIZOAI_API_KEY'))

Direct Initialization

from whizoai import WhizoAI

client = WhizoAI(api_key='wai_your_api_key_here')

Core Features

Single Page Scraping

Extract content from any webpage in multiple formats:
result = client.scrape(
    url='https://example.com',
    options={
        'format': 'markdown',  # 'markdown', 'html', 'text', 'json'
        'onlyMainContent': True,
        'includeScreenshot': False,
        'includePDF': False,
        'waitFor': 0,  # milliseconds to wait
        'headers': {
            'User-Agent': 'Custom User Agent'
        }
    }
)

print(result['data']['content'])
print(result['data']['metadata']['title'])
print(f"Credits used: {result['creditsUsed']}")

Multi-Page Crawling

Crawl entire websites with depth and page limits:
result = client.crawl(
    url='https://example.com',
    options={
        'maxPages': 10,
        'maxDepth': 2,
        'allowedDomains': ['example.com'],
        'excludePaths': ['/admin', '/private'],
        'format': 'markdown',
        'onlyMainContent': True
    }
)

print(f"Crawled {len(result['data']['pages'])} pages")
for page in result['data']['pages']:
    print(f"{page['url']}: {page['content'][:100]}...")

AI-Powered Extraction

Extract structured data from webpages using AI:
result = client.extract(
    url='https://github.com/anthropics',
    options={
        'schema': {
            'companyName': 'string',
            'description': 'string',
            'mainProducts': ['string'],
            'teamSize': 'number'
        },
        'prompt': 'Extract information about this company'
    }
)

print(result['data']['extractedData'])
# {
#   'companyName': 'Anthropic',
#   'description': 'AI safety company...',
#   'mainProducts': ['Claude', 'Constitutional AI'],
#   'teamSize': 150
# }
Web Search

Search the web with optional content scraping:
result = client.search(
    query='best web scraping tools 2025',
    options={
        'maxResults': 10,
        'scrapeResults': True,  # Scrape each result
        'searchEngine': 'google',  # 'google', 'bing', 'duckduckgo'
        'country': 'us',
        'language': 'en'
    }
)

print(f"Found {len(result['data']['results'])} results")
for item in result['data']['results']:
    print(f"{item['title']}: {item['url']}")
    if 'content' in item:
        print(f"Content: {item['content'][:200]}...")

Batch Operations

Process multiple URLs in parallel:
result = client.batch(
    urls=[
        'https://example.com',
        'https://example.com/about',
        'https://example.com/contact'
    ],
    scrape_type='scrape',
    options={
        'format': 'markdown',
        'onlyMainContent': True
    }
)

print(f"Total credits used: {result['totalCreditsUsed']}")
for i, item in enumerate(result['data']['results']):
    print(f"Result {i + 1}: {item['status']}")
    if 'data' in item:
        print(item['data']['content'][:100])

Job Management

List Jobs

jobs = client.list_jobs(
    limit=20,
    offset=0,
    status='completed',  # 'pending', 'running', 'completed', 'failed'
    scrape_type='scrape'  # Filter by type
)

print(f"Found {jobs['data']['total']} jobs")
for job in jobs['data']['jobs']:
    print(f"{job['id']}: {job['url']} - {job['status']}")

Get Job Details

job = client.get_job('job-id-here')

print(f"Status: {job['data']['status']}")
print(f"Progress: {job['data']['progress']}%")
print(f"Credits used: {job['data']['creditsUsed']}")

Cancel Job

client.cancel_job('job-id-here')
print('Job cancelled successfully')

Account Management

Check Credit Balance

credits = client.get_credit_balance()

print(f"Plan: {credits['data']['plan']}")
print(f"Monthly credits: {credits['data']['monthlyCredits']}")
print(f"Used this month: {credits['data']['creditsUsedThisMonth']}")
print(f"Remaining: {credits['data']['creditsRemaining']}")

Get User Profile

profile = client.get_user_profile()

print(f"Email: {profile['data']['email']}")
print(f"Name: {profile['data']['fullName']}")
print(f"Plan: {profile['data']['plan']}")

API Key Management

List API Keys

keys = client.list_api_keys()

for key in keys['data']:
    status = 'Active' if key['isActive'] else 'Inactive'
    print(f"{key['name']}: {key['maskedKey']} ({status})")
    print(f"  Last used: {key['lastUsedAt']}")
    print(f"  Usage: {key['usageCount']} requests")

Create API Key

new_key = client.create_api_key(
    name='Production API Key',
    scopes=['scrape', 'crawl', 'extract'],
    rate_limit_per_hour=100,
    expires_at='2025-12-31T23:59:59Z'  # Optional
)

print(f"New API key: {new_key['data']['apiKey']}")
print('⚠️  Save this key - you won\'t see it again!')

Error Handling

The SDK provides structured error handling:
from whizoai import (
    WhizoAI,
    WhizoAIError,
    AuthenticationError,
    ValidationError,
    InsufficientCreditsError,
    RateLimitError,
    NetworkError
)

try:
    result = client.scrape('https://example.com')
except AuthenticationError as e:
    print(f'Invalid API key: {e}')
except ValidationError as e:
    print(f'Invalid input: {e}')
except InsufficientCreditsError as e:
    print(f'Out of credits: {e}')
except RateLimitError as e:
    print(f'Rate limit exceeded: {e}')
except NetworkError as e:
    print(f'Network error: {e}')
except WhizoAIError as e:
    print(f'WhizoAI error: {e}')
except Exception as e:
    print(f'Unexpected error: {e}')

Async Support

The SDK supports async/await for concurrent operations:
import asyncio
from whizoai import AsyncWhizoAI

async def main():
    client = AsyncWhizoAI(api_key='wai_your_api_key_here')

    # Concurrent scraping
    tasks = [
        client.scrape('https://example.com/page1'),
        client.scrape('https://example.com/page2'),
        client.scrape('https://example.com/page3')
    ]

    results = await asyncio.gather(*tasks)

    for i, result in enumerate(results):
        print(f"Page {i+1}: {result['data']['metadata']['title']}")

asyncio.run(main())

Advanced Configuration

Custom API URL

For self-hosted or testing environments:
client = WhizoAI(
    api_key='wai_your_api_key_here',
    base_url='http://localhost:8080'  # Default: https://api.whizo.ai
)

Custom Timeout

client = WhizoAI(
    api_key='wai_your_api_key_here',
    timeout=60  # seconds (default: 30)
)

Retry Configuration

client = WhizoAI(
    api_key='wai_your_api_key_here',
    max_retries=5,  # Default: 3
    retry_delay=2.0  # Initial delay in seconds (default: 1.0)
)

Type Hints

The SDK includes comprehensive type hints for better IDE support:
from typing import Dict, List, Optional
from whizoai import WhizoAI
from whizoai.types import ScrapeOptions, ScrapeResponse

client: WhizoAI = WhizoAI(api_key='wai_your_api_key_here')

options: ScrapeOptions = {
    'format': 'markdown',
    'onlyMainContent': True,
    'includeScreenshot': False
}

result: ScrapeResponse = client.scrape('https://example.com', options)

pandas DataFrame

import pandas as pd
from whizoai import WhizoAI

client = WhizoAI(api_key='wai_your_api_key_here')

# Extract structured data
result = client.extract(
    url='https://example.com/products',
    options={
        'schema': {
            'products': [{
                'name': 'string',
                'price': 'number',
                'rating': 'number'
            }]
        }
    }
)

# Convert to DataFrame
df = pd.DataFrame(result['data']['extractedData']['products'])
print(df.head())

BeautifulSoup

from bs4 import BeautifulSoup
from whizoai import WhizoAI

client = WhizoAI(api_key='wai_your_api_key_here')

# Get HTML content
result = client.scrape(
    url='https://example.com',
    options={'format': 'html'}
)

# Parse with BeautifulSoup
soup = BeautifulSoup(result['data']['content'], 'html.parser')
titles = soup.find_all('h1')
for title in titles:
    print(title.text)

Credit Costs

| Operation      | Base Cost   | Additional |
|----------------|-------------|------------|
| Basic scraping | 1 credit    | -          |
| Screenshot     | +1 credit   | Per page   |
| PDF generation | +1 credit   | Per page   |
| AI extraction  | 3-6 credits | Per page   |
| Web search     | 1 credit    | Per search |
| Stealth mode   | +4 credits  | Per page   |

Rate Limits

Rate limits vary by subscription plan:
| Plan       | Requests/Hour | Requests/Day |
|------------|---------------|--------------|
| Free       | 10            | 100          |
| Hobby      | 50            | 500          |
| Standard   | 200           | 2,000        |
| Growth     | 500           | 5,000        |
| Enterprise | Custom        | Custom       |

PyPI Package

View on PyPI registry

GitHub Repository

View source code and contribute

API Reference

Explore all available endpoints

Get API Key

Generate your API key

Support