Overview

The WhizoAI Python SDK is a production-ready client for WhizoAI's web scraping, crawling, and AI-powered data extraction APIs. It is well suited to data science, automation, and backend applications.

Type Hints

Full type annotations for better IDE support and code quality

Async Support

Async/await support for concurrent operations

Simple API

Pythonic interface with intuitive method naming

Installation

pip install whizoai

Quick Start

from whizoai import WhizoAI

# Initialize the client
client = WhizoAI(api_key='wai_your_api_key_here')

# Scrape a webpage
result = client.scrape(
    url='https://example.com',
    options={
        'format': 'markdown',
        'onlyMainContent': True
    }
)

print(result['data']['content'])
print(f"Credits used: {result['creditsUsed']}")

Authentication

Get your API key from the WhizoAI Dashboard.

Environment Variable

export WHIZOAI_API_KEY="wai_your_api_key_here"

import os
from whizoai import WhizoAI

client = WhizoAI(api_key=os.getenv('WHIZOAI_API_KEY'))

Direct Initialization

from whizoai import WhizoAI

client = WhizoAI(api_key='wai_your_api_key_here')

Core Features

Single Page Scraping

Extract content from any webpage in multiple formats:
result = client.scrape(
    url='https://example.com',
    options={
        'format': 'markdown',  # 'markdown', 'html', 'text', 'json'
        'onlyMainContent': True,
        'includeScreenshot': False,
        'includePDF': False,
        'waitFor': 0,  # milliseconds to wait
        'headers': {
            'User-Agent': 'Custom User Agent'
        }
    }
)

print(result['data']['content'])
print(result['data']['metadata']['title'])
print(f"Credits used: {result['creditsUsed']}")

Multi-Page Crawling

Crawl entire websites with depth and page limits:
result = client.crawl(
    url='https://example.com',
    options={
        'maxPages': 10,
        'maxDepth': 2,
        'allowedDomains': ['example.com'],
        'excludePaths': ['/admin', '/private'],
        'format': 'markdown',
        'onlyMainContent': True
    }
)

print(f"Crawled {len(result['data']['pages'])} pages")
for page in result['data']['pages']:
    print(f"{page['url']}: {page['content'][:100]}...")

AI-Powered Extraction

Extract structured data from webpages using AI:
result = client.extract(
    url='https://github.com/anthropics',
    options={
        'schema': {
            'companyName': 'string',
            'description': 'string',
            'mainProducts': ['string'],
            'teamSize': 'number'
        },
        'prompt': 'Extract information about this company'
    }
)

print(result['data']['extractedData'])
# {
#   'companyName': 'Anthropic',
#   'description': 'AI safety company...',
#   'mainProducts': ['Claude', 'Constitutional AI'],
#   'teamSize': 150
# }

Web Search

Search the web with optional content scraping:
result = client.search(
    query='best web scraping tools 2025',
    options={
        'maxResults': 10,
        'scrapeResults': True,  # Scrape each result
        'searchEngine': 'google',  # 'google', 'bing', 'duckduckgo'
        'country': 'us',
        'language': 'en'
    }
)

print(f"Found {len(result['data']['results'])} results")
for item in result['data']['results']:
    print(f"{item['title']}: {item['url']}")
    if 'content' in item:
        print(f"Content: {item['content'][:200]}...")

Batch Operations

Process multiple URLs in parallel:
result = client.batch(
    urls=[
        'https://example.com',
        'https://example.com/about',
        'https://example.com/contact'
    ],
    scrape_type='scrape',
    options={
        'format': 'markdown',
        'onlyMainContent': True
    }
)

print(f"Total credits used: {result['totalCreditsUsed']}")
for i, item in enumerate(result['data']['results']):
    print(f"Result {i + 1}: {item['status']}")
    if 'data' in item:
        print(item['data']['content'][:100])

Job Management

List Jobs

jobs = client.list_jobs(
    limit=20,
    offset=0,
    status='completed',  # 'pending', 'running', 'completed', 'failed'
    scrape_type='scrape'  # Filter by type
)

print(f"Found {jobs['data']['total']} jobs")
for job in jobs['data']['jobs']:
    print(f"{job['id']}: {job['url']} - {job['status']}")

Get Job Details

job = client.get_job('job-id-here')

print(f"Status: {job['data']['status']}")
print(f"Progress: {job['data']['progress']}%")
print(f"Credits used: {job['data']['creditsUsed']}")

Cancel Job

client.cancel_job('job-id-here')
print('Job cancelled successfully')

Account Management

Check Credit Balance

credits = client.get_credit_balance()

print(f"Plan: {credits['data']['plan']}")
print(f"Monthly credits: {credits['data']['monthlyCredits']}")
print(f"Used this month: {credits['data']['creditsUsedThisMonth']}")
print(f"Remaining: {credits['data']['creditsRemaining']}")

Get User Profile

profile = client.get_user_profile()

print(f"Email: {profile['data']['email']}")
print(f"Name: {profile['data']['fullName']}")
print(f"Plan: {profile['data']['plan']}")

API Key Management

List API Keys

keys = client.list_api_keys()

for key in keys['data']:
    status = 'Active' if key['isActive'] else 'Inactive'
    print(f"{key['name']}: {key['maskedKey']} ({status})")
    print(f"  Last used: {key['lastUsedAt']}")
    print(f"  Usage: {key['usageCount']} requests")

Create API Key

new_key = client.create_api_key(
    name='Production API Key',
    scopes=['scrape', 'crawl', 'extract'],
    rate_limit_per_hour=100,
    expires_at='2025-12-31T23:59:59Z'  # Optional
)

print(f"New API key: {new_key['data']['apiKey']}")
print('⚠️  Save this key - you won\'t see it again!')

Error Handling

The SDK provides structured error handling:
from whizoai import (
    WhizoAI,
    WhizoAIError,
    AuthenticationError,
    ValidationError,
    InsufficientCreditsError,
    RateLimitError,
    NetworkError
)

try:
    result = client.scrape('https://example.com')
except AuthenticationError as e:
    print(f'Invalid API key: {e}')
except ValidationError as e:
    print(f'Invalid input: {e}')
except InsufficientCreditsError as e:
    print(f'Out of credits: {e}')
except RateLimitError as e:
    print(f'Rate limit exceeded: {e}')
except NetworkError as e:
    print(f'Network error: {e}')
except WhizoAIError as e:
    print(f'WhizoAI error: {e}')
except Exception as e:
    print(f'Unexpected error: {e}')

Async Support

The SDK supports async/await for concurrent operations:
import asyncio
from whizoai import AsyncWhizoAI

async def main():
    client = AsyncWhizoAI(api_key='wai_your_api_key_here')

    # Concurrent scraping
    tasks = [
        client.scrape('https://example.com/page1'),
        client.scrape('https://example.com/page2'),
        client.scrape('https://example.com/page3')
    ]

    results = await asyncio.gather(*tasks)

    for i, result in enumerate(results):
        print(f"Page {i+1}: {result['data']['metadata']['title']}")

asyncio.run(main())
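
When scraping many pages concurrently, it helps to cap how many requests are in flight at once so you stay within your plan's rate limit (see Rate Limits below). A minimal sketch using asyncio.Semaphore (the limit of 5 is an arbitrary choice, not an SDK setting):
import asyncio
from whizoai import AsyncWhizoAI

async def scrape_all(urls, limit=5):
    client = AsyncWhizoAI(api_key='wai_your_api_key_here')
    semaphore = asyncio.Semaphore(limit)

    async def scrape_one(url):
        # At most `limit` scrapes run concurrently
        async with semaphore:
            return await client.scrape(url)

    return await asyncio.gather(*(scrape_one(url) for url in urls))

urls = [f'https://example.com/page{i}' for i in range(1, 11)]
results = asyncio.run(scrape_all(urls))
print(f"Scraped {len(results)} pages")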

Advanced Configuration

Custom API URL

For self-hosted or testing environments:
client = WhizoAI(
    api_key='wai_your_api_key_here',
    base_url='http://localhost:8080'  # Default: https://api.whizo.ai
)

Custom Timeout

client = WhizoAI(
    api_key='wai_your_api_key_here',
    timeout=60  # seconds (default: 30)
)

Retry Configuration

client = WhizoAI(
    api_key='wai_your_api_key_here',
    max_retries=5,  # Default: 3
    retry_delay=2.0  # Initial delay in seconds (default: 1.0)
)

Type Hints

The SDK includes comprehensive type hints for better IDE support:
from whizoai import WhizoAI
from whizoai.types import ScrapeOptions, ScrapeResponse

client: WhizoAI = WhizoAI(api_key='wai_your_api_key_here')

options: ScrapeOptions = {
    'format': 'markdown',
    'onlyMainContent': True,
    'includeScreenshot': False
}

result: ScrapeResponse = client.scrape('https://example.com', options)

Integrations

pandas DataFrame

import pandas as pd
from whizoai import WhizoAI

client = WhizoAI(api_key='wai_your_api_key_here')

# Extract structured data
result = client.extract(
    url='https://example.com/products',
    options={
        'schema': {
            'products': [{
                'name': 'string',
                'price': 'number',
                'rating': 'number'
            }]
        }
    }
)

# Convert to DataFrame
df = pd.DataFrame(result['data']['extractedData']['products'])
print(df.head())

BeautifulSoup

from bs4 import BeautifulSoup
from whizoai import WhizoAI

client = WhizoAI(api_key='wai_your_api_key_here')

# Get HTML content
result = client.scrape(
    url='https://example.com',
    options={'format': 'html'}
)

# Parse with BeautifulSoup
soup = BeautifulSoup(result['data']['content'], 'html.parser')
titles = soup.find_all('h1')
for title in titles:
    print(title.text)

Credit Costs

Operation        Base Cost    Additional
Basic scraping   1 credit     -
Screenshot       +1 credit    Per page
PDF generation   +1 credit    Per page
AI extraction    3-6 credits  Per page
Web search       1 credit     Per search
Stealth mode     +4 credits   Per page
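
For example, scraping 10 pages with screenshots enabled costs roughly 10 × (1 + 1) = 20 credits. A small pre-flight sketch that estimates a job's cost from the table above and compares it with your balance (the constants mirror the current table and may change, so treat the estimate as approximate):
# Approximate per-page costs from the table above
BASE_SCRAPE = 1
SCREENSHOT = 1

def estimate_crawl_cost(max_pages, screenshots=False):
    per_page = BASE_SCRAPE + (SCREENSHOT if screenshots else 0)
    return max_pages * per_page

cost = estimate_crawl_cost(max_pages=10, screenshots=True)
balance = client.get_credit_balance()['data']['creditsRemaining']
if cost > balance:
    print(f"Estimated {cost} credits needed, but only {balance} remaining")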

Rate Limits

Rate limits vary by subscription plan:
Plan        Requests/Hour  Requests/Day
Free        10             100
Hobby       50             500
Standard    200            2,000
Growth      500            5,000
Enterprise  Custom         Custom
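
When you exceed your plan's limit, requests raise RateLimitError (see Error Handling above). A minimal retry sketch with exponential backoff (the delay values are arbitrary; this wraps the call on top of the SDK's built-in max_retries):
import time
from whizoai import RateLimitError

def scrape_with_backoff(client, url, attempts=5, base_delay=2.0):
    for attempt in range(attempts):
        try:
            return client.scrape(url)
        except RateLimitError:
            # Exponential backoff: 2s, 4s, 8s, ...
            time.sleep(base_delay * (2 ** attempt))
    raise RuntimeError(f"Still rate limited after {attempts} attempts")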

Support