Initial commit
This commit is contained in:
13
.claude-plugin/plugin.json
Normal file
13
.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "integrations",
|
||||
"description": "Integration tools for external services including Databento market data and Discord",
|
||||
"version": "0.0.0-2025.11.28",
|
||||
"author": {
|
||||
"name": "Jeremy Miranda",
|
||||
"email": "jeremy@nicewolfstudio.com"
|
||||
},
|
||||
"skills": [
|
||||
"./skills/databento",
|
||||
"./skills/discord-integration"
|
||||
]
|
||||
}
|
||||
3
README.md
Normal file
3
README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# integrations
|
||||
|
||||
Integration tools for external services including Databento market data and Discord
|
||||
80
plugin.lock.json
Normal file
80
plugin.lock.json
Normal file
@@ -0,0 +1,80 @@
|
||||
{
|
||||
"$schema": "internal://schemas/plugin.lock.v1.json",
|
||||
"pluginId": "gh:Nice-Wolf-Studio/wolf-skills-marketplace:integrations",
|
||||
"normalized": {
|
||||
"repo": null,
|
||||
"ref": "refs/tags/v20251128.0",
|
||||
"commit": "12e800a554bfadd038595a71b7776a0a38651bcd",
|
||||
"treeHash": "44c1d5649e0f5708dd3771febb1a742710c37db26e73511eba344631e9fc0184",
|
||||
"generatedAt": "2025-11-28T10:12:13.786063Z",
|
||||
"toolVersion": "publish_plugins.py@0.2.0"
|
||||
},
|
||||
"origin": {
|
||||
"remote": "git@github.com:zhongweili/42plugin-data.git",
|
||||
"branch": "master",
|
||||
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
|
||||
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
|
||||
},
|
||||
"manifest": {
|
||||
"name": "integrations",
|
||||
"description": "Integration tools for external services including Databento market data and Discord"
|
||||
},
|
||||
"content": {
|
||||
"files": [
|
||||
{
|
||||
"path": "README.md",
|
||||
"sha256": "f655dcb261091d364c928cf0d14d8ed28faf71447982074d6ade130b354c8a9e"
|
||||
},
|
||||
{
|
||||
"path": ".claude-plugin/plugin.json",
|
||||
"sha256": "ec9f2ebd89e6d2bf899a59b3e959035b6d4ba19367d6e0e42105e2af34baabed"
|
||||
},
|
||||
{
|
||||
"path": "skills/databento/SKILL.md",
|
||||
"sha256": "cf864a3d93473d1d97692b0d57df80432a5f39d0885ae40646943a96ad7a727c"
|
||||
},
|
||||
{
|
||||
"path": "skills/databento/references/cost-optimization.md",
|
||||
"sha256": "0000588816dc82795f8dcf9ed12ceb2d96d086acbe1e89ee9f07cd3c085316ee"
|
||||
},
|
||||
{
|
||||
"path": "skills/databento/references/schemas.md",
|
||||
"sha256": "301d51d6bf320a571550284eeaabc968219abbfe2e27cc07e364e4156b80bf8b"
|
||||
},
|
||||
{
|
||||
"path": "skills/databento/references/api-parameters.md",
|
||||
"sha256": "13908d613d636e10827c38e3f914789904ad13af868b056126274d14687053c3"
|
||||
},
|
||||
{
|
||||
"path": "skills/databento/references/symbology.md",
|
||||
"sha256": "f55044c7a7ce320adcc843d3b972e5c9db6afbc14a5430f5a16f3bf6893ff2f8"
|
||||
},
|
||||
{
|
||||
"path": "skills/databento/scripts/fetch_ohlcv.py",
|
||||
"sha256": "a8321dddad8c7498da295c468b64cd7f2290653868fbe5fbc91c863056961f94"
|
||||
},
|
||||
{
|
||||
"path": "skills/databento/scripts/session_filter.py",
|
||||
"sha256": "8a64f4f468fe479052bff61f292faacb3346db1d6b011045fcfc68d457378696"
|
||||
},
|
||||
{
|
||||
"path": "skills/databento/scripts/validate_data.py",
|
||||
"sha256": "795a4b3d2818e5c816175056233097374743a6dd947f933e6f56719f08145c85"
|
||||
},
|
||||
{
|
||||
"path": "skills/discord-integration/examples.md",
|
||||
"sha256": "ea565e953cf1d0c307fe920234f3b7f73051547b0a116aac5f4baaec112bcb20"
|
||||
},
|
||||
{
|
||||
"path": "skills/discord-integration/SKILL.md",
|
||||
"sha256": "071fa973408def21e885fe395dcf82a4433a70751f8c53516bd8a6c3eeb4b341"
|
||||
}
|
||||
],
|
||||
"dirSha256": "44c1d5649e0f5708dd3771febb1a742710c37db26e73511eba344631e9fc0184"
|
||||
},
|
||||
"security": {
|
||||
"scannedAt": null,
|
||||
"scannerVersion": null,
|
||||
"flags": []
|
||||
}
|
||||
}
|
||||
393
skills/databento/SKILL.md
Normal file
393
skills/databento/SKILL.md
Normal file
@@ -0,0 +1,393 @@
|
||||
---
|
||||
name: databento
|
||||
description: Use when working with ES/NQ futures market data, before calling any Databento API - follow mandatory four-step workflow (cost check, availability check, fetch, validate); prevents costly API errors and ensures data quality
|
||||
version: 1.0.1
|
||||
triggers:
|
||||
- "ES futures"
|
||||
- "NQ futures"
|
||||
- "market data"
|
||||
- "databento"
|
||||
- "historical prices"
|
||||
- "order flow"
|
||||
- "mcp__databento"
|
||||
---
|
||||
|
||||
# Databento - ES/NQ Futures Market Data Analysis
|
||||
|
||||
## Overview
|
||||
|
||||
Use the databento skill for ES/NQ futures analysis with the Databento market data platform. The skill provides immediate access to critical reference information (schemas, symbology, datasets) and reusable code patterns to eliminate repeated documentation lookups and API usage errors.
|
||||
|
||||
**Primary focus:** ES (E-mini S&P 500) and NQ (E-mini Nasdaq-100) futures analysis
|
||||
**Secondary focus:** Equity market breadth indicators when supporting futures analysis
|
||||
**Priority 1:** Knowledge and workflows to prevent wasted cycles
|
||||
**Priority 2:** Reusable scripts for common data operations
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Trigger this skill when:
|
||||
- User mentions ES, NQ, or futures analysis
|
||||
- User asks to fetch market data or historical prices
|
||||
- User wants to backtest a trading strategy
|
||||
- User asks about databento schemas, datasets, or symbology
|
||||
- User needs order flow or market microstructure analysis
|
||||
- About to use any `mcp__databento__*` MCP tool
|
||||
|
||||
## When NOT to Use This Skill
|
||||
|
||||
Don't use this skill for:
|
||||
- Real-time streaming data (use WebSocket connections directly, not REST API)
|
||||
- Options or spread analysis (limited support in current skill)
|
||||
- Non-CME futures exchanges (skill focuses on GLBX.MDP3 dataset)
|
||||
- Equities-only analysis (use equity-specific tools unless correlating with futures)
|
||||
- Data you already have cached (don't re-fetch repeatedly)
|
||||
|
||||
## The Four Steps (MANDATORY - NO EXCEPTIONS)
|
||||
|
||||
**You MUST complete each step before proceeding to the next. Skipping steps leads to wasted API calls, unexpected costs, or missing data.**
|
||||
|
||||
### Step 1: Check Cost BEFORE Fetching (REQUIRED)
|
||||
|
||||
**BEFORE any data fetch, estimate cost** using `mcp__databento__metadata_get_cost`.
|
||||
|
||||
Parameters needed:
|
||||
- dataset (e.g., "GLBX.MDP3")
|
||||
- start date (YYYY-MM-DD)
|
||||
- end date (optional)
|
||||
- symbols (e.g., "ES.c.0")
|
||||
- schema (e.g., "ohlcv-1h")
|
||||
|
||||
**Why:** Prevents unexpected charges and helps optimize data requests.
|
||||
|
||||
**Gate:** You cannot proceed to Step 3 (fetch) without completing this cost check.
|
||||
|
||||
### Step 2: Validate Dataset Availability (REQUIRED)
|
||||
|
||||
Check that data exists for your requested date range using `mcp__databento__metadata_get_dataset_range`.
|
||||
|
||||
Parameters needed:
|
||||
- dataset (e.g., "GLBX.MDP3")
|
||||
|
||||
**Why:** Returns the available date range so you don't request data that doesn't exist.
|
||||
|
||||
**Gate:** If your requested date range is outside the available range, STOP and adjust your request.
|
||||
|
||||
### Step 3: Fetch Data Appropriately (REQUIRED)
|
||||
|
||||
Choose the right tool based on data size:
|
||||
|
||||
**For small/quick requests (< 5GB, typically < 1 day tick data):**
|
||||
- Use `mcp__databento__timeseries_get_range`
|
||||
- Default limit: 100 records (use limit parameter to adjust)
|
||||
- Returns data directly in response
|
||||
|
||||
**For large requests (> 5GB, multi-day tick data):**
|
||||
- Use `mcp__databento__batch_submit_job`
|
||||
- Poll status with `mcp__databento__batch_list_jobs`
|
||||
- Download with `mcp__databento__batch_download`
|
||||
|
||||
**Gate:** If fetch returns an error, DO NOT retry without checking Steps 1 and 2 first.
|
||||
|
||||
### Step 4: Validate Data Post-Fetch (REQUIRED)
|
||||
|
||||
After receiving data, always validate:
|
||||
- Check for timestamp gaps
|
||||
- Verify expected record counts
|
||||
- Validate price ranges (no negative prices, no extreme outliers)
|
||||
- Check for duplicate timestamps
|
||||
|
||||
Use `scripts/validate_data.py` for automated validation.
|
||||
|
||||
**Gate:** Do not proceed with analysis until validation passes.
|
||||
|
||||
## Red Flags - STOP
|
||||
|
||||
If you catch yourself:
|
||||
- ❌ Fetching data without checking cost first
|
||||
- ❌ Assuming data exists for your date range without checking
|
||||
- ❌ Using `timeseries_get_range` for multi-day tick data (> 5GB)
|
||||
- ❌ Skipping post-fetch validation
|
||||
- ❌ Making multiple identical API calls (cache your data!)
|
||||
- ❌ Using wrong `stype_in` for continuous contracts
|
||||
- ❌ Requesting data in wrong date format (not YYYY-MM-DD)
|
||||
|
||||
**STOP. Return to The Four Steps. Follow them in order.**
|
||||
|
||||
## Verification Checklist
|
||||
|
||||
Before marking data work complete:
|
||||
|
||||
- [ ] Cost estimated and acceptable
|
||||
- [ ] Dataset availability confirmed for date range
|
||||
- [ ] Appropriate fetch method chosen (timeseries vs batch)
|
||||
- [ ] Data fetched successfully
|
||||
- [ ] Post-fetch validation passed (no gaps, valid prices, expected count)
|
||||
- [ ] Data cached locally (not fetching repeatedly)
|
||||
|
||||
Can't check all boxes? A step was skipped. Review The Four Steps above.
|
||||
|
||||
## Quick Reference: Essential Information
|
||||
|
||||
### Primary Dataset
|
||||
**GLBX.MDP3** - CME Globex MDP 3.0 (for ES/NQ futures)
|
||||
|
||||
### Common Schemas
|
||||
|
||||
| Schema | Description | When to Use | Typical Limit |
|
||||
|--------|-------------|-------------|---------------|
|
||||
| `ohlcv-1h` | 1-hour OHLCV bars | Multi-day backtesting | 100 bars |
|
||||
| `ohlcv-1d` | Daily OHLCV bars | Long-term analysis | 100 bars |
|
||||
| `trades` | Individual trades | Intraday analysis, order flow | Use batch for > 1 day |
|
||||
| `mbp-1` | Top of book (L1) | Bid/ask spread, microstructure | Use batch for > 1 day |
|
||||
| `mbp-10` | 10 levels of depth (L2) | Order book analysis | Use batch for > 1 day |
|
||||
|
||||
### ES/NQ Symbol Patterns
|
||||
|
||||
| Symbol | Description | Example Use Case |
|
||||
|--------|-------------|------------------|
|
||||
| `ES.c.0` | ES front month continuous (calendar roll) | Standard backtesting |
|
||||
| `NQ.c.0` | NQ front month continuous (calendar roll) | Standard backtesting |
|
||||
| `ES.n.0` | ES front month (open interest roll) | Avoiding roll timing issues |
|
||||
| `ESH5` | Specific contract (Mar 2025) | Analyzing specific expiration |
|
||||
| `ES.c.1` | ES second month continuous | Spread analysis |
|
||||
|
||||
**Roll Strategies:**
|
||||
- `.c.X` = Calendar-based roll (switches on fixed dates)
|
||||
- `.n.X` = Open interest-based roll (switches when OI moves)
|
||||
- `.v.X` = Volume-based roll (switches when volume moves)
|
||||
|
||||
### Common Symbology Types (stypes)
|
||||
|
||||
| Stype | Description | When to Use |
|
||||
|-------|-------------|-------------|
|
||||
| `raw_symbol` | Native exchange symbol | When you have exact contract codes |
|
||||
| `instrument_id` | Databento's numeric ID | After symbol resolution |
|
||||
| `continuous` | Continuous contract notation | For backtesting across rolls |
|
||||
| `parent` | Parent contract symbol | For options or complex instruments |
|
||||
|
||||
## MCP Tool Selection Guide
|
||||
|
||||
### For Current/Live Data
|
||||
|
||||
**Get current ES/NQ quote:**
|
||||
```
|
||||
mcp__databento__get_futures_quote
|
||||
- symbol: "ES" or "NQ"
|
||||
```
|
||||
|
||||
**Get current trading session:**
|
||||
```
|
||||
mcp__databento__get_session_info
|
||||
- timestamp: (optional, defaults to now)
|
||||
```
|
||||
|
||||
**Get recent historical bars:**
|
||||
```
|
||||
mcp__databento__get_historical_bars
|
||||
- symbol: "ES" or "NQ"
|
||||
- timeframe: "1h", "H4", or "1d"
|
||||
- count: number of bars (max 100)
|
||||
```
|
||||
|
||||
### For Historical Data Analysis
|
||||
|
||||
**Timeseries (< 5GB, direct response):**
|
||||
```
|
||||
mcp__databento__timeseries_get_range
|
||||
- dataset: "GLBX.MDP3"
|
||||
- symbols: "ES.c.0,NQ.c.0" (comma-separated, max 2000)
|
||||
- schema: "ohlcv-1h", "trades", "mbp-1", etc.
|
||||
- start: "2024-01-01" (YYYY-MM-DD or ISO 8601)
|
||||
- end: "2024-01-31" (optional)
|
||||
- limit: number of records (optional)
|
||||
```
|
||||
|
||||
**Batch Download (> 5GB, async processing):**
|
||||
```
|
||||
mcp__databento__batch_submit_job
|
||||
- dataset: "GLBX.MDP3"
|
||||
- symbols: ["ES.c.0", "NQ.c.0"] (array, max 2000)
|
||||
- schema: "trades", "mbp-1", etc.
|
||||
- start: "2024-01-01"
|
||||
- end: "2024-12-31"
|
||||
- encoding: "dbn" (native), "csv", or "json"
|
||||
- compression: "zstd" (default), "gzip", or "none"
|
||||
```
|
||||
|
||||
Then monitor with `mcp__databento__batch_list_jobs` and download with `mcp__databento__batch_download`.
|
||||
|
||||
### For Symbol Resolution
|
||||
|
||||
**Resolve symbols between types:**
|
||||
```
|
||||
mcp__databento__symbology_resolve
|
||||
- dataset: "GLBX.MDP3"
|
||||
- symbols: ["ES.c.0", "NQ.c.0"]
|
||||
- stype_in: "continuous" (input type)
|
||||
- stype_out: "instrument_id" (output type)
|
||||
- start_date: "2024-01-01"
|
||||
- end_date: "2024-12-31" (optional)
|
||||
```
|
||||
|
||||
### For Metadata Discovery
|
||||
|
||||
**List available schemas:**
|
||||
```
|
||||
mcp__databento__metadata_list_schemas
|
||||
- dataset: "GLBX.MDP3"
|
||||
```
|
||||
|
||||
**Get dataset date range:**
|
||||
```
|
||||
mcp__databento__metadata_get_dataset_range
|
||||
- dataset: "GLBX.MDP3"
|
||||
```
|
||||
|
||||
**Estimate cost:**
|
||||
```
|
||||
mcp__databento__metadata_get_cost
|
||||
- dataset: "GLBX.MDP3"
|
||||
- start: "2024-01-01"
|
||||
- end: "2024-01-31" (optional)
|
||||
- symbols: "ES.c.0"
|
||||
- schema: "ohlcv-1h"
|
||||
```
|
||||
|
||||
## Analysis Workflow Patterns
|
||||
|
||||
### Historical Backtesting (OHLCV)
|
||||
1. Check cost for date range
|
||||
2. Fetch OHLCV data (1h, 4h, or 1d timeframe)
|
||||
3. Validate data completeness
|
||||
4. Perform analysis
|
||||
5. Consider using `scripts/fetch_ohlcv.py` for standard pattern
|
||||
|
||||
**Typical request:**
|
||||
- Schema: `ohlcv-1h` or `ohlcv-1d`
|
||||
- Symbols: `ES.c.0` or `NQ.c.0`
|
||||
- Limit: 100 bars per request (adjust as needed)
|
||||
|
||||
### Intraday Order Flow Analysis
|
||||
1. Check cost (important for tick data!)
|
||||
2. Use batch job for multi-day tick data
|
||||
3. Fetch trades or mbp-1 schema
|
||||
4. Filter by trading session if needed (use `scripts/session_filter.py`)
|
||||
5. Validate tick data completeness
|
||||
|
||||
**Typical request:**
|
||||
- Schema: `trades` or `mbp-1`
|
||||
- Use batch download for > 1 day of data
|
||||
- Consider session filtering for session-specific analysis
|
||||
|
||||
### Cross-Market Analysis (ES/NQ + Equities)
|
||||
1. Fetch ES/NQ data from GLBX.MDP3
|
||||
2. Fetch equity breadth from XNAS.ITCH (Nasdaq dataset)
|
||||
3. Align timestamps for correlation
|
||||
4. Perform cross-market analysis
|
||||
|
||||
**Datasets needed:**
|
||||
- GLBX.MDP3 (ES/NQ futures)
|
||||
- XNAS.ITCH (Nasdaq equities)
|
||||
|
||||
## Reference Files
|
||||
|
||||
Load these reference files as needed for detailed information:
|
||||
|
||||
### references/schemas.md
|
||||
Comprehensive field-level documentation for all schemas (trades, mbp-1, ohlcv).
|
||||
|
||||
**Load when:** Need to understand specific fields, data types, or schema structure.
|
||||
|
||||
### references/symbology.md
|
||||
Detailed symbology guide with continuous contracts, roll strategies, and expiration handling.
|
||||
|
||||
**Load when:** Working with continuous contracts, need to understand roll timing, or resolving symbol types.
|
||||
|
||||
### references/api-parameters.md
|
||||
Complete parameter reference for all MCP tools with enum values and format requirements.
|
||||
|
||||
**Load when:** Uncertain about parameter formats, enum values, or tool-specific requirements.
|
||||
|
||||
### references/cost-optimization.md
|
||||
Strategies for minimizing costs including T+1 data usage and batch optimization.
|
||||
|
||||
**Load when:** Working with large datasets or need to optimize data costs.
|
||||
|
||||
## Reusable Scripts
|
||||
|
||||
### scripts/fetch_ohlcv.py
|
||||
Standard pattern for fetching OHLCV data with built-in cost checks, error handling, and validation.
|
||||
|
||||
**Use when:** Fetching OHLCV bars for backtesting or analysis.
|
||||
|
||||
**Features:**
|
||||
- Automatic cost estimation before fetch
|
||||
- Error handling with retries
|
||||
- Post-fetch data validation
|
||||
- Export to CSV/pandas options
|
||||
|
||||
### scripts/validate_data.py
|
||||
Data quality validation to catch issues early.
|
||||
|
||||
**Use when:** After fetching any market data.
|
||||
|
||||
**Features:**
|
||||
- Timestamp gap detection
|
||||
- Record count verification
|
||||
- Price range validation
|
||||
- Summary quality report
|
||||
|
||||
### scripts/session_filter.py
|
||||
Filter data by trading session (Asian/London/NY).
|
||||
|
||||
**Use when:** Performing session-specific analysis.
|
||||
|
||||
**Features:**
|
||||
- Session detection using get_session_info
|
||||
- Historical data filtering by session
|
||||
- Session transition handling
|
||||
- Session-specific statistics
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Always check cost first** - Prevents surprises and helps optimize requests
|
||||
2. **Use continuous contracts for backtesting** - Avoids roll gaps in analysis
|
||||
3. **Validate data quality** - Catch issues before running analysis
|
||||
4. **Use batch jobs for large data** - More efficient for > 5GB requests
|
||||
5. **Cache reusable data** - Don't re-fetch the same data repeatedly
|
||||
6. **Consider T+1 data** - Historical data (24+ hours old) has lower costs
|
||||
7. **Use appropriate schema** - Match schema granularity to analysis needs
|
||||
8. **Filter by session when relevant** - Session-based patterns are important for ES/NQ
|
||||
|
||||
---
|
||||
|
||||
## After Using This Skill
|
||||
|
||||
**REQUIRED NEXT STEPS:**
|
||||
|
||||
1. **Validate data quality** - Use verification checklist (Step 4) to confirm data integrity
|
||||
2. **Cache results** - Save fetched data locally to avoid redundant API calls and costs
|
||||
3. **Document assumptions** - Record roll strategy, schema choice, date range in analysis notes
|
||||
|
||||
**OPTIONAL NEXT STEPS:**
|
||||
|
||||
- **Cost tracking** - Log actual cost vs estimate for future budget planning
|
||||
- **Performance notes** - Document fetch time and data volume for optimization
|
||||
- **Quality metrics** - Track data completeness, gaps, or anomalies for future reference
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
**v1.0.1** (2025-11-14)
|
||||
- Added structured frontmatter with triggers list
|
||||
- Added "When NOT to Use" section
|
||||
- Strengthened "The Four Steps" with MANDATORY language and gates
|
||||
- Added "Red Flags - STOP" section
|
||||
- Added "Verification Checklist"
|
||||
- Improved description to follow superpowers pattern
|
||||
|
||||
**v1.0.0** (2025-11-06)
|
||||
- Initial databento skill creation
|
||||
- Comprehensive reference tables and MCP tool guide
|
||||
- Bundled resources (references and scripts)
|
||||
541
skills/databento/references/api-parameters.md
Normal file
541
skills/databento/references/api-parameters.md
Normal file
@@ -0,0 +1,541 @@
|
||||
# Databento API Parameters Reference
|
||||
|
||||
Complete parameter reference for all Databento MCP tools with accepted values, formats, and requirements.
|
||||
|
||||
## Date and Time Formats
|
||||
|
||||
### Date Format
|
||||
**Accepted formats:**
|
||||
- `YYYY-MM-DD` (e.g., "2024-01-15")
|
||||
- ISO 8601 with time (e.g., "2024-01-15T14:30:00Z")
|
||||
|
||||
**Important:**
|
||||
- Dates are in UTC timezone
|
||||
- Inclusive for `start`, exclusive for `end`
|
||||
- Time portion is optional
|
||||
|
||||
### Timestamp Format
|
||||
**Accepted formats:**
|
||||
- ISO 8601 string: "2024-01-15T14:30:00Z"
|
||||
- Unix timestamp (seconds): 1705329000
|
||||
- Unix timestamp (nanoseconds): 1705329000000000000
|
||||
|
||||
## Schema Parameter
|
||||
|
||||
Valid schema values for historical data requests.
|
||||
|
||||
### OHLCV Schemas
|
||||
```
|
||||
"ohlcv-1s" # 1-second bars
|
||||
"ohlcv-1m" # 1-minute bars
|
||||
"ohlcv-1h" # 1-hour bars
|
||||
"ohlcv-1d" # Daily bars
|
||||
"ohlcv-eod" # End-of-day bars
|
||||
```
|
||||
|
||||
### Trade and Quote Schemas
|
||||
```
|
||||
"trades" # Individual trades
|
||||
"mbp-1" # Market by price - level 1 (top of book)
|
||||
"mbp-10" # Market by price - 10 levels of depth
|
||||
"mbo" # Market by order - level 3 (order-level)
|
||||
"tbbo" # Top of book best bid/offer
|
||||
```
|
||||
|
||||
### Metadata Schemas
|
||||
```
|
||||
"definition" # Instrument definitions and metadata
|
||||
"statistics" # Market statistics
|
||||
"status" # Trading status changes
|
||||
"imbalance" # Order imbalance data
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# timeseries_get_range
|
||||
schema="ohlcv-1h"
|
||||
|
||||
# batch_submit_job
|
||||
schema="trades"
|
||||
```
|
||||
|
||||
## Symbology Type (stype) Parameter
|
||||
|
||||
Used for symbol input and output format specification.
|
||||
|
||||
### stype_in (Input Symbol Type)
|
||||
|
||||
```
|
||||
"raw_symbol" # Native exchange symbols (ESH5, AAPL)
|
||||
"instrument_id" # Databento numeric IDs
|
||||
"continuous" # Continuous contracts (ES.c.0)
|
||||
"parent" # Parent symbols (ES, NQ)
|
||||
"nasdaq" # Nasdaq symbology
|
||||
"cms" # CMS symbology
|
||||
"bats" # BATS symbology
|
||||
"smart" # Smart routing symbols
|
||||
```
|
||||
|
||||
### stype_out (Output Symbol Type)
|
||||
|
||||
Same values as `stype_in`.
|
||||
|
||||
**Common Patterns:**
|
||||
```python
|
||||
# Continuous to instrument_id (most common)
|
||||
stype_in="continuous"
|
||||
stype_out="instrument_id"
|
||||
|
||||
# Raw symbol to instrument_id
|
||||
stype_in="raw_symbol"
|
||||
stype_out="instrument_id"
|
||||
|
||||
# Continuous to raw symbol (see current contract)
|
||||
stype_in="continuous"
|
||||
stype_out="raw_symbol"
|
||||
```
|
||||
|
||||
**Important:** Always match stype_in to your actual symbol format:
|
||||
- `"ES.c.0"` → stype_in="continuous"
|
||||
- `"ESH5"` → stype_in="raw_symbol"
|
||||
- `123456` → stype_in="instrument_id"
|
||||
|
||||
## Dataset Parameter
|
||||
|
||||
Dataset codes identify the data source and venue.
|
||||
|
||||
### Common Datasets
|
||||
|
||||
**Futures (CME):**
|
||||
```
|
||||
"GLBX.MDP3" # CME Globex - ES, NQ, and other CME futures
|
||||
```
|
||||
|
||||
**Equities:**
|
||||
```
|
||||
"XNAS.ITCH" # Nasdaq - all Nasdaq-listed stocks
|
||||
"XNYS.PILLAR" # NYSE - NYSE-listed stocks
|
||||
"XCHI.PILLAR" # Chicago Stock Exchange
|
||||
"BATS.PITCH" # BATS exchange
|
||||
"IEXG.TOPS" # IEX exchange
|
||||
```
|
||||
|
||||
**Options:**
|
||||
```
|
||||
"OPRA.PILLAR" # US equity options
|
||||
```
|
||||
|
||||
**Crypto:**
|
||||
```
|
||||
"DBEQ.BASIC" # Databento equities (subset)
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# ES/NQ futures
|
||||
dataset="GLBX.MDP3"
|
||||
|
||||
# Nasdaq equities
|
||||
dataset="XNAS.ITCH"
|
||||
```
|
||||
|
||||
## Symbols Parameter
|
||||
|
||||
### Format Variations
|
||||
|
||||
**String (comma-separated):**
|
||||
```python
|
||||
symbols="ES.c.0,NQ.c.0,GC.c.0"
|
||||
```
|
||||
|
||||
**Array:**
|
||||
```python
|
||||
symbols=["ES.c.0", "NQ.c.0", "GC.c.0"]
|
||||
```
|
||||
|
||||
**Single symbol:**
|
||||
```python
|
||||
symbols="ES.c.0"
|
||||
# or
|
||||
symbols=["ES.c.0"]
|
||||
```
|
||||
|
||||
### Limits
|
||||
- Maximum: 2000 symbols per request
|
||||
- Must match stype_in format
|
||||
|
||||
### Symbol Wildcards
|
||||
|
||||
Some endpoints support wildcards:
|
||||
```
|
||||
"ES*" # All ES contracts
|
||||
"*" # All instruments (use with caution)
|
||||
```
|
||||
|
||||
## Encoding Parameter (Batch Jobs)
|
||||
|
||||
Output format for batch download jobs.
|
||||
|
||||
```
|
||||
"dbn" # Databento Binary (native format, most efficient)
|
||||
"csv" # Comma-separated values
|
||||
"json" # JSON format
|
||||
```
|
||||
|
||||
**Recommendations:**
|
||||
- `"dbn"` - Best for large datasets, fastest processing
|
||||
- `"csv"` - Good for spreadsheet analysis
|
||||
- `"json"` - Good for custom parsing, human-readable
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# batch_submit_job
|
||||
encoding="dbn"
|
||||
```
|
||||
|
||||
## Compression Parameter (Batch Jobs)
|
||||
|
||||
Compression algorithm for batch downloads.
|
||||
|
||||
```
|
||||
"zstd" # Zstandard (default, best compression)
|
||||
"gzip" # Gzip (widely supported)
|
||||
"none" # No compression
|
||||
```
|
||||
|
||||
**Recommendations:**
|
||||
- `"zstd"` - Best compression ratio, fastest
|
||||
- `"gzip"` - Good compatibility
|
||||
- `"none"` - Only for small datasets or testing
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# batch_submit_job
|
||||
compression="zstd"
|
||||
```
|
||||
|
||||
## Limit Parameter
|
||||
|
||||
Maximum number of records to return.
|
||||
|
||||
**Default:** 100 (varies by tool)
|
||||
**Maximum:** No hard limit, but consider:
|
||||
- Timeseries: practical limit ~10M records
|
||||
- Batch jobs: unlimited but affects processing time
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# timeseries_get_range
|
||||
limit=1000 # Return up to 1000 records
|
||||
```
|
||||
|
||||
**Important:** For large datasets, use batch jobs instead of increasing limit.
|
||||
|
||||
## Timeframe Parameter (get_historical_bars)
|
||||
|
||||
Specific to the `get_historical_bars` convenience tool.
|
||||
|
||||
```
|
||||
"1h" # 1-hour bars
|
||||
"H4" # 4-hour bars (alternative notation)
|
||||
"1d" # Daily bars
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# get_historical_bars (ES/NQ only)
|
||||
timeframe="1h"
|
||||
count=100
|
||||
```
|
||||
|
||||
## Symbol Parameter (get_futures_quote)
|
||||
|
||||
Specific to the `get_futures_quote` tool.
|
||||
|
||||
```
|
||||
"ES" # E-mini S&P 500
|
||||
"NQ" # E-mini Nasdaq-100
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# get_futures_quote
|
||||
symbol="ES"
|
||||
```
|
||||
|
||||
**Note:** Uses root symbol only, not full contract code.
|
||||
|
||||
## Split Parameters (Batch Jobs)
|
||||
|
||||
Control how batch job output files are split.
|
||||
|
||||
### split_duration
|
||||
```
|
||||
"day" # One file per day
|
||||
"week" # One file per week
|
||||
"month" # One file per month
|
||||
"none" # Single file (default)
|
||||
```
|
||||
|
||||
### split_size
|
||||
```
|
||||
split_size=1000000000 # Split at 1GB
|
||||
split_size=5000000000 # Split at 5GB
|
||||
```
|
||||
|
||||
### split_symbols
|
||||
```
|
||||
split_symbols=True # One file per symbol
|
||||
split_symbols=False # All symbols in same file (default)
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# batch_submit_job
|
||||
split_duration="day" # Daily files
|
||||
split_symbols=True # Separate file per symbol
|
||||
```
|
||||
|
||||
## Filter Parameters
|
||||
|
||||
### State Filter (list_jobs)
|
||||
```
|
||||
states=["received", "queued", "processing", "done", "expired"]
|
||||
```
|
||||
|
||||
### Time Filter (list_jobs)
|
||||
```
|
||||
since="2024-01-01T00:00:00Z" # Jobs since this timestamp
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# batch_list_jobs
|
||||
states=["done", "processing"]
|
||||
since="2024-01-01"
|
||||
```
|
||||
|
||||
## Mode Parameter (get_cost)
|
||||
|
||||
Query mode for cost estimation.
|
||||
|
||||
```
|
||||
"historical" # Historical data (default)
|
||||
"historical-streaming" # Streaming historical
|
||||
"live" # Live data
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# metadata_get_cost
|
||||
mode="historical"
|
||||
```
|
||||
|
||||
## Complete Parameter Examples
|
||||
|
||||
### timeseries_get_range
|
||||
```python
|
||||
{
|
||||
"dataset": "GLBX.MDP3",
|
||||
"symbols": "ES.c.0,NQ.c.0",
|
||||
"schema": "ohlcv-1h",
|
||||
"start": "2024-01-01",
|
||||
"end": "2024-01-31",
|
||||
"stype_in": "continuous",
|
||||
"stype_out": "instrument_id",
|
||||
"limit": 1000
|
||||
}
|
||||
```
|
||||
|
||||
### batch_submit_job
|
||||
```python
|
||||
{
|
||||
"dataset": "GLBX.MDP3",
|
||||
"symbols": ["ES.c.0", "NQ.c.0"],
|
||||
"schema": "trades",
|
||||
"start": "2024-01-01",
|
||||
"end": "2024-12-31",
|
||||
"stype_in": "continuous",
|
||||
"stype_out": "instrument_id",
|
||||
"encoding": "dbn",
|
||||
"compression": "zstd",
|
||||
"split_duration": "day",
|
||||
"split_symbols": False
|
||||
}
|
||||
```
|
||||
|
||||
### symbology_resolve
|
||||
```python
|
||||
{
|
||||
"dataset": "GLBX.MDP3",
|
||||
"symbols": ["ES.c.0", "NQ.c.0"],
|
||||
"stype_in": "continuous",
|
||||
"stype_out": "instrument_id",
|
||||
"start_date": "2024-01-01",
|
||||
"end_date": "2024-12-31"
|
||||
}
|
||||
```
|
||||
|
||||
### metadata_get_cost
|
||||
```python
|
||||
{
|
||||
"dataset": "GLBX.MDP3",
|
||||
"start": "2024-01-01",
|
||||
"end": "2024-01-31",
|
||||
"symbols": "ES.c.0",
|
||||
"schema": "ohlcv-1h",
|
||||
"stype_in": "continuous",
|
||||
"mode": "historical"
|
||||
}
|
||||
```
|
||||
|
||||
### get_futures_quote
|
||||
```python
|
||||
{
|
||||
"symbol": "ES" # or "NQ"
|
||||
}
|
||||
```
|
||||
|
||||
### get_session_info
|
||||
```python
|
||||
{
|
||||
"timestamp": "2024-01-15T14:30:00Z" # Optional
|
||||
}
|
||||
```
|
||||
|
||||
### get_historical_bars
|
||||
```python
|
||||
{
|
||||
"symbol": "ES", # or "NQ"
|
||||
"timeframe": "1h",
|
||||
"count": 100
|
||||
}
|
||||
```
|
||||
|
||||
## Common Parameter Mistakes
|
||||
|
||||
### 1. Wrong stype_in for Symbol Format
|
||||
**Wrong:**
|
||||
```python
|
||||
symbols="ES.c.0"
|
||||
stype_in="raw_symbol" # WRONG!
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```python
|
||||
symbols="ES.c.0"
|
||||
stype_in="continuous"
|
||||
```
|
||||
|
||||
### 2. Date Format Errors
|
||||
**Wrong:**
|
||||
```python
|
||||
start="01/15/2024" # US date format - WRONG
|
||||
start="15-01-2024" # Non-ISO format - WRONG
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```python
|
||||
start="2024-01-15" # ISO format - CORRECT
|
||||
```
|
||||
|
||||
### 3. Missing Required Parameters
|
||||
**Wrong:**
|
||||
```python
|
||||
# metadata_get_cost
|
||||
dataset="GLBX.MDP3"
|
||||
start="2024-01-01"
|
||||
# Missing symbols and schema!
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```python
|
||||
dataset="GLBX.MDP3"
|
||||
start="2024-01-01"
|
||||
symbols="ES.c.0"
|
||||
schema="ohlcv-1h"
|
||||
```
|
||||
|
||||
### 4. Schema Typos
|
||||
**Wrong:**
|
||||
```python
|
||||
schema="OHLCV-1H" # Wrong case
|
||||
schema="ohlcv-1hour" # Wrong format
|
||||
schema="ohlcv_1h" # Wrong separator
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```python
|
||||
schema="ohlcv-1h" # Lowercase, hyphenated
|
||||
```
|
||||
|
||||
### 5. Symbol Array vs String Confusion
|
||||
**Wrong:**
|
||||
```python
|
||||
# batch_submit_job expects array
|
||||
symbols="ES.c.0,NQ.c.0" # WRONG for batch jobs
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```python
|
||||
# batch_submit_job
|
||||
symbols=["ES.c.0", "NQ.c.0"] # CORRECT
|
||||
```
|
||||
|
||||
### 6. Encoding/Compression Not Strings
|
||||
**Wrong:**
|
||||
```python
|
||||
encoding=dbn # Not a string
|
||||
compression=zstd # Not a string
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```python
|
||||
encoding="dbn"
|
||||
compression="zstd"
|
||||
```
|
||||
|
||||
## Parameter Validation Checklist
|
||||
|
||||
Before making API calls, verify:
|
||||
|
||||
- [ ] Date format is YYYY-MM-DD or ISO 8601
|
||||
- [ ] Dataset matches your data source (GLBX.MDP3 for ES/NQ)
|
||||
- [ ] Schema is valid and lowercase
|
||||
- [ ] stype_in matches symbol format
|
||||
- [ ] Symbols parameter matches tool expectation (string vs array)
|
||||
- [ ] All required parameters are present
|
||||
- [ ] Enum values are exact strings (case-sensitive)
|
||||
- [ ] start_date <= end_date
|
||||
- [ ] limit is reasonable for dataset size
|
||||
|
||||
## Quick Reference: Required Parameters
|
||||
|
||||
### timeseries_get_range
|
||||
**Required:** dataset, symbols, schema, start
|
||||
|
||||
**Optional:** end, stype_in, stype_out, limit
|
||||
|
||||
### batch_submit_job
|
||||
**Required:** dataset, symbols, schema, start
|
||||
|
||||
**Optional:** end, stype_in, stype_out, encoding, compression, split_duration, split_size, split_symbols, limit
|
||||
|
||||
### symbology_resolve
|
||||
**Required:** dataset, symbols, stype_in, stype_out, start_date
|
||||
|
||||
**Optional:** end_date
|
||||
|
||||
### metadata_get_cost
|
||||
**Required:** dataset, start
|
||||
|
||||
**Optional:** end, symbols, schema, stype_in, mode
|
||||
|
||||
### get_futures_quote
|
||||
**Required:** symbol
|
||||
|
||||
### get_session_info
|
||||
**Optional:** timestamp
|
||||
|
||||
### get_historical_bars
|
||||
**Required:** symbol, timeframe, count
|
||||
501
skills/databento/references/cost-optimization.md
Normal file
501
skills/databento/references/cost-optimization.md
Normal file
@@ -0,0 +1,501 @@
|
||||
# Databento Cost Optimization Guide
|
||||
|
||||
Strategies and best practices for minimizing costs when working with Databento market data.
|
||||
|
||||
## Databento Pricing Model
|
||||
|
||||
### Cost Components
|
||||
|
||||
1. **Databento Usage Fees** - Pay-per-use or subscription
|
||||
2. **Exchange License Fees** - Venue-dependent (varies by exchange)
|
||||
3. **Data Volume** - Amount of data retrieved
|
||||
|
||||
### Pricing Tiers
|
||||
|
||||
**Free Credits:**
|
||||
- $125 free credits for new users
|
||||
- Good for initial development and testing
|
||||
|
||||
**Usage-Based:**
|
||||
- Pay only for data you use
|
||||
- Varies by venue and data type
|
||||
- No minimum commitment
|
||||
|
||||
**Subscriptions:**
|
||||
- Basic Plan: $199/month
|
||||
- Corporate Actions/Security Master: $299/month
|
||||
- Flat-rate access to specific datasets
|
||||
|
||||
## Cost Estimation (ALWAYS Do This First)
|
||||
|
||||
### Use metadata_get_cost Before Every Request
|
||||
|
||||
**Always** estimate cost before fetching data:
|
||||
|
||||
```python
|
||||
mcp__databento__metadata_get_cost(
|
||||
dataset="GLBX.MDP3",
|
||||
start="2024-01-01",
|
||||
end="2024-01-31",
|
||||
symbols="ES.c.0",
|
||||
schema="ohlcv-1h"
|
||||
)
|
||||
```
|
||||
|
||||
**Returns:**
|
||||
- Estimated cost in USD
|
||||
- Data size estimate
|
||||
- Helps decide if request is reasonable
|
||||
|
||||
### When Cost Checks Matter Most
|
||||
|
||||
1. **Multi-day tick data** - Can be expensive
|
||||
2. **Multiple symbols** - Costs multiply
|
||||
3. **High-granularity schemas** - trades, mbp-1, mbo
|
||||
4. **Long date ranges** - Weeks or months of data
|
||||
|
||||
**Example Cost Check:**
|
||||
```python
|
||||
# Cheap: 1 month of daily bars
|
||||
cost_check(schema="ohlcv-1d", start="2024-01-01", end="2024-01-31")
|
||||
# Estimated: $0.10
|
||||
|
||||
# Expensive: 1 month of tick trades
|
||||
cost_check(schema="trades", start="2024-01-01", end="2024-01-31")
|
||||
# Estimated: $50-$200 (depends on volume)
|
||||
```
|
||||
|
||||
## Historical Data (T+1) - No Licensing Required
|
||||
|
||||
**Key Insight:** Historical data that is **24+ hours old (T+1)** does not require exchange licensing fees.
|
||||
|
||||
### Cost Breakdown
|
||||
|
||||
**Live/Recent Data (< 24 hours):**
|
||||
- Databento fees + Exchange licensing fees
|
||||
|
||||
**Historical Data (24+ hours old):**
|
||||
- Databento fees only (no exchange licensing)
|
||||
- Significantly cheaper
|
||||
|
||||
### Optimization Strategy
|
||||
|
||||
**For Development:**
|
||||
- Use T+1 data for strategy development
|
||||
- Switch to live data only for production
|
||||
|
||||
**For Backtesting:**
|
||||
- Always use historical (T+1) data
|
||||
- Much more cost-effective
|
||||
- Same data quality
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# Expensive: Yesterday's data (< 24 hours)
|
||||
start="2024-11-05" # Requires licensing
|
||||
|
||||
# Cheap: 3 days ago (> 24 hours)
|
||||
start="2024-11-03" # No licensing required
|
||||
```
|
||||
|
||||
## Schema Selection for Cost
|
||||
|
||||
Different schemas have vastly different costs due to data volume.
|
||||
|
||||
### Schema Cost Hierarchy (Cheapest to Most Expensive)
|
||||
|
||||
1. **ohlcv-1d** (Cheapest)
|
||||
- ~100 bytes per record
|
||||
- ~250 records per symbol per year
|
||||
- **Best for:** Long-term backtesting
|
||||
|
||||
2. **ohlcv-1h**
|
||||
- ~100 bytes per record
|
||||
- ~6,000 records per symbol per year
|
||||
- **Best for:** Multi-day backtesting
|
||||
|
||||
3. **ohlcv-1m**
|
||||
- ~100 bytes per record
|
||||
- ~360,000 records per symbol per year
|
||||
- **Best for:** Intraday strategies
|
||||
|
||||
4. **trades**
|
||||
- ~50 bytes per record
|
||||
- ~100K-500K records per symbol per day (ES/NQ)
|
||||
- **Best for:** Tick analysis (use selectively)
|
||||
|
||||
5. **mbp-1**
|
||||
- ~150 bytes per record
|
||||
- ~1M-5M records per symbol per day
|
||||
- **Best for:** Order flow analysis (use selectively)
|
||||
|
||||
6. **mbp-10**
|
||||
- ~500 bytes per record
|
||||
- ~1M-5M records per symbol per day
|
||||
- **Best for:** Deep order book analysis (expensive!)
|
||||
|
||||
7. **mbo** (Most Expensive)
|
||||
- ~80 bytes per record
|
||||
- ~5M-20M records per symbol per day
|
||||
- **Best for:** Order-level research (very expensive!)
|
||||
|
||||
### Cost Optimization Strategy
|
||||
|
||||
**Start with lower granularity:**
|
||||
1. Develop strategy with ohlcv-1h or ohlcv-1d
|
||||
2. Validate with ohlcv-1m if needed
|
||||
3. Only use trades/mbp-1 if absolutely necessary
|
||||
4. Avoid mbp-10/mbo unless essential
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# Cheap: Daily bars for 1 year
|
||||
schema="ohlcv-1d"
|
||||
start="2023-01-01"
|
||||
end="2023-12-31"
|
||||
# Cost: < $1
|
||||
|
||||
# Expensive: Trades for 1 year
|
||||
schema="trades"
|
||||
start="2023-01-01"
|
||||
end="2023-12-31"
|
||||
# Cost: $500-$2000 (depending on venue)
|
||||
```
|
||||
|
||||
## Symbol Selection
|
||||
|
||||
Fewer symbols = lower cost. Be selective.
|
||||
|
||||
### Strategies
|
||||
|
||||
**1. Start with Single Symbol**
|
||||
```python
|
||||
# Development
|
||||
symbols="ES.c.0" # Just ES
|
||||
|
||||
# After validation, expand
|
||||
symbols="ES.c.0,NQ.c.0" # Add NQ
|
||||
```
|
||||
|
||||
**2. Use Continuous Contracts**
|
||||
```python
|
||||
# Good: Single continuous contract
|
||||
symbols="ES.c.0" # Covers all front months
|
||||
|
||||
# Wasteful: Multiple specific contracts
|
||||
symbols="ESH5,ESM5,ESU5,ESZ5" # Same data, 4x cost
|
||||
```
|
||||
|
||||
**3. Avoid Symbol Wildcards**
|
||||
```python
|
||||
# Expensive: All instruments
|
||||
symbols="*" # Don't do this!
|
||||
|
||||
# Targeted: Just what you need
|
||||
symbols="ES.c.0,NQ.c.0" # Explicit
|
||||
```
|
||||
|
||||
## Date Range Optimization
|
||||
|
||||
Request only the data you need.
|
||||
|
||||
### Strategies
|
||||
|
||||
**1. Iterative Refinement**
|
||||
```python
|
||||
# First: Test with small range
|
||||
start="2024-01-01"
|
||||
end="2024-01-07" # Just 1 week
|
||||
|
||||
# Then: Expand after validation
|
||||
start="2024-01-01"
|
||||
end="2024-12-31" # Full year
|
||||
```
|
||||
|
||||
**2. Segment Long Ranges**
|
||||
```python
|
||||
# Instead of: 5 years at once
|
||||
start="2019-01-01"
|
||||
end="2024-12-31"
|
||||
|
||||
# Do: Segment by year
|
||||
start="2024-01-01"
|
||||
end="2024-12-31"
|
||||
# Process, then request next year if needed
|
||||
```
|
||||
|
||||
**3. Use Limit for Testing**
|
||||
```python
|
||||
# Test with small limit first
|
||||
limit=100 # Just 100 records
|
||||
|
||||
# After validation, increase or remove
|
||||
limit=10000 # Larger sample
|
||||
```
|
||||
|
||||
## Batch vs Timeseries Selection
|
||||
|
||||
Choose the right tool for the job.
|
||||
|
||||
### Timeseries (< 5GB)
|
||||
**When to use:**
|
||||
- Small to medium datasets
|
||||
- Quick exploration
|
||||
- <= 1 day of tick data
|
||||
- Any OHLCV data
|
||||
|
||||
**Benefits:**
|
||||
- Immediate results
|
||||
- No job management
|
||||
- Direct response
|
||||
|
||||
**Costs:**
|
||||
- Same per-record cost as batch
|
||||
|
||||
### Batch Downloads (> 5GB)
|
||||
**When to use:**
|
||||
- Large datasets (> 5GB)
|
||||
- Multi-day tick data
|
||||
- Multiple symbols over long periods
|
||||
- Production data pipelines
|
||||
|
||||
**Benefits:**
|
||||
- More efficient for large data
|
||||
- Can split output files
|
||||
- Asynchronous processing
|
||||
|
||||
**Costs:**
|
||||
- Same per-record cost as timeseries
|
||||
- No additional fees for batch processing
|
||||
|
||||
### Decision Matrix
|
||||
|
||||
| Data Type | Date Range | Method |
|
||||
|-----------|-----------|--------|
|
||||
| ohlcv-1h | 1 year | Timeseries |
|
||||
| ohlcv-1d | Any | Timeseries |
|
||||
| trades | 1 day | Timeseries |
|
||||
| trades | 1 week+ | Batch |
|
||||
| mbp-1 | 1 day | Batch (safer) |
|
||||
| mbp-1 | 1 week+ | Batch |
|
||||
|
||||
## DBEQ Bundle - Zero Exchange Fees
|
||||
|
||||
Databento offers a special bundle for US equities with **$0 exchange fees**.
|
||||
|
||||
### DBEQ.BASIC Dataset
|
||||
|
||||
**Coverage:**
|
||||
- US equity securities
|
||||
- Zero licensing fees
|
||||
- Databento usage fees only
|
||||
|
||||
**When to use:**
|
||||
- Equity market breadth for ES/NQ analysis
|
||||
- Testing equity strategies
|
||||
- Learning market data APIs
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# Regular equity dataset (has exchange fees)
|
||||
dataset="XNAS.ITCH"
|
||||
# Cost: Databento + Nasdaq fees
|
||||
|
||||
# DBEQ bundle (no exchange fees)
|
||||
dataset="DBEQ.BASIC"
|
||||
# Cost: Databento fees only
|
||||
```
|
||||
|
||||
## Caching and Reuse
|
||||
|
||||
Don't fetch the same data multiple times.
|
||||
|
||||
### Strategies
|
||||
|
||||
**1. Cache Locally**
|
||||
```python
|
||||
# First request: Fetch and save
|
||||
data = fetch_data(...)
|
||||
save_to_disk(data, "ES_2024_ohlcv1h.csv")
|
||||
|
||||
# Subsequent runs: Load from disk
|
||||
data = load_from_disk("ES_2024_ohlcv1h.csv")
|
||||
```
|
||||
|
||||
**2. Incremental Updates**
|
||||
```python
|
||||
# Initial: Fetch full history
|
||||
start="2023-01-01"
|
||||
end="2024-01-01"
|
||||
|
||||
# Later: Fetch only new data
|
||||
start="2024-01-01" # Resume from last fetch
|
||||
end="2024-12-31"
|
||||
```
|
||||
|
||||
**3. Share Data Across Analyses**
|
||||
```python
|
||||
# Fetch once
|
||||
historical_data = fetch_data(schema="ohlcv-1h", ...)
|
||||
|
||||
# Use multiple times
|
||||
backtest_strategy_a(historical_data)
|
||||
backtest_strategy_b(historical_data)
|
||||
backtest_strategy_c(historical_data)
|
||||
```
|
||||
|
||||
## Session-Based Analysis
|
||||
|
||||
For ES/NQ, consider filtering by trading session to reduce data volume.
|
||||
|
||||
### Sessions
|
||||
|
||||
- **Asian Session:** 6pm-2am ET
|
||||
- **London Session:** 2am-8am ET
|
||||
- **New York Session:** 8am-4pm ET
|
||||
|
||||
### Cost Benefit
|
||||
|
||||
**Full 24-hour data:**
|
||||
- Maximum data volume
|
||||
- Higher cost
|
||||
|
||||
**Session-filtered data:**
|
||||
- 1/3 to 1/2 the volume
|
||||
- Lower cost
|
||||
- May be sufficient for analysis
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# Expensive: Full 24-hour data
|
||||
# Process all records
|
||||
|
||||
# Cheaper: NY session only
|
||||
# Filter records to 8am-4pm ET
|
||||
# ~1/3 the data volume
|
||||
```
|
||||
|
||||
Use `scripts/session_filter.py` to filter post-fetch, or request only specific hours.
|
||||
|
||||
## Monitoring Usage
|
||||
|
||||
Track your usage to avoid surprises.
|
||||
|
||||
### Check Dashboard
|
||||
- Databento provides usage dashboard
|
||||
- Monitor monthly spend
|
||||
- Set alerts for limits
|
||||
|
||||
### Set Monthly Limits
|
||||
```python
|
||||
# In account settings
|
||||
monthly_limit=$500
|
||||
```
|
||||
|
||||
### Review Costs Regularly
|
||||
- Check cost estimates vs actual
|
||||
- Identify expensive queries
|
||||
- Adjust strategies
|
||||
|
||||
## Cost Optimization Checklist
|
||||
|
||||
Before every data request:
|
||||
|
||||
- [ ] **Estimate cost first** - Use metadata_get_cost
|
||||
- [ ] **Use T+1 data** - Avoid < 24 hour data unless necessary
|
||||
- [ ] **Choose lowest granularity schema** - Start with ohlcv, not trades
|
||||
- [ ] **Minimize symbols** - Only request what you need
|
||||
- [ ] **Limit date range** - Test with small range first
|
||||
- [ ] **Use continuous contracts** - Avoid requesting multiple months
|
||||
- [ ] **Cache locally** - Don't re-fetch same data
|
||||
- [ ] **Consider DBEQ** - Use zero-fee dataset when applicable
|
||||
- [ ] **Filter by session** - Reduce volume if session-specific
|
||||
- [ ] **Use batch for large data** - More efficient for > 5GB
|
||||
|
||||
## Cost Examples
|
||||
|
||||
### Cheap Requests (< $1)
|
||||
|
||||
```python
|
||||
# Daily bars for 1 year
|
||||
dataset="GLBX.MDP3"
|
||||
symbols="ES.c.0"
|
||||
schema="ohlcv-1d"
|
||||
start="2023-01-01"
|
||||
end="2023-12-31"
|
||||
# Estimated cost: $0.10
|
||||
```
|
||||
|
||||
### Moderate Requests ($1-$10)
|
||||
|
||||
```python
|
||||
# Hourly bars for 1 year
|
||||
dataset="GLBX.MDP3"
|
||||
symbols="ES.c.0,NQ.c.0"
|
||||
schema="ohlcv-1h"
|
||||
start="2023-01-01"
|
||||
end="2023-12-31"
|
||||
# Estimated cost: $2-5
|
||||
```
|
||||
|
||||
### Expensive Requests ($10-$100)
|
||||
|
||||
```python
|
||||
# Trades for 1 month
|
||||
dataset="GLBX.MDP3"
|
||||
symbols="ES.c.0"
|
||||
schema="trades"
|
||||
start="2024-01-01"
|
||||
end="2024-01-31"
|
||||
# Estimated cost: $20-50
|
||||
```
|
||||
|
||||
### Very Expensive Requests ($100+)
|
||||
|
||||
```python
|
||||
# MBP-10 for 1 month
|
||||
dataset="GLBX.MDP3"
|
||||
symbols="ES.c.0,NQ.c.0"
|
||||
schema="mbp-10"
|
||||
start="2024-01-01"
|
||||
end="2024-01-31"
|
||||
# Estimated cost: $200-500
|
||||
```
|
||||
|
||||
## Free Credit Strategy
|
||||
|
||||
Make the most of your $125 free credits:
|
||||
|
||||
1. **Development Phase** - Use free credits for:
|
||||
- Testing API integration
|
||||
- Small-scale strategy development
|
||||
- Learning the platform
|
||||
|
||||
2. **Prioritize T+1 Data** - Stretch credits further:
|
||||
- Avoid real-time data during development
|
||||
- Use historical data (no licensing fees)
|
||||
|
||||
3. **Start with OHLCV** - Cheapest data:
|
||||
- Develop strategy with daily/hourly bars
|
||||
- Validate before moving to tick data
|
||||
|
||||
4. **Cache Everything** - Don't waste credits:
|
||||
- Save all fetched data locally
|
||||
- Reuse for multiple analyses
|
||||
|
||||
5. **Monitor Remaining Balance**:
|
||||
- Check credit usage regularly
|
||||
- Adjust requests to stay within budget
|
||||
|
||||
## Summary
|
||||
|
||||
**Most Important Cost-Saving Strategies:**
|
||||
|
||||
1. ✅ **Always check cost first** - Use metadata_get_cost
|
||||
2. ✅ **Use T+1 data** - 24+ hours old, no licensing fees
|
||||
3. ✅ **Start with OHLCV schemas** - Much cheaper than tick data
|
||||
4. ✅ **Cache and reuse data** - Don't fetch twice
|
||||
5. ✅ **Be selective with symbols** - Fewer symbols = lower cost
|
||||
6. ✅ **Test with small ranges** - Validate before large requests
|
||||
7. ✅ **Use continuous contracts** - One symbol instead of many
|
||||
8. ✅ **Monitor usage** - Track spending, set limits
|
||||
372
skills/databento/references/schemas.md
Normal file
372
skills/databento/references/schemas.md
Normal file
@@ -0,0 +1,372 @@
|
||||
# Databento Schema Reference
|
||||
|
||||
Comprehensive documentation of Databento schemas with field-level details, data types, and usage guidance.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Databento provides 12+ schema types representing different granularity levels of market data. All schemas share common timestamp fields for consistency.
|
||||
|
||||
## Common Fields (All Schemas)
|
||||
|
||||
Every schema includes these timestamp fields:
|
||||
|
||||
| Field | Type | Description | Unit |
|
||||
|-------|------|-------------|------|
|
||||
| `ts_event` | uint64 | Event timestamp from venue | Nanoseconds (Unix epoch) |
|
||||
| `ts_recv` | uint64 | Databento gateway receipt time | Nanoseconds (Unix epoch) |
|
||||
|
||||
**Important:** Databento provides up to 4 timestamps per event for sub-microsecond accuracy.
|
||||
|
||||
## OHLCV Schemas
|
||||
|
||||
Candlestick/bar data at various time intervals.
|
||||
|
||||
### ohlcv-1s (1 Second Bars)
|
||||
### ohlcv-1m (1 Minute Bars)
|
||||
### ohlcv-1h (1 Hour Bars)
|
||||
### ohlcv-1d (Daily Bars)
|
||||
### ohlcv-eod (End of Day)
|
||||
|
||||
**Common OHLCV Fields:**
|
||||
|
||||
| Field | Type | Description | Unit |
|
||||
|-------|------|-------------|------|
|
||||
| `open` | int64 | Opening price | Fixed-point (divide by 1e9 for decimal) |
|
||||
| `high` | int64 | Highest price | Fixed-point (divide by 1e9 for decimal) |
|
||||
| `low` | int64 | Lowest price | Fixed-point (divide by 1e9 for decimal) |
|
||||
| `close` | int64 | Closing price | Fixed-point (divide by 1e9 for decimal) |
|
||||
| `volume` | uint64 | Total volume | Contracts/shares |
|
||||
|
||||
**When to Use:**
|
||||
- **1h/1d**: Historical backtesting, multi-day analysis
|
||||
- **1m**: Intraday strategy development
|
||||
- **1s**: High-frequency analysis (use batch for large ranges)
|
||||
- **eod**: Long-term investment analysis
|
||||
|
||||
**Pricing Format:**
|
||||
Prices are in fixed-point notation. To convert to decimal:
|
||||
```
|
||||
decimal_price = int64_price / 1_000_000_000
|
||||
```
|
||||
|
||||
For ES futures at 4500.00, the value would be stored as `4500000000000`.
|
||||
|
||||
## Trades Schema
|
||||
|
||||
Individual trade executions with price, size, and side information.
|
||||
|
||||
| Field | Type | Description | Values |
|
||||
|-------|------|-------------|--------|
|
||||
| `price` | int64 | Trade execution price | Fixed-point (÷ 1e9) |
|
||||
| `size` | uint32 | Trade size | Contracts/shares |
|
||||
| `action` | char | Trade action | 'T' = trade, 'C' = cancel |
|
||||
| `side` | char | Aggressor side | 'B' = buy, 'S' = sell, 'N' = none |
|
||||
| `flags` | uint8 | Trade flags | Bitmask |
|
||||
| `depth` | uint8 | Depth level | Usually 0 |
|
||||
| `ts_in_delta` | int32 | Time delta | Nanoseconds |
|
||||
| `sequence` | uint32 | Sequence number | Venue-specific |
|
||||
|
||||
**When to Use:**
|
||||
- Intraday order flow analysis
|
||||
- Tick-by-tick backtesting
|
||||
- Market microstructure research
|
||||
- Volume profile analysis
|
||||
|
||||
**Aggressor Side:**
|
||||
- `B` = Buy-side aggressor (market buy hit the ask)
|
||||
- `S` = Sell-side aggressor (market sell hit the bid)
|
||||
- `N` = Cannot be determined or not applicable
|
||||
|
||||
**Important:** For multi-day tick data, use batch downloads. Trades can generate millions of records per day.
|
||||
|
||||
## MBP-1 Schema (Market By Price - Top of Book)
|
||||
|
||||
Level 1 order book data showing best bid and ask.
|
||||
|
||||
| Field | Type | Description | Values |
|
||||
|-------|------|-------------|--------|
|
||||
| `price` | int64 | Reference price (usually last trade) | Fixed-point (÷ 1e9) |
|
||||
| `size` | uint32 | Reference size | Contracts/shares |
|
||||
| `action` | char | Book action | 'A' = add, 'C' = cancel, 'M' = modify, 'T' = trade |
|
||||
| `side` | char | Order side | 'B' = bid, 'A' = ask, 'N' = none |
|
||||
| `flags` | uint8 | Flags | Bitmask |
|
||||
| `depth` | uint8 | Depth level | Always 0 for MBP-1 |
|
||||
| `ts_in_delta` | int32 | Time delta | Nanoseconds |
|
||||
| `sequence` | uint32 | Sequence number | Venue-specific |
|
||||
| `bid_px_00` | int64 | Best bid price | Fixed-point (÷ 1e9) |
|
||||
| `ask_px_00` | int64 | Best ask price | Fixed-point (÷ 1e9) |
|
||||
| `bid_sz_00` | uint32 | Best bid size | Contracts/shares |
|
||||
| `ask_sz_00` | uint32 | Best ask size | Contracts/shares |
|
||||
| `bid_ct_00` | uint32 | Bid order count | Number of orders |
|
||||
| `ask_ct_00` | uint32 | Ask order count | Number of orders |
|
||||
|
||||
**When to Use:**
|
||||
- Bid/ask spread analysis
|
||||
- Liquidity analysis
|
||||
- Market microstructure studies
|
||||
- Quote-based strategies
|
||||
|
||||
**Key Metrics:**
|
||||
```
|
||||
spread = ask_px_00 - bid_px_00
|
||||
mid_price = (bid_px_00 + ask_px_00) / 2
|
||||
bid_ask_imbalance = (bid_sz_00 - ask_sz_00) / (bid_sz_00 + ask_sz_00)
|
||||
```
|
||||
|
||||
## MBP-10 Schema (Market By Price - 10 Levels)
|
||||
|
||||
Level 2 order book data showing 10 levels of depth.
|
||||
|
||||
**Fields:** Same as MBP-1, plus 9 additional levels:
|
||||
- `bid_px_01` through `bid_px_09` (9 additional bid levels; 10 total including `bid_px_00`)
|
||||
- `ask_px_01` through `ask_px_09` (9 additional ask levels; 10 total including `ask_px_00`)
|
||||
- `bid_sz_01` through `bid_sz_09`
|
||||
- `ask_sz_01` through `ask_sz_09`
|
||||
- `bid_ct_01` through `bid_ct_09`
|
||||
- `ask_ct_01` through `ask_ct_09`
|
||||
|
||||
**When to Use:**
|
||||
- Order book depth analysis
|
||||
- Liquidity beyond top of book
|
||||
- Order flow imbalance at multiple levels
|
||||
- Market impact modeling
|
||||
|
||||
**Important:** MBP-10 generates significantly more data than MBP-1. Use batch downloads for multi-day requests.
|
||||
|
||||
## MBO Schema (Market By Order)
|
||||
|
||||
Level 3 order-level data with individual order IDs - most granular.
|
||||
|
||||
| Field | Type | Description | Values |
|
||||
|-------|------|-------------|--------|
|
||||
| `order_id` | uint64 | Unique order ID | Venue-specific |
|
||||
| `price` | int64 | Order price | Fixed-point (÷ 1e9) |
|
||||
| `size` | uint32 | Order size | Contracts/shares |
|
||||
| `flags` | uint8 | Flags | Bitmask |
|
||||
| `channel_id` | uint8 | Channel ID | Venue-specific |
|
||||
| `action` | char | Order action | 'A' = add, 'C' = cancel, 'M' = modify, 'F' = fill, 'T' = trade |
|
||||
| `side` | char | Order side | 'B' = bid, 'A' = ask, 'N' = none |
|
||||
| `ts_in_delta` | int32 | Time delta | Nanoseconds |
|
||||
| `sequence` | uint32 | Sequence number | Venue-specific |
|
||||
|
||||
**When to Use:**
|
||||
- Highest granularity order flow analysis
|
||||
- Order-level reconstructions
|
||||
- Advanced market microstructure research
|
||||
- Queue position analysis
|
||||
|
||||
**Important:** MBO data is extremely granular and generates massive datasets. Always use batch downloads and carefully check costs.
|
||||
|
||||
## Definition Schema
|
||||
|
||||
Instrument metadata and definitions.
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `ts_recv` | uint64 | Receipt timestamp |
|
||||
| `min_price_increment` | int64 | Minimum tick size |
|
||||
| `display_factor` | int64 | Display factor for prices |
|
||||
| `expiration` | uint64 | Contract expiration timestamp |
|
||||
| `activation` | uint64 | Contract activation timestamp |
|
||||
| `high_limit_price` | int64 | Upper price limit |
|
||||
| `low_limit_price` | int64 | Lower price limit |
|
||||
| `max_price_variation` | int64 | Maximum price move |
|
||||
| `trading_reference_price` | int64 | Reference price |
|
||||
| `unit_of_measure_qty` | int64 | Contract size |
|
||||
| `min_price_increment_amount` | int64 | Tick value |
|
||||
| `price_ratio` | int64 | Price ratio |
|
||||
| `inst_attrib_value` | int32 | Instrument attributes |
|
||||
| `underlying_id` | uint32 | Underlying instrument ID |
|
||||
| `raw_instrument_id` | uint32 | Raw instrument ID |
|
||||
| `market_depth_implied` | int32 | Implied depth |
|
||||
| `market_depth` | int32 | Market depth |
|
||||
| `market_segment_id` | uint32 | Market segment |
|
||||
| `max_trade_vol` | uint32 | Maximum trade volume |
|
||||
| `min_lot_size` | int32 | Minimum lot size |
|
||||
| `min_lot_size_block` | int32 | Block trade minimum |
|
||||
| `min_lot_size_round_lot` | int32 | Round lot minimum |
|
||||
| `min_trade_vol` | uint32 | Minimum trade volume |
|
||||
| `contract_multiplier` | int32 | Contract multiplier |
|
||||
| `decay_quantity` | int32 | Decay quantity |
|
||||
| `original_contract_size` | int32 | Original size |
|
||||
| `trading_reference_date` | uint16 | Reference date |
|
||||
| `appl_id` | int16 | Application ID |
|
||||
| `maturity_year` | uint16 | Year |
|
||||
| `decay_start_date` | uint16 | Decay start |
|
||||
| `channel_id` | uint16 | Channel |
|
||||
| `currency` | string | Currency code |
|
||||
| `settl_currency` | string | Settlement currency |
|
||||
| `secsubtype` | string | Security subtype |
|
||||
| `raw_symbol` | string | Raw symbol |
|
||||
| `group` | string | Instrument group |
|
||||
| `exchange` | string | Exchange code |
|
||||
| `asset` | string | Asset class |
|
||||
| `cfi` | string | CFI code |
|
||||
| `security_type` | string | Security type |
|
||||
| `unit_of_measure` | string | Unit of measure |
|
||||
| `underlying` | string | Underlying symbol |
|
||||
| `strike_price_currency` | string | Strike currency |
|
||||
| `instrument_class` | char | Class |
|
||||
| `strike_price` | int64 | Strike price (options) |
|
||||
| `match_algorithm` | char | Matching algorithm |
|
||||
| `md_security_trading_status` | uint8 | Trading status |
|
||||
| `main_fraction` | uint8 | Main fraction |
|
||||
| `price_display_format` | uint8 | Display format |
|
||||
| `settl_price_type` | uint8 | Settlement type |
|
||||
| `sub_fraction` | uint8 | Sub fraction |
|
||||
| `underlying_product` | uint8 | Underlying product |
|
||||
| `security_update_action` | char | Update action |
|
||||
| `maturity_month` | uint8 | Month |
|
||||
| `maturity_day` | uint8 | Day |
|
||||
| `maturity_week` | uint8 | Week |
|
||||
| `user_defined_instrument` | char | User-defined |
|
||||
| `contract_multiplier_unit` | int8 | Multiplier unit |
|
||||
| `flow_schedule_type` | int8 | Flow schedule |
|
||||
| `tick_rule` | uint8 | Tick rule |
|
||||
|
||||
**When to Use:**
|
||||
- Understanding instrument specifications
|
||||
- Calculating tick values
|
||||
- Contract expiration management
|
||||
- Symbol resolution and mapping
|
||||
|
||||
**Key Fields for ES/NQ:**
|
||||
- `min_price_increment`: Tick size (0.25 for ES, 0.25 for NQ)
|
||||
- `expiration`: Contract expiration timestamp
|
||||
- `raw_symbol`: Exchange symbol
|
||||
- `contract_multiplier`: Usually 50 for ES, 20 for NQ
|
||||
|
||||
## Statistics Schema
|
||||
|
||||
Market statistics and calculated metrics.
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `ts_recv` | uint64 | Receipt timestamp |
|
||||
| `ts_ref` | uint64 | Reference timestamp |
|
||||
| `price` | int64 | Reference price |
|
||||
| `quantity` | int64 | Reference quantity |
|
||||
| `sequence` | uint32 | Sequence number |
|
||||
| `ts_in_delta` | int32 | Time delta |
|
||||
| `stat_type` | uint16 | Statistic type |
|
||||
| `channel_id` | uint16 | Channel ID |
|
||||
| `update_action` | uint8 | Update action |
|
||||
| `stat_flags` | uint8 | Statistic flags |
|
||||
|
||||
**Common Statistic Types:**
|
||||
- Opening price
|
||||
- Settlement price
|
||||
- High/low prices
|
||||
- Trading volume
|
||||
- Open interest
|
||||
|
||||
**When to Use:**
|
||||
- Official settlement prices
|
||||
- Open interest analysis
|
||||
- Exchange-calculated statistics
|
||||
|
||||
## Status Schema
|
||||
|
||||
Instrument trading status and state changes.
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `ts_recv` | uint64 | Receipt timestamp |
|
||||
| `ts_event` | uint64 | Event timestamp |
|
||||
| `action` | uint16 | Status action |
|
||||
| `reason` | uint16 | Status reason |
|
||||
| `trading_event` | uint16 | Trading event |
|
||||
| `is_trading` | int8 | Trading flag (1 = trading, 0 = not trading) |
|
||||
| `is_quoting` | int8 | Quoting flag |
|
||||
| `is_short_sell_restricted` | int8 | Short sell flag |
|
||||
|
||||
**When to Use:**
|
||||
- Detecting trading halts
|
||||
- Understanding market status changes
|
||||
- Filtering data by trading status
|
||||
|
||||
## Imbalance Schema
|
||||
|
||||
Order imbalance data for auctions and closes.
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `ts_recv` | uint64 | Receipt timestamp |
|
||||
| `ts_event` | uint64 | Event timestamp |
|
||||
| `ref_price` | int64 | Reference price |
|
||||
| `auction_time` | uint64 | Auction timestamp |
|
||||
| `cont_book_clr_price` | int64 | Continuous book clearing price |
|
||||
| `auct_interest_clr_price` | int64 | Auction interest clearing price |
|
||||
| `paired_qty` | uint64 | Paired quantity |
|
||||
| `total_imbalance_qty` | uint64 | Total imbalance |
|
||||
| `side` | char | Imbalance side ('B' or 'A') |
|
||||
| `significant_imbalance` | char | Significance flag |
|
||||
|
||||
**When to Use:**
|
||||
- Opening/closing auction analysis
|
||||
- Imbalance trading strategies
|
||||
- End-of-day positioning
|
||||
|
||||
## Schema Selection Decision Matrix
|
||||
|
||||
| Analysis Type | Recommended Schema | Alternative |
|
||||
|---------------|-------------------|-------------|
|
||||
| Daily backtesting | ohlcv-1d | ohlcv-1h |
|
||||
| Intraday backtesting | ohlcv-1h, ohlcv-1m | trades |
|
||||
| Spread analysis | mbp-1 | trades |
|
||||
| Order flow | trades | mbp-1 |
|
||||
| Market depth | mbp-10 | mbo |
|
||||
| Tick-by-tick | trades | mbo |
|
||||
| Liquidity analysis | mbp-1, mbp-10 | mbo |
|
||||
| Contract specifications | definition | - |
|
||||
| Settlement prices | statistics | definition |
|
||||
| Trading halts | status | - |
|
||||
| Auction analysis | imbalance | trades |
|
||||
|
||||
## Data Type Reference
|
||||
|
||||
### Fixed-Point Prices
|
||||
All price fields are stored as int64 in fixed-point notation with 9 decimal places of precision.
|
||||
|
||||
**Conversion:**
|
||||
```python
|
||||
decimal_price = int64_price / 1_000_000_000
|
||||
```
|
||||
|
||||
**Example:**
|
||||
- ES at 4500.25 → stored as 4500250000000
|
||||
- NQ at 15000.50 → stored as 15000500000000
|
||||
|
||||
### Timestamps
|
||||
All timestamps are uint64 nanoseconds since Unix epoch (1970-01-01 00:00:00 UTC).
|
||||
|
||||
**Conversion to datetime:**
|
||||
```python
|
||||
import datetime
|
||||
dt = datetime.datetime.fromtimestamp(ts_event / 1_000_000_000, tz=datetime.timezone.utc)
|
||||
```
|
||||
|
||||
### Character Fields
|
||||
Single-character fields (char) represent enums:
|
||||
- Action: 'A' (add), 'C' (cancel), 'M' (modify), 'T' (trade), 'F' (fill)
|
||||
- Side: 'B' (bid), 'A' (ask), 'N' (none/unknown)
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Schema Size (Approximate bytes per record)
|
||||
|
||||
| Schema | Size | Records/GB |
|
||||
|--------|------|------------|
|
||||
| ohlcv-1d | ~100 | ~10M |
|
||||
| ohlcv-1h | ~100 | ~10M |
|
||||
| trades | ~50 | ~20M |
|
||||
| mbp-1 | ~150 | ~6.7M |
|
||||
| mbp-10 | ~500 | ~2M |
|
||||
| mbo | ~80 | ~12.5M |
|
||||
|
||||
**Planning requests:**
|
||||
- 1 day of ES trades ≈ 100K-500K records ≈ 5-25 MB
|
||||
- 1 day of ES mbp-1 ≈ 1M-5M records ≈ 150-750 MB
|
||||
- 1 year of ES ohlcv-1h ≈ 6K records ≈ 600 KB
|
||||
|
||||
Use these estimates to decide between timeseries (< 5GB) and batch downloads (> 5GB).
|
||||
451
skills/databento/references/symbology.md
Normal file
451
skills/databento/references/symbology.md
Normal file
@@ -0,0 +1,451 @@
|
||||
# Databento Symbology Reference
|
||||
|
||||
Comprehensive guide to Databento's symbology system including continuous contracts, symbol types, and resolution strategies.
|
||||
|
||||
## Symbol Types (stypes)
|
||||
|
||||
Databento supports multiple symbology naming conventions. Use `mcp__databento__symbology_resolve` to convert between types.
|
||||
|
||||
### raw_symbol
|
||||
Native exchange symbols as provided by the venue.
|
||||
|
||||
**Examples:**
|
||||
- `ESH5` - ES March 2025 contract
|
||||
- `NQM5` - NQ June 2025 contract
|
||||
- `AAPL` - Apple Inc. stock
|
||||
- `SPY` - SPDR S&P 500 ETF
|
||||
|
||||
**When to use:**
|
||||
- Working with specific contract months
|
||||
- Exact symbol from exchange documentation
|
||||
- Historical analysis of specific expirations
|
||||
|
||||
**Limitations:**
|
||||
- Requires knowing exact contract codes
|
||||
- Different venues use different conventions
|
||||
- Doesn't handle roll automatically
|
||||
|
||||
### instrument_id
|
||||
Databento's internal numeric identifier for each instrument.
|
||||
|
||||
**Examples:**
|
||||
- `123456789` - Unique ID for ESH5
|
||||
- `987654321` - Unique ID for NQM5
|
||||
|
||||
**When to use:**
|
||||
- After symbol resolution
|
||||
- Internally within Databento system
|
||||
- When guaranteed uniqueness is required
|
||||
|
||||
**Benefits:**
|
||||
- Globally unique across all venues
|
||||
- Never changes for a given instrument
|
||||
- Most efficient for API requests
|
||||
|
||||
**Limitations:**
|
||||
- Not human-readable
|
||||
- Requires resolution step to obtain
|
||||
|
||||
### continuous
|
||||
Continuous contract notation with automatic rolling for futures.
|
||||
|
||||
**Format:** `{ROOT}.{STRATEGY}.{OFFSET}`
|
||||
|
||||
**Examples:**
|
||||
- `ES.c.0` - ES front month, calendar roll
|
||||
- `NQ.n.0` - NQ front month, open interest roll
|
||||
- `ES.v.1` - ES second month, volume roll
|
||||
- `GC.c.0` - Gold front month, calendar roll
|
||||
|
||||
**When to use:**
|
||||
- Backtesting across multiple expirations
|
||||
- Avoiding roll gaps in analysis
|
||||
- Long-term continuous price series
|
||||
|
||||
**Benefits:**
|
||||
- Automatic roll handling
|
||||
- Consistent symbology across time
|
||||
- Ideal for backtesting
|
||||
|
||||
### parent
|
||||
Parent contract symbols for options or complex instruments.
|
||||
|
||||
**Examples:**
|
||||
- `ES` - Parent for all ES contracts
|
||||
- `NQ` - Parent for all NQ contracts
|
||||
|
||||
**When to use:**
|
||||
- Options underlying symbols
|
||||
- Querying all contracts in a family
|
||||
- Getting contract family metadata
|
||||
|
||||
## Continuous Contract Deep Dive
|
||||
|
||||
Continuous contracts are the most powerful feature for futures analysis. They automatically handle contract rolls using different strategies.
|
||||
|
||||
### Roll Strategies
|
||||
|
||||
#### Calendar Roll (.c.X)
|
||||
Rolls on fixed calendar dates regardless of market activity.
|
||||
|
||||
**Notation:** `ES.c.0`, `NQ.c.1`
|
||||
|
||||
**Roll Timing:**
|
||||
- ES: Rolls 8 days before contract expiration
|
||||
- NQ: Rolls 8 days before contract expiration
|
||||
|
||||
**When to use:**
|
||||
- Standard backtesting
|
||||
- Most predictable roll schedule
|
||||
- When roll timing is less critical
|
||||
|
||||
**Pros:**
|
||||
- Predictable roll dates
|
||||
- Consistent across instruments
|
||||
- Simple to understand
|
||||
|
||||
**Cons:**
|
||||
- May roll during low liquidity
|
||||
- Doesn't consider market dynamics
|
||||
|
||||
#### Open Interest Roll (.n.X)
|
||||
Rolls when open interest moves to the next contract.
|
||||
|
||||
**Notation:** `ES.n.0`, `NQ.n.1`
|
||||
|
||||
**Roll Timing:**
|
||||
- Switches when next contract's OI > current contract's OI
|
||||
|
||||
**When to use:**
|
||||
- Avoiding early rolls
|
||||
- Following market participants
|
||||
- When market dynamics matter
|
||||
|
||||
**Pros:**
|
||||
- Follows market behavior
|
||||
- Natural transition point
|
||||
- Avoids artificial timing
|
||||
|
||||
**Cons:**
|
||||
- Less predictable timing
|
||||
- Can be delayed during low volume
|
||||
- Different instruments roll at different times
|
||||
|
||||
#### Volume Roll (.v.X)
|
||||
Rolls when trading volume moves to the next contract.
|
||||
|
||||
**Notation:** `ES.v.0`, `NQ.v.1`
|
||||
|
||||
**Roll Timing:**
|
||||
- Switches when next contract's volume > current contract's volume
|
||||
|
||||
**When to use:**
|
||||
- Following most liquid contract
|
||||
- High-frequency analysis
|
||||
- When execution quality matters
|
||||
|
||||
**Pros:**
|
||||
- Always in most liquid contract
|
||||
- Best for execution
|
||||
- Real-time liquidity tracking
|
||||
|
||||
**Cons:**
|
||||
- Most variable timing
|
||||
- Can switch back and forth
|
||||
- Requires careful validation
|
||||
|
||||
### Offset Parameter (.X)
|
||||
|
||||
The offset determines which contract month in the series.
|
||||
|
||||
| Offset | Description | Example Usage |
|
||||
|--------|-------------|---------------|
|
||||
| `.0` | Front month | Primary trading contract |
|
||||
| `.1` | Second month | Spread analysis vs front |
|
||||
| `.2` | Third month | Deferred spread analysis |
|
||||
| `.3+` | Further months | Calendar spread strategies |
|
||||
|
||||
**Common Patterns:**
|
||||
- `ES.c.0` - Standard ES continuous (front month)
|
||||
- `ES.c.0,ES.c.1` - ES calendar spread (front vs back)
|
||||
- `ES.c.0,NQ.c.0` - ES/NQ pair analysis
|
||||
|
||||
## ES/NQ Specific Symbology
|
||||
|
||||
### ES (E-mini S&P 500)
|
||||
|
||||
**Contract Months:** H (Mar), M (Jun), U (Sep), Z (Dec)
|
||||
|
||||
**Raw Symbol Format:** `ES{MONTH}{YEAR}`
|
||||
- `ESH5` = March 2025
|
||||
- `ESM5` = June 2025
|
||||
- `ESU5` = September 2025
|
||||
- `ESZ5` = December 2025
|
||||
|
||||
**Continuous Contracts:**
|
||||
- `ES.c.0` - Front month (most common)
|
||||
- `ES.n.0` - OI-based front month
|
||||
- `ES.v.0` - Volume-based front month
|
||||
|
||||
**Tick Size:** 0.25 points ($12.50 per tick)
|
||||
**Contract Multiplier:** $50 per point
|
||||
**Trading Hours:** Nearly 24 hours (Sunday 6pm - Friday 5pm ET, with a daily maintenance halt from 5pm to 6pm ET)
|
||||
|
||||
### NQ (E-mini Nasdaq-100)
|
||||
|
||||
**Contract Months:** H (Mar), M (Jun), U (Sep), Z (Dec)
|
||||
|
||||
**Raw Symbol Format:** `NQ{MONTH}{YEAR}`
|
||||
- `NQH5` = March 2025
|
||||
- `NQM5` = June 2025
|
||||
- `NQU5` = September 2025
|
||||
- `NQZ5` = December 2025
|
||||
|
||||
**Continuous Contracts:**
|
||||
- `NQ.c.0` - Front month (most common)
|
||||
- `NQ.n.0` - OI-based front month
|
||||
- `NQ.v.0` - Volume-based front month
|
||||
|
||||
**Tick Size:** 0.25 points ($5.00 per tick)
|
||||
**Contract Multiplier:** $20 per point
|
||||
**Trading Hours:** Nearly 24 hours (Sunday 6pm - Friday 5pm ET, with a daily maintenance halt from 5pm to 6pm ET)
|
||||
|
||||
### Month Codes Reference
|
||||
|
||||
| Code | Month | Typical Expiration |
|
||||
|------|-------|-------------------|
|
||||
| F | January | 3rd Friday |
|
||||
| G | February | 3rd Friday |
|
||||
| H | March | 3rd Friday |
|
||||
| J | April | 3rd Friday |
|
||||
| K | May | 3rd Friday |
|
||||
| M | June | 3rd Friday |
|
||||
| N | July | 3rd Friday |
|
||||
| Q | August | 3rd Friday |
|
||||
| U | September | 3rd Friday |
|
||||
| V | October | 3rd Friday |
|
||||
| X | November | 3rd Friday |
|
||||
| Z | December | 3rd Friday |
|
||||
|
||||
**Note:** ES/NQ only trade quarterly contracts (H, M, U, Z).
|
||||
|
||||
## Symbol Resolution
|
||||
|
||||
Use `mcp__databento__symbology_resolve` to convert between symbol types.
|
||||
|
||||
### Common Resolution Patterns
|
||||
|
||||
**Continuous to Instrument ID:**
|
||||
```
|
||||
Input: ES.c.0
|
||||
stype_in: continuous
|
||||
stype_out: instrument_id
|
||||
Result: Maps to current front month's instrument_id
|
||||
```
|
||||
|
||||
**Raw Symbol to Instrument ID:**
|
||||
```
|
||||
Input: ESH5
|
||||
stype_in: raw_symbol
|
||||
stype_out: instrument_id
|
||||
Result: Specific instrument_id for ESH5
|
||||
```
|
||||
|
||||
**Continuous to Raw Symbol:**
|
||||
```
|
||||
Input: ES.c.0
|
||||
stype_in: continuous
|
||||
stype_out: raw_symbol
|
||||
Result: Current front month symbol (e.g., ESH5)
|
||||
```
|
||||
|
||||
### Time-Based Resolution
|
||||
|
||||
Symbol resolution is **date-dependent**. The same continuous contract resolves to different instruments across time.
|
||||
|
||||
**Example:**
|
||||
- `ES.c.0` on 2024-01-15 → ESH4 (March 2024)
|
||||
- `ES.c.0` on 2024-04-15 → ESM4 (June 2024)
|
||||
- `ES.c.0` on 2024-07-15 → ESU4 (September 2024)
|
||||
|
||||
**Important:** Always specify `start_date` and `end_date` when resolving symbols for historical analysis.
|
||||
|
||||
### Resolution Parameters
|
||||
|
||||
```
|
||||
mcp__databento__symbology_resolve
|
||||
- dataset: "GLBX.MDP3"
|
||||
- symbols: ["ES.c.0", "NQ.c.0"]
|
||||
- stype_in: "continuous"
|
||||
- stype_out: "instrument_id"
|
||||
- start_date: "2024-01-01"
|
||||
- end_date: "2024-12-31"
|
||||
```
|
||||
|
||||
Returns mapping of continuous symbols to instrument IDs for each day in the range.
|
||||
|
||||
## Expiration Handling
|
||||
|
||||
### Roll Dates
|
||||
|
||||
ES/NQ contracts expire on the **3rd Friday of the contract month** at 9:30 AM ET.
|
||||
|
||||
**Calendar Roll (.c.0) Schedule:**
|
||||
- Rolls **8 days before expiration**
|
||||
- Always rolls on the same relative day
|
||||
- Predictable for backtesting
|
||||
|
||||
**Example for ESH5 (March 2025):**
|
||||
- Expiration: Friday, March 21, 2025
|
||||
- Calendar roll: March 13, 2025 (8 days before)
|
||||
|
||||
### Roll Detection
|
||||
|
||||
To detect when a continuous contract rolled, compare instrument_id or raw_symbol across consecutive timestamps.
|
||||
|
||||
**Example:**
|
||||
```
|
||||
2024-03-12: ES.c.0 → ESH4
|
||||
2024-03-13: ES.c.0 → ESM4 (rolled!)
|
||||
```
|
||||
|
||||
### Handling Roll Gaps
|
||||
|
||||
Price discontinuities often occur at roll:
|
||||
|
||||
**Gap Detection:**
|
||||
```
|
||||
if abs(close_before_roll - open_after_roll) > threshold:
|
||||
# Roll gap detected
|
||||
```
|
||||
|
||||
**Adjustment Strategies:**
|
||||
1. **Ratio Adjustment:** Multiply historical prices by ratio
|
||||
2. **Difference Adjustment:** Add/subtract difference
|
||||
3. **No Adjustment:** Keep raw prices (most common for futures)
|
||||
|
||||
For ES/NQ futures, **no adjustment** is standard since contracts are similar.
|
||||
|
||||
## Symbol Validation
|
||||
|
||||
### Valid Symbol Patterns
|
||||
|
||||
**Continuous:**
|
||||
- Must match: `{ROOT}.{c|n|v}.{0-9+}`
|
||||
- Examples: `ES.c.0`, `NQ.n.1`, `GC.v.0`
|
||||
|
||||
**Raw Symbols (Futures):**
|
||||
- Must match: `{ROOT}{MONTH_CODE}{YEAR}`
|
||||
- Examples: `ESH5`, `NQZ4`, `GCM6`
|
||||
|
||||
**Equity Symbols:**
|
||||
- 1-5 uppercase letters
|
||||
- Examples: `AAPL`, `MSFT`, `SPY`, `GOOGL`
|
||||
|
||||
### Symbol Existence Validation
|
||||
|
||||
Before using a symbol, validate it exists in the dataset:
|
||||
|
||||
1. Use `mcp__databento__symbology_resolve` to resolve
|
||||
2. Use `mcp__databento__reference_search_securities` for metadata
|
||||
3. Check definition schema for instrument details
|
||||
|
||||
## Common Symbol Pitfalls
|
||||
|
||||
### 1. Wrong stype_in for Continuous Contracts
|
||||
**Wrong:**
|
||||
```
|
||||
symbols: "ES.c.0"
|
||||
stype_in: "raw_symbol" # WRONG!
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```
|
||||
symbols: "ES.c.0"
|
||||
stype_in: "continuous" # CORRECT
|
||||
```
|
||||
|
||||
### 2. Forgetting Date Range for Resolution
|
||||
**Wrong:**
|
||||
```
|
||||
symbology_resolve(symbols=["ES.c.0"], start_date="2024-01-01")
|
||||
# Missing end_date - only resolves for one day
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```
|
||||
symbology_resolve(symbols=["ES.c.0"], start_date="2024-01-01", end_date="2024-12-31")
|
||||
# Resolves for entire year
|
||||
```
|
||||
|
||||
### 3. Using Expired Contracts
|
||||
**Wrong:**
|
||||
```
|
||||
# ESH4 expired in March 2024
|
||||
symbols: "ESH4"
|
||||
start_date: "2024-06-01" # After expiration!
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```
|
||||
# Use continuous contract
|
||||
symbols: "ES.c.0"
|
||||
start_date: "2024-06-01" # Automatically maps to ESM4
|
||||
```
|
||||
|
||||
### 4. Mixing Symbol Types
|
||||
**Wrong:**
|
||||
```
|
||||
symbols: "ES.c.0,ESH5,123456" # Mixed types!
|
||||
```
|
||||
|
||||
**Correct:**
|
||||
```
|
||||
# Resolve separately or use same type
|
||||
symbols: "ES.c.0,NQ.c.0" # All continuous
|
||||
```
|
||||
|
||||
## Symbol Best Practices
|
||||
|
||||
1. **Use continuous contracts for backtesting** - Avoids manual roll management
|
||||
2. **Prefer calendar rolls (.c.X) unless specific reason** - Most predictable
|
||||
3. **Always validate symbols exist** - Use symbology_resolve before fetching data
|
||||
4. **Specify date ranges for resolution** - Symbol meanings change over time
|
||||
5. **Use instrument_id after resolution** - Most efficient for API calls
|
||||
6. **Document roll strategy** - Know which roll type (.c/.n/.v) you're using
|
||||
7. **Test around roll dates** - Verify behavior during contract transitions
|
||||
8. **Cache symbol mappings** - Don't re-resolve repeatedly
|
||||
|
||||
## Quick Reference: Common Symbols
|
||||
|
||||
### ES/NQ Continuous (Most Common)
|
||||
```
|
||||
ES.c.0 # ES front month, calendar roll
|
||||
NQ.c.0 # NQ front month, calendar roll
|
||||
ES.c.1 # ES second month
|
||||
NQ.c.1 # NQ second month
|
||||
```
|
||||
|
||||
### ES/NQ Specific Contracts (2025)
|
||||
```
|
||||
ESH5 # ES March 2025
|
||||
ESM5 # ES June 2025
|
||||
ESU5 # ES September 2025
|
||||
ESZ5 # ES December 2025
|
||||
|
||||
NQH5 # NQ March 2025
|
||||
NQM5 # NQ June 2025
|
||||
NQU5 # NQ September 2025
|
||||
NQZ5 # NQ December 2025
|
||||
```
|
||||
|
||||
### Equity Market Breadth (Supporting ES/NQ Analysis)
|
||||
```
|
||||
SPY # SPDR S&P 500 ETF
|
||||
QQQ # Invesco QQQ (Nasdaq-100 ETF)
|
||||
VIX # CBOE Volatility Index
|
||||
TICK # NYSE TICK
|
||||
VOLD # NYSE Volume Delta
|
||||
```
|
||||
|
||||
For equity symbols, use dataset `XNAS.ITCH` (Nasdaq) or other appropriate equity dataset.
|
||||
345
skills/databento/scripts/fetch_ohlcv.py
Normal file
345
skills/databento/scripts/fetch_ohlcv.py
Normal file
@@ -0,0 +1,345 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Databento OHLCV Data Fetcher
|
||||
|
||||
Standard pattern for fetching OHLCV data with built-in best practices:
|
||||
- Automatic cost estimation before fetch
|
||||
- Error handling with retries
|
||||
- Post-fetch data validation
|
||||
- Export options (CSV/pandas)
|
||||
|
||||
Usage:
|
||||
python fetch_ohlcv.py --symbol ES.c.0 --schema ohlcv-1h --start 2024-01-01 --end 2024-01-31
|
||||
python fetch_ohlcv.py --symbol NQ.c.0 --schema ohlcv-1d --start 2024-01-01 --limit 100
|
||||
python fetch_ohlcv.py --symbol ES.c.0,NQ.c.0 --schema ohlcv-1h --start 2024-01-01 --output data.csv
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
import time
|
||||
|
||||
|
||||
class DatabentoOHLCVFetcher:
    """Fetches OHLCV data from Databento with best practices built-in.

    Template class: the actual MCP tool calls (mcp__databento__*) are stubbed
    out and documented inline; integrate them where noted.

    BUG FIX: the class was originally declared as ``DatabentoPHTLCVFetcher``
    (typo) while ``main()`` instantiated ``DatabentOHLCVFetcher`` — neither
    name matched, causing a NameError at runtime. The class is now named
    ``DatabentoOHLCVFetcher`` and both legacy names are kept as aliases below
    for backward compatibility.
    """

    def __init__(self, dataset: str = "GLBX.MDP3", stype_in: str = "continuous"):
        """
        Initialize fetcher.

        Args:
            dataset: Dataset code (default: GLBX.MDP3 for ES/NQ)
            stype_in: Input symbol type (default: continuous)
        """
        self.dataset = dataset
        self.stype_in = stype_in
        self.max_retries = 3
        self.retry_delay = 2  # seconds between retry attempts

    def estimate_cost(
        self,
        symbols: str,
        schema: str,
        start: str,
        end: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Estimate cost before fetching data.

        Args:
            symbols: Comma-separated symbol list
            schema: Data schema (e.g., ohlcv-1h)
            start: Start date (YYYY-MM-DD)
            end: End date (optional)

        Returns:
            Cost estimation result (simulated in this template; always 0.0)
        """
        print(f"[COST CHECK] Estimating cost for {symbols} ({schema})...")

        # NOTE: In actual usage, this would call the MCP tool:
        # mcp__databento__metadata_get_cost(
        #     dataset=self.dataset,
        #     start=start,
        #     end=end,
        #     symbols=symbols,
        #     schema=schema,
        #     stype_in=self.stype_in
        # )

        # For this template, we simulate the response
        print("[NOTE] This template script demonstrates the pattern.")
        print("[NOTE] In actual usage, integrate with MCP tools directly.")

        return {
            "estimated_cost_usd": 0.0,
            "estimated_size_mb": 0.0,
            "note": "Call mcp__databento__metadata_get_cost here"
        }

    def validate_dataset_range(self) -> Dict[str, str]:
        """
        Validate dataset availability.

        Returns:
            Dataset date range (simulated in this template)
        """
        print(f"[VALIDATION] Checking dataset availability for {self.dataset}...")

        # NOTE: In actual usage, this would call:
        # mcp__databento__metadata_get_dataset_range(dataset=self.dataset)

        return {
            "start_date": "2000-01-01",
            "end_date": datetime.now().strftime("%Y-%m-%d"),
            "note": "Call mcp__databento__metadata_get_dataset_range here"
        }

    def fetch_data(
        self,
        symbols: str,
        schema: str,
        start: str,
        end: Optional[str] = None,
        limit: Optional[int] = None,
        check_cost: bool = True
    ) -> Dict[str, Any]:
        """
        Fetch OHLCV data with retries and error handling.

        Args:
            symbols: Comma-separated symbol list
            schema: Data schema (e.g., ohlcv-1h, ohlcv-1d)
            start: Start date (YYYY-MM-DD)
            end: End date (optional)
            limit: Maximum number of records (optional)
            check_cost: Whether to check cost before fetching (default: True)

        Returns:
            Fetched data (simulated in this template)

        Raises:
            SystemExit: if the user declines a fetch whose estimated cost > $10
            Exception: re-raised after all retry attempts are exhausted
        """
        # Step 1: Cost check (if enabled)
        if check_cost:
            cost_info = self.estimate_cost(symbols, schema, start, end)
            print(f"[COST] Estimated cost: ${cost_info.get('estimated_cost_usd', 0):.2f}")
            print(f"[COST] Estimated size: {cost_info.get('estimated_size_mb', 0):.2f} MB")

            # Prompt for confirmation if cost is high
            estimated_cost = cost_info.get('estimated_cost_usd', 0)
            if estimated_cost > 10:
                response = input(f"\nEstimated cost is ${estimated_cost:.2f}. Continue? (y/n): ")
                if response.lower() != 'y':
                    print("[CANCELLED] Data fetch cancelled by user.")
                    sys.exit(0)

        # Step 2: Validate dataset
        dataset_range = self.validate_dataset_range()
        print(f"[DATASET] Available range: {dataset_range.get('start_date')} to {dataset_range.get('end_date')}")

        # Step 3: Fetch data with retries
        for attempt in range(self.max_retries):
            try:
                print(f"\n[FETCH] Attempt {attempt + 1}/{self.max_retries}")
                print(f"[FETCH] Fetching {symbols} ({schema}) from {start} to {end or 'now'}...")

                # NOTE: In actual usage, this would call:
                # data = mcp__databento__timeseries_get_range(
                #     dataset=self.dataset,
                #     symbols=symbols,
                #     schema=schema,
                #     start=start,
                #     end=end,
                #     stype_in=self.stype_in,
                #     stype_out="instrument_id",
                #     limit=limit
                # )

                # Simulate successful fetch
                print("[SUCCESS] Data fetched successfully!")
                return {
                    "data": [],
                    "record_count": 0,
                    "note": "Call mcp__databento__timeseries_get_range here"
                }

            except Exception as e:
                print(f"[ERROR] Attempt {attempt + 1} failed: {str(e)}")

                if attempt < self.max_retries - 1:
                    print(f"[RETRY] Waiting {self.retry_delay} seconds before retry...")
                    time.sleep(self.retry_delay)
                else:
                    print("[FAILED] All retry attempts exhausted.")
                    raise

    def validate_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate fetched data quality.

        Args:
            data: Fetched data

        Returns:
            Validation report (template: always valid; use scripts/validate_data.py
            for the real checks)
        """
        print("\n[VALIDATION] Running data quality checks...")

        # NOTE: Actual validation would:
        # - Check for timestamp gaps
        # - Verify record counts
        # - Validate price ranges
        # - Check for duplicates
        # Use scripts/validate_data.py for comprehensive validation

        return {
            "valid": True,
            "record_count": data.get("record_count", 0),
            "issues": [],
            "note": "Use scripts/validate_data.py for detailed validation"
        }

    def export_csv(self, data: Dict[str, Any], output_path: str):
        """
        Export data to CSV.

        Args:
            data: Data to export
            output_path: Output file path

        NOTE: template stub — prints only; actual export would serialize
        the records to CSV and write them to ``output_path``.
        """
        print(f"\n[EXPORT] Saving data to {output_path}...")

        # NOTE: Actual export would convert data to CSV format
        # and write to file

        print(f"[SUCCESS] Data saved to {output_path}")

    def export_json(self, data: Dict[str, Any], output_path: str):
        """
        Export data to JSON.

        Args:
            data: Data to export
            output_path: Output file path
        """
        print(f"\n[EXPORT] Saving data to {output_path}...")

        with open(output_path, 'w') as f:
            json.dump(data, f, indent=2)

        print(f"[SUCCESS] Data saved to {output_path}")


# Backward-compatible aliases: earlier revisions referenced these names
# (the original class declaration and the name used by main(), respectively).
DatabentoPHTLCVFetcher = DatabentoOHLCVFetcher
DatabentOHLCVFetcher = DatabentoOHLCVFetcher
|
||||
|
||||
def main():
    """Main entry point for CLI usage.

    Parses command-line arguments, fetches OHLCV data (with an optional
    pre-fetch cost check), validates the result, and optionally exports
    it to CSV or JSON based on the output file extension.
    """
    parser = argparse.ArgumentParser(
        description="Fetch OHLCV data from Databento with best practices"
    )

    parser.add_argument(
        "--symbol",
        "-s",
        required=True,
        help="Symbol or comma-separated symbols (e.g., ES.c.0 or ES.c.0,NQ.c.0)"
    )

    parser.add_argument(
        "--schema",
        choices=["ohlcv-1s", "ohlcv-1m", "ohlcv-1h", "ohlcv-1d", "ohlcv-eod"],
        default="ohlcv-1h",
        help="OHLCV schema (default: ohlcv-1h)"
    )

    parser.add_argument(
        "--start",
        required=True,
        help="Start date (YYYY-MM-DD)"
    )

    parser.add_argument(
        "--end",
        help="End date (YYYY-MM-DD, optional)"
    )

    parser.add_argument(
        "--limit",
        type=int,
        help="Maximum number of records (optional)"
    )

    parser.add_argument(
        "--dataset",
        default="GLBX.MDP3",
        help="Dataset code (default: GLBX.MDP3)"
    )

    parser.add_argument(
        "--stype-in",
        default="continuous",
        choices=["continuous", "raw_symbol", "instrument_id"],
        help="Input symbol type (default: continuous)"
    )

    parser.add_argument(
        "--output",
        "-o",
        help="Output file path (CSV or JSON based on extension)"
    )

    parser.add_argument(
        "--no-cost-check",
        action="store_true",
        help="Skip cost estimation (not recommended)"
    )

    args = parser.parse_args()

    # BUG FIX: the original instantiated ``DatabentOHLCVFetcher``, a name
    # never defined in this file (the class is declared as
    # ``DatabentoPHTLCVFetcher``), so every invocation raised NameError.
    fetcher = DatabentoPHTLCVFetcher(
        dataset=args.dataset,
        stype_in=args.stype_in
    )

    try:
        # Fetch data
        data = fetcher.fetch_data(
            symbols=args.symbol,
            schema=args.schema,
            start=args.start,
            end=args.end,
            limit=args.limit,
            check_cost=not args.no_cost_check
        )

        # Validate data
        validation = fetcher.validate_data(data)
        print(f"\n[VALIDATION] Data is valid: {validation['valid']}")
        print(f"[VALIDATION] Record count: {validation['record_count']}")

        if validation['issues']:
            print(f"[WARNING] Issues found: {validation['issues']}")

        # Export if output specified (format chosen by file extension)
        if args.output:
            if args.output.endswith('.csv'):
                fetcher.export_csv(data, args.output)
            elif args.output.endswith('.json'):
                fetcher.export_json(data, args.output)
            else:
                print("[WARNING] Unknown output format. Saving as JSON.")
                fetcher.export_json(data, args.output + '.json')

        print("\n[DONE] Fetch complete!")

    except KeyboardInterrupt:
        print("\n[CANCELLED] Fetch cancelled by user.")
        sys.exit(1)
    except Exception as e:
        print(f"\n[ERROR] Fetch failed: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
388
skills/databento/scripts/session_filter.py
Normal file
388
skills/databento/scripts/session_filter.py
Normal file
@@ -0,0 +1,388 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Databento Trading Session Filter
|
||||
|
||||
Filter market data by trading session (Asian/London/NY):
|
||||
- Session detection using get_session_info
|
||||
- Historical data filtering by session
|
||||
- Session transition handling
|
||||
- Session-specific statistics
|
||||
|
||||
Usage:
|
||||
python session_filter.py --input data.json --session NY --output ny_session.json
|
||||
python session_filter.py --input data.json --session London --stats
|
||||
python session_filter.py --input data.json --sessions Asian,London --output combined.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class TradingSession(Enum):
    """Trading session definitions (in ET).

    Each value is a (name, start_hour, end_hour) tuple of ET clock hours;
    a session whose start hour exceeds its end hour (Asian) crosses midnight.
    """
    ASIAN = ("Asian", 18, 2)    # 6pm - 2am ET
    LONDON = ("London", 2, 8)   # 2am - 8am ET
    NY = ("NY", 8, 16)          # 8am - 4pm ET


class SessionFilter:
    """Filters Databento market data by trading session.

    All ET conversions here are a simplified fixed UTC-5 offset and do NOT
    handle daylight saving time — verify before using around DST changes.
    """

    def __init__(self):
        """Initialize session filter with the name -> enum lookup table."""
        self.sessions = {
            "Asian": TradingSession.ASIAN,
            "London": TradingSession.LONDON,
            "NY": TradingSession.NY
        }

    def get_current_session(self, timestamp: Optional[str] = None) -> str:
        """
        Get trading session for a timestamp.

        Args:
            timestamp: ISO timestamp (optional, defaults to now)

        Returns:
            Session name (Asian, London, or NY)
        """
        # NOTE: In actual usage, this would call:
        # session_info = mcp__databento__get_session_info(timestamp=timestamp)
        # return session_info["session"]

        # For this template, simulate session detection
        if timestamp:
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        else:
            dt = datetime.now(timezone.utc)

        # Convert to ET (simplified fixed offset, no DST)
        et_hour = (dt.hour - 5) % 24

        # Determine session; NY also absorbs the 4pm-6pm post-market gap here
        if 18 <= et_hour or et_hour < 2:
            return "Asian"
        elif 2 <= et_hour < 8:
            return "London"
        else:
            return "NY"

    def is_in_session(
        self,
        timestamp_ns: int,
        session: TradingSession
    ) -> bool:
        """
        Check if timestamp falls within trading session.

        Args:
            timestamp_ns: Timestamp in nanoseconds since Unix epoch
            session: Trading session to check

        Returns:
            True if timestamp is in session
        """
        # Convert nanoseconds to datetime
        ts_seconds = timestamp_ns / 1_000_000_000
        dt = datetime.fromtimestamp(ts_seconds, tz=timezone.utc)

        # Convert to ET (simplified, doesn't handle DST)
        dt_et = dt + timedelta(hours=-5)
        hour = dt_et.hour

        # Check if hour falls within session
        _, start_hour, end_hour = session.value

        if start_hour < end_hour:
            # Session doesn't cross midnight
            return start_hour <= hour < end_hour
        else:
            # Session crosses midnight (Asian session)
            return hour >= start_hour or hour < end_hour

    def filter_by_session(
        self,
        data: List[Dict[str, Any]],
        sessions: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Filter data to include only specified sessions.

        Args:
            data: List of records (each needs ts_event/ts_recv/timestamp)
            sessions: List of session names to include

        Returns:
            Filtered data
        """
        print(f"[FILTER] Filtering {len(data)} records for sessions: {', '.join(sessions)}")

        session_enums = [self.sessions[s] for s in sessions]
        filtered = []

        for record in data:
            # Extract timestamp; records without one are skipped
            ts_ns = record.get("ts_event") or record.get("ts_recv") or record.get("timestamp")
            if not ts_ns:
                continue

            # Keep the record if it falls in ANY of the requested sessions
            for session in session_enums:
                if self.is_in_session(int(ts_ns), session):
                    filtered.append(record)
                    break

        # BUG FIX: original computed len(filtered)/len(data) unconditionally,
        # raising ZeroDivisionError on empty input.
        pct = (len(filtered) / len(data) * 100) if data else 0.0
        print(f"[FILTER] Kept {len(filtered)} records ({pct:.1f}%)")
        return filtered

    def calculate_session_stats(
        self,
        data: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Calculate statistics by trading session.

        Args:
            data: List of records

        Returns:
            Per-session dict with count, volume, trades, and percentage
        """
        print(f"[STATS] Calculating session statistics for {len(data)} records...")

        stats = {
            "Asian": {"count": 0, "volume": 0, "trades": 0},
            "London": {"count": 0, "volume": 0, "trades": 0},
            "NY": {"count": 0, "volume": 0, "trades": 0}
        }

        for record in data:
            ts_ns = record.get("ts_event") or record.get("ts_recv") or record.get("timestamp")
            if not ts_ns:
                continue

            # Attribute the record to the first session it falls in
            for session_name, session_enum in self.sessions.items():
                if self.is_in_session(int(ts_ns), session_enum):
                    stats[session_name]["count"] += 1

                    # Add volume if available
                    if "volume" in record:
                        stats[session_name]["volume"] += record["volume"]

                    # Presence of "size" is treated as a trade record
                    if "size" in record:
                        stats[session_name]["trades"] += 1

                    break

        # Calculate percentages (0 when there are no attributed records)
        total_count = sum(s["count"] for s in stats.values())
        for session_stats in stats.values():
            if total_count > 0:
                session_stats["percentage"] = (session_stats["count"] / total_count) * 100
            else:
                session_stats["percentage"] = 0

        return stats

    def filter_session_transitions(
        self,
        data: List[Dict[str, Any]],
        minutes_before: int = 30,
        minutes_after: int = 30
    ) -> List[Dict[str, Any]]:
        """
        Filter data to include only session transitions (handoffs).

        Args:
            data: List of records
            minutes_before: Minutes before transition to include
            minutes_after: Minutes after transition to include

        Returns:
            Filtered data around session transitions
        """
        print(f"[FILTER] Extracting session transitions ({minutes_before}m before, {minutes_after}m after)...")

        # Session transition times (in ET)
        transitions = [
            2,   # Asian -> London (2am ET)
            8,   # London -> NY (8am ET)
            16,  # NY -> Post-market
            18,  # Post-market -> Asian (6pm ET)
        ]

        filtered = []
        before_s = minutes_before * 60
        after_s = minutes_after * 60

        for record in data:
            ts_ns = record.get("ts_event") or record.get("ts_recv") or record.get("timestamp")
            if not ts_ns:
                continue

            # Convert to ET (simplified fixed offset, no DST)
            ts_seconds = int(ts_ns) / 1_000_000_000
            dt = datetime.fromtimestamp(ts_seconds, tz=timezone.utc)
            dt_et = dt + timedelta(hours=-5)

            # Check if near any transition on the same ET calendar day.
            # NOTE(review): transitions are only compared within the record's
            # own ET day, so windows spanning midnight are not matched across
            # the day boundary — confirm acceptable for your use case.
            for transition_hour in transitions:
                transition_dt = dt_et.replace(hour=transition_hour, minute=0, second=0, microsecond=0)

                # Signed offset: negative = before the transition.
                # BUG FIX: original compared abs(diff) against the SUM of both
                # windows, so minutes_before/minutes_after asymmetry was never
                # honored and each side of the window was twice as wide.
                diff = (dt_et - transition_dt).total_seconds()
                if -before_s <= diff <= after_s:
                    filtered.append(record)
                    break

        print(f"[FILTER] Found {len(filtered)} records near session transitions")
        return filtered

    def print_session_stats(self, stats: Dict[str, Any]):
        """Print session statistics (as produced by calculate_session_stats)."""
        print("\n" + "=" * 60)
        print("SESSION STATISTICS")
        print("=" * 60)

        for session_name in ["Asian", "London", "NY"]:
            session_stats = stats[session_name]
            print(f"\n{session_name} Session:")
            print(f"  Records: {session_stats['count']:,} ({session_stats['percentage']:.1f}%)")
            if session_stats['volume'] > 0:
                print(f"  Volume: {session_stats['volume']:,}")
            if session_stats['trades'] > 0:
                print(f"  Trades: {session_stats['trades']:,}")

        print("\n" + "=" * 60)
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point for CLI usage."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Filter Databento data by trading session"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--input",
|
||||
"-i",
|
||||
required=True,
|
||||
help="Input data file (JSON)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--session",
|
||||
"--sessions",
|
||||
help="Session(s) to filter (Asian, London, NY). Comma-separated for multiple."
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--transitions",
|
||||
action="store_true",
|
||||
help="Filter for session transition periods only"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--minutes-before",
|
||||
type=int,
|
||||
default=30,
|
||||
help="Minutes before transition (default: 30)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--minutes-after",
|
||||
type=int,
|
||||
default=30,
|
||||
help="Minutes after transition (default: 30)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--stats",
|
||||
action="store_true",
|
||||
help="Calculate and display session statistics"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
"-o",
|
||||
help="Output file for filtered data (JSON)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Load data
|
||||
print(f"[LOAD] Loading data from {args.input}...")
|
||||
with open(args.input, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Handle different data formats
|
||||
if isinstance(data, dict) and "data" in data:
|
||||
data = data["data"]
|
||||
|
||||
# Create filter
|
||||
session_filter = SessionFilter()
|
||||
|
||||
# Calculate stats if requested
|
||||
if args.stats:
|
||||
stats = session_filter.calculate_session_stats(data)
|
||||
session_filter.print_session_stats(stats)
|
||||
|
||||
# Filter data
|
||||
filtered_data = data
|
||||
|
||||
if args.transitions:
|
||||
# Filter for session transitions
|
||||
filtered_data = session_filter.filter_session_transitions(
|
||||
filtered_data,
|
||||
minutes_before=args.minutes_before,
|
||||
minutes_after=args.minutes_after
|
||||
)
|
||||
elif args.session:
|
||||
# Filter by specific session(s)
|
||||
sessions = [s.strip() for s in args.session.split(',')]
|
||||
|
||||
# Validate sessions
|
||||
for session in sessions:
|
||||
if session not in ["Asian", "London", "NY"]:
|
||||
print(f"[ERROR] Invalid session: {session}")
|
||||
print("[ERROR] Valid sessions: Asian, London, NY")
|
||||
sys.exit(1)
|
||||
|
||||
filtered_data = session_filter.filter_by_session(filtered_data, sessions)
|
||||
|
||||
# Save filtered data if output specified
|
||||
if args.output:
|
||||
print(f"\n[SAVE] Saving {len(filtered_data)} filtered records to {args.output}...")
|
||||
|
||||
output_data = {
|
||||
"data": filtered_data,
|
||||
"metadata": {
|
||||
"original_count": len(data),
|
||||
"filtered_count": len(filtered_data),
|
||||
"filter_type": "transitions" if args.transitions else "sessions",
|
||||
"sessions": args.session.split(',') if args.session else None
|
||||
}
|
||||
}
|
||||
|
||||
with open(args.output, 'w') as f:
|
||||
json.dump(output_data, f, indent=2)
|
||||
|
||||
print(f"[SUCCESS] Filtered data saved!")
|
||||
|
||||
print("\n[DONE] Session filtering complete!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
496
skills/databento/scripts/validate_data.py
Normal file
496
skills/databento/scripts/validate_data.py
Normal file
@@ -0,0 +1,496 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Databento Data Quality Validator
|
||||
|
||||
Validates market data quality to catch issues early:
|
||||
- Timestamp gap detection
|
||||
- Record count verification
|
||||
- Price range validation (no negative prices, outliers)
|
||||
- Duplicate timestamp detection
|
||||
- Summary quality report
|
||||
|
||||
Usage:
|
||||
python validate_data.py --input data.json
|
||||
python validate_data.py --input data.csv --schema ohlcv-1h
|
||||
python validate_data.py --input data.json --max-gap-minutes 60 --report report.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
class DataValidator:
|
||||
"""Validates Databento market data quality."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
schema: str,
|
||||
max_gap_minutes: int = 60,
|
||||
price_outlier_std: float = 10.0
|
||||
):
|
||||
"""
|
||||
Initialize validator.
|
||||
|
||||
Args:
|
||||
schema: Data schema (ohlcv-1h, trades, mbp-1, etc.)
|
||||
max_gap_minutes: Maximum acceptable gap in minutes
|
||||
price_outlier_std: Standard deviations for outlier detection
|
||||
"""
|
||||
self.schema = schema
|
||||
self.max_gap_seconds = max_gap_minutes * 60
|
||||
self.price_outlier_std = price_outlier_std
|
||||
self.issues: List[Dict[str, Any]] = []
|
||||
|
||||
def validate(self, data: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Run all validation checks on data.
|
||||
|
||||
Args:
|
||||
data: List of records to validate
|
||||
|
||||
Returns:
|
||||
Validation report
|
||||
"""
|
||||
print(f"[VALIDATION] Running quality checks on {len(data)} records...")
|
||||
|
||||
report = {
|
||||
"total_records": len(data),
|
||||
"valid": True,
|
||||
"checks": {}
|
||||
}
|
||||
|
||||
if not data:
|
||||
print("[WARNING] No data to validate!")
|
||||
report["valid"] = False
|
||||
return report
|
||||
|
||||
# Run all validation checks
|
||||
report["checks"]["timestamp_gaps"] = self.check_timestamp_gaps(data)
|
||||
report["checks"]["duplicates"] = self.check_duplicates(data)
|
||||
report["checks"]["price_range"] = self.check_price_range(data)
|
||||
report["checks"]["record_count"] = self.check_record_count(data)
|
||||
report["checks"]["data_completeness"] = self.check_completeness(data)
|
||||
|
||||
# Overall validity
|
||||
report["valid"] = all(
|
||||
check.get("valid", True)
|
||||
for check in report["checks"].values()
|
||||
)
|
||||
|
||||
report["issues"] = self.issues
|
||||
|
||||
return report
|
||||
|
||||
def check_timestamp_gaps(self, data: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Check for unexpected gaps in timestamps.
|
||||
|
||||
Args:
|
||||
data: List of records
|
||||
|
||||
Returns:
|
||||
Gap check report
|
||||
"""
|
||||
print("[CHECK] Checking for timestamp gaps...")
|
||||
|
||||
gaps = []
|
||||
timestamps = self._extract_timestamps(data)
|
||||
|
||||
if len(timestamps) < 2:
|
||||
return {"valid": True, "gaps": [], "note": "Insufficient data for gap detection"}
|
||||
|
||||
# Sort timestamps
|
||||
sorted_ts = sorted(timestamps)
|
||||
|
||||
# Check gaps between consecutive timestamps
|
||||
for i in range(len(sorted_ts) - 1):
|
||||
gap_ns = sorted_ts[i + 1] - sorted_ts[i]
|
||||
gap_seconds = gap_ns / 1_000_000_000
|
||||
|
||||
if gap_seconds > self.max_gap_seconds:
|
||||
gap_info = {
|
||||
"index": i,
|
||||
"gap_seconds": gap_seconds,
|
||||
"gap_minutes": gap_seconds / 60,
|
||||
"before": self._format_timestamp(sorted_ts[i]),
|
||||
"after": self._format_timestamp(sorted_ts[i + 1])
|
||||
}
|
||||
gaps.append(gap_info)
|
||||
|
||||
self.issues.append({
|
||||
"type": "timestamp_gap",
|
||||
"severity": "warning",
|
||||
"message": f"Gap of {gap_seconds / 60:.1f} minutes detected",
|
||||
**gap_info
|
||||
})
|
||||
|
||||
valid = len(gaps) == 0
|
||||
print(f"[CHECK] Found {len(gaps)} gaps > {self.max_gap_seconds / 60} minutes")
|
||||
|
||||
return {
|
||||
"valid": valid,
|
||||
"gaps_found": len(gaps),
|
||||
"gaps": gaps[:10] if gaps else [], # Limit to first 10 for report
|
||||
"total_gaps": len(gaps)
|
||||
}
|
||||
|
||||
def check_duplicates(self, data: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Check for duplicate timestamps.
|
||||
|
||||
Args:
|
||||
data: List of records
|
||||
|
||||
Returns:
|
||||
Duplicate check report
|
||||
"""
|
||||
print("[CHECK] Checking for duplicate timestamps...")
|
||||
|
||||
timestamps = self._extract_timestamps(data)
|
||||
timestamp_counts = defaultdict(int)
|
||||
|
||||
for ts in timestamps:
|
||||
timestamp_counts[ts] += 1
|
||||
|
||||
duplicates = {ts: count for ts, count in timestamp_counts.items() if count > 1}
|
||||
|
||||
if duplicates:
|
||||
for ts, count in list(duplicates.items())[:10]: # Limit to first 10
|
||||
self.issues.append({
|
||||
"type": "duplicate_timestamp",
|
||||
"severity": "error",
|
||||
"timestamp": self._format_timestamp(ts),
|
||||
"count": count,
|
||||
"message": f"Timestamp appears {count} times"
|
||||
})
|
||||
|
||||
valid = len(duplicates) == 0
|
||||
print(f"[CHECK] Found {len(duplicates)} duplicate timestamps")
|
||||
|
||||
return {
|
||||
"valid": valid,
|
||||
"duplicates_found": len(duplicates),
|
||||
"duplicate_timestamps": len(duplicates)
|
||||
}
|
||||
|
||||
def check_price_range(self, data: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Check for invalid or outlier prices.
|
||||
|
||||
Args:
|
||||
data: List of records
|
||||
|
||||
Returns:
|
||||
Price range check report
|
||||
"""
|
||||
print("[CHECK] Checking price ranges...")
|
||||
|
||||
prices = self._extract_prices(data)
|
||||
|
||||
if not prices:
|
||||
return {"valid": True, "note": "No price data to validate"}
|
||||
|
||||
# Check for negative prices
|
||||
negative_prices = [p for p in prices if p < 0]
|
||||
|
||||
# Check for zero prices (unusual for ES/NQ)
|
||||
zero_prices = [p for p in prices if p == 0]
|
||||
|
||||
# Calculate statistics for outlier detection
|
||||
if len(prices) > 1:
|
||||
mean_price = sum(prices) / len(prices)
|
||||
variance = sum((p - mean_price) ** 2 for p in prices) / len(prices)
|
||||
std_dev = variance ** 0.5
|
||||
|
||||
# Detect outliers (> N standard deviations from mean)
|
||||
outliers = []
|
||||
for p in prices:
|
||||
if abs(p - mean_price) > (self.price_outlier_std * std_dev):
|
||||
outliers.append(p)
|
||||
if len(outliers) <= 10: # Limit issues
|
||||
self.issues.append({
|
||||
"type": "price_outlier",
|
||||
"severity": "warning",
|
||||
"price": p,
|
||||
"mean": mean_price,
|
||||
"std_dev": std_dev,
|
||||
"message": f"Price {p:.2f} is {abs(p - mean_price) / std_dev:.1f} std devs from mean"
|
||||
})
|
||||
else:
|
||||
outliers = []
|
||||
mean_price = prices[0] if prices else 0
|
||||
std_dev = 0
|
||||
|
||||
# Report negative prices as errors
|
||||
for p in negative_prices[:10]: # Limit to first 10
|
||||
self.issues.append({
|
||||
"type": "negative_price",
|
||||
"severity": "error",
|
||||
"price": p,
|
||||
"message": f"Negative price detected: {p}"
|
||||
})
|
||||
|
||||
valid = len(negative_prices) == 0 and len(zero_prices) == 0
|
||||
|
||||
print(f"[CHECK] Price range: {min(prices):.2f} to {max(prices):.2f}")
|
||||
print(f"[CHECK] Negative prices: {len(negative_prices)}, Zero prices: {len(zero_prices)}, Outliers: {len(outliers)}")
|
||||
|
||||
return {
|
||||
"valid": valid,
|
||||
"min_price": min(prices),
|
||||
"max_price": max(prices),
|
||||
"mean_price": mean_price,
|
||||
"std_dev": std_dev,
|
||||
"negative_prices": len(negative_prices),
|
||||
"zero_prices": len(zero_prices),
|
||||
"outliers": len(outliers)
|
||||
}
|
||||
|
||||
def check_record_count(self, data: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Verify expected record count.
|
||||
|
||||
Args:
|
||||
data: List of records
|
||||
|
||||
Returns:
|
||||
Record count check report
|
||||
"""
|
||||
print(f"[CHECK] Verifying record count: {len(data)} records")
|
||||
|
||||
# For OHLCV data, can estimate expected count based on timeframe
|
||||
expected_count = self._estimate_expected_count(data)
|
||||
|
||||
valid = True
|
||||
if expected_count and abs(len(data) - expected_count) > (expected_count * 0.1):
|
||||
# More than 10% deviation
|
||||
valid = False
|
||||
self.issues.append({
|
||||
"type": "unexpected_record_count",
|
||||
"severity": "warning",
|
||||
"actual": len(data),
|
||||
"expected": expected_count,
|
||||
"message": f"Expected ~{expected_count} records, got {len(data)}"
|
||||
})
|
||||
|
||||
return {
|
||||
"valid": valid,
|
||||
"actual_count": len(data),
|
||||
"expected_count": expected_count,
|
||||
"note": "Expected count is estimated based on schema and date range"
|
||||
}
|
||||
|
||||
def check_completeness(self, data: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Check data completeness (required fields present).
|
||||
|
||||
Args:
|
||||
data: List of records
|
||||
|
||||
Returns:
|
||||
Completeness check report
|
||||
"""
|
||||
print("[CHECK] Checking data completeness...")
|
||||
|
||||
if not data:
|
||||
return {"valid": False, "note": "No data"}
|
||||
|
||||
# Check required fields based on schema
|
||||
required_fields = self._get_required_fields()
|
||||
|
||||
missing_fields = defaultdict(int)
|
||||
for record in data[:100]: # Sample first 100 records
|
||||
for field in required_fields:
|
||||
if field not in record or record[field] is None:
|
||||
missing_fields[field] += 1
|
||||
|
||||
if missing_fields:
|
||||
for field, count in missing_fields.items():
|
||||
self.issues.append({
|
||||
"type": "missing_field",
|
||||
"severity": "error",
|
||||
"field": field,
|
||||
"missing_count": count,
|
||||
"message": f"Field '{field}' missing in {count} records (sampled)"
|
||||
})
|
||||
|
||||
valid = len(missing_fields) == 0
|
||||
|
||||
return {
|
||||
"valid": valid,
|
||||
"missing_fields": dict(missing_fields) if missing_fields else {}
|
||||
}
|
||||
|
||||
def _extract_timestamps(self, data: List[Dict[str, Any]]) -> List[int]:
|
||||
"""Extract timestamps from records."""
|
||||
timestamps = []
|
||||
for record in data:
|
||||
# Try different timestamp field names
|
||||
ts = record.get("ts_event") or record.get("ts_recv") or record.get("timestamp")
|
||||
if ts:
|
||||
timestamps.append(int(ts))
|
||||
return timestamps
|
||||
|
||||
def _extract_prices(self, data: List[Dict[str, Any]]) -> List[float]:
|
||||
"""Extract prices from records."""
|
||||
prices = []
|
||||
for record in data:
|
||||
# For OHLCV, use close price
|
||||
if "close" in record:
|
||||
# Convert from fixed-point if needed
|
||||
price = record["close"]
|
||||
if isinstance(price, int) and price > 1_000_000:
|
||||
price = price / 1_000_000_000 # Fixed-point conversion
|
||||
prices.append(float(price))
|
||||
# For trades/mbp, use price field
|
||||
elif "price" in record:
|
||||
price = record["price"]
|
||||
if isinstance(price, int) and price > 1_000_000:
|
||||
price = price / 1_000_000_000
|
||||
prices.append(float(price))
|
||||
return prices
|
||||
|
||||
def _format_timestamp(self, ts_ns: int) -> str:
|
||||
"""Format nanosecond timestamp to readable string."""
|
||||
ts_seconds = ts_ns / 1_000_000_000
|
||||
dt = datetime.fromtimestamp(ts_seconds)
|
||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
def _estimate_expected_count(self, data: List[Dict[str, Any]]) -> Optional[int]:
|
||||
"""Estimate expected record count based on schema and date range."""
|
||||
# This is a simplified estimation
|
||||
# In practice, would calculate based on actual date range
|
||||
if "ohlcv" in self.schema:
|
||||
if "1h" in self.schema:
|
||||
return None # ~24 records per day per symbol
|
||||
elif "1d" in self.schema:
|
||||
return None # ~1 record per day per symbol
|
||||
return None
|
||||
|
||||
def _get_required_fields(self) -> List[str]:
|
||||
"""Get required fields for schema."""
|
||||
base_fields = ["ts_event", "ts_recv"]
|
||||
|
||||
if "ohlcv" in self.schema:
|
||||
return base_fields + ["open", "high", "low", "close", "volume"]
|
||||
elif self.schema == "trades":
|
||||
return base_fields + ["price", "size"]
|
||||
elif "mbp" in self.schema:
|
||||
return base_fields + ["bid_px_00", "ask_px_00", "bid_sz_00", "ask_sz_00"]
|
||||
else:
|
||||
return base_fields
|
||||
|
||||
def print_report(self, report: Dict[str, Any]):
|
||||
"""Print validation report to console."""
|
||||
print("\n" + "=" * 60)
|
||||
print("DATA VALIDATION REPORT")
|
||||
print("=" * 60)
|
||||
|
||||
print(f"\nTotal Records: {report['total_records']}")
|
||||
print(f"Overall Valid: {'✓ YES' if report['valid'] else '✗ NO'}")
|
||||
|
||||
print("\n" + "-" * 60)
|
||||
print("CHECK RESULTS")
|
||||
print("-" * 60)
|
||||
|
||||
for check_name, check_result in report["checks"].items():
|
||||
status = "✓" if check_result.get("valid", True) else "✗"
|
||||
print(f"\n{status} {check_name.replace('_', ' ').title()}")
|
||||
for key, value in check_result.items():
|
||||
if key != "valid" and key != "gaps":
|
||||
print(f" {key}: {value}")
|
||||
|
||||
if report["issues"]:
|
||||
print("\n" + "-" * 60)
|
||||
print(f"ISSUES FOUND ({len(report['issues'])})")
|
||||
print("-" * 60)
|
||||
for i, issue in enumerate(report["issues"][:20], 1): # Limit to 20
|
||||
print(f"\n{i}. [{issue['severity'].upper()}] {issue['type']}")
|
||||
print(f" {issue['message']}")
|
||||
|
||||
if len(report["issues"]) > 20:
|
||||
print(f"\n... and {len(report['issues']) - 20} more issues")
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point for CLI usage."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Validate Databento market data quality"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--input",
|
||||
"-i",
|
||||
required=True,
|
||||
help="Input data file (JSON or CSV)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--schema",
|
||||
default="ohlcv-1h",
|
||||
help="Data schema (default: ohlcv-1h)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--max-gap-minutes",
|
||||
type=int,
|
||||
default=60,
|
||||
help="Maximum acceptable gap in minutes (default: 60)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--price-outlier-std",
|
||||
type=float,
|
||||
default=10.0,
|
||||
help="Standard deviations for outlier detection (default: 10.0)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--report",
|
||||
"-r",
|
||||
help="Save report to JSON file"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Load data
|
||||
print(f"[LOAD] Loading data from {args.input}...")
|
||||
with open(args.input, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Handle different data formats
|
||||
if isinstance(data, dict) and "data" in data:
|
||||
data = data["data"]
|
||||
|
||||
# Create validator
|
||||
validator = DataValidator(
|
||||
schema=args.schema,
|
||||
max_gap_minutes=args.max_gap_minutes,
|
||||
price_outlier_std=args.price_outlier_std
|
||||
)
|
||||
|
||||
# Run validation
|
||||
report = validator.validate(data)
|
||||
|
||||
# Print report
|
||||
validator.print_report(report)
|
||||
|
||||
# Save report if requested
|
||||
if args.report:
|
||||
print(f"\n[SAVE] Saving report to {args.report}...")
|
||||
with open(args.report, 'w') as f:
|
||||
json.dump(report, f, indent=2)
|
||||
print(f"[SUCCESS] Report saved!")
|
||||
|
||||
# Exit with appropriate code
|
||||
sys.exit(0 if report["valid"] else 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
564
skills/discord-integration/SKILL.md
Normal file
564
skills/discord-integration/SKILL.md
Normal file
@@ -0,0 +1,564 @@
|
||||
---
|
||||
name: discord-integration
|
||||
version: 1.0.1
|
||||
description: Use when sending Discord messages or encountering bot permission errors - provides three-tier integration methods with automatic fallback (MCP → REST API → Gateway); prevents wasted time on OAuth scope issues
|
||||
triggers:
|
||||
- discord
|
||||
- send message
|
||||
- permission error
|
||||
- bot access
|
||||
- discord.js
|
||||
- mcp discord
|
||||
---
|
||||
|
||||
# Discord Integration
|
||||
|
||||
Comprehensive skill for working with Discord from Claude Code using multiple methods with automatic fallback.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Use this skill when the user wants to:
|
||||
- Send messages to Discord channels
|
||||
- Read messages from Discord channels
|
||||
- Troubleshoot Discord bot connection or permission issues
|
||||
- Set up or configure Discord bot access
|
||||
- Choose the best method for Discord interaction
|
||||
|
||||
**Trigger phrases:**
|
||||
- "Send a message to Discord"
|
||||
- "Post to #channel-name"
|
||||
- "Read messages from Discord"
|
||||
- "Discord bot isn't working"
|
||||
- "Can't access Discord channel"
|
||||
|
||||
## When NOT to Use This Skill
|
||||
|
||||
**Do NOT use this skill for:**
|
||||
- **Discord server management** - Use Discord's admin panel for creating channels, managing roles, or configuring server settings
|
||||
- **Complex bot features** - For advanced slash commands, persistent state, or complex workflows, build a dedicated bot application
|
||||
- **Real-time message listening** - This skill focuses on sending/reading; continuous message monitoring requires a persistent Gateway connection outside this skill's scope
|
||||
- **Voice channel interactions** - This skill only handles text channels
|
||||
|
||||
## Available Bots on This Machine
|
||||
|
||||
### 1. Sombra (Local MCP Bot)
|
||||
- **Purpose:** Claude Code MCP integration (this machine only)
|
||||
- **Token Location:** `~/.claude/discordmcp/.env`
|
||||
- **Client ID:** 1435274653126889544
|
||||
- **Guild:** Nice Wolf Studio (745376918030909471)
|
||||
- **Connection:** discord.js Gateway via MCP server
|
||||
- **Use For:** Claude Code sessions, automated workflows
|
||||
|
||||
### 2. GladOSv3 (tjr-suite Bot)
|
||||
- **Purpose:** Trading bot application
|
||||
- **Token Location:** `~/Dev/tjr-suite/.env`
|
||||
- **Client ID:** 1425850836277395506
|
||||
- **Guild:** Nice Wolf Studio (745376918030909471)
|
||||
- **Connection:** discord.js Gateway (always running)
|
||||
- **Use For:** Trading signals, slash commands, application-specific tasks
|
||||
|
||||
**Important:** These are separate bots. Do not confuse their configurations.
|
||||
|
||||
## Three Integration Methods (Priority Order)
|
||||
|
||||
### Method 1: MCP Tools (PREFERRED)
|
||||
**Availability:** After Claude Code full restart
|
||||
**Tools:** `mcp__discord__send-message`, `mcp__discord__read-messages`
|
||||
|
||||
**Pros:**
|
||||
- Clean, native Claude Code integration
|
||||
- No manual script execution
|
||||
- Persistent connection via discord.js Gateway
|
||||
- Smart auto-discovery (find channels by name or ID)
|
||||
|
||||
**Cons:**
|
||||
- Requires full Claude Code restart to load
|
||||
- Not available immediately after config changes
|
||||
|
||||
### Method 2: Discord.js Gateway Script (FALLBACK)
|
||||
**Availability:** Always (if bot token configured)
|
||||
**Script:** `~/.claude/discordmcp/send-test-message.js`
|
||||
|
||||
**Pros:**
|
||||
- Works immediately, no restart needed
|
||||
- Direct access to discord.js library
|
||||
- Can be customized easily
|
||||
- Reliable connection
|
||||
|
||||
**Cons:**
|
||||
- Requires manual script execution
|
||||
- Less integrated than MCP tools
|
||||
|
||||
### Method 3: REST API (LAST RESORT)
|
||||
**Availability:** Always
|
||||
**Method:** Direct curl to Discord API v10
|
||||
|
||||
**Pros:**
|
||||
- No dependencies
|
||||
- Quick for testing
|
||||
|
||||
**Cons:**
|
||||
- Often gets "Missing Access" (OAuth scope issues)
|
||||
- Requires exact permissions
|
||||
- Less reliable than Gateway methods
|
||||
|
||||
## Integration Method Selection (MANDATORY)
|
||||
|
||||
**ALWAYS follow this priority order - do NOT skip steps:**
|
||||
|
||||
```
|
||||
User requests Discord interaction
|
||||
↓
|
||||
STEP 1: Are MCP tools available? (mcp__discord__send-message)
|
||||
YES → REQUIRED: Use MCP tools (Method 1)
|
||||
NO ↓
|
||||
↓
|
||||
STEP 2: Can we use send-test-message.js? (file exists, token configured)
|
||||
YES → REQUIRED: Use Gateway script (Method 2)
|
||||
NO ↓
|
||||
↓
|
||||
STEP 3: Try REST API (Method 3) with warning about limitations
|
||||
↓
|
||||
FALLBACK LOGIC:
|
||||
- If Method 1 fails → Immediately try Method 2
|
||||
- If Method 2 fails → Try Method 3
|
||||
- If Method 3 fails → Report error with troubleshooting steps
|
||||
```
|
||||
|
||||
**CRITICAL:** You MUST attempt Method 1 (MCP) first, even if you think it won't work. Only fall back after confirming failure.
|
||||
|
||||
## Step-by-Step Instructions
|
||||
|
||||
### Step 1: Check Method Availability
|
||||
|
||||
**Before attempting any Discord interaction, determine which method is available:**
|
||||
|
||||
1. **Try MCP tools first:**
|
||||
```javascript
|
||||
// Attempt to use MCP tool
|
||||
mcp__discord__send-message({
|
||||
channel: "CHANNEL_ID",
|
||||
message: "Test"
|
||||
})
|
||||
```
|
||||
|
||||
If you get "No such tool available" error → MCP not loaded
|
||||
|
||||
2. **Fall back to Gateway script:**
|
||||
```bash
|
||||
# Check if script exists
|
||||
ls -la ~/.claude/discordmcp/send-test-message.js
|
||||
|
||||
# Check if token configured
|
||||
grep DISCORD_TOKEN ~/.claude/discordmcp/.env
|
||||
```
|
||||
|
||||
3. **REST API as last resort** (expect permission issues)
|
||||
|
||||
### Step 2: Execute Based on Available Method
|
||||
|
||||
#### Using Method 1: MCP Tools
|
||||
|
||||
**Send Message:**
|
||||
```javascript
|
||||
mcp__discord__send-message({
|
||||
channel: "1420759585349697710", // Channel ID or name
|
||||
message: "Your message here"
|
||||
})
|
||||
```
|
||||
|
||||
**Read Messages:**
|
||||
```javascript
|
||||
mcp__discord__read-messages({
|
||||
channel: "1420759585349697710",
|
||||
limit: 10 // 1-100
|
||||
})
|
||||
```
|
||||
|
||||
#### Using Method 2: Gateway Script
|
||||
|
||||
**Send Message:**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
node send-test-message.js CHANNEL_ID "Optional custom message"
|
||||
|
||||
# Example:
|
||||
node send-test-message.js 1420759585349697710 "Hello from Claude Code"
|
||||
```
|
||||
|
||||
**Custom Message:**
|
||||
Create a modified version of the script, or pass the message as a command-line argument.
|
||||
|
||||
#### Using Method 3: REST API
|
||||
|
||||
**Send Message:**
|
||||
```bash
|
||||
curl -X POST "https://discord.com/api/v10/channels/CHANNEL_ID/messages" \
|
||||
-H "Authorization: Bot ${DISCORD_BOT_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"content": "Your message"}'
|
||||
```
|
||||
|
||||
**Note:** Will likely fail with "Missing Access" - see troubleshooting.
|
||||
|
||||
### Step 3: Handle Errors
|
||||
|
||||
See troubleshooting section below for specific error codes and solutions.
|
||||
|
||||
## Red Flags - STOP
|
||||
|
||||
**If you encounter these situations, STOP and address them immediately:**
|
||||
|
||||
❌ **Skipping MCP method without trying first**
|
||||
- **Why it's bad:** MCP is the most reliable method; skipping wastes time with inferior fallbacks
|
||||
- **Fix:** Always attempt `mcp__discord__send-message` before assuming it won't work
|
||||
|
||||
❌ **Not checking bot permissions before sending**
|
||||
- **Why it's bad:** Results in repeated "Missing Access" errors and wasted API calls
|
||||
- **Fix:** Verify bot has "View Channel" and "Send Messages" permissions first
|
||||
|
||||
❌ **Retrying same failed method repeatedly**
|
||||
- **Why it's bad:** Same input = same output; you're wasting time and rate limits
|
||||
- **Fix:** After one failure, switch to next method in priority order
|
||||
|
||||
❌ **Missing token/credentials setup**
|
||||
- **Why it's bad:** Nothing will work without valid authentication
|
||||
- **Fix:** Verify token exists in appropriate .env file and test with `/users/@me` endpoint
|
||||
|
||||
❌ **Not handling 403 Forbidden errors properly**
|
||||
- **Why it's bad:** 403 means permission issue, not connection issue
|
||||
- **Fix:** Check channel-specific permissions or switch to Gateway method (better permission handling)
|
||||
|
||||
❌ **Using REST API when Gateway methods are available**
|
||||
- **Why it's bad:** REST API has OAuth scope limitations that cause "Missing Access" errors
|
||||
- **Fix:** Use MCP tools or Gateway script instead - they have proper intents
|
||||
|
||||
## Verification Checklist
|
||||
|
||||
**Before marking Discord integration as complete, verify:**
|
||||
|
||||
- [ ] **MCP tools attempted first** - You tried `mcp__discord__send-message` before other methods
|
||||
- [ ] **Permissions verified for channel** - Bot has "View Channel" and "Send Messages" in target channel
|
||||
- [ ] **Message sent successfully** - Received confirmation (not just "no error")
|
||||
- [ ] **Error handling implemented** - Code handles failures gracefully with fallback or clear error message
|
||||
- [ ] **Correct bot used** - Sombra for MCP/local, GladOSv3 for tjr-suite (not mixed up)
|
||||
- [ ] **Token validated** - Tested token with `/users/@me` endpoint returns valid bot info
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### General Requirements
|
||||
- Bot must be in the target Discord server
|
||||
- Bot must have proper permissions in the target channel
|
||||
- Valid bot token must be configured
|
||||
|
||||
### For MCP Tools
|
||||
- Claude Code must be fully restarted after mcp.json changes
|
||||
- MCP server must be configured in `~/.claude/mcp.json`
|
||||
- Discord MCP server must be built: `cd ~/.claude/discordmcp && npm run build`
|
||||
|
||||
### For Gateway Script
|
||||
- Node.js installed
|
||||
- discord.js package available (in discordmcp directory)
|
||||
- Token in `~/.claude/discordmcp/.env`
|
||||
|
||||
### For REST API
|
||||
- `DISCORD_BOT_TOKEN` environment variable set
|
||||
- Bot must have been added with proper OAuth2 scopes
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Error: "No such tool available: mcp__discord__send-message"
|
||||
|
||||
**Cause:** Discord MCP server not loaded in Claude Code.
|
||||
|
||||
**Solutions:**
|
||||
1. **Verify MCP configuration exists:**
|
||||
```bash
|
||||
grep -A 5 '"discord"' ~/.claude/mcp.json
|
||||
```
|
||||
|
||||
Should show:
|
||||
```json
|
||||
"discord": {
|
||||
"type": "stdio",
|
||||
"command": "node",
|
||||
"args": ["/Users/USERNAME/.claude/discordmcp/build/index.js"]
|
||||
}
|
||||
```
|
||||
|
||||
2. **Verify MCP server is built:**
|
||||
```bash
|
||||
ls -la ~/.claude/discordmcp/build/index.js
|
||||
```
|
||||
|
||||
3. **Test MCP server manually:**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
node build/index.js
|
||||
```
|
||||
Should see: "Discord bot is ready!" and "Discord MCP Server running on stdio"
|
||||
|
||||
4. **Fully restart Claude Code:**
|
||||
- Quit completely (Cmd+Q or equivalent)
|
||||
- Relaunch application
|
||||
- Start new session
|
||||
|
||||
5. **If still not available, use Method 2 (Gateway script) instead.**
|
||||
|
||||
### Error: "Missing Access" (Code 50001)
|
||||
|
||||
**Cause:** Bot doesn't have permission to access that channel.
|
||||
|
||||
**Solutions:**
|
||||
|
||||
1. **Verify bot is in the server:**
|
||||
```bash
|
||||
curl -H "Authorization: Bot TOKEN" \
|
||||
https://discord.com/api/v10/users/@me/guilds
|
||||
```
|
||||
|
||||
2. **Check channel-specific permissions:**
|
||||
- Right-click channel in Discord
|
||||
- Edit Channel → Permissions
|
||||
- Add the bot (Sombra or GladOSv3)
|
||||
- Enable: View Channel, Send Messages, Read Message History
|
||||
|
||||
3. **Grant server-wide permissions:**
|
||||
- Server Settings → Roles
|
||||
- Find bot's role
|
||||
- Enable Send Messages permission
|
||||
|
||||
4. **If using REST API, switch to Gateway method** (discord.js or MCP tools)
|
||||
|
||||
### Error: "Unknown Guild" (Code 10004)
|
||||
|
||||
**Cause:** Bot is not in that Discord server.
|
||||
|
||||
**Solutions:**
|
||||
|
||||
1. **Verify bot membership:**
|
||||
```bash
|
||||
curl -H "Authorization: Bot TOKEN" \
|
||||
https://discord.com/api/v10/users/@me/guilds
|
||||
```
|
||||
|
||||
2. **Invite bot to server using OAuth2 URL:**
|
||||
|
||||
**For Sombra (Local MCP):**
|
||||
```
|
||||
https://discord.com/api/oauth2/authorize?client_id=1435274653126889544&permissions=69632&scope=bot
|
||||
```
|
||||
|
||||
**For GladOSv3 (tjr-suite):**
|
||||
```
|
||||
https://discord.com/api/oauth2/authorize?client_id=1425850836277395506&permissions=69632&scope=bot
|
||||
```
|
||||
|
||||
3. **After inviting, wait a few seconds for the bot to join**
|
||||
|
||||
4. **Verify the bot appears in server member list**
|
||||
|
||||
### Error: Connection Timeout or "Discord bot is ready!" but MCP fails
|
||||
|
||||
**Cause:** Token invalid or bot permissions insufficient.
|
||||
|
||||
**Solutions:**
|
||||
|
||||
1. **Verify token is correct:**
|
||||
```bash
|
||||
curl -H "Authorization: Bot TOKEN" \
|
||||
https://discord.com/api/v10/users/@me
|
||||
```
|
||||
   Should return the bot's user info, not an error.
|
||||
|
||||
2. **Check token hasn't been regenerated:**
|
||||
- Go to Discord Developer Portal
|
||||
- Applications → Your Bot → Bot
|
||||
- If token was regenerated, update configs
|
||||
|
||||
3. **Verify bot has proper Gateway intents:**
|
||||
- In Developer Portal: Bot → Privileged Gateway Intents
|
||||
- Enable: Guilds, Guild Messages (Message Content if reading)
|
||||
|
||||
4. **Check .env file syntax:**
|
||||
```bash
|
||||
cat ~/.claude/discordmcp/.env
|
||||
```
|
||||
   No extra spaces, quotes, or newlines around the token.
|
||||
|
||||
### Error: "Channel not found or not a text channel"
|
||||
|
||||
**Cause:** Invalid channel ID or bot can't see it.
|
||||
|
||||
**Solutions:**
|
||||
|
||||
1. **Verify channel ID is correct:**
|
||||
- Right-click channel in Discord
|
||||
- Copy Channel ID (enable Developer Mode if needed)
|
||||
- Channel IDs are 17-19 digit numbers
|
||||
|
||||
2. **Check bot can see the channel:**
|
||||
- Bot must have "View Channel" permission
|
||||
- Channel must not be in a category the bot can't access
|
||||
|
||||
3. **Try using channel name instead of ID** (if using MCP tools):
|
||||
```javascript
|
||||
mcp__discord__send-message({
|
||||
channel: "bot-testing-grounds", // Name instead of ID
|
||||
message: "Test"
|
||||
})
|
||||
```
|
||||
|
||||
## Bot Configuration Reference
|
||||
|
||||
### Sombra (Local MCP) Configuration
|
||||
|
||||
**Files:**
|
||||
- **Token:** `~/.claude/discordmcp/.env`
|
||||
- **MCP Config:** `~/.claude/mcp.json` (lines 55-61)
|
||||
- **Source:** `~/.claude/discordmcp/src/index.ts`
|
||||
- **Built:** `~/.claude/discordmcp/build/index.js`
|
||||
|
||||
**Environment Variables:**
|
||||
```env
|
||||
DISCORD_TOKEN=MTQzNTI3NDY1MzEyNjg4OTU0NA...
|
||||
DISCORD_CLIENT_ID=1435274653126889544
|
||||
DISCORD_GUILD_ID=745376918030909471
|
||||
DISCORD_ENABLED=true
|
||||
```
|
||||
|
||||
**Rebuild after changes:**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
npm run build
|
||||
```
|
||||
|
||||
### GladOSv3 (tjr-suite) Configuration
|
||||
|
||||
**Files:**
|
||||
- **Token:** `~/Dev/tjr-suite/.env`
|
||||
- **Source:** `~/Dev/tjr-suite/packages/app/src/services/discord/`
|
||||
- **HTTP API:** Running on `http://localhost:3000`
|
||||
|
||||
**Environment Variables:**
|
||||
```env
|
||||
DISCORD_TOKEN=MTQyNTg1MDgzNjI3NzM5NTUwNg...
|
||||
DISCORD_CLIENT_ID=1425850836277395506
|
||||
DISCORD_GUILD_ID=745376918030909471
|
||||
DISCORD_ENABLED=true
|
||||
```
|
||||
|
||||
**Note:** GladOSv3 is always running as part of tjr-suite. Do not confuse with Sombra.
|
||||
|
||||
## Common Channel IDs
|
||||
|
||||
**Nice Wolf Studio Server:**
|
||||
- **Guild ID:** 745376918030909471
|
||||
- **#bot-testing-grounds:** 1420759585349697710
|
||||
|
||||
## Permission Reference
|
||||
|
||||
### Required Bot Permissions
|
||||
|
||||
**Minimum (for message sending):**
|
||||
- View Channels (1024)
|
||||
- Send Messages (2048)
|
||||
|
||||
**Recommended:**
|
||||
- Read Message History (65536)
|
||||
- Add Reactions (64)
|
||||
|
||||
**Permission Integer:** 69632 (includes all recommended)
|
||||
|
||||
### How to Calculate Custom Permissions
|
||||
|
||||
Visit: https://discordapi.com/permissions.html
|
||||
|
||||
Enter desired permissions, copy the integer for OAuth2 URL.
|
||||
|
||||
## Testing Checklist
|
||||
|
||||
When setting up or troubleshooting Discord integration:
|
||||
|
||||
- [ ] Bot token is valid (test with `/users/@me` endpoint)
|
||||
- [ ] Bot is in the correct server (check `/users/@me/guilds`)
|
||||
- [ ] Bot has View Channel permission
|
||||
- [ ] Bot has Send Messages permission in target channel
|
||||
- [ ] Channel ID is correct (17-19 digit number)
|
||||
- [ ] Token configured in correct .env file
|
||||
- [ ] MCP server built (if using MCP tools)
|
||||
- [ ] Claude Code restarted (if using MCP tools)
|
||||
|
||||
## Quick Reference Commands
|
||||
|
||||
### Verify Token
|
||||
```bash
|
||||
curl -H "Authorization: Bot TOKEN" https://discord.com/api/v10/users/@me
|
||||
```
|
||||
|
||||
### List Guilds
|
||||
```bash
|
||||
curl -H "Authorization: Bot TOKEN" https://discord.com/api/v10/users/@me/guilds
|
||||
```
|
||||
|
||||
### Test MCP Server
|
||||
```bash
|
||||
cd ~/.claude/discordmcp && node build/index.js
|
||||
```
|
||||
|
||||
### Send via Gateway Script
|
||||
```bash
|
||||
cd ~/.claude/discordmcp && node send-test-message.js CHANNEL_ID "Message"
|
||||
```
|
||||
|
||||
### Check MCP Config
|
||||
```bash
|
||||
grep -A 5 '"discord"' ~/.claude/mcp.json
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
See `examples.md` in this skill directory for complete working examples.
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- **Setup Guide:** `~/.claude/discordmcp/SETUP_COMPLETE.md`
|
||||
- **Validation Report:** `~/.claude/discordmcp/VALIDATION_REPORT.md`
|
||||
- **Gateway Script:** `~/.claude/discordmcp/send-test-message.js`
|
||||
- **Discord.js Docs:** https://discord.js.org/
|
||||
- **Discord API:** https://discord.com/developers/docs
|
||||
|
||||
---
|
||||
|
||||
## After Using This Skill
|
||||
|
||||
**REQUIRED NEXT STEPS:**
|
||||
|
||||
1. **Verify message sent successfully** - Check verification checklist (6 items) to confirm integration worked
|
||||
2. **Document method used** - Record which method succeeded (MCP/Gateway/REST) for future reference
|
||||
3. **Test error handling** - Verify fallback logic works if primary method fails
|
||||
|
||||
**OPTIONAL NEXT STEPS:**
|
||||
|
||||
- **Set up logging** - Track Discord API calls, errors, and response times for debugging
|
||||
- **Create reusable wrapper** - Build abstraction around working method for consistent usage
|
||||
- **Monitor bot health** - Set up alerts for token expiration or permission changes
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
### Version 1.0.1 (2025-11-14)
|
||||
- **Added** superpowers-style improvements for better agent guidance
|
||||
- **Added** "When NOT to Use" section to prevent misuse
|
||||
- **Enhanced** Integration Method Selection with MANDATORY fallback logic
|
||||
- **Added** "Red Flags - STOP" section with 6 common anti-patterns
|
||||
- **Added** "Verification Checklist" for completion validation
|
||||
- **Improved** frontmatter with version, triggers, and clearer description
|
||||
- **Updated** description to emphasize automatic fallback and OAuth scope issue prevention
|
||||
|
||||
### Version 1.0.0 (Initial)
|
||||
- Initial discord-integration skill
|
||||
- Three-tier integration methods (MCP, Gateway, REST API)
|
||||
- Comprehensive troubleshooting for common errors
|
||||
- Configuration reference for Sombra and GladOSv3 bots
|
||||
517
skills/discord-integration/examples.md
Normal file
517
skills/discord-integration/examples.md
Normal file
@@ -0,0 +1,517 @@
|
||||
# Discord Integration - Examples
|
||||
|
||||
Complete working examples for all three integration methods.
|
||||
|
||||
## Method 1: MCP Tools (After Claude Code Restart)
|
||||
|
||||
### Example 1: Send Simple Message
|
||||
|
||||
**User Request:**
|
||||
> Send a message to Discord channel 1420759585349697710 saying "Test from Claude Code"
|
||||
|
||||
**Assistant Response:**
|
||||
```javascript
|
||||
mcp__discord__send-message({
|
||||
channel: "1420759585349697710",
|
||||
message: "Test from Claude Code"
|
||||
})
|
||||
```
|
||||
|
||||
**Expected Result:**
|
||||
```
|
||||
Message sent successfully to #bot-testing-grounds in Nice Wolf Studio. Message ID: 1234567890123456789
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 2: Send Formatted Message
|
||||
|
||||
**User Request:**
|
||||
> Post a formatted update to the bot-testing-grounds channel
|
||||
|
||||
**Assistant Response:**
|
||||
```javascript
|
||||
mcp__discord__send-message({
|
||||
channel: "1420759585349697710",
|
||||
message: `🤖 **Status Update**
|
||||
|
||||
✅ System: Online
|
||||
✅ Database: Connected
|
||||
✅ API: Healthy
|
||||
|
||||
Last checked: ${new Date().toISOString()}`
|
||||
})
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 3: Read Recent Messages
|
||||
|
||||
**User Request:**
|
||||
> What are the last 10 messages in #bot-testing-grounds?
|
||||
|
||||
**Assistant Response:**
|
||||
```javascript
|
||||
mcp__discord__read-messages({
|
||||
channel: "1420759585349697710",
|
||||
limit: 10
|
||||
})
|
||||
```
|
||||
|
||||
**Expected Result:**
|
||||
```json
|
||||
[
|
||||
{
|
||||
"channel": "#bot-testing-grounds",
|
||||
"server": "Nice Wolf Studio",
|
||||
"author": "Sombra#2394",
|
||||
"content": "Test message",
|
||||
"timestamp": "2025-11-04T14:43:09.073Z"
|
||||
},
|
||||
...
|
||||
]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 4: Using Channel Name (Auto-Discovery)
|
||||
|
||||
**User Request:**
|
||||
> Send "Hello" to the general channel
|
||||
|
||||
**Assistant Response:**
|
||||
```javascript
|
||||
mcp__discord__send-message({
|
||||
channel: "general", // Channel name instead of ID
|
||||
message: "Hello"
|
||||
})
|
||||
```
|
||||
|
||||
**Note:** MCP server will auto-discover channel by name if bot is only in one server.
|
||||
|
||||
---
|
||||
|
||||
## Method 2: Gateway Script (Always Available)
|
||||
|
||||
### Example 5: Send Message via Script
|
||||
|
||||
**User Request:**
|
||||
> Send a message to Discord right now
|
||||
|
||||
**Assistant Response (if MCP not available):**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
node send-test-message.js 1420759585349697710 "Message sent via Gateway script"
|
||||
```
|
||||
|
||||
**Expected Output:**
|
||||
```
|
||||
🔌 Connecting to Discord...
|
||||
✅ Connected to Discord
|
||||
📡 Bot: Sombra#2394
|
||||
🏠 Guilds: 1
|
||||
✅ Found guild: Nice Wolf Studio
|
||||
✅ Found channel: #bot-testing-grounds
|
||||
📤 Sending message...
|
||||
✅ Message sent! ID: 1435281072207171654
|
||||
👋 Disconnected
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 6: Send Multi-Line Message
|
||||
|
||||
**User Request:**
|
||||
> Post a detailed status report to Discord
|
||||
|
||||
**Assistant Response:**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
node send-test-message.js 1420759585349697710 "📊 **Daily Report**
|
||||
|
||||
✅ Tasks completed: 15
|
||||
⏳ In progress: 3
|
||||
🔴 Blocked: 1
|
||||
|
||||
Next update in 24 hours."
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 7: Create Custom Script for Specific Need
|
||||
|
||||
**User Request:**
|
||||
> I need to send messages to multiple channels
|
||||
|
||||
**Assistant Response:**
|
||||
I'll create a custom multi-channel script based on send-test-message.js:
|
||||
|
||||
```javascript
|
||||
// save as: ~/.claude/discordmcp/multi-channel-send.js
|
||||
import { Client, GatewayIntentBits } from 'discord.js';
|
||||
import { config } from 'dotenv';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname, join } from 'path';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
config({ path: join(__dirname, '.env') });
|
||||
|
||||
const client = new Client({
|
||||
intents: [GatewayIntentBits.Guilds, GatewayIntentBits.GuildMessages],
|
||||
});
|
||||
|
||||
async function sendToMultiple() {
|
||||
await client.login(process.env.DISCORD_TOKEN);
|
||||
await new Promise((resolve) => client.once('ready', resolve));
|
||||
|
||||
const channels = [
|
||||
"1420759585349697710", // bot-testing-grounds
|
||||
"ANOTHER_CHANNEL_ID",
|
||||
];
|
||||
|
||||
const message = process.argv[2] || "Multi-channel broadcast";
|
||||
|
||||
for (const channelId of channels) {
|
||||
const channel = await client.channels.fetch(channelId);
|
||||
if (channel && 'send' in channel) {
|
||||
await channel.send(message);
|
||||
console.log(`✅ Sent to ${channel.name}`);
|
||||
}
|
||||
}
|
||||
|
||||
await client.destroy();
|
||||
}
|
||||
|
||||
sendToMultiple().catch(console.error);
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
node multi-channel-send.js "Broadcast message"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Method 3: REST API (Last Resort)
|
||||
|
||||
### Example 8: Simple REST API Test
|
||||
|
||||
**User Request:**
|
||||
> Test if the bot token works
|
||||
|
||||
**Assistant Response:**
|
||||
```bash
|
||||
# Get bot info
|
||||
curl -H "Authorization: Bot ${DISCORD_BOT_TOKEN}" \
|
||||
https://discord.com/api/v10/users/@me | python3 -m json.tool
|
||||
```
|
||||
|
||||
**Expected Output:**
|
||||
```json
|
||||
{
|
||||
"id": "1435274653126889544",
|
||||
"username": "Sombra",
|
||||
"bot": true,
|
||||
"verified": true
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 9: Check Bot's Guilds
|
||||
|
||||
**User Request:**
|
||||
> Which servers is the bot in?
|
||||
|
||||
**Assistant Response:**
|
||||
```bash
|
||||
curl -H "Authorization: Bot ${DISCORD_BOT_TOKEN}" \
|
||||
https://discord.com/api/v10/users/@me/guilds | python3 -m json.tool
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 10: Try Sending via REST (Expect Failure)
|
||||
|
||||
**User Request:**
|
||||
> Try sending via REST API
|
||||
|
||||
**Assistant Response:**
|
||||
```bash
|
||||
curl -X POST "https://discord.com/api/v10/channels/1420759585349697710/messages" \
|
||||
-H "Authorization: Bot ${DISCORD_BOT_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"content": "Test from REST API"}'
|
||||
```
|
||||
|
||||
**Likely Error:**
|
||||
```json
|
||||
{"message": "Missing Access", "code": 50001}
|
||||
```
|
||||
|
||||
**Explanation:** REST API often fails due to OAuth scope issues. Use Gateway method (Method 2) instead.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting Examples
|
||||
|
||||
### Example 11: MCP Tools Not Available
|
||||
|
||||
**User Request:**
|
||||
> Send a message to Discord
|
||||
|
||||
**Assistant Attempt:**
|
||||
```javascript
|
||||
mcp__discord__send-message({
|
||||
channel: "1420759585349697710",
|
||||
message: "Test"
|
||||
})
|
||||
```
|
||||
|
||||
**Error:**
|
||||
```
|
||||
Error: No such tool available: mcp__discord__send-message
|
||||
```
|
||||
|
||||
**Assistant Recovery:**
|
||||
The Discord MCP tools aren't loaded yet. Let me use the Gateway script instead:
|
||||
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
node send-test-message.js 1420759585349697710 "Test"
|
||||
```
|
||||
|
||||
**Success:**
|
||||
```
|
||||
✅ Message sent! ID: 1435281072207171654
|
||||
```
|
||||
|
||||
**Explanation to User:**
|
||||
> The MCP tools require a full Claude Code restart to load. I've used the Gateway script method instead, which works immediately. If you want to use MCP tools in the future, quit and relaunch Claude Code.
|
||||
|
||||
---
|
||||
|
||||
### Example 12: Missing Channel Permissions
|
||||
|
||||
**User Request:**
|
||||
> Send to channel 9999999999999999999
|
||||
|
||||
**Assistant Attempt:**
|
||||
```bash
|
||||
node send-test-message.js 9999999999999999999 "Test"
|
||||
```
|
||||
|
||||
**Error:**
|
||||
```
|
||||
❌ Error: Missing Access
|
||||
DiscordAPIError[50001]: Missing Access
|
||||
```
|
||||
|
||||
**Assistant Response:**
|
||||
The bot doesn't have permission to access that channel. Let me help you fix this:
|
||||
|
||||
1. **Verify the channel ID is correct:**
|
||||
```
|
||||
Right-click the channel in Discord → Copy Channel ID
|
||||
```
|
||||
|
||||
2. **Grant bot permissions:**
|
||||
- Right-click the channel → Edit Channel
|
||||
- Permissions → Add "Sombra" bot
|
||||
- Enable: View Channel, Send Messages
|
||||
|
||||
3. **Try again after granting permissions**
|
||||
|
||||
---
|
||||
|
||||
### Example 13: Bot Not in Server
|
||||
|
||||
**User Request:**
|
||||
> Send to my private server
|
||||
|
||||
**Assistant Check:**
|
||||
```bash
|
||||
curl -H "Authorization: Bot ${DISCORD_BOT_TOKEN}" \
|
||||
https://discord.com/api/v10/users/@me/guilds
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "745376918030909471",
|
||||
"name": "Nice Wolf Studio"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
**Assistant Response:**
|
||||
The bot is only in "Nice Wolf Studio" server currently. To add it to your private server:
|
||||
|
||||
**For Sombra (Local MCP):**
|
||||
```
|
||||
https://discord.com/api/oauth2/authorize?client_id=1435274653126889544&permissions=69632&scope=bot
|
||||
```
|
||||
|
||||
After inviting the bot, you can send messages to channels in that server.
|
||||
|
||||
---
|
||||
|
||||
## Complete Workflow Examples
|
||||
|
||||
### Example 14: First-Time Setup Verification
|
||||
|
||||
**User Request:**
|
||||
> Make sure Discord integration is working
|
||||
|
||||
**Assistant Workflow:**
|
||||
|
||||
**Step 1: Test token**
|
||||
```bash
|
||||
curl -H "Authorization: Bot $(grep DISCORD_TOKEN ~/.claude/discordmcp/.env | cut -d= -f2)" \
|
||||
https://discord.com/api/v10/users/@me
|
||||
```
|
||||
|
||||
**Step 2: Check guilds**
|
||||
```bash
|
||||
curl -H "Authorization: Bot $(grep DISCORD_TOKEN ~/.claude/discordmcp/.env | cut -d= -f2)" \
|
||||
https://discord.com/api/v10/users/@me/guilds
|
||||
```
|
||||
|
||||
**Step 3: Test MCP server**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp && node build/index.js &
|
||||
sleep 3
|
||||
pkill -f "node build/index.js"
|
||||
```
|
||||
|
||||
**Step 4: Send test message**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
node send-test-message.js 1420759585349697710 "✅ Discord integration test successful"
|
||||
```
|
||||
|
||||
**Report:**
|
||||
> ✅ All checks passed! Discord integration is working correctly.
|
||||
> - Bot: Sombra#2394
|
||||
> - Server: Nice Wolf Studio
|
||||
> - Test message sent successfully
|
||||
>
|
||||
> MCP tools will be available after a full Claude Code restart.
|
||||
|
||||
---
|
||||
|
||||
### Example 15: Switching Between Bots
|
||||
|
||||
**User Request:**
|
||||
> Use the tjr-suite bot instead of Sombra
|
||||
|
||||
**Assistant Response:**
|
||||
|
||||
Currently, Sombra (Local MCP) is configured for Claude Code. GladOSv3 (tjr-suite bot) is running separately in the tjr-suite application.
|
||||
|
||||
**To use GladOSv3 from Claude Code:**
|
||||
|
||||
1. **Update MCP config to use tjr-suite token:**
|
||||
```bash
|
||||
# Edit ~/.claude/discordmcp/.env
|
||||
DISCORD_TOKEN=MTQyNTg1MDgzNjI3NzM5NTUwNg... # GladOSv3 token
|
||||
DISCORD_CLIENT_ID=1425850836277395506
|
||||
```
|
||||
|
||||
2. **Rebuild and restart:**
|
||||
```bash
|
||||
cd ~/.claude/discordmcp
|
||||
npm run build
|
||||
```
|
||||
|
||||
3. **Restart Claude Code**
|
||||
|
||||
**Or, keep them separate:**
|
||||
- Use Sombra for Claude Code (current setup)
|
||||
- Use GladOSv3 for tjr-suite app (via slash commands in Discord)
|
||||
|
||||
---
|
||||
|
||||
## Best Practices from Experience
|
||||
|
||||
### Always Use Gateway Methods When Possible
|
||||
|
||||
**❌ Don't:**
|
||||
```bash
|
||||
# REST API (often fails)
|
||||
curl -X POST "..." -H "Authorization: Bot $TOKEN" ...
|
||||
```
|
||||
|
||||
**✅ Do:**
|
||||
```bash
|
||||
# Gateway script (reliable)
|
||||
cd ~/.claude/discordmcp && node send-test-message.js CHANNEL_ID "Message"
|
||||
```
|
||||
|
||||
**Or even better:**
|
||||
```javascript
|
||||
// MCP tools (after restart)
|
||||
mcp__discord__send-message({ channel: "...", message: "..." })
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Check Method Availability First
|
||||
|
||||
**✅ Good Pattern:**
|
||||
```
|
||||
1. Try MCP tool
|
||||
2. If not available, use Gateway script
|
||||
3. Explain REST limitations if asked
|
||||
```
|
||||
|
||||
**❌ Bad Pattern:**
|
||||
```
|
||||
1. Jump straight to REST API
|
||||
2. Get "Missing Access" error
|
||||
3. Struggle with permissions
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Provide Context in Error Messages
|
||||
|
||||
**❌ Don't:**
|
||||
```
|
||||
Error: Missing Access
|
||||
[end response]
|
||||
```
|
||||
|
||||
**✅ Do:**
|
||||
```
|
||||
Error: Missing Access
|
||||
|
||||
This means the bot doesn't have permission to access that channel.
|
||||
|
||||
To fix:
|
||||
1. Right-click the channel → Edit Channel
|
||||
2. Permissions → Add "Sombra"
|
||||
3. Enable: View Channel, Send Messages
|
||||
|
||||
Alternatively, I can use a different method...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary: When to Use Each Method
|
||||
|
||||
| Scenario | Use Method |
|
||||
|----------|------------|
|
||||
| MCP tools available | Method 1 (MCP) |
|
||||
| MCP tools not loaded | Method 2 (Gateway script) |
|
||||
| Need immediate result | Method 2 (Gateway script) |
|
||||
| Testing token validity | Method 3 (REST API) |
|
||||
| Checking bot guilds | Method 3 (REST API) |
|
||||
| Actually sending messages | Method 1 or 2 (NOT REST) |
|
||||
| Custom workflows | Method 2 (customize script) |
|
||||
| Multiple channels | Method 2 (custom script) |
|
||||
|
||||
**Golden Rule:** Gateway > REST for actual message operations.
|
||||
Reference in New Issue
Block a user