From 92f7c7980af8e07c54e1f95edd136125ca4804e3 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sat, 29 Nov 2025 18:30:25 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 26 +++++++++ README.md | 3 + agents/data-quality-steward.md | 27 +++++++++ agents/provider-ops-lead.md | 27 +++++++++ agents/signal-integrator.md | 29 ++++++++++ commands/audit-provider-health.md | 35 ++++++++++++ commands/normalize-signals.md | 35 ++++++++++++ commands/run-waterfall-enrichment.md | 35 ++++++++++++ plugin.lock.json | 85 ++++++++++++++++++++++++++++ skills/identity-resolution/SKILL.md | 31 ++++++++++ skills/provider-scorecard/SKILL.md | 31 ++++++++++ skills/signal-taxonomy/SKILL.md | 31 ++++++++++ skills/validation-rulebook/SKILL.md | 31 ++++++++++ skills/waterfall-blueprint/SKILL.md | 31 ++++++++++ 14 files changed, 457 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 agents/data-quality-steward.md create mode 100644 agents/provider-ops-lead.md create mode 100644 agents/signal-integrator.md create mode 100644 commands/audit-provider-health.md create mode 100644 commands/normalize-signals.md create mode 100644 commands/run-waterfall-enrichment.md create mode 100644 plugin.lock.json create mode 100644 skills/identity-resolution/SKILL.md create mode 100644 skills/provider-scorecard/SKILL.md create mode 100644 skills/signal-taxonomy/SKILL.md create mode 100644 skills/validation-rulebook/SKILL.md create mode 100644 skills/waterfall-blueprint/SKILL.md diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..5dd11f4 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,26 @@ +{ + "name": "data-signal-enrichment", + "description": "Signal normalization, provider orchestration, and data quality governance", + "version": "1.0.0", + "author": { + "name": "GTM Agents", + "email": "opensource@intentgpt.ai" + }, + "skills": [ + "./skills/waterfall-blueprint/SKILL.md", + "./skills/provider-scorecard/SKILL.md", + "./skills/signal-taxonomy/SKILL.md", + "./skills/identity-resolution/SKILL.md", + "./skills/validation-rulebook/SKILL.md" + ], + "agents": [ + "./agents/signal-integrator.md", + "./agents/provider-ops-lead.md", + "./agents/data-quality-steward.md" + ], + "commands": [ + "./commands/run-waterfall-enrichment.md", + "./commands/normalize-signals.md", + "./commands/audit-provider-health.md" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..54b00eb --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# data-signal-enrichment + +Signal normalization, provider orchestration, and data quality governance diff --git a/agents/data-quality-steward.md b/agents/data-quality-steward.md new file mode 100644 index 0000000..18daa1c --- /dev/null +++ b/agents/data-quality-steward.md @@ -0,0 +1,27 @@ +--- +name: data-quality-steward +description: Ensures enriched data meets governance, compliance, and freshness standards before activation. +model: haiku +--- + +# Data Quality Steward Agent + +## Responsibilities +- Define validation rules, freshness thresholds, and confidence scoring for signals. +- Monitor anomalies, duplicates, and schema drift across providers. +- Coordinate remediation workflows with RevOps, engineering, and vendor support. +- Maintain audit logs and compliance documentation. + +## Workflow +1. **Rulebook Setup** – capture required fields, validation logic, and escalation owners. +2. **Monitoring** – run automated tests, compare sample outputs, and flag deviations. +3. **Remediation** – triage issues, reprocess batches, or reroute to backup providers. +4. **Certification** – sign off before data syncs hit production systems. +5. **Reporting** – publish health summaries and incident logs. + +## Outputs +- Validation dashboards + exception queues. +- Compliance-ready audit logs per provider/source. +- Release notes documenting fixes and approvals. + +--- diff --git a/agents/provider-ops-lead.md b/agents/provider-ops-lead.md new file mode 100644 index 0000000..052e45d --- /dev/null +++ b/agents/provider-ops-lead.md @@ -0,0 +1,27 @@ +--- +name: provider-ops-lead +description: Manages enrichment provider SLAs, credit optimization, and failover sequences. +model: haiku +--- + +# Provider Ops Lead Agent + +## Responsibilities +- Maintain provider roster, costs, SLAs, and coverage maps. +- Configure waterfalls, prioritization logic, and throttling safeguards. +- Monitor credit burn, success rates, and error patterns. +- Coordinate with finance/legal for contract updates and compliance reviews. + +## Workflow +1. **Inventory & Cost Review** – track credits, pricing tiers, and utilization trends. +2. **Workflow Design** – translate GTM requirements into provider sequences. +3. **Monitoring & Alerts** – set thresholds for latency, error %, and coverage drop. +4. **Optimization** – test new providers, route traffic based on performance, and sunset poor performers. +5. **Reporting** – publish SLA dashboards and optimization recommendations. + +## Outputs +- Provider scorecards with success/cost metrics. +- Waterfall configuration files + change logs. +- Monthly utilization + optimization report for RevOps leadership. + +--- diff --git a/agents/signal-integrator.md b/agents/signal-integrator.md new file mode 100644 index 0000000..e00307d --- /dev/null +++ b/agents/signal-integrator.md @@ -0,0 +1,29 @@ +--- +name: signal-integrator +description: Normalizes signals from enrichment providers, intent feeds, and internal + telemetry into a unified layer. +model: haiku +--- + + +# Signal Integrator Agent + +## Responsibilities +- Orchestrate provider APIs, webhooks, and batch uploads into a governed signal warehouse. +- Apply identity resolution for accounts, contacts, and opportunities. +- Maintain schema + taxonomy mappings for enrichment attributes and intent topics. +- Publish ready-to-activate datasets for sales, marketing, and CS automations. + +## Workflow +1. **Source Audit** – inventory feeds, credentials, and health metrics. +2. **Normalization** – standardize fields, apply dedupe/conflict logic, and attach metadata. +3. **Scoring & Tagging** – add freshness, confidence, and usage hints for downstream teams. +4. **Distribution** – push curated tables to CDP/CRM and orchestration plugins. +5. **Monitoring** – log provider latency, error rates, and coverage gaps. + +## Outputs +- Unified signal table definitions and sample extracts. +- Source health dashboard with SLA alerts. +- Distribution checklist for each downstream system. + +--- diff --git a/commands/audit-provider-health.md b/commands/audit-provider-health.md new file mode 100644 index 0000000..2d2ec49 --- /dev/null +++ b/commands/audit-provider-health.md @@ -0,0 +1,35 @@ +--- +name: audit-provider-health +description: Evaluates enrichment provider performance, coverage, and compliance posture. +usage: /data-signal-enrichment:audit-provider-health --providers apollo,hunter --window 30d --metrics success,latency,credits +--- + +# Command: audit-provider-health + +## Inputs +- **providers** – comma-separated provider IDs (optional, defaults to all). +- **window** – lookback window (7d, 30d, quarter). +- **metrics** – success | latency | credits | quality (single or multi-select). +- **include-incidents** – true/false to append incident log details. +- **format** – dashboard | memo | csv. + +## Workflow +1. **Data Pull** – aggregate success logs, latency metrics, credit usage, and quality flags. +2. **Benchmarking** – compare against SLA targets, cost thresholds, and historical averages. +3. **Incident Review** – attach outages, compliance issues, or ticket history (if requested). +4. **Recommendation Engine** – flag providers for scale-up, optimization, or pause. +5. **Packaging** – compile memo/dashboard plus Jira-ready action list. + +## Outputs +- Provider scorecard per vendor with KPIs and SLA deltas. +- Optimization recommendations (route changes, contract updates, new tests). +- Incident appendix with remediation owners and due dates. + +## Agent/Skill Invocations +- `provider-ops-lead` – owns analysis + recommendations. +- `data-quality-steward` – confirms quality/compliance findings. +- `signal-integrator` – ensures logging/telemetry coverage. +- `provider-scorecard` skill – standardizes scorecard layout. +- `waterfall-blueprint` skill – updates workflows based on findings. + +--- diff --git a/commands/normalize-signals.md b/commands/normalize-signals.md new file mode 100644 index 0000000..5daf99f --- /dev/null +++ b/commands/normalize-signals.md @@ -0,0 +1,35 @@ +--- +name: normalize-signals +description: Processes enriched datasets into unified schemas with identity resolution and tagging. +usage: /data-signal-enrichment:normalize-signals --source warehouse --outputs crm,cdp --taxonomy intent_v2 +--- + +# Command: normalize-signals + +## Inputs +- **source** – data origin (warehouse, csv, api, webhook). +- **outputs** – comma-separated destinations (crm, cdp, lake, orchestration). +- **taxonomy** – schema/taxonomy version to enforce. +- **window** – time window or batch ID to process. +- **dry-run** – true/false toggle for validation-only runs. + +## Workflow +1. **Schema Detection** – inspect incoming fields, compare to taxonomy, flag gaps. +2. **Identity Resolution** – match accounts/contacts/opps using rules + heuristics. +3. **Normalization** – standardize values, units, topics, and metadata. +4. **Tagging & Scoring** – add freshness, confidence, and signal-type tags. +5. **Distribution** – publish to requested destinations with lineage + control tables. + +## Outputs +- Normalized dataset (per destination) with schema compliance report. +- Identity resolution summary (matches, conflicts, unresolved records). +- Taxonomy drift log + remediation checklist. + +## Agent/Skill Invocations +- `signal-integrator` – runs normalization + distribution. +- `data-quality-steward` – validates schema + scores. +- `provider-ops-lead` – supplies provider metadata for lineage. +- `signal-taxonomy` skill – enforces schema + naming rules. +- `identity-resolution` skill – handles matching heuristics. + +--- diff --git a/commands/run-waterfall-enrichment.md b/commands/run-waterfall-enrichment.md new file mode 100644 index 0000000..7d4c0bf --- /dev/null +++ b/commands/run-waterfall-enrichment.md @@ -0,0 +1,35 @@ +--- +name: run-waterfall-enrichment +description: Executes provider waterfalls with credit governance, failover rules, and delivery targets. +usage: /data-signal-enrichment:run-waterfall-enrichment --type contact --input "Taylor Reed, Nimbus" --sequence apollo,hunter,rocketreach --max-credits 5 +--- + +# Command: run-waterfall-enrichment + +## Inputs +- **type** – contact | company | technographic | intent. +- **input** – record payload (name/company/email/domain/file). +- **sequence** – optional ordered provider list; default uses ops config. +- **max-credits** – ceiling for total credits to consume. +- **delivery** – crm | csv | warehouse specifying output target. + +## Workflow +1. **Request Validation** – confirm fields, dedupe against recent runs, and enforce rate limits. +2. **Sequence Selection** – pull provider order from configs, adjust for overrides or outages. +3. **Execution Loop** – call providers, capture response metadata, apply validation logic. +4. **Aggregation & Scoring** – merge best data, attach confidence + provenance tags. +5. **Delivery & Logging** – push to destination, log credits, update success dashboards, trigger alerts if thresholds exceeded. + +## Outputs +- Enrichment payload (JSON/CSV/CRM payload) with fields, provider, timestamp, confidence. +- Credit + latency report appended to provider utilization table. +- Alert entries when max-credits hit or providers fail. + +## Agent/Skill Invocations +- `provider-ops-lead` – supplies sequence + credit policy. +- `signal-integrator` – handles normalization + delivery. +- `data-quality-steward` – validates results before release. +- `waterfall-blueprint` skill – enforces sequencing template. +- `provider-scorecard` skill – logs performance + cost metrics. + +--- diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..4e4e0c1 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,85 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:gtmagents/gtm-agents:plugins/data-signal-enrichment", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "98fe5e3aa90611e62fee30164fcb1972ae7d6818", + "treeHash": "c09c91a6c497c95d79d6996d4f30c6c32e183d5af79bc650c57d77273bc50e44", + "generatedAt": "2025-11-28T10:17:14.410365Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "data-signal-enrichment", + "description": "Signal normalization, provider orchestration, and data quality governance", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "5e43c762a956d10cb04ba1856bf2941a986cd98a605440d136223b33e761dd86" + }, + { + "path": "agents/signal-integrator.md", + "sha256": "13ccffb91d86952464eeb3f5d736690cc9d0427cd839f03065da56fb6c64ad61" + }, + { + "path": "agents/data-quality-steward.md", + "sha256": "45a8de271151eed25a855a083b2a8083136bccbf245465c31a9cc761ea187f34" + }, + { + "path": "agents/provider-ops-lead.md", + "sha256": "f03ac1dcd188ff0e45a3502e84566606c1488d91a95eac470462210aff794f3d" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "f5e060c035083933ccad21ef3fc47de8337e62081a9b2e291a7d536cd3e59e1b" + }, + { + "path": "commands/run-waterfall-enrichment.md", + "sha256": "c50aba9d9dd2103b91efe95fc53b3c06eb9205429876e525b97992731bd49560" + }, + { + "path": "commands/normalize-signals.md", + "sha256": "b2afbcfdfc8d61efd6fda48c75330ee09aae5e8291d15250b4f2813a0a84ff90" + }, + { + "path": "commands/audit-provider-health.md", + "sha256": "df81fe5d97488e3651e2dbe42cc47a5fddff6326038dcef73d24c1e982e749b7" + }, + { + "path": "skills/identity-resolution/SKILL.md", + "sha256": "de44c7fad810033164ca906db2cfba64d07a5e38a0f64dd22b4505a7fbcf2db3" + }, + { + "path": "skills/waterfall-blueprint/SKILL.md", + "sha256": "2e7098231f57735dff358a8b88ac1700b4c396e9d89b230d1c995429abb3521f" + }, + { + "path": "skills/signal-taxonomy/SKILL.md", + "sha256": "8029f1563404001e0f4796fe248b2d4b68b2bd5ae011669a1f7f0e5a9881f7e2" + }, + { + "path": "skills/validation-rulebook/SKILL.md", + "sha256": "306071078b5a8cb969c80d1a46d98e9e5c5dd01747f7ecdadbb48ae0d46acdff" + }, + { + "path": "skills/provider-scorecard/SKILL.md", + "sha256": "140053a2b95dd288e9d11ecc3df4ff3118ab43db0965fd6e35702fe096c6d032" + } + ], + "dirSha256": "c09c91a6c497c95d79d6996d4f30c6c32e183d5af79bc650c57d77273bc50e44" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/identity-resolution/SKILL.md b/skills/identity-resolution/SKILL.md new file mode 100644 index 0000000..bd595f4 --- /dev/null +++ b/skills/identity-resolution/SKILL.md @@ -0,0 +1,31 @@ +--- +name: identity-resolution +description: Use to match accounts, contacts, and opportunities across enrichment + sources with governed rules. +--- + +# Identity Resolution Playbook Skill + +## When to Use +- Normalizing provider outputs before syncing to CRM/CDP. +- Tying intent, enrichment, and product telemetry to the same account/contact IDs. +- Diagnosing duplicate or conflicting records in downstream systems. + +## Framework +1. **Key Hierarchy** – define primary/secondary keys (domain, account_id, email, person_id). +2. **Matching Logic** – configure deterministic and fuzzy rules, tie-breakers, and confidence scoring. +3. **Conflict Handling** – specify precedence rules, merge policies, and exception queues. +4. **Governance** – document owners, change control, and monitoring cadence. +5. **Audit Trail** – capture lineage metadata, before/after snapshots, and rollback steps. + +## Templates +- Matching rule matrix (field, rule type, weight, confidence threshold). +- Exception queue workflow with owners + SLAs. +- Audit workbook for sampling matches vs source-of-truth. + +## Tips +- Start with deterministic keys (domain, CRM ID) before fuzzy logic to reduce noise. +- Version rules so downstream teams know when behavior changes. +- Pair with `signal-taxonomy` to keep IDs aligned with schema updates. + +--- diff --git a/skills/provider-scorecard/SKILL.md b/skills/provider-scorecard/SKILL.md new file mode 100644 index 0000000..6e40dc0 --- /dev/null +++ b/skills/provider-scorecard/SKILL.md @@ -0,0 +1,31 @@ +--- +name: provider-scorecard +description: Use to track enrichment provider success, cost, latency, and quality + trends. +--- + +# Provider Scorecard Skill + +## When to Use +- Monthly/weekly ops reviews of enrichment vendors. +- Before renewing contracts or adjusting credit allocations. +- After outages or performance regressions to inform routing changes. + +## Framework +1. **Metric Definition** – success rate, fill %, latency, credit burn, confidence. +2. **Data Sources** – provider logs, credit usage tables, QA samples, incident tickets. +3. **Scoring Model** – weight metrics by enrichment type and business priority. +4. **Visualization** – dashboard or memo highlighting leaders/laggards. +5. **Action Tracker** – document optimization experiments and owner assignments. + +## Templates +- Scorecard table per provider. +- Executive summary slide with top takeaways. +- Jira/Asana template for provider improvement tasks. + +## Tips +- Normalize metrics per enrichment type to avoid unfair comparisons. +- Capture qualitative notes (support responsiveness, roadmap access). +- Pair with `waterfall-blueprint` updates to reflect routing decisions. + +--- diff --git a/skills/signal-taxonomy/SKILL.md b/skills/signal-taxonomy/SKILL.md new file mode 100644 index 0000000..fa1eeeb --- /dev/null +++ b/skills/signal-taxonomy/SKILL.md @@ -0,0 +1,31 @@ +--- +name: signal-taxonomy +description: Use to define schemas, topic tags, and lineage metadata for enriched + signals. +--- + +# Signal Taxonomy Skill + +## When to Use +- Normalizing multiple provider outputs into a unified schema. +- Rolling out new intent topics, enrichment attributes, or scoring dimensions. +- Auditing lineage + compliance requirements. + +## Framework +1. **Schema Definition** – fields, datatypes, required/optional flags. +2. **Topic & Attribute Mapping** – align provider-specific attributes to canonical names. +3. **Versioning** – maintain change logs, effective dates, and migration steps. +4. **Validation Rules** – min/max values, allowed lists, dependency rules. +5. **Documentation** – publish data dictionary + lineage diagrams. + +## Templates +- Data dictionary sheet (field, description, source, owner). +- Topic mapping table by provider. +- Migration checklist for schema changes. + +## Tips +- Keep names human-readable for GTM teams but consistent with data warehouse standards. +- Tag every field with owner + refresh cadence. +- Pair with `identity-resolution` to ensure IDs and linkages stay consistent. + +--- diff --git a/skills/validation-rulebook/SKILL.md b/skills/validation-rulebook/SKILL.md new file mode 100644 index 0000000..de84848 --- /dev/null +++ b/skills/validation-rulebook/SKILL.md @@ -0,0 +1,31 @@ +--- +name: validation-rulebook +description: Use to define validation, freshness, and compliance checks for enriched + data. +--- + +# Validation Rulebook Skill + +## When to Use +- Running QA before syncing enrichment outputs to GTM systems. +- Auditing providers after schema changes or outages. +- Documenting compliance-ready validation procedures. + +## Framework +1. **Rule Catalog** – list required fields, regex patterns, allowed values, and dependencies. +2. **Freshness Thresholds** – define time-based SLAs per signal type. +3. **Confidence Scoring** – set weighting for provider, validation method, and history. +4. **Exception Handling** – create workflows for quarantining, reprocessing, or escalation. +5. **Compliance Hooks** – log approvals, data residency tags, and retention policies. + +## Templates +- Rule table (field, check, threshold, severity, owner). +- QA runbook with pre/post sync steps. +- Compliance audit packet ready for legal/infosec reviews. + +## Tips +- Automate as many rules as possible but keep manual sampling for high-impact signals. +- Version rules alongside taxonomy updates. +- Pair with `data-quality-steward` outputs for sign-off history. + +--- diff --git a/skills/waterfall-blueprint/SKILL.md b/skills/waterfall-blueprint/SKILL.md new file mode 100644 index 0000000..aa32494 --- /dev/null +++ b/skills/waterfall-blueprint/SKILL.md @@ -0,0 +1,31 @@ +--- +name: waterfall-blueprint +description: Use to design provider sequences, throttling logic, and credit policies + for enrichment waterfalls. +--- + +# Waterfall Blueprint Skill + +## When to Use +- Building contact/company enrichment workflows across multiple providers. +- Updating fallback rules after provider outages or cost shifts. +- Documenting waterfall logic for RevOps + engineering handoffs. + +## Framework +1. **Goal & Constraints** – define enrichment type, data requirements, credit ceiling, SLA. +2. **Provider Catalog** – list eligible providers with success %, latency, compliance notes. +3. **Routing Logic** – determine sequence, branching, and retry intervals. +4. **Safeguards** – set throttles, dedupe checks, and exception triggers. +5. **Versioning** – log changes, approvals, and effective dates. + +## Templates +- Waterfall diagram (sequence, inputs, outputs, fallback paths). +- Routing table (provider, criteria, cost, notes). +- Change log with owners + rationale. + +## Tips +- Keep sequences short for real-time use cases; reserve long chains for batch mode. +- Use A/B tests to validate new providers before full rollout. +- Pair with `provider-scorecard` to continuously optimize routing. + +---