{ "_comment": "Configuration for API validation in step 05", "_instructions": [ "Specify which external APIs to use for validating/enriching each field", "Available APIs:", " - gbif_taxonomy: GBIF for biological taxonomy", " - wfo_plants: World Flora Online for plant names", " - geonames: GeoNames for geographic locations (requires a free account; set the GEONAMES_USERNAME environment variable)", " - geocode: OpenStreetMap Nominatim for geocoding", " - pubchem: PubChem for chemical compounds", " - ncbi_gene: NCBI Gene database", "Customize field_mappings and nested_field_mappings below to match your extraction schema" ], "field_mappings": { "_example_species_field": { "api": "gbif_taxonomy", "output_field": "validated_species", "description": "Validate species names against GBIF" }, "_example_location_field": { "api": "geocode", "output_field": "geocoded_location", "description": "Geocode location to lat/lon coordinates" }, "_example_compound_field": { "api": "pubchem", "output_field": "validated_compound", "description": "Validate chemical compound names against PubChem" } }, "nested_field_mappings": { "_comment": "For fields nested in the 'records' array", "_example": "'records.species' validates the 'species' field within each record", "records.species": { "api": "gbif_taxonomy", "output_field": "validated_species" }, "records.location": { "api": "geocode", "output_field": "coordinates" } }, "api_specific_settings": { "geonames": { "_note": "Requires free account at geonames.org", "_setup": "Set GEONAMES_USERNAME environment variable" }, "rate_limits": { "_comment": "Be respectful of API rate limits; Nominatim's usage policy allows at most 1 request per second", "default_delay_seconds": 0.5, "nominatim_delay_seconds": 1.0 } } }