Files
2025-11-29 18:02:40 +08:00

63 lines
1.8 KiB
JSON

{
"_comment": "Configuration for API validation in step 05",
"_instructions": [
"Specify which external APIs to use for validating/enriching each field",
"Available APIs:",
" - gbif_taxonomy: GBIF for biological taxonomy",
" - wfo_plants: World Flora Online for plant names",
" - geonames: GeoNames for geographic locations (requires account)",
" - geocode: OpenStreetMap Nominatim for geocoding",
" - pubchem: PubChem for chemical compounds",
" - ncbi_gene: NCBI Gene database",
"Customize the field_mappings below based on your extraction schema"
],
"field_mappings": {
"_example_species_field": {
"api": "gbif_taxonomy",
"output_field": "validated_species",
"description": "Validate species names against GBIF"
},
"_example_location_field": {
"api": "geocode",
"output_field": "geocoded_location",
"description": "Geocode location to lat/lon coordinates"
},
"_example_compound_field": {
"api": "pubchem",
"output_field": "validated_compound",
"description": "Validate chemical compound names"
}
},
"nested_field_mappings": {
"_comment": "For fields nested in 'records' array",
"_example": "records.species would validate the 'species' field within each record",
"records.species": {
"api": "gbif_taxonomy",
"output_field": "validated_species"
},
"records.location": {
"api": "geocode",
"output_field": "coordinates"
}
},
"api_specific_settings": {
"geonames": {
"_note": "Requires free account at geonames.org",
"_setup": "Set GEONAMES_USERNAME environment variable"
},
"rate_limits": {
"_comment": "Be respectful of API rate limits",
"default_delay_seconds": 0.5,
"nominatim_delay_seconds": 1.0
}
}
}