{
  "_comment": "Configuration for API validation in step 05",
  "_instructions": [
    "Specify which external APIs to use for validating/enriching each field",
    "Available APIs:",
    " - gbif_taxonomy: GBIF for biological taxonomy",
    " - wfo_plants: World Flora Online for plant names",
    " - geonames: GeoNames for geographic locations (requires account)",
    " - geocode: OpenStreetMap Nominatim for geocoding",
    " - pubchem: PubChem for chemical compounds",
    " - ncbi_gene: NCBI Gene database",
    "Customize the field_mappings below based on your extraction schema"
  ],

  "field_mappings": {
    "_example_species_field": {
      "api": "gbif_taxonomy",
      "output_field": "validated_species",
      "description": "Validate species names against GBIF"
    },

    "_example_location_field": {
      "api": "geocode",
      "output_field": "geocoded_location",
      "description": "Geocode location to lat/lon coordinates"
    },

    "_example_compound_field": {
      "api": "pubchem",
      "output_field": "validated_compound",
      "description": "Validate chemical compound names"
    }
  },

  "nested_field_mappings": {
    "_comment": "For fields nested in 'records' array",
    "_example": "records.species would validate the 'species' field within each record",

    "records.species": {
      "api": "gbif_taxonomy",
      "output_field": "validated_species"
    },

    "records.location": {
      "api": "geocode",
      "output_field": "coordinates"
    }
  },

  "api_specific_settings": {
    "geonames": {
      "_note": "Requires free account at geonames.org",
      "_setup": "Set GEONAMES_USERNAME environment variable"
    },

    "rate_limits": {
      "_comment": "Be respectful of API rate limits",
      "default_delay_seconds": 0.5,
      "nominatim_delay_seconds": 1.0
    }
  }
}