Initial commit
This commit is contained in:
62
skills/extract_from_pdfs/assets/api_config_template.json
Normal file
62
skills/extract_from_pdfs/assets/api_config_template.json
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"_comment": "Configuration for API validation in step 05",
|
||||
"_instructions": [
|
||||
"Specify which external APIs to use for validating/enriching each field",
|
||||
"Available APIs:",
|
||||
" - gbif_taxonomy: GBIF for biological taxonomy",
|
||||
" - wfo_plants: World Flora Online for plant names",
|
||||
" - geonames: GeoNames for geographic locations (requires a free account; see api_specific_settings.geonames)",
|
||||
" - geocode: OpenStreetMap Nominatim for geocoding",
|
||||
" - pubchem: PubChem for chemical compounds",
|
||||
" - ncbi_gene: NCBI Gene database",
|
||||
"Customize the field_mappings below based on your extraction schema"
|
||||
],
|
||||
|
||||
"field_mappings": {
|
||||
"_example_species_field": {
|
||||
"api": "gbif_taxonomy",
|
||||
"output_field": "validated_species",
|
||||
"description": "Validate species names against GBIF"
|
||||
},
|
||||
|
||||
"_example_location_field": {
|
||||
"api": "geocode",
|
||||
"output_field": "geocoded_location",
|
||||
"description": "Geocode location to lat/lon coordinates"
|
||||
},
|
||||
|
||||
"_example_compound_field": {
|
||||
"api": "pubchem",
|
||||
"output_field": "validated_compound",
|
||||
"description": "Validate chemical compound names"
|
||||
}
|
||||
},
|
||||
|
||||
"nested_field_mappings": {
|
||||
"_comment": "For fields nested in the 'records' array",
|
||||
"_example": "records.species would validate the 'species' field within each record",
|
||||
|
||||
"records.species": {
|
||||
"api": "gbif_taxonomy",
|
||||
"output_field": "validated_species"
|
||||
},
|
||||
|
||||
"records.location": {
|
||||
"api": "geocode",
|
||||
"output_field": "coordinates"
|
||||
}
|
||||
},
|
||||
|
||||
"api_specific_settings": {
|
||||
"geonames": {
|
||||
"_note": "Requires free account at geonames.org",
|
||||
"_setup": "Set GEONAMES_USERNAME environment variable"
|
||||
},
|
||||
|
||||
"rate_limits": {
|
||||
"_comment": "Be respectful of API rate limits (the public Nominatim service allows at most 1 request per second)",
|
||||
"default_delay_seconds": 0.5,
|
||||
"nominatim_delay_seconds": 1.0
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user