Initial commit
This commit is contained in:
105
skills/extract_from_pdfs/assets/schema_template.json
Normal file
105
skills/extract_from_pdfs/assets/schema_template.json
Normal file
@@ -0,0 +1,105 @@
|
||||
{
|
||||
"_comment": "This is a template extraction schema. Customize for your specific use case.",
|
||||
"_instructions": "Fill in the sections below with your specific extraction requirements.",
|
||||
|
||||
"objective": "carefully analyze this paper and extract [DESCRIBE YOUR DATA TYPE, e.g., 'empirical observations of X', 'experimental measurements of Y', etc.]",
|
||||
|
||||
"system_context": "You are a scientific research assistant specializing in [YOUR DOMAIN, e.g., 'ecology', 'chemistry', 'medicine', etc.]. Your task is to analyze scientific papers and extract structured data for systematic review and meta-analysis.",
|
||||
|
||||
"instructions": [
|
||||
"Determine if the paper contains [YOUR CRITERIA, e.g., 'primary empirical data']",
|
||||
"If present, extract all [YOUR RECORD TYPE, e.g., 'observation records', 'measurements', 'outcomes']",
|
||||
"For each record, extract the following information: [LIST KEY FIELDS]"
|
||||
],
|
||||
|
||||
"analysis_steps": [
|
||||
"1. Identify and quote relevant sections containing [YOUR DATA TYPE]",
|
||||
"2. List out each [RECORD UNIT, e.g., 'species', 'compound', 'patient cohort']",
|
||||
"3. For each unit, extract required information and quote supporting text",
|
||||
"4. [ADD DOMAIN-SPECIFIC VALIDATION STEPS]",
|
||||
"5. Double-check for accuracy and completeness"
|
||||
],
|
||||
|
||||
"important_notes": [
|
||||
"Only include PRIMARY data from this paper, not secondary sources",
|
||||
"If a record involves multiple [UNITS], separate into individual records",
|
||||
"Do not add fields not in the schema",
|
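||||
"Use 'none' for unknown text values and empty lists for unknown list values; omit optional non-text fields (e.g., numeric fields) when the value is not reported",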
|
||||
"List names exactly as they appear in the source"
|
||||
],
|
||||
|
||||
"output_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"has_relevant_data": {
|
||||
"type": "boolean",
|
||||
"description": "Whether the paper contains the target data type"
|
||||
},
|
||||
"data_description": {
|
||||
"type": "string",
|
||||
"description": "Brief explanation of what data is present"
|
||||
},
|
||||
"records": {
|
||||
"type": "array",
|
||||
"description": "List of extracted records",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"$comment": "CUSTOMIZE THESE FIELDS FOR YOUR USE CASE",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "Geographic location (if applicable)"
|
||||
},
|
||||
"subject": {
|
||||
"type": "string",
|
||||
"description": "Main subject of the record (species, compound, etc.)"
|
||||
},
|
||||
"measurement_type": {
|
||||
"type": "string",
|
||||
"description": "Type of measurement or observation"
|
||||
},
|
||||
"value": {
|
||||
"type": ["number", "string"],
|
||||
"description": "Measured or observed value"
|
||||
},
|
||||
"units": {
|
||||
"type": "string",
|
||||
"description": "Units of measurement"
|
||||
},
|
||||
"method": {
|
||||
"type": "string",
|
||||
"description": "Brief description of methodology"
|
||||
},
|
||||
"sample_size": {
|
||||
"type": "integer",
|
||||
"description": "Sample size if applicable"
|
||||
},
|
||||
"notes": {
|
||||
"type": "string",
|
||||
"description": "Additional relevant notes"
|
||||
}
|
||||
},
|
||||
"required": ["subject"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["has_relevant_data", "records"]
|
||||
},
|
||||
|
||||
"output_example": {
|
||||
"has_relevant_data": true,
|
||||
"data_description": "Paper reports 1 observation record from 1 location",
|
||||
"records": [
|
||||
{
|
||||
"location": "Example Location",
|
||||
"subject": "Example Subject",
|
||||
"measurement_type": "Example Type",
|
||||
"value": 42.5,
|
||||
"units": "mg/L",
|
||||
"method": "Brief methodology description",
|
||||
"sample_size": 20,
|
||||
"notes": "Any relevant notes"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user