Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:02:40 +08:00
commit 69617b598e
25 changed files with 5790 additions and 0 deletions

View File

@@ -0,0 +1,105 @@
{
"_comment": "This is a template extraction schema. Customize for your specific use case.",
"_instructions": "Fill in the sections below with your specific extraction requirements.",
"objective": "carefully analyze this paper and extract [DESCRIBE YOUR DATA TYPE, e.g., 'empirical observations of X', 'experimental measurements of Y', etc.]",
"system_context": "You are a scientific research assistant specializing in [YOUR DOMAIN, e.g., 'ecology', 'chemistry', 'medicine', etc.]. Your task is to analyze scientific papers and extract structured data for systematic review and meta-analysis.",
"instructions": [
"Determine if the paper contains [YOUR CRITERIA, e.g., 'primary empirical data']",
"If present, extract all [YOUR RECORD TYPE, e.g., 'observation records', 'measurements', 'outcomes']",
"For each record, extract the following information: [LIST KEY FIELDS]"
],
"analysis_steps": [
"1. Identify and quote relevant sections containing [YOUR DATA TYPE]",
"2. List out each [RECORD UNIT, e.g., 'species', 'compound', 'patient cohort']",
"3. For each unit, extract required information and quote supporting text",
"4. [ADD DOMAIN-SPECIFIC VALIDATION STEPS]",
"5. Double-check for accuracy and completeness"
],
"important_notes": [
"Only include PRIMARY data from this paper, not secondary sources",
"If a record involves multiple [UNITS], separate into individual records",
"Do not add fields not in the schema",
"Use 'none' for unknown text values and empty lists for unknown list values; omit numeric fields (e.g., sample_size) when the value is unknown",
"List names exactly as they appear in the source"
],
"output_schema": {
"type": "object",
"properties": {
"has_relevant_data": {
"type": "boolean",
"description": "Whether the paper contains the target data type"
},
"data_description": {
"type": "string",
"description": "Brief explanation of what data is present"
},
"records": {
"type": "array",
"description": "List of extracted records",
"items": {
"type": "object",
"_comment": "CUSTOMIZE THESE FIELDS FOR YOUR USE CASE",
"properties": {
"location": {
"type": "string",
"description": "Geographic location (if applicable)"
},
"subject": {
"type": "string",
"description": "Main subject of the record (species, compound, etc.)"
},
"measurement_type": {
"type": "string",
"description": "Type of measurement or observation"
},
"value": {
"type": ["number", "string"],
"description": "Measured or observed value"
},
"units": {
"type": "string",
"description": "Units of measurement"
},
"method": {
"type": "string",
"description": "Brief description of methodology"
},
"sample_size": {
"type": "integer",
"description": "Sample size if applicable"
},
"notes": {
"type": "string",
"description": "Additional relevant notes"
}
},
"required": ["subject"]
}
}
},
"required": ["has_relevant_data", "records"]
},
"output_example": {
"has_relevant_data": true,
"data_description": "Paper reports 1 observation record from 1 location",
"records": [
{
"location": "Example Location",
"subject": "Example Subject",
"measurement_type": "Example Type",
"value": 42.5,
"units": "mg/L",
"method": "Brief methodology description",
"sample_size": 20,
"notes": "Any relevant notes"
}
]
}
}