Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5871b37

Browse files
UPDATE
1 parent c59e207 commit 5871b37

File tree

9 files changed

+108
-67
lines changed

9 files changed

+108
-67
lines changed

‎examples/job_description.txt

Whitespace-only changes.

‎examples/news_article.txt

Whitespace-only changes.

‎examples/news_prompt.txt

Whitespace-only changes.

‎examples/news_schema.json

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"type": "object",
3+
"properties": {
4+
"headline": {
5+
"type": "string",
6+
"description": "Main headline of the news article"
7+
},
8+
"summary": {
9+
"type": "string",
10+
"description": "Brief summary of the article"
11+
},
12+
"publication_date": {
13+
"type": ["string", "null"],
14+
"description": "Publication date if mentioned"
15+
},
16+
"author": {
17+
"type": ["string", "null"],
18+
"description": "Author name if mentioned"
19+
},
20+
"location": {
21+
"type": ["string", "null"],
22+
"description": "Geographic location mentioned in the news"
23+
},
24+
"key_people": {
25+
"type": "array",
26+
"items": {
27+
"type": "string"
28+
},
29+
"description": "Names of key people mentioned in the article"
30+
},
31+
"organizations": {
32+
"type": "array",
33+
"items": {
34+
"type": "string"
35+
},
36+
"description": "Organizations or companies mentioned"
37+
},
38+
"category": {
39+
"type": ["string", "null"],
40+
"description": "News category (politics, technology, sports, etc.)"
41+
},
42+
"sentiment": {
43+
"type": ["string", "null"],
44+
"description": "Overall sentiment of the article (positive, negative, neutral)"
45+
}
46+
},
47+
"required": ["headline", "summary", "key_people", "organizations"],
48+
"additionalProperties": false
49+
}

‎examples/recipe.txt

Whitespace-only changes.

‎src/structured_output_cookbook/extractor.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ def extract(
4444
# Use custom prompt or schema default
4545
prompt = system_prompt or schema.get_extraction_prompt()
4646

47+
# Generate schema and ensure additionalProperties is false
48+
schema_dict = schema.model_json_schema()
49+
self._ensure_additional_properties_false(schema_dict)
50+
4751
response = self.client.chat.completions.create(
4852
model=self.config.openai_model,
4953
messages=[
@@ -53,9 +57,9 @@ def extract(
5357
response_format={
5458
"type": "json_schema",
5559
"json_schema": {
56-
"name": schema.get_schema_name().lower(),
60+
"name": schema.get_schema_name().lower().replace(" ", "_"),
5761
"strict": True,
58-
"schema": schema.model_json_schema()
62+
"schema": schema_dict
5963
}
6064
},
6165
timeout=self.config.timeout_seconds
@@ -87,6 +91,24 @@ def extract(
8791
self.logger.error(f"Extraction failed: {e}")
8892
return ExtractionResult.error_result(str(e))
8993

94+
def _ensure_additional_properties_false(self, schema_dict: Dict[str, Any]) -> None:
    """Force ``additionalProperties: false`` on every object schema, in place.

    Walks ``schema_dict`` and every nested schema reachable from it. Each
    node that declares itself an object gets ``additionalProperties`` set
    to ``False``, replacing any value that was already there. A node counts
    as an object when its ``type`` is the string ``"object"`` or a list of
    types that contains ``"object"`` (for example ``["object", "null"]``,
    the list form used for nullable fields).

    Traversal rules:
      * every dict value is descended into, whatever its key (this covers
        ``properties`` maps, ``$defs``, ``items`` given as a single schema,
        and so on);
      * list values are descended into only under ``properties``,
        ``items``, ``anyOf``, ``allOf`` and ``oneOf``, and only their dict
        elements are visited.

    Args:
        schema_dict: JSON-schema dictionary to modify. Non-dict input is
            ignored.

    Returns:
        None. The schema is mutated in place.
    """
    list_keywords = ("properties", "items", "anyOf", "allOf", "oneOf")

    # Explicit stack instead of recursion: no recursion-depth limit, and the
    # walk does not depend on ``self``.
    pending = [schema_dict]
    # Track visited dicts by identity so shared or self-referencing
    # sub-schemas are processed once and cannot loop forever.
    visited = set()

    while pending:
        node = pending.pop()
        if not isinstance(node, dict) or id(node) in visited:
            continue
        visited.add(id(node))

        declared_type = node.get("type")
        is_object = declared_type == "object" or (
            isinstance(declared_type, list) and "object" in declared_type
        )
        if is_object:
            # Set before iterating so the dict is not resized mid-iteration.
            node["additionalProperties"] = False

        for key, value in node.items():
            if isinstance(value, dict):
                pending.append(value)
            elif key in list_keywords and isinstance(value, list):
                pending.extend(item for item in value if isinstance(item, dict))
111+
90112
def extract_with_custom_schema(
91113
self,
92114
text: str,
@@ -107,6 +129,9 @@ def extract_with_custom_schema(
107129
try:
108130
self.logger.info("Starting extraction with custom schema")
109131

132+
# Ensure custom schema has additionalProperties: false
133+
self._ensure_additional_properties_false(schema_dict)
134+
110135
response = self.client.chat.completions.create(
111136
model=self.config.openai_model,
112137
messages=[

‎src/structured_output_cookbook/schemas/base.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,19 @@
33
from abc import ABC, abstractmethod
44
from typing import Any, Dict, Type
55

6-
from pydantic import BaseModel
6+
from pydantic import BaseModel, ConfigDict
77

88

99
class BaseSchema(BaseModel, ABC):
1010
"""Abstract base class for all extraction schemas."""
1111

12+
model_config = ConfigDict(
13+
extra="forbid", # This generates additionalProperties: false
14+
validate_assignment=True,
15+
str_strip_whitespace=True
16+
)
17+
"""Abstract base class for all extraction schemas."""
18+
1219
@classmethod
1320
@abstractmethod
1421
def get_extraction_prompt(cls) -> str:

‎src/structured_output_cookbook/templates/job_description.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Job description extraction schema."""
22

3-
from typing import List, Optional
3+
from typing import List, Union
44
from pydantic import Field
55
from ..schemas.base import BaseSchema
66

@@ -10,41 +10,32 @@ class JobDescriptionSchema(BaseSchema):
1010

1111
title: str = Field(description="Job title or position name")
1212
company: str = Field(description="Company name")
13-
location: Optional[str] = Field(default=None, description="Job location")
14-
employment_type: Optional[str] = Field(
15-
default=None,
13+
location: Union[str, None] = Field(description="Job location")
14+
employment_type: Union[str, None] = Field(
1615
description="Employment type (full-time, part-time, contract, etc.)"
1716
)
18-
experience_level: Optional[str] = Field(
19-
default=None,
17+
experience_level: Union[str, None] = Field(
2018
description="Required experience level (entry, mid, senior, etc.)"
2119
)
22-
salary_range: Optional[str] = Field(
23-
default=None,
20+
salary_range: Union[str, None] = Field(
2421
description="Salary range or compensation information"
2522
)
2623
required_skills: List[str] = Field(
27-
default_factory=list,
2824
description="Required technical skills and technologies"
2925
)
3026
preferred_skills: List[str] = Field(
31-
default_factory=list,
3227
description="Preferred or nice-to-have skills"
3328
)
3429
responsibilities: List[str] = Field(
35-
default_factory=list,
3630
description="Key job responsibilities and duties"
3731
)
3832
requirements: List[str] = Field(
39-
default_factory=list,
4033
description="Job requirements and qualifications"
4134
)
4235
benefits: List[str] = Field(
43-
default_factory=list,
4436
description="Benefits and perks offered"
4537
)
46-
remote_work: Optional[bool] = Field(
47-
default=None,
38+
remote_work: Union[bool, None] = Field(
4839
description="Whether remote work is available"
4940
)
5041

‎src/structured_output_cookbook/templates/recipe.py

Lines changed: 18 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,36 @@
11
"""Recipe extraction schema."""
22

3-
from typing import List, Optional
4-
from pydantic import BaseModel, Field
3+
from typing import List, Union, Dict, Any
4+
from pydantic import BaseModel, Field, ConfigDict
55
from ..schemas.base import BaseSchema
66

77

88
class Ingredient(BaseModel):
99
"""Single ingredient with quantity and unit."""
1010

11+
model_config = ConfigDict(extra="forbid")
12+
1113
name: str = Field(description="Ingredient name")
12-
quantity: Optional[str] = Field(default=None, description="Amount needed")
13-
unit: Optional[str] = Field(default=None, description="Unit of measurement")
14-
notes: Optional[str] = Field(default=None, description="Additional notes")
14+
quantity: Union[str, None] = Field(description="Amount needed")
15+
unit: Union[str, None] = Field(description="Unit of measurement")
16+
notes: Union[str, None] = Field(description="Additional notes")
1517

1618

1719
class RecipeSchema(BaseSchema):
1820
"""Extract structured information from recipes."""
1921

2022
name: str = Field(description="Recipe name or title")
21-
description: Optional[str] = Field(
22-
default=None,
23-
description="Brief description of the dish"
24-
)
25-
cuisine: Optional[str] = Field(
26-
default=None,
27-
description="Cuisine type (Italian, Asian, etc.)"
28-
)
29-
difficulty: Optional[str] = Field(
30-
default=None,
31-
description="Difficulty level (easy, medium, hard)"
32-
)
33-
prep_time: Optional[str] = Field(
34-
default=None,
35-
description="Preparation time"
36-
)
37-
cook_time: Optional[str] = Field(
38-
default=None,
39-
description="Cooking time"
40-
)
41-
total_time: Optional[str] = Field(
42-
default=None,
43-
description="Total time required"
44-
)
45-
servings: Optional[int] = Field(
46-
default=None,
47-
description="Number of servings"
48-
)
49-
ingredients: List[Ingredient] = Field(
50-
default_factory=list,
51-
description="List of ingredients with quantities"
52-
)
53-
instructions: List[str] = Field(
54-
default_factory=list,
55-
description="Step-by-step cooking instructions"
56-
)
57-
tags: List[str] = Field(
58-
default_factory=list,
59-
description="Recipe tags (vegetarian, gluten-free, etc.)"
60-
)
61-
nutrition: Optional[dict] = Field(
62-
default=None,
63-
description="Nutritional information if available"
64-
)
23+
description: Union[str, None] = Field(description="Brief description of the dish")
24+
cuisine: Union[str, None] = Field(description="Cuisine type (Italian, Asian, etc.)")
25+
difficulty: Union[str, None] = Field(description="Difficulty level (easy, medium, hard)")
26+
prep_time: Union[str, None] = Field(description="Preparation time")
27+
cook_time: Union[str, None] = Field(description="Cooking time")
28+
total_time: Union[str, None] = Field(description="Total time required")
29+
servings: Union[int, None] = Field(description="Number of servings")
30+
ingredients: List[Ingredient] = Field(description="List of ingredients with quantities")
31+
instructions: List[str] = Field(description="Step-by-step cooking instructions")
32+
tags: List[str] = Field(description="Recipe tags (vegetarian, gluten-free, etc.)")
33+
nutrition: Union[Dict[str, Any], None] = Field(description="Nutritional information if available")
6534

6635
@classmethod
6736
def get_extraction_prompt(cls) -> str:

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /