Schema serialization
Contents
Schema serialization#
A Kor schema can be serialized and deserialized to JSON. This lets you store the schema outside of the code.
ATTENTION: This only works with pydantic 1 at the moment.
from kor.nodes import Object, Text, Number
Serialization#
To serialize a schema, just call the json()
method on the schema.
# Schema for a "personal_info" record with three attributes (first name,
# last name, age). Each attribute carries its own (input text, expected
# value) example pairs; the object-level example pairs one input text with
# a list of two extracted records, matching many=True.
schema = Object(
    id="personal_info",
    description="Personal information about a given person.",
    many=True,
    attributes=[
        Text(
            id="first_name",
            description="The first name of the person",
            examples=[("John Smith went to the store", "John")],
        ),
        Text(
            id="last_name",
            description="The last name of the person",
            examples=[("John Smith went to the store", "Smith")],
        ),
        Number(
            id="age",
            description="The age of the person in years.",
            examples=[
                ("23 years old", "23"),
                ("I turned three on sunday", "3"),
            ],
        ),
    ],
    examples=[
        (
            "John Smith was 23 years old. He was very tall. He knew Jane Doe. She was 5 years old.",
            [
                {"first_name": "John", "last_name": "Smith", "age": 23},
                {"first_name": "Jane", "last_name": "Doe", "age": 5},
            ],
        ),
    ],
)

# Emit the JSON form of the schema.
print(schema.json())
{"id":"personal_info","description":"Personal information about a given person.","many":true,"attributes":[{"id":"first_name","description":"The first name of the person","many":false,"examples":[["John Smith went to the store","John"]]},{"id":"last_name","description":"The last name of the person","many":false,"examples":[["John Smith went to the store","Smith"]]},{"id":"age","description":"The age of the person in years.","many":false,"examples":[["23 years old",23],["I turned three on sunday",3]]}],"examples":[["John Smith was 23 years old. He was very tall. He knew Jane Doe. She was 5 years old.",[{"first_name":"John","last_name":"Smith","age":23},{"first_name":"Jane","last_name":"Doe","age":5}]]]}
Deserialization#
Kor lets you define the schema in JSON. The structure of the JSON matches the structure of the Object
type.
The following attribute types must be annotated with a type discriminator ($type
):
Number
Text
Bool
Selection
# JSON text describing a "personal_info" schema. Every entry under
# "attributes" carries a "$type" discriminator ("Text" or "Number" here).
# NOTE(review): the variable name `json` coincides with the standard-library
# module of the same name and would shadow it if that module were imported
# into this namespace.
json = """
{
"id": "personal_info",
"description": "Personal information about a given person.",
"attributes": [
{
"$type": "Text",
"id": "first_name",
"description": "The first name of the person",
"examples": [["John Smith went to the store", "John"]]
},
{
"$type": "Text",
"id": "last_name",
"description": "The last name of the person",
"examples": [["John Smith went to the store", "Smith"]]
},
{
"$type": "Number",
"id": "age",
"description": "The age of the person in years.",
"examples": [["23 years old", "23"], ["I turned three on sunday", "3"]]
}
],
"examples": [
[
"John Smith was 23 years old. He was very tall. He knew Jane Doe. She was 5 years old.",
[
{"first_name": "John", "last_name": "Smith", "age": 23},
{"first_name": "Jane", "last_name": "Doe", "age": 5}
]
]
],
"many": true
}
"""
To deserialize a schema from JSON, simply call the parse_raw()
method.
Deserialization only works with pydantic 1
# Rebuild the schema from the JSON text. If parse_raw() raises
# NotImplementedError, the assignment never happens, so `schema` keeps
# whatever it was bound to before and a notice is printed instead.
try:
    schema = Object.parse_raw(json)
except NotImplementedError:
    print("De-serialization only works with pydantic 1!")
De-serialization only works with pydantic 1!
from kor.extraction import create_extraction_chain
from langchain_openai import ChatOpenAI
# Chat model used for extraction; temperature=0 is set together with
# explicit penalty / top_p values passed through model_kwargs.
llm = ChatOpenAI(
    temperature=0,
    max_tokens=2000,
    model_name="gpt-4o",
    model_kwargs={
        "frequency_penalty": 0,
        "presence_penalty": 0,
        "top_p": 1.0,
    },
)

# Build the extraction chain from the model and the schema, then run it on
# a short sentence and keep only the "data" entry of the result.
chain = create_extraction_chain(llm, schema)
chain.invoke("Eugene was 18 years old a long time ago.")["data"]
{'personal_info': [{'first_name': 'Eugene', 'last_name': '', 'age': '18'}]}
# Build an extraction chain from the same model and schema, then run it on
# a text that mentions two people; only the "data" entry is printed.
chain = create_extraction_chain(llm, schema)
print(
    chain.invoke(
        "My name is Bob Alice and my phone number is (123)-444-9999. I found my true love one"
        " on a blue sunday. Her number was (333)1232832. Her name was Moana Sunrise and she was 20 years old."
    )["data"]
)
{'personal_info': [{'first_name': 'Bob', 'last_name': 'Alice', 'age': ''}, {'first_name': 'Moana', 'last_name': 'Sunrise', 'age': '20'}]}