Custom Prompt#
Here, we’ll see how to customize the instruction segment of the Kor prompt.
from kor.extraction import create_extraction_chain
from kor.nodes import Object, Text, Number
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
)
schema = Object(
    id="person",
    description="Personal information",
    examples=[
        ("Alice and Bob are friends", [{"first_name": "Alice"}, {"first_name": "Bob"}])
    ],
    attributes=[
        Text(
            id="first_name",
            description="The first name of a person.",
        )
    ],
    many=True,
)
Create a template#
Here we create an instruction template.
The template accepts 2 optional parameters:
1. type_description – will be replaced with the schema type-descriptor.
2. format_instructions – will be replaced with the format instructions of whichever encoder is used.
instruction_template = PromptTemplate(
    input_variables=["format_instructions", "type_description"],
    template=(
        "[Pep talk for your LLM goes here]\n\n"
        "Add some type description\n\n"
        "{type_description}\n\n"  # Can comment out
        "Add some format instructions\n\n"
        "{format_instructions}\n"
        "Suffix here\n"
    ),
)
chain = create_extraction_chain(llm, schema, instruction_template=instruction_template)
print(chain.get_prompts()[0].format_prompt(text="hello").to_string())
[Pep talk for your LLM goes here]
Add some type description
```TypeScript
person: Array<{ // Personal information
first_name: string // The first name of a person.
}>
```
Add some format instructions
Please output the extracted information in CSV format in Excel dialect. Please use a | as the delimiter.
Do NOT add any clarifying information. Output MUST follow the schema above. Do NOT add any additional columns that do not appear in the schema.
Suffix here
Input: Alice and Bob are friends
Output: first_name
Alice
Bob
Input: hello
Output:
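To see the customized prompt in action end to end, you can run the chain on a small input. The snippet below is a minimal sanity check rather than part of the original example: it assumes an OpenAI API key is configured, and the exact call style and output shape depend on your Kor and LangChain versions (older releases documented chain.run(text)["data"], while newer LCEL-based chains are invoked with chain.invoke).
```python
# Minimal sanity check -- not part of the original example.
# Assumes OPENAI_API_KEY is set in the environment.
result = chain.invoke({"text": "Alice and Bob are friends"})

# Inspect the result to see where the parsed extraction lives in your version;
# older Kor releases documented chain.run(text)["data"] for the same purpose.
print(result)
```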
Custom Encoder and TypeDescriptor#
from kor import JSONEncoder, TypeDescriptor


class CatEncoder(JSONEncoder):
    def get_instruction_segment(self) -> str:
        return "Encode your response as Cat JSON enclosed in <😼> tags."


class CatType(TypeDescriptor):
    def describe(self, node: Object) -> str:
        """Describe the schema of the node."""
        return f"A 😼 ate the schema of {type(node)} 😼"
instruction_template = PromptTemplate(
    input_variables=["format_instructions", "type_description"],
    template=(
        "[Pep talk for your LLM goes here]\n\n"
        "Add some type description\n\n"
        "{type_description}\n\n"  # Can comment out
        "Add some format instructions\n\n"
        "{format_instructions}\n"
        "Suffix here\n"
    ),
)
chain = create_extraction_chain(
    llm,
    schema,
    instruction_template=instruction_template,
    encoder_or_encoder_class=CatEncoder,
    type_descriptor=CatType(),
)
print(chain.get_prompts()[0].format_prompt(text="hello").to_string())
[Pep talk for your LLM goes here]
Add some type description
A 😼 ate the schema of <class 'kor.nodes.Object'> 😼
Add some format instructions
Encode your response as Cat JSON enclosed in <😼> tags.
Suffix here
Input: Alice and Bob are friends
Output: <json>{"person": [{"first_name": "Alice"}, {"first_name": "Bob"}]}</json>
Input: hello
Output:
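Note that the few-shot example above still encodes its output with <json> tags: CatEncoder only overrides the instruction segment, not how examples are encoded or how the model's response is parsed. To make the whole round trip use the cat tags, you would also override the encoder's encode and decode methods. The sketch below is a hypothetical extension, assuming the encoder interface exposes those two methods; check the base class in your installed Kor version before relying on the exact signatures.
```python
import json
from typing import Any


class FullCatEncoder(JSONEncoder):
    """Hypothetical encoder that uses <😼> tags for instructions, examples, and parsing."""

    def get_instruction_segment(self) -> str:
        return "Encode your response as Cat JSON enclosed in <😼> tags."

    def encode(self, data: Any) -> str:
        # Encode few-shot example outputs with the same tags the instructions ask for.
        return f"<😼>{json.dumps(data)}</😼>"

    def decode(self, text: str) -> Any:
        # Strip the cat tags (if present) before parsing the JSON payload.
        text = text.strip()
        if text.startswith("<😼>") and text.endswith("</😼>"):
            text = text[len("<😼>") : -len("</😼>")]
        return json.loads(text)
```
Passing FullCatEncoder as encoder_or_encoder_class would then keep the instructions, the encoded examples, and the parsing of the model output consistent with each other.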