{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4b3a0584-b52c-4873-abb8-8382e13ff5c0",
   "metadata": {},
   "source": [
    "# Type Descriptors\n",
    "\n",
    "Let's explore type-descriptors for a bit.\n",
    "\n",
    "Many LLMs are somewhat finicky, and a slightly better phrased prompt may help improve results. So if you want\n",
    "to use your own type-descriptions, you can define a custom one and pass it as an argument when creating an extraction chain.\n",
    "\n",
    "At the moment, Kor only uses a very limited number of internal types. There's no way to represent a `Union` or even a `Boolean`. For the time being, use a `Text` node (or `str` in pydantic) to capture more complex / missing types."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0b4597b2-2a43-4491-8830-bf9f79428074",
   "metadata": {
    "nbsphinx": "hidden",
    "tags": [
     "remove-cell"
    ]
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import sys\n",
    "\n",
    "sys.path.insert(0, \"../../\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c719e4fc-3ccf-4633-a787-b2fe0d1eac65",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import enum\n",
    "from typing import Optional, List\n",
    "\n",
    "from kor import from_pydantic\n",
    "from pydantic import BaseModel, Field"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a59195d-bdef-47ad-a568-0984ada9259a",
   "metadata": {},
   "source": [
    "## Let's define a schema"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fface268-cda5-430e-a0dc-c354ee4cfe2f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class Action(enum.Enum):\n",
    "    \"\"\"Playback actions that a music request can ask for.\"\"\"\n",
    "\n",
    "    play = \"play\"\n",
    "    stop = \"stop\"\n",
    "    previous = \"previous\"\n",
    "    # Trailing underscore keeps the member name distinct from the builtin `next`;\n",
    "    # the value is still the plain string \"next\".\n",
    "    next_ = \"next\"\n",
    "\n",
    "\n",
    "class MusicRequest(BaseModel):\n",
    "    \"\"\"A request to a music player; every field is optional and defaults to `None`.\n",
    "\n",
    "    `default=None` is spelled out on each field because an `Optional[...]`\n",
    "    annotation alone only implies a default under pydantic 1; pydantic 2 would\n",
    "    treat the field as required.\n",
    "    \"\"\"\n",
    "\n",
    "    song: Optional[List[str]] = Field(\n",
    "        default=None,\n",
    "        description=\"The song(s) that the user would like to be played.\",\n",
    "    )\n",
    "    album: Optional[List[str]] = Field(\n",
    "        default=None,\n",
    "        description=\"The album(s) that the user would like to be played.\",\n",
    "    )\n",
    "    artist: Optional[List[str]] = Field(\n",
    "        default=None,\n",
    "        description=\"The artist(s) whose music the user would like to hear.\",\n",
    "        examples=[(\"Songs by paul simon\", \"paul simon\")],\n",
    "    )\n",
    "    action: Optional[Action] = Field(\n",
    "        default=None,\n",
    "        description=\"The action that should be taken; one of `play`, `stop`, `next`, `previous`\",\n",
    "        examples=[\n",
    "            (\"Please stop the music\", \"stop\"),\n",
    "            (\"play something\", \"play\"),\n",
    "            (\"play a song\", \"play\"),\n",
    "            (\"next song\", \"next\"),\n",
    "        ],\n",
    "    )\n",
    "    volume: Optional[float] = Field(\n",
    "        default=None,\n",
    "        description=\"Set the volume\",\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4fe1ec70-1428-4433-acac-c190674a666e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "schema, validator = from_pydantic(MusicRequest, description=\"Music recorder\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5472725a-95ec-4601-acd0-7e87890a6360",
   "metadata": {},
   "source": [
    "## TypeScript"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8301c624-6933-4cd5-b5b6-1640c19ff32b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from kor import TypeScriptDescriptor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "42e007b7-7aab-4468-9793-7e7514bea98e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "descriptor = TypeScriptDescriptor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "99ed0af7-b608-4b46-96fa-ea9798113760",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "```TypeScript\n",
      "\n",
      "musicrequest: { // Music recorder\n",
      " song: Array<string> // The song(s) that the user would like to be played.\n",
      " album: Array<string> // The album(s) that the user would like to be played.\n",
      " artist: Array<string> // The artist(s) whose music the user would like to hear.\n",
      " action: \"play\" | \"stop\" | \"previous\" | \"next\" // The action that should be taken; one of `play`, `stop`, `next`, `previous`\n",
      " volume: number // Set the volume\n",
      "}\n",
      "```\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(descriptor.describe(schema))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1618bc04-ea40-4fdb-837f-e85e266299dd",
   "metadata": {},
   "source": [
    "## BulletPoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "de9683ac-e5b3-46b2-a456-6beb7a5f38aa",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from kor import BulletPointDescriptor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "19fd08fb-0098-46c8-a540-9d998bc6ee16",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "descriptor = BulletPointDescriptor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c0788a1d-8eb7-442b-be98-1680501befae",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* musicrequest: Object # Music recorder\n",
      "*  song: Text # The song(s) that the user would like to be played.\n",
      "*  album: Text # The album(s) that the user would like to be played.\n",
      "*  artist: Text # The artist(s) whose music the user would like to hear.\n",
      "*  action: Selection # The action that should be taken; one of `play`, `stop`, `next`, `previous`\n",
      "*  volume: Number # Set the volume\n"
     ]
    }
   ],
   "source": [
    "print(descriptor.describe(schema))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5b213137-cdb4-4309-ba2e-3b6bf716a2e2",
   "metadata": {},
   "source": [
    "## Custom\n",
    "\n",
    "Here's an example of how to define your own type descriptor."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "966b3818-b591-41f5-b70e-fb51415bc3ae",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from typing import Any"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6f7526ff-0ee0-4a17-8478-36e1c4445ca7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from kor import TypeScriptDescriptor, Object\n",
    "from kor.nodes import AbstractSchemaNode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "93ad957a-7d6f-4ad3-b824-384cca1aa682",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "class MeowDescriptor(TypeScriptDescriptor):\n",
    "    \"\"\"Descriptor that prints one line per schema node, indented with cat faces.\"\"\"\n",
    "\n",
    "    def visit_default(self, node: \"AbstractSchemaNode\", **kwargs: Any) -> List[str]:\n",
    "        \"\"\"Format a single node as a one-item list of lines.\n",
    "\n",
    "        The nesting level is taken from the mandatory `depth` keyword argument.\n",
    "        \"\"\"\n",
    "        level = kwargs[\"depth\"]\n",
    "        # One ` ~(^._.)` marker per nesting level, padded with a space on each side.\n",
    "        prefix = \" \" + level * \" ~(^._.)\" + \" \"\n",
    "        return [f\"{prefix}{node.id}: {node.__class__.__name__} # {node.description}\"]\n",
    "\n",
    "    def visit_object(self, node: Object, **kwargs: Any) -> List[str]:\n",
    "        \"\"\"Format an object node, then each of its attributes one level deeper.\"\"\"\n",
    "        level = kwargs[\"depth\"]\n",
    "        lines = self.visit_default(node, depth=level)\n",
    "        for attribute in node.attributes:\n",
    "            lines += attribute.accept(self, depth=level + 1)\n",
    "        return lines\n",
    "\n",
    "    def describe(self, node: Object) -> str:\n",
    "        \"\"\"Return the newline-joined lines produced by visiting `node` from depth 0.\"\"\"\n",
    "        return \"\\n\".join(node.accept(self, depth=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "225782dd-20c7-4d96-b10c-57bc8dceb683",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  musicrequest: Object # Music recorder\n",
      "  ~(^._.) song: Text # The song(s) that the user would like to be played.\n",
      "  ~(^._.) album: Text # The album(s) that the user would like to be played.\n",
      "  ~(^._.) artist: Text # The artist(s) whose music the user would like to hear.\n",
      "  ~(^._.) action: Selection # The action that should be taken; one of `play`, `stop`, `next`, `previous`\n",
      "  ~(^._.) volume: Number # Set the volume\n"
     ]
    }
   ],
   "source": [
    "print(MeowDescriptor().describe(schema))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}