Prompty.yaml

# This schema represents the specification file for the prompty
# _frontmatter_, not the content section.

$schema: http://json-schema.org/draft-07/schema#  
$id: http://azureml/sdk-2-0/Prompty.yaml  
title: Prompty front matter schema specification  
description: A specification that describes how to provision a new prompty using definition frontmatter.

type: object 
properties:
  $schema:
    type: string
  # metadata section
  model:
    type: object
    additionalProperties: false
    properties:
      api:
        type: string
        enum: 
        - chat
        - completion
        description: The API to use for the prompty -- this has implications on how the template is processed and how the model is called.
        default: chat

      configuration:
        oneOf:
          - $ref: "#/definitions/azureOpenaiModel"
          - $ref: "#/definitions/openaiModel"
          - $ref: "#/definitions/maasModel"
  
      parameters:
        $ref: "#/definitions/parameters"

      response: 
        type: string
        description: This determines whether the full (raw) response or just the first response in the choice array is returned.
        default: first 
        enum:
          - first
          - full


  name:
    type: string
    description: Name of the prompty
  description:
    type: string
    description: Description of the prompty
  version:
    type: string
    description: Version of the prompty
  authors:
    type: array
    description: Authors of the prompty
    items:
      type: string
  tags:
    type: array
    description: Tags of the prompty
    items:
      type: string

  # not yet supported -- might add later
  # base:
  #   type: string
  #   description: The base prompty to use as a starting point

  sample: 
    oneOf:
      - type: object
        description: The sample to be used in the prompty test execution
        additionalProperties: true
      - type: string
        description: The file to be loaded to be used in the prompty test execution

  # the user can provide a single sample in a file or specify the data inline
  # sample:
  #   messages: 
  #     - role: user
  #       content: where is the nearest coffee shop?
  #     - role: system
  #       content: I'm sorry, I don't know that. Would you like me to look it up for you?
  # or point to a file
  # sample: sample.json
  # also the user can specify the data on the command line
  # pf flow test --flow p.prompty --input my_sample.json
  # if the user runs this command, the sample from the prompty will be used
  # pf flow test --flow p.prompty   
    
  inputs:
    type: object
    description: The inputs to the prompty

  outputs:
    type: object
    description: The outputs of the prompty

  # currently not supported -- might not be needed
  # init_signature:
  #   type: object
  #   description: The signature of the init function

  template:
    type: string
    description: The template engine to be used can be specified here. This is optional.
    enum: [jinja2]
    default: jinja2


additionalProperties: false

definitions:
  # vanilla openai models
  openaiModel:
    type: object
    description: Model used to generate text
    properties:
      type:
        type: string
        description: Type of the model
        const: openai
      name:
        type: string
        description: Name of the model
      organization:
        type: string
        description: Name of the organization
    additionalProperties: false

  # azure openai models
  azureOpenaiModel:
    type: object
    description: Model used to generate text
    properties:
      type:
        type: string
        description: Type of the model
        const: azure_openai
      api_version:
        type: string
        description: Version of the model
      azure_deployment:
        type: string
        description: Deployment of the model
      azure_endpoint:
        type: string
        description: Endpoint of the model
    additionalProperties: false

  # for maas models
  maasModel:
    type: object
    description: Model used to generate text
    properties:
      type:
        type: string
        description: Type of the model
        const: azure_serverless
      azure_endpoint:
        type: string
        description: Endpoint of the model
    additionalProperties: false

  # parameters for the model -- for now these are not per model but the same for all models
  parameters:
    type: object
    description: Parameters to be sent to the model 
    additionalProperties: true
    properties: 
      response_format: 
        type: object
        description: >
          An object specifying the format that the model must output. Compatible with
          `gpt-4-1106-preview` and `gpt-3.5-turbo-1106`.
          Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
          message the model generates is valid JSON.

      seed:
        type: integer
        description: > 
          This feature is in Beta. If specified, our system will make a best effort to
          sample deterministically, such that repeated requests with the same `seed` and
          parameters should return the same result. Determinism is not guaranteed, and you
          should refer to the `system_fingerprint` response parameter to monitor changes
          in the backend.

      max_tokens:
        type: integer
        description: The maximum number of [tokens](/tokenizer) that can be generated in the chat completion.

      temperature:
        type: number
        description: What sampling temperature to use, 0 means deterministic.

      tools_choice:
        oneOf:
          - type: string
          - type: object
        
        description: > 
          Controls which (if any) function is called by the model. `none` means the model
          will not call a function and instead generates a message. `auto` means the model
          can pick between generating a message or calling a function. Specifying a
          particular function via
          `{"type": "function", "function": {"name": "my_function"}}` forces the model to
          call that function.

          `none` is the default when no functions are present. `auto` is the default if
          functions are present.

      tools:
        type: array
        items:
          type: object

      frequency_penalty:
        type: number
        description: What sampling frequency penalty to use. 0 means no penalty.
      
      presence_penalty:
        type: number
        description: What sampling presence penalty to use. 0 means no penalty.
      
      stop:
        type: array
        items:
          type: string
        description: > 
          One or more sequences where the model should stop generating tokens. The model
          will stop generating tokens if it generates one of the sequences. If the model
          generates a sequence that is a prefix of one of the sequences, it will continue
          generating tokens.
      
      top_p:
        type: number
        description: > 
          What nucleus sampling probability to use. 1 means no nucleus sampling. 0 means
          no tokens are generated.