diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 4fd88500..aeb3c842 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,25 +1,48 @@
name: gpt-4
mmap: true
parameters:
- model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
+ model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
template:
chat_message: |
- <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+    {{ if .FunctionCall }}<tool_call>{{end}}
+    {{ if eq .RoleName "tool" }}<tool_response>{{end}}
{{if .Content}}{{.Content}}{{end}}
+ {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+    {{ if .FunctionCall }}</tool_call>{{end}}
+    {{ if eq .RoleName "tool" }}</tool_response>{{end}}
<|im_end|>
+ # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+ function: |
+ <|im_start|>system
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+    </tools>
+ Use the following pydantic model json schema for each tool call you will make:
+ {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
+    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    <tool_call>
+    {'arguments': <args-dict>, 'name': <function-name>}
+    </tool_call><|im_end|>
+ {{.Input}}
+ <|im_start|>assistant
+
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
-context_size: 2048
+context_size: 4096
f16: true
stopwords:
- <|im_end|>
-
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "phi-2-chat",
+ "model": "gpt-4",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index f5f03eb4..837b6e12 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
input.Grammar = grammar.JSONBNF
}
+ config.Grammar = input.Grammar
+
// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
log.Debug().Msgf("Response needs to process functions")
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index a67f0993..69923475 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -73,6 +73,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
input.Grammar = grammar.JSONBNF
}
+ config.Grammar = input.Grammar
+
log.Debug().Msgf("Parameter Config: %+v", config)
if input.Stream {
diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml
new file mode 100644
index 00000000..9207d283
--- /dev/null
+++ b/embedded/models/phi-2-orange.yaml
@@ -0,0 +1,30 @@
+name: phi-2-chat
+mmap: true
+parameters:
+ model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
+
+template:
+ chat_message: |
+ <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+ {{if .Content}}{{.Content}}{{end}}
+ <|im_end|>
+ chat: |
+ {{.Input}}
+ <|im_start|>assistant
+ completion: |
+ {{.Input}}
+context_size: 4096
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+
+description: |
+ This model is a chatbot that can be used for general conversation.
+ [Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF)
+
+usage: |
+ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "phi-2-chat",
+ "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+ }'
diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go
index c52d789e..8fcd1280 100644
--- a/tests/e2e-aio/e2e_test.go
+++ b/tests/e2e-aio/e2e_test.go
@@ -2,6 +2,7 @@ package e2e_test
import (
"context"
+ "encoding/json"
"fmt"
"io"
"net/http"
@@ -9,8 +10,8 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
-
"github.com/sashabaranov/go-openai"
+ "github.com/sashabaranov/go-openai/jsonschema"
)
var _ = Describe("E2E test", func() {
@@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() {
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
})
})
+
+ Context("function calls", func() {
+ It("correctly invoke", func() {
+ params := jsonschema.Definition{
+ Type: jsonschema.Object,
+ Properties: map[string]jsonschema.Definition{
+ "location": {
+ Type: jsonschema.String,
+ Description: "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {
+ Type: jsonschema.String,
+ Enum: []string{"celsius", "fahrenheit"},
+ },
+ },
+ Required: []string{"location"},
+ }
+
+ f := openai.FunctionDefinition{
+ Name: "get_current_weather",
+ Description: "Get the current weather in a given location",
+ Parameters: params,
+ }
+ t := openai.Tool{
+ Type: openai.ToolTypeFunction,
+ Function: &f,
+ }
+
+ dialogue := []openai.ChatCompletionMessage{
+ {Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"},
+ }
+ resp, err := client.CreateChatCompletion(context.TODO(),
+ openai.ChatCompletionRequest{
+ Model: openai.GPT4,
+ Messages: dialogue,
+ Tools: []openai.Tool{t},
+ },
+ )
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+
+ msg := resp.Choices[0].Message
+ Expect(len(msg.ToolCalls)).To(Equal(1), fmt.Sprint(msg.ToolCalls))
+ Expect(msg.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), fmt.Sprint(msg.ToolCalls[0].Function.Name))
+ Expect(msg.ToolCalls[0].Function.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(msg.ToolCalls[0].Function.Arguments))
+ })
+ })
+ Context("json", func() {
+ It("correctly", func() {
+ model := "gpt-4"
+
+ req := openai.ChatCompletionRequest{
+ ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject},
+ Model: model,
+ Messages: []openai.ChatCompletionMessage{
+ {
+
+ Role: "user",
+ Content: "An animal with 'name', 'gender' and 'legs' fields",
+ },
+ },
+ }
+
+ resp, err := client.CreateChatCompletion(context.TODO(), req)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
+
+ var i map[string]interface{}
+ err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &i)
+ Expect(err).ToNot(HaveOccurred())
+ Expect(i).To(HaveKey("name"))
+ Expect(i).To(HaveKey("gender"))
+ Expect(i).To(HaveKey("legs"))
+ })
+ })
+
Context("images", func() {
It("correctly", func() {
resp, err := client.CreateImage(context.TODO(),