chore(docs): extra-Usage and Machine-Tag docs (#4627)

Rename LocalAI-Extra-Usage -> Extra-Usage, add MACHINE_TAG as cli flag option, add docs about extra-usage and machine-tag

Signed-off-by: mintyleaf <mintyleafdev@gmail.com>
This commit is contained in:
mintyleaf 2025-01-18 11:58:38 +04:00 committed by GitHub
parent 895cd7c76a
commit 96306a39a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 34 additions and 5 deletions

View file

@ -70,7 +70,7 @@ type RunCMD struct {
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
MachineTag string `env:"LOCALAI_MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"`
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"` LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
} }

View file

@ -182,7 +182,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
c.Set("X-Correlation-ID", correlationID) c.Set("X-Correlation-ID", correlationID)
// Opt-in extra usage flag // Opt-in extra usage flag
extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" extraUsage := c.Get("Extra-Usage", "") != ""
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
if err != nil { if err != nil {

View file

@ -67,7 +67,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
c.Set("X-Correlation-ID", id) c.Set("X-Correlation-ID", id)
// Opt-in extra usage flag // Opt-in extra usage flag
extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" extraUsage := c.Get("Extra-Usage", "") != ""
modelFile, input, err := readRequest(c, cl, ml, appConfig, true) modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
if err != nil { if err != nil {

View file

@ -26,7 +26,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
// Opt-in extra usage flag // Opt-in extra usage flag
extraUsage := c.Get("LocalAI-Extra-Usage", "") != "" extraUsage := c.Get("Extra-Usage", "") != ""
modelFile, input, err := readRequest(c, cl, ml, appConfig, true) modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
if err != nil { if err != nil {

View file

@ -520,6 +520,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
| --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT | | --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT |
| --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY | | --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY |
| --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME | | --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME |
| --machine-tag | | If not empty, this string is sent in the Machine-Tag header of each response. Useful for tracking which machine produced a response when using multiple P2P federated nodes | $LOCALAI_MACHINE_TAG |
#### Backend Flags #### Backend Flags
| Parameter | Default | Description | Environment Variable | | Parameter | Default | Description | Environment Variable |
@ -553,6 +554,34 @@ LOCALAI_MODELS_PATH=/mnt/storage/localai/models
LOCALAI_F16=true LOCALAI_F16=true
``` ```
### Request headers
Send the 'Extra-Usage' request header with any non-empty value (for example 'Extra-Usage: true') to receive inference timings in milliseconds. The timings extend the default OpenAI response model in the usage field:
```
...
{
"id": "...",
"created": ...,
"model": "...",
"choices": [
{
...
},
...
],
"object": "...",
"usage": {
"prompt_tokens": ...,
"completion_tokens": ...,
"total_tokens": ...,
// Extra-Usage header key will include these two float fields:
"timing_prompt_processing": ...,
"timing_token_generation": ...,
},
}
...
```
### Extra backends ### Extra backends
LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default Images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI with only a core set of backends. LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default Images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI with only a core set of backends.