feat(ui): add audio upload button in chat view (#5526)
Some checks are pending
Explorer deployment / build-linux (push) Waiting to run
GPU tests / ubuntu-latest (1.21.x) (push) Waiting to run
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Waiting to run
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas-extras, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-extras) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16-extras, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32-extras, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11-extras, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11-extras) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12-extras, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12-extras) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32) (push) Waiting to run
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, ) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan) (push) Waiting to run
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64) (push) Waiting to run
Security Scan / tests (push) Waiting to run
Tests extras backends / tests-transformers (push) Waiting to run
Tests extras backends / tests-rerankers (push) Waiting to run
Tests extras backends / tests-diffusers (push) Waiting to run
Tests extras backends / tests-coqui (push) Waiting to run
tests / tests-linux (1.21.x) (push) Waiting to run
tests / tests-aio-container (push) Waiting to run
tests / tests-apple (1.21.x) (push) Waiting to run

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2025-05-30 16:47:31 +02:00 committed by GitHub
parent d5c9c717b5
commit 45c58752e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 201 additions and 180 deletions

View file

@ -49,12 +49,13 @@ function submitSystemPrompt(event) {
} }
var image = ""; var image = "";
var audio = "";
function submitPrompt(event) { function submitPrompt(event) {
event.preventDefault(); event.preventDefault();
const input = document.getElementById("input").value; const input = document.getElementById("input").value;
Alpine.store("chat").add("user", input, image); Alpine.store("chat").add("user", input, image, audio);
document.getElementById("input").value = ""; document.getElementById("input").value = "";
const systemPrompt = localStorage.getItem("system_prompt"); const systemPrompt = localStorage.getItem("system_prompt");
Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); }); Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
@ -62,7 +63,6 @@ function submitPrompt(event) {
} }
function readInputImage() { function readInputImage() {
if (!this.files || !this.files[0]) return; if (!this.files || !this.files[0]) return;
const FR = new FileReader(); const FR = new FileReader();
@ -74,35 +74,47 @@ function readInputImage() {
FR.readAsDataURL(this.files[0]); FR.readAsDataURL(this.files[0]);
} }
function readInputAudio() {
if (!this.files || !this.files[0]) return;
async function promptGPT(systemPrompt, input) { const FR = new FileReader();
const model = document.getElementById("chat-model").value;
// Set class "loader" to the element with "loader" id
//document.getElementById("loader").classList.add("loader");
// Make the "loader" visible
toggleLoader(true);
FR.addEventListener("load", function(evt) {
audio = evt.target.result;
});
messages = Alpine.store("chat").messages(); FR.readAsDataURL(this.files[0]);
}
// if systemPrompt isn't empty, push it at the start of messages async function promptGPT(systemPrompt, input) {
if (systemPrompt) { const model = document.getElementById("chat-model").value;
messages.unshift({ // Set class "loader" to the element with "loader" id
role: "system", //document.getElementById("loader").classList.add("loader");
content: systemPrompt // Make the "loader" visible
}); toggleLoader(true);
}
// loop all messages, and check if there are images. If there are, we need to change the content field messages = Alpine.store("chat").messages();
messages.forEach((message) => {
// if systemPrompt isn't empty, push it at the start of messages
if (systemPrompt) {
messages.unshift({
role: "system",
content: systemPrompt
});
}
// Loop over all messages and check whether any carry an image or audio file. If so, the content field must be converted to an array.
messages.forEach((message) => {
if (message.image || message.audio) {
// The content field now becomes an array
message.content = [
{
"type": "text",
"text": message.content
}
]
if (message.image) { if (message.image) {
// The content field now becomes an array
message.content = [
{
"type": "text",
"text": message.content
}
]
message.content.push( message.content.push(
{ {
"type": "image_url", "type": "image_url",
@ -111,168 +123,154 @@ function readInputImage() {
} }
} }
); );
// remove the image field
delete message.image; delete message.image;
} }
});
// reset the form and the image if (message.audio) {
image = ""; message.content.push(
document.getElementById("input_image").value = null; {
document.getElementById("fileName").innerHTML = ""; "type": "audio_url",
"audio_url": {
// if (image) { "url": message.audio,
// // take the last element content's and add the image
// last_message = messages[messages.length - 1]
// // The content field now becomes an array
// last_message.content = [
// {
// "type": "text",
// "text": last_message.content
// }
// ]
// last_message.content.push(
// {
// "type": "image_url",
// "image_url": {
// "url": image,
// }
// }
// );
// // and we replace it in the messages array
// messages[messages.length - 1] = last_message
// // reset the form and the image
// image = "";
// document.getElementById("input_image").value = null;
// document.getElementById("fileName").innerHTML = "";
// }
// Source: https://stackoverflow.com/a/75751803/11386095
const response = await fetch("v1/chat/completions", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
messages: messages,
stream: true,
}),
});
if (!response.ok) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: POST /v1/chat/completions ${response.status}</span>`,
);
return;
}
const reader = response.body
?.pipeThrough(new TextDecoderStream())
.getReader();
if (!reader) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
);
return;
}
// Function to add content to the chat and handle DOM updates efficiently
const addToChat = (token) => {
const chatStore = Alpine.store("chat");
chatStore.add("assistant", token);
// Efficiently scroll into view without triggering multiple reflows
// const messages = document.getElementById('messages');
// messages.scrollTop = messages.scrollHeight;
};
let buffer = "";
let contentBuffer = [];
try {
while (true) {
const { value, done } = await reader.read();
if (done) break;
buffer += value;
let lines = buffer.split("\n");
buffer = lines.pop(); // Retain any incomplete line in the buffer
lines.forEach((line) => {
if (line.length === 0 || line.startsWith(":")) return;
if (line === "data: [DONE]") {
return;
}
if (line.startsWith("data: ")) {
try {
const jsonData = JSON.parse(line.substring(6));
const token = jsonData.choices[0].delta.content;
if (token) {
contentBuffer.push(token);
}
} catch (error) {
console.error("Failed to parse line:", line, error);
} }
} }
}); );
delete message.audio;
// Efficiently update the chat in batch
if (contentBuffer.length > 0) {
addToChat(contentBuffer.join(""));
contentBuffer = [];
}
} }
}
});
// Final content flush if any data remains // reset the form and the files
image = "";
audio = "";
document.getElementById("input_image").value = null;
document.getElementById("input_audio").value = null;
document.getElementById("fileName").innerHTML = "";
// Source: https://stackoverflow.com/a/75751803/11386095
const response = await fetch("v1/chat/completions", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
messages: messages,
stream: true,
}),
});
if (!response.ok) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: POST /v1/chat/completions ${response.status}</span>`,
);
return;
}
const reader = response.body
?.pipeThrough(new TextDecoderStream())
.getReader();
if (!reader) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
);
return;
}
// Function to add content to the chat and handle DOM updates efficiently
const addToChat = (token) => {
const chatStore = Alpine.store("chat");
chatStore.add("assistant", token);
// Efficiently scroll into view without triggering multiple reflows
// const messages = document.getElementById('messages');
// messages.scrollTop = messages.scrollHeight;
};
let buffer = "";
let contentBuffer = [];
try {
while (true) {
const { value, done } = await reader.read();
if (done) break;
buffer += value;
let lines = buffer.split("\n");
buffer = lines.pop(); // Retain any incomplete line in the buffer
lines.forEach((line) => {
if (line.length === 0 || line.startsWith(":")) return;
if (line === "data: [DONE]") {
return;
}
if (line.startsWith("data: ")) {
try {
const jsonData = JSON.parse(line.substring(6));
const token = jsonData.choices[0].delta.content;
if (token) {
contentBuffer.push(token);
}
} catch (error) {
console.error("Failed to parse line:", line, error);
}
}
});
// Efficiently update the chat in batch
if (contentBuffer.length > 0) { if (contentBuffer.length > 0) {
addToChat(contentBuffer.join("")); addToChat(contentBuffer.join(""));
contentBuffer = [];
} }
// Highlight all code blocks once at the end
hljs.highlightAll();
} catch (error) {
console.error("An error occurred while reading the stream:", error);
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
);
} finally {
// Perform any cleanup if necessary
reader.releaseLock();
} }
// Remove class "loader" from the element with "loader" id // Final content flush if any data remains
toggleLoader(false); if (contentBuffer.length > 0) {
addToChat(contentBuffer.join(""));
}
// scroll to the bottom of the chat // Highlight all code blocks once at the end
document.getElementById('messages').scrollIntoView(false) hljs.highlightAll();
// set focus to the input } catch (error) {
document.getElementById("input").focus(); console.error("An error occurred while reading the stream:", error);
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
);
} finally {
// Perform any cleanup if necessary
reader.releaseLock();
} }
document.getElementById("system_prompt").addEventListener("submit", submitSystemPrompt); // Remove class "loader" from the element with "loader" id
toggleLoader(false);
document.getElementById("prompt").addEventListener("submit", submitPrompt); // scroll to the bottom of the chat
document.getElementById('messages').scrollIntoView(false)
// set focus to the input
document.getElementById("input").focus(); document.getElementById("input").focus();
document.getElementById("input_image").addEventListener("change", readInputImage); }
storesystemPrompt = localStorage.getItem("system_prompt"); document.getElementById("system_prompt").addEventListener("submit", submitSystemPrompt);
if (storesystemPrompt) { document.getElementById("prompt").addEventListener("submit", submitPrompt);
document.getElementById("systemPrompt").value = storesystemPrompt; document.getElementById("input").focus();
} else { document.getElementById("input_image").addEventListener("change", readInputImage);
document.getElementById("systemPrompt").value = null; document.getElementById("input_audio").addEventListener("change", readInputAudio);
}
marked.setOptions({ storesystemPrompt = localStorage.getItem("system_prompt");
highlight: function (code) { if (storesystemPrompt) {
return hljs.highlightAuto(code).value; document.getElementById("systemPrompt").value = storesystemPrompt;
}, } else {
}); document.getElementById("systemPrompt").value = null;
}
marked.setOptions({
highlight: function (code) {
return hljs.highlightAuto(code).value;
},
});

View file

@ -218,6 +218,8 @@ SOFTWARE.
Start chatting with the AI by typing a prompt in the input field below and pressing Enter. Start chatting with the AI by typing a prompt in the input field below and pressing Enter.
For models that support images, you can upload an image by clicking the paperclip For models that support images, you can upload an image by clicking the paperclip
<i class="fa-solid fa-paperclip"></i> icon. <i class="fa-solid fa-paperclip"></i> icon.
For models that support audio, you can upload an audio file by clicking the microphone
<i class="fa-solid fa-microphone"></i> icon.
</p> </p>
<div id="messages" class="max-w-3xl mx-auto"> <div id="messages" class="max-w-3xl mx-auto">
<template x-for="message in history"> <template x-for="message in history">
@ -290,6 +292,12 @@ SOFTWARE.
class="fa-solid fa-paperclip text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200" class="fa-solid fa-paperclip text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an image" title="Attach an image"
></button> ></button>
<button
type="button"
onclick="document.getElementById('input_audio').click()"
class="fa-solid fa-microphone text-gray-400 absolute right-20 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an audio file"
></button>
<!-- Send button and loader in the same position --> <!-- Send button and loader in the same position -->
<div class="absolute right-3 top-4"> <div class="absolute right-3 top-4">
@ -320,6 +328,13 @@ SOFTWARE.
style="display: none;" style="display: none;"
@change="fileName = $event.target.files[0].name" @change="fileName = $event.target.files[0].name"
/> />
<input
id="input_audio"
type="file"
accept="audio/*"
style="display: none;"
@change="fileName = $event.target.files[0].name"
/>
</div> </div>
</form> </form>
</div> </div>
@ -381,7 +396,7 @@ SOFTWARE.
clear() { clear() {
this.history.length = 0; this.history.length = 0;
}, },
add(role, content, image) { add(role, content, image, audio) {
const N = this.history.length - 1; const N = this.history.length - 1;
if (this.history.length && this.history[N].role === role) { if (this.history.length && this.history[N].role === role) {
this.history[N].content += content; this.history[N].content += content;
@ -394,7 +409,7 @@ SOFTWARE.
lines.forEach((line) => { lines.forEach((line) => {
c += DOMPurify.sanitize(marked.parse(line)); c += DOMPurify.sanitize(marked.parse(line));
}); });
this.history.push({ role, content, html: c, image }); this.history.push({ role, content, html: c, image, audio });
} }
document.getElementById('messages').scrollIntoView(false); document.getElementById('messages').scrollIntoView(false);
const parser = new DOMParser(); const parser = new DOMParser();
@ -418,6 +433,7 @@ SOFTWARE.
role: message.role, role: message.role,
content: message.content, content: message.content,
image: message.image, image: message.image,
audio: message.audio,
})); }));
}, },
}); });

1
go.mod
View file

@ -82,6 +82,7 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/morikuni/aec v1.0.0 // indirect github.com/morikuni/aec v1.0.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e // indirect
github.com/pion/datachannel v1.5.10 // indirect github.com/pion/datachannel v1.5.10 // indirect
github.com/pion/dtls/v2 v2.2.12 // indirect github.com/pion/dtls/v2 v2.2.12 // indirect
github.com/pion/dtls/v3 v3.0.4 // indirect github.com/pion/dtls/v3 v3.0.4 // indirect

2
go.sum
View file

@ -546,6 +546,8 @@ github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw=
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0=
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg= github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
github.com/otiai10/openaigo v1.7.0 h1:AOQcOjRRM57ABvz+aI2oJA/Qsz1AydKbdZAlGiKyCqg= github.com/otiai10/openaigo v1.7.0 h1:AOQcOjRRM57ABvz+aI2oJA/Qsz1AydKbdZAlGiKyCqg=

View file

@ -5,14 +5,19 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"regexp"
"strings" "strings"
"time" "time"
"github.com/rs/zerolog/log"
) )
var base64DownloadClient http.Client = http.Client{ var base64DownloadClient http.Client = http.Client{
Timeout: 30 * time.Second, Timeout: 30 * time.Second,
} }
var dataURIPattern = regexp.MustCompile(`^data:([^;]+);base64,`)
// GetContentURIAsBase64 checks whether the string is a URL. If it is, it downloads the content into memory, encodes it in base64, and returns the base64 string; otherwise it returns the string with the base64 data header stripped. // GetContentURIAsBase64 checks whether the string is a URL. If it is, it downloads the content into memory, encodes it in base64, and returns the base64 string; otherwise it returns the string with the base64 data header stripped.
func GetContentURIAsBase64(s string) (string, error) { func GetContentURIAsBase64(s string) (string, error) {
if strings.HasPrefix(s, "http") { if strings.HasPrefix(s, "http") {
@ -36,12 +41,11 @@ func GetContentURIAsBase64(s string) (string, error) {
return encoded, nil return encoded, nil
} }
// if the string instead is prefixed with "data:image/...;base64,", drop it // Match any data URI prefix pattern
dropPrefix := []string{"data:image/jpeg;base64,", "data:image/png;base64,"} if match := dataURIPattern.FindString(s); match != "" {
for _, prefix := range dropPrefix { log.Debug().Msgf("Found data URI prefix: %s", match)
if strings.HasPrefix(s, prefix) { return strings.Replace(s, match, "", 1), nil
return strings.ReplaceAll(s, prefix, ""), nil
}
} }
return "", fmt.Errorf("not valid string")
return "", fmt.Errorf("not valid base64 data type string")
} }