feat(ui): add audio upload button in chat view (#5526)
Some checks are pending
Explorer deployment / build-linux (push) Waiting to run
GPU tests / ubuntu-latest (1.21.x) (push) Waiting to run
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Waiting to run
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas-extras, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-extras) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16-extras, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32-extras, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-… (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11-extras, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11-extras) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12-extras, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12-extras) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32) (push) Waiting to run
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, ) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan) (push) Waiting to run
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64) (push) Waiting to run
Security Scan / tests (push) Waiting to run
Tests extras backends / tests-transformers (push) Waiting to run
Tests extras backends / tests-rerankers (push) Waiting to run
Tests extras backends / tests-diffusers (push) Waiting to run
Tests extras backends / tests-coqui (push) Waiting to run
tests / tests-linux (1.21.x) (push) Waiting to run
tests / tests-aio-container (push) Waiting to run
tests / tests-apple (1.21.x) (push) Waiting to run

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2025-05-30 16:47:31 +02:00 committed by GitHub
parent d5c9c717b5
commit 45c58752e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 201 additions and 180 deletions

View file

@ -49,12 +49,13 @@ function submitSystemPrompt(event) {
}
var image = "";
var audio = "";
function submitPrompt(event) {
event.preventDefault();
const input = document.getElementById("input").value;
Alpine.store("chat").add("user", input, image);
Alpine.store("chat").add("user", input, image, audio);
document.getElementById("input").value = "";
const systemPrompt = localStorage.getItem("system_prompt");
Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
@ -62,7 +63,6 @@ function submitPrompt(event) {
}
function readInputImage() {
if (!this.files || !this.files[0]) return;
const FR = new FileReader();
@ -74,35 +74,47 @@ function readInputImage() {
FR.readAsDataURL(this.files[0]);
}
function readInputAudio() {
if (!this.files || !this.files[0]) return;
async function promptGPT(systemPrompt, input) {
const model = document.getElementById("chat-model").value;
// Set class "loader" to the element with "loader" id
//document.getElementById("loader").classList.add("loader");
// Make the "loader" visible
toggleLoader(true);
const FR = new FileReader();
FR.addEventListener("load", function(evt) {
audio = evt.target.result;
});
messages = Alpine.store("chat").messages();
FR.readAsDataURL(this.files[0]);
}
// if systemPrompt isn't empty, push it at the start of messages
if (systemPrompt) {
messages.unshift({
role: "system",
content: systemPrompt
});
}
async function promptGPT(systemPrompt, input) {
const model = document.getElementById("chat-model").value;
// Set class "loader" to the element with "loader" id
//document.getElementById("loader").classList.add("loader");
// Make the "loader" visible
toggleLoader(true);
// loop all messages, and check if there are images. If there are, we need to change the content field
messages.forEach((message) => {
messages = Alpine.store("chat").messages();
// if systemPrompt isn't empty, push it at the start of messages
if (systemPrompt) {
messages.unshift({
role: "system",
content: systemPrompt
});
}
// loop all messages, and check if there are images or audios. If there are, we need to change the content field
messages.forEach((message) => {
if (message.image || message.audio) {
// The content field now becomes an array
message.content = [
{
"type": "text",
"text": message.content
}
]
if (message.image) {
// The content field now becomes an array
message.content = [
{
"type": "text",
"text": message.content
}
]
message.content.push(
{
"type": "image_url",
@ -111,168 +123,154 @@ function readInputImage() {
}
}
);
// remove the image field
delete message.image;
}
});
// reset the form and the image
image = "";
document.getElementById("input_image").value = null;
document.getElementById("fileName").innerHTML = "";
// if (image) {
// // take the last element content's and add the image
// last_message = messages[messages.length - 1]
// // The content field now becomes an array
// last_message.content = [
// {
// "type": "text",
// "text": last_message.content
// }
// ]
// last_message.content.push(
// {
// "type": "image_url",
// "image_url": {
// "url": image,
// }
// }
// );
// // and we replace it in the messages array
// messages[messages.length - 1] = last_message
// // reset the form and the image
// image = "";
// document.getElementById("input_image").value = null;
// document.getElementById("fileName").innerHTML = "";
// }
// Source: https://stackoverflow.com/a/75751803/11386095
const response = await fetch("v1/chat/completions", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
messages: messages,
stream: true,
}),
});
if (!response.ok) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: POST /v1/chat/completions ${response.status}</span>`,
);
return;
}
const reader = response.body
?.pipeThrough(new TextDecoderStream())
.getReader();
if (!reader) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
);
return;
}
// Function to add content to the chat and handle DOM updates efficiently
const addToChat = (token) => {
const chatStore = Alpine.store("chat");
chatStore.add("assistant", token);
// Efficiently scroll into view without triggering multiple reflows
// const messages = document.getElementById('messages');
// messages.scrollTop = messages.scrollHeight;
};
let buffer = "";
let contentBuffer = [];
try {
while (true) {
const { value, done } = await reader.read();
if (done) break;
buffer += value;
let lines = buffer.split("\n");
buffer = lines.pop(); // Retain any incomplete line in the buffer
lines.forEach((line) => {
if (line.length === 0 || line.startsWith(":")) return;
if (line === "data: [DONE]") {
return;
}
if (line.startsWith("data: ")) {
try {
const jsonData = JSON.parse(line.substring(6));
const token = jsonData.choices[0].delta.content;
if (token) {
contentBuffer.push(token);
}
} catch (error) {
console.error("Failed to parse line:", line, error);
if (message.audio) {
message.content.push(
{
"type": "audio_url",
"audio_url": {
"url": message.audio,
}
}
});
// Efficiently update the chat in batch
if (contentBuffer.length > 0) {
addToChat(contentBuffer.join(""));
contentBuffer = [];
}
);
delete message.audio;
}
}
});
// Final content flush if any data remains
// reset the form and the files
image = "";
audio = "";
document.getElementById("input_image").value = null;
document.getElementById("input_audio").value = null;
document.getElementById("fileName").innerHTML = "";
// Source: https://stackoverflow.com/a/75751803/11386095
const response = await fetch("v1/chat/completions", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
messages: messages,
stream: true,
}),
});
if (!response.ok) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: POST /v1/chat/completions ${response.status}</span>`,
);
return;
}
const reader = response.body
?.pipeThrough(new TextDecoderStream())
.getReader();
if (!reader) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
);
return;
}
// Function to add content to the chat and handle DOM updates efficiently
const addToChat = (token) => {
const chatStore = Alpine.store("chat");
chatStore.add("assistant", token);
// Efficiently scroll into view without triggering multiple reflows
// const messages = document.getElementById('messages');
// messages.scrollTop = messages.scrollHeight;
};
let buffer = "";
let contentBuffer = [];
try {
while (true) {
const { value, done } = await reader.read();
if (done) break;
buffer += value;
let lines = buffer.split("\n");
buffer = lines.pop(); // Retain any incomplete line in the buffer
lines.forEach((line) => {
if (line.length === 0 || line.startsWith(":")) return;
if (line === "data: [DONE]") {
return;
}
if (line.startsWith("data: ")) {
try {
const jsonData = JSON.parse(line.substring(6));
const token = jsonData.choices[0].delta.content;
if (token) {
contentBuffer.push(token);
}
} catch (error) {
console.error("Failed to parse line:", line, error);
}
}
});
// Efficiently update the chat in batch
if (contentBuffer.length > 0) {
addToChat(contentBuffer.join(""));
contentBuffer = [];
}
// Highlight all code blocks once at the end
hljs.highlightAll();
} catch (error) {
console.error("An error occurred while reading the stream:", error);
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
);
} finally {
// Perform any cleanup if necessary
reader.releaseLock();
}
// Remove class "loader" from the element with "loader" id
toggleLoader(false);
// Final content flush if any data remains
if (contentBuffer.length > 0) {
addToChat(contentBuffer.join(""));
}
// scroll to the bottom of the chat
document.getElementById('messages').scrollIntoView(false)
// set focus to the input
document.getElementById("input").focus();
// Highlight all code blocks once at the end
hljs.highlightAll();
} catch (error) {
console.error("An error occurred while reading the stream:", error);
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
);
} finally {
// Perform any cleanup if necessary
reader.releaseLock();
}
document.getElementById("system_prompt").addEventListener("submit", submitSystemPrompt);
// Remove class "loader" from the element with "loader" id
toggleLoader(false);
document.getElementById("prompt").addEventListener("submit", submitPrompt);
// scroll to the bottom of the chat
document.getElementById('messages').scrollIntoView(false)
// set focus to the input
document.getElementById("input").focus();
document.getElementById("input_image").addEventListener("change", readInputImage);
}
storesystemPrompt = localStorage.getItem("system_prompt");
if (storesystemPrompt) {
document.getElementById("systemPrompt").value = storesystemPrompt;
} else {
document.getElementById("systemPrompt").value = null;
}
// Wire up the page: both forms submit through their JS handlers, the prompt
// input receives focus, and each hidden file input reads the chosen file via
// its change handler (readInputImage / readInputAudio).
document.getElementById("system_prompt").addEventListener("submit", submitSystemPrompt);
document.getElementById("prompt").addEventListener("submit", submitPrompt);
document.getElementById("input").focus();
document.getElementById("input_image").addEventListener("change", readInputImage);
document.getElementById("input_audio").addEventListener("change", readInputAudio);
marked.setOptions({
highlight: function (code) {
return hljs.highlightAuto(code).value;
},
});
// Restore the system prompt saved under the "system_prompt" localStorage key
// into the #systemPrompt field; when nothing (or an empty string) is stored,
// the field is assigned null instead.
storesystemPrompt = localStorage.getItem("system_prompt");
document.getElementById("systemPrompt").value = storesystemPrompt ? storesystemPrompt : null;
// Have marked hand code snippets to highlight.js auto-detection and use the
// highlighted markup it returns.
marked.setOptions({
  highlight: (code) => hljs.highlightAuto(code).value,
});

View file

@ -218,6 +218,8 @@ SOFTWARE.
Start chatting with the AI by typing a prompt in the input field below and pressing Enter.
For models that support images, you can upload an image by clicking the paperclip
<i class="fa-solid fa-paperclip"></i> icon.
For models that support audio, you can upload an audio file by clicking the microphone
<i class="fa-solid fa-microphone"></i> icon.
</p>
<div id="messages" class="max-w-3xl mx-auto">
<template x-for="message in history">
@ -290,6 +292,12 @@ SOFTWARE.
class="fa-solid fa-paperclip text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an image"
></button>
<!-- Audio attach button: forwards the click to the hidden #input_audio file input -->
<button
type="button"
onclick="document.getElementById('input_audio').click()"
class="fa-solid fa-microphone text-gray-400 absolute right-20 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an audio file"
></button>
<!-- Send button and loader in the same position -->
<div class="absolute right-3 top-4">
@ -320,6 +328,13 @@ SOFTWARE.
style="display: none;"
@change="fileName = $event.target.files[0].name"
/>
<!-- Hidden file picker for audio attachments (accepts audio/* only).
     The change event can fire with an empty file list (e.g. the dialog is
     cancelled after a previous selection), so the name lookup is guarded and
     falls back to an empty label instead of throwing a TypeError. -->
<input
  id="input_audio"
  type="file"
  accept="audio/*"
  style="display: none;"
  @change="fileName = $event.target.files[0]?.name ?? ''"
/>
</div>
</form>
</div>
@ -381,7 +396,7 @@ SOFTWARE.
clear() {
this.history.length = 0;
},
add(role, content, image) {
add(role, content, image, audio) {
const N = this.history.length - 1;
if (this.history.length && this.history[N].role === role) {
this.history[N].content += content;
@ -394,7 +409,7 @@ SOFTWARE.
lines.forEach((line) => {
c += DOMPurify.sanitize(marked.parse(line));
});
this.history.push({ role, content, html: c, image });
this.history.push({ role, content, html: c, image, audio });
}
document.getElementById('messages').scrollIntoView(false);
const parser = new DOMParser();
@ -418,6 +433,7 @@ SOFTWARE.
role: message.role,
content: message.content,
image: message.image,
audio: message.audio,
}));
},
});