feat(ui): allow uploading PDF and text files, and add support for multiple input files (#5538)

* Support file inputs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: support multiple files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* show preview of files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Authored by Ettore Di Giacinto on 2025-05-31 08:47:48 +02:00; committed by GitHub
parent 1cc4525f15
commit 59db154cbc
3 changed files with 257 additions and 52 deletions
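For orientation before the diffs: with multiple attachments supported, the UI ends up sending a single user message whose content is an array of parts, with every image and audio attachment added as its own image_url / audio_url part and the text of uploaded files appended to the prompt text. A minimal sketch of that shape (the image_url/audio_url part names come from the chat.js diff below; the text part and all values are illustrative placeholders):

// Illustrative only: one user message carrying text plus several attachments.
const exampleMessage = {
  role: "user",
  content: [
    { type: "text", text: "Summarize the attachments.\n\nFile contents:\n--- notes.md ---\n..." },
    { type: "image_url", image_url: { url: "data:image/png;base64,..." } },
    { type: "image_url", image_url: { url: "data:image/jpeg;base64,..." } },
    { type: "audio_url", audio_url: { url: "data:audio/wav;base64,..." } },
  ],
};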


@@ -312,7 +312,7 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
log.Error().Msgf("Failed encoding audio: %s", err)
continue CONTENT
}
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
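The hunk above only corrects a copy-pasted log message (this branch handles audio, not images). For context, the surrounding Go code decodes the audio URL, whether a remote URL or a data URI, into base64 before attaching it to the message. A hedged browser-side illustration of the same idea in JavaScript (hypothetical helper, not the Go implementation used here):

// Hypothetical illustration only: turn either a data: URI or a remote URL
// into a plain base64 string, mirroring what the Go helper above is used for.
async function contentURIAsBase64(uri) {
  if (uri.startsWith("data:")) {
    // Strip the "data:<mime>;base64," prefix and keep the payload.
    return uri.slice(uri.indexOf(",") + 1);
  }
  const buf = await (await fetch(uri)).arrayBuffer();
  let binary = "";
  new Uint8Array(buf).forEach((b) => { binary += String.fromCharCode(b); });
  return btoa(binary);
}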


@@ -48,49 +48,133 @@ function submitSystemPrompt(event) {
document.getElementById("systemPrompt").blur();
}
var image = "";
var audio = "";
var images = [];
var audios = [];
var fileContents = [];
var currentFileNames = [];
async function extractTextFromPDF(pdfData) {
try {
const pdf = await pdfjsLib.getDocument({ data: pdfData }).promise;
let fullText = '';
for (let i = 1; i <= pdf.numPages; i++) {
const page = await pdf.getPage(i);
const textContent = await page.getTextContent();
const pageText = textContent.items.map(item => item.str).join(' ');
fullText += pageText + '\n';
}
return fullText;
} catch (error) {
console.error('Error extracting text from PDF:', error);
throw error;
}
}
function readInputFile() {
if (!this.files || !this.files.length) return;
Array.from(this.files).forEach(file => {
const FR = new FileReader();
currentFileNames.push(file.name);
const fileExtension = file.name.split('.').pop().toLowerCase();
FR.addEventListener("load", async function(evt) {
if (fileExtension === 'pdf') {
try {
const content = await extractTextFromPDF(evt.target.result);
fileContents.push({ name: file.name, content: content });
} catch (error) {
console.error('Error processing PDF:', error);
fileContents.push({ name: file.name, content: "Error processing PDF file" });
}
} else {
// For text and markdown files
fileContents.push({ name: file.name, content: evt.target.result });
}
});
if (fileExtension === 'pdf') {
FR.readAsArrayBuffer(file);
} else {
FR.readAsText(file);
}
});
}
function submitPrompt(event) {
event.preventDefault();
const input = document.getElementById("input").value;
Alpine.store("chat").add("user", input, image, audio);
let fullInput = input;
// If there are file contents, append them to the input for the LLM
if (fileContents.length > 0) {
fullInput += "\n\nFile contents:\n";
fileContents.forEach(file => {
fullInput += `\n--- ${file.name} ---\n${file.content}\n`;
});
}
// Show file icons in chat if there are files
let displayContent = input;
if (currentFileNames.length > 0) {
displayContent += "\n\n";
currentFileNames.forEach(fileName => {
displayContent += `<i class="fa-solid fa-file"></i> Attached file: ${fileName}\n`;
});
}
// Add the message to the chat UI with just the icons
Alpine.store("chat").add("user", displayContent, images, audios);
// Update the last message in the store with the full content
const history = Alpine.store("chat").history;
if (history.length > 0) {
history[history.length - 1].content = fullInput;
}
document.getElementById("input").value = "";
const systemPrompt = localStorage.getItem("system_prompt");
Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
- promptGPT(systemPrompt, input);
+ promptGPT(systemPrompt, fullInput);
// Reset file contents and names after sending
fileContents = [];
currentFileNames = [];
}
function readInputImage() {
- if (!this.files || !this.files[0]) return;
+ if (!this.files || !this.files.length) return;
- const FR = new FileReader();
+ Array.from(this.files).forEach(file => {
+ const FR = new FileReader();
- FR.addEventListener("load", function(evt) {
- image = evt.target.result;
+ FR.addEventListener("load", function(evt) {
+ images.push(evt.target.result);
+ });
+ FR.readAsDataURL(file);
});
- FR.readAsDataURL(this.files[0]);
}
function readInputAudio() {
- if (!this.files || !this.files[0]) return;
+ if (!this.files || !this.files.length) return;
- const FR = new FileReader();
+ Array.from(this.files).forEach(file => {
+ const FR = new FileReader();
- FR.addEventListener("load", function(evt) {
- audio = evt.target.result;
+ FR.addEventListener("load", function(evt) {
+ audios.push(evt.target.result);
+ });
+ FR.readAsDataURL(file);
});
- FR.readAsDataURL(this.files[0]);
}
async function promptGPT(systemPrompt, input) {
const model = document.getElementById("chat-model").value;
// Set class "loader" to the element with "loader" id
//document.getElementById("loader").classList.add("loader");
// Make the "loader" visible
toggleLoader(true);
messages = Alpine.store("chat").messages();
@@ -105,7 +189,7 @@ async function promptGPT(systemPrompt, input) {
// loop all messages, and check if there are images or audios. If there are, we need to change the content field
messages.forEach((message) => {
- if (message.image || message.audio) {
+ if ((message.image && message.image.length > 0) || (message.audio && message.audio.length > 0)) {
// The content field now becomes an array
message.content = [
{
@@ -114,37 +198,42 @@
}
]
- if (message.image) {
- message.content.push(
- {
- "type": "image_url",
- "image_url": {
- "url": message.image,
- }
- }
- );
+ if (message.image && message.image.length > 0) {
+ message.image.forEach(img => {
+ message.content.push(
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": img,
+ }
+ }
+ );
+ });
delete message.image;
}
- if (message.audio) {
- message.content.push(
- {
- "type": "audio_url",
- "audio_url": {
- "url": message.audio,
- }
- }
- );
+ if (message.audio && message.audio.length > 0) {
+ message.audio.forEach(aud => {
+ message.content.push(
+ {
+ "type": "audio_url",
+ "audio_url": {
+ "url": aud,
+ }
+ }
+ );
+ });
delete message.audio;
}
}
});
// reset the form and the files
image = "";
audio = "";
images = [];
audios = [];
document.getElementById("input_image").value = null;
document.getElementById("input_audio").value = null;
document.getElementById("input_file").value = null;
document.getElementById("fileName").innerHTML = "";
// Source: https://stackoverflow.com/a/75751803/11386095
@@ -261,6 +350,7 @@ document.getElementById("prompt").addEventListener("submit", submitPrompt);
document.getElementById("input").focus();
document.getElementById("input_image").addEventListener("change", readInputImage);
document.getElementById("input_audio").addEventListener("change", readInputAudio);
document.getElementById("input_file").addEventListener("change", readInputFile);
storesystemPrompt = localStorage.getItem("system_prompt");
if (storesystemPrompt) {
@@ -274,3 +364,67 @@ marked.setOptions({
return hljs.highlightAuto(code).value;
},
});
document.addEventListener("alpine:init", () => {
Alpine.store("chat", {
history: [],
languages: [undefined],
systemPrompt: "",
clear() {
this.history.length = 0;
},
add(role, content, image, audio) {
const N = this.history.length - 1;
if (this.history.length && this.history[N].role === role) {
this.history[N].content += content;
this.history[N].html = DOMPurify.sanitize(
marked.parse(this.history[N].content)
);
// Merge new images and audio with existing ones
if (image && image.length > 0) {
this.history[N].image = [...(this.history[N].image || []), ...image];
}
if (audio && audio.length > 0) {
this.history[N].audio = [...(this.history[N].audio || []), ...audio];
}
} else {
let c = "";
const lines = content.split("\n");
lines.forEach((line) => {
c += DOMPurify.sanitize(marked.parse(line));
});
this.history.push({
role,
content,
html: c,
image: image || [],
audio: audio || []
});
}
document.getElementById('messages').scrollIntoView(false);
const parser = new DOMParser();
const html = parser.parseFromString(
this.history[this.history.length - 1].html,
"text/html"
);
const code = html.querySelectorAll("pre code");
if (!code.length) return;
code.forEach((el) => {
const language = el.className.split("language-")[1];
if (this.languages.includes(language)) return;
const script = document.createElement("script");
script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
document.head.appendChild(script);
this.languages.push(language);
});
},
messages() {
return this.history.map((message) => ({
role: message.role,
content: message.content,
image: message.image,
audio: message.audio,
}));
},
});
});
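One behavioural note on chat.js: readInputFile stores each file's text from an asynchronous FileReader load callback, so entries are appended to fileContents some time after the change event fires; a prompt submitted immediately after picking files may therefore not include every file yet. A minimal sketch, assuming the extractTextFromPDF helper added above, of a Promise-based variant that only resolves once every selected file has been read (hypothetical, not part of this commit):

// Hypothetical helper: read all selected files and resolve with
// [{ name, content }] once every FileReader has finished.
async function readAllInputFiles(fileList) {
  const readOne = (file) => new Promise((resolve, reject) => {
    const reader = new FileReader();
    const ext = file.name.split('.').pop().toLowerCase();
    reader.onerror = () => reject(reader.error);
    reader.onload = async (evt) => {
      try {
        const content = ext === 'pdf'
          ? await extractTextFromPDF(evt.target.result) // PDF -> plain text
          : evt.target.result;                          // text/markdown as-is
        resolve({ name: file.name, content });
      } catch (err) {
        resolve({ name: file.name, content: "Error processing PDF file" });
      }
    };
    if (ext === 'pdf') reader.readAsArrayBuffer(file);
    else reader.readAsText(file);
  });
  return Promise.all(Array.from(fileList).map(readOne));
}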


@@ -29,6 +29,11 @@ SOFTWARE.
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="static/chat.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
<script>
// Initialize PDF.js worker
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
</script>
{{ $allGalleryConfigs:=.GalleryConfig }}
{{ $model:=.Model}}
<body class="bg-slate-900 text-gray-100 flex flex-col h-screen" x-data="{ sidebarOpen: true }">
@@ -215,11 +220,12 @@ SOFTWARE.
<!-- Chat messages area -->
<div class="flex-1 p-4 overflow-auto" id="chat" x-data="{history: $store.chat.history}">
<p id="usage" x-show="history.length === 0" class="text-gray-300">
- Start chatting with the AI by typing a prompt in the input field below and pressing Enter.
- For models that support images, you can upload an image by clicking the paperclip
- <i class="fa-solid fa-paperclip"></i> icon.
- For models that support audio, you can upload an audio file by clicking the microphone
- <i class="fa-solid fa-microphone"></i> icon.
+ Start chatting with the AI by typing a prompt in the input field below and pressing Enter.<br>
+ <ul class="list-disc list-inside">
+ <li>For models that support images, you can upload an image by clicking the <i class="fa-solid fa-image"></i> icon.</li>
+ <li>For models that support audio, you can upload an audio file by clicking the <i class="fa-solid fa-microphone"></i> icon.</li>
+ <li>To send a text, markdown or PDF file, click the <i class="fa-solid fa-file"></i> icon.</li>
+ </ul>
</p>
<div id="messages" class="max-w-3xl mx-auto">
<template x-for="message in history">
@@ -231,8 +237,22 @@ SOFTWARE.
<div class="flex flex-col flex-1 items-end">
<span class="text-xs font-semibold text-gray-400">You</span>
<div class="p-2 flex-1 rounded bg-gray-700 text-white" x-html="message.html"></div>
<template x-if="message.image">
<img :src="message.image" alt="Image" class="rounded-lg mt-2 max-w-xs">
<template x-if="message.image && message.image.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(img, index) in message.image" :key="index">
<img :src="img" :alt="'Image ' + (index + 1)" class="rounded-lg max-w-xs">
</template>
</div>
</template>
<template x-if="message.audio && message.audio.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(audio, index) in message.audio" :key="index">
<audio controls class="w-full">
<source :src="audio" type="audio/*">
Your browser does not support the audio element.
</audio>
</template>
</div>
</template>
</div>
</div>
@@ -250,8 +270,22 @@ SOFTWARE.
<i class="fa-solid fa-copy"></i>
</button>
</div>
<template x-if="message.image">
<img :src="message.image" alt="Image" class="rounded-lg mt-2 max-w-xs">
<template x-if="message.image && message.image.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(img, index) in message.image" :key="index">
<img :src="img" :alt="'Image ' + (index + 1)" class="rounded-lg max-w-xs">
</template>
</div>
</template>
<template x-if="message.audio && message.audio.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(audio, index) in message.audio" :key="index">
<audio controls class="w-full">
<source :src="audio" type="audio/*">
Your browser does not support the audio element.
</audio>
</template>
</div>
</template>
</div>
</div>
@@ -289,8 +323,8 @@ SOFTWARE.
<button
type="button"
onclick="document.getElementById('input_image').click()"
class="fa-solid fa-paperclip text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an image"
class="fa-solid fa-image text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach images"
></button>
<button
type="button"
@@ -298,6 +332,12 @@ SOFTWARE.
class="fa-solid fa-microphone text-gray-400 absolute right-20 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an audio file"
></button>
<button
type="button"
onclick="document.getElementById('input_file').click()"
class="fa-solid fa-file text-gray-400 absolute right-28 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Upload text, markdown or PDF file"
></button>
<!-- Send button and loader in the same position -->
<div class="absolute right-3 top-4">
@@ -325,15 +365,26 @@ SOFTWARE.
<input
id="input_image"
type="file"
multiple
accept="image/*"
style="display: none;"
@change="fileName = $event.target.files[0].name"
@change="fileName = $event.target.files.length + ' image(s) selected'"
/>
<input
id="input_audio"
type="file"
multiple
accept="audio/*"
style="display: none;"
@change="fileName = $event.target.files[0].name"
@change="fileName = $event.target.files.length + ' audio file(s) selected'"
/>
<input
id="input_file"
type="file"
multiple
accept=".txt,.md,.pdf"
style="display: none;"
@change="fileName = $event.target.files.length + ' file(s) selected'"
/>
</div>
</form>