feat(ui): allow uploading PDF and text files, and add support for multiple input files (#5538)

* Support file inputs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: support multiple files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* show preview of files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Authored by Ettore Di Giacinto on 2025-05-31 08:47:48 +02:00; committed by GitHub
parent 1cc4525f15
commit 59db154cbc
3 changed files with 257 additions and 52 deletions
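For orientation before the diffs: with multiple attachments supported, the UI ends up sending a single user message whose content is an array of parts, with every image and audio attachment added as its own image_url / audio_url part and the text of uploaded files appended to the prompt text. A minimal sketch of that shape (the image_url/audio_url part names come from the chat.js diff below; the text part and all values are illustrative placeholders):

// Illustrative only: one user message carrying text plus several attachments.
const exampleMessage = {
  role: "user",
  content: [
    { type: "text", text: "Summarize the attachments.\n\nFile contents:\n--- notes.md ---\n..." },
    { type: "image_url", image_url: { url: "data:image/png;base64,..." } },
    { type: "image_url", image_url: { url: "data:image/jpeg;base64,..." } },
    { type: "audio_url", audio_url: { url: "data:audio/wav;base64,..." } },
  ],
};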


@@ -312,7 +312,7 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
log.Error().Msgf("Failed encoding audio: %s", err)
continue CONTENT
}
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
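The hunk above only corrects a copy-pasted log message (this branch handles audio, not images). For context, the surrounding Go code decodes the audio URL, whether a remote URL or a data URI, into base64 before attaching it to the message. A hedged browser-side illustration of the same idea in JavaScript (hypothetical helper, not the Go implementation used here):

// Hypothetical illustration only: turn either a data: URI or a remote URL
// into a plain base64 string, mirroring what the Go helper above is used for.
async function contentURIAsBase64(uri) {
  if (uri.startsWith("data:")) {
    // Strip the "data:<mime>;base64," prefix and keep the payload.
    return uri.slice(uri.indexOf(",") + 1);
  }
  const buf = await (await fetch(uri)).arrayBuffer();
  let binary = "";
  new Uint8Array(buf).forEach((b) => { binary += String.fromCharCode(b); });
  return btoa(binary);
}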


@@ -48,49 +48,133 @@ function submitSystemPrompt(event) {
document.getElementById("systemPrompt").blur();
}
var image = "";
var audio = "";
var images = [];
var audios = [];
var fileContents = [];
var currentFileNames = [];
async function extractTextFromPDF(pdfData) {
try {
const pdf = await pdfjsLib.getDocument({ data: pdfData }).promise;
let fullText = '';
for (let i = 1; i <= pdf.numPages; i++) {
const page = await pdf.getPage(i);
const textContent = await page.getTextContent();
const pageText = textContent.items.map(item => item.str).join(' ');
fullText += pageText + '\n';
}
return fullText;
} catch (error) {
console.error('Error extracting text from PDF:', error);
throw error;
}
}
function readInputFile() {
if (!this.files || !this.files.length) return;
Array.from(this.files).forEach(file => {
const FR = new FileReader();
currentFileNames.push(file.name);
const fileExtension = file.name.split('.').pop().toLowerCase();
FR.addEventListener("load", async function(evt) {
if (fileExtension === 'pdf') {
try {
const content = await extractTextFromPDF(evt.target.result);
fileContents.push({ name: file.name, content: content });
} catch (error) {
console.error('Error processing PDF:', error);
fileContents.push({ name: file.name, content: "Error processing PDF file" });
}
} else {
// For text and markdown files
fileContents.push({ name: file.name, content: evt.target.result });
}
});
if (fileExtension === 'pdf') {
FR.readAsArrayBuffer(file);
} else {
FR.readAsText(file);
}
});
}
function submitPrompt(event) {
event.preventDefault();
const input = document.getElementById("input").value;
Alpine.store("chat").add("user", input, image, audio);
let fullInput = input;
// If there are file contents, append them to the input for the LLM
if (fileContents.length > 0) {
fullInput += "\n\nFile contents:\n";
fileContents.forEach(file => {
fullInput += `\n--- ${file.name} ---\n${file.content}\n`;
});
}
// Show file icons in chat if there are files
let displayContent = input;
if (currentFileNames.length > 0) {
displayContent += "\n\n";
currentFileNames.forEach(fileName => {
displayContent += `<i class="fa-solid fa-file"></i> Attached file: ${fileName}\n`;
});
}
// Add the message to the chat UI with just the icons
Alpine.store("chat").add("user", displayContent, images, audios);
// Update the last message in the store with the full content
const history = Alpine.store("chat").history;
if (history.length > 0) {
history[history.length - 1].content = fullInput;
}
document.getElementById("input").value = "";
const systemPrompt = localStorage.getItem("system_prompt");
Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
- promptGPT(systemPrompt, input);
+ promptGPT(systemPrompt, fullInput);
// Reset file contents and names after sending
fileContents = [];
currentFileNames = [];
}
function readInputImage() {
- if (!this.files || !this.files[0]) return;
+ if (!this.files || !this.files.length) return;
- const FR = new FileReader();
+ Array.from(this.files).forEach(file => {
+ const FR = new FileReader();
- FR.addEventListener("load", function(evt) {
- image = evt.target.result;
+ FR.addEventListener("load", function(evt) {
+ images.push(evt.target.result);
+ });
+ FR.readAsDataURL(file);
});
- FR.readAsDataURL(this.files[0]);
}
function readInputAudio() {
- if (!this.files || !this.files[0]) return;
+ if (!this.files || !this.files.length) return;
- const FR = new FileReader();
+ Array.from(this.files).forEach(file => {
+ const FR = new FileReader();
- FR.addEventListener("load", function(evt) {
- audio = evt.target.result;
+ FR.addEventListener("load", function(evt) {
+ audios.push(evt.target.result);
+ });
+ FR.readAsDataURL(file);
});
- FR.readAsDataURL(this.files[0]);
}
async function promptGPT(systemPrompt, input) {
const model = document.getElementById("chat-model").value;
// Set class "loader" to the element with "loader" id
//document.getElementById("loader").classList.add("loader");
// Make the "loader" visible
toggleLoader(true);
messages = Alpine.store("chat").messages();
@@ -105,7 +189,7 @@ async function promptGPT(systemPrompt, input) {
// loop all messages, and check if there are images or audios. If there are, we need to change the content field
messages.forEach((message) => {
- if (message.image || message.audio) {
+ if ((message.image && message.image.length > 0) || (message.audio && message.audio.length > 0)) {
// The content field now becomes an array
message.content = [
{
@@ -114,37 +198,42 @@
}
]
- if (message.image) {
- message.content.push(
- {
- "type": "image_url",
- "image_url": {
- "url": message.image,
- }
- }
- );
+ if (message.image && message.image.length > 0) {
+ message.image.forEach(img => {
+ message.content.push(
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": img,
+ }
+ }
+ );
+ });
delete message.image;
}
- if (message.audio) {
- message.content.push(
- {
- "type": "audio_url",
- "audio_url": {
- "url": message.audio,
- }
- }
- );
+ if (message.audio && message.audio.length > 0) {
+ message.audio.forEach(aud => {
+ message.content.push(
+ {
+ "type": "audio_url",
+ "audio_url": {
+ "url": aud,
+ }
+ }
+ );
+ });
delete message.audio;
}
}
});
// reset the form and the files
image = "";
audio = "";
images = [];
audios = [];
document.getElementById("input_image").value = null;
document.getElementById("input_audio").value = null;
document.getElementById("input_file").value = null;
document.getElementById("fileName").innerHTML = "";
// Source: https://stackoverflow.com/a/75751803/11386095
@@ -261,6 +350,7 @@ document.getElementById("prompt").addEventListener("submit", submitPrompt);
document.getElementById("input").focus();
document.getElementById("input_image").addEventListener("change", readInputImage);
document.getElementById("input_audio").addEventListener("change", readInputAudio);
document.getElementById("input_file").addEventListener("change", readInputFile);
storesystemPrompt = localStorage.getItem("system_prompt");
if (storesystemPrompt) {
@@ -274,3 +364,67 @@ marked.setOptions({
return hljs.highlightAuto(code).value;
},
});
document.addEventListener("alpine:init", () => {
Alpine.store("chat", {
history: [],
languages: [undefined],
systemPrompt: "",
clear() {
this.history.length = 0;
},
add(role, content, image, audio) {
const N = this.history.length - 1;
if (this.history.length && this.history[N].role === role) {
this.history[N].content += content;
this.history[N].html = DOMPurify.sanitize(
marked.parse(this.history[N].content)
);
// Merge new images and audio with existing ones
if (image && image.length > 0) {
this.history[N].image = [...(this.history[N].image || []), ...image];
}
if (audio && audio.length > 0) {
this.history[N].audio = [...(this.history[N].audio || []), ...audio];
}
} else {
let c = "";
const lines = content.split("\n");
lines.forEach((line) => {
c += DOMPurify.sanitize(marked.parse(line));
});
this.history.push({
role,
content,
html: c,
image: image || [],
audio: audio || []
});
}
document.getElementById('messages').scrollIntoView(false);
const parser = new DOMParser();
const html = parser.parseFromString(
this.history[this.history.length - 1].html,
"text/html"
);
const code = html.querySelectorAll("pre code");
if (!code.length) return;
code.forEach((el) => {
const language = el.className.split("language-")[1];
if (this.languages.includes(language)) return;
const script = document.createElement("script");
script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
document.head.appendChild(script);
this.languages.push(language);
});
},
messages() {
return this.history.map((message) => ({
role: message.role,
content: message.content,
image: message.image,
audio: message.audio,
}));
},
});
});
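One behavioural note on chat.js: readInputFile stores each file's text from an asynchronous FileReader load callback, so entries are appended to fileContents some time after the change event fires; a prompt submitted immediately after picking files may therefore not include every file yet. A minimal sketch, assuming the extractTextFromPDF helper added above, of a Promise-based variant that only resolves once every selected file has been read (hypothetical, not part of this commit):

// Hypothetical helper: read all selected files and resolve with
// [{ name, content }] once every FileReader has finished.
async function readAllInputFiles(fileList) {
  const readOne = (file) => new Promise((resolve, reject) => {
    const reader = new FileReader();
    const ext = file.name.split('.').pop().toLowerCase();
    reader.onerror = () => reject(reader.error);
    reader.onload = async (evt) => {
      try {
        const content = ext === 'pdf'
          ? await extractTextFromPDF(evt.target.result) // PDF -> plain text
          : evt.target.result;                          // text/markdown as-is
        resolve({ name: file.name, content });
      } catch (err) {
        resolve({ name: file.name, content: "Error processing PDF file" });
      }
    };
    if (ext === 'pdf') reader.readAsArrayBuffer(file);
    else reader.readAsText(file);
  });
  return Promise.all(Array.from(fileList).map(readOne));
}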


@@ -29,6 +29,11 @@ SOFTWARE.
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="static/chat.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
<script>
// Initialize PDF.js worker
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
</script>
{{ $allGalleryConfigs:=.GalleryConfig }}
{{ $model:=.Model}}
<body class="bg-slate-900 text-gray-100 flex flex-col h-screen" x-data="{ sidebarOpen: true }">
@@ -215,11 +220,12 @@ SOFTWARE.
<!-- Chat messages area -->
<div class="flex-1 p-4 overflow-auto" id="chat" x-data="{history: $store.chat.history}">
<p id="usage" x-show="history.length === 0" class="text-gray-300">
- Start chatting with the AI by typing a prompt in the input field below and pressing Enter.
- For models that support images, you can upload an image by clicking the paperclip
- <i class="fa-solid fa-paperclip"></i> icon.
- For models that support audio, you can upload an audio file by clicking the microphone
- <i class="fa-solid fa-microphone"></i> icon.
+ Start chatting with the AI by typing a prompt in the input field below and pressing Enter.<br>
+ <ul class="list-disc list-inside">
+ <li>For models that support images, you can upload an image by clicking the <i class="fa-solid fa-image"></i> icon.</li>
+ <li>For models that support audio, you can upload an audio file by clicking the <i class="fa-solid fa-microphone"></i> icon.</li>
+ <li>To send a text, markdown or PDF file, click the <i class="fa-solid fa-file"></i> icon.</li>
+ </ul>
</p>
<div id="messages" class="max-w-3xl mx-auto">
<template x-for="message in history">
@@ -231,8 +237,22 @@ SOFTWARE.
<div class="flex flex-col flex-1 items-end">
<span class="text-xs font-semibold text-gray-400">You</span>
<div class="p-2 flex-1 rounded bg-gray-700 text-white" x-html="message.html"></div>
<template x-if="message.image">
<img :src="message.image" alt="Image" class="rounded-lg mt-2 max-w-xs">
<template x-if="message.image && message.image.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(img, index) in message.image" :key="index">
<img :src="img" :alt="'Image ' + (index + 1)" class="rounded-lg max-w-xs">
</template>
</div>
</template>
<template x-if="message.audio && message.audio.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(audio, index) in message.audio" :key="index">
<audio controls class="w-full">
<source :src="audio" type="audio/*">
Your browser does not support the audio element.
</audio>
</template>
</div>
</template>
</div>
</div>
@@ -250,8 +270,22 @@ SOFTWARE.
<i class="fa-solid fa-copy"></i>
</button>
</div>
<template x-if="message.image">
<img :src="message.image" alt="Image" class="rounded-lg mt-2 max-w-xs">
<template x-if="message.image && message.image.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(img, index) in message.image" :key="index">
<img :src="img" :alt="'Image ' + (index + 1)" class="rounded-lg max-w-xs">
</template>
</div>
</template>
<template x-if="message.audio && message.audio.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(audio, index) in message.audio" :key="index">
<audio controls class="w-full">
<source :src="audio" type="audio/*">
Your browser does not support the audio element.
</audio>
</template>
</div>
</template>
</div>
</div>
@@ -289,8 +323,8 @@ SOFTWARE.
<button
type="button"
onclick="document.getElementById('input_image').click()"
class="fa-solid fa-paperclip text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an image"
class="fa-solid fa-image text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach images"
></button>
<button
type="button"
@@ -298,6 +332,12 @@ SOFTWARE.
class="fa-solid fa-microphone text-gray-400 absolute right-20 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Attach an audio file"
></button>
<button
type="button"
onclick="document.getElementById('input_file').click()"
class="fa-solid fa-file text-gray-400 absolute right-28 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
title="Upload text, markdown or PDF file"
></button>
<!-- Send button and loader in the same position -->
<div class="absolute right-3 top-4">
@@ -325,15 +365,26 @@ SOFTWARE.
<input
id="input_image"
type="file"
multiple
accept="image/*"
style="display: none;"
@change="fileName = $event.target.files[0].name"
@change="fileName = $event.target.files.length + ' image(s) selected'"
/>
<input
id="input_audio"
type="file"
multiple
accept="audio/*"
style="display: none;"
@change="fileName = $event.target.files[0].name"
@change="fileName = $event.target.files.length + ' audio file(s) selected'"
/>
<input
id="input_file"
type="file"
multiple
accept=".txt,.md,.pdf"
style="display: none;"
@change="fileName = $event.target.files.length + ' file(s) selected'"
/>
</div>
</form>