chore(backend gallery): re-order and add description for vLLM

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2025-06-17 17:30:51 +02:00
parent d68660bd5a
commit 6270d01171

View file

@@ -1,143 +1,87 @@
## vLLM
# Base vLLM entry. The `&vllm` anchor lets the variants below reuse its
# license, urls, tags, icon, description and alias through a merge key, so
# that shared metadata is written only once.
- &vllm
  name: "cuda11-vllm"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm"
  license: apache-2.0
  urls:
    - https://github.com/vllm-project/vllm
  tags:
    - text-to-text
    - multimodal
    - GPTQ
    - AWQ
    - AutoRound
    - INT4
    - INT8
    - FP8
  icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/assets/logos/vllm-logo-text-dark.png
  # Literal block scalar: line breaks inside the description are preserved.
  description: |
    vLLM is a fast and easy-to-use library for LLM inference and serving.
    Originally developed in the Sky Computing Lab at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry.
    vLLM is fast with:
    State-of-the-art serving throughput
    Efficient management of attention key and value memory with PagedAttention
    Continuous batching of incoming requests
    Fast model execution with CUDA/HIP graph
    Quantizations: GPTQ, AWQ, AutoRound, INT4, INT8, and FP8
    Optimized CUDA kernels, including integration with FlashAttention and FlashInfer
    Speculative decoding
    Chunked prefill
  alias: "vllm"
# Variants using the `latest-` image tags. Each one merges the base entry and
# overrides only `name` and `uri`; explicitly written keys take precedence
# over merged ones, so every variant also carries alias "vllm".
- !!merge <<: *vllm
  name: "cuda12-vllm"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm"
- !!merge <<: *vllm
  name: "rocm-vllm"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm"
- !!merge <<: *vllm
  name: "intel-sycl-f32-vllm"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-vllm"
- !!merge <<: *vllm
  name: "intel-sycl-f16-vllm"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-vllm"
# Variants using the `master-` image tags (names carry a `-master` suffix).
- !!merge <<: *vllm
  name: "cuda11-vllm-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-vllm"
- !!merge <<: *vllm
  name: "cuda12-vllm-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm"
- !!merge <<: *vllm
  name: "rocm-vllm-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vllm"
- !!merge <<: *vllm
  name: "intel-sycl-f32-vllm-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-vllm"
- !!merge <<: *vllm
  name: "intel-sycl-f16-vllm-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
## Rerankers
- name: "cuda11-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers"
alias: "cuda11-rerankers"
- name: "cuda11-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm"
alias: "cuda11-vllm"
- name: "cuda11-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers"
alias: "cuda11-transformers"
- name: "cuda11-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers"
alias: "cuda11-diffusers"
- name: "cuda11-exllama2"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2"
alias: "cuda11-exllama2"
- name: "cuda12-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers"
alias: "cuda12-rerankers"
- name: "cuda12-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm"
alias: "cuda12-vllm"
- name: "cuda12-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
alias: "cuda12-transformers"
- name: "cuda12-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
alias: "cuda12-diffusers"
- name: "cuda12-exllama2"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2"
alias: "cuda12-exllama2"
- name: "rocm-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers"
alias: "rocm-rerankers"
- name: "rocm-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm"
alias: "rocm-vllm"
- name: "rocm-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers"
alias: "rocm-transformers"
- name: "rocm-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers"
alias: "rocm-diffusers"
- name: "intel-sycl-f32-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-rerankers"
alias: "intel-sycl-f32-rerankers"
- name: "intel-sycl-f16-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-rerankers"
alias: "intel-sycl-f16-rerankers"
- name: "intel-sycl-f32-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-vllm"
alias: "intel-sycl-f32-vllm"
- name: "intel-sycl-f16-vllm"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-vllm"
alias: "intel-sycl-f16-vllm"
- name: "intel-sycl-f32-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-transformers"
alias: "intel-sycl-f32-transformers"
- name: "intel-sycl-f16-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-transformers"
alias: "intel-sycl-f16-transformers"
- name: "intel-sycl-f32-diffusers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-diffusers"
alias: "intel-sycl-f32-diffusers"
- name: "rocm-rerankers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers"
alias: "rocm-rerankers"
- name: "cuda11-rerankers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers"
alias: "rerankers"
- name: "cuda11-vllm-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-vllm"
alias: "vllm"
- name: "cuda11-transformers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-transformers"
alias: "transformers"
- name: "cuda11-diffusers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers"
alias: "diffusers"
- name: "cuda11-exllama2-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2"
alias: "exllama2"
- name: "cuda12-rerankers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers"
alias: "rerankers"
- name: "cuda12-vllm-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm"
alias: "vllm"
- name: "cuda12-transformers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-transformers"
alias: "transformers"
- name: "cuda12-diffusers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers"
alias: "diffusers"
- name: "cuda12-exllama2-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2"
alias: "exllama2"
- name: "rocm-rerankers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-rerankers"
alias: "rerankers"
- name: "rocm-vllm-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vllm"
alias: "vllm"
- name: "rocm-transformers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-transformers"
alias: "transformers"
- name: "rocm-diffusers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-diffusers"
alias: "diffusers"
- name: "intel-sycl-f32-rerankers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-rerankers"
alias: "rerankers"
@@ -145,14 +89,38 @@
- name: "intel-sycl-f16-rerankers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers"
alias: "rerankers"
## Transformers
- name: "cuda12-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers"
alias: "cuda12-transformers"
- name: "rocm-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers"
alias: "rocm-transformers"
- name: "intel-sycl-f32-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-transformers"
alias: "intel-sycl-f32-transformers"
- name: "intel-sycl-f16-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-transformers"
alias: "intel-sycl-f16-transformers"
- name: "cuda11-transformers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-transformers"
alias: "transformers"
- name: "cuda11-transformers"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers"
alias: "cuda11-transformers"
- name: "cuda12-transformers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-transformers"
alias: "transformers"
- name: "rocm-transformers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-transformers"
alias: "transformers"
- name: "intel-sycl-f32-vllm-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-vllm"
alias: "vllm"
- name: "intel-sycl-f16-vllm-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
alias: "vllm"
- name: "intel-sycl-f32-transformers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-transformers"
@@ -161,11 +129,56 @@
- name: "intel-sycl-f16-transformers-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-transformers"
alias: "transformers"
## Diffusers
# Entries whose image tag starts with `latest-` use their own name as alias;
# entries whose image tag starts with `master-` (names ending in `-master`)
# all share the alias "diffusers".
- name: "cuda12-diffusers"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers"
  alias: "cuda12-diffusers"
- name: "rocm-diffusers"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers"
  alias: "rocm-diffusers"
- name: "cuda11-diffusers"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers"
  alias: "cuda11-diffusers"
- name: "intel-sycl-f32-diffusers"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-diffusers"
  alias: "intel-sycl-f32-diffusers"
- name: "cuda11-diffusers-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers"
  alias: "diffusers"
- name: "cuda12-diffusers-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers"
  alias: "diffusers"
- name: "rocm-diffusers-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-diffusers"
  alias: "diffusers"
- name: "intel-sycl-f32-diffusers-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-diffusers"
  alias: "diffusers"
## exllama2
# Entries whose image tag starts with `latest-` use their own name as alias;
# entries whose image tag starts with `master-` (names ending in `-master`)
# share the alias "exllama2".
- name: "cuda11-exllama2"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2"
  alias: "cuda11-exllama2"
- name: "cuda12-exllama2"
  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2"
  alias: "cuda12-exllama2"
- name: "cuda11-exllama2-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2"
  alias: "exllama2"
- name: "cuda12-exllama2-master"
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2"
  alias: "exllama2"
## kokoro
- name: "cuda11-kokoro-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro"
alias: "kokoro"
@@ -194,6 +207,7 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-kokoro"
alias: "kokoro"
## faster-whisper
- name: "cuda11-faster-whisper-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-faster-whisper"
alias: "faster-whisper"
@@ -222,6 +236,8 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-faster-whisper"
alias: "faster-whisper"
## coqui
- name: "cuda11-coqui-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui"
alias: "coqui"
@@ -250,6 +266,7 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-coqui"
alias: "coqui"
## bark
- name: "cuda11-bark-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark"
alias: "bark"
@@ -278,6 +295,8 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-bark"
alias: "bark"
## chatterbox
- name: "cuda11-chatterbox-master"
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox"
alias: "chatterbox"