feat: Add UseFastTokenizer

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-20 10:35:01 +00:00 · 2023-08-08 01:10:05 +02:00 · 2023-08-08 01:10:05 +02:00 · 3c8fc37c56
commit 3c8fc37c56
parent 39805b09e5
10 changed files with 198 additions and 169 deletions
--- a/api/backend/options.go
+++ b/api/backend/options.go
@@ -15,26 +15,27 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 		b = c.Batch
 	}
 	return &pb.ModelOptions{
-		ContextSize:   int32(c.ContextSize),
-		Seed:          int32(c.Seed),
-		NBatch:        int32(b),
-		NGQA:          c.NGQA,
-		ModelBaseName: c.ModelBaseName,
-		Device:        c.Device,
-		UseTriton:     c.Triton,
-		RMSNormEps:    c.RMSNormEps,
-		F16Memory:     c.F16,
-		MLock:         c.MMlock,
-		RopeFreqBase:  c.RopeFreqBase,
-		RopeFreqScale: c.RopeFreqScale,
-		NUMA:          c.NUMA,
-		Embeddings:    c.Embeddings,
-		LowVRAM:       c.LowVRAM,
-		NGPULayers:    int32(c.NGPULayers),
-		MMap:          c.MMap,
-		MainGPU:       c.MainGPU,
-		Threads:       int32(c.Threads),
-		TensorSplit:   c.TensorSplit,
+		ContextSize:      int32(c.ContextSize),
+		Seed:             int32(c.Seed),
+		NBatch:           int32(b),
+		NGQA:             c.NGQA,
+		ModelBaseName:    c.ModelBaseName,
+		UseFastTokenizer: c.UseFastTokenizer,
+		Device:           c.Device,
+		UseTriton:        c.Triton,
+		RMSNormEps:       c.RMSNormEps,
+		F16Memory:        c.F16,
+		MLock:            c.MMlock,
+		RopeFreqBase:     c.RopeFreqBase,
+		RopeFreqScale:    c.RopeFreqScale,
+		NUMA:             c.NUMA,
+		Embeddings:       c.Embeddings,
+		LowVRAM:          c.LowVRAM,
+		NGPULayers:       int32(c.NGPULayers),
+		MMap:             c.MMap,
+		MainGPU:          c.MainGPU,
+		Threads:          int32(c.Threads),
+		TensorSplit:      c.TensorSplit,
 	}
 }