Spaces:
Configuration error
Configuration error
package backend | |
import ( | |
"math/rand" | |
"os" | |
"path/filepath" | |
"github.com/mudler/LocalAI/core/config" | |
pb "github.com/mudler/LocalAI/pkg/grpc/proto" | |
"github.com/mudler/LocalAI/pkg/model" | |
"github.com/rs/zerolog/log" | |
) | |
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option { | |
name := c.Name | |
if name == "" { | |
name = c.Model | |
} | |
defOpts := []model.Option{ | |
model.WithBackendString(c.Backend), | |
model.WithModel(c.Model), | |
model.WithAssetDir(so.AssetsDestination), | |
model.WithContext(so.Context), | |
model.WithModelID(name), | |
} | |
threads := 1 | |
if c.Threads != nil { | |
threads = *c.Threads | |
} | |
if so.Threads != 0 { | |
threads = so.Threads | |
} | |
c.Threads = &threads | |
grpcOpts := grpcModelOpts(c) | |
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts)) | |
if so.SingleBackend { | |
defOpts = append(defOpts, model.WithSingleActiveBackend()) | |
} | |
if so.ParallelBackendRequests { | |
defOpts = append(defOpts, model.EnableParallelRequests) | |
} | |
if c.GRPC.Attempts != 0 { | |
defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts)) | |
} | |
if c.GRPC.AttemptsSleepTime != 0 { | |
defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime)) | |
} | |
for k, v := range so.ExternalGRPCBackends { | |
defOpts = append(defOpts, model.WithExternalBackend(k, v)) | |
} | |
return append(defOpts, opts...) | |
} | |
func getSeed(c config.BackendConfig) int32 { | |
var seed int32 = config.RAND_SEED | |
if c.Seed != nil { | |
seed = int32(*c.Seed) | |
} | |
if seed == config.RAND_SEED { | |
seed = rand.Int31() | |
} | |
return seed | |
} | |
func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { | |
b := 512 | |
if c.Batch != 0 { | |
b = c.Batch | |
} | |
f16 := false | |
if c.F16 != nil { | |
f16 = *c.F16 | |
} | |
embeddings := false | |
if c.Embeddings != nil { | |
embeddings = *c.Embeddings | |
} | |
lowVRAM := false | |
if c.LowVRAM != nil { | |
lowVRAM = *c.LowVRAM | |
} | |
mmap := false | |
if c.MMap != nil { | |
mmap = *c.MMap | |
} | |
ctxSize := 1024 | |
if c.ContextSize != nil { | |
ctxSize = *c.ContextSize | |
} | |
mmlock := false | |
if c.MMlock != nil { | |
mmlock = *c.MMlock | |
} | |
nGPULayers := 9999999 | |
if c.NGPULayers != nil { | |
nGPULayers = *c.NGPULayers | |
} | |
return &pb.ModelOptions{ | |
CUDA: c.CUDA || c.Diffusers.CUDA, | |
SchedulerType: c.Diffusers.SchedulerType, | |
PipelineType: c.Diffusers.PipelineType, | |
CFGScale: c.Diffusers.CFGScale, | |
LoraAdapter: c.LoraAdapter, | |
LoraScale: c.LoraScale, | |
LoraAdapters: c.LoraAdapters, | |
LoraScales: c.LoraScales, | |
F16Memory: f16, | |
LoraBase: c.LoraBase, | |
IMG2IMG: c.Diffusers.IMG2IMG, | |
CLIPModel: c.Diffusers.ClipModel, | |
CLIPSubfolder: c.Diffusers.ClipSubFolder, | |
CLIPSkip: int32(c.Diffusers.ClipSkip), | |
ControlNet: c.Diffusers.ControlNet, | |
ContextSize: int32(ctxSize), | |
Seed: getSeed(c), | |
NBatch: int32(b), | |
NoMulMatQ: c.NoMulMatQ, | |
DraftModel: c.DraftModel, | |
AudioPath: c.VallE.AudioPath, | |
Quantization: c.Quantization, | |
LoadFormat: c.LoadFormat, | |
GPUMemoryUtilization: c.GPUMemoryUtilization, | |
TrustRemoteCode: c.TrustRemoteCode, | |
EnforceEager: c.EnforceEager, | |
SwapSpace: int32(c.SwapSpace), | |
MaxModelLen: int32(c.MaxModelLen), | |
TensorParallelSize: int32(c.TensorParallelSize), | |
MMProj: c.MMProj, | |
FlashAttention: c.FlashAttention, | |
NoKVOffload: c.NoKVOffloading, | |
YarnExtFactor: c.YarnExtFactor, | |
YarnAttnFactor: c.YarnAttnFactor, | |
YarnBetaFast: c.YarnBetaFast, | |
YarnBetaSlow: c.YarnBetaSlow, | |
NGQA: c.NGQA, | |
RMSNormEps: c.RMSNormEps, | |
MLock: mmlock, | |
RopeFreqBase: c.RopeFreqBase, | |
RopeScaling: c.RopeScaling, | |
Type: c.ModelType, | |
RopeFreqScale: c.RopeFreqScale, | |
NUMA: c.NUMA, | |
Embeddings: embeddings, | |
LowVRAM: lowVRAM, | |
NGPULayers: int32(nGPULayers), | |
MMap: mmap, | |
MainGPU: c.MainGPU, | |
Threads: int32(*c.Threads), | |
TensorSplit: c.TensorSplit, | |
// AutoGPTQ | |
ModelBaseName: c.AutoGPTQ.ModelBaseName, | |
Device: c.AutoGPTQ.Device, | |
UseTriton: c.AutoGPTQ.Triton, | |
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer, | |
// RWKV | |
Tokenizer: c.Tokenizer, | |
} | |
} | |
func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOptions { | |
promptCachePath := "" | |
if c.PromptCachePath != "" { | |
p := filepath.Join(modelPath, c.PromptCachePath) | |
err := os.MkdirAll(filepath.Dir(p), 0750) | |
if err == nil { | |
promptCachePath = p | |
} else { | |
log.Error().Err(err).Str("promptCachePath", promptCachePath).Msg("error creating prompt cache folder") | |
} | |
} | |
return &pb.PredictOptions{ | |
Temperature: float32(*c.Temperature), | |
TopP: float32(*c.TopP), | |
NDraft: c.NDraft, | |
TopK: int32(*c.TopK), | |
Tokens: int32(*c.Maxtokens), | |
Threads: int32(*c.Threads), | |
PromptCacheAll: c.PromptCacheAll, | |
PromptCacheRO: c.PromptCacheRO, | |
PromptCachePath: promptCachePath, | |
F16KV: *c.F16, | |
DebugMode: *c.Debug, | |
Grammar: c.Grammar, | |
NegativePromptScale: c.NegativePromptScale, | |
RopeFreqBase: c.RopeFreqBase, | |
RopeFreqScale: c.RopeFreqScale, | |
NegativePrompt: c.NegativePrompt, | |
Mirostat: int32(*c.LLMConfig.Mirostat), | |
MirostatETA: float32(*c.LLMConfig.MirostatETA), | |
MirostatTAU: float32(*c.LLMConfig.MirostatTAU), | |
Debug: *c.Debug, | |
StopPrompts: c.StopWords, | |
Repeat: int32(c.RepeatLastN), | |
FrequencyPenalty: float32(c.FrequencyPenalty), | |
PresencePenalty: float32(c.PresencePenalty), | |
Penalty: float32(c.RepeatPenalty), | |
NKeep: int32(c.Keep), | |
Batch: int32(c.Batch), | |
IgnoreEOS: c.IgnoreEOS, | |
Seed: getSeed(c), | |
MLock: *c.MMlock, | |
MMap: *c.MMap, | |
MainGPU: c.MainGPU, | |
TensorSplit: c.TensorSplit, | |
TailFreeSamplingZ: float32(*c.TFZ), | |
TypicalP: float32(*c.TypicalP), | |
} | |
} | |