Mirror of https://github.com/ollama/ollama.git — synced 2026-01-12 00:06:57 +08:00
* WIP - MLX backend with gemma3 * MLX: add cmake and go tag build toggles To build the new MLX backend code: cmake --preset MLX cmake --build --preset MLX --parallel cmake --install build --component MLX go build -tags mlx . Note: the main.go entrypoint for the MLX engine will change in a follow up commit. * add experimental image generation runtime * add experimental image generation runtime * MLX: wire up cuda build for linux * MLX: get dependencies correct and dedup This is still too large for a unified github artifact, but is now "correct" for the mlx_cuda_v13 directory. * fix relative link bug in dedup * Add darwin build and readme * add go build tag for mlx dependent code and wire up build_darwin.sh * lint cleanup * macos: build mlx for x86 This will be CPU only. * cuda build instructions and fix drift from mlx bump * stale comment * Delete agent helper doc * Clean up readme.md * Revise README for tokenizer clarity and details Updated README to clarify tokenizer functionality and removed correctness section. --------- Co-authored-by: jmorganca <jmorganca@gmail.com>
111 lines · 2.7 KiB · Go
package kvcache

// NOTE(review): the entire WrapperCache implementation below was checked in
// fully commented out (work-in-progress MLX backend change — see the commit
// message). The "|" line-gutter artifacts introduced by the web extraction
// have been stripped so the file is valid Go again; the disabled code itself
// is preserved byte-for-byte and remains commented out until it is
// intentionally re-enabled.

// import (
// 	"math"
//
// 	"github.com/ollama/ollama/ml"
// 	"github.com/ollama/ollama/model/input"
// )
//
// // Wrapper cache is a container for multiple types of caches,
// // such as for the encoding and decoding portions of a model.
// type WrapperCache struct {
// 	// caches we are wrapping
// 	caches []Cache
//
// 	// cache to be used for this layer
// 	curType int
// }
//
// func NewWrapperCache(caches ...Cache) *WrapperCache {
// 	return &WrapperCache{
// 		caches: caches,
// 	}
// }
//
// func (c *WrapperCache) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity, maxBatch int) {
// 	for _, cache := range c.caches {
// 		cache.Init(backend, dtype, maxSequences, capacity, maxBatch)
// 	}
// }
//
// func (c *WrapperCache) SetConfig(config ml.CacheConfig) {
// 	for _, cache := range c.caches {
// 		cache.SetConfig(config)
// 	}
// }
//
// func (c *WrapperCache) Close() {
// 	for _, cache := range c.caches {
// 		cache.Close()
// 	}
// }
//
// func (c *WrapperCache) StartForward(ctx ml.Context, batch input.Batch, reserve bool) error {
// 	for i, cache := range c.caches {
// 		err := cache.StartForward(ctx, batch, reserve)
// 		if err != nil {
// 			// unwind on error - Remove with endIndex set to math.MaxInt32 does not fail
// 			for j := i - 1; j >= 0; j-- {
// 				for k := range batch.Positions {
// 					_ = c.caches[j].Remove(batch.Sequences[k], batch.Positions[k], math.MaxInt32)
// 				}
// 			}
// 			return err
// 		}
// 	}
//
// 	c.curType = 0
// 	return nil
// }
//
// func (c *WrapperCache) SetLayer(layer int) {
// 	for _, cache := range c.caches {
// 		cache.SetLayer(layer)
// 	}
// }
//
// func (c *WrapperCache) SetLayerType(layerType int) {
// 	c.curType = layerType
// }
//
// func (c *WrapperCache) UnderlyingCache() Cache {
// 	return c.caches[c.curType]
// }
//
// func (c *WrapperCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
// 	return c.caches[c.curType].Get(ctx)
// }
//
// func (c *WrapperCache) Put(ctx ml.Context, key, value ml.Tensor) {
// 	c.caches[c.curType].Put(ctx, key, value)
// }
//
// func (c *WrapperCache) CopyPrefix(srcSeq, dstSeq int, len int32) {
// 	for _, cache := range c.caches {
// 		cache.CopyPrefix(srcSeq, dstSeq, len)
// 	}
// }
//
// func (c *WrapperCache) CanResume(seq int, pos int32) bool {
// 	for _, cache := range c.caches {
// 		if !cache.CanResume(seq, pos) {
// 			return false
// 		}
// 	}
//
// 	return true
// }
//
// func (c *WrapperCache) Remove(seq int, beginIndex, endIndex int32) error {
// 	// If the one of these fails, the caller is supposed to retry with endIndex set to math.MaxInt32, which should not fail
// 	for _, cache := range c.caches {
// 		err := cache.Remove(seq, beginIndex, endIndex)
// 		if err != nil {
// 			return err
// 		}
// 	}
//
// 	return nil
// }