package kvcache // import ( // "fmt" // "github.com/ollama/ollama/ml" // "github.com/ollama/ollama/model/input" // ) // // Encoder cache stores K and V tensors that are position independent // // // // The tensors can be of any shape and will be returned as they were stored // // The mask is currently always nil // // // // Not currently safe for multiple sequences // type EncoderCache struct { // // config controls mostly backend-specific optimizations // config *ml.CacheConfig // // ** current forward pass ** // // the active layer for Get and Put // curLayer int // // if something is stored during this pass, this // // will be the position (but there is no guarantee // // anything will be stored) // curPos int32 // // curReserve indicates that this forward pass is only for // // memory reservation and we should not update our metadata // // based on it. // curReserve bool // // ** cache metadata ** // // was something stored in the cache? // encoderCached bool // // position of the cached data // encoderPos int32 // // ** cache data storage ** // backend ml.Backend // ctxs map[int]ml.Context // keys, values map[int]ml.Tensor // } // func NewEncoderCache() *EncoderCache { // return &EncoderCache{ // ctxs: make(map[int]ml.Context), // keys: make(map[int]ml.Tensor), // values: make(map[int]ml.Tensor), // } // } // func (c *EncoderCache) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity, maxBatch int) { // if c.config == nil { // var config ml.CacheConfig // if cc, ok := backend.(ml.BackendCacheConfig); ok { // config = cc.CacheConfig() // } // c.config = &config // } // if maxSequences > 1 { // panic(fmt.Errorf("encoder cache does not support multiple sequences; requested: %v", maxSequences)) // } // if c.config.CachePadding != 0 && c.config.CachePadding != 1 { // panic(fmt.Errorf("encoder cache is unable to enforce requested CachePadding (%v)", c.config.CachePadding)) // } // c.backend = backend // } // func (c *EncoderCache) SetConfig(config ml.CacheConfig) { // if c.config != nil { // panic("config cannot be changed after being previously set, either by the model or backend") // } // c.config = &config // } // func (c *EncoderCache) Close() { // for _, ctx := range c.ctxs { // ctx.Close() // } // } // func (c *EncoderCache) StartForward(ctx ml.Context, batch input.Batch, reserve bool) error { // // We work with the most recent image // if len(batch.Multimodal) > 0 { // c.curPos = batch.Positions[batch.Multimodal[len(batch.Multimodal)-1].Index] // } // c.curReserve = reserve // return nil // } // func (c *EncoderCache) SetLayer(layer int) { // c.curLayer = layer // } // func (c *EncoderCache) EncoderCached() bool { // return c.encoderCached // } // func (c *EncoderCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) { // return c.keys[c.curLayer], c.values[c.curLayer], nil // } // func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) { // if !c.curReserve { // c.encoderPos = c.curPos // c.encoderCached = true // } // if c.config.PermutedV { // value = value.Transpose(ctx, 1, 2, 0, 3) // } // if _, ok := c.ctxs[c.curLayer]; !ok { // c.ctxs[c.curLayer] = c.backend.NewContext().Layer(c.curLayer) // } // if _, ok := c.keys[c.curLayer]; !ok { // c.keys[c.curLayer] = c.ctxs[c.curLayer].Empty(key.DType(), key.Shape()...) // } // if _, ok := c.values[c.curLayer]; !ok { // c.values[c.curLayer] = c.ctxs[c.curLayer].Empty(value.DType(), value.Shape()...) // } // ctx.Forward( // key.Copy(ctx, c.keys[c.curLayer]), // value.Copy(ctx, c.values[c.curLayer]), // ) // } // func (c *EncoderCache) CopyPrefix(srcSeq, dstSeq int, len int32) { // panic("encoder cache does not support multiple sequences") // } // func (c *EncoderCache) CanResume(seq int, pos int32) bool { // return true // } // func (c *EncoderCache) Remove(seq int, beginIndex, endIndex int32) error { // if c.encoderPos >= beginIndex && c.encoderPos < endIndex { // c.encoderCached = false // } // return nil // }