package convert

import (
	"fmt"

	"github.com/ollama/ollama/fs/ggml"
)

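// mixtralModel wraps llamaModel with the mixture-of-experts fields Mixtral
// adds to the Hugging Face config: the total number of local experts and the
// number of experts routed per token.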
type mixtralModel struct {
	llamaModel
	NumLocalExperts    uint32 `json:"num_local_experts"`
	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}

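// KV copies the base llama metadata and, when present, records the expert
// counts as llama.expert_count and llama.expert_used_count.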
func (p *mixtralModel) KV(t *Tokenizer) KV {
	kv := p.llamaModel.KV(t)

	if p.NumLocalExperts > 0 {
		kv["llama.expert_count"] = p.NumLocalExperts
	}

	if p.NumExpertsPerToken > 0 {
		kv["llama.expert_used_count"] = p.NumExpertsPerToken
	}

	return kv
}

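// Tensors merges the per-expert w1/w2/w3 projections of every layer into
// combined "*_exps" tensors via mergeTensors, then defers to the embedded
// llamaModel for the remaining tensors.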
func (p *mixtralModel) Tensors(ts []Tensor) []*ggml.Tensor {
	merges := make([]merge, 0, p.NumHiddenLayers*6)
	for i := range p.NumHiddenLayers {
		merges = append(merges, merge{
			fmt.Sprintf("blk.%d.*.w1.weight", i),
			fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
		}, merge{
			fmt.Sprintf("blk.%d.*.w1.bias", i),
			fmt.Sprintf("blk.%d.ffn_gate_exps.bias", i),
		}, merge{
fmt.Sprintf("blk.%d.*.w2.weight", i),
|
|
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
|
|
}, merge{
|
|
fmt.Sprintf("blk.%d.*.w2.bias", i),
|
|
fmt.Sprintf("blk.%d.ffn_up_exps.bias", i),
|
|
}, merge{
|
|
fmt.Sprintf("blk.%d.*.w3.weight", i),
|
|
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
|
|
}, merge{
|
|
fmt.Sprintf("blk.%d.*.w3.bias", i),
|
|
fmt.Sprintf("blk.%d.ffn_down_exps.bias", i),
|
|
})
|
|
	}

	out, ts := mergeTensors(ts, merges...)
	return append(out, p.llamaModel.Tensors(ts)...)
}

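// Replacements extends the base llamaModel replacements with the
// Mixtral-specific block_sparse_moe router and expert prefixes.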
func (p *mixtralModel) Replacements() []string {
	return append(
		p.llamaModel.Replacements(),
		"model.layers", "blk",
		"block_sparse_moe.gate", "ffn_gate_inp",
		"block_sparse_moe.experts.", ".",
	)
}