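// Package convert translates model and adapter checkpoints from external
// formats (such as safetensors) into GGUF files that Ollama can load.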
package convert

import (
	"cmp"
	"encoding/json"
	"errors"
	"fmt"
	"io/fs"
	"iter"
	"log/slog"
	"maps"
	"os"
	"slices"
	"strings"

	ofs "github.com/ollama/ollama/fs"
	"github.com/ollama/ollama/fs/ggml"
)

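// ModelParameters holds the fields of a model's config.json that drive
// conversion, including the nested text_config block used by multimodal models.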
type ModelParameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`

	// TODO is this needed?
	ModelType string `json:"model_type"`

	TextModel struct {
		VocabSize  uint32 `json:"vocab_size"`
		HiddenSize uint32 `json:"hidden_size"`
		ModelType  string `json:"model_type"`
	} `json:"text_config"`
}

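// AdapterParameters holds the fields of a LoRA adapter_config.json needed to
// write the adapter's GGUF metadata.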
type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

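// KV is a set of GGUF metadata entries keyed by their fully qualified names.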
type KV map[string]any

func (kv KV) Architecture() string {
	return kv.String("general.architecture", "unknown")
}

type valueTypes interface {
	uint8 | int8 | uint16 | int16 |
		uint32 | int32 | uint64 | int64 |
		string | float32 | float64 | bool
}

type arrayValueTypes interface {
	[]uint8 | []int8 | []uint16 | []int16 |
		[]uint32 | []int32 | []uint64 | []int64 |
		[]string | []float32 | []float64 | []bool
}

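// keyValue looks up key in kv, prefixing it with the model architecture unless
// it already belongs to the "tokenizer." or "general." namespace. It returns
// the typed value and true on a hit, or the first default and false otherwise;
// callers must supply at least one default or a miss will panic.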
func keyValue[T valueTypes | arrayValueTypes](kv KV, key string, defaultValue ...T) (T, bool) {
	if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
		key = kv.Architecture() + "." + key
	}

	if val, ok := kv[key].(T); ok {
		return val, true
	}
	return defaultValue[0], false
}

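// The typed accessors below wrap keyValue for each supported scalar and array
// type, appending a default fallback so a missing key cannot panic.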
func (kv KV) String(key string, defaultValue ...string) string {
	val, _ := keyValue(kv, key, append(defaultValue, "")...)
	return val
}

func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
	val, _ := keyValue(kv, key, append(defaultValue, 0)...)
	return val
}

func (kv KV) Float(key string, defaultValue ...float32) float32 {
	val, _ := keyValue(kv, key, append(defaultValue, 0)...)
	return val
}

func (kv KV) Bool(key string, defaultValue ...bool) bool {
	val, _ := keyValue(kv, key, append(defaultValue, false)...)
	return val
}

func (kv KV) Strings(key string, defaultValue ...[]string) []string {
	val, _ := keyValue(kv, key, append(defaultValue, []string{""})...)
	return val
}

func (kv KV) Ints(key string, defaultValue ...[]int32) []int32 {
	val, _ := keyValue(kv, key, append(defaultValue, []int32{0})...)
	return val
}

func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
	val, _ := keyValue(kv, key, append(defaultValue, []uint32{0})...)
	return val
}

func (kv KV) Floats(key string, defaultValue ...[]float32) []float32 {
	val, _ := keyValue(kv, key, append(defaultValue, []float32{0})...)
	return val
}

func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
	val, _ := keyValue(kv, key, append(defaultValue, []bool{false})...)
	return val
}

func (kv KV) Len() int {
	return len(kv)
}

func (kv KV) Keys() iter.Seq[string] {
	return maps.Keys(kv)
}

func (kv KV) Value(key string) any {
	return kv[key]
}

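// KV builds the GGUF metadata shared by all converted models: file type,
// quantization version, and the tokenizer's vocabulary, merges, chat template,
// and special token configuration.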
func (ModelParameters) KV(t *Tokenizer) KV {
	kv := KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}

	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}

	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		if len(sv.IDs) > 0 {
			kv[fmt.Sprintf("tokenizer.ggml.%s_token_ids", sv.Key())] = sv.IDs
		}
	}

	return kv
}

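// KV builds the GGUF metadata for a LoRA adapter, preferring the per-adapter
// alpha from lora_parameters over the top-level lora_alpha when both are set.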
func (p AdapterParameters) KV() KV {
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

	kv := KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
	}
}

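// ModelKV is implemented by converters that can produce GGUF metadata from a
// parsed tokenizer.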
type ModelKV interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) KV
}

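// ModelConverter translates a source model's metadata and tensors into their
// GGUF equivalents.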
type ModelConverter interface {
	ModelKV

	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
	Tensors([]Tensor) []*ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
}

type moreParser interface {
	parseMore(fs.FS) error
}

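// AdapterConverter translates a LoRA adapter's metadata and tensors into their
// GGUF equivalents for a given base model architecture.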
type AdapterConverter interface {
	// KV maps parameters to LLM key-values
	KV(ofs.Config) KV
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
	Tensors([]Tensor) []*ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
}

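// ConvertAdapter reads a LoRA adapter from fsys and writes it to f as GGUF.
// The base model's configuration selects the architecture-specific converter;
// only llama and gemma2 base architectures are currently supported.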
func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ofs.Config) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch := baseKV.Architecture()
	if arch == "" {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		return errors.New("unsupported architecture")
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
}

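// LoadModelMetadata reads config.json from fsys, selects a converter for the
// reported architecture, parses the tokenizer, and reconciles the declared
// vocabulary size with the tokens actually found, padding with [PAD<n>]
// placeholders when the tokenizer vocabulary is smaller than declared.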
func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
	bts, err := fs.ReadFile(fsys, "config.json")
	if err != nil {
		return nil, nil, err
	}

	var p ModelParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return nil, nil, err
	}

	if len(p.Architectures) < 1 {
		return nil, nil, errors.New("unknown architecture")
	}

	var conv ModelConverter
	switch p.Architectures[0] {
	case "LlamaForCausalLM":
		conv = &llamaModel{}
	case "MllamaForConditionalGeneration":
		conv = &mllamaModel{}
	case "Llama4ForConditionalGeneration":
		conv = &llama4Model{}
	case "Mistral3ForConditionalGeneration":
		conv = &mistral3Model{}
	case "Ministral3ForCausalLM":
		conv = &mistral3CausalModel{}
	case "MixtralForCausalLM":
		conv = &mixtralModel{}
	case "GemmaForCausalLM":
		conv = &gemmaModel{}
	case "Gemma2ForCausalLM":
		conv = &gemma2Model{}
	case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
		conv = &gemma3Model{Architecture: p.Architectures[0]}
	case "Gemma3nForConditionalGeneration":
		conv = &gemma3nModel{}
	case "Phi3ForCausalLM":
		conv = &phi3Model{}
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
	case "Qwen2_5_VLForConditionalGeneration":
		conv = &qwen25VLModel{}
	case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
		conv = &qwen3VLModel{}
	case "Olmo3ForCausalLM":
		conv = &olmoModel{}
	case "BertModel":
		conv = &bertModel{}
	case "NomicBertModel", "NomicBertMoEModel":
		conv = &nomicbertModel{}
	case "CohereForCausalLM":
		conv = &commandrModel{}
	case "GptOssForCausalLM":
		conv = &gptossModel{}
	case "DeepseekOCRForCausalLM":
		conv = &deepseekocr{}
	case "DeepseekV3ForCausalLM":
		conv = &deepseek2Model{}
	default:
		return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return nil, nil, err
	}

	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return nil, nil, err
		}
	}

	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
	if err != nil {
		return nil, nil, err
	}

	vocabSize := int(cmp.Or(p.VocabSize, p.TextModel.VocabSize))

	switch {
	case vocabSize == 0:
		slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
	case vocabSize > len(t.Vocabulary.Tokens):
		slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
		}
	case vocabSize < len(t.Vocabulary.Tokens):
		slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Vocabulary.Tokens))
		p.VocabSize = uint32(len(t.Vocabulary.Tokens))
		p.TextModel.VocabSize = uint32(len(t.Vocabulary.Tokens))
	default:
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
	}
	return conv, t, nil
}

// ConvertModel writes an Ollama compatible model to the provided *os.File based
// on configurations and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, f *os.File) error {
	kv, t, err := LoadModelMetadata(fsys)
	if err != nil {
		return err
	}
	conv := kv.(ModelConverter)

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	return writeFile(f, conv.KV(t), conv.Tensors(ts))
}

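// writeFile reverses each tensor's shape into GGUF's dimension ordering and
// writes the metadata and tensors out as a GGUF file.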
func writeFile(f *os.File, kv KV, ts []*ggml.Tensor) error {
	for i := range ts {
		ts[i].Shape = slices.Clone(ts[i].Shape)
		slices.Reverse(ts[i].Shape)
	}
	return ggml.WriteGGUF(f, kv, ts)
}