Get started
Quick Start
CLI Benchmarking
openvals benchmark --dataset finance --models mistral,llama3 --config finance --output finance_report.html
Python Example
from openvals.benchmarking.runner import BenchmarkRunner
from openvals.models.ollama_model import OllamaModel
from openvals.datasets.loader import load_dataset
dataset = load_dataset("examples/sample_eval.json")
models = {
"llama2": OllamaModel("llama2"),
"llama3": OllamaModel("llama3"),
"mistral": OllamaModel("mistral")
}
runner = BenchmarkRunner(models, dataset)
results = runner.run()
print(results)
Example Output
=== FINAL RANKING ===
1. mistral (0.91)
2. llama3 (0.87)
3. llama2 (0.84)