Run: run_9d00d62f4692
VERIFIED
System: llamaindex-memory
Benchmark: LoCoMo
Harness: v0.1.0
Verified Overall: 0%
Nuance Overall: 54.81481481481482%
Date: May 12, 2026
run_manifest.json (json)
{
"version": "1.0.0",
"runId": "run_9d00d62f4692",
"systemId": "sys_llamaindex-memory",
"systemName": "llamaindex-memory",
"benchmarkId": "bench_locomo-v1",
"benchmarkName": "LoCoMo",
"benchmarkVersion": "1.0",
"harnessVersion": "0.1.0",
"judgeModel": "openai/gpt-4o-mini",
"judgeTemperature": 0,
"startedAt": "2026-05-12T03:45:13.984866+00:00",
"completedAt": "2026-05-12T03:49:48.682889+00:00",
"scores": {
"verified": {
"recall": 0,
"temporal": 0,
"reasoning": 0,
"overall": 0
},
"nuance": {
"recall": 44.44444444444444,
"temporal": 40,
"reasoning": 80,
"overall": 54.81481481481482
}
},
"questionCount": 49,
"passCount": 32,
"failCount": 17,
"merkleRoot": "3aff6135ae444636f3eccc23f36846b90eb165ce20b6f137b7e2635f899419ae"
}