Run: run_a9de0b9efb60
VERIFIED
System: langchain-memory
Benchmark: LongMemEval
Harness: v0.1.0
Verified Overall: 0%
Nuance Overall: 33.41727716727717%
Date: May 11, 2026
run_manifest.json (json)
{
"version": "1.0.0",
"runId": "run_a9de0b9efb60",
"systemId": "sys_langchain-memory",
"systemName": "langchain-memory",
"benchmarkId": "bench_longmemeval-v1",
"benchmarkName": "LongMemEval",
"benchmarkVersion": "1.0",
"harnessVersion": "0.1.0",
"judgeModel": "openai/gpt-4o-mini",
"judgeTemperature": 0,
"startedAt": "2026-05-11T18:27:41.774893+00:00",
"completedAt": "2026-05-11T19:00:13.411797+00:00",
"scores": {
"verified": {
"recall": 0,
"temporal": 0,
"reasoning": 0,
"overall": 0
},
"nuance": {
"recall": 68.75,
"temporal": 23.809523809523807,
"reasoning": 7.6923076923076925,
"overall": 33.41727716727717
}
},
"questionCount": 50,
"passCount": 17,
"failCount": 33,
"merkleRoot": "ed2e1317686032ba0d55b9733aa6e2d477ac2ed1e2d632ca7b3683e399eae4a7"
}