[
  {
    "trial": "trial_0e7a01a2971d4850",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 77,
    "url": "trial/trial_0e7a01a2971d4850"
  },
  {
    "trial": "trial_2f134ffcfd434d52",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 2,
    "url": "trial/trial_2f134ffcfd434d52"
  },
  {
    "trial": "trial_31f521dbd1ed4a92",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 79,
    "url": "trial/trial_31f521dbd1ed4a92"
  },
  {
    "trial": "trial_37394253217147ac",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 135,
    "url": "trial/trial_37394253217147ac"
  },
  {
    "trial": "trial_4351836741b047b6",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 80,
    "url": "trial/trial_4351836741b047b6"
  },
  {
    "trial": "trial_46dc4a220e22497b",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 2,
    "url": "trial/trial_46dc4a220e22497b"
  },
  {
    "trial": "trial_4df7d00c02894112",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 75,
    "url": "trial/trial_4df7d00c02894112"
  },
  {
    "trial": "trial_53b5cf09ac784a61",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 76,
    "url": "trial/trial_53b5cf09ac784a61"
  },
  {
    "trial": "trial_9d6bbcd7afc1433c",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 122,
    "url": "trial/trial_9d6bbcd7afc1433c"
  },
  {
    "trial": "trial_f688a5c449f64346",
    "task": "59-fix-broken-cognito-m2m-httpapi-jwt-scope-gated",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 93,
    "url": "trial/trial_f688a5c449f64346"
  },
  {
    "trial": "trial_086eaa8897bb4ce2",
    "task": "60-fix-broken-ecs-fargate-secrets-kms-exec-role",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 156,
    "url": "trial/trial_086eaa8897bb4ce2"
  },
  {
    "trial": "trial_22ec650ebc2542a9",
    "task": "60-fix-broken-ecs-fargate-secrets-kms-exec-role",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 148,
    "url": "trial/trial_22ec650ebc2542a9"
  },
  {
    "trial": "trial_61069b692d404b94",
    "task": "60-fix-broken-ecs-fargate-secrets-kms-exec-role",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 167,
    "url": "trial/trial_61069b692d404b94"
  },
  {
    "trial": "trial_6929a25b0ba24f09",
    "task": "60-fix-broken-ecs-fargate-secrets-kms-exec-role",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 110,
    "url": "trial/trial_6929a25b0ba24f09"
  },
  {
    "trial": "trial_78a359be2b3c480d",
    "task": "60-fix-broken-ecs-fargate-secrets-kms-exec-role",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 105,
    "url": "trial/trial_78a359be2b3c480d"
  },
  {
    "trial": "trial_8ebb24d9fc154d8a",
    "task": "60-fix-broken-ecs-fargate-secrets-kms-exec-role",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 127,
    "url": "trial/trial_8ebb24d9fc154d8a"
  },
  {
    "trial": "trial_9102d6e9d7e44a10",
    "task": "60-fix-broken-ecs-fargate-secrets-kms-exec-role",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 180,
    "url": "trial/trial_9102d6e9d7e44a10"
  },
  {
    "trial": "trial_1b010c50f1564294",
    "task": "Enable-gated streaming fold stage",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 21,
    "url": "trial/trial_1b010c50f1564294"
  },
  {
    "trial": "trial_bbd44ef0b7534335",
    "task": "Enable-gated streaming fold stage",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 27,
    "url": "trial/trial_bbd44ef0b7534335"
  },
  {
    "trial": "trial_c7b1379ed840442c",
    "task": "Enable-gated streaming fold stage",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 35,
    "url": "trial/trial_c7b1379ed840442c"
  },
  {
    "trial": "trial_5b8323bfb12f4d46",
    "task": "Instruction-retire commit handshake",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 20,
    "url": "trial/trial_5b8323bfb12f4d46"
  },
  {
    "trial": "trial_704619bfebd74c87",
    "task": "Instruction-retire commit handshake",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 13,
    "url": "trial/trial_704619bfebd74c87"
  },
  {
    "trial": "trial_eb35fd86efe04f94",
    "task": "Instruction-retire commit handshake",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 25,
    "url": "trial/trial_eb35fd86efe04f94"
  },
  {
    "trial": "trial_d2e8267697c34681",
    "task": "Multi-cycle signed divider with a start/valid handshake",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_d2e8267697c34681"
  },
  {
    "trial": "trial_f431ac01bdbb48be",
    "task": "Multi-cycle signed divider with a start/valid handshake",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 20,
    "url": "trial/trial_f431ac01bdbb48be"
  },
  {
    "trial": "trial_f935df2d17704991",
    "task": "Multi-cycle signed divider with a start/valid handshake",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 23,
    "url": "trial/trial_f935df2d17704991"
  },
  {
    "trial": "trial_22fc534995d6467e",
    "task": "Resynchronising serial byte receiver",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 18,
    "url": "trial/trial_22fc534995d6467e"
  },
  {
    "trial": "trial_246ab1d177c84493",
    "task": "Resynchronising serial byte receiver",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 18,
    "url": "trial/trial_246ab1d177c84493"
  },
  {
    "trial": "trial_24742b6234704582",
    "task": "Resynchronising serial byte receiver",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_24742b6234704582"
  },
  {
    "trial": "trial_1f3b2d0a84aa4e65",
    "task": "Serial bit-destuff framer",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_1f3b2d0a84aa4e65"
  },
  {
    "trial": "trial_58a44a8af850420f",
    "task": "Serial bit-destuff framer",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_58a44a8af850420f"
  },
  {
    "trial": "trial_ec43627b4e72444b",
    "task": "Serial bit-destuff framer",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 13,
    "url": "trial/trial_ec43627b4e72444b"
  },
  {
    "trial": "trial_57a056c312884b9c",
    "task": "Wait-state register-file completer",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_57a056c312884b9c"
  },
  {
    "trial": "trial_58cd0066266548b1",
    "task": "Wait-state register-file completer",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 19,
    "url": "trial/trial_58cd0066266548b1"
  },
  {
    "trial": "trial_b6d9d3dfc4dc41d0",
    "task": "Wait-state register-file completer",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_b6d9d3dfc4dc41d0"
  },
  {
    "trial": "trial_25fcbfa8676f4c8e",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_25fcbfa8676f4c8e"
  },
  {
    "trial": "trial_2cc24f759ecd4dc4",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_2cc24f759ecd4dc4"
  },
  {
    "trial": "trial_4fcb2c0874794aef",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_4fcb2c0874794aef"
  },
  {
    "trial": "trial_6de98564565744b6",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_6de98564565744b6"
  },
  {
    "trial": "trial_80771adf237a409d",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_80771adf237a409d"
  },
  {
    "trial": "trial_81c27a0629c040eb",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_81c27a0629c040eb"
  },
  {
    "trial": "trial_a9878c4ecef141de",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_a9878c4ecef141de"
  },
  {
    "trial": "trial_bc421a2ed4804c5e",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_bc421a2ed4804c5e"
  },
  {
    "trial": "trial_d0d96b55a76f4f23",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_d0d96b55a76f4f23"
  },
  {
    "trial": "trial_ed14b03551054081",
    "task": "adaptive-quadrature",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_ed14b03551054081"
  },
  {
    "trial": "trial_046fc7c95f5b4571",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 289,
    "url": "trial/trial_046fc7c95f5b4571"
  },
  {
    "trial": "trial_504d870c31aa4fff",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 314,
    "url": "trial/trial_504d870c31aa4fff"
  },
  {
    "trial": "trial_7afdf17026b6442c",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 278,
    "url": "trial/trial_7afdf17026b6442c"
  },
  {
    "trial": "trial_960fa32380ff467b",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 265,
    "url": "trial/trial_960fa32380ff467b"
  },
  {
    "trial": "trial_a71ba0d2552a40b8",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 268,
    "url": "trial/trial_a71ba0d2552a40b8"
  },
  {
    "trial": "trial_a992fbb1e27448a6",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 277,
    "url": "trial/trial_a992fbb1e27448a6"
  },
  {
    "trial": "trial_b1bbeed0328248b9",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 251,
    "url": "trial/trial_b1bbeed0328248b9"
  },
  {
    "trial": "trial_c428b26caeaa404a",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 231,
    "url": "trial/trial_c428b26caeaa404a"
  },
  {
    "trial": "trial_f1edf582707444c7",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 248,
    "url": "trial/trial_f1edf582707444c7"
  },
  {
    "trial": "trial_f74276b1e9d940fd",
    "task": "airfoil-self-noise",
    "category": "ml-engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 352,
    "url": "trial/trial_f74276b1e9d940fd"
  },
  {
    "trial": "trial_0f94d593db944c63",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 274,
    "url": "trial/trial_0f94d593db944c63"
  },
  {
    "trial": "trial_2f1bf3413a6f4c74",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 217,
    "url": "trial/trial_2f1bf3413a6f4c74"
  },
  {
    "trial": "trial_494ac36c68d64636",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 1099,
    "url": "trial/trial_494ac36c68d64636"
  },
  {
    "trial": "trial_4cdd653e42de43e0",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 170,
    "url": "trial/trial_4cdd653e42de43e0"
  },
  {
    "trial": "trial_5477ee9f96cf4f2d",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 335,
    "url": "trial/trial_5477ee9f96cf4f2d"
  },
  {
    "trial": "trial_951a5d1420f24896",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 195,
    "url": "trial/trial_951a5d1420f24896"
  },
  {
    "trial": "trial_9f12a436f08c4dfb",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 209,
    "url": "trial/trial_9f12a436f08c4dfb"
  },
  {
    "trial": "trial_a2944c69b75046ab",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 201,
    "url": "trial/trial_a2944c69b75046ab"
  },
  {
    "trial": "trial_a7c9fbc87e7b494a",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 313,
    "url": "trial/trial_a7c9fbc87e7b494a"
  },
  {
    "trial": "trial_c8d4433af7b34581",
    "task": "airfrans-high-reynolds-drag-extrapolation",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 125,
    "url": "trial/trial_c8d4433af7b34581"
  },
  {
    "trial": "trial_0485af2148524316",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 24,
    "url": "trial/trial_0485af2148524316"
  },
  {
    "trial": "trial_174435577863417d",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 25,
    "url": "trial/trial_174435577863417d"
  },
  {
    "trial": "trial_198227cde9e84d9c",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 27,
    "url": "trial/trial_198227cde9e84d9c"
  },
  {
    "trial": "trial_229efec76d414fbe",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 39,
    "url": "trial/trial_229efec76d414fbe"
  },
  {
    "trial": "trial_3236444f6bdf40c6",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 23,
    "url": "trial/trial_3236444f6bdf40c6"
  },
  {
    "trial": "trial_7e19238bd1954d01",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 26,
    "url": "trial/trial_7e19238bd1954d01"
  },
  {
    "trial": "trial_8e8d1d56dd104896",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 25,
    "url": "trial/trial_8e8d1d56dd104896"
  },
  {
    "trial": "trial_8f5a4fae83e44289",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 22,
    "url": "trial/trial_8f5a4fae83e44289"
  },
  {
    "trial": "trial_a5ec10b10eef4573",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 2,
    "url": "trial/trial_a5ec10b10eef4573"
  },
  {
    "trial": "trial_a63c772ff5a54c19",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 23,
    "url": "trial/trial_a63c772ff5a54c19"
  },
  {
    "trial": "trial_c5f9c07fa1544188",
    "task": "anova-stats",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 39,
    "url": "trial/trial_c5f9c07fa1544188"
  },
  {
    "trial": "trial_15a23e43c7264b11",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 103,
    "url": "trial/trial_15a23e43c7264b11"
  },
  {
    "trial": "trial_1f3cabb116f044fe",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 219,
    "url": "trial/trial_1f3cabb116f044fe"
  },
  {
    "trial": "trial_461a83ef3d1f4170",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 131,
    "url": "trial/trial_461a83ef3d1f4170"
  },
  {
    "trial": "trial_5e2048a37e364924",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 229,
    "url": "trial/trial_5e2048a37e364924"
  },
  {
    "trial": "trial_72f8fc44613f4688",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 182,
    "url": "trial/trial_72f8fc44613f4688"
  },
  {
    "trial": "trial_7587152f80334e29",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 180,
    "url": "trial/trial_7587152f80334e29"
  },
  {
    "trial": "trial_90b519ef7c2c4546",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 220,
    "url": "trial/trial_90b519ef7c2c4546"
  },
  {
    "trial": "trial_9bcd966dff844466",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 179,
    "url": "trial/trial_9bcd966dff844466"
  },
  {
    "trial": "trial_a0626e4a6cb9487c",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 208,
    "url": "trial/trial_a0626e4a6cb9487c"
  },
  {
    "trial": "trial_c65081b09ed3456b",
    "task": "apigw-http-api-jwt-authorizer-lambda-integration",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 179,
    "url": "trial/trial_c65081b09ed3456b"
  },
  {
    "trial": "trial_41ac5d621001452a",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 19,
    "url": "trial/trial_41ac5d621001452a"
  },
  {
    "trial": "trial_5670bc8ed12c4718",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 17,
    "url": "trial/trial_5670bc8ed12c4718"
  },
  {
    "trial": "trial_77cec19df4804ba8",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 26,
    "url": "trial/trial_77cec19df4804ba8"
  },
  {
    "trial": "trial_aa21c7fe74da44e9",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 23,
    "url": "trial/trial_aa21c7fe74da44e9"
  },
  {
    "trial": "trial_be33eb183e4b41c3",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 36,
    "url": "trial/trial_be33eb183e4b41c3"
  },
  {
    "trial": "trial_cd164552bac84f5c",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 28,
    "url": "trial/trial_cd164552bac84f5c"
  },
  {
    "trial": "trial_cd90a99a39134a9a",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 30,
    "url": "trial/trial_cd90a99a39134a9a"
  },
  {
    "trial": "trial_e34bc6697df745f6",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 51,
    "url": "trial/trial_e34bc6697df745f6"
  },
  {
    "trial": "trial_eee2ab29e0824dcf",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 26,
    "url": "trial/trial_eee2ab29e0824dcf"
  },
  {
    "trial": "trial_fd33edaac30b47a4",
    "task": "apigw-sqs-fifo-direct-integration",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 28,
    "url": "trial/trial_fd33edaac30b47a4"
  },
  {
    "trial": "trial_546bcc0cba90423a",
    "task": "athena-workgroup-result-encryption-cmk-enforced",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 74,
    "url": "trial/trial_546bcc0cba90423a"
  },
  {
    "trial": "trial_69a0b5b174614f26",
    "task": "athena-workgroup-result-encryption-cmk-enforced",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 57,
    "url": "trial/trial_69a0b5b174614f26"
  },
  {
    "trial": "trial_865751fe85b84759",
    "task": "athena-workgroup-result-encryption-cmk-enforced",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 101,
    "url": "trial/trial_865751fe85b84759"
  },
  {
    "trial": "trial_9cc207f567f142f7",
    "task": "athena-workgroup-result-encryption-cmk-enforced",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 75,
    "url": "trial/trial_9cc207f567f142f7"
  },
  {
    "trial": "trial_eee3e4a4ef6e4afd",
    "task": "athena-workgroup-result-encryption-cmk-enforced",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 59,
    "url": "trial/trial_eee3e4a4ef6e4afd"
  },
  {
    "trial": "trial_32b9f54b7dac41eb",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 31,
    "url": "trial/trial_32b9f54b7dac41eb"
  },
  {
    "trial": "trial_6489f2b0370f4e9a",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 29,
    "url": "trial/trial_6489f2b0370f4e9a"
  },
  {
    "trial": "trial_96087325142d4ce8",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 30,
    "url": "trial/trial_96087325142d4ce8"
  },
  {
    "trial": "trial_98a41b8bc80646a8",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 21,
    "url": "trial/trial_98a41b8bc80646a8"
  },
  {
    "trial": "trial_9ea070ba6efe465c",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 18,
    "url": "trial/trial_9ea070ba6efe465c"
  },
  {
    "trial": "trial_adc72b3d4c0144de",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_adc72b3d4c0144de"
  },
  {
    "trial": "trial_afcb4cb58a014b4f",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 22,
    "url": "trial/trial_afcb4cb58a014b4f"
  },
  {
    "trial": "trial_ba18bddc85e743d5",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_ba18bddc85e743d5"
  },
  {
    "trial": "trial_e56e79f512e3436e",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 19,
    "url": "trial/trial_e56e79f512e3436e"
  },
  {
    "trial": "trial_ee8d9ec7dee54ca8",
    "task": "beam-deflection-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 27,
    "url": "trial/trial_ee8d9ec7dee54ca8"
  },
  {
    "trial": "trial_02dd89e1bb0d4486",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_02dd89e1bb0d4486"
  },
  {
    "trial": "trial_06be8428670d4663",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_06be8428670d4663"
  },
  {
    "trial": "trial_0c50cb8d87744503",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 14,
    "url": "trial/trial_0c50cb8d87744503"
  },
  {
    "trial": "trial_12e1485a286340ac",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 13,
    "url": "trial/trial_12e1485a286340ac"
  },
  {
    "trial": "trial_18ea7977342a47d2",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 17,
    "url": "trial/trial_18ea7977342a47d2"
  },
  {
    "trial": "trial_1d0f2915493641f8",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_1d0f2915493641f8"
  },
  {
    "trial": "trial_1ebd2a0a133d4d65",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_1ebd2a0a133d4d65"
  },
  {
    "trial": "trial_2538cb293b9c4f52",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 14,
    "url": "trial/trial_2538cb293b9c4f52"
  },
  {
    "trial": "trial_304d36d2749249c4",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_304d36d2749249c4"
  },
  {
    "trial": "trial_314f5e0b286043f1",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_314f5e0b286043f1"
  },
  {
    "trial": "trial_420d4e872864450b",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_420d4e872864450b"
  },
  {
    "trial": "trial_44d08d58fa1b4415",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_44d08d58fa1b4415"
  },
  {
    "trial": "trial_4ba59db14e6e49e8",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_4ba59db14e6e49e8"
  },
  {
    "trial": "trial_5047b0bf583e4ef0",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 14,
    "url": "trial/trial_5047b0bf583e4ef0"
  },
  {
    "trial": "trial_5066d9cd12754f87",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_5066d9cd12754f87"
  },
  {
    "trial": "trial_5c3de2872e274536",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_5c3de2872e274536"
  },
  {
    "trial": "trial_663aa72c3f774309",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_663aa72c3f774309"
  },
  {
    "trial": "trial_664597737f524381",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_664597737f524381"
  },
  {
    "trial": "trial_69b169ac9ab84d85",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_69b169ac9ab84d85"
  },
  {
    "trial": "trial_6dae4d8449ad4a94",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_6dae4d8449ad4a94"
  },
  {
    "trial": "trial_87beda05ddd943f9",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_87beda05ddd943f9"
  },
  {
    "trial": "trial_8b20ba13bb514627",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_8b20ba13bb514627"
  },
  {
    "trial": "trial_8d8be2c2ec03420f",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_8d8be2c2ec03420f"
  },
  {
    "trial": "trial_92c34045b5f445c8",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_92c34045b5f445c8"
  },
  {
    "trial": "trial_9d1d4c93db184b67",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_9d1d4c93db184b67"
  },
  {
    "trial": "trial_a227f824fedf4eb1",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_a227f824fedf4eb1"
  },
  {
    "trial": "trial_a613fd3babf34870",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_a613fd3babf34870"
  },
  {
    "trial": "trial_a9f3f6920e334a0f",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_a9f3f6920e334a0f"
  },
  {
    "trial": "trial_b2c2ab8978bb486e",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_b2c2ab8978bb486e"
  },
  {
    "trial": "trial_b6aca22469584023",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_b6aca22469584023"
  },
  {
    "trial": "trial_ba417c7509014a5e",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_ba417c7509014a5e"
  },
  {
    "trial": "trial_bc636f05b1474570",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_bc636f05b1474570"
  },
  {
    "trial": "trial_bd4ec8f4996b43af",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_bd4ec8f4996b43af"
  },
  {
    "trial": "trial_bf46deb92aca4817",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 17,
    "url": "trial/trial_bf46deb92aca4817"
  },
  {
    "trial": "trial_cde58fa944124f61",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_cde58fa944124f61"
  },
  {
    "trial": "trial_d1d267f7bdaa4ba9",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_d1d267f7bdaa4ba9"
  },
  {
    "trial": "trial_d39fb29128894eff",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_d39fb29128894eff"
  },
  {
    "trial": "trial_d652d1daeb4c4578",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_d652d1daeb4c4578"
  },
  {
    "trial": "trial_d7b8b0690ccd401f",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_d7b8b0690ccd401f"
  },
  {
    "trial": "trial_fc5690651ad746f4",
    "task": "cg-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_fc5690651ad746f4"
  },
  {
    "trial": "trial_3e83031d4a584b48",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_3e83031d4a584b48"
  },
  {
    "trial": "trial_433c38a8db16412f",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_433c38a8db16412f"
  },
  {
    "trial": "trial_4e8355b8b20143f1",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 15,
    "url": "trial/trial_4e8355b8b20143f1"
  },
  {
    "trial": "trial_51a9862677f54e05",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 15,
    "url": "trial/trial_51a9862677f54e05"
  },
  {
    "trial": "trial_54ab9adb2b524fba",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_54ab9adb2b524fba"
  },
  {
    "trial": "trial_71f9a30c211c46e1",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 14,
    "url": "trial/trial_71f9a30c211c46e1"
  },
  {
    "trial": "trial_991442f38ed24ae4",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 18,
    "url": "trial/trial_991442f38ed24ae4"
  },
  {
    "trial": "trial_b8da074dc0754d82",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_b8da074dc0754d82"
  },
  {
    "trial": "trial_d6f7c34a5af94d36",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_d6f7c34a5af94d36"
  },
  {
    "trial": "trial_e59eca30a66d4cb1",
    "task": "cholesky-solver",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 20,
    "url": "trial/trial_e59eca30a66d4cb1"
  },
  {
    "trial": "trial_01d49ea7c0094195",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 160,
    "url": "trial/trial_01d49ea7c0094195"
  },
  {
    "trial": "trial_026eff7e92414f77",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 157,
    "url": "trial/trial_026eff7e92414f77"
  },
  {
    "trial": "trial_2d04af4db672466c",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 169,
    "url": "trial/trial_2d04af4db672466c"
  },
  {
    "trial": "trial_55e625eea0314773",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 134,
    "url": "trial/trial_55e625eea0314773"
  },
  {
    "trial": "trial_7ae4ae530c8d4fee",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 131,
    "url": "trial/trial_7ae4ae530c8d4fee"
  },
  {
    "trial": "trial_7e4c5961915240f8",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 144,
    "url": "trial/trial_7e4c5961915240f8"
  },
  {
    "trial": "trial_7ea1b018db424e35",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 158,
    "url": "trial/trial_7ea1b018db424e35"
  },
  {
    "trial": "trial_ac4ec0928af44bce",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 142,
    "url": "trial/trial_ac4ec0928af44bce"
  },
  {
    "trial": "trial_d07a97713e254c6b",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 130,
    "url": "trial/trial_d07a97713e254c6b"
  },
  {
    "trial": "trial_dd51424dc55843b7",
    "task": "coffee-ratings-outliers",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 147,
    "url": "trial/trial_dd51424dc55843b7"
  },
  {
    "trial": "trial_04f816c2a2d246fa",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_04f816c2a2d246fa"
  },
  {
    "trial": "trial_16b04ef670b24f86",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 10,
    "url": "trial/trial_16b04ef670b24f86"
  },
  {
    "trial": "trial_6dc3a11e4c2043e5",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 41,
    "url": "trial/trial_6dc3a11e4c2043e5"
  },
  {
    "trial": "trial_7cd9f90f881f4c9a",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_7cd9f90f881f4c9a"
  },
  {
    "trial": "trial_97ec6b52c51a41ad",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_97ec6b52c51a41ad"
  },
  {
    "trial": "trial_a4896f26d3db4f95",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_a4896f26d3db4f95"
  },
  {
    "trial": "trial_a56777ebcd2344e8",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_a56777ebcd2344e8"
  },
  {
    "trial": "trial_b6324e2a4f624daf",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 10,
    "url": "trial/trial_b6324e2a4f624daf"
  },
  {
    "trial": "trial_c800ecc6c39d43c0",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_c800ecc6c39d43c0"
  },
  {
    "trial": "trial_fa1c389a462a44b1",
    "task": "collision2d-impulse-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_fa1c389a462a44b1"
  },
  {
    "trial": "trial_2bf30e06a4814143",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 5,
    "url": "trial/trial_2bf30e06a4814143"
  },
  {
    "trial": "trial_387eb577c99a4274",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 6,
    "url": "trial/trial_387eb577c99a4274"
  },
  {
    "trial": "trial_3b4d2ee2faee4f0b",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 5,
    "url": "trial/trial_3b4d2ee2faee4f0b"
  },
  {
    "trial": "trial_4dd49575d9304492",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_4dd49575d9304492"
  },
  {
    "trial": "trial_70bd49d47e504e45",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 5,
    "url": "trial/trial_70bd49d47e504e45"
  },
  {
    "trial": "trial_8edcbd74261c46b2",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 5,
    "url": "trial/trial_8edcbd74261c46b2"
  },
  {
    "trial": "trial_90011c12d9b2483c",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 7,
    "url": "trial/trial_90011c12d9b2483c"
  },
  {
    "trial": "trial_943ae028c8a74912",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 5,
    "url": "trial/trial_943ae028c8a74912"
  },
  {
    "trial": "trial_d77b73f25f2b443c",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 5,
    "url": "trial/trial_d77b73f25f2b443c"
  },
  {
    "trial": "trial_dc732f1af4154283",
    "task": "cubic-spline",
    "category": "stem",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_dc732f1af4154283"
  },
  {
    "trial": "trial_3908dea9d2524166",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 32,
    "url": "trial/trial_3908dea9d2524166"
  },
  {
    "trial": "trial_46a188f35b1b4144",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_46a188f35b1b4144"
  },
  {
    "trial": "trial_67cb63173a1c4651",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 32,
    "url": "trial/trial_67cb63173a1c4651"
  },
  {
    "trial": "trial_7c4b7ca31b2c4dea",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 40,
    "url": "trial/trial_7c4b7ca31b2c4dea"
  },
  {
    "trial": "trial_964eb40653844d05",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 37,
    "url": "trial/trial_964eb40653844d05"
  },
  {
    "trial": "trial_b67d0d1651cf4106",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_b67d0d1651cf4106"
  },
  {
    "trial": "trial_ce7b35ada36b4c4a",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 25,
    "url": "trial/trial_ce7b35ada36b4c4a"
  },
  {
    "trial": "trial_d00fd2066aa0460f",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 40,
    "url": "trial/trial_d00fd2066aa0460f"
  },
  {
    "trial": "trial_e6313d63052441d4",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 31,
    "url": "trial/trial_e6313d63052441d4"
  },
  {
    "trial": "trial_f50e42f2e8904a1c",
    "task": "ddb-outbox-eventbridge-fanout",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 41,
    "url": "trial/trial_f50e42f2e8904a1c"
  },
  {
    "trial": "trial_0e19174e9819437e",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 34,
    "url": "trial/trial_0e19174e9819437e"
  },
  {
    "trial": "trial_2dd2f071ed6c4ce7",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 47,
    "url": "trial/trial_2dd2f071ed6c4ce7"
  },
  {
    "trial": "trial_3519080a9de5475c",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 25,
    "url": "trial/trial_3519080a9de5475c"
  },
  {
    "trial": "trial_3e83826a1b214323",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_3e83826a1b214323"
  },
  {
    "trial": "trial_5c61f9ac0ffd499d",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 20,
    "url": "trial/trial_5c61f9ac0ffd499d"
  },
  {
    "trial": "trial_616fbc80cf92449f",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 26,
    "url": "trial/trial_616fbc80cf92449f"
  },
  {
    "trial": "trial_6fdfc09a5c0f4cf5",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 49,
    "url": "trial/trial_6fdfc09a5c0f4cf5"
  },
  {
    "trial": "trial_8f6252116a0540d3",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 21,
    "url": "trial/trial_8f6252116a0540d3"
  },
  {
    "trial": "trial_959e8b1037a94878",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 31,
    "url": "trial/trial_959e8b1037a94878"
  },
  {
    "trial": "trial_d78235f03a204844",
    "task": "diff-patch-engine",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 45,
    "url": "trial/trial_d78235f03a204844"
  },
  {
    "trial": "trial_01e10f027e5f423b",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 68,
    "url": "trial/trial_01e10f027e5f423b"
  },
  {
    "trial": "trial_0b2bf8b6841b4824",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 73,
    "url": "trial/trial_0b2bf8b6841b4824"
  },
  {
    "trial": "trial_4095b4b4cc924789",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 66,
    "url": "trial/trial_4095b4b4cc924789"
  },
  {
    "trial": "trial_5666742b6f8f4c9b",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 49,
    "url": "trial/trial_5666742b6f8f4c9b"
  },
  {
    "trial": "trial_878d08131336476d",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 92,
    "url": "trial/trial_878d08131336476d"
  },
  {
    "trial": "trial_89f0bfb282dd4531",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 41,
    "url": "trial/trial_89f0bfb282dd4531"
  },
  {
    "trial": "trial_97ba21807b864991",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 66,
    "url": "trial/trial_97ba21807b864991"
  },
  {
    "trial": "trial_a5b8831ff09c40e4",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 50,
    "url": "trial/trial_a5b8831ff09c40e4"
  },
  {
    "trial": "trial_a9f01163b1334063",
    "task": "ecr-image-scan-lifecycle-immutable-tags-replication",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 52,
    "url": "trial/trial_a9f01163b1334063"
  },
  {
    "trial": "trial_05b371f108bd43fe",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 109,
    "url": "trial/trial_05b371f108bd43fe"
  },
  {
    "trial": "trial_2a30fe3fc31b4c2c",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 50,
    "url": "trial/trial_2a30fe3fc31b4c2c"
  },
  {
    "trial": "trial_509f52d8f98f45a4",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 80,
    "url": "trial/trial_509f52d8f98f45a4"
  },
  {
    "trial": "trial_652c4bec75d84543",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 86,
    "url": "trial/trial_652c4bec75d84543"
  },
  {
    "trial": "trial_6648f885162041e3",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 53,
    "url": "trial/trial_6648f885162041e3"
  },
  {
    "trial": "trial_a8218ea5cff84ac2",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 114,
    "url": "trial/trial_a8218ea5cff84ac2"
  },
  {
    "trial": "trial_be6007673c55456a",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 116,
    "url": "trial/trial_be6007673c55456a"
  },
  {
    "trial": "trial_c34d850f15384054",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 74,
    "url": "trial/trial_c34d850f15384054"
  },
  {
    "trial": "trial_cdc5113e314b418a",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 45,
    "url": "trial/trial_cdc5113e314b418a"
  },
  {
    "trial": "trial_d217b4e8371a42ef",
    "task": "efs-access-point-posix-iam-mount-target",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 45,
    "url": "trial/trial_d217b4e8371a42ef"
  },
  {
    "trial": "trial_72b50e8f00874633",
    "task": "evanw-esbuild-4417",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 46,
    "url": "trial/trial_72b50e8f00874633"
  },
  {
    "trial": "trial_8f956e5fb69542ff",
    "task": "evanw-esbuild-4417",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 59,
    "url": "trial/trial_8f956e5fb69542ff"
  },
  {
    "trial": "trial_966964209fd748cd",
    "task": "evanw-esbuild-4417",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 59,
    "url": "trial/trial_966964209fd748cd"
  },
  {
    "trial": "trial_ab11db9bd6d34506",
    "task": "evanw-esbuild-4417",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 42,
    "url": "trial/trial_ab11db9bd6d34506"
  },
  {
    "trial": "trial_eadd5466083b44cb",
    "task": "evanw-esbuild-4417",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 45,
    "url": "trial/trial_eadd5466083b44cb"
  },
  {
    "trial": "trial_0117515a305843ac",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 99,
    "url": "trial/trial_0117515a305843ac"
  },
  {
    "trial": "trial_6fe147f2c5b64c3d",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 118,
    "url": "trial/trial_6fe147f2c5b64c3d"
  },
  {
    "trial": "trial_830367e508684f9e",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 97,
    "url": "trial/trial_830367e508684f9e"
  },
  {
    "trial": "trial_88199da9333046a5",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 108,
    "url": "trial/trial_88199da9333046a5"
  },
  {
    "trial": "trial_a59bab4f2ff24976",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 65,
    "url": "trial/trial_a59bab4f2ff24976"
  },
  {
    "trial": "trial_b63f17295aeb4bbd",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 83,
    "url": "trial/trial_b63f17295aeb4bbd"
  },
  {
    "trial": "trial_c21bd416fadd4470",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 164,
    "url": "trial/trial_c21bd416fadd4470"
  },
  {
    "trial": "trial_d31ff3de9c984732",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 66,
    "url": "trial/trial_d31ff3de9c984732"
  },
  {
    "trial": "trial_f2b4372ed3004c41",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 39,
    "url": "trial/trial_f2b4372ed3004c41"
  },
  {
    "trial": "trial_fed282c6cc86469e",
    "task": "fedavg-federated-noniid-mnist",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 124,
    "url": "trial/trial_fed282c6cc86469e"
  },
  {
    "trial": "trial_130c663ba1e2402f",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 137,
    "url": "trial/trial_130c663ba1e2402f"
  },
  {
    "trial": "trial_42f02b93ffd14b44",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 118,
    "url": "trial/trial_42f02b93ffd14b44"
  },
  {
    "trial": "trial_510289fea63a4056",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 88,
    "url": "trial/trial_510289fea63a4056"
  },
  {
    "trial": "trial_51c1529faddd469d",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 92,
    "url": "trial/trial_51c1529faddd469d"
  },
  {
    "trial": "trial_5dc5bdcfdac64a31",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 135,
    "url": "trial/trial_5dc5bdcfdac64a31"
  },
  {
    "trial": "trial_7a7681d378a54549",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 157,
    "url": "trial/trial_7a7681d378a54549"
  },
  {
    "trial": "trial_7e9ce2304c90477d",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 152,
    "url": "trial/trial_7e9ce2304c90477d"
  },
  {
    "trial": "trial_a59599141cfb40de",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 113,
    "url": "trial/trial_a59599141cfb40de"
  },
  {
    "trial": "trial_bc8bb27ed7f24f53",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 61,
    "url": "trial/trial_bc8bb27ed7f24f53"
  },
  {
    "trial": "trial_ff3222a335044cd2",
    "task": "fix-broken-appsync-graphql-cognito-resolver-cache-leak",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 166,
    "url": "trial/trial_ff3222a335044cd2"
  },
  {
    "trial": "trial_1cb6b8074bff41ee",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_1cb6b8074bff41ee"
  },
  {
    "trial": "trial_1fcbbc9023394bde",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_1fcbbc9023394bde"
  },
  {
    "trial": "trial_27cf107f7056485c",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 7,
    "url": "trial/trial_27cf107f7056485c"
  },
  {
    "trial": "trial_34c84c977f6649ca",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_34c84c977f6649ca"
  },
  {
    "trial": "trial_42a493c23aa641f9",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_42a493c23aa641f9"
  },
  {
    "trial": "trial_438644008d474ab4",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 7,
    "url": "trial/trial_438644008d474ab4"
  },
  {
    "trial": "trial_5b759c32676f4e0b",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_5b759c32676f4e0b"
  },
  {
    "trial": "trial_73c61f9a3409418a",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_73c61f9a3409418a"
  },
  {
    "trial": "trial_a1c1b98d1ef34b65",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_a1c1b98d1ef34b65"
  },
  {
    "trial": "trial_e638457c5a0d400b",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_e638457c5a0d400b"
  },
  {
    "trial": "trial_2a7c35308b764080",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 30,
    "url": "trial/trial_2a7c35308b764080"
  },
  {
    "trial": "trial_2b18d3928daf40a4",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 28,
    "url": "trial/trial_2b18d3928daf40a4"
  },
  {
    "trial": "trial_3876d629742847f9",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 22,
    "url": "trial/trial_3876d629742847f9"
  },
  {
    "trial": "trial_43aa724b5fd14b89",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 32,
    "url": "trial/trial_43aa724b5fd14b89"
  },
  {
    "trial": "trial_5389ca907ccd4c2e",
    "task": "game-of-life-step",
    "category": "game",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 18,
    "url": "trial/trial_5389ca907ccd4c2e"
  },
  {
    "trial": "trial_16ea3a3ecbfd4447",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 24,
    "url": "trial/trial_16ea3a3ecbfd4447"
  },
  {
    "trial": "trial_1f991b75e49a42af",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 76,
    "url": "trial/trial_1f991b75e49a42af"
  },
  {
    "trial": "trial_2a7aa2650c4d4185",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 78,
    "url": "trial/trial_2a7aa2650c4d4185"
  },
  {
    "trial": "trial_83a3d3949502428e",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 86,
    "url": "trial/trial_83a3d3949502428e"
  },
  {
    "trial": "trial_a0dd662c29c047c5",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 31,
    "url": "trial/trial_a0dd662c29c047c5"
  },
  {
    "trial": "trial_b4c507be482b46ce",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 93,
    "url": "trial/trial_b4c507be482b46ce"
  },
  {
    "trial": "trial_c5b90c6b86e04248",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 36,
    "url": "trial/trial_c5b90c6b86e04248"
  },
  {
    "trial": "trial_e6701a584aea40a3",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 79,
    "url": "trial/trial_e6701a584aea40a3"
  },
  {
    "trial": "trial_f09fbf8d73e547d2",
    "task": "glue-etl-catalog-security-configuration-kms",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 100,
    "url": "trial/trial_f09fbf8d73e547d2"
  },
  {
    "trial": "trial_1618a78c267f49b8",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_1618a78c267f49b8"
  },
  {
    "trial": "trial_3b721629f99c471c",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_3b721629f99c471c"
  },
  {
    "trial": "trial_3c2f0d4fee124a6e",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_3c2f0d4fee124a6e"
  },
  {
    "trial": "trial_54d7f0713cfb4e3b",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_54d7f0713cfb4e3b"
  },
  {
    "trial": "trial_769e8c06ac524559",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 26,
    "url": "trial/trial_769e8c06ac524559"
  },
  {
    "trial": "trial_8abe0bdd8780465d",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 17,
    "url": "trial/trial_8abe0bdd8780465d"
  },
  {
    "trial": "trial_9c26d8fe31d74162",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_9c26d8fe31d74162"
  },
  {
    "trial": "trial_af02afd83f304179",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_af02afd83f304179"
  },
  {
    "trial": "trial_d67b5f20a1de47d4",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_d67b5f20a1de47d4"
  },
  {
    "trial": "trial_e3057b59d9044e0e",
    "task": "heat1d-conduction-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_e3057b59d9044e0e"
  },
  {
    "trial": "trial_2b5051dab20b4ffa",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_2b5051dab20b4ffa"
  },
  {
    "trial": "trial_39a0305c08844397",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_39a0305c08844397"
  },
  {
    "trial": "trial_4e2806ec8d6f40a2",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 9,
    "url": "trial/trial_4e2806ec8d6f40a2"
  },
  {
    "trial": "trial_50aae8d21ad14722",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 6,
    "url": "trial/trial_50aae8d21ad14722"
  },
  {
    "trial": "trial_78713a5c309a41c8",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_78713a5c309a41c8"
  },
  {
    "trial": "trial_b1d17cd6319044c2",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_b1d17cd6319044c2"
  },
  {
    "trial": "trial_bbb4bbf9a94d4998",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 36,
    "url": "trial/trial_bbb4bbf9a94d4998"
  },
  {
    "trial": "trial_d1e39deba97e4071",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 11,
    "url": "trial/trial_d1e39deba97e4071"
  },
  {
    "trial": "trial_d49025d2df024aae",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_d49025d2df024aae"
  },
  {
    "trial": "trial_ef36c35287de412a",
    "task": "iam-cross-account-externalid-sourcearn",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 18,
    "url": "trial/trial_ef36c35287de412a"
  },
  {
    "trial": "trial_0a0eb65800ba4385",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 75,
    "url": "trial/trial_0a0eb65800ba4385"
  },
  {
    "trial": "trial_1a82347aed864b91",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 75,
    "url": "trial/trial_1a82347aed864b91"
  },
  {
    "trial": "trial_3c6d73f2bb2245f2",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 86,
    "url": "trial/trial_3c6d73f2bb2245f2"
  },
  {
    "trial": "trial_3d56f3d947cd4ccb",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 42,
    "url": "trial/trial_3d56f3d947cd4ccb"
  },
  {
    "trial": "trial_5c5f718dec0a4854",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 55,
    "url": "trial/trial_5c5f718dec0a4854"
  },
  {
    "trial": "trial_78f9a6c81c764b54",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 2,
    "url": "trial/trial_78f9a6c81c764b54"
  },
  {
    "trial": "trial_7b8c84a28def451e",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 69,
    "url": "trial/trial_7b8c84a28def451e"
  },
  {
    "trial": "trial_9cb8b8349e41484e",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 63,
    "url": "trial/trial_9cb8b8349e41484e"
  },
  {
    "trial": "trial_aabb06598bd14a96",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 32,
    "url": "trial/trial_aabb06598bd14a96"
  },
  {
    "trial": "trial_ac2e4bca60f44f1b",
    "task": "iam-permissions-boundary-ceiling",
    "category": "cyber_security",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 44,
    "url": "trial/trial_ac2e4bca60f44f1b"
  },
  {
    "trial": "trial_164a69a0524a4758",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 55,
    "url": "trial/trial_164a69a0524a4758"
  },
  {
    "trial": "trial_16aadd0f14654516",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 57,
    "url": "trial/trial_16aadd0f14654516"
  },
  {
    "trial": "trial_3e54f99a2050461a",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 48,
    "url": "trial/trial_3e54f99a2050461a"
  },
  {
    "trial": "trial_6919bc77857a4ce7",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 34,
    "url": "trial/trial_6919bc77857a4ce7"
  },
  {
    "trial": "trial_8a443ff38a3a4bc6",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 41,
    "url": "trial/trial_8a443ff38a3a4bc6"
  },
  {
    "trial": "trial_a27eec79242249e1",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 50,
    "url": "trial/trial_a27eec79242249e1"
  },
  {
    "trial": "trial_c790b58c0e0b45b8",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 61,
    "url": "trial/trial_c790b58c0e0b45b8"
  },
  {
    "trial": "trial_e51f42d2876d4fdf",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 46,
    "url": "trial/trial_e51f42d2876d4fdf"
  },
  {
    "trial": "trial_ee731dfc92dc4043",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 34,
    "url": "trial/trial_ee731dfc92dc4043"
  },
  {
    "trial": "trial_f5d35bedb38e43d1",
    "task": "iam-revoke-older-sessions",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 43,
    "url": "trial/trial_f5d35bedb38e43d1"
  },
  {
    "trial": "trial_35e80ef91c824e9e",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 83,
    "url": "trial/trial_35e80ef91c824e9e"
  },
  {
    "trial": "trial_38755eb07d0042f7",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 59,
    "url": "trial/trial_38755eb07d0042f7"
  },
  {
    "trial": "trial_51e172b4db7447ed",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 31,
    "url": "trial/trial_51e172b4db7447ed"
  },
  {
    "trial": "trial_6b44ab68a1c34bc9",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 51,
    "url": "trial/trial_6b44ab68a1c34bc9"
  },
  {
    "trial": "trial_79d18c218b3f40ec",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 55,
    "url": "trial/trial_79d18c218b3f40ec"
  },
  {
    "trial": "trial_a7aeb780360e4bc4",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 61,
    "url": "trial/trial_a7aeb780360e4bc4"
  },
  {
    "trial": "trial_b299f024bf614481",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 41,
    "url": "trial/trial_b299f024bf614481"
  },
  {
    "trial": "trial_b903a403b9cb49a3",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 66,
    "url": "trial/trial_b903a403b9cb49a3"
  },
  {
    "trial": "trial_d28387c39e3c42ab",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 36,
    "url": "trial/trial_d28387c39e3c42ab"
  },
  {
    "trial": "trial_ed5aa0d0f92b46ed",
    "task": "iam-session-tag-tenant-scope",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 43,
    "url": "trial/trial_ed5aa0d0f92b46ed"
  },
  {
    "trial": "trial_019627f8ac4e40df",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 23,
    "url": "trial/trial_019627f8ac4e40df"
  },
  {
    "trial": "trial_1066179d327744fe",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 29,
    "url": "trial/trial_1066179d327744fe"
  },
  {
    "trial": "trial_3e66edb141bc4d12",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 19,
    "url": "trial/trial_3e66edb141bc4d12"
  },
  {
    "trial": "trial_79155e0b57c54afc",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 27,
    "url": "trial/trial_79155e0b57c54afc"
  },
  {
    "trial": "trial_7a3f3af0a99a4f64",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 23,
    "url": "trial/trial_7a3f3af0a99a4f64"
  },
  {
    "trial": "trial_8ba7b38b8e694a37",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_8ba7b38b8e694a37"
  },
  {
    "trial": "trial_a1f6e47b615f4f33",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 21,
    "url": "trial/trial_a1f6e47b615f4f33"
  },
  {
    "trial": "trial_a5e3a5c69b6e4599",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 22,
    "url": "trial/trial_a5e3a5c69b6e4599"
  },
  {
    "trial": "trial_b3853b218f2b4be8",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 29,
    "url": "trial/trial_b3853b218f2b4be8"
  },
  {
    "trial": "trial_e39189e515104a1f",
    "task": "idempotency-middleware",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_e39189e515104a1f"
  },
  {
    "trial": "trial_01c6d244e72a4c58",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 88,
    "url": "trial/trial_01c6d244e72a4c58"
  },
  {
    "trial": "trial_09982b3fd14a4928",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 91,
    "url": "trial/trial_09982b3fd14a4928"
  },
  {
    "trial": "trial_10daeff03e8d4531",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 90,
    "url": "trial/trial_10daeff03e8d4531"
  },
  {
    "trial": "trial_38e6ba03de254ae6",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 75,
    "url": "trial/trial_38e6ba03de254ae6"
  },
  {
    "trial": "trial_4d8581de43ee43c1",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 86,
    "url": "trial/trial_4d8581de43ee43c1"
  },
  {
    "trial": "trial_70f6b8a6d5524684",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 89,
    "url": "trial/trial_70f6b8a6d5524684"
  },
  {
    "trial": "trial_774260eaa514468b",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 85,
    "url": "trial/trial_774260eaa514468b"
  },
  {
    "trial": "trial_99793a71c4724845",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 75,
    "url": "trial/trial_99793a71c4724845"
  },
  {
    "trial": "trial_a53e6054e1c34aed",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 97,
    "url": "trial/trial_a53e6054e1c34aed"
  },
  {
    "trial": "trial_dc08eb4993a74c8e",
    "task": "ipl-toss-impact-analysis-r",
    "category": "product_ds",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 87,
    "url": "trial/trial_dc08eb4993a74c8e"
  },
  {
    "trial": "trial_0ba3b0dde444496f",
    "task": "klauspost-compress-1115",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 59,
    "url": "trial/trial_0ba3b0dde444496f"
  },
  {
    "trial": "trial_37693344e204459a",
    "task": "klauspost-compress-1115",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 89,
    "url": "trial/trial_37693344e204459a"
  },
  {
    "trial": "trial_80a395a110eb4f34",
    "task": "klauspost-compress-1115",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 72,
    "url": "trial/trial_80a395a110eb4f34"
  },
  {
    "trial": "trial_8685b84362cc413d",
    "task": "klauspost-compress-1115",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 57,
    "url": "trial/trial_8685b84362cc413d"
  },
  {
    "trial": "trial_c415a367fef24542",
    "task": "klauspost-compress-1115",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 64,
    "url": "trial/trial_c415a367fef24542"
  },
  {
    "trial": "trial_24e57717b785441a",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 168,
    "url": "trial/trial_24e57717b785441a"
  },
  {
    "trial": "trial_3d3357504ab54501",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 164,
    "url": "trial/trial_3d3357504ab54501"
  },
  {
    "trial": "trial_55105d893f274ab3",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 187,
    "url": "trial/trial_55105d893f274ab3"
  },
  {
    "trial": "trial_6980c5176d5c4225",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 192,
    "url": "trial/trial_6980c5176d5c4225"
  },
  {
    "trial": "trial_786c2cb8cf224821",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 129,
    "url": "trial/trial_786c2cb8cf224821"
  },
  {
    "trial": "trial_c156ed9c319c4a53",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 171,
    "url": "trial/trial_c156ed9c319c4a53"
  },
  {
    "trial": "trial_d6965c9497ef4da5",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 182,
    "url": "trial/trial_d6965c9497ef4da5"
  },
  {
    "trial": "trial_d7309438d7954f0a",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 143,
    "url": "trial/trial_d7309438d7954f0a"
  },
  {
    "trial": "trial_e48d2a030eb94fb6",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 172,
    "url": "trial/trial_e48d2a030eb94fb6"
  },
  {
    "trial": "trial_f28943e086b54473",
    "task": "ks-equation-1d-forecast",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 201,
    "url": "trial/trial_f28943e086b54473"
  },
  {
    "trial": "trial_14d1edcf1ec24dc4",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 50,
    "url": "trial/trial_14d1edcf1ec24dc4"
  },
  {
    "trial": "trial_1bfe20cbd0c342ba",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 30,
    "url": "trial/trial_1bfe20cbd0c342ba"
  },
  {
    "trial": "trial_254d16bb02724005",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 40,
    "url": "trial/trial_254d16bb02724005"
  },
  {
    "trial": "trial_3ea1300a3b8e4550",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 40,
    "url": "trial/trial_3ea1300a3b8e4550"
  },
  {
    "trial": "trial_70d657cae4d742bd",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 47,
    "url": "trial/trial_70d657cae4d742bd"
  },
  {
    "trial": "trial_8b1a7bce47684dd1",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 32,
    "url": "trial/trial_8b1a7bce47684dd1"
  },
  {
    "trial": "trial_922f356c7ba94a9a",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 51,
    "url": "trial/trial_922f356c7ba94a9a"
  },
  {
    "trial": "trial_aaa014a5dbdb4743",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 43,
    "url": "trial/trial_aaa014a5dbdb4743"
  },
  {
    "trial": "trial_e1e0681952ee4ef0",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 47,
    "url": "trial/trial_e1e0681952ee4ef0"
  },
  {
    "trial": "trial_f4104eaede6343a7",
    "task": "lending-club-lgd-bias-correction-r",
    "category": "data-science-reversal",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 37,
    "url": "trial/trial_f4104eaede6343a7"
  },
  {
    "trial": "trial_5e7c95f12ea84f59",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_5e7c95f12ea84f59"
  },
  {
    "trial": "trial_774912a6376643d0",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 13,
    "url": "trial/trial_774912a6376643d0"
  },
  {
    "trial": "trial_791f16943c944bd4",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_791f16943c944bd4"
  },
  {
    "trial": "trial_79a7df0f62394c3d",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_79a7df0f62394c3d"
  },
  {
    "trial": "trial_84a9b332f3184fe1",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_84a9b332f3184fe1"
  },
  {
    "trial": "trial_9348e5dc49924554",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 7,
    "url": "trial/trial_9348e5dc49924554"
  },
  {
    "trial": "trial_9a3fa7d9698a44de",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_9a3fa7d9698a44de"
  },
  {
    "trial": "trial_9b571c3adfce4078",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_9b571c3adfce4078"
  },
  {
    "trial": "trial_c5c339442e00412d",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 13,
    "url": "trial/trial_c5c339442e00412d"
  },
  {
    "trial": "trial_e9ae5ade99934c02",
    "task": "lru-cache",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-haiku-4-5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_e9ae5ade99934c02"
  },
  {
    "trial": "trial_44c1f34582fc4130",
    "task": "lru-cache",
    "category": "swe",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 32,
    "url": "trial/trial_44c1f34582fc4130"
  },
  {
    "trial": "trial_baaa85e2e7074eae",
    "task": "lru-cache",
    "category": "swe",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 30,
    "url": "trial/trial_baaa85e2e7074eae"
  },
  {
    "trial": "trial_db43abb9785a4f73",
    "task": "lru-cache",
    "category": "swe",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 20,
    "url": "trial/trial_db43abb9785a4f73"
  },
  {
    "trial": "trial_deca1302fe314dd3",
    "task": "lru-cache",
    "category": "swe",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 30,
    "url": "trial/trial_deca1302fe314dd3"
  },
  {
    "trial": "trial_fb41ae2fe1d34682",
    "task": "lru-cache",
    "category": "swe",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 44,
    "url": "trial/trial_fb41ae2fe1d34682"
  },
  {
    "trial": "trial_06e311a48b2a4944",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 127,
    "url": "trial/trial_06e311a48b2a4944"
  },
  {
    "trial": "trial_06f05656795f45ea",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 175,
    "url": "trial/trial_06f05656795f45ea"
  },
  {
    "trial": "trial_08a560c125e943b5",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 161,
    "url": "trial/trial_08a560c125e943b5"
  },
  {
    "trial": "trial_16b8cd529a90421a",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 169,
    "url": "trial/trial_16b8cd529a90421a"
  },
  {
    "trial": "trial_276ae93264ac4a3b",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 102,
    "url": "trial/trial_276ae93264ac4a3b"
  },
  {
    "trial": "trial_6b9c6319ff2a4a95",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 187,
    "url": "trial/trial_6b9c6319ff2a4a95"
  },
  {
    "trial": "trial_78a63048bb2547de",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 182,
    "url": "trial/trial_78a63048bb2547de"
  },
  {
    "trial": "trial_81884d960a3b4401",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 77,
    "url": "trial/trial_81884d960a3b4401"
  },
  {
    "trial": "trial_c24266ee3cdc4006",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 176,
    "url": "trial/trial_c24266ee3cdc4006"
  },
  {
    "trial": "trial_ce13ce75999341f4",
    "task": "neonatal-drug-exposure-nlme",
    "category": "pharmacometrics",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 108,
    "url": "trial/trial_ce13ce75999341f4"
  },
  {
    "trial": "trial_0eef2c7905554413",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 10,
    "url": "trial/trial_0eef2c7905554413"
  },
  {
    "trial": "trial_4bb39b8999bf4b2c",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_4bb39b8999bf4b2c"
  },
  {
    "trial": "trial_4bbdb56844fc43f2",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_4bbdb56844fc43f2"
  },
  {
    "trial": "trial_642cc83f659f41fa",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_642cc83f659f41fa"
  },
  {
    "trial": "trial_a787318b196e4146",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_a787318b196e4146"
  },
  {
    "trial": "trial_b15ba8c46c3d4030",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_b15ba8c46c3d4030"
  },
  {
    "trial": "trial_c3a8f0d579cc4a50",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_c3a8f0d579cc4a50"
  },
  {
    "trial": "trial_d3f8c103db344e0a",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_d3f8c103db344e0a"
  },
  {
    "trial": "trial_d5e114746c5b4f0f",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_d5e114746c5b4f0f"
  },
  {
    "trial": "trial_f117fd805cfa4fc9",
    "task": "occ-conditional-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_f117fd805cfa4fc9"
  },
  {
    "trial": "trial_172f113c7dcc457b",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_172f113c7dcc457b"
  },
  {
    "trial": "trial_19f15de5c21f4faf",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_19f15de5c21f4faf"
  },
  {
    "trial": "trial_226d4f6a0fab461c",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_226d4f6a0fab461c"
  },
  {
    "trial": "trial_42f98e78328c4f0b",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 6,
    "url": "trial/trial_42f98e78328c4f0b"
  },
  {
    "trial": "trial_475ed19326d84f15",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_475ed19326d84f15"
  },
  {
    "trial": "trial_4bce4783740943ec",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 23,
    "url": "trial/trial_4bce4783740943ec"
  },
  {
    "trial": "trial_79c95e0c5c1b4199",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 13,
    "url": "trial/trial_79c95e0c5c1b4199"
  },
  {
    "trial": "trial_a7b0ef7c2c364b28",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_a7b0ef7c2c364b28"
  },
  {
    "trial": "trial_c43fd5aeca484c89",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_c43fd5aeca484c89"
  },
  {
    "trial": "trial_cefe932f03244976",
    "task": "open-drain-command-engine",
    "category": "electrical_engineering",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 7,
    "url": "trial/trial_cefe932f03244976"
  },
  {
    "trial": "trial_0a7e4165aec044c7",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 20,
    "url": "trial/trial_0a7e4165aec044c7"
  },
  {
    "trial": "trial_4a8d03d3a89f4288",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 20,
    "url": "trial/trial_4a8d03d3a89f4288"
  },
  {
    "trial": "trial_52b41a8ec9f74da8",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 24,
    "url": "trial/trial_52b41a8ec9f74da8"
  },
  {
    "trial": "trial_62ac7d5bd55d4b4d",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 27,
    "url": "trial/trial_62ac7d5bd55d4b4d"
  },
  {
    "trial": "trial_64b0a46969564093",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 24,
    "url": "trial/trial_64b0a46969564093"
  },
  {
    "trial": "trial_940e19b473ee49cc",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_940e19b473ee49cc"
  },
  {
    "trial": "trial_b371b76e8d1b41e8",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 23,
    "url": "trial/trial_b371b76e8d1b41e8"
  },
  {
    "trial": "trial_badb10781bb543b7",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_badb10781bb543b7"
  },
  {
    "trial": "trial_caabb82061674eca",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 23,
    "url": "trial/trial_caabb82061674eca"
  },
  {
    "trial": "trial_d5d244b4e54740bb",
    "task": "pipeflow-colebrook-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 22,
    "url": "trial/trial_d5d244b4e54740bb"
  },
  {
    "trial": "trial_00819674e3ab42e2",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 160,
    "url": "trial/trial_00819674e3ab42e2"
  },
  {
    "trial": "trial_5fe77f1e1c4b4699",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 216,
    "url": "trial/trial_5fe77f1e1c4b4699"
  },
  {
    "trial": "trial_6585d4c0ff9c4700",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 157,
    "url": "trial/trial_6585d4c0ff9c4700"
  },
  {
    "trial": "trial_80c5f310e4f64784",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 158,
    "url": "trial/trial_80c5f310e4f64784"
  },
  {
    "trial": "trial_85046dd0d2d4466a",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 178,
    "url": "trial/trial_85046dd0d2d4466a"
  },
  {
    "trial": "trial_c2c284e5fbc14ae4",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 144,
    "url": "trial/trial_c2c284e5fbc14ae4"
  },
  {
    "trial": "trial_dea29c9e2e67499f",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 225,
    "url": "trial/trial_dea29c9e2e67499f"
  },
  {
    "trial": "trial_dfeedb695a3d4e2c",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 159,
    "url": "trial/trial_dfeedb695a3d4e2c"
  },
  {
    "trial": "trial_f2651a5c669648ae",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 159,
    "url": "trial/trial_f2651a5c669648ae"
  },
  {
    "trial": "trial_fe48779e35154dc7",
    "task": "product-recall-stock-price-event",
    "category": "data-science",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 188,
    "url": "trial/trial_fe48779e35154dc7"
  },
  {
    "trial": "trial_0ad41e8867fa4aa0",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_0ad41e8867fa4aa0"
  },
  {
    "trial": "trial_329769e09bfb4bdd",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 17,
    "url": "trial/trial_329769e09bfb4bdd"
  },
  {
    "trial": "trial_3d38c8c3a87044ce",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_3d38c8c3a87044ce"
  },
  {
    "trial": "trial_68404a84dbe94a46",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_68404a84dbe94a46"
  },
  {
    "trial": "trial_727ea18eae254fb5",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_727ea18eae254fb5"
  },
  {
    "trial": "trial_843e94da49cb443c",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_843e94da49cb443c"
  },
  {
    "trial": "trial_cf4469c5c4e94540",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 14,
    "url": "trial/trial_cf4469c5c4e94540"
  },
  {
    "trial": "trial_d1737e8c7dcd4499",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_d1737e8c7dcd4499"
  },
  {
    "trial": "trial_d433732ead154e3a",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_d433732ead154e3a"
  },
  {
    "trial": "trial_e07e8c70f4e04a7a",
    "task": "projectile-drag-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_e07e8c70f4e04a7a"
  },
  {
    "trial": "trial_27720bdd72dc4b35",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_27720bdd72dc4b35"
  },
  {
    "trial": "trial_3c9d40dc811342de",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_3c9d40dc811342de"
  },
  {
    "trial": "trial_3e5f8c7d029f4b8e",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 10,
    "url": "trial/trial_3e5f8c7d029f4b8e"
  },
  {
    "trial": "trial_4ad56f84a3124019",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 14,
    "url": "trial/trial_4ad56f84a3124019"
  },
  {
    "trial": "trial_518b150a0a6d48c5",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_518b150a0a6d48c5"
  },
  {
    "trial": "trial_6f372cee177e452a",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_6f372cee177e452a"
  },
  {
    "trial": "trial_82a3b14a93a2492d",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_82a3b14a93a2492d"
  },
  {
    "trial": "trial_87dbf8c0952c4a18",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 10,
    "url": "trial/trial_87dbf8c0952c4a18"
  },
  {
    "trial": "trial_89309bb29613409f",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 10,
    "url": "trial/trial_89309bb29613409f"
  },
  {
    "trial": "trial_b2ae917fc4464d12",
    "task": "quaternion-rotation-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 9,
    "url": "trial/trial_b2ae917fc4464d12"
  },
  {
    "trial": "trial_2431976334c34b16",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 21,
    "url": "trial/trial_2431976334c34b16"
  },
  {
    "trial": "trial_58aaf09bccc047f2",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 18,
    "url": "trial/trial_58aaf09bccc047f2"
  },
  {
    "trial": "trial_5accb8a047314d85",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 36,
    "url": "trial/trial_5accb8a047314d85"
  },
  {
    "trial": "trial_61f8a84488b54a4a",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_61f8a84488b54a4a"
  },
  {
    "trial": "trial_6f6b884224ee4f18",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 20,
    "url": "trial/trial_6f6b884224ee4f18"
  },
  {
    "trial": "trial_9088236d7fa54928",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 18,
    "url": "trial/trial_9088236d7fa54928"
  },
  {
    "trial": "trial_a325589794ce4237",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 20,
    "url": "trial/trial_a325589794ce4237"
  },
  {
    "trial": "trial_ae9f5d8585fd43bd",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 15,
    "url": "trial/trial_ae9f5d8585fd43bd"
  },
  {
    "trial": "trial_d70a298f3abc4d85",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_d70a298f3abc4d85"
  },
  {
    "trial": "trial_da53249e1a0d4ca3",
    "task": "rate-limiter",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 18,
    "url": "trial/trial_da53249e1a0d4ca3"
  },
  {
    "trial": "trial_1d56c79128cd40b5",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 14,
    "url": "trial/trial_1d56c79128cd40b5"
  },
  {
    "trial": "trial_21905656a73e4861",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_21905656a73e4861"
  },
  {
    "trial": "trial_61955a19658e4152",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_61955a19658e4152"
  },
  {
    "trial": "trial_736fc88c24784b65",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 14,
    "url": "trial/trial_736fc88c24784b65"
  },
  {
    "trial": "trial_7ed3066b08674910",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_7ed3066b08674910"
  },
  {
    "trial": "trial_c0f34f9e00ae475b",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_c0f34f9e00ae475b"
  },
  {
    "trial": "trial_d10db63bddcc4052",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_d10db63bddcc4052"
  },
  {
    "trial": "trial_d33eaf32d31c4750",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_d33eaf32d31c4750"
  },
  {
    "trial": "trial_e03b5cd045d04e3f",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 6,
    "url": "trial/trial_e03b5cd045d04e3f"
  },
  {
    "trial": "trial_e2083d22b04345bc",
    "task": "resilient-http-client",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_e2083d22b04345bc"
  },
  {
    "trial": "trial_0ae5d3fd96b84db5",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 15,
    "url": "trial/trial_0ae5d3fd96b84db5"
  },
  {
    "trial": "trial_460df64780054530",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_460df64780054530"
  },
  {
    "trial": "trial_61ba3523085c402a",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_61ba3523085c402a"
  },
  {
    "trial": "trial_6492e8d266f3438a",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_6492e8d266f3438a"
  },
  {
    "trial": "trial_71e2aa0a66ac4d8a",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_71e2aa0a66ac4d8a"
  },
  {
    "trial": "trial_7d8c678278644c6c",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_7d8c678278644c6c"
  },
  {
    "trial": "trial_cff0c28f53954f44",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_cff0c28f53954f44"
  },
  {
    "trial": "trial_d5490d3378094ff6",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_d5490d3378094ff6"
  },
  {
    "trial": "trial_fbc045555dbe4ba8",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 13,
    "url": "trial/trial_fbc045555dbe4ba8"
  },
  {
    "trial": "trial_fdbaf6bba4924566",
    "task": "rk4-orbit-integrator",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_fdbaf6bba4924566"
  },
  {
    "trial": "trial_0f8276f0521f4e68",
    "task": "rust-lang-semver-305",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 79,
    "url": "trial/trial_0f8276f0521f4e68"
  },
  {
    "trial": "trial_4501a05d9dd84070",
    "task": "rust-lang-semver-305",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 51,
    "url": "trial/trial_4501a05d9dd84070"
  },
  {
    "trial": "trial_648aefff9f394640",
    "task": "rust-lang-semver-305",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 64,
    "url": "trial/trial_648aefff9f394640"
  },
  {
    "trial": "trial_b22adfc7c57f4de9",
    "task": "rust-lang-semver-305",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 56,
    "url": "trial/trial_b22adfc7c57f4de9"
  },
  {
    "trial": "trial_de8fa41558c1413c",
    "task": "rust-lang-semver-305",
    "category": "debugging",
    "agent": "codex",
    "model": "gpt-5.5",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 82,
    "url": "trial/trial_de8fa41558c1413c"
  },
  {
    "trial": "trial_3bf4cd8eb0524507",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 7,
    "url": "trial/trial_3bf4cd8eb0524507"
  },
  {
    "trial": "trial_97d195f9684b430a",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 6,
    "url": "trial/trial_97d195f9684b430a"
  },
  {
    "trial": "trial_987320a96a4b4e36",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 7,
    "url": "trial/trial_987320a96a4b4e36"
  },
  {
    "trial": "trial_a1a2a1148b7948f2",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 9,
    "url": "trial/trial_a1a2a1148b7948f2"
  },
  {
    "trial": "trial_a679e0b040354aa4",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 7,
    "url": "trial/trial_a679e0b040354aa4"
  },
  {
    "trial": "trial_b99ac293abbd4798",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 9,
    "url": "trial/trial_b99ac293abbd4798"
  },
  {
    "trial": "trial_cb5dd0dde2bf476f",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 7,
    "url": "trial/trial_cb5dd0dde2bf476f"
  },
  {
    "trial": "trial_cc0c01389f8b445d",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 8,
    "url": "trial/trial_cc0c01389f8b445d"
  },
  {
    "trial": "trial_cdaf4626570948dc",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 8,
    "url": "trial/trial_cdaf4626570948dc"
  },
  {
    "trial": "trial_ed3f8dc7b1bb42bc",
    "task": "s3-lambda-ddb-pipeline",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_ed3f8dc7b1bb42bc"
  },
  {
    "trial": "trial_02ced5438e3b445b",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 42,
    "url": "trial/trial_02ced5438e3b445b"
  },
  {
    "trial": "trial_17655a0bc3954f14",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 32,
    "url": "trial/trial_17655a0bc3954f14"
  },
  {
    "trial": "trial_1d55f0f1889741b0",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 35,
    "url": "trial/trial_1d55f0f1889741b0"
  },
  {
    "trial": "trial_7226011c62844638",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 34,
    "url": "trial/trial_7226011c62844638"
  },
  {
    "trial": "trial_7305e845ac184501",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 36,
    "url": "trial/trial_7305e845ac184501"
  },
  {
    "trial": "trial_9ee88cb704074357",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 35,
    "url": "trial/trial_9ee88cb704074357"
  },
  {
    "trial": "trial_a18e41f658ad42fc",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_a18e41f658ad42fc"
  },
  {
    "trial": "trial_a7aa148352ff4136",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 33,
    "url": "trial/trial_a7aa148352ff4136"
  },
  {
    "trial": "trial_be93d3f7bfc64f41",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 11,
    "url": "trial/trial_be93d3f7bfc64f41"
  },
  {
    "trial": "trial_e4d732883a2f409b",
    "task": "s3-sqs-image-pipeline-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 52,
    "url": "trial/trial_e4d732883a2f409b"
  },
  {
    "trial": "trial_2d644edb72654f28",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_2d644edb72654f28"
  },
  {
    "trial": "trial_333bc09ac60e4809",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 25,
    "url": "trial/trial_333bc09ac60e4809"
  },
  {
    "trial": "trial_3896c3a9d5fa4b6d",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_3896c3a9d5fa4b6d"
  },
  {
    "trial": "trial_5119e8938c684759",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_5119e8938c684759"
  },
  {
    "trial": "trial_5d3b0ad359de493a",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 28,
    "url": "trial/trial_5d3b0ad359de493a"
  },
  {
    "trial": "trial_9d3ea69051b84345",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_9d3ea69051b84345"
  },
  {
    "trial": "trial_af341506f3a342cb",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 19,
    "url": "trial/trial_af341506f3a342cb"
  },
  {
    "trial": "trial_b81b43fa3e284a2a",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 12,
    "url": "trial/trial_b81b43fa3e284a2a"
  },
  {
    "trial": "trial_ca22c4bf94cc436e",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_ca22c4bf94cc436e"
  },
  {
    "trial": "trial_e6ef7b5814bd4eb5",
    "task": "secrets-rotation-kms",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 13,
    "url": "trial/trial_e6ef7b5814bd4eb5"
  },
  {
    "trial": "trial_18e0f19f89ac4adf",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 19,
    "url": "trial/trial_18e0f19f89ac4adf"
  },
  {
    "trial": "trial_35a1e3f4a6604ab5",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_35a1e3f4a6604ab5"
  },
  {
    "trial": "trial_4ce938e920c04ae2",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 18,
    "url": "trial/trial_4ce938e920c04ae2"
  },
  {
    "trial": "trial_62c54b8aaed3465e",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 19,
    "url": "trial/trial_62c54b8aaed3465e"
  },
  {
    "trial": "trial_71bfd72f3e024283",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 17,
    "url": "trial/trial_71bfd72f3e024283"
  },
  {
    "trial": "trial_7ac4aec3220e4f11",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 13,
    "url": "trial/trial_7ac4aec3220e4f11"
  },
  {
    "trial": "trial_7e2390cdc3f544e9",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 18,
    "url": "trial/trial_7e2390cdc3f544e9"
  },
  {
    "trial": "trial_897ca48d29254e04",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 13,
    "url": "trial/trial_897ca48d29254e04"
  },
  {
    "trial": "trial_c00f62913ed14b87",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 12,
    "url": "trial/trial_c00f62913ed14b87"
  },
  {
    "trial": "trial_f503488588354622",
    "task": "session-token-verify",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 20,
    "url": "trial/trial_f503488588354622"
  },
  {
    "trial": "trial_183215d893744136",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 36,
    "url": "trial/trial_183215d893744136"
  },
  {
    "trial": "trial_2ddb0cc065b14777",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 23,
    "url": "trial/trial_2ddb0cc065b14777"
  },
  {
    "trial": "trial_3a2adea840734ba2",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 20,
    "url": "trial/trial_3a2adea840734ba2"
  },
  {
    "trial": "trial_59aa66d84fea400e",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 20,
    "url": "trial/trial_59aa66d84fea400e"
  },
  {
    "trial": "trial_7f15e57c8b4a4a73",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 40,
    "url": "trial/trial_7f15e57c8b4a4a73"
  },
  {
    "trial": "trial_c00e2ff3ace84860",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 34,
    "url": "trial/trial_c00e2ff3ace84860"
  },
  {
    "trial": "trial_c34b8cf9daed42fa",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 38,
    "url": "trial/trial_c34b8cf9daed42fa"
  },
  {
    "trial": "trial_d0d8cf3f063641fa",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 34,
    "url": "trial/trial_d0d8cf3f063641fa"
  },
  {
    "trial": "trial_d3d2dcae4af24b10",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_d3d2dcae4af24b10"
  },
  {
    "trial": "trial_dab3af4409c84c70",
    "task": "sfn-saga-compensation-orchestrator",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 45,
    "url": "trial/trial_dab3af4409c84c70"
  },
  {
    "trial": "trial_2033a492a9c94be1",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 17,
    "url": "trial/trial_2033a492a9c94be1"
  },
  {
    "trial": "trial_5190973914b24efd",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 23,
    "url": "trial/trial_5190973914b24efd"
  },
  {
    "trial": "trial_6968318bdf68402d",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 34,
    "url": "trial/trial_6968318bdf68402d"
  },
  {
    "trial": "trial_7a21c6c66a3b4fe5",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 32,
    "url": "trial/trial_7a21c6c66a3b4fe5"
  },
  {
    "trial": "trial_7d45ce28f5be466e",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 44,
    "url": "trial/trial_7d45ce28f5be466e"
  },
  {
    "trial": "trial_8000615de48e44da",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 56,
    "url": "trial/trial_8000615de48e44da"
  },
  {
    "trial": "trial_90921d8ab99e49c6",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 48,
    "url": "trial/trial_90921d8ab99e49c6"
  },
  {
    "trial": "trial_a629b8c916de4523",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 28,
    "url": "trial/trial_a629b8c916de4523"
  },
  {
    "trial": "trial_d7a928460bc84af6",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 42,
    "url": "trial/trial_d7a928460bc84af6"
  },
  {
    "trial": "trial_df59a96e93344185",
    "task": "sfn-secrets-rotation-chain",
    "category": "cloudops",
    "agent": "claude-code",
    "model": "claude-opus-4-7",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 45,
    "url": "trial/trial_df59a96e93344185"
  },
  {
    "trial": "trial_3face1791423485f",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 314,
    "url": "trial/trial_3face1791423485f"
  },
  {
    "trial": "trial_5bacac8b5ed94fe6",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 349,
    "url": "trial/trial_5bacac8b5ed94fe6"
  },
  {
    "trial": "trial_5c5ed690cfd0411b",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 316,
    "url": "trial/trial_5c5ed690cfd0411b"
  },
  {
    "trial": "trial_6dfcc6d7e6994393",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 216,
    "url": "trial/trial_6dfcc6d7e6994393"
  },
  {
    "trial": "trial_797d9f59d62b4ad6",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 324,
    "url": "trial/trial_797d9f59d62b4ad6"
  },
  {
    "trial": "trial_b115999543b749a1",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 237,
    "url": "trial/trial_b115999543b749a1"
  },
  {
    "trial": "trial_b2021d3a31414464",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 280,
    "url": "trial/trial_b2021d3a31414464"
  },
  {
    "trial": "trial_ce92dc6240d94754",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 276,
    "url": "trial/trial_ce92dc6240d94754"
  },
  {
    "trial": "trial_e0b691b638a1478f",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 333,
    "url": "trial/trial_e0b691b638a1478f"
  },
  {
    "trial": "trial_faeb4bb325684846",
    "task": "simjeb-bracket-fea-mass-prediction-real",
    "category": "scientific-ml",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 282,
    "url": "trial/trial_faeb4bb325684846"
  },
  {
    "trial": "trial_1136a54f053c4b11",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 0,
    "url": "trial/trial_1136a54f053c4b11"
  },
  {
    "trial": "trial_27a232b7de544d21",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 0,
    "url": "trial/trial_27a232b7de544d21"
  },
  {
    "trial": "trial_3cb09ff8336a4a3d",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 5,
    "url": "trial/trial_3cb09ff8336a4a3d"
  },
  {
    "trial": "trial_4de1b219ce6a4ba8",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 5,
    "url": "trial/trial_4de1b219ce6a4ba8"
  },
  {
    "trial": "trial_6aa5542805bc4ea0",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 5,
    "url": "trial/trial_6aa5542805bc4ea0"
  },
  {
    "trial": "trial_8870ad236ecc469c",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_8870ad236ecc469c"
  },
  {
    "trial": "trial_8b797bf433804107",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 0,
    "url": "trial/trial_8b797bf433804107"
  },
  {
    "trial": "trial_ce654a2130fa475d",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_ce654a2130fa475d"
  },
  {
    "trial": "trial_edee46f9823f4932",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 0,
    "url": "trial/trial_edee46f9823f4932"
  },
  {
    "trial": "trial_f6bdfed5ab4342a4",
    "task": "task_0026_codex_camera_shake_rig",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_f6bdfed5ab4342a4"
  },
  {
    "trial": "trial_1fceb4c86cec4356",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 64,
    "url": "trial/trial_1fceb4c86cec4356"
  },
  {
    "trial": "trial_3aa2323fd07d4a3a",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 73,
    "url": "trial/trial_3aa2323fd07d4a3a"
  },
  {
    "trial": "trial_3da340e8474548a3",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 60,
    "url": "trial/trial_3da340e8474548a3"
  },
  {
    "trial": "trial_3ddd22c29bbf495c",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 58,
    "url": "trial/trial_3ddd22c29bbf495c"
  },
  {
    "trial": "trial_53140ab3ac034afa",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 56,
    "url": "trial/trial_53140ab3ac034afa"
  },
  {
    "trial": "trial_655504c79da74d72",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 64,
    "url": "trial/trial_655504c79da74d72"
  },
  {
    "trial": "trial_6b73749d947041bc",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 57,
    "url": "trial/trial_6b73749d947041bc"
  },
  {
    "trial": "trial_8e084723505f412c",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 56,
    "url": "trial/trial_8e084723505f412c"
  },
  {
    "trial": "trial_a1d0669a584642a5",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 65,
    "url": "trial/trial_a1d0669a584642a5"
  },
  {
    "trial": "trial_c372b8007e9449fa",
    "task": "task_0040_day_night_cycle_controller",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 66,
    "url": "trial/trial_c372b8007e9449fa"
  },
  {
    "trial": "trial_04efa6b2a8f64bc2",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 59,
    "url": "trial/trial_04efa6b2a8f64bc2"
  },
  {
    "trial": "trial_23c5f6cc00ca4b89",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 45,
    "url": "trial/trial_23c5f6cc00ca4b89"
  },
  {
    "trial": "trial_28e0f4f8825648c5",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 52,
    "url": "trial/trial_28e0f4f8825648c5"
  },
  {
    "trial": "trial_2c90be23fc2e4d81",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 60,
    "url": "trial/trial_2c90be23fc2e4d81"
  },
  {
    "trial": "trial_3de822d4cc7d4fbc",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 44,
    "url": "trial/trial_3de822d4cc7d4fbc"
  },
  {
    "trial": "trial_4a2b49f0b5dc4ebe",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 72,
    "url": "trial/trial_4a2b49f0b5dc4ebe"
  },
  {
    "trial": "trial_4e255c994c7f449a",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 60,
    "url": "trial/trial_4e255c994c7f449a"
  },
  {
    "trial": "trial_7d6a2170af56475b",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 43,
    "url": "trial/trial_7d6a2170af56475b"
  },
  {
    "trial": "trial_d80853bb6cf643bd",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 23,
    "url": "trial/trial_d80853bb6cf643bd"
  },
  {
    "trial": "trial_f74f38f26d594527",
    "task": "task_0053_car_scene_assembly",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 70,
    "url": "trial/trial_f74f38f26d594527"
  },
  {
    "trial": "trial_129fb8fc57174c04",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 22,
    "url": "trial/trial_129fb8fc57174c04"
  },
  {
    "trial": "trial_30de1c0546c14d04",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 20,
    "url": "trial/trial_30de1c0546c14d04"
  },
  {
    "trial": "trial_8289264f4e334429",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 21,
    "url": "trial/trial_8289264f4e334429"
  },
  {
    "trial": "trial_a7861d7334354b1e",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 25,
    "url": "trial/trial_a7861d7334354b1e"
  },
  {
    "trial": "trial_b8231458a3c146f0",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 12,
    "url": "trial/trial_b8231458a3c146f0"
  },
  {
    "trial": "trial_bcbf509b8fd94ad5",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 25,
    "url": "trial/trial_bcbf509b8fd94ad5"
  },
  {
    "trial": "trial_d4d00d76f6bd4d10",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 17,
    "url": "trial/trial_d4d00d76f6bd4d10"
  },
  {
    "trial": "trial_d658f7bf52dd499c",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 21,
    "url": "trial/trial_d658f7bf52dd499c"
  },
  {
    "trial": "trial_dcee6ee0382040c9",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 27,
    "url": "trial/trial_dcee6ee0382040c9"
  },
  {
    "trial": "trial_fb6a69aaffc44c4b",
    "task": "task_0054_assemble_crusader_animatedsprite",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 19,
    "url": "trial/trial_fb6a69aaffc44c4b"
  },
  {
    "trial": "trial_0f0589300013471f",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 0,
    "url": "trial/trial_0f0589300013471f"
  },
  {
    "trial": "trial_3ef6a79cb44c477d",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_3ef6a79cb44c477d"
  },
  {
    "trial": "trial_5a6b1d3c1e5f4aaf",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 9,
    "url": "trial/trial_5a6b1d3c1e5f4aaf"
  },
  {
    "trial": "trial_67d7d9efc6bc42f8",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_67d7d9efc6bc42f8"
  },
  {
    "trial": "trial_794ca8fa7a1e4c82",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 1.0,
    "classification": "BAD_SUCCESS",
    "tool_calls": 0,
    "url": "trial/trial_794ca8fa7a1e4c82"
  },
  {
    "trial": "trial_7b13ebfc63104171",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 6,
    "url": "trial/trial_7b13ebfc63104171"
  },
  {
    "trial": "trial_c2da8f4d15cb4bfc",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_c2da8f4d15cb4bfc"
  },
  {
    "trial": "trial_d0dae93106b0465d",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_d0dae93106b0465d"
  },
  {
    "trial": "trial_d1cf2c9a9dcf4ed2",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 0,
    "url": "trial/trial_d1cf2c9a9dcf4ed2"
  },
  {
    "trial": "trial_fd1e9d1f2ffb4ed1",
    "task": "task_0108_track_particles_driven_by_tile_type_gradient",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-sonnet-4-6",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 9,
    "url": "trial/trial_fd1e9d1f2ffb4ed1"
  },
  {
    "trial": "trial_1be787d70f974444",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 29,
    "url": "trial/trial_1be787d70f974444"
  },
  {
    "trial": "trial_1e3218c3160a4f17",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 26,
    "url": "trial/trial_1e3218c3160a4f17"
  },
  {
    "trial": "trial_2be9c7f787484eb3",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 28,
    "url": "trial/trial_2be9c7f787484eb3"
  },
  {
    "trial": "trial_30ae8cbbb0be4a7a",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 36,
    "url": "trial/trial_30ae8cbbb0be4a7a"
  },
  {
    "trial": "trial_559c2f32830641b2",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 25,
    "url": "trial/trial_559c2f32830641b2"
  },
  {
    "trial": "trial_5e6819fcda1c4dac",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 27,
    "url": "trial/trial_5e6819fcda1c4dac"
  },
  {
    "trial": "trial_8cfd4cf8597347dc",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 26,
    "url": "trial/trial_8cfd4cf8597347dc"
  },
  {
    "trial": "trial_9b3f97c2a91e4a76",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 29,
    "url": "trial/trial_9b3f97c2a91e4a76"
  },
  {
    "trial": "trial_aebf06083a1844ac",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 28,
    "url": "trial/trial_aebf06083a1844ac"
  },
  {
    "trial": "trial_ce804c41ddb4421e",
    "task": "task_0131_minimap_ui_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 34,
    "url": "trial/trial_ce804c41ddb4421e"
  },
  {
    "trial": "trial_091839faa7074663",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 65,
    "url": "trial/trial_091839faa7074663"
  },
  {
    "trial": "trial_22c27bce0b5844ce",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 42,
    "url": "trial/trial_22c27bce0b5844ce"
  },
  {
    "trial": "trial_62d736479dc44d58",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 56,
    "url": "trial/trial_62d736479dc44d58"
  },
  {
    "trial": "trial_b738a8aa834549ee",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 45,
    "url": "trial/trial_b738a8aa834549ee"
  },
  {
    "trial": "trial_b74fb40e57dd46b8",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 50,
    "url": "trial/trial_b74fb40e57dd46b8"
  },
  {
    "trial": "trial_d084b5f4fd204ae5",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 53,
    "url": "trial/trial_d084b5f4fd204ae5"
  },
  {
    "trial": "trial_d934ec54ba8c41d4",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 59,
    "url": "trial/trial_d934ec54ba8c41d4"
  },
  {
    "trial": "trial_f07bd7ddf0fa4c81",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 45,
    "url": "trial/trial_f07bd7ddf0fa4c81"
  },
  {
    "trial": "trial_f6281ee19638468b",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 57,
    "url": "trial/trial_f6281ee19638468b"
  },
  {
    "trial": "trial_f9ca55ceb0b544f0",
    "task": "task_0132_minimap_marker_logic_complex",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 44,
    "url": "trial/trial_f9ca55ceb0b544f0"
  },
  {
    "trial": "trial_301fee52f01b4bd8",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 14,
    "url": "trial/trial_301fee52f01b4bd8"
  },
  {
    "trial": "trial_33b0d66f404b4195",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 41,
    "url": "trial/trial_33b0d66f404b4195"
  },
  {
    "trial": "trial_52f5c599ff9c4824",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 42,
    "url": "trial/trial_52f5c599ff9c4824"
  },
  {
    "trial": "trial_7211ba0463c64823",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "HARNESS_ERROR",
    "tool_calls": 17,
    "url": "trial/trial_7211ba0463c64823"
  },
  {
    "trial": "trial_767761a8edd84bcc",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 15,
    "url": "trial/trial_767761a8edd84bcc"
  },
  {
    "trial": "trial_785c9a165a6646fd",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 38,
    "url": "trial/trial_785c9a165a6646fd"
  },
  {
    "trial": "trial_81878cae4483444f",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 17,
    "url": "trial/trial_81878cae4483444f"
  },
  {
    "trial": "trial_9c934435ad4d47b6",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 46,
    "url": "trial/trial_9c934435ad4d47b6"
  },
  {
    "trial": "trial_af9f29916e3245df",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 17,
    "url": "trial/trial_af9f29916e3245df"
  },
  {
    "trial": "trial_ff941c92df7e45ff",
    "task": "task_9001_checkpoint_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 27,
    "url": "trial/trial_ff941c92df7e45ff"
  },
  {
    "trial": "trial_1833ddae76984b7b",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 37,
    "url": "trial/trial_1833ddae76984b7b"
  },
  {
    "trial": "trial_1b460c266f754527",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_1b460c266f754527"
  },
  {
    "trial": "trial_1b608e21624d467b",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 25,
    "url": "trial/trial_1b608e21624d467b"
  },
  {
    "trial": "trial_577d0fbed7b04af7",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 40,
    "url": "trial/trial_577d0fbed7b04af7"
  },
  {
    "trial": "trial_75acac68eb25456f",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 32,
    "url": "trial/trial_75acac68eb25456f"
  },
  {
    "trial": "trial_9178f6b9237447a3",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 18,
    "url": "trial/trial_9178f6b9237447a3"
  },
  {
    "trial": "trial_aa5f68c21ba747cf",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "BAD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_aa5f68c21ba747cf"
  },
  {
    "trial": "trial_b5768d6ab6514d39",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 45,
    "url": "trial/trial_b5768d6ab6514d39"
  },
  {
    "trial": "trial_d9abc140a0cb4c63",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 16,
    "url": "trial/trial_d9abc140a0cb4c63"
  },
  {
    "trial": "trial_e8dbea7c1a654ccb",
    "task": "task_9002_combo_score_system",
    "category": "game",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 45,
    "url": "trial/trial_e8dbea7c1a654ccb"
  },
  {
    "trial": "trial_4cf7c5e8285a42fd",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 19,
    "url": "trial/trial_4cf7c5e8285a42fd"
  },
  {
    "trial": "trial_658d6e5289574548",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 21,
    "url": "trial/trial_658d6e5289574548"
  },
  {
    "trial": "trial_6d9b7f1bf0e04651",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 26,
    "url": "trial/trial_6d9b7f1bf0e04651"
  },
  {
    "trial": "trial_82dc1bf4532a4fda",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 41,
    "url": "trial/trial_82dc1bf4532a4fda"
  },
  {
    "trial": "trial_b4b02fd20e2e433f",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_b4b02fd20e2e433f"
  },
  {
    "trial": "trial_bf5f8dd8c5ef464a",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 20,
    "url": "trial/trial_bf5f8dd8c5ef464a"
  },
  {
    "trial": "trial_cfad4489f13a4c89",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 19,
    "url": "trial/trial_cfad4489f13a4c89"
  },
  {
    "trial": "trial_df713da845ac4223",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 20,
    "url": "trial/trial_df713da845ac4223"
  },
  {
    "trial": "trial_ef969fb4f8d64625",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 23,
    "url": "trial/trial_ef969fb4f8d64625"
  },
  {
    "trial": "trial_f13e993ee80a400c",
    "task": "truss2d-solver",
    "category": "mechanical_engineering",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 0.0,
    "classification": "GOOD_FAILURE",
    "tool_calls": 24,
    "url": "trial/trial_f13e993ee80a400c"
  },
  {
    "trial": "trial_1225a31603454106",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_1225a31603454106"
  },
  {
    "trial": "trial_31591e144ec14bf8",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_31591e144ec14bf8"
  },
  {
    "trial": "trial_429d81dd56ae4717",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 11,
    "url": "trial/trial_429d81dd56ae4717"
  },
  {
    "trial": "trial_6c618c861899499e",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_6c618c861899499e"
  },
  {
    "trial": "trial_72fa0c939f494663",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 9,
    "url": "trial/trial_72fa0c939f494663"
  },
  {
    "trial": "trial_7c9d3ef9074f4e22",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_7c9d3ef9074f4e22"
  },
  {
    "trial": "trial_d3e62cf0d23441fa",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 14,
    "url": "trial/trial_d3e62cf0d23441fa"
  },
  {
    "trial": "trial_ea73aec991e142f7",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 8,
    "url": "trial/trial_ea73aec991e142f7"
  },
  {
    "trial": "trial_f0b09378eb0949fe",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 10,
    "url": "trial/trial_f0b09378eb0949fe"
  },
  {
    "trial": "trial_f48f099030a74336",
    "task": "window-aggregate-store",
    "category": "swe",
    "agent": "claude-code",
    "model": "claude-opus-4-8",
    "reward": 1.0,
    "classification": "GOOD_SUCCESS",
    "tool_calls": 16,
    "url": "trial/trial_f48f099030a74336"
  }
]
