{
  "kind": "answer",
  "studySlug": "model-divergence",
  "slug": "how-much-do-models-disagree-on-brand-recommendations",
  "title": "How much do models disagree on brand recommendations?",
  "description": "A lot. 14.6% of prompts fall into the high-divergence bucket, and average agreement is still only 43.3% even when measured across a large, cleaned comparison set.",
  "lastUpdated": "2026-03-11",
  "lastTested": "2026-03-11",
  "sourceStudyUrl": "/trakkr-research/model-divergence",
  "sourceStudyTitle": "Same Question, Different AI, Different Answers",
  "claimIds": [
    "model-divergence:high-divergence",
    "model-divergence:avg-agreement",
    "model-divergence:valid-comparisons"
  ],
  "relatedSlugs": [
    "answer:which-query-types-produce-the-most-consensus",
    "answer:are-general-and-best-of-prompts-more-volatile-than-comparisons",
    "fact:more-than-seven-hundred-thousand-valid-comparisons-power-the-study",
    "tracker:query-class-agreement-tracker"
  ],
  "methodologySummary": "Built from 797,644 valid comparisons across 44,088 reports and 8 models, covering 6,439,133 model responses in the observed window.",
  "limitations": [
    "Agreement is measured across recommendation outputs, not across hidden reasoning or retrieval context.",
    "Average agreement can hide large differences between query classes and model pairs.",
    "The study measures overlap, not which answer was objectively “right”."
  ],
  "keywords": [
    "model divergence",
    "AI agreement",
    "ChatGPT vs Claude",
    "Gemini vs Perplexity",
    "AI disagreement",
    "brand recommendation divergence"
  ],
  "schemaHints": {
    "pageType": "Article",
    "includeDataset": true
  },
  "question": "How much do models disagree on brand recommendations?",
  "directAnswer": "A lot. 14.6% of prompts fall into the high-divergence bucket, and average agreement is still only 43.3% even when measured across a large, cleaned comparison set.",
  "answerSummary": "This is enough disagreement to treat each model as its own acquisition channel.",
  "keyFacts": [
    {
      "label": "High divergence rate",
      "value": "14.6%",
      "detail": "Prompts in the 0-25% agreement bucket.",
      "claimId": "model-divergence:high-divergence"
    },
    {
      "label": "Average agreement",
      "value": "43.3%",
      "detail": "Mean cross-model agreement rate.",
      "claimId": "model-divergence:avg-agreement"
    },
    {
      "label": "Valid comparisons",
      "value": "797,644",
      "detail": "Cross-model recommendation comparisons in the study.",
      "claimId": "model-divergence:valid-comparisons"
    }
  ],
  "evidenceTable": [
    {
      "label": "High divergence rate",
      "value": "14.6%",
      "note": "Prompts in the 0-25% agreement bucket."
    },
    {
      "label": "Average agreement",
      "value": "43.3%",
      "note": "Mean cross-model agreement rate."
    },
    {
      "label": "Valid comparisons",
      "value": "797,644",
      "note": "Cross-model recommendation comparisons in the study."
    }
  ],
  "whyItMatters": "This answer matters because it turns a study finding into an operating rule teams can use when they decide what to publish, refresh, or measure next.",
  "whatToDo": [
    "Track visibility across multiple models instead of using one platform as a proxy for the whole market.",
    "Prioritize query classes where disagreement is highest because that is where share can move fastest.",
    "Treat consensus as a benchmark, but treat divergence as the operating reality."
  ],
  "faqs": [
    {
      "question": "How much do models disagree on brand recommendations?",
      "answer": "A lot. 14.6% of prompts fall into the high-divergence bucket, and average agreement is still only 43.3% even when measured across a large, cleaned comparison set."
    },
    {
      "question": "Which numbers from Same Question, Different AI, Different Answers matter most here?",
      "answer": "High divergence rate: 14.6%. Prompts in the 0-25% agreement bucket. Average agreement: 43.3%. Mean cross-model agreement rate."
    },
    {
      "question": "What should a team do next?",
      "answer": "Track visibility across multiple models instead of using one platform as a proxy for the whole market. Prioritize query classes where disagreement is highest because that is where share can move fastest. Treat consensus as a benchmark, but treat divergence as the operating reality."
    }
  ]
}
