{
  "kind": "answer",
  "studySlug": "model-divergence",
  "slug": "how-often-is-there-perfect-consensus-across-models",
  "title": "How often is there perfect consensus across models?",
  "description": "Rarely. Only 4.0% of prompts produced unanimous agreement across all 8 models in the study.",
  "lastUpdated": "2026-03-11",
  "lastTested": "2026-03-11",
  "sourceStudyUrl": "/trakkr-research/model-divergence",
  "sourceStudyTitle": "Same Question, Different AI, Different Answers",
  "claimIds": [
    "model-divergence:perfect-agreement",
    "model-divergence:models",
    "model-divergence:valid-comparisons"
  ],
  "relatedSlugs": [
    "answer:how-much-do-models-disagree-on-brand-recommendations",
    "answer:which-query-types-produce-the-most-consensus",
    "fact:only-four-percent-of-prompts-produce-perfect-consensus",
    "tracker:cross-model-consensus-tracker"
  ],
  "methodologySummary": "Built from 797,644 valid comparisons across 44,088 reports and 8 models, covering 6,439,133 model responses in the observed window.",
  "limitations": [
    "Agreement is measured across recommendation outputs, not across hidden reasoning or retrieval context.",
    "Average agreement can hide large differences between query classes and model pairs.",
    "The study measures overlap, not which answer was objectively “right”."
  ],
  "keywords": [
    "model divergence",
    "AI agreement",
    "ChatGPT vs Claude",
    "Gemini vs Perplexity",
    "perfect consensus AI",
    "unanimous AI answers"
  ],
  "schemaHints": {
    "pageType": "Article",
    "includeDataset": true
  },
  "question": "How often is there perfect consensus across models?",
  "directAnswer": "Rarely. Only 4.0% of prompts produced unanimous agreement across all 8 models in the study.",
  "answerSummary": "Perfect consensus is the exception, meaning operators cannot rely on a single model to represent the entire AI search landscape.",
  "keyFacts": [
    {
      "label": "Perfect agreement",
      "value": "4.0%",
      "detail": "Only a small share of prompts produce unanimous outcomes.",
      "claimId": "model-divergence:perfect-agreement"
    },
    {
      "label": "Models analyzed",
      "value": "8",
      "detail": "OpenAI, Anthropic, Gemini, Grok, DeepSeek, Meta, Perplexity, and Google AI Overviews.",
      "claimId": "model-divergence:models"
    },
    {
      "label": "Valid comparisons",
      "value": "797,644",
      "detail": "Cross-model recommendation comparisons in the study.",
      "claimId": "model-divergence:valid-comparisons"
    }
  ],
  "evidenceTable": [
    {
      "label": "Perfect agreement",
      "value": "4.0%",
      "note": "Only a small share of prompts produce unanimous outcomes."
    },
    {
      "label": "Models analyzed",
      "value": "8",
      "note": "OpenAI, Anthropic, Gemini, Grok, DeepSeek, Meta, Perplexity, and Google AI Overviews."
    },
    {
      "label": "Valid comparisons",
      "value": "797,644",
      "note": "Cross-model recommendation comparisons in the study."
    }
  ],
  "whyItMatters": "Relying on a single model for visibility measurement creates blind spots. Operators must measure across multiple engines to accurately assess brand presence and allocate optimization resources.",
  "whatToDo": [
    "Track visibility across multiple models instead of using one platform as a proxy for the whole market.",
    "Prioritize query classes where disagreement is highest because that is where share can move fastest.",
    "Treat consensus as a benchmark but treat divergence as the operating reality."
  ],
  "faqs": [
    {
      "question": "Which models were included in the consensus analysis?",
      "answer": "The study analyzed 8 models: OpenAI, Anthropic, Gemini, Grok, DeepSeek, Meta, Perplexity, and Google AI Overviews."
    },
    {
      "question": "How many comparisons were used to determine the consensus rate?",
      "answer": "The 4.0% perfect-agreement rate is based on 797,644 valid cross-model recommendation comparisons."
    }
  ]
}
