{
  "kind": "fact",
  "studySlug": "model-divergence",
  "slug": "only-four-percent-of-prompts-produce-perfect-consensus",
  "title": "Only 4.0% of prompts produce perfect consensus",
  "description": "An analysis of eight major artificial intelligence models including OpenAI, Anthropic, Gemini, Grok, Deepseek, Meta, Perplexity, and Google AI Overviews reveals significant variance in output generation. When presented with identical prompts, the models generated unanimous responses in only a marginal fraction of cases.",
  "lastUpdated": "2026-03-11",
  "lastTested": "2026-03-11",
  "sourceStudyUrl": "/trakkr-research/model-divergence",
  "sourceStudyTitle": "Same Question, Different AI, Different Answers",
  "claimIds": [
    "model-divergence:perfect-agreement",
    "model-divergence:models"
  ],
  "relatedSlugs": [
    "answer:how-often-is-there-perfect-consensus-across-models",
    "answer:how-much-do-models-disagree-on-brand-recommendations",
    "tracker:cross-model-consensus-tracker"
  ],
  "methodologySummary": "Built from 797,644 valid comparisons across 44,088 reports and 8 models, covering 6,439,133 model responses in the observed window.",
  "limitations": [
    "Agreement is measured across recommendation outputs, not across hidden reasoning or retrieval context.",
    "Average agreement can hide large differences between query classes and model pairs.",
    "The study measures overlap, not which answer was objectively “right”."
  ],
  "keywords": [
    "model divergence",
    "AI agreement",
    "ChatGPT vs Claude",
    "Gemini vs Perplexity"
  ],
  "schemaHints": {
    "pageType": "Article",
    "includeDataset": true
  },
  "claim": "Only 4.0 percent of identical prompts produced perfect agreement across all eight analyzed artificial intelligence models.",
  "supportingMetrics": [
    {
      "label": "Perfect agreement",
      "value": "4.0%",
      "detail": "Only a small share of prompts produce unanimous outcomes."
    },
    {
      "label": "Models analyzed",
      "value": "8",
      "detail": "OpenAI, Anthropic, Gemini, Grok, Deepseek, Meta, Perplexity, and Google AI Overviews."
    }
  ],
  "whyItMatters": "Strategists and operators must diversify their artificial intelligence testing environments. Relying on a single model to validate search visibility or output accuracy is insufficient, as a single model win does not indicate broad market leadership or consistent cross platform performance.",
  "citationBlock": {
    "quote": "Only 4.0% of prompts produced perfect agreement across all 8 models.",
    "attribution": "Same Question, Different AI, Different Answers - Trakkr Research",
    "url": "https://trakkr.ai/trakkr-research/model-divergence/facts/only-four-percent-of-prompts-produce-perfect-consensus"
  }
}
