{
  "kind": "answer",
  "studySlug": "model-divergence",
  "slug": "which-query-types-produce-the-most-consensus",
  "title": "Which query types produce the most consensus?",
  "description": "Comparison queries produce the most consensus in the study, averaging 50.4% agreement. More open-ended general and best-of prompts are less stable.",
  "lastUpdated": "2026-03-11",
  "lastTested": "2026-03-11",
  "sourceStudyUrl": "/trakkr-research/model-divergence",
  "sourceStudyTitle": "Same Question, Different AI, Different Answers",
  "claimIds": [
    "model-divergence:comparison-agreement",
    "model-divergence:general-agreement",
    "model-divergence:bestof-divergence"
  ],
  "relatedSlugs": [
    "answer:are-general-and-best-of-prompts-more-volatile-than-comparisons",
    "answer:what-does-an-average-top-three-overlap-of-two-point-eight-mean",
    "fact:high-divergence-prompts-make-up-fourteen-point-six-percent-of-the-study",
    "tracker:cross-model-consensus-tracker"
  ],
  "methodologySummary": "Built from 797,644 valid comparisons across 44,088 reports and 8 models, covering 6,439,133 model responses in the observed window.",
  "limitations": [
    "Agreement is measured across recommendation outputs, not across hidden reasoning or retrieval context.",
    "Average agreement can hide large differences between query classes and model pairs.",
    "The study measures overlap, not which answer was objectively “right”."
  ],
  "keywords": [
    "model divergence",
    "AI agreement",
    "ChatGPT vs Claude",
    "Gemini vs Perplexity",
    "query type agreement",
    "comparison query AI"
  ],
  "schemaHints": {
    "pageType": "Article",
    "includeDataset": true
  },
  "question": "Which query types produce the most consensus?",
  "directAnswer": "Mostly comparison queries. Comparison queries produce the most consensus in the study, averaging 50.4% agreement. More open-ended general prompts average 42.2% agreement, while best-of prompts frequently split models with a high divergence of 14.8%.",
  "answerSummary": "The tighter the question, the more likely models are to converge on overlapping answer sets, meaning operators should expect higher volatility in broad or subjective search categories.",
  "keyFacts": [
    {
      "label": "Comparison-query agreement",
      "value": "50.4%",
      "detail": "Comparison prompts produce the highest average agreement.",
      "claimId": "model-divergence:comparison-agreement"
    },
    {
      "label": "General-query agreement",
      "value": "42.2%",
      "detail": "General prompts are less stable across models.",
      "claimId": "model-divergence:general-agreement"
    },
    {
      "label": "Best-of high divergence",
      "value": "14.8%",
      "detail": "Best-of prompts frequently split models.",
      "claimId": "model-divergence:bestof-divergence"
    }
  ],
  "evidenceTable": [
    {
      "label": "Comparison-query agreement",
      "value": "50.4%",
      "note": "Comparison prompts produce the highest average agreement."
    },
    {
      "label": "General-query agreement",
      "value": "42.2%",
      "note": "General prompts are less stable across models."
    },
    {
      "label": "Best-of high divergence",
      "value": "14.8%",
      "note": "Best-of prompts frequently split models."
    }
  ],
  "whyItMatters": "Understanding query consensus allows teams to allocate resources effectively, prioritizing content optimization for high-divergence queries where market share is fluid, rather than competing in locked-in consensus categories.",
  "whatToDo": [
    "Track visibility across multiple models instead of using one platform as a proxy for the whole market.",
    "Prioritize query classes where disagreement is highest because that is where share can move fastest.",
    "Treat consensus as a benchmark, but treat divergence as the operating reality."
  ],
  "faqs": [
    {
      "question": "What is the average agreement rate for comparison queries?",
      "answer": "Comparison prompts produce the highest average agreement at 50.4%."
    },
    {
      "question": "How do general queries perform compared to comparison queries?",
      "answer": "General prompts are less stable across models, showing a 42.2% agreement rate."
    },
    {
      "question": "Which query type causes the most split between models?",
      "answer": "Best-of prompts frequently split models, showing a high divergence of 14.8%."
    }
  ]
}
