{
  "sims": {
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Both datasets include headlines with stock tickers (e.g., $CRWD in A, $PYPL in B) to identify companies.",
      "Analyst actions (upgrades, downgrades, price target changes) are central to headlines in both datasets (e.g., Deutsche Bank cuts in A, Morgan Stanley cuts in B).",
      "Earnings metrics (EPS beats/misses, revenue results) are explicitly mentioned (e.g., Tyson Foods in A, PulteGroup in B).",
      "Revenue guidance or forecasts (e.g., lowered guidance in A, Q4 guidance cuts in B) are highlighted as market-moving factors.",
      "Industry-specific trends (e.g., oil prices in A, semiconductor sector in B) are discussed in granular detail.",
      "Macroeconomic indicators (e.g., retail sales in A, inflation pressures in B) are tied to company or sector performance.",
      "Dividends, share repurchases, or capital strategies (e.g., WPT dividend in A, Ally Financial outlook pull in B) are noted as key financial events.",
      "Regulatory or political impacts (e.g., Trump-Powell meeting in A, NY Attorney General probe in B) are cited as market drivers.",
      "Corporate events (e.g., Coty acquisition in A, Zipline expansion in B) are framed as catalysts for stock movements.",
      "Structured phrasing (ticker + analyst firm + action) is consistent (e.g., \"Deutsche Bank cuts to Hold\" in A, \"Morgan Stanley upgrades $DIS\" in B)."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include mentions of specific stock tickers using the dollar sign notation (e.g., $AAPL, $XLE).",
      "Headlines frequently reference earnings results (e.g., 'misses/beats on revenue' in A and 'reports stronger-than-expected earnings' in B).",
      "Analyst actions (upgrades, downgrades, price target changes) are a recurring theme (e.g., 'Deutsche Bank cuts to Hold' in A and 'Barclays Maintains Hold Rating' in B).",
      "Mentions of revenue, EPS, or financial metrics (e.g., 'EPS misses by $0.01' in A and 'Exceeds Analyst Expectations' in B) are common.",
      "Market-moving events (e.g., acquisitions, partnerships, regulatory scrutiny) are highlighted (e.g., 'Coty Acquires...' in A and 'Apple Announces Acquisition...' in B).",
      "Sector-specific news (e.g., energy, tech, healthcare) is prevalent (e.g., 'Oil slides again' in A and 'Energy Sector Under Pressure' in B).",
      "Headlines use terms like 'surges,' 'slides,' or 'plunges' to describe stock price movements (e.g., 'plummets 55%' in A and 'stock soars' in B).",
      "Forward-looking statements (e.g., guidance updates, future growth strategies) appear in both (e.g., 'raises dividend' in A and 'Outlines Fiscal 2024 Guidance' in B).",
      "References to macroeconomic factors (e.g., trade deals, tariffs, economic growth) occur in both (e.g., 'China tariffs' in A and 'Economic Concerns Mount' in B).",
      "Dividend announcements or changes are mentioned (e.g., 'declares $0.0633 dividend' in A and 'boosts dividend by 7%' in B)."
    ],
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include stock ticker symbols (e.g., $AAPL, $TSLA, $CVX) in headlines for immediate market context.",
      "Headlines frequently reference earnings reports, beats/misses (e.g., \"EPS misses by $0.01\", \"Q4 earnings beating expectations\").",
      "Analyst actions (upgrades/downgrades, price target changes) are central themes (e.g., \"Deutsche Bank cuts to Hold\", \"Goldman Sachs upgrades Ford\").",
      "Revenue performance is explicitly mentioned (e.g., \"misses on revenue\", \"revenue up 20%\").",
      "Headlines emphasize granular financial metrics like EPS, FFO, or dividend declarations (e.g., \"declares $0.0633 dividend\").",
      "Industry-specific terminology is used (e.g., \"semiconductor stocks\", \"energy sector\", \"refining margins\").",
      "Macroeconomic factors (e.g., GDP, tariffs, oil prices) are tied to company or sector performance (e.g., \"OPEC+ cuts\", \"trade deal with China\").",
      "Corporate events (mergers, acquisitions, layoffs) are highlighted (e.g., \"acquires majority stake\", \"lays off 2,600 employees\").",
      "Market sentiment cues (e.g., \"shares tumble\", \"stock surges\") reflect immediate price reactions.",
      "Forward-looking statements (guidance, forecasts) appear consistently (e.g., \"expects Q2 below consensus\", \"forecasting strong Q4 performance\")."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "All headlines pertain to financial markets, corporate performance, or economic indicators.",
      "Each headline references specific entities such as companies, stock tickers, commodities, or market indices.",
      "Headlines consistently use action verbs (e.g., 'reports,' 'downgrades,' 'surges') to describe events or outcomes.",
      "Time-sensitive references (e.g., quarterly earnings, dates like 'February 1,' 'premarket') are present in all headlines.",
      "Financial terminology (e.g., 'EPS,' 'revenue,' 'price target,' 'dividend') is universally included.",
      "All headlines provide information directly relevant to investor decisions (e.g., stock movements, analyst actions, earnings results).",
      "Structured as concise news updates rather than in-depth analyses or opinions.",
      "Mentions of outcomes or changes (e.g., 'misses,' 'beats,' 'mixed results,' 'upgrades') are present in every headline.",
      "Named entities (e.g., companies like Tesla, institutions like Morgan Stanley, indices like Dow) are explicitly referenced.",
      "Industry-specific jargon or abbreviations (e.g., 'Q4,' 'FFO,' 'premarket') are used across all samples."
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Both datasets focus on immediate market reactions to earnings reports (e.g., misses/beats, stock price changes).",
      "Headlines frequently reference stock tickers or company names to specify entities involved.",
      "Central bank actions (e.g., Federal Reserve interest rate decisions) are a recurring theme impacting market sentiment.",
      "Sector-specific developments (e.g., tech, energy, pharmaceuticals) drive news narratives in both datasets.",
      "Analyst upgrades/downgrades and price target adjustments are cited as catalysts for stock movements.",
      "Forward-looking statements (e.g., economic forecasts, policy meetings, earnings guidance) appear consistently.",
      "Regulatory or geopolitical events (e.g., trade deals, antitrust probes) are highlighted as market-moving factors.",
      "Quantitative metrics (e.g., EPS results, revenue figures, dividend announcements) are emphasized for precision.",
      "Market volatility due to macroeconomic factors (e.g., inflation, oil prices, unemployment) is a common thread.",
      "Headlines use concise, jargon-heavy phrasing typical of financial reporting (e.g., 'plummets,' 'surges,' 'beats')."
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Both datasets include headlines referencing quarterly earnings results (e.g., EPS beats/misses, revenue deviations).",
      "Analyst actions such as upgrades, downgrades, and price target adjustments are frequently mentioned in both datasets.",
      "Stock tickers are commonly embedded in headlines to identify relevant companies (e.g., $CRWD in A, $DISH in B).",
      "Corporate events like acquisitions, partnerships, and product launches are highlighted in both datasets.",
      "Macroeconomic factors (e.g., trade deals, oil prices, interest rates) are cited as market drivers in headlines.",
      "Forward-looking statements about company performance (e.g., guidance revisions, growth forecasts) appear in both datasets.",
      "Sector-specific trends (e.g., semiconductor demand, renewable energy growth) are contextualized in headlines.",
      "External events (e.g., geopolitical conflicts, regulatory changes) are linked to stock or market movements.",
      "Financial terminology (e.g., FFO, dividends, revenue) is consistently used to quantify outcomes.",
      "Conference calls, investor events, and earnings release dates are explicitly mentioned as key milestones."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "All headlines reference specific companies, stock tickers, or financial instruments explicitly or contextually.",
      "Each headline contains financial terminology such as earnings, revenue, price targets, or analyst ratings (e.g., 'upgraded,' 'downgraded').",
      "Market-moving events (e.g., earnings reports, mergers, macroeconomic shifts) are central to every headline.",
      "Analyst actions (upgrades/downgrades) or institutional perspectives (e.g., Morgan Stanley, Barclays) are directly cited or implied.",
      "Stock price movements (e.g., 'surges,' 'slips') or market sentiment (bullish/bearish) are explicitly mentioned.",
      "Forward-looking statements (e.g., guidance, projections) or retrospective performance summaries are included.",
      "Quantitative metrics (e.g., EPS misses, revenue figures, dividend amounts) are consistently highlighted.",
      "Industry-specific jargon (e.g., 'FFO,' 'dividend suspension,' 'OPEC+ cuts') is prevalent across all samples.",
      "Headlines attribute information to authoritative sources (e.g., firms, analysts, government agencies).",
      "All samples are structured concisely for investor relevance, omitting narrative context in favor of actionable data."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Both datasets include stock ticker symbols in headlines (e.g., $TSLA, $AAPL).",
      "Headlines frequently mention earnings results (EPS/revenue beats/misses) with specific metrics.",
      "Analyst actions (upgrades/downgrades) from firms like Morgan Stanley and Barclays are highlighted.",
      "Price target adjustments by financial institutions are explicitly cited.",
      "Company-specific events (mergers, product launches, expansions) are common themes.",
      "Economic indicators (e.g., retail sales, inflation, GDP) influence headline narratives.",
      "Sector-specific news (energy, tech, healthcare) drives contextual analysis.",
      "References to financial institutions (Oppenheimer, Goldman Sachs) as data sources are pervasive.",
      "Forward-looking statements (earnings dates, investor conferences) guide market expectations.",
      "Stock price movements (premarket changes, surges/drops) are directly tied to news triggers."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "Both datasets include headlines mentioning specific stock ticker symbols (e.g., $NVDA, $AAPL, $TSLA).",
      "Earnings results (EPS beats/misses) and revenue performance are frequently highlighted in both datasets.",
      "Analyst actions (upgrades, downgrades, price target adjustments) are a common focus in headlines.",
      "Market reactions to news (e.g., stock price movements, surges, or declines) are explicitly described.",
      "Headlines frequently reference company-specific challenges (supply chain disruptions, COVID-19 impacts).",
      "Partnerships, collaborations, or strategic expansions are recurring themes in both datasets.",
      "Forward-looking statements (guidance updates, forecasts, growth initiatives) are prominently featured.",
      "Sector-specific trends (tech, energy, biotech, retail) are consistently covered across both datasets.",
      "Regulatory, geopolitical, or macroeconomic factors (trade deals, tariffs, inflation) are contextual drivers.",
      "Headlines often quantify financial metrics (dividends, revenue figures, project costs, price targets)."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Both datasets include headlines referencing specific companies with ticker symbols (e.g., $AAPL, $TSLA).",
      "Headlines frequently mention earnings results (e.g., beats/misses on EPS or revenue) and analyst expectations.",
      "Stock price movements (e.g., premarket gains, post-earnings drops) are explicitly highlighted in most samples.",
      "Analyst actions (upgrades, downgrades, price target changes) are central to headlines in both datasets.",
      "Macroeconomic factors (e.g., trade deals, inflation, interest rates) are cited as market drivers.",
      "Sector-specific trends (e.g., semiconductors, energy, tech) are emphasized across samples.",
      "Forward-looking statements (e.g., guidance revisions, economic forecasts) appear consistently.",
      "Market indices (e.g., S&P 500, Dow Jones) and asset classes (e.g., oil, forex) are referenced for broader context.",
      "Corporate events (e.g., mergers, dividend changes, layoffs) are reported with financial implications.",
      "Regulatory/political developments (e.g., antitrust probes, Fed policy) are tied to market impacts."
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Both datasets include headlines referencing earnings reports (e.g., misses/beats on EPS/revenue).",
      "Both frequently mention analyst actions (upgrades/downgrades) and price target adjustments.",
      "Stock price movements (e.g., surges, drops, stability) are explicitly highlighted in response to news.",
      "Ticker symbols prefixed with \"$\" are consistently used to identify companies/ETFs.",
      "Headlines focus on sector-specific developments (e.g., energy, tech, retail, biotech).",
      "Forward-looking guidance (e.g., lowered/raised outlooks, strategic plans) is a recurring theme.",
      "Market sentiment keywords (e.g., \"optimistic,\" \"cautious,\" \"volatility\") contextualize reactions.",
      "M&A activity, partnerships, or product launches are cited as catalysts for price changes.",
      "Regulatory/geopolitical risks (e.g., tariffs, supply chain disruptions) directly tie to financial performance.",
      "Macroeconomic indicators (e.g., GDP, inflation, oil prices) are linked to company/sector outcomes."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "All headlines reference specific financial entities (companies, indices, commodities, or economic indicators).",
      "Each headline reports a recent or upcoming event impacting financial markets or securities.",
      "Financial terminology (e.g., earnings, revenue, price target, guidance) is present in every headline.",
      "Headlines include either company names, ticker symbols, or sector references.",
      "Each provides information directly relevant to investor decisions or market movements.",
      "Events described are tied to quantifiable outcomes (e.g., stock price changes, earnings misses/beats, analyst targets).",
      "Mentions of financial institutions (e.g., Barclays, Morgan Stanley) or analyst actions (e.g., upgrades, downgrades) appear consistently.",
      "Headlines emphasize timeliness (e.g., 'premarket,' 'Q3 results,' 'upcoming earnings').",
      "Market-moving catalysts (e.g., mergers, geopolitical events, product launches) are central to all samples.",
      "Structural similarity in headline composition: entity + event + financial impact (e.g., 'X falls Y% after Z')."
    ],
    "llama3.1-8b_zero-shot_v1": [
      "All headlines report events directly impacting financial markets or economic conditions.",
      "Each headline references at least one specific entity (e.g., company, government body, economic indicator).",
      "Financial/economic terminology (e.g., 'stock,' 'revenue,' 'GDP') appears in every headline.",
      "Headlines emphasize timeliness, focusing on recent, ongoing, or imminent market-relevant events.",
      "Entries inform investors about factors influencing asset valuations or strategic decisions.",
      "Content is strictly factual, reporting verifiable events rather than opinions or analyses.",
      "Implicit or explicit impact on market movements/sentiment is present in all headlines.",
      "Concise, high-impact language characteristic of financial journalism is consistently used.",
      "All entries reference macroeconomic factors (e.g., interest rates) or microeconomic events (e.g., earnings).",
      "Directional indicators (e.g., 'misses,' 'surges,' 'slows') explicitly or implicitly signal financial trends."
    ],
    "llama3.3-70b_few-shot_v1": [
      "Both datasets include headlines referencing stock tickers (e.g., $CRWD in A, $TSLA in B).",
      "Headlines in both datasets focus on earnings results (e.g., 'misses on revenue' in A, 'earnings miss expectations' in B).",
      "Market reactions to events (e.g., 'stock falls' in A, 'stock plummets' in B) are explicitly stated.",
      "Analyst actions (upgrades/downgrades/targets) are mentioned in both (e.g., 'upgrades snap to outperform' in A, 'Bank of America upgrades Netflix' in B).",
      "Both include macroeconomic indicators (e.g., 'retail sales rise' in A, 'GDP growth rate' in B).",
      "Geopolitical impacts on markets (e.g., 'U.S.-China trade deal' in A, 'trade tensions escalate' in B) are recurring themes.",
      "Sector-specific performance (e.g., 'semiconductor stocks' in A, 'tech stocks lead rally' in B) is highlighted.",
      "Formal financial terminology (e.g., 'dividend declared,' 'interest rate hike') appears in both datasets.",
      "Forward-looking statements (e.g., 'analysts predict' in A, 'revised growth forecasts' in B) are present.",
      "Headlines quantify performance using metrics like percentages (e.g., 'down 9%' in A, 'plummets 10%' in B)."
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Both datasets include headlines referencing stock tickers (e.g., $AAPL, $TSLA) to identify companies.",
      "Earnings results (e.g., beats, misses, revenue figures) are a central focus in both datasets.",
      "Analyst actions (e.g., upgrades, downgrades, price target changes) are prominently featured.",
      "Headlines frequently mention financial metrics like EPS, revenue, and guidance.",
      "Economic indicators (e.g., GDP, retail sales, consumer confidence) are discussed in both datasets.",
      "Company-specific news (e.g., mergers, product launches, operational updates) is a recurring theme.",
      "Global macroeconomic factors (e.g., trade deals, tariffs, supply chain disruptions) are highlighted.",
      "Forward-looking statements (e.g., forecasts, economic outlooks, growth projections) are common.",
      "Market reactions (e.g., stock price movements, investor sentiment) are explicitly noted.",
      "Mixed sentiment (positive/negative) is present, driven by earnings performance or external risks."
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Both datasets include headlines reporting quarterly earnings results, including beats or misses relative to analyst expectations.",
      "Stock price reactions to company-specific news or market events are explicitly mentioned in headlines across both datasets.",
      "Central bank policies, such as Federal Reserve interest rate decisions, are a recurring theme in both datasets.",
      "Analyst actions (e.g., upgrades, downgrades, price target revisions) are frequently cited in headlines from both datasets.",
      "Economic indicators like GDP growth, retail sales, and inflation rates are referenced to contextualize market conditions.",
      "Sector-specific developments (e.g., tech, energy, healthcare) are highlighted in headlines from both datasets.",
      "Geopolitical events (e.g., trade policies, regulatory changes) are discussed as drivers of market volatility.",
      "Mergers, acquisitions, or corporate deals (e.g., stake purchases, partnerships) are reported in both datasets.",
      "Forward-looking statements about market trends, economic forecasts, or company performance are common in both datasets.",
      "Company-specific operational updates (e.g., product launches, clinical trials, dividend changes) are featured in headlines."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Both datasets include headlines focusing on earnings reports, revenue outcomes, and their impact on stock performance (e.g., misses/beats).",
      "Both emphasize stock price movements (e.g., gains, losses, premarket/post-market activity) tied to company-specific news or market events.",
      "Analyst ratings, price target adjustments, and upgrades/downgrades are central to headlines in both datasets.",
      "Company-specific developments (e.g., mergers, product launches, partnerships) are highlighted in both datasets as market catalysts.",
      "References to macroeconomic indicators (e.g., retail sales, GDP, inflation) and geopolitical factors (e.g., tariffs, trade deals) appear in both.",
      "Ticker symbols ($AAPL, $TSLA, $NVDA, etc.) are consistently used to identify companies in headlines.",
      "Market sentiment analysis (e.g., bullish/bearish trends, sector optimism/pessimism) is a recurring theme in both datasets.",
      "Forward-looking statements (e.g., guidance revisions, forecasts, earnings outlooks) are prevalent in both datasets.",
      "Regulatory/political developments (e.g., Fed rate decisions, antitrust scrutiny, trade policies) influence headlines in both.",
      "Sector-specific analysis (e.g., tech, energy, semiconductors) is granularly addressed across both datasets."
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Both datasets include headlines referencing specific companies and financial instruments (e.g., stock tickers, indices like S&P 500).",
      "Earnings performance metrics (e.g., beats/misses on EPS, revenue) are a recurring theme in both datasets.",
      "Market movements (e.g., surges, plunges) tied to corporate results or macroeconomic events are highlighted in both.",
      "Central bank actions (e.g., Federal Reserve interest rates, trade policies) influence headlines in both datasets.",
      "Sector-specific impacts (e.g., tech, energy, retail) are prominently featured across headlines.",
      "Analyst ratings, upgrades, downgrades, or price target adjustments appear in both datasets.",
      "Economic indicators (e.g., GDP, unemployment, retail sales) are cited as drivers of market sentiment.",
      "Corporate events like mergers, acquisitions, dividends, and layoffs are covered in both datasets.",
      "Forward-looking statements (e.g., guidance revisions, economic forecasts) are common in headlines.",
      "Immediate stock price reactions to news (e.g., premarket moves, post-earnings volatility) are emphasized."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Both datasets consistently include stock ticker symbols prefixed with '$' within headlines.",
      "Headlines in both datasets frequently reference earnings reports (e.g., EPS beats/misses, revenue results).",
      "Analyst actions (e.g., upgrades, downgrades, price target changes) are prominently cited in both sets.",
      "Market reactions to news (e.g., stock price movements like 'slips', 'jumps', 'tanks') are explicitly described.",
      "Specific numerical metrics (e.g., percentages, dollar amounts, guidance figures) are used to quantify performance or forecasts.",
      "Mentions of macroeconomic factors (e.g., interest rates, trade deals, geopolitical events) contextualize market impacts.",
      "Corporate events (e.g., mergers, partnerships, dividend changes, layoffs) are highlighted as catalysts.",
      "Sector-specific trends (e.g., energy, tech, healthcare) are tied to individual stock performance.",
      "Forward-looking statements (e.g., earnings projections, growth targets, guidance revisions) are common in both datasets.",
      "Regulatory or operational risks (e.g., FDA delays, lawsuits, supply chain issues) are cited as drivers of volatility."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Mentions of specific companies or financial instruments (e.g., stocks, ETFs) in headlines",
      "Frequent references to earnings reports, revenue misses/beats, or financial metrics (e.g., EPS)",
      "Includes market-moving events (e.g., geopolitical tensions, OPEC+ decisions, trade deals)",
      "Highlights analyst actions (e.g., upgrades, downgrades, price target changes) or institutional decisions (e.g., Fed rate changes)",
      "Uses sector-specific terminology (e.g., semiconductors, biotech, energy, retail)",
      "Contains forward-looking statements (e.g., economic forecasts, earnings previews, event anticipation)",
      "References regulatory, political, or macroeconomic factors (e.g., tariffs, interest rates, GDP)",
      "Emphasizes stock price volatility (e.g., \"surges,\" \"plunges,\" \"slips\") in response to news",
      "Cites quantitative data (e.g., dividend amounts, percentage gains/losses, revenue figures)",
      "Focuses on industry-specific risks or opportunities (e.g., semiconductor shortages, oil demand, tech regulations)"
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Headlines frequently reference analyst actions such as upgrades, downgrades, and price target adjustments by major financial institutions (e.g., Morgan Stanley, Barclays, Goldman Sachs).",
      "Both datasets include mentions of quarterly earnings results, including beats/misses on EPS/revenue and post-earnings stock reactions (e.g., \"trades sideways,\" \"falls 5%\").",
      "Stock ticker symbols (e.g., $TSLA, $NVDA, $GOOGL in B; $AAPL, $CRWD in A) are consistently included to identify companies.",
      "Market movements are explicitly tied to specific events (e.g., production concerns, earnings reports) with granular price action details (e.g., \"slides 9%,\" \"jumps 20%\").",
      "Economic indicators (e.g., Federal Reserve Beige Book in B; U.S. retail sales, GDP data in A) are cited as drivers of market sentiment.",
      "Sector-specific trends (e.g., semiconductors, electric vehicles, cloud computing) are highlighted in both datasets to contextualize company performance.",
      "Company-specific operational developments (e.g., production challenges, R&D investments, leadership changes) are emphasized as catalysts for stock movements.",
      "Financial jargon (e.g., \"underweight,\" \"overweight,\" \"EPS beats,\" \"revenue misses\") is prevalent across all headlines.",
      "Institutional analysis (e.g., Barclays' ad revenue outlook, Goldman Sachs' cloud growth projections) is a recurring theme to validate claims.",
      "Forward-looking guidance (e.g., \"modest Q3 guidance,\" \"withdraws FY20 guidance\") and its market implications are explicitly addressed."
    ],
    "llama3.1-8b_few-shot_v1": [
      "Both datasets include headlines referencing specific stock tickers and company names (e.g., $CRWD, TSLA, Amazon).",
      "Headlines in both datasets frequently mention earnings results, revenue beats/misses, and financial metrics (e.g., \"FFO misses by $0.01\", \"Amazon's Earnings Soar\").",
      "Both highlight macroeconomic indicators like GDP growth, inflation, retail sales, and employment data (e.g., \"Q4 GDP\", \"Economic growth slows to 2.1%\").",
      "Market index movements (e.g., Dow, NASDAQ) and sector-specific trends (e.g., semiconductors, oil) are common topics in both datasets.",
      "Analyst actions (upgrades/downgrades) and price target adjustments feature prominently (e.g., \"Moody's turns negative\", \"Bank of America slashes Microsoft's price target\").",
      "Monetary policy updates (e.g., Federal Reserve rate decisions, central bank commentary) are recurrent themes in both datasets.",
      "Mergers, acquisitions, and corporate investments are covered (e.g., \"Coty Acquires...Kylie Jenner's\", \"Amazon to Invest $1 Billion\").",
      "Sector-specific volatility (e.g., energy, tech, retail) and stock price reactions to news are emphasized in both datasets.",
      "Geopolitical events impacting markets (e.g., US-China trade deals, Brexit) are addressed in headlines from both datasets.",
      "Quantitative data (percentages, dollar values, timeframes) are consistently embedded in headlines (e.g., \"Brent crude nears $70\", \"GDP growth slows to 2.1%\")."
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "Both datasets include headlines referencing stock tickers with symbols like $AAPL, $TSLA, or (DOCU).",
      "Headlines in both datasets frequently mention earnings results (e.g., 'EPS beats/misses,' 'revenue misses/beats').",
      "Analyst actions (upgrades, downgrades, price target changes) from firms like Morgan Stanley, Deutsche Bank, and Oppenheimer are prominent in both.",
      "Company-specific guidance revisions (e.g., 'cuts outlook,' 'raises guidance') are a recurring theme in both datasets.",
      "References to macroeconomic factors (e.g., interest rates, GDP, trade deals, oil prices) appear consistently across both datasets.",
      "Sector-specific trends (e.g., tech, energy, retail, healthcare) are highlighted in headlines from both A and B.",
      "Forward-looking statements (e.g., 'Q4 preview,' '2023 outlook,' 'expectations') are prevalent in both sets of samples.",
      "Mentions of mergers, acquisitions, or partnerships (e.g., 'acquires,' 'partners with') are common in both datasets.",
      "Stock price movements (e.g., 'surges X%,' 'slides X%') are explicitly noted in most headlines across A and B.",
      "Regulatory or geopolitical impacts (e.g., FDA decisions, tariffs, OPEC+ cuts) are cited as market drivers in both datasets."
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Both datasets include headlines referencing stock ticker symbols (e.g., $AAPL, $NVDA, $TSLA) to identify companies.",
      "Headlines in both datasets frequently mention earnings results, including beats/misses on revenue or EPS (e.g., \"misses by $0.01\" in A, \"Q2 earnings miss\" in B).",
      "Analyst actions (upgrades, downgrades, price target changes) are prominently featured in both datasets (e.g., \"JMP Securities upgrades\" in A, \"Morgan Stanley cuts target\" in B).",
      "Both datasets highlight macroeconomic indicators (e.g., U.S. retail sales, inflation rates, GDP growth) and their market implications.",
      "Forward-looking statements about economic or corporate performance (e.g., \"boost global economy\" in A, \"economic outlook worsens\" in B) are common.",
      "Headlines in both datasets reference geopolitical events impacting markets (e.g., U.S.-China trade tensions in A and B).",
      "Company-specific news (e.g., mergers, acquisitions, product launches) is a recurring focus (e.g., \"Coty Acquires\" in A, \"Broadcom Makes Strategic Acquisition\" in B).",
      "Quantitative metrics (e.g., percentage stock movements, revenue figures, jobless claims) are consistently included for context.",
      "Both datasets emphasize sector-specific trends (e.g., semiconductors in A, AI chips in B) and industry disruptions.",
      "Regulatory or policy developments (e.g., Fed rate decisions in A, antitrust investigations in B) are cited as market drivers."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Both datasets include stock tickers in headlines using symbols like $ or parentheses (e.g., $TSLA, NVDA).",
      "Headlines frequently reference analyst actions such as upgrades, downgrades, or price target revisions.",
      "Earnings results (e.g., EPS beats/misses, revenue performance) are a central focus across samples.",
      "Specific companies like Apple, Tesla, NVIDIA, and Meta are commonly mentioned in both datasets.",
      "Market sentiment terms (e.g., 'bullish,' 'bearish,' 'neutral') appear in headlines to describe trends.",
      "Headlines incorporate macroeconomic indicators (e.g., retail sales, GDP, inflation, oil prices).",
      "Corporate events (e.g., mergers, layoffs, dividend changes, guidance updates) are prominently featured.",
      "Regulatory/legal issues (e.g., antitrust scrutiny, lawsuits, executive probes) are addressed in both datasets.",
      "Sector-specific developments (e.g., semiconductors, cloud computing, EVs) are highlighted in headlines.",
      "Headlines use similar formatting conventions (e.g., ticker placement, 'beats/misses' language, price targets)."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Both datasets include headlines referencing specific companies or stock tickers (e.g., $CRWD in A, Apple in B).",
      "Earnings reports (e.g., EPS, revenue beats/misses) are a central focus in both (e.g., A: \"misses on revenue,\" B: \"Earnings Report Exceeds Expectations\").",
      "Market indices like the Dow Jones or S&P 500 are frequently cited to contextualize performance (A: \"Dow ends down 26,\" B: \"Dow Jones Drops 500 Points\").",
      "Monetary policy decisions (e.g., Federal Reserve interest rates) are covered in both (A: \"Fed Meeting Minutes,\" B: \"Federal Reserve Announces Interest Rate Decision\").",
      "Sector-specific trends (tech, energy, retail) are highlighted (A: \"semiconductor stocks,\" B: \"Tech Stocks Plummet\").",
      "Analyst actions (upgrades, downgrades, price targets) are mentioned (A: \"JMP Securities upgrades,\" B: \"Market Analysts Predict\").",
      "Quantitative financial metrics (e.g., $0.01 EPS miss, $600M acquisitions) are used to quantify outcomes in both datasets.",
      "Geopolitical or macroeconomic events (trade deals, pandemics) are linked to market impacts (A: US-China tariffs, B: inflation fears).",
      "Forward-looking statements (guidance, predictions) appear in both (A: \"analysts think will happen next,\" B: \"Market Analysts Predict Strong Growth\").",
      "Verb-driven language emphasizes volatility (e.g., \"plummet,\" \"soar,\" \"slips\") to describe price movements in all samples."
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Headlines in both datasets frequently reference stock tickers using symbols like $CRWD (A) and $NVDA (B).",
      "Both datasets emphasize earnings reports, financial metrics (e.g., 'misses on revenue' in A, 'Q3 earnings miss' in B), and analyst expectations.",
      "Analyst actions (upgrades, downgrades, price target changes) are prominently featured, such as 'JMP Securities upgrades' (A) and 'Barclays cuts price target' (B).",
      "Company-specific events (mergers, acquisitions, product launches) are highlighted, e.g., 'Coty Acquires' (A) and 'AirBoss Aerospace acquires' (B).",
      "Market reactions to news (stock price movements, volatility) are consistently noted, like 'Roku stock falls' (A) and 'stock plunges 12%' (B).",
      "Sector-specific trends (energy, tech, retail) are discussed, such as 'Oil slides' (A) and 'Utilities sector sees renewed interest' (B).",
      "Macroeconomic factors (trade deals, geopolitical tensions) influence headlines, e.g., 'U.S.-China trade deal' (A) and 'Ukraine-Russia tensions' (B).",
      "Dividend announcements and financial guidance updates appear in both, e.g., 'declares $0.0633 dividend' (A) and 'maintains quarterly dividend' (B).",
      "Regulatory or legal developments impacting companies are mentioned, like 'DOJ review' (A) and 'Antitrust Probe' (B).",
      "Both datasets use technical financial terminology (EPS, revenue, guidance, FFO) and industry jargon (e.g., 'safe-haven demand' in B, 'OPEC+ cuts' in A)."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Both datasets include headlines referencing specific stock ticker symbols (e.g., $AAPL, $TSLA, $SPY).",
      "Headlines frequently mention earnings reports, revenue results, or guidance (e.g., 'misses on revenue,' 'beats estimates').",
      "Analyst actions (upgrades, downgrades, price target changes) are central to headlines in both datasets (e.g., 'Deutsche Bank cuts to Hold,' 'Morgan Stanley upgrades').",
      "Headlines often include quantitative financial metrics (e.g., '$0.01 miss,' '20% price target hike').",
      "Sector-specific developments (e.g., energy, biotech, retail) are highlighted in both datasets (e.g., 'oil prices surge,' 'supply chain disruptions').",
      "Market indices and ETFs (e.g., S&P 500, $XLE) are referenced as performance benchmarks in headlines.",
      "Headlines frequently cite macroeconomic factors (e.g., trade deals, interest rates, GDP) impacting markets.",
      "Company-specific events (e.g., mergers, partnerships, dividend changes) are prominent in both datasets.",
      "Sentiment indicators (e.g., 'bullish,' 'neutral,' 'downgrade') are explicitly used to frame market reactions.",
      "External risks (e.g., geopolitical tensions, regulatory scrutiny, labor costs) are cited as drivers of market movements."
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Both datasets include stock ticker symbols in parentheses (e.g., $AAPL, $TSLA) to identify companies",
      "Headlines frequently mention earnings reports with specific metric comparisons (e.g., 'misses by $0.01', 'beats analyst expectations')",
      "Analyst actions like upgrades/downgrades (e.g., 'Morgan Stanley downgrades', 'Deutsche Bank cuts to Hold') are consistently featured",
      "Price target adjustments (e.g., 'raises price target to $20', 'cuts price target to $22') are a recurring element in both datasets",
      "Specific financial metrics like EPS and revenue performance are explicitly quantified in headlines from both sets",
      "Forward-looking statements about growth/sales guidance appear frequently (e.g., 'weak demand outlook', 'improved economic outlook')",
      "Market indices and ETFs (e.g., SPY, XLE) are regularly referenced as performance indicators",
      "Macroeconomic factors (e.g., 'rising inflation concerns', 'retail sales growth') are contextualized within company-specific updates",
      "Clinical trial results and drug development milestones are mentioned for biotech/pharma companies in both datasets",
      "Investor conference participation and corporate event scheduling (e.g., 'upcoming JPMorgan healthcare conference') are consistently noted"
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include headlines referencing stock tickers using symbols like $T or (CRWD).",
      "Both datasets frequently mention earnings reports, revenue results, and financial metrics (e.g., 'misses on revenue,' 'Q3 Earnings Beat Est').",
      "Headlines in both datasets highlight analyst actions such as upgrades, downgrades, and price target adjustments (e.g., 'upgraded to buy,' 'cuts price target').",
      "Both include mentions of market reactions to news (e.g., 'stock falls,' 'shares jump,' 'premarket gains').",
      "Economic indicators (e.g., GDP, inflation, retail sales) and macroeconomic events (e.g., trade deals, oil prices) are discussed in both datasets.",
      "Corporate events like mergers, acquisitions, and partnerships are common themes (e.g., 'Coty Acquires,' 'Strategic Partnership').",
      "Both datasets reference sector-specific trends (e.g., energy, semiconductors, retail, biotech).",
      "Regulatory or legal developments (e.g., antitrust scrutiny, credit outlook revisions) are addressed in headlines from both datasets.",
      "Forward-looking statements (e.g., guidance, forecasts, growth projections) appear consistently in both (e.g., 'lowers 2019 sales outlook,' '2024 Outlook').",
      "Headlines in both datasets use technical financial terminology such as 'dividend,' 'FFO,' 'EPS,' and 'price target.'"
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "Headlines frequently mention specific companies missing or exceeding earnings estimates (e.g., \"misses on revenue\" in A and \"earnings beat\" in B).",
      "Analyst actions (upgrades, downgrades, price target changes) are prominently featured (e.g., \"Deutsche Bank cuts to Hold\" in A and \"Oppenheimer downgrades\" in B).",
      "Price target adjustments are explicitly quantified (e.g., \"raised to $70 from $62\" in A and \"cut to $15 from $20\" in B).",
      "Revenue guidance revisions (e.g., \"lowered guidance\" in A and \"cuts full-year guidance\" in B) are consistently highlighted.",
      "Sector-specific developments (semiconductors, energy, biotech) are a focus in both datasets.",
      "Stock price movements (premarket changes, post-earning reactions) are explicitly tied to news triggers in all samples.",
      "Macroeconomic factors (trade deals, tariffs, GDP) directly linked to market impacts appear in both datasets.",
      "Mentions of mergers, acquisitions, or partnerships (e.g., \"Acquires $600M Majority Stake\" in A and \"strategic agreement\" in B) are common.",
      "Regulatory/environmental impacts on businesses (e.g., \"COVID-19 impact\" in A and \"regulatory hurdles\" in B) are addressed.",
      "Ticker symbols prefixed with \"$\" are consistently used for company identification across all samples."
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Headlines reference specific companies, stocks, or financial instruments (e.g., via ticker symbols or names).",
      "Mentions of earnings reports (quarterly/annual results) and revenue metrics are present in all samples.",
      "Analyst actions (upgrades, downgrades, price target changes) are explicitly cited in every headline.",
      "All headlines include financial terminology (e.g., EPS, dividends, guidance, revenue, price targets).",
      "Stock ticker symbols are formatted with a preceding \"$\" or parentheses (e.g., $TSLA, (SAN)).",
      "Headlines focus on immediate market-moving events (e.g., earnings releases, corporate announcements, analyst revisions).",
      "Quantitative financial data (e.g., revenue figures, percentage changes, target prices) are included in all samples.",
      "References to institutional actors (e.g., Goldman Sachs, Barclays, Morgan Stanley) appear in every headline.",
      "Time-sensitive context (e.g., \"Q2 earnings,\" \"upcoming conference,\" \"recent downgrade\") is consistently provided.",
      "Headlines emphasize investor-facing implications (e.g., stock performance, sector trends, guidance impacts)."
    ]
  },
  "diffs_synth_from_real": {
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Dataset A headlines frequently include exact EPS/revenue figures (e.g., 'misses by $0.01'), while Dataset B omits granular financial metrics in favor of general beats/misses.",
      "Dataset A explicitly ties headlines to macroeconomic events (e.g., retail sales, oil prices), whereas Dataset B focuses narrowly on analyst actions without broader economic context.",
      "Dataset B systematically names analyst firms in every headline (e.g., 'Morgan Stanley cuts'), while Dataset A sometimes omits firm names or buries them mid-sentence.",
      "Dataset A incorporates geopolitical developments (e.g., U.S.-China tariffs) as market drivers, whereas Dataset B headlines lack direct geopolitical event references.",
      "Dataset B uses rigidly formulaic structures (Ticker + Analyst Firm + Action), while Dataset A allows more narrative flexibility (e.g., placing tickers at end or mid-sentence).",
      "Dataset A specifies quantitative dividend amounts and repurchase details (e.g., '$0.0633 dividend'), while Dataset B only references capital strategies qualitatively.",
      "Dataset A headlines frequently mention regulatory/political actors by name (e.g., NY Attorney General), while Dataset B describes regulatory impacts generically when present.",
      "Dataset B emphasizes neutral/maintain/steady outlooks as recurring themes, whereas Dataset A more frequently highlights directional upgrades/downgrades.",
      "Dataset A includes explicit time-bound market references (e.g., 'Q4 guidance cuts'), while Dataset B uses vaguer temporal language like 'amid' or 'following'.",
      "Dataset A integrates secondary corporate developments (e.g., layoffs, product launches) alongside analyst actions, whereas Dataset B focuses purely on ratings/target changes."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Dataset A headlines frequently include specific numerical values (e.g., 'misses by $0.01') while B uses qualitative descriptors (e.g., 'exceeds expectations').",
      "Dataset B headlines consistently reference future guidance timelines (e.g., 'Fiscal 2024') whereas A lacks explicit future timeframe mentions.",
      "Dataset A includes geopolitical events (e.g., 'Iran airstrike') impacting markets, absent in B.",
      "Dataset B headlines frequently mention supply chain disruptions as a key factor, unlike A.",
      "Dataset A uses informal language (e.g., 'Damned if they do...') while B maintains formal tone.",
      "Dataset B headlines often start with the ticker/company name followed by action, while A structures vary.",
      "Dataset A includes hashtags/social media references (e.g., '#NVIDIA') absent in B.",
      "Dataset B mentions clinical trial phases (e.g., 'Phase 3') in healthcare, not seen in A.",
      "Dataset A references specific macroeconomic data (e.g., 'U.S. retail sales') while B uses broader terms.",
      "Dataset B includes non-English text (e.g., Chinese characters) in some headlines, unlike A."
    ],
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently mention specific analyst/firm names initiating actions (e.g., 'Oppenheimer,' 'Morgan Stanley'), while Dataset A includes analyst actions without always specifying the source institution.",
      "Dataset B emphasizes numeric price target adjustments (e.g., 'raises price target to $55') as central components, whereas Dataset A more often references general analyst sentiment without explicit target figures.",
      "Dataset B uses standardized headline structures focused on analyst actions (e.g., '[Firm] [action] [ticker] [reason]'), while Dataset A employs varied narrative styles including questions, quotes, and event summaries.",
      "Dataset B includes explicit mentions of rating tiers (e.g., 'Underperform,' 'Equal-Weight') in nearly every headline, while Dataset A describes analyst sentiment using broader terms like 'negative' or 'neutral' without standardized tier labels.",
      "Dataset B headlines frequently pair opposing actions (upgrades/downgrades) with matching price target changes, creating a cause-effect pattern absent in Dataset A's more standalone analyst mentions.",
      "Dataset B shows higher density of financial institution branding (e.g., 'J.P. Morgan,' 'Barclays') per headline compared to Dataset A's occasional generic references to 'analysts'.",
      "Dataset B systematically associates earnings results with analyst reactions (e.g., 'beats expectations, sending shares up'), while Dataset A often reports earnings independently of immediate analyst responses.",
      "Dataset B headlines consistently quantify market movements triggered by analyst actions (e.g., 'stock drops 5%'), whereas Dataset A describes price reactions qualitatively (e.g., 'slips', 'jumps') without percentages.",
      "Dataset B emphasizes institutional consensus shifts (e.g., 'multiple firms downgrade') rather than Dataset A's focus on individual analyst perspectives or unnamed 'analyst' opinions.",
      "Dataset B includes explicit forward-looking analyst rationale (e.g., 'citing robust footwear demand') in every action, while Dataset A's forward-looking statements often originate from companies or macroeconomic trends."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Dataset B headlines focus predominantly on analyst actions (downgrades/upgrades) and price target adjustments, while Dataset A includes a broader range of financial events like mergers, geopolitical impacts, and operational updates.",
      "Dataset B uses a rigid template structure (e.g., '[Institution] [action] [stock] - [reason]') across all samples, whereas Dataset A employs varied sentence structures and narrative styles.",
      "Dataset B headlines center on a narrow set of tech stocks (e.g., $NVDA, $TSLA, $GOOG) and institutions (Morgan Stanley, Barclays), while Dataset A covers diverse industries, commodities, and geopolitical entities.",
      "Dataset B consistently places stock tickers inline with hyphens (e.g., '$tsla -'), whereas Dataset A uses varied formatting like appended tickers or parentheses.",
      "Dataset B emphasizes forward-looking analyst expectations (e.g., 'awaiting guidance', 'upcoming investor conference') in every headline, while Dataset A frequently reports concluded events or historical results.",
      "Dataset B universally references institutional analyst firms (e.g., Morgan Stanley, Barclays) as primary news sources, unlike Dataset A which cites varied sources including governments, executives, and anonymous analysts.",
      "Dataset B headlines exclusively use securities ratings terminology (e.g., 'underweight', 'equal weight', 'neutral') absent in Dataset A's samples.",
      "Dataset B focuses on recurring themes of valuation concerns and revenue growth projections, while Dataset A addresses diverse financial concepts like dividends, layoffs, tariffs, and technical trading patterns.",
      "Dataset B headlines maintain uniform emotional neutrality, whereas Dataset A occasionally uses sensational language (e.g., 'succumbs to the Laws of Gravity') and investor-directed questions.",
      "Dataset B features repetitive temporal markers tied exclusively to earnings calendars (e.g., 'Q4 earnings on February 1'), while Dataset A references varied timeframes including holidays, geopolitical events, and multi-year projections."
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Dataset A headlines consistently include specific quantitative metrics (e.g., '$0.01 miss', '28% gain') absent in B's generalized percentage references",
      "Dataset A uses explicit stock tickers (e.g., $CRWD, $XLE) while B refers to companies generically (e.g., 'Tech Giant')",
      "Dataset A contains multi-entity references within single headlines (e.g., '$XLE $NOV $XEC') versus B's single-entity focus",
      "Dataset A includes granular operational updates (e.g., layoffs, dividend suspensions, clinical trial data) missing from B's earnings/rate-centric focus",
      "Dataset A features direct analyst/institution citations (e.g., 'MKM Partners', 'Deutsche Bank') while B uses anonymous 'analysts'",
      "Dataset A incorporates geopolitical event details (e.g., 'U.S. kills Iran's commander') vs B's abstract 'trade tensions' references",
      "Dataset A shows sector diversity (pharma, REITs, semiconductors) vs B's tech/renewables dominance",
      "Dataset A includes forward-looking questions/predictions (e.g., 'Which way will markets head?') absent in B's declarative style",
      "Dataset A uses varied financial verbs (e.g., 'slips', 'sinks', 'halts') vs B's repetitive 'plummets/surges' pattern",
      "Dataset A contains timestamp-specific market moves (e.g., 'premarket', 'final-hour selloff') while B uses generic timing ('today', 'next week')"
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Dataset A headlines frequently reference specific dates and upcoming events (e.g., holiday weeks, earnings release dates), while Dataset B focuses more generically on earnings releases and guidance without date-driven urgency.",
      "Dataset A incorporates granular financial metrics (e.g., \"FFO misses by $0.01\"), whereas Dataset B reports earnings beats/misses qualitatively without precise numerical deviations.",
      "Dataset B includes non-English text (e.g., Chinese and French phrases) in headlines, while Dataset A maintains monolingual English content.",
      "Dataset A explicitly cites real-time or premarket stock price movements (e.g., \"sinks 2.7% after announcing capital raise\"), whereas Dataset B omits intraday price action details.",
      "Dataset B emphasizes corporate partnerships and strategic initiatives (e.g., acquisitions, renewable energy collaborations), while Dataset A highlights acquisitions and macroeconomic drivers more broadly.",
      "Dataset A references geopolitical conflicts (e.g., U.S.-Iran tensions) as market catalysts, whereas Dataset B prioritizes regulatory hurdles and supply chain disruptions as external risks.",
      "Dataset B specifies analyst firms and price targets in downgrades/upgrades (e.g., \"Morgan Stanley cuts price target to $38\"), while Dataset A often omits quantitative targets (e.g., \"Moody's turns negative\").",
      "Dataset A includes rhetorical questions and market commentaries (e.g., \"Why the markets 'look clean'\"), while Dataset B headlines are strictly factual and declarative.",
      "Dataset A diversifies sector coverage (e.g., retail, semiconductors, real estate), whereas Dataset B concentrates heavily on tech, energy, and pharmaceuticals.",
      "Dataset A integrates broader macroeconomic indicators (e.g., GDP, retail sales) as market drivers, while Dataset B narrows its focus to company-specific operational challenges (e.g., production delays)."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify the analyst firm (e.g., Morgan Stanley, Barclays) initiating actions, while Dataset A uses diverse sources (e.g., government agencies, companies, multiple analyst firms).",
      "Dataset B headlines rigidly follow a template of '[Analyst Firm] [Action] [Stock] [Reason]', whereas Dataset A has varied structures (e.g., questions, narratives, broader market summaries).",
      "Dataset B emphasizes explicit mentions of rating changes (e.g., 'upgraded to overweight', 'downgraded to underweight') in every headline, while Dataset A includes implicit analyst sentiment without standardized phrasing.",
      "Dataset B frequently references specific fiscal quarters (e.g., 'Q2 earnings', 'Q3 results') in all headlines, whereas Dataset A mentions earnings without consistently tying them to quarters.",
      "Dataset B headlines uniformly include a rationale for analyst actions (e.g., 'citing strong growth prospects'), while Dataset A often omits explicit justifications.",
      "Dataset B uses lowercase formatting for stock tickers (e.g., '$ulta', '$wkhs'), whereas Dataset A uses uppercase tickers (e.g., '$AAPL', '$XLE').",
      "Dataset B consistently notes post-earnings investor events (e.g., 'ahead of investor conference', 'awaiting guidance from management'), absent in Dataset A.",
      "Dataset B focuses narrowly on equity analyst actions and earnings guidance, while Dataset A covers diverse financial instruments (e.g., currencies, ETFs, commodities).",
      "Dataset B headlines frequently conclude with stock performance context (e.g., 'shares trade unchanged', 'stock remains steady'), unlike Dataset A.",
      "Dataset B restricts industry jargon to analyst rating terminology (e.g., 'overweight', 'neutral'), whereas Dataset A includes sector-specific terms (e.g., 'FFO', 'OPEC+ cuts') across industries."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently place stock ticker symbols within the first few words of the headline, while Dataset A varies ticker placement or omits them in some cases.",
      "Dataset B headlines use a rigid template structure (e.g., '[Company] sees price target [action] at [Institution] due to [reason]') across all samples, unlike Dataset A's varied phrasing.",
      "Dataset B exclusively mentions analyst actions/price targets in every headline, while Dataset A includes standalone earnings results without analyst commentary in some cases.",
      "Dataset B headlines emphasize institutional authorship (e.g., 'according to Oppenheimer', 'says Morgan Stanley') as essential components, whereas Dataset A sometimes omits specific sources.",
      "Dataset B focuses narrowly on equity analyst actions without broader economic context (e.g., no mentions of geopolitics or macro indicators), while Dataset A integrates external economic drivers.",
      "Dataset B uses lowercase formatting for company names/tickers throughout, while Dataset A maintains uppercase formatting for proper nouns and tickers.",
      "Dataset B headlines consistently include both the rationale and institutional source in every analyst action mention, while Dataset A sometimes reports actions without explicit reasoning.",
      "Dataset B shows repetitive focus on 5-6 specific financial institutions (Barclays/Morgan Stanley/Oppenheimer/Goldman Sachs), while Dataset A references a wider variety of firms and data sources.",
      "Dataset B avoids non-analyst corporate developments (e.g., mergers, product launches) present in Dataset A, focusing strictly on institutional ratings/earnings.",
      "Dataset B headlines maintain uniform length/syntax with complete clauses, while Dataset A includes fragmented phrases and hashtags in some samples."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently begin with the analyst firm or financial institution name initiating the action (e.g., 'Goldman Sachs Downgrades', 'Oppenheimer Adjusts'), whereas Dataset A uses more varied lead-in phrases.",
      "Dataset B includes non-Latin characters (e.g., Chinese text) in some headlines, indicating international or multilingual market coverage absent in Dataset A.",
      "Dataset B headlines explicitly quantify price target adjustments (e.g., 'Cuts Price Target to $6', 'Targets $230') in every analyst action, while Dataset A mentions price targets less systematically.",
      "Dataset B shows a higher frequency of clinical trial results and biopharma partnerships (e.g., 'Ziopharm Announces Successful Phase II Clinical Trial'), whereas Dataset A covers broader healthcare challenges without trial specifics.",
      "Dataset B emphasizes supply chain disruptions as a recurring standalone theme (e.g., 'Faces Supply Chain Delays') across sectors, while Dataset A contextualizes supply chain issues within macro/geopolitical events.",
      "Dataset B uses standardized corporate event terminology (e.g., 'Q4 Earnings Call This Thursday', 'Host Annual Investor Conference') more uniformly than Dataset A's varied phrasing.",
      "Dataset B headlines systematically pair company names with ticker symbols in parentheses (e.g., 'Danaher Corp ($DAN)'), whereas Dataset A places tickers more variably (e.g., mid-headline or standalone).",
      "Dataset B features neutral/negative analyst ratings (e.g., 'Maintains Hold Rating', 'Downgrades to Sell') with explicit rationale, while Dataset A highlights rating changes less frequently and with less detail.",
      "Dataset B includes repetitive references to 'beats/misses expectations' in earnings reports as standalone clauses, whereas Dataset A embeds earnings results within broader narratives or analyst commentary.",
      "Dataset B demonstrates stricter adherence to third-person institutional language (e.g., 'Announces Strategic Review', 'Reports Stronger-Than-Expected Earnings'), while Dataset A occasionally uses colloquialisms (e.g., 'ripping to new highs') and direct quotes."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Dataset B headlines predominantly focus on technology sector companies (e.g., $TSLA, $AAPL, $AMZN) with repetitive mentions of Tesla, while Dataset A covers a broader range of sectors including energy, retail, healthcare, and industrials.",
      "Dataset B includes non-English text (e.g., Chinese characters) in some headlines, whereas Dataset A headlines are exclusively in English.",
      "Dataset B emphasizes analyst actions from specific firms like Morgan Stanley and Barclays repeatedly, while Dataset A references a wider variety of analyst firms without repetition.",
      "Dataset B headlines show a higher frequency of tech-specific terminology (e.g., 'supply chain constraints', 'neural interface technology') compared to Dataset A's broader macroeconomic or geopolitical terms.",
      "Dataset B frequently cites price target adjustments (e.g., 'cuts price target', 'raises target to $300') as primary analyst actions, while Dataset A includes diverse actions like dividend changes, layoffs, and mergers.",
      "Dataset B headlines focus narrowly on earnings beats/misses and tech sector growth, whereas Dataset A integrates geopolitical events (e.g., Iran tensions, trade tariff waivers) as market drivers.",
      "Dataset B features redundant headlines about the same companies/events (e.g., multiple $TSLA downgrades), while Dataset A maintains greater thematic diversity across samples.",
      "Dataset B uses phrases like 'bullish', 'bearish', or 'neutral' to explicitly label analyst sentiment, whereas Dataset A conveys sentiment implicitly through contextual details.",
      "Dataset B emphasizes forward-looking statements tied to tech production targets (e.g., 'EV production forecasts'), while Dataset A highlights macroeconomic forecasts (e.g., GDP growth, inflation).",
      "Dataset B headlines frequently mention job cuts in tech firms (e.g., Salesforce layoffs), whereas Dataset A references corporate events like acquisitions and regulatory probes across industries."
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify the fiscal quarter (e.g., Q3, Q4) when reporting earnings, whereas Dataset A often omits quarter references.",
      "Dataset B includes the names of analyst firms (e.g., Morgan Stanley, Barclays) in every analyst action mention, while Dataset A sometimes refers to analysts generically.",
      "Dataset B headlines frequently cite upcoming conferences or events (e.g., CES 2024) as part of company developments, which is less common in Dataset A.",
      "Dataset B uses lowercase for company names at the beginning of headlines, whereas Dataset A maintains consistent capitalization.",
      "Dataset A incorporates hashtags (e.g., #investing, #economy) for contextual categorization, absent in Dataset B.",
      "Dataset A includes advisory or educational headlines (e.g., stock picks, investment strategies), whereas Dataset B focuses strictly on factual news reporting.",
      "Dataset B headlines often explicitly state the rationale behind analyst actions (e.g., 'citing rising competition'), while Dataset A may omit specific reasons.",
      "Dataset A contains headlines about broader market events or macroeconomic updates without direct company ties, whereas Dataset B is predominantly company/ETF-specific.",
      "Dataset B emphasizes partnerships and sustainability initiatives as growth catalysts more frequently than Dataset A.",
      "Dataset A occasionally uses colloquial language or quoted phrases within headlines, a stylistic choice absent in Dataset B."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "Dataset B headlines consistently include explicit percentage changes in stock prices (e.g., 'Falls 5%', 'Surge 4%') within the main headline text, while Dataset A typically mentions price movements qualitatively or in secondary contexts",
      "Dataset B focuses heavily on analyst rating changes (upgrades/downgrades) and price target adjustments as primary market drivers, whereas Dataset A includes more diverse catalysts like earnings results, geopolitical events, and operational developments",
      "Dataset B shows repetitive emphasis on specific mega-cap tech companies (Apple, Tesla, Meta, NVIDIA) across multiple samples, while Dataset A covers broader sectors including energy, retail, biotech, and industrial stocks",
      "Dataset B headlines frequently use a standardized structure: [Company] + [Stock Movement] + [Analyst Action/Event], whereas Dataset A employs more varied sentence constructions and contextual explanations",
      "Dataset B contains multiple references to institutional analyst firms making concurrent adjustments (e.g., 'Barclays' appearing 20+ times), while Dataset A cites a wider variety of sources including government reports, company announcements, and macroeconomic data",
      "Dataset B emphasizes immediate market reactions to specific analyst actions, while Dataset A more frequently discusses longer-term strategic implications (e.g., acquisitions, dividend policies, production changes)",
      "Dataset B shows higher concentration on equity-specific movements, whereas Dataset A regularly incorporates commodities (oil, gold), currencies (EUR/USD), and economic indicators (GDP, retail sales)",
      "Dataset B headlines frequently pair company tickers with explicit stock price directionality verbs ('Slumps', 'Plummets', 'Surges'), creating more urgent tone compared to Dataset A's mixed verb usage",
      "Dataset B contains repetitive template phrasing ('price targets cut at Barclays', 'analysts remain optimistic') across multiple entries, suggesting standardized financial reporting style, unlike Dataset A's diverse linguistic patterns",
      "Dataset B focuses narrowly on immediate earnings impacts and analyst reactions, while Dataset A includes forward-looking investment guidance (e.g., 'stocks to invest in', 'risks to stock') and sector-wide analyses"
    ],
    "llama3.1-8b_zero-shot_v1": [
      "Dataset B headlines consistently reference broad market indices (e.g., Dow, NASDAQ) while Dataset A focuses on specific securities/entities with ticker symbols",
      "Dataset B emphasizes aggregate market movements (\"Stock Market Tumbles 5%\") while Dataset A reports granular corporate actions (EPS misses, dividend declarations)",
      "Dataset B headlines prioritize macroeconomic policy decisions (Fed rates, GDP) while Dataset A includes microeconomic details (plant closures, drug trial results)",
      "Dataset B uses generic entity references (\"Tech giants\") where Dataset A specifies companies (CrowdStrike, Tyson Foods) with market identifiers",
      "Dataset B headlines feature explicit percentage change figures (\"soar 20%\") while Dataset A uses relative performance descriptions (\"slips 9%\", \"up 28%\")",
      "Dataset B maintains formal headline structure without symbols/line breaks, unlike Dataset A's use of tickers ($AAPL) and multi-line formatting",
      "Dataset B focuses on sector-wide trends (\"Tech Stocks Surge\") while Dataset A emphasizes individual security analysis (price targets, short interest)",
      "Dataset B headlines reference abstract economic concepts (recession fears, growth outlook) where Dataset A cites concrete corporate events (layoffs, acquisitions)",
      "Dataset B uses dramatic action verbs (\"plummet\", \"surge\") for market reactions while Dataset A employs technical financial terms (\"FFO misses\", \"guidance\")",
      "Dataset B headlines present aggregated investor sentiment (\"markets react cautiously\") versus Dataset A's specific institutional actions (analyst upgrades, SEC filings)"
    ],
    "llama3.3-70b_few-shot_v1": [
      "Dataset B headlines focus more prominently on Federal Reserve actions and monetary policy updates (e.g., interest rate decisions, Beige Book reports), while Dataset A rarely references central banking explicitly.",
      "Dataset A includes headlines with informal language, colloquial phrases, and social media-style formatting (e.g., 'ripping to new highs,' hashtags), whereas Dataset B maintains formal, standardized financial reporting language throughout.",
      "Dataset B headlines emphasize major tech giants (e.g., Amazon, Tesla, Microsoft) and their earnings, while Dataset A covers a wider variety of sectors including energy, retail, and pharmaceuticals.",
      "Dataset A frequently references specific geopolitical events (e.g., U.S.-China tariffs, Middle Eastern oil production), whereas Dataset B focuses on broader macroeconomic trends (e.g., inflation, GDP growth) without regional granularity.",
      "Dataset A includes corporate operational updates (e.g., layoffs, dividend suspensions, acquisitions) alongside earnings, while Dataset B concentrates almost exclusively on earnings results and stock price reactions.",
      "Dataset B headlines use repetitive phrasing structures (e.g., 'Stock Plummets... After... Earnings Report'), while Dataset A employs more varied sentence constructions and narrative styles.",
      "Dataset A incorporates forward-looking statements with explicit uncertainty (e.g., 'COVID-19 impact remains uncertain'), while Dataset B's projections are framed as analyst consensus or scheduled reports.",
      "Dataset B features frequent mentions of major market indices (Dow Jones, Nasdaq) as performance benchmarks, whereas Dataset A rarely references indices, focusing instead on individual stocks.",
      "Dataset A contains numerous headlines with multiple ticker symbols and cross-referenced companies, while Dataset B typically focuses on single-company reporting per headline.",
      "Dataset B emphasizes quarterly earnings calendar events and scheduled reports (e.g., 'Q2 earnings report schedule released'), while Dataset A highlights real-time market reactions and breaking news updates."
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Dataset B headlines exclusively focus on large-cap technology companies (e.g., $AAPL, $TSLA, $META), while Dataset A covers diverse sectors like energy, retail, and industrials.",
      "Dataset B emphasizes analyst actions (e.g., upgrades, downgrades) from specific firms (e.g., Morgan Stanley, Barclays), whereas Dataset A references a broader range of analyst firms and actions without consistent focus.",
      "Dataset B headlines frequently include explicit price target figures (e.g., 'raises price target to $190'), while Dataset A mentions analyst actions without numerical targets.",
      "Dataset B consistently ties macroeconomic factors (e.g., Fed rate decisions, inflation) directly to tech sector performance, whereas Dataset A discusses broader economic impacts across industries.",
      "Dataset B headlines uniformly reference quarterly earnings results (e.g., 'Q3', 'Q4') with explicit beats/misses, while Dataset A includes earnings data but with less granularity.",
      "Dataset B features structured headlines with both ticker and company name (e.g., '$AAPL - Apple...'), while Dataset A uses inconsistent formats (tickers or names alone).",
      "Dataset B highlights supply chain disruptions as a recurring theme affecting tech companies, whereas Dataset A mentions supply chains only in specific contexts (e.g., COVID-19).",
      "Dataset B focuses on ad revenue trends impacting tech firms (e.g., Meta, Alphabet), a theme absent in Dataset A.",
      "Dataset B includes multilingual text (e.g., Chinese characters) in some headlines, reflecting globalized coverage, while Dataset A uses English exclusively.",
      "Dataset B centers on forward-looking analyst projections (e.g., 'sees 20% upside'), while Dataset A includes more retrospective or event-driven updates (e.g., mergers, layoffs)."
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Dataset B headlines use generic descriptors like 'Tech Giant' or 'Global Energy Company' instead of naming specific companies or ticker symbols prevalent in Dataset A.",
      "Dataset B headlines lack granular financial metrics (e.g., exact EPS misses by cents or revenue figures) commonly detailed in Dataset A.",
      "Dataset B headlines emphasize macroeconomic policy outcomes (e.g., 'Federal Reserve Maintains Interest Rates') more uniformly, while A includes diverse granular events like clinical trials or dividend changes.",
      "Dataset B headlines avoid informal language, ticker symbols, and parenthetical annotations frequently present in Dataset A (e.g., '$CRWD', 'premarket').",
      "Dataset B headlines frame earnings outcomes in qualitative terms (e.g., 'stronger-than-expected') rather than quantitative specifics (e.g., 'misses by $0.01') as in Dataset A.",
      "Dataset B headlines prioritize sector-wide trends (e.g., 'Renewable Energy Stocks Surge') over company-specific operational updates dominant in Dataset A.",
      "Dataset B headlines use standardized phrasing (e.g., 'Federal Reserve to Hold Policy Meeting') compared to Dataset A's varied structures, including questions and colloquialisms.",
      "Dataset B headlines omit forward-looking details with specific timelines (e.g., 'conference call tomorrow') common in Dataset A, favoring broader economic forecasts.",
      "Dataset B headlines avoid explicit mentions of stock price movements in premarket or after-hours trading, which are frequent in Dataset A.",
      "Dataset B headlines generalize geopolitical or regulatory impacts (e.g., 'new regulations') rather than citing specific events (e.g., 'U.S. kills Iran's commander') as in Dataset A."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Dataset B headlines predominantly focus on a narrower range of large tech companies (e.g., $AAPL, $TSLA, $NVDA, $META) across all samples, while A includes diverse sectors like energy, retail, pharma, and industrials.",
      "Dataset B consistently references specific analyst firms (e.g., Barclays, Morgan Stanley) in most headlines, whereas A cites a wider variety of sources including ratings agencies, niche analysts, and non-financial entities.",
      "Non-English characters (e.g., Chinese text) appear exclusively in Dataset B headlines, suggesting multilingual data sources or globalized targeting.",
      "Dataset A contains granular numerical specificity (e.g., 'lays off 2,600 of 10,500 employees', 'Brent crude nears $70') absent in B's more generalized quantitative references.",
      "Macroeconomic references in B are limited to high-level indicators (unemployment, Fed rates), while A includes niche metrics like 'PFAS chemical makers climb' and detailed commodity movements.",
      "Dataset A headlines frequently incorporate technical trading patterns (e.g., 'EUR/USD Failure of 1-2-3 Pattern') unseen in B's more fundamental analysis focus.",
      "B shows formulaic structural repetition (e.g., '[Firm] [action] on [Ticker] due to [reason]') across all samples, whereas A uses more varied sentence architectures.",
      "Dataset A contains explicit references to retail investor activity (e.g., 'Momentum is coming back', 'Big buy blocks') absent in B's institutional-focused narratives.",
      "Geopolitical developments in B are limited to trade policy, while A includes diverse events like military strikes, country-specific labor issues, and regulatory scandals.",
      "Dataset A features forward-looking time anchors (e.g., 'before Thanksgiving', 'next week') across samples, whereas B's temporal references focus strictly on quarterly earnings cycles."
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Dataset B headlines focus more on general sector performance (e.g., 'Tech Sector Slumps') while Dataset A emphasizes granular company-specific metrics like exact EPS misses/revenue figures.",
      "Dataset B uses rounded/approximate numerical descriptors (e.g., 'below-expected earnings') whereas Dataset A consistently includes precise financial amounts (e.g., 'misses by $0.01').",
      "Dataset A contains frequent stock ticker symbols and real-time trading context (e.g., 'premarket moves') absent in Dataset B's broader market summaries.",
      "Dataset B headlines prioritize macroeconomic stability narratives (e.g., 'steady growth', 'unchanged rates') while A emphasizes volatility triggers like geopolitical shocks or clinical trial results.",
      "Dataset A includes diverse financial instruments (forex pairs, dividends, bonds) while B focuses primarily on equity indices and generic sector performance.",
      "Dataset B headlines repeat standardized phrases like 'surges', 'plunge', and 'steady' across contexts, whereas A uses more varied action verbs tied to specific catalysts.",
      "Dataset A references analyst firms and price targets explicitly (e.g., 'Deutsche Bank cuts to Hold'), while B uses anonymous 'analysts say' generalizations.",
      "Dataset B favors passive voice constructions about market states ('remains stable'), while A uses active voice documenting discrete events ('lays off 2,600 employees').",
      "Dataset A contains timestamped market phase references (premarket, post-earnings) while B uses generic temporal markers like 'next quarter' or 'coming months'.",
      "Dataset B headlines show formulaic patterns with repetitive subject-verb structures, whereas A displays greater lexical diversity including quotes, parentheticals, and hashtags."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify the analyst/firm behind actions (e.g., 'Morgan Stanley', 'Oppenheimer'), while A often omits institutional sources.",
      "B emphasizes precise percentage-driven price movements (e.g., 'slumps 15%', 'rises 4.6%') in every headline, whereas A uses broader descriptors like 'slips' or 'jumps' without exact figures.",
      "B headlines rigidly follow a 'Ticker + Event + Analyst Action' structure, while A uses more varied phrasing and contextual details (e.g., geopolitical narratives).",
      "All B samples tie stock movements directly to earnings results/guidance or analyst rating changes, whereas A includes non-earnings catalysts (e.g., acquisitions, macro trends).",
      "B focuses narrowly on quarterly earnings (Q1, Q4) and near-term forecasts, while A references longer-term projections (e.g., '2020 targets', 'FY results').",
      "B frequently cites price target adjustments (e.g., 'PT trimmed to $116') in every headline, unlike A, which mentions targets sparingly.",
      "B uses standardized phrases like 'beats/misses estimates' for earnings, while A employs diverse metrics (e.g., 'FFO misses by $0.01', 'declares dividend').",
      "B headlines uniformly include the stock ticker at the start, whereas A sometimes embeds tickers mid-headline or omits them entirely.",
      "B avoids non-corporate news (e.g., macroeconomic policy, geopolitical events) present in A (e.g., 'Mnuchin says trade deal...').",
      "B exclusively ties volatility to earnings/analyst actions, while A attributes it to diverse risks (e.g., 'FDA delays', 'supply chain issues')."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Dataset A headlines frequently include specific stock ticker symbols (e.g., $CRWD, $XLE), while Dataset B headlines omit ticker symbols entirely.",
      "Dataset A emphasizes granular financial metrics (e.g., 'misses by $0.01') for earnings/results, whereas Dataset B uses broader statements like 'beats analysts\u2019 expectations' without numerical specificity.",
      "Dataset A contains frequent references to premarket/after-hours stock price movements (e.g., 'up 2.6% premarket'), while Dataset B focuses on regular trading sessions or general trends.",
      "Dataset A includes niche sectors like energy, biotech, and retail with company-specific risks/opportunities, while Dataset B disproportionately emphasizes tech sector dynamics and macroeconomic trends.",
      "Dataset A headlines use sensationalist verbs (e.g., 'plunges,' 'surges') for price action, whereas Dataset B employs more neutral language (e.g., 'declines,' 'rises').",
      "Dataset A references exact dates (e.g., 'Thursday, Feb 6') and short-term events, while Dataset B focuses on broader timeframes (e.g., 'Q4 earnings,' 'annual targets').",
      "Dataset A mentions specific institutional actions (e.g., 'Moody\u2019s turns negative,' 'Fed rate changes') with immediate impacts, whereas Dataset B discusses regulatory/policy themes more abstractly (e.g., 'new laws,' 'climate policies').",
      "Dataset A includes mixed-case formatting, hashtags, and social media-style annotations (e.g., '#investing'), while Dataset B uses standardized headline capitalization and minimal metadata.",
      "Dataset A highlights transactional events (e.g., acquisitions, dividend declarations), whereas Dataset B prioritizes recurring themes like semiconductor shortages or inflation concerns.",
      "Dataset A features fragmented sentence structures and informal annotations (e.g., 'Big buy blocks'), while Dataset B employs formulaic, full-sentence headlines with consistent syntax."
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Dataset B headlines predominantly focus on a narrow set of tech companies (e.g., $TSLA, $NVDA, $GOOGL) across all samples, while Dataset A covers diverse sectors including retail, energy, real estate, and industrials.",
      "Dataset B headlines use a repetitive template structure ('[Institution] [action] [ticker] - [reason]') for 90% of entries, whereas Dataset A employs varied sentence structures including questions, statements, and event announcements.",
      "All Dataset B samples explicitly tie institutional actions (upgrades/downgrades) to specific valuation/production concerns, while Dataset A includes broader operational catalysts like layoffs, dividend changes, and M&A activity.",
      "Dataset B exclusively references 5 major institutions (Morgan Stanley, Barclays, Goldman Sachs, Oppenheimer, Federal Reserve), while Dataset A cites 20+ varied sources including Moody's, BofA, Stifel, and regional banks.",
      "100% of Dataset B price action descriptions use generic terms like 'trades sideways' or 'falls 5%', whereas Dataset A employs unique financial metaphors ('succumbs to gravity', 'rips to new highs') and technical patterns ('Failure of 1-2-3 Pattern').",
      "Dataset B contains duplicate headlines about the same analyst action (e.g., 15+ Morgan Stanley Tesla downgrades), while Dataset A maintains unique entries even when covering similar themes.",
      "All Dataset B economic indicator mentions relate strictly to Federal Reserve Beige Book, while Dataset A references 10+ diverse indicators including retail sales, GDP, manufacturing data, and employment metrics.",
      "Dataset B exclusively uses lowercase formatting for institutions/companies, while Dataset A maintains proper capitalization (e.g., 'Goldman Sachs' vs 'goldman sachs').",
      "100% of Dataset B guidance mentions are limited to quarterly projections, whereas Dataset A includes annual guidance withdrawals, long-term investment outlooks, and multi-year growth targets.",
      "Dataset B headlines focus exclusively on pre-market/after-hours reactions to analyst actions, while Dataset A includes real-time trading updates, extended market commentary, and multi-period performance analysis."
    ],
    "llama3.1-8b_few-shot_v1": [
      "Dataset A headlines frequently include specific premarket/after-hours stock price movements (e.g., 'up 2.6% premarket') absent in B",
      "Dataset A contains granular financial metric deviations (e.g., 'misses by $0.01') while B uses broader descriptors like 'misses expectations'",
      "Dataset A references smaller/niche companies (e.g., Lindsay, Perceptron) whereas B focuses almost exclusively on large-cap/mega-cap firms",
      "Dataset A includes dividend declarations with exact amounts (e.g., '$0.0633 dividend') while B lacks specific dividend details",
      "Dataset A features technical trading terminology (e.g., 'pivot 50c', '1-2-3 Pattern') not found in B's macroeconomic-focused headlines",
      "Dataset B emphasizes geopolitical event consequences (e.g., 'US-China Trade War Escalates') while A mentions events as market catalysts without deep analysis",
      "Dataset A regularly includes employee layoffs/corporate restructuring details (e.g., 'lays off 2,600') absent in B",
      "Dataset B headlines systematically reference national/global GDP growth rates (e.g., 'slows to 2.1%') as primary subjects more often than A does",
      "Dataset A contains frequent conference call announcements/reminders (e.g., 'conference call tomorrow') while B focuses on finalized results",
      "Dataset B emphasizes central bank policy decisions as market drivers (e.g., 'Fed Hints at Interest Rate Cuts') more prominently than A"
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "Headlines in Dataset B consistently specify the analyst firm's name alongside exact price target adjustments and rating changes (e.g., 'Morgan Stanley downgrades to Underweight, price target $X').",
      "Dataset B headlines emphasize forward-looking guidance revisions tied to specific fiscal years (e.g., 'cuts 2023 outlook') more frequently than A.",
      "Dataset B includes structured ticker placement at the start of headlines (e.g., '$MATX - ...'), whereas A often embeds tickers mid-headline or inconsistently.",
      "Headlines in B explicitly link stock price movements (e.g., 'Shares Tumble 14%') directly to the cited analyst action or guidance change within the same sentence.",
      "Dataset B features more precise numerical metrics (e.g., 'revenue increase of 12.1%') compared to A\u2019s general references (e.g., 'misses by $0.01').",
      "B frequently references ETFs or sector indices (e.g., 'SPDR S&P 500 ETF Trust') as standalone subjects, while A focuses on individual stocks.",
      "Headlines in B highlight partnerships or collaborations as strategic drivers (e.g., 'partners with Savage x Fenty'), whereas A emphasizes acquisitions or regulatory impacts.",
      "Dataset B uses standardized rating terminology (e.g., 'Neutral', 'Outperform') more uniformly compared to A\u2019s varied phrasing (e.g., 'turns negative').",
      "B\u2019s headlines often cite slowing growth, valuation concerns, or competitive pressures as explicit reasons for analyst actions, unlike A\u2019s broader contextual factors.",
      "Dataset B includes recurring mentions of post-earnings guidance reaffirmation or reduction (e.g., 'reaffirms FY2023 targets') as a distinct narrative."
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Dataset B headlines consistently specify exact numerical price targets or percentage changes in analyst actions (e.g., 'cuts target by 15%'), while A often mentions analyst actions without precise quantitative targets.",
      "Dataset B includes explicit rationales for analyst actions (e.g., 'citing slowing demand'), whereas A typically states actions (e.g., 'downgrades to Hold') without detailed reasoning.",
      "Dataset B headlines frequently reference forward-looking economic or corporate performance with specific timeframes (e.g., 'next 5 years'), while A uses broader or immediate time references (e.g., 'holiday shortened week').",
      "Dataset B emphasizes tech sector developments (e.g., AI, cloud computing) as primary industry trends, whereas A focuses more on energy, retail, and semiconductors.",
      "Dataset B integrates outcomes of events directly into headlines (e.g., 'shares sink 5%'), while A often separates market reactions into distinct headlines.",
      "Dataset B highlights regulatory/antitrust investigations (e.g., FTC scrutiny) as market drivers, whereas A focuses more on tariffs or Fed rate decisions.",
      "Dataset B frequently cites collaborations or partnerships (e.g., 'Google Cloud Partners with IBM') as catalysts, whereas A emphasizes standalone corporate actions like acquisitions.",
      "Dataset B uses formal analyst rating terminology (e.g., 'Underweight,' 'Equal Weight'), while A employs informal phrases like 'turns negative' or 'upgrades to outperform.'",
      "Dataset B explicitly ties macroeconomic data to market implications (e.g., 'CPI data anticipates inflation stability'), while A mentions indicators without always linking them to market effects.",
      "Dataset B includes granular future financial projections (e.g., 'expects $11.5B revenue'), whereas A focuses on retrospective metrics (e.g., 'misses by $0.01')."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Dataset B headlines consistently specify the investment banks or analyst firms (e.g., Barclays, Morgan Stanley) behind rating changes, while Dataset A rarely mentions specific institutions.",
      "Dataset B headlines focus more narrowly on analyst actions (upgrades/downgrades) and price target revisions, whereas Dataset A includes broader corporate announcements like mergers, dividends, and layoffs alongside analyst actions.",
      "Dataset B samples disproportionately emphasize tech giants (Tesla, NVIDIA, Meta) and semiconductor stocks, while Dataset A covers a wider sector diversity including energy, retail, and industrials.",
      "Dataset B headlines frequently include explicit numerical price targets (e.g., 'raises to $400') in nearly every analyst action, while Dataset A sometimes omits specific target figures.",
      "Dataset B shows repetitive template structures (e.g., '[Bank] [action] [ticker] [reason]'), whereas Dataset A uses more varied headline phrasing even when reporting similar events.",
      "Dataset B emphasizes forward-looking analyst projections (e.g., 'sees stock reaching $800 by 2024'), while Dataset A focuses more on reporting completed events/results.",
      "Dataset B contains multiple headlines about the same company within a narrow timeframe (e.g., 5+ Tesla updates), suggesting higher frequency of analyst reassessments compared to Dataset A's event-driven coverage.",
      "Dataset B headlines frequently pair rating changes with cited rationales (e.g., 'citing production delays'), whereas Dataset A often states rating changes without immediate explanation.",
      "Dataset B shows greater emphasis on CEO/executive developments (e.g., Musk's leadership changes) as catalysts for analyst actions compared to Dataset A.",
      "Dataset B includes more direct comparisons between multiple analyst actions (e.g., 'Barclays cuts while Goldman upgrades'), whereas Dataset A typically reports single analyst actions in isolation."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Dataset A headlines frequently include specific stock ticker symbols (e.g., $CRWD, $XLE) while B uses generic company references (e.g., \"Tech Giant\") without tickers",
      "Dataset A quantifies financial outcomes with granular metrics (e.g., \"$0.01 EPS miss\") whereas B uses qualitative descriptors (e.g., \"Exceeds Expectations\") without numerical precision",
      "Dataset A contains explicit references to non-earnings corporate actions (e.g., acquisitions, layoffs, dividend changes) absent in B's earnings/Fed-focused headlines",
      "Dataset A includes specific analyst names and firms (e.g., \"JMP Securities upgrades\") while B uses generic \"Market Analysts\" references",
      "Dataset A covers diverse sectors (semiconductors, REITs, oil) versus B's predominant focus on tech stocks and broad market indices",
      "Dataset A headlines incorporate precise temporal references (e.g., \"Q4 economic growth\", \"Thanksgiving week\") lacking in B's generic timeframes",
      "Dataset A specifies geopolitical catalysts (e.g., \"US-China tariffs\", OPEC+ cuts) while B uses vague terms like \"economic uncertainty\"",
      "Dataset A includes mixed-intensity price action verbs (e.g., \"slips\", \"ticks up\") versus B's extreme polarity (\"plummet\", \"soar\") without gradation",
      "Dataset A references granular market instruments (e.g., gold liquidity impacts, forex patterns) absent in B's macro-level index reporting",
      "Dataset A contains forward-looking statements tied to specific analyst actions (e.g., \"price target raised to $70\") while B uses general predictions (\"Predict Strong Growth\")"
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines more frequently include specific price target adjustments (e.g., 'cuts price target to $235 from $280') compared to Dataset A, which often mentions analyst actions without numerical targets.",
      "Dataset B emphasizes future-oriented phrases like 'expects further headwinds,' 'forecasts lower revenue,' or 'growth potential,' whereas Dataset A focuses more on immediate past events (e.g., 'misses by $0.01').",
      "Dataset B consistently includes acquisition/transaction values (e.g., 'acquires... for $350M'), while Dataset A mentions acquisitions without specifying financial terms (e.g., 'Coty Acquires').",
      "Dataset B uses stronger directional verbs for market reactions (e.g., 'plummets,' 'tumbles,' 'surges') compared to Dataset A's milder terms like 'falls' or 'slips.'",
      "Dataset B explicitly ties macroeconomic factors to asset movements (e.g., 'Gold gains 1.5% as tensions rise') unlike Dataset A, which states macro events without direct price linkages.",
      "Dataset B features more frequent references to quarterly guidance updates (e.g., 'withdraws FY 20 guidance') compared to Dataset A's focus on historical dividend declarations.",
      "Dataset B headlines systematically include post-market/premarket indicators (e.g., 'premarket,' 'after-hours') that are absent from Dataset A's time references.",
      "Dataset B specifies clinical trial phases/regulatory milestones (e.g., 'Phase 3 results,' 'NDA submission') while Dataset A mentions drug developments without phase details.",
      "Dataset B emphasizes competitive landscape analysis (e.g., 'citing rising competition,' 'competition mounts') more prominently than Dataset A's general market commentary.",
      "Dataset B incorporates forward-looking dividend strategies (e.g., 'to Boost Dividend by 5%') whereas Dataset A focuses on static dividend announcements (e.g., 'declares dividend')."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines predominantly focus on analyst actions (upgrades/downgrades) as the primary driver, while A includes a broader mix of catalysts like geopolitical events and company-specific news.",
      "B consistently structures headlines around institutional analyst firms (e.g., 'Morgan Stanley', 'Goldman Sachs') in the lead, whereas A more frequently cites non-analyst sources like government officials or macroeconomic data.",
      "All B samples maintain neutral/technical language when describing market reactions (e.g., 'deemed stable', 'neutral impact'), while A uses more emotive phrasing like 'surge', 'plunges', or 'hammered'.",
      "B headlines emphasize precise price target figures and rating changes as core content, whereas A more often contextualizes analyst actions with operational developments or strategic implications.",
      "Dataset B shows uniform focus on quarterly earnings outcomes (beats/misses) as standalone events, while A links earnings results to broader narratives like guidance changes or sector trends.",
      "All B samples avoid non-English characters/translations present in A's multilingual headlines (e.g., Chinese text snippets in A's $SRNE/$SINT entries).",
      "B exclusively uses standardized financial terminology (e.g., 'Q3 earnings', 'price target cut'), whereas A incorporates colloquialisms like 'ripping to new highs' or 'Santa Claus rally'.",
      "Dataset B headlines omit forward-looking event previews common in A (e.g., 'What to watch', 'conference call tomorrow'), focusing strictly on completed analyst actions/results.",
      "All B entries maintain single-topic focus on analyst actions/earnings, while A headlines frequently combine multiple elements (e.g., layoffs + dividend suspension + ticker movement).",
      "B systematically includes the full institutional name initiating the action (e.g., 'J.P. Morgan', 'RBC Capital Markets'), whereas A sometimes uses generic references to 'analysts' without attribution."
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently start with the company name or analyst action followed by the ticker symbol placed mid-sentence (e.g., 'morgan stanley downgrades $lcii'), whereas Dataset A places tickers primarily at the end of headlines or as standalone elements",
      "Dataset B exclusively uses specific institutional analyst firm names (Morgan Stanley, Barclays, Oppenheimer) in every headline, while Dataset A references a broader range of sources including non-analyst entities and generic 'analysts'",
      "Dataset B systematically includes both the action (upgrade/downgrade) and specific rating terminology ('underweight', 'overweight', 'equal weight') in every relevant headline, unlike Dataset A which often mentions actions without formal rating classifications",
      "Dataset B headlines consistently quantify price target adjustments using exact dollar amounts and include comparative phrases like 'raises to $X from $Y', while Dataset A sometimes mentions target changes without specific figures",
      "Dataset B maintains a strict pattern of including either 'beats/misses analyst expectations' or 'in line with analyst expectations' verbiage for earnings reports, whereas Dataset A uses more varied earnings comparison phrasing",
      "Dataset B headlines frequently include standardized rationale phrases like 'citing concerns over...' or 'citing strong...' after analyst actions, while Dataset A provides more diverse reasoning styles and contextual details",
      "Dataset B uses ticker symbols followed by a dash (e.g., '$tsla -') mid-headline as a consistent formatting pattern, unlike Dataset A's dash-free tickers, which are typically placed at sentence ends",
      "Dataset B emphasizes institutional investor conference participation announcements with specific firm names (e.g., 'Oppenheimer healthcare conference'), while Dataset A references conferences more generically",
      "Dataset B maintains consistent tense structure for future events using phrases like 'will report on [date]' compared to Dataset A's more varied temporal references",
      "Dataset B headlines strictly avoid non-analyst corporate developments (e.g., M&A, product launches) that appear in Dataset A, focusing exclusively on ratings, targets, and earnings"
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently include stock ticker symbols prefixed with '$' in every entry, while Dataset A occasionally omits tickers or uses alternative formats like (CRWD).",
      "Dataset B emphasizes specific analyst actions (e.g., 'Morgan Stanley downgrades') and firm names more prominently than Dataset A, which generalizes analyst involvement (e.g., 'analysts say').",
      "Dataset B headlines frequently quantify price target adjustments (e.g., 'cuts Price Target to $12') with exact figures, whereas Dataset A often mentions target changes without numerical specificity (e.g., 'price target raised').",
      "Dataset B includes explicit percentage-based stock price movements (e.g., 'falls 5%', 'jumps 7%') in most headlines, while Dataset A describes reactions more qualitatively (e.g., 'stock falls').",
      "Dataset B focuses heavily on quarterly earnings timelines (e.g., 'Q3 Earnings on October 27th'), whereas Dataset A references broader timeframes (e.g., '2020', '2019').",
      "Dataset B headlines frequently cite competing analyst opinions (e.g., 'analysts divided', 'analysts remain cautious'), while Dataset A rarely highlights intra-analyst disagreements.",
      "Dataset B uses standardized phrasing for earnings outcomes (e.g., 'beats on revenue', 'misses expectations'), whereas Dataset A employs varied terminology (e.g., 'blow out report', 'slips 9%').",
      "Dataset B explicitly ties corporate events to stock performance (e.g., 'partnership...sparks mixed reactions'), while Dataset A often separates event reporting from market impact analysis.",
      "Dataset B consistently references institutional investors (e.g., 'Morgan Stanley', 'Oppenheimer') in every headline, unlike Dataset A's occasional generic references to 'analysts'.",
      "Dataset B emphasizes forward-looking price targets (e.g., 'to hit $350 by Q1') as primary drivers, whereas Dataset A balances targets with operational updates (e.g., 'delays theatrical debut')."
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently include analyst firm names (e.g., Oppenheimer, Jefferies) as primary actors in rating actions, while A uses institutional names without explicit 'analyst' references.",
      "All B samples specify fiscal quarters (Q1-Q4) in earnings reports, whereas A rarely includes quarterly timeframes.",
      "B headlines systematically provide rationales for analyst actions (e.g., 'citing supply chain disruptions'), while A states actions without explanations.",
      "Strategic collaborations for R&D/development appear exclusively in B, while A focuses on M&A transactions with financial stakes.",
      "B consistently uses formal rating terminology (Outperform/Underperform) while A employs simpler terms (Hold/Buy).",
      "100% of B's price target changes include both origin and destination figures ($X to $Y), while A occasionally uses single-endpoint updates.",
      "B headlines maintain neutral/mixed outlook language ('maintains neutral') contrasting with A's directional emphasis ('cuts','raises').",
      "Guidance confirmations ('reaffirms guidance') appear exclusively in B, while A focuses exclusively on guidance revisions.",
      "B shows a consistent pattern of ticker-first headline structure ($SYM - Company Action), whereas A uses variable formatting.",
      "Upcoming earnings call announcements and future report dates are exclusive to B, absent in A's event-driven headlines."
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently specify the fiscal quarter (e.g., Q2) for earnings reports, while Dataset A sometimes omits quarterly/yearly timeframes in earnings mentions.",
      "Dataset B headlines structurally emphasize institutional actor names (e.g., 'Goldman Sachs says...') as primary agents of analyst actions, whereas Dataset A often places company/ticker symbols first.",
      "Dataset B avoids hashtags, social media references, or informal commentary (e.g., 'Momentum is coming back'), while Dataset A includes these elements.",
      "Dataset B headlines explicitly cite data sources (e.g., 'according to Refinitiv data') for earnings/results validation, which Dataset A omits.",
      "Dataset A includes premarket/after-hours stock price movements (e.g., 'premarket up 2.6%'), while Dataset B focuses solely on regular trading implications.",
      "Dataset B headlines standardize ticker placement after full company names (e.g., 'Molson Coors Beverage Company... $TAP'), whereas Dataset A often uses standalone tickers without full names.",
      "Dataset A incorporates geopolitical/commodity events (e.g., oil prices, trade wars) beyond corporate news, while Dataset B focuses strictly on company-specific earnings/analyst actions.",
      "Dataset B uses neutral, declarative language for analyst actions (e.g., 'downgraded to sell'), while Dataset A employs emotive verbs (e.g., 'slides,' 'surge').",
      "Dataset A includes multi-ticker references (e.g., '$CC $MMM $DD') for sector trends, whereas Dataset B headlines focus on single-ticker contexts.",
      "Dataset B consistently frames earnings results relative to analyst expectations (e.g., 'meets expectations'), while Dataset A emphasizes raw misses/beats (e.g., 'misses by $0.01')."
    ]
  },
  "diffs_real_from_synth": {
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines more frequently include geopolitical events (e.g., U.S.-China trade deals, OPEC+ decisions) as primary market drivers, whereas A focuses on analyst actions and company-specific metrics.",
      "Dataset B contains headlines framed as questions or comparative analyses (e.g., 'Can $MSFT head higher?', 'Better Buy: AbbVie vs. Merck'), while A uses declarative statements about analyst actions.",
      "Dataset B headlines often mention broader market indices or ETFs (e.g., '$XLE', 'ETF assets to surge tenfold'), whereas A emphasizes individual stock tickers and company-level events.",
      "Dataset B includes headlines with explicit timelines (e.g., 'Final-hour selloff', 'holiday shortened week'), while A lacks temporal specificity beyond earnings quarters.",
      "Dataset B incorporates non-corporate events (e.g., 'Hackers Breach Ring Home Security Cameras', political scandals) as market catalysts, unlike A's focus on financial metrics and analyst decisions.",
      "Dataset B features headlines about macroeconomic policy debates (e.g., 'Fed rate cut implications', 'Brexit impacts') without direct company ties, whereas A links macro trends to specific tickers.",
      "Dataset B uses colloquial phrases and idioms (e.g., 'Damned if they do', 'Santa Claus rally') absent in A's formal, structured ticker-analyst-action phrasing.",
      "Dataset B includes headlines about retail investor behavior (e.g., 'Hedge Funds Have Never Been This Bullish'), while A focuses solely on institutional analyst actions.",
      "Dataset B references non-English entities (e.g., 'DSV Panalpina A/S', 'Emerald Resources NL') without localized ticker symbols, unlike A's standardized $TICKER format.",
      "Dataset B contains headlines with explicit numerical benchmarks (e.g., '10Y Yield LOD 1.7620%', 'gold will reach $1,600') absent in A's qualitative analyst ratings."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Dataset A headlines consistently include both the company name and stock ticker (e.g., '$F - Ford Reports...'), while B often mentions only the ticker or company name without pairing them.",
      "Dataset B headlines feature more geopolitical/global events (e.g., U.S.-Iran tensions, OPEC+ decisions, Brexit) compared to A's focus on corporate operational developments.",
      "Dataset A includes explicit future date references (e.g., 'Q3 2024 Guidance') in forward-looking statements, while B uses vague temporal references like 'next week' or 'coming months'.",
      "Dataset B contains frequent mentions of currency pairs (e.g., EUR/USD) and commodity prices (e.g., Brent crude), which are absent in A.",
      "Dataset A headlines emphasize supply chain disruptions/improvements as a recurring theme, while B rarely addresses this topic.",
      "Dataset B uses more dramatic action verbs for price movements (e.g., 'plummets 55%', 'briefly plunges 9%') compared to A's moderate terms like 'slides' or 'falls'.",
      "Dataset A regularly mentions specific clinical trial phases/results (e.g., 'successful phase 3 trial') in healthcare/biotech news, while B focuses on general drug approvals/data releases.",
      "Dataset B includes more political commentary (e.g., Trump-Powell meetings, UK election impacts) compared to A's strictly corporate/policy-neutral tone.",
      "Dataset A shows consistent formatting with complete sentence structures, while B contains more sentence fragments and tweet-style abbreviations (e.g., 'FFO misses by $0.01').",
      "Dataset B features explicit percentage changes in stock prices/valuations (e.g., 'cuts dividend by 34%') more frequently than A, which often omits specific figures."
    ],
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently omit stock ticker symbols in favor of company names or general sector references (e.g., 'Brixmor Property' vs. '$RLGT - Rigel Pharmaceuticals').",
      "Dataset B includes headlines with macroeconomic or geopolitical commentary unrelated to specific companies (e.g., 'China is in the process of waiving retaliatory tariffs').",
      "Dataset B contains headlines with truncated text or placeholder markers (e.g., '\u2026') indicative of raw data feeds or social media snippets.",
      "Dataset B incorporates non-equity financial instruments (e.g., forex pairs like EUR/USD, commodities like Brent crude) not seen in Dataset A.",
      "Dataset B uses hashtags (e.g., '#investing #economy #finance') and social media-style annotations absent in Dataset A.",
      "Dataset B includes speculative or analytical questions (e.g., 'Can $MSFT head higher?') rather than purely factual statements prevalent in Dataset A.",
      "Dataset B features headlines with technical trading patterns (e.g., 'Failure of 1-2-3 Pattern Might be Bullish') absent in Dataset A\u2019s fundamental analysis focus.",
      "Dataset B references non-English entities or global markets (e.g., 'Repsol SA', 'Naspers') more frequently than Dataset A\u2019s U.S.-centric focus.",
      "Dataset B headlines often lack explicit analyst attribution (e.g., 'Becton, Dickinson slips 9% on lowered guidance' vs. 'Deutsche Bank cuts to Hold').",
      "Dataset B includes headlines with non-financial corporate events (e.g., 'Credit Suisse CEO Thiam Exits in Shock Reversal') without tying them to financial metrics or analyst actions."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Dataset B headlines include geopolitical or macroeconomic events (e.g., trade deals, oil price shocks) not tied to specific corporate actions, while A focuses strictly on analyst actions/company-specific updates.",
      "B contains headlines framed as advice/investment opportunities (e.g., '3 Great Semiconductor Stocks to Invest In for 2020'), whereas A exclusively reports factual events without recommendations.",
      "B uses colloquial phrases/metaphors (e.g., 'succumbs to the Laws of Gravity', 'Flurry of retailer earnings on tap') absent in A's standardized institutional tone.",
      "B references non-corporate entities like governments (e.g., China, U.S. Treasury) and political figures (e.g., Trump, Mnuchin), while A focuses solely on companies/financial institutions.",
      "B includes headlines about non-equity assets (e.g., EUR/USD forex, Brent crude oil) absent in A's exclusive focus on stocks and corporate earnings.",
      "B features headlines with explicit numerical metrics beyond financials (e.g., '10Y Yield LOD 1.7620%', 'boost output capacity by almost two thirds') unlike A's focus on EPS/revenue/price targets.",
      "B contains truncated headlines with social media markers (e.g., hashtags, ellipses, 'via @CNBC') indicating real-time feeds, while A uses polished institutional formatting.",
      "B includes operational announcements (e.g., dividend suspensions, layoffs, CFO transitions) beyond A's narrower focus on ratings/earnings/guidance.",
      "B references niche industries and business activities (e.g., 'PFAS chemical makers', 'wind construction project awards') that are absent from A, which is dominated by the tech/consumer sectors.",
      "B uses speculative/question-based headlines (e.g., 'Which way will the markets be headed...?') contrasting with A's declarative statements about confirmed events."
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Dataset B headlines frequently include stock ticker symbols prefixed with '$' (e.g., $CRWD, $XLE) while A typically uses full company names",
      "B contains headlines framed as investment recommendations/analyses (e.g., '3 Great Semiconductor Stocks to Invest In') absent in A",
      "B includes technical trading patterns/terminology (e.g., '1-2-3 Pattern', 'Momentum') not found in A's event-driven reporting",
      "B features social media-style formatting with hashtags and Twitter handles (e.g., #investing, @MadMoneyOnCNBC) unlike A's formal tone",
      "B contains more international/non-US corporate references (e.g., DSV Panalpina, Repsol SA) compared to A's US-centric focus",
      "B includes granular corporate actions beyond earnings (e.g., dividend suspensions, debt issuance) that A doesn't emphasize",
      "B uses question-based headlines about market direction (e.g., 'Which way will markets...') unlike A's declarative statements",
      "B references specific price target figures from analysts (e.g., 'raised to $70 from $62') rather than general upgrade/downgrade mentions in A",
      "B contains direct quotes from executives/politicians (e.g., 'Mnuchin says...') while A uses institutional attribution ('Analysts expect')",
      "B includes non-earnings corporate developments (e.g., 'Coty Acquires...Stake') as primary news drivers more frequently than A"
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines more frequently include macroeconomic indicators (e.g., GDP, retail sales) without explicitly linking them to specific company performance, whereas A ties these factors directly to stock outcomes.",
      "Dataset B contains headlines phrased as market-wide questions, advice, or comparative analyses (e.g., 'Better Buy: AbbVie vs. Merck'), while A focuses on factual reporting of company-specific events.",
      "Dataset B includes non-corporate geopolitical or political events (e.g., U.S.-China trade deals, Brexit) as standalone headlines, whereas A contextualizes external events through their impact on specific stocks.",
      "Dataset B uses social media-style hashtags (e.g., #investing, #economy) and raw data snippets (e.g., '10Y Yield LOD 1.7620%'), which are absent in A.",
      "Dataset B headlines reference a broader range of financial instruments (e.g., ETFs, bonds, dividends) beyond individual stocks, while A emphasizes equity-specific actions like upgrades/downgrades.",
      "Dataset B features headlines about market trends or predictions (e.g., 'ETF assets to surge tenfold in 10 years'), whereas A avoids speculative forecasts beyond company guidance.",
      "Dataset B includes international economic developments (e.g., Japan\u2019s Olympics, China\u2019s tariffs) as primary topics, while A\u2019s global references are tied to company operations or earnings.",
      "Dataset B headlines often lack explicit timelines or immediate stock implications (e.g., 'Final containment ring placed...'), whereas A specifies dates, times, and direct market reactions.",
      "Dataset B incorporates retail investor-focused language (e.g., 'Santa Claus rally', 'Momentum is coming back to this name'), while A targets institutional or analyst-driven narratives.",
      "Dataset B includes headlines structured as updates or summaries (e.g., 'Stock Market Update:...'), whereas A uses declarative sentences focused on discrete corporate actions or results."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines include macroeconomic indicators (e.g., GDP, retail sales) and geopolitical events (e.g., OPEC+ cuts, trade deals) absent in A.",
      "Dataset B references non-analyst entities (e.g., government agencies, rating agencies like Moody's) as primary sources, unlike A's focus on institutional firms.",
      "Dataset B headlines use broader market indices (e.g., EUR/USD, oil prices) and sector-wide trends, while A focuses on individual stocks/companies.",
      "Dataset B includes general investment advice (e.g., \"3 Great Semiconductor Stocks\") without direct analyst actions, unlike A's explicit citations.",
      "Dataset B incorporates non-English terms (e.g., \"BoJo\"), hashtags, and social media references, which A avoids entirely.",
      "Dataset B headlines feature dividend suspensions, layoffs, or operational updates (e.g., dividend halts, project awards) not tied to analyst ratings like in A.",
      "Dataset B uses narrative phrases (e.g., \"Why Shares...Soared,\" \"Damned if they do...\") for context, while A is strictly factual and concise.",
      "Dataset B includes retail investor-focused metrics (e.g., hedge fund sentiment, earnings beat/miss percentages) without explicit price targets like A.",
      "Dataset B highlights political developments (e.g., Brexit, U.S. elections) impacting markets, whereas A focuses on company-specific financial metrics.",
      "Dataset B references non-equity instruments (e.g., bonds, forex, commodities) more frequently than A, which centers on stocks and ETFs."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines include general market event summaries (e.g., 'Key Events In The Holiday Shortened Week') absent in A, which focuses on granular company-specific actions.",
      "Dataset B references forex/political events (e.g., 'EUR/USD Failure of 1-2-3 Pattern Might be Bullish') and commodity price narratives, while A strictly ties triggers to stock-specific metrics.",
      "Dataset B headlines often de-emphasize the financial institution behind an action (e.g., 'TJX stock price target raised to $70 from $62 at MKM Partners' mentions the firm only in passing, vs. A's emphasis on firms such as 'Barclays/Oppenheimer').",
      "Dataset B includes non-earnings corporate actions (e.g., 'Coty Acquires $600M Majority Stake In Kylie Jenner's Cosmetics Company') without analyst attribution, unlike A's institution-linked events.",
      "Dataset B uses colloquial/question-based phrasing (e.g., 'Why Snap Stock Jumped Today') vs. A's formulaic structures (e.g., '[Company] sees price target cut at [Institution] due to...').",
      "Dataset B incorporates macroeconomic/political narratives (e.g., 'Mnuchin says trade deal with China to boost global economy') as standalone headlines, while A embeds economic indicators within company contexts.",
      "Dataset B includes dividend declarations (e.g., 'WPT Industrial REIT declares $0.0633 dividend') without analyst commentary, unlike A's focus on target adjustments tied to institutions.",
      "Dataset B headlines frequently truncate with ellipses (e.g., 'China is in the process of waiving retaliatory tariffs...') reflecting aggregated news snippets, whereas A uses complete institutional statements.",
      "Dataset B references non-equity instruments (e.g., ETFs like $XLE, bonds) and indices broadly, while A exclusively ties price movements to individual tickers and analyst actions.",
      "Dataset B integrates social media/platform mentions (e.g., 'Insights via @CMEGroup') and hashtags (#investing), absent in A's institution-centric formal tone."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines more frequently include questions, speculative statements, or opinion-driven analyses (e.g., 'Can $MSFT head higher?', 'Why Snap Stock Jumped Today') compared to Dataset A's fact-driven analyst actions.",
      "Dataset B contains more explicit references to geopolitical events (e.g., 'U.S. kills Iran's top commander') as direct market movers, whereas Dataset A contextualizes these factors indirectly within company-specific challenges.",
      "Dataset B headlines often feature broader market-wide event previews or summaries (e.g., 'Key Events In The Holiday Shortened Week') rather than Dataset A's focus on post-event financial metric disclosures.",
      "Dataset B includes headlines with colloquial or conversational language (e.g., 'Santa Claus rally', 'Damned if they do, damned if they don't') absent in Dataset A's formal tone.",
      "Dataset B incorporates social media tags or external content references (e.g., 'via @CMEGroup', 'Insights via @CNBCMakeIt') not observed in Dataset A's self-contained headlines.",
      "Dataset B more frequently covers non-corporate entities (e.g., ETFs like $HYG, sovereign actions like 'Qatar plans to boost LNG output') compared to Dataset A's emphasis on individual companies.",
      "Dataset B headlines include time-sensitive market updates (e.g., '10Y Yield LOD 1.7620%', 'Stocks hit lows of the day') lacking in Dataset A's earnings/guidance-centric timeline.",
      "Dataset B references cultural/pop culture phenomena (e.g., 'Kylie Jenner's Cosmetics Company', 'White Claw summer') unlike Dataset A's strictly financial/operational focus.",
      "Dataset B features explicit mentions of legislative/political developments (e.g., 'Trump, Powell meet', 'Barr says DOJ will review...') as headline drivers, while Dataset A embeds these in company risk factors.",
      "Dataset B uses headlines as news aggregation alerts (e.g., 'RECAP 12/10 +Pos Comments', 'Press release: cyber attack') rather than Dataset A's standalone analyst/firm actions."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Dataset B headlines more frequently reference geopolitical events (e.g., U.S.-China trade deals, OPEC+ decisions) as direct market movers compared to A's general macroeconomic mentions",
      "B includes explicit dividend announcements/actions (e.g., WPT REIT dividend) while A focuses on dividend changes as corporate events",
      "B contains headlines structured as questions/predictions ('Which way will markets...') absent in A's declarative style",
      "B shows more retail investor-focused content (bonus spending tips, ETF predictions) vs A's institutional analyst perspective",
      "B regularly cites specific debt instruments/bond market developments (coronavirus bonds) unlike A's index-level references",
      "B includes non-earnings financial metrics (FFO for REITs, same-store sales) beyond A's EPS/revenue focus",
      "B features explicit merger/acquisition price tags ($600M Kylie deal) while A mentions mergers generically",
      "B contains live market commentary elements ('On the hour', 'JUST IN') absent in A's retrospective analysis",
      "B references physical commodity prices (Brent crude, gold ounces) more concretely than A's sector trends",
      "B includes operational updates (factory suspensions, dividend halts) as standalone news vs A's earnings context"
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Dataset A headlines consistently include explicit stock price movement percentages or specific numerical changes (e.g., 'Shares Rise 5%', 'Down 3%'), while B rarely quantifies price reactions numerically.",
      "Dataset B frequently references geopolitical actors/events (e.g., Mnuchin, Trump, China tariffs) as direct market movers, whereas A focuses on company/sector-specific catalysts.",
      "Dataset A consistently uses complete company names alongside ticker symbols (e.g., 'General Electric ($GE)'), while B often mentions tickers without full company names.",
      "Dataset B includes headlines structured as questions or listicles (e.g., '3 Great Semiconductor Stocks...'), a format absent in A's samples.",
      "Dataset A emphasizes clinical trial updates/biotech pipeline developments (e.g., 'clinical trial data presentation'), while B lacks pharma R&D-specific announcements.",
      "Dataset B contains explicit mentions of dividend declarations/actions (e.g., 'WPT Industrial REIT declares $0.0633 dividend'), which never appear in A's samples.",
      "Dataset A consistently references specific future dates for events (e.g., 'CES 2024', 'July 15th'), while B uses vague temporal references like 'holiday shortened week'.",
      "Dataset B includes currency pair analysis (e.g., 'EUR/USD Failure of 1-2-3 Pattern') and commodity spot prices, which A never addresses.",
      "Dataset A headlines consistently mention physical business expansions (e.g., 'new flagship store', 'slaughterhouse plans'), while B focuses on financial/strategic moves without physical infrastructure details.",
      "Dataset B incorporates social media handles/URL citations (e.g., 'via @CNBCMakeIt') and hashtags, while A maintains clean formatting without external platform references."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "Dataset B headlines more frequently mention macroeconomic indicators (e.g., GDP, retail sales) without direct company/stock impact statements",
      "Dataset B contains more speculative language/question formats ('Can $MSFT head higher?') absent in A's declarative style",
      "Dataset B includes non-equity market references (forex patterns, bond markets, commodities) not present in A's equity-focused headlines",
      "Dataset B shows higher frequency of non-analyst institutional actions (Moody's, dividend changes, CFO transitions) compared to A's focus on sell-side analyst ratings",
      "Dataset B headlines often lack explicit stock price movement verbs ('falls','surges') that dominate A's impact-focused structure",
      "Dataset B contains more event previews/calendar items ('Key Events In...') versus A's exclusive focus on completed events",
      "Dataset B includes non-quantified corporate actions (acquisitions, partnerships) without stated market impact that A consistently provides",
      "Dataset B uses more colloquial financial jargon ('treadmill', 'bull case') vs A's standardized terminology (EPS, price target)",
      "Dataset B features geopolitical/regulatory developments as primary catalysts more frequently than A's company-specific operational triggers",
      "Dataset B shows higher incidence of incomplete/unconventional ticker formatting (e.g., embedded URLs, missing $ symbols) compared to A's standardized ticker presentation"
    ],
    "llama3.1-8b_zero-shot_v1": [
      "Dataset B headlines frequently include stock ticker symbols (e.g., $CRWD, $AAPL), while Dataset A does not reference tickers.",
      "Dataset B contains explicit analyst recommendations/actions (e.g., 'upgrades', 'downgrades', 'price target raises'), whereas Dataset A focuses purely on event reporting without editorialized guidance.",
      "Dataset B includes headlines structured as questions/advice ('Why...', 'What to watch...', '3 Great... Stocks'), while Dataset A maintains declarative statements throughout.",
      "Dataset B references granular corporate actions (e.g., dividend suspensions, CFO transitions, conference calls), whereas Dataset A focuses on macro-level market movements.",
      "Dataset B contains headlines about minor/non-market-moving events (e.g., 'Libbey announces CFO transition'), while Dataset A exclusively reports high-impact macroeconomic/market-shaping events.",
      "Dataset B includes forward-looking speculative phrases ('might be bullish', 'could spell doom'), while Dataset A focuses strictly on confirmed past/present events.",
      "Dataset B features headlines about secondary financial instruments (e.g., 'PFAS chemical makers climb', '10Y Yield LOD 1.7620%') absent in Dataset A's equity-focused reports.",
      "Dataset B contains meta-commentary about market analysis processes ('This ratio suggests...', 'analysts think will happen next'), while Dataset A presents direct cause-effect reporting.",
      "Dataset B includes social media/platform references (e.g., 'via @CNBCMakeIt', 'On @MadMoneyOnCNBC'), whereas Dataset A maintains traditional journalistic formatting.",
      "Dataset B shows frequent earnings metric specificity ('misses by $0.01', 'beats on revenue'), while Dataset A describes earnings impacts qualitatively ('Surpass Expectations')."
    ],
    "llama3.3-70b_few-shot_v1": [
      "Dataset B headlines frequently include informal elements like hashtags, social media mentions, or colloquial phrases (e.g., '#investing #economy #finance', 'Santa Claus rally') absent in Dataset A.",
      "Dataset B contains truncated headlines with ellipses or partial text (e.g., 'China is in the process of waiving retaliatory tariffs...'), suggesting direct extraction from social media or real-time feeds, unlike Dataset A's complete sentences.",
      "Dataset B includes niche or non-traditional financial topics (e.g., 'Hackers Breach Ring Home Security Cameras', 'Kylie Jenner's Cosmetics Company') that are absent in Dataset A's focus on mainstream market events.",
      "Dataset B headlines often reference specific price targets or numerical thresholds (e.g., 'JMP Securities upgrades snap to outperform with $20 pt'), while Dataset A typically mentions analyst actions without explicit targets.",
      "Dataset B features headlines framed as questions or speculative prompts (e.g., 'Which way will the markets be headed next week...', 'Can $MSFT head higher?'), whereas Dataset A uses declarative statements exclusively.",
      "Dataset B includes granular corporate updates (e.g., dividend suspensions, CFO transitions, layoffs) that are less common in Dataset A, which emphasizes macroeconomic trends and large-scale market movements.",
      "Dataset B headlines frequently cite lesser-known or regional companies (e.g., 'Hastings Technology Metals', 'Ardmore') compared to Dataset A's focus on major corporations like Amazon or Tesla.",
      "Dataset B incorporates non-English characters or localized content (e.g., 'Japan\u2019s Abe Says Olympics Won\u2019t Be Postponed Amid Virus Fears'), while Dataset A centers on U.S.-centric news.",
      "Dataset B uses abbreviated financial metrics (e.g., 'FFO misses by $0.01') without full context, whereas Dataset A provides explicit explanations (e.g., 'misses on revenue').",
      "Dataset B includes headlines with mixed-case formatting (e.g., 'StockBeat - Chip Stocks Rise as Huawei Granted Another Reprieve') and inconsistent punctuation, contrasting with Dataset A\u2019s standardized title case."
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Dataset B headlines include a broader range of non-tech sectors (e.g., energy, retail, pharmaceuticals), while Dataset A focuses predominantly on major tech companies like Apple, Tesla, and Alphabet.",
      "Dataset B references specific geopolitical events (e.g., U.S.-China trade deals, Iran airstrikes) as primary drivers of market movements, whereas Dataset A ties market reactions more narrowly to earnings or macroeconomic indicators.",
      "Dataset B contains explicit mentions of dividends (e.g., declarations, suspensions) and bond markets, which are absent in Dataset A headlines.",
      "Dataset B headlines frequently include non-earnings corporate actions (e.g., mergers like Coty-Kylie Jenner, layoffs, partnerships), while Dataset A emphasizes quarterly earnings results and analyst rating changes.",
      "Dataset B features retail sales data, consumer spending trends, and sector-specific employment metrics, whereas Dataset A focuses on company-specific financial metrics like EPS and revenue.",
      "Dataset B headlines often pose questions (e.g., 'Can $MSFT head higher?') or speculative statements, while Dataset A uses declarative structures centered on analyst actions and earnings outcomes.",
      "Dataset B includes niche financial instruments (e.g., forex pairs like EUR/USD, commodity prices like Brent crude) not referenced in Dataset A, which centers on equity tickers and stock-specific news.",
      "Dataset B incorporates social/political events (e.g., Brexit, U.S. elections, regulatory scandals) as market catalysts, whereas Dataset A ties sentiment primarily to earnings performance or supply chain issues.",
      "Dataset B headlines mention litigation, cybersecurity incidents, and regulatory scrutiny (e.g., Boeing, Ring cameras) as market movers, absent in Dataset A\u2019s earnings-centric context.",
      "Dataset B uses informal language, hashtags, and third-party references (e.g., 'via @CNBC'), while Dataset A maintains a formal tone focused on institutional analyst reports and earnings metrics."
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Headlines in B frequently include ticker symbols (e.g., $CRWD, $XLE) within the text, whereas A rarely or never does.",
      "B includes explicit mentions of dividend declarations, suspensions, or changes (e.g., 'WPT Industrial REIT declares $0.0633 dividend'), while A focuses on dividend changes only as part of broader company updates.",
      "B contains headlines structured as direct investor advice or stock recommendations (e.g., '3 Great Semiconductor Stocks to Invest In for 2020'), which are absent in A.",
      "B references premarket/after-hours stock price movements (e.g., 'Extreme Networks stock up 2.6% premarket'), whereas A describes price reactions only in general terms.",
      "B frequently cites exact EPS/revenue miss/beat margins (e.g., 'misses by $0.01'), while A uses qualitative descriptions like 'lower-than-expected' without specific figures.",
      "B includes granular analyst actions with explicit price targets (e.g., 'price target raised to $70 from $62'), whereas A mentions analyst actions more generically.",
      "B features headlines about niche financial instruments (e.g., 'EUR/USD Failure of 1-2-3 Pattern') and technical analysis, which A avoids.",
      "B incorporates casual language and slang (e.g., 'ripping to new highs,' 'StockBeat'), while A maintains formal, standardized phrasing.",
      "B highlights highly specific corporate financial decisions (e.g., 'CAE temporarily suspends dividend and share repurchase plan'), whereas A emphasizes broader operational updates.",
      "B includes real-time event-driven updates (e.g., 'Slack shares halted for pending news'), while A focuses on post-event analyses or announcements."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Dataset B headlines frequently reference geopolitical events, regulatory actions, and macroeconomic indicators as primary market movers, whereas Dataset A ties these factors to company-specific performance outcomes.",
      "Dataset B includes a higher prevalence of headlines focused on dividends, mergers, acquisitions, and legal/regulatory developments unrelated to earnings, which are less emphasized in Dataset A.",
      "Dataset B headlines often explicitly mention premarket/post-market stock movements (e.g., 'stock up 2.6% premarket'), while Dataset A focuses on implied price action through earnings/results context.",
      "Dataset B contains more social media-style formatting with hashtags (#economy), external references (via @CNBCMakeIt), and informal alerts ('JUST IN'), absent in Dataset A's formal financial reporting tone.",
      "Dataset B covers a broader range of sectors beyond tech (e.g., retail, energy, pharmaceuticals, real estate) with granular industry-specific updates, while Dataset A heavily concentrates on major tech companies.",
      "Dataset B includes forward-looking questions and comparative analyses (e.g., 'Better Buy: AbbVie vs. Merck') absent in Dataset A's focus on institutional analyst actions and confirmed earnings data.",
      "Dataset B features explicit mentions of bond markets, commodities (oil/gold), and currency pairs (EUR/USD), whereas Dataset A remains equity/stock-centric with ticker symbols.",
      "Dataset B headlines frequently reference specific numerical thresholds (e.g., 'Brent crude nears $70', '10Y Yield LOD 1.7620%'), while Dataset A emphasizes percentage changes and corporate guidance metrics.",
      "Dataset B includes non-earnings corporate actions like dividend suspensions, layoffs, and CFO transitions as standalone news, whereas Dataset A primarily ties such events to analyst rating implications.",
      "Dataset B shows greater diversity in geographic focus (e.g., China's tariff waivers, European markets, OPEC+ decisions) compared to Dataset A's U.S.-centric narratives with occasional China mentions."
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Dataset B headlines frequently include explicit stock ticker symbols (e.g., $CRWD, $AAPL) within the text, while Dataset A uses company names without tickers.",
      "Dataset B contains granular numerical specifics (e.g., 'misses by $0.01', '28% on expected positive data'), whereas Dataset A uses qualitative descriptors (e.g., 'below-expected earnings', 'surge').",
      "Dataset B headlines reference niche financial instruments (e.g., forex pairs like EUR/USD, ETFs like $XLE) absent in Dataset A, which focuses on broad indices (e.g., S&P 500).",
      "Dataset B includes explicit mentions of pre/post-market stock movements (e.g., '2.6% premarket'), while Dataset A emphasizes general intraday or post-event volatility.",
      "Dataset B features conference call announcements, earnings previews, and specific timestamps (e.g., 'conference call tomorrow'), unlike Dataset A\u2019s generic forward-looking statements.",
      "Dataset B headlines incorporate technical analysis terminology (e.g., 'Failure of 1-2-3 Pattern', 'pivot 50c'), absent in Dataset A\u2019s macro-focused narratives.",
      "Dataset B includes corporate actions with exact figures (e.g., 'lays off 2,600 employees', '$600M Majority Stake'), while Dataset A uses vague terms like 'layoffs' or 'expansion plans'.",
      "Dataset B highlights regulatory/legal developments (e.g., 'antitrust scrutiny', 'DOJ review') with procedural details, whereas Dataset A mentions 'regulatory scrutiny' generically.",
      "Dataset B features fragmented, social media-style formatting (e.g., hashtags, truncated text, URLs), while Dataset A uses complete, formal sentence structures.",
      "Dataset B explicitly names analyst firms and price targets (e.g., 'Deutsche Bank lifts price target'), whereas Dataset A refers to 'analysts' collectively without specifics."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines occasionally omit stock ticker symbols entirely, while Dataset A consistently includes them prefixed with '$' in every headline.",
      "Dataset B includes headlines focused on broader market events or geopolitical developments without direct ties to specific companies, whereas Dataset A exclusively centers on company-specific news.",
      "Dataset B uses informal language, hashtags, or emojis (e.g., '#investing', '!!'), while Dataset A maintains a formal, structured tone throughout.",
      "Dataset B references non-analyst entities (e.g., politicians, CEOs, non-financial institutions) as primary news drivers, while Dataset A predominantly cites analyst firms (e.g., Morgan Stanley, Oppenheimer).",
      "Dataset B headlines often lack explicit cause-effect relationships between events and stock movements, whereas Dataset A systematically pairs market reactions with specific catalysts (e.g., earnings results, analyst actions).",
      "Dataset B includes headlines phrased as questions or speculative commentary (e.g., 'Can $MSFT head higher?'), which are absent in Dataset A.",
      "Dataset B covers non-corporate topics (e.g., political elections, hacking incidents, social trends) impacting markets, while Dataset A strictly focuses on corporate financial or operational developments.",
      "Dataset B occasionally omits numerical metrics in favor of qualitative descriptions (e.g., 'misses on revenue'), whereas Dataset A consistently quantifies performance with specific figures (e.g., '23% earnings beat').",
      "Dataset B incorporates non-equity market references (e.g., currencies like EUR/USD, commodities like oil) more frequently than Dataset A, which focuses on individual stocks.",
      "Dataset B features headlines structured as summaries or lists (e.g., 'Final-hour selloff costs stocks all their gains'), while Dataset A uses concise, formulaic templates linking tickers, events, and outcomes."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Dataset B headlines frequently include stock ticker symbols (e.g., $CRWD, $XLE) directly in the title, whereas Dataset A does not.",
      "Dataset B uses more conversational or colloquial phrases (e.g., 'What to watch in the markets', 'Here\u2019s what has futures higher') compared to Dataset A\u2019s formal tone.",
      "Dataset B often references specific technical trading patterns or chart analysis (e.g., 'EUR/USD Failure of 1-2-3 Pattern Might be Bullish'), while Dataset A focuses on broader market trends.",
      "Dataset B includes granular financial metrics tied to individual companies (e.g., 'FFO misses by $0.01', 'EPS misses by $0.0452') with explicit numerical precision, whereas Dataset A reports earnings beats/misses in general terms.",
      "Dataset B headlines frequently mention explicit price targets or analyst actions with numerical specificity (e.g., 'price target raised to $70 from $62'), whereas Dataset A describes analyst actions more generically.",
      "Dataset B incorporates social media-style formatting (e.g., hashtags like #Stock, truncated URLs, or embedded platform references like @MadMoneyOnCNBC), which Dataset A avoids.",
      "Dataset B includes headlines focused on niche financial instruments (e.g., ETFs, dividends, bonds) or hyper-specific corporate actions (e.g., 'suspends dividend and share repurchase plan'), whereas Dataset A emphasizes sector-wide or macroeconomic developments.",
      "Dataset B often highlights short-term, event-driven market movements (e.g., 'premarket', 'midday', 'final-hour selloff') with immediacy, while Dataset A reports outcomes after market closures or as sustained trends.",
      "Dataset B contains headlines with explicit mentions of retail investor-oriented content (e.g., '3 Great Semiconductor Stocks to Invest In', 'Better Buy: AbbVie vs. Merck'), whereas Dataset A targets institutional or general market audiences.",
      "Dataset B references granular operational updates (e.g., 'lays off 2,600 employees', 'CFO transition') and regulatory minutiae (e.g., 'PFAS chemical makers climb as Congress makes no decision'), whereas Dataset A focuses on high-impact policy or macroeconomic shifts."
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Dataset B includes geopolitical and macroeconomic event impacts (e.g., U.S.-China trade deals, OPEC+ oil decisions) not explicitly tied to specific analyst actions or company earnings, unlike A's focus on institutional analyst-driven narratives.",
      "Dataset B references dividends, M&A activity (e.g., Coty-Kylie Jenner deal), and corporate actions (e.g., layoffs, CFO transitions) absent in A, which centers on analyst ratings and earnings reactions.",
      "Dataset B covers a broader range of sectors (e.g., retail, energy, pharmaceuticals, real estate) compared to A's narrower tech/semiconductor/EV focus.",
      "Dataset B features forward-looking market commentary (e.g., \"ETF assets to surge tenfold\") and investment advice (e.g., \"3 Great Semiconductor Stocks\"), whereas A focuses on institutional analyst validations.",
      "Dataset B incorporates non-institutional data sources (e.g., Moody\u2019s, CNBC, political statements) alongside financial institutions, while A relies almost exclusively on major banks (e.g., Morgan Stanley, Barclays).",
      "Dataset B includes forex (e.g., EUR/USD patterns), commodities (e.g., oil price swings), and bond market developments, which are absent in A\u2019s equity-centric headlines.",
      "Dataset B highlights retail investor-centric content (e.g., \"Hedge Funds Are Selling...\") and stock-specific momentum alerts (e.g., \"$SINT volume Alert!!\"), unlike A\u2019s institutional tone.",
      "Dataset B references political events (e.g., Brexit, U.S. elections) and regulatory risks (e.g., PFAS chemical regulations) as direct market drivers, whereas A ties movements to earnings/analyst actions.",
      "Dataset B includes non-earnings operational updates (e.g., cybersecurity breaches, product launches like NVIDIA\u2019s cloud service) absent in A\u2019s earnings/guidance-focused headlines.",
      "Dataset B uses conversational/question-based phrasing (e.g., \"Can $MSFT head higher?\") and media-driven narratives (e.g., \"Elon Musk Loses $5.9B\"), while A maintains formal analyst-action language."
    ],
    "llama3.1-8b_few-shot_v1": [
      "Dataset B headlines frequently include specific technical analysis terms and trading patterns (e.g., 'EUR/USD Failure of 1-2-3 Pattern', '50c pivot') absent in Dataset A",
      "Dataset B contains more granular financial instrument references (e.g., '10Y Yield LOD 1.7620%', 'Brent crude nears $70') compared to A's broader macroeconomic metrics",
      "Dataset B features explicit price targets and numerical analyst expectations in headlines (e.g., 'JMP upgrades Snap to $20 pt') while A generally mentions analyst actions qualitatively",
      "Dataset B includes more niche/small-cap companies (e.g., Extreme Networks, Athenex) versus A's focus on blue-chip stocks and major indices",
      "Dataset B headlines frequently incorporate social media tags and platform references (e.g., '#investing', '@MadMoneyOnCNBC') unlike A's formal news style",
      "Dataset B shows higher prevalence of premarket/after-hours price movements (e.g., 'up 2.6% premarket') compared to A's focus on regular session moves",
      "Dataset B contains more speculative phrasing about future market directions (e.g., 'Which way will markets head?') versus A's retrospective reporting style",
      "Dataset B includes detailed dividend declarations and capital allocation updates (e.g., 'WPT REIT declares $0.0633 dividend') absent in A's headlines",
      "Dataset B features more frequent references to debt instruments and corporate financing (e.g., 'Chefs' Warehouse floats new debt') than Dataset A",
      "Dataset B headlines often contain fragmented real-time update syntax (e.g., 'RECAP 12/10 +Pos Comments') contrasting with A's complete sentence structures"
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines frequently include non-English characters or hashtags (e.g., 'MarketScreener\u2026', '#investing') not found in A",
      "Dataset B contains more general market event previews/recaps (e.g., 'Key Events In The Holiday Shortened Week') lacking specific ticker references",
      "Dataset B includes direct quotes from non-analyst public figures (Mnuchin, Trump, Barr) absent in A's analyst-focused commentary",
      "Dataset B features geopolitical/regulatory developments as standalone topics (e.g., Brexit, OPEC+ cuts) without company linkages required in A",
      "Dataset B uses question-based headlines ('Can $MSFT head higher?') and investment comparisons ('Better Buy: AbbVie vs. Merck') unlike A's declarative style",
      "Dataset B includes non-corporate entity analysis (country economies, currencies like EUR/USD) not tied to specific stocks as in A",
      "Dataset B shows frequent EPS/revenue misses without accompanying analyst actions (e.g., 'Brixmor Property FFO misses') that A always pairs with firm responses",
      "Dataset B contains social media platform impacts (e.g., 'Zoom security concerns') as primary drivers vs A's financial metric-driven headlines",
      "Dataset B uses informal colloquialisms ('Damned if they do', 'Santa Claus rally') absent in A's formal institutional tone",
      "Dataset B includes operational updates without financial implications (e.g., 'Lays off 2,600 employees') that A would contextualize with market impact"
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Dataset B headlines frequently include informal language and conversational phrases (e.g., 'Big buy blocks', 'ripping to new highs'), while Dataset A maintains formal tone throughout.",
      "Dataset B contains more explicit references to technical analysis patterns (e.g., 'EUR/USD Failure of 1-2-3 Pattern') compared to Dataset A's focus on fundamental analysis.",
      "Dataset B includes more retail investor-focused content like dividend declarations (e.g., 'WPT Industrial REIT declares $0.0633 dividend') and penny stock mentions ($SINT), unlike Dataset A.",
      "Dataset B headlines frequently incorporate social media elements like hashtags (#investing) and Twitter handles (@CMEGroup), which are absent in Dataset A.",
      "Dataset B shows more frequent use of speculative phrasing (e.g., 'Momentum is coming back', 'Could Spell Doom') compared to Dataset A's fact-based reporting style.",
      "Dataset B includes more granular retail sector updates (e.g., 'retail winners and losers') while Dataset A focuses on broader macroeconomic retail trends.",
      "Dataset B contains more event-driven headlines without contextual explanation (e.g., 'Slack shares halted for pending news') compared to Dataset A's detailed context.",
      "Dataset B features more direct quotes from non-analyst sources (e.g., 'I find this worship no different...') unlike Dataset A's exclusive focus on institutional sources.",
      "Dataset B includes more coverage of small-cap stocks and niche companies (e.g., Hastings Technology Metals) compared to Dataset A's focus on large-cap equities.",
      "Dataset B shows frequent use of truncated text (via '...') and real-time update formatting, suggesting social media origins, while Dataset A headlines appear complete and polished."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Dataset B headlines report earnings results (e.g., EPS/revenue beats/misses) as standalone facts without linking them to subsequent analyst actions, while Dataset A consistently ties earnings outcomes to analyst reactions.",
      "Dataset B includes headlines mentioning company names without stock ticker symbols (e.g., 'Home Depot stock'), whereas Dataset A uses ticker symbols (e.g., $TSLA) consistently.",
      "Dataset B covers a broader range of non-tech sectors (e.g., real estate, retail, energy) compared to Dataset A\u2019s heavy focus on major tech companies like Apple and Tesla.",
      "Dataset B incorporates geopolitical events (e.g., U.S.-China trade deals, OPEC+ decisions) as primary drivers, while Dataset A references macroeconomic factors primarily in corporate contexts.",
      "Dataset B reports corporate actions (e.g., M&A, dividend changes) independently of analyst reactions, unlike Dataset A, which links such events to analyst ratings or price targets.",
      "Dataset B includes references to non-equity instruments (e.g., ETFs, forex pairs, commodities) absent in Dataset A\u2019s equity-centric headlines.",
      "Dataset B features forward-looking predictions (e.g., ETF growth forecasts) unrelated to analyst actions, while Dataset A focuses on current analyst evaluations.",
      "Dataset B highlights international markets and non-U.S. entities (e.g., European firms, Asian economies) more frequently than Dataset A.",
      "Dataset B uses varied headline formats (e.g., questions, advice columns), whereas Dataset A maintains a standardized structure around analyst actions and earnings.",
      "Dataset B occasionally includes social media elements (e.g., hashtags, @mentions) absent in Dataset A\u2019s traditional news formatting."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Dataset B headlines frequently include specific stock ticker symbols prefixed with '$' (e.g., $CRWD, $XLE), while Dataset A uses company names or generic terms (e.g., Apple, Tech Giant).",
      "Dataset B contains granular financial metrics with exact figures (e.g., 'misses by $0.01', '$600M stake'), whereas Dataset A uses qualitative descriptors (e.g., 'exceeds expectations', 'mixed results').",
      "Dataset B includes corporate actions like mergers, acquisitions, and dividend declarations (e.g., 'Coty Acquires $600M Majority Stake'), while Dataset A focuses on earnings outcomes and Fed policy impacts.",
      "Dataset B headlines reference specific dates, events, or timelines (e.g., 'Key Events In The Holiday Shortened Week'), while Dataset A emphasizes immediate market reactions without temporal specificity.",
      "Dataset B incorporates technical analysis terms (e.g., 'EUR/USD Failure of 1-2-3 Pattern') and trading jargon, absent in Dataset A's more generalized market narratives.",
      "Dataset B features social media tags, hashtags, or external references (e.g., '@MadMoneyOnCNBC', #economy), whereas Dataset A uses formal, standalone headlines.",
      "Dataset B includes explicit mentions of price targets and analyst firm names (e.g., 'JMP Securities upgrades snap to outperform with $20 pt'), while Dataset A refers to analysts generically (e.g., 'Market Analysts Predict').",
      "Dataset B covers niche sectors (e.g., fracking, uranium) and non-equity instruments (e.g., bonds, forex), whereas Dataset A focuses on broad sectors like tech, energy, and retail.",
      "Dataset B headlines frequently reference international markets (e.g., Japan\u2019s Abe, Asian markets) and geopolitical granularity, while Dataset A emphasizes U.S.-centric events.",
      "Dataset B includes corporate transcripts, legal updates, and operational specifics (e.g., 'Edited Transcript of PLC.TO earnings conference call'), absent in Dataset A's macroeconomic focus."
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Headlines in Dataset A consistently include specific numerical metrics (e.g., percentage changes, precise dividend amounts) such as 'stock jumps 4%' or 'declares $0.0633 dividend', while Dataset B headlines often omit granular figures.",
      "Dataset A headlines frequently reference analyst firms by name (e.g., 'Barclays cuts', 'Oppenheimer upgrades'), whereas Dataset B mentions analyst actions generically (e.g., 'analysts say', 'Wells Fargo cuts').",
      "Dataset B includes non-corporate macroeconomic or geopolitical commentary (e.g., 'EUR/USD Failure of 1-2-3 Pattern', 'U.S. retail sales rise slightly'), while Dataset A focuses narrowly on company-specific events and stock reactions.",
      "Dataset A headlines emphasize forward-looking guidance updates (e.g., 'expects further headwinds ahead', 'forecasts lower revenue'), whereas Dataset B rarely discusses future corporate guidance.",
      "Dataset B incorporates conversational phrases and rhetorical questions (e.g., 'Can $MSFT head higher?', 'Why Snap Stock Jumped Today'), while Dataset A maintains a formal, declarative tone throughout.",
      "Headlines in Dataset A systematically append stock tickers to company names (e.g., 'Carnival ($CMD)', 'Teva Pharmaceutical ($TEVA)'), whereas Dataset B often omits ticker symbols or uses them inconsistently.",
      "Dataset B features currency pairs, technical trading patterns, and non-equity financial instruments (e.g., 'EUR/USD', '10Y Yield LOD 1.7620%'), which are absent in Dataset A's equity-focused headlines.",
      "Dataset A consistently pairs corporate announcements with immediate stock price reactions (e.g., 'acquires...sending shares higher'), while Dataset B frequently reports events without market response context.",
      "Dataset B includes social media-style hashtags and platform references (e.g., '#investing #economy', 'via @CNBCMakeIt'), whereas Dataset A avoids these digital engagement elements.",
      "Dataset A contains granular operational details (e.g., 'lays off 2,600 employees', 'invests $1 billion in life sciences'), while Dataset B favors high-level event reporting without implementation specifics."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines include broader market summaries and event previews not centered on specific analyst actions, while Dataset A focuses on analyst-driven actions (e.g., upgrades, downgrades).",
      "Dataset B references geopolitical events and macroeconomic indicators (e.g., trade deals, oil prices) as standalone news drivers, whereas Dataset A ties these factors to company-specific impacts.",
      "Dataset B contains headlines with social media tags, URLs, or external media references (e.g., '@MadMoneyOnCNBC', '#MarketScreener'), absent in Dataset A.",
      "Dataset B headlines use narrative explanations linking events to market impacts (e.g., 'Oil prices surge after U.S. airstrike'), while Dataset A emphasizes concise analyst actions or earnings results.",
      "Dataset B includes headlines formatted as questions or speculative discussions (e.g., 'Which way will markets head?'), which are absent in Dataset A.",
      "Dataset B mentions specific dates, holidays, or trading sessions (e.g., 'holiday shortened week'), unlike Dataset A.",
      "Dataset B covers legal/regulatory developments (e.g., 'Barr says DOJ will review') and political news, while Dataset A focuses on corporate events and analyst ratings.",
      "Dataset B headlines occasionally include non-English text or characters (e.g., Chinese, Greek), whereas Dataset A uses English exclusively.",
      "Dataset B discusses retail consumer behavior (e.g., 'retail sales rise') as standalone topics, while Dataset A ties consumer trends to company performance.",
      "Dataset B includes future-tense references to corporate plans (e.g., 'Apple delays theatrical debut'), while Dataset A emphasizes past/current earnings or analyst actions."
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines often include geopolitical events, macroeconomic news, or sector-wide trends without directly linking them to specific company performance (e.g., 'Oil prices surge after the U.S. kills Iran's top commander'), whereas Dataset A ties macroeconomic factors explicitly to company-specific updates.",
      "Dataset B features headlines with non-analyst actions (e.g., dividend declarations, layoffs, or mergers like 'WPT Industrial REIT declares $0.0633 dividend'), while Dataset A focuses exclusively on analyst-driven actions (e.g., upgrades, downgrades, price target changes).",
      "Dataset B includes headlines with technical analysis or trading patterns (e.g., 'EUR/USD Failure of 1-2-3 Pattern Might be Bullish'), which are absent in Dataset A.",
      "Dataset B headlines frequently reference political statements or government actions (e.g., 'Mnuchin says trade deal with China to boost global economy'), whereas Dataset A avoids political commentary.",
      "Dataset B uses varied formatting, such as hashtags, external source references (e.g., 'via @CNBCMakeIt'), or fragmented sentences from press releases, while Dataset A maintains a standardized, formal structure.",
      "Dataset B includes headlines about broader market indices, ETFs, or commodities without tying them to analyst opinions (e.g., 'Oil slides again as traders await news on OPEC+ cuts'), whereas Dataset A links ETFs/market indices to specific analyst insights.",
      "Dataset B headlines often omit explicit mentions of analyst firms or analysts' names (e.g., 'FFO misses by $0.01'), while Dataset A consistently attributes actions to specific firms (e.g., 'Morgan Stanley downgrades...').",
      "Dataset B features opinion-based or recommendation-driven headlines (e.g., '3 Great Semiconductor Stocks to Invest In for 2020'), which are absent in Dataset A's fact-driven updates.",
      "Dataset B includes operational updates (e.g., layoffs, dividend suspensions like 'CAE temporarily suspends dividend') that lack direct ties to analyst ratings, whereas Dataset A focuses on financial metrics and analyst evaluations.",
      "Dataset B headlines frequently reference non-corporate events (e.g., 'Trump, Powell meet and talk about 'everything''), while Dataset A strictly centers on company-specific financial developments."
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines more frequently reference geopolitical events (e.g., U.S.-Iran tensions, Brexit) and international market dynamics (e.g., China tariff waivers, Qatar LNG output) compared to Dataset A.",
      "Dataset B includes non-corporate events (e.g., elections, regulatory scandals, public health crises) affecting markets, whereas Dataset A focuses almost exclusively on corporate actions and financial metrics.",
      "Dataset B headlines use broader market indices (e.g., EUR/USD, 10Y Yield) and technical analysis terms (e.g., '1-2-3 Pattern'), while Dataset A emphasizes individual stock tickers and analyst-specific metrics.",
      "Dataset B contains more generalized sector trends (e.g., 'Great Semiconductor Stocks') without naming specific companies, unlike Dataset A, which ties trends to explicit tickers (e.g., '$XYL boosted by e-commerce').",
      "Dataset B headlines reference retail consumer behavior (e.g., 'retail winners and losers') and macroeconomic policy (e.g., Fed rate decisions) more often than Dataset A.",
      "Dataset B features headlines structured as questions or speculative prompts (e.g., 'Which way will the markets be headed?'), whereas Dataset A uses declarative statements focused on immediate outcomes.",
      "Dataset B integrates social media-style hashtags (e.g., '#investing #economy') and external content references (e.g., 'via @CNBC'), which are absent in Dataset A.",
      "Dataset B includes dividend suspensions, layoffs, and corporate restructuring as standalone topics (e.g., 'CAE suspends dividend'), while Dataset A ties such events to analyst reactions or price targets.",
      "Dataset B headlines reference bond markets, commodities (e.g., gold, crude oil), and currency pairs (e.g., GBP/USD) more frequently than Dataset A, which focuses on equities.",
      "Dataset B uses concise, fragmented phrasing (e.g., 'Final-hour selloff costs stocks all their gains') compared to Dataset A\u2019s detailed, multi-clause sentences (e.g., '$PBF Hit by Moody\u2019s Downgrade, Shares Decline 3.4% Today')."
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines include macroeconomic/political events as primary subjects (e.g., trade deals, GDP updates), while A focuses solely on company-specific analyst actions.",
      "Dataset B contains headlines without ticker symbols (e.g., 'Key Events In The Holiday Shortened Week'), whereas A consistently prefixes company names with $ tickers.",
      "Dataset B features sector investment recommendations (e.g., '3 Great Semiconductor Stocks'), while A never includes broad investment advice.",
      "Dataset B incorporates non-analyst financial metrics (e.g., 'FFO misses by $0.01'), whereas A exclusively ties performance to analyst evaluations.",
      "Dataset B includes geopolitical developments directly impacting commodities (e.g., oil price wars), absent in A\u2019s company-centric focus.",
      "Dataset B uses question-based headlines (e.g., 'Can $MSFT head higher?'), while A maintains declarative analyst-action statements.",
      "Dataset B contains dividend declarations and corporate policy changes (e.g., suspended dividends), while A focuses on equity analysis actions.",
      "Dataset B references forex/currency pairs (e.g., 'EUR/USD Failure'), whereas A never discusses foreign exchange markets.",
      "Dataset B includes truncated social media-style text (e.g., '...') and hashtags, while A uses complete, formal headlines.",
      "Dataset B reports regulatory/political outcomes affecting industries (e.g., Brexit implications), while A ties regulatory impacts to specific company analyst actions."
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines include general market commentary and broad economic trends (e.g., 'Key Events In The Holiday Shortened Week') absent in A",
      "Dataset B contains headlines without explicit analyst actions (e.g., 'Brixmor Property FFO misses by $0.01') while A consistently cites analyst upgrades/downgrades",
      "Dataset B references geopolitical/regulatory events (e.g., 'China waiving retaliatory tariffs') not tied to specific company actions like in A",
      "Dataset B includes non-earnings corporate developments (e.g., 'Coty Acquires $600M Stake') while A focuses strictly on earnings/analyst actions",
      "Dataset B headlines occasionally omit ticker symbols (e.g., 'Tyson Foods EPS misses') unlike A's mandatory ticker formatting",
      "Dataset B features macroeconomic indicators (e.g., 'U.S. retail sales rise slightly') rather than company-specific financial metrics in A",
      "Dataset B contains investment recommendations (e.g., '3 Great Semiconductor Stocks') absent from A's event-driven reporting",
      "Dataset B includes currency/commodity market updates (e.g., 'EUR/USD Failure of 1-2-3 Pattern') beyond A's equity focus",
      "Dataset B uses question/answer formats and comparative analysis (e.g., 'Better Buy: AbbVie vs. Merck') not seen in A",
      "Dataset B references external content sources (e.g., 'Insights via @CMEGroup') while A maintains self-contained reporting"
    ]
  }
}