{
  "sims": {
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Both datasets consistently include stock tickers prefixed with '$' symbol (e.g., $CVX, $TSLA, $SPY)",
      "Headlines frequently reference analyst actions: upgrades, downgrades, and price target adjustments",
      "Percentage-based stock price movements are prominently displayed in nearly all entries (e.g., -2%, +4%)",
      "Quarterly earnings reports (Q2, Q3, Q4) and guidance updates are central to majority of entries",
      "Specific financial metrics are highlighted: EPS beats/misses, revenue comparisons, and FFO results",
      "Sector-specific terminology appears consistently (oil prices, chip demand, clinical trials, cloud computing)",
      "References to institutional analysts/banks are ubiquitous (Morgan Stanley, Goldman Sachs, Barclays)",
      "Forward-looking statements about growth projections and market outlooks appear in all samples",
      "Both datasets mix fundamental analysis (earnings) with technical analysis (premarket movements, price targets)",
      "Regulatory/economic factors are frequently cited (OPEC decisions, Fed policy, trade data, COVID impacts)"
    ],
    "llama3.1-8b_few-shot_v1": [
      "Both datasets include headlines referencing stock tickers and percentage price movements (e.g., '-2%', 'slips 27%', 'surges 15%').",
      "Headlines frequently mention earnings results, revenue beats/misses, and guidance revisions (e.g., 'EPS misses', 'beats on revenue', 'guidance cut').",
      "Macroeconomic indicators like GDP growth, inflation, and central bank policies (e.g., Fed rate decisions) are common focal points.",
      "Sector-specific developments (e.g., oil prices, tech performance, pharmaceuticals) are emphasized in both datasets.",
      "Analyst actions such as upgrades, downgrades, and price target adjustments are regularly cited (e.g., 'downgrade', 'target raised').",
      "Company-specific catalysts like mergers, legal issues, and product launches drive headlines (e.g., 'pipeline updates', 'recalls', 'new licenses').",
      "Numerical precision is used for metrics like sales figures, dividend amounts, and economic data (e.g., '$1.33B', 'NZ$0.16 dividend', '7% GDP').",
      "Market-moving geopolitical events (e.g., U.S.-China trade tensions, OPEC decisions, sanctions) are prominently featured.",
      "Headlines often blend qualitative commentary with quantitative data (e.g., 'COVID-19 impacts', 'consumer sentiment shifts').",
      "Regulatory developments (e.g., lawsuits, antitrust probes, licensing changes) are highlighted as key drivers of sentiment."
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Headlines in both datasets frequently include stock tickers prefixed with '$' (e.g., $AAPL, $NVDA, $TSLA).",
      "Both datasets emphasize immediate stock price movements using percentage changes (e.g., '-2%', '+15%').",
      "Analyst actions (downgrades, upgrades, price target revisions) drive sentiment in headlines across both datasets.",
      "Earnings results (EPS beats/misses, revenue performance) are central to headlines in both datasets.",
      "Premarket and after-hours trading activity is explicitly referenced (e.g., 'premarket', 'after-hours').",
      "Macroeconomic factors (e.g., inflation, GDP, Fed policy) are cited as market influencers in both datasets.",
      "Company-specific guidance adjustments (e.g., 'guidance cut', 'forecast lowered') appear in headlines.",
      "Regulatory, legal, or political developments impacting businesses are highlighted in both datasets.",
      "Headlines use industry-specific abbreviations (e.g., EPS, FQ2, GDP) without expanded definitions.",
      "Mentions of sector-specific catalysts (e.g., oil prices, chip demand, pandemic effects) are common to both."
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Both datasets include headlines referencing stock tickers prefixed with '$' (e.g., $TSLA, $GM, $NVDA).",
      "Price movement percentages (-2%, +4%, etc.) are explicitly stated in most headlines to quantify market reactions.",
      "Earnings results (beats/misses) and guidance revisions are central themes across all samples (e.g., 'EPS misses by $0.52', 'strong Q3 earnings beat').",
      "Analyst actions (upgrades/downgrades) and price target adjustments are frequently cited as market movers in both datasets.",
      "Specific financial metrics like revenue, EPS, and dividend declarations are consistently mentioned (e.g., 'dividend payout', 'revenue miss').",
      "Regulatory challenges and legal developments directly impacting companies appear across all samples (e.g., 'regulatory hurdles', 'FBI probe').",
      "Industry-specific catalysts are highlighted, including energy (OPEC decisions), pharma (FDA trials), and tech (semiconductor demand).",
      "Forward-looking statements about growth strategies, partnerships, or product launches are present in all headlines (e.g., 'new cannabis license', 'acquisition plans').",
      "Macroeconomic factors like oil prices, trade data, and GDP forecasts are consistently tied to stock performance in both datasets.",
      "Headlines use compressed financial jargon and abbreviations (e.g., 'FFO', 'premarket', 'Q2') without explanatory context, assuming reader familiarity."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Both datasets include stock tickers or company names, often prefixed with a '$' symbol.",
      "Headlines frequently mention percentage changes in stock prices or descriptors like 'steady' or 'flat'.",
      "Analyst actions (e.g., downgrades, upgrades, price target adjustments) are a central theme.",
      "Earnings reports (e.g., beats, misses, mixed results) are explicitly referenced.",
      "Guidance updates (e.g., cuts, future expectations) are highlighted as key drivers of market reactions.",
      "Specific financial metrics (e.g., EPS, revenue, sales figures) are quantified in most headlines.",
      "Trading periods (e.g., premarket, after-hours) are noted to contextualize price movements.",
      "Institutional analysts or firms (e.g., Morgan Stanley, Barclays) are cited as sources of ratings.",
      "Immediate market reactions (e.g., stock price shifts post-announcement) are emphasized.",
      "Industry-specific terminology (e.g., 'valuation concerns', 'demand growth') is consistently used."
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Both datasets include stock tickers prefixed with a '$' symbol (e.g., $TSLA, $NVDA).",
      "Headlines frequently reference percentage-based stock price movements (e.g., \"-2%\", \"+4% premarket\").",
      "Analyst actions (e.g., downgrades, upgrades, price target revisions) are prominently featured in both datasets.",
      "Earnings reports, including beats/misses on EPS and revenue, are central to headlines in both datasets.",
      "Forward-looking guidance (e.g., \"guidance cut\", \"modest bottom-line guidance\") is a recurring theme.",
      "Market reactions to news (e.g., \"shares fall 7%\", \"trades sideways\") are explicitly stated.",
      "Financial metrics like EPS, revenue, and FFO (Funds From Operations) are consistently cited.",
      "Technical analyst terminology (e.g., \"underweight\", \"overweight\", \"valuation concerns\") is used in both datasets.",
      "External factors impacting stocks (e.g., production issues, competition, regulatory news) are frequently mentioned.",
      "Specific event timelines (e.g., quarterly earnings dates, Fed meetings) are highlighted in both datasets."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Both datasets frequently mention stock tickers with percentage changes (e.g., \"-2%\", \"+4%\") to indicate immediate market reactions.",
      "Headlines in both datasets reference analyst actions (e.g., downgrades, upgrades, price target adjustments) as key drivers of stock movements.",
      "Earnings reports (e.g., \"beats/misses on revenue/EPS\") and guidance revisions are central to headlines in both datasets.",
      "Company-specific events (e.g., product launches, partnerships, lawsuits) are highlighted as catalysts for stock volatility in both datasets.",
      "Sector-specific trends (e.g., energy, tech, pharmaceuticals) are frequently discussed to contextualize individual stock movements.",
      "Macroeconomic factors (e.g., Fed policies, inflation, trade data) are cited as influencing broader market sentiment in both datasets.",
      "Regulatory or geopolitical developments (e.g., OPEC decisions, lawsuits, sanctions) are tied to market impacts in headlines from both datasets.",
      "Forward-looking statements (e.g., guidance cuts, growth forecasts, production targets) are emphasized to signal future performance expectations.",
      "Mentions of institutional actors (e.g., Morgan Stanley, Barclays, Goldman Sachs) frame analyst sentiment and credibility in both datasets.",
      "Mixed performance outcomes (e.g., \"beats on revenue but misses on EPS\") are consistently reported to reflect nuanced market reactions."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Both datasets include headlines referencing stock tickers with price movements (e.g., $TSLA, percentage changes).",
      "Analyst actions (upgrades, downgrades, price target revisions) are central to headlines in both datasets.",
      "Earnings reports (beats/misses) and revenue guidance are frequently mentioned in both.",
      "Headlines cite specific financial institutions (e.g., Barclays, Morgan Stanley, Goldman Sachs) as sources of analysis.",
      "Market sentiment indicators (bullish/bearish outlooks, volatility) are common in both datasets.",
      "External factors (e.g., supply chain issues, regulatory probes, geopolitical tensions) drive headlines in both.",
      "Company-specific operational updates (mergers, product launches, layoffs) are key topics in both datasets.",
      "Macroeconomic data (e.g., GDP, PMI, inflation) and policy decisions (Fed rates) influence headlines in both.",
      "Mixed tenses are used: past events (earnings results) and forward-looking statements (guidance, forecasts).",
      "Sectors covered overlap (tech, healthcare, energy, retail) with granular focus on stock-level impacts."
    ],
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Both datasets include headlines referencing stock ticker symbols (e.g., $TSLA, $NVDA, $PYPL) to identify companies.",
      "Headlines in both datasets frequently mention analyst actions such as price target adjustments, upgrades, or downgrades (e.g., 'Morgan Stanley cuts price target,' 'downgrade to Underperform').",
      "Both datasets highlight earnings reports, revenue results, and guidance revisions (e.g., 'EPS beats/misses,' 'revenue guidance slightly below expectations').",
      "References to pre-market or after-hours stock price movements are present in both datasets (e.g., 'premarket,' 'after hours').",
      "Sector-specific news (e.g., energy, biotech, semiconductors) is a common theme, with mentions of oil prices, clinical trials, or industry trends.",
      "Both datasets emphasize market reactions to external factors like geopolitical events, regulatory probes, or macroeconomic trends (e.g., 'OPEC+ cuts,' 'trade tensions').",
      "Headlines frequently quantify performance using percentages (e.g., '-2%,' '15% gain expected') to describe stock movements or growth projections.",
      "Mentions of institutional investors or firms (e.g., Morgan Stanley, Goldman Sachs, JPMorgan) as sources of analysis or ratings are consistent across both datasets.",
      "Both include forward-looking statements about growth potential, risks, or future performance (e.g., 'strong pipeline,' 'slowing revenue growth concerns').",
      "Use of financial terminology like 'dividend declarations,' 'mixed shelf filings,' and 'ETF outlooks' is prevalent in both datasets."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Both datasets include headlines referencing specific stock price movements using terms like 'plummet', 'soar', or percentage changes (e.g., '-2%', '+4%').",
      "Headlines in both datasets frequently mention earnings reports, guidance updates, or revenue results (e.g., 'misses on revenue', 'beats expectations').",
      "Company names and ticker symbols (e.g., '$TSLA', 'Apple') are explicitly included to identify market actors.",
      "Both datasets highlight macroeconomic factors influencing markets, such as interest rates, inflation, or GDP growth (e.g., 'Fed\u2019s interest rate decision', 'economic uncertainty').",
      "References to sector-specific performance (e.g., tech, energy, pharmaceuticals) are common across headlines.",
      "Analyst actions, such as upgrades, downgrades, or price target adjustments, are cited as drivers of market activity.",
      "Headlines frequently quantify financial metrics (e.g., 'record $1 billion quarterly revenue', '15% premarket gain').",
      "Both datasets include forward-looking statements about market trends, risks, or growth potential (e.g., 'guidance cut', 'predicts sharp decline').",
      "Regulatory or policy events (e.g., Fed meetings, lawsuits, sanctions) are cited as catalysts for market reactions.",
      "Headlines use concise, event-driven structures focusing on immediate market impacts rather than long-term analysis."
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Both datasets include stock ticker symbols prefixed with '$' (e.g., $NVDA, $XLE, $BYND).",
      "Headlines frequently mention stock price movements with explicit percentage changes (e.g., '-2%', '+4%').",
      "Earnings reports (EPS beats/misses), revenue results, and guidance revisions are central to headlines in both datasets.",
      "Analyst actions (upgrades, downgrades, price target changes) are consistently highlighted (e.g., 'downgrade', 'cuts price target').",
      "Company-specific events (mergers, acquisitions, product launches, regulatory updates) drive sentiment (e.g., 'acquires', 'FDA approval').",
      "Sector-specific trends (e.g., energy, tech, biotech) are emphasized, with sector ETFs (e.g., $XLE) referenced in both.",
      "Premarket/post-market trading activity is noted (e.g., 'premarket', 'after hours').",
      "Macroeconomic factors (trade tensions, geopolitical risks, commodity prices) contextualize market movements.",
      "Forward-looking guidance (e.g., 'forecasts', 'expects', 'cuts outlook') is a recurring theme.",
      "Technical trading terms (e.g., 'momentum', 'support/resistance', 'bull flags') appear in both datasets."
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Headlines frequently mention company earnings reports and their impact on stock performance (e.g., beats/misses).",
      "Stock price movements (e.g., percentages, terms like 'plummet' or 'surge') are explicitly stated in most headlines.",
      "Regulatory or government agency actions (e.g., FDA, Federal Reserve, OPEC) are common contextual drivers.",
      "Healthcare sector news often focuses on drug approvals/clinical trial results affecting stock prices.",
      "Guidance revisions (e.g., cuts/raises) and forward-looking economic forecasts are recurring themes.",
      "Market indices/sectors (e.g., tech, energy) are frequently referenced to contextualize performance.",
      "Analyst actions (upgrades/downgrades) and institutional outlooks are cited as price catalysts.",
      "Macroeconomic factors (e.g., inflation, oil prices, trade tensions) are tied to market movements.",
      "Mentions of mergers, partnerships, or strategic investments appear in both datasets.",
      "Volatility terminology (e.g., 'plummets', 'slides', 'rallies') is consistently used to describe price action."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Both datasets include stock tickers and percentage changes indicating immediate market reactions (e.g., \"$AAPL -2%\" in A and \"$TSLA shares surge\" in B).",
      "Headlines frequently reference analyst actions such as upgrades, downgrades, or price target adjustments (e.g., \"Goldman Sachs downgrades\" in B and \"Cowen's target lift\" in A).",
      "Earnings reports and guidance (beating/missing estimates) are central themes (e.g., \"beats on revenue\" in A and \"meeting Wall Street expectations\" in B).",
      "Specific financial metrics like EPS, revenue, and sales figures are explicitly mentioned (e.g., \"EPS misses by $0.52\" in A and \"revenue increases\" in B).",
      "Market indices (e.g., Dow Jones, S&P 500) and economic indicators (e.g., GDP, inflation) are cited to contextualize broader trends (e.g., \"oil near $50\" in A and \"Nasdaq Composite\" in B).",
      "Company-specific operational updates (e.g., product launches, partnerships, legal issues) are highlighted (e.g., \"new cannabis license\" in A and \"battery technology\" in B).",
      "Forward-looking statements (guidance, forecasts) from firms or analysts are common (e.g., \"2020 sales growth\" in A and \"Fed rate hike\" in B).",
      "Industry-specific jargon (e.g., \"FFO,\" \"price target,\" \"mixed shelf\") is consistently used across both datasets.",
      "Coverage spans multiple sectors, including tech, energy, healthcare, and consumer goods (e.g., \"NVIDIA\" in A and \"Tesla\" in B).",
      "External factors (e.g., geopolitical events, pandemics, regulations) are cited as market drivers (e.g., \"OPEC+ cuts\" in A and \"China regulatory pressures\" in B)."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Both datasets include stock tickers prefixed with '$' to denote specific companies or financial instruments.",
      "Headlines frequently reference percentage-based price movements (e.g., '-2%', '+15%') tied to market events.",
      "Analyst actions (e.g., upgrades, downgrades, price target changes) are prominently featured in both datasets.",
      "Earnings reports and financial metrics (e.g., 'beats/misses on revenue', EPS figures) are central themes.",
      "Company-specific news (e.g., product launches, partnerships, regulatory updates) drives sentiment in headlines.",
      "Forward-looking guidance revisions (e.g., 'guidance cut', 'raises outlook') are common in both datasets.",
      "Market indices (e.g., S&P 500) and ETFs (e.g., $SPY, $XLE) are explicitly mentioned in multiple samples.",
      "References to analyst firms (e.g., Morgan Stanley, Goldman Sachs, Credit Suisse) appear consistently.",
      "Sector-specific terminology (e.g., 'oil', 'biotech', 'semiconductors') contextualizes market developments.",
      "Quantitative financial data (e.g., '$1.33 billion', '15 million pounds') is used to substantiate claims."
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Both datasets consistently include stock ticker symbols, often prefixed with a currency symbol (e.g., $SRNE, $AAPL).",
      "Headlines frequently reference percentage-based price movements (e.g., '-2%', '+5.1% premarket') to quantify market reactions.",
      "Analyst actions (e.g., upgrades, downgrades, price target adjustments) are central to headlines in both datasets (e.g., 'downgrades $lcii', 'raises price target').",
      "Earnings results and guidance (e.g., 'beats on revenue', 'misses by $0.52') are critical data points in all samples.",
      "Specific financial metrics like EPS, revenue, and FFO (Funds From Operations) are explicitly cited in both datasets.",
      "Forward-looking statements (e.g., 'guidance cut', 'strong growth potential') are prevalent across all headlines.",
      "References to institutional entities (e.g., Morgan Stanley, Barclays, Goldman Sachs) drive sentiment and credibility in both datasets.",
      "Sector diversity (e.g., biotech, energy, tech) is consistently represented in the companies covered.",
      "Event-driven updates (e.g., clinical trials, product launches, regulatory decisions) are granularly detailed in all samples.",
      "External market factors (e.g., oil prices, geopolitical tensions, pandemic impacts) are frequently tied to stock performance in both datasets."
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Headlines frequently mention stock price movements with percentage changes (e.g., \"-2%\" or \"soars 5%\").",
      "Earnings reports (e.g., EPS beats/misses, revenue results) are a central focus across both datasets.",
      "References to central bank actions (e.g., Federal Reserve interest rate decisions) appear consistently.",
      "Sector-specific updates (e.g., oil, renewable energy, tech) drive sentiment in headlines.",
      "Analyst ratings, forecasts, or target adjustments are cited as market catalysts.",
      "Regulatory or legal developments (e.g., lawsuits, sanctions, policy changes) impact sentiment.",
      "Company-specific news (e.g., mergers, product launches, leadership changes) is highlighted.",
      "Forward-looking guidance (e.g., revenue cuts, growth targets) influences market reactions.",
      "Market-wide indices (e.g., S&P 500) and macroeconomic trends (e.g., inflation) are referenced.",
      "Ticker symbols and financial jargon (e.g., \"EPS,\" \"dividend,\" \"FFO\") are used throughout."
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Both datasets use ticker symbols prefixed with '$' to identify companies (e.g., $AAPL, $TSLA).",
      "Headlines frequently mention stock price movements with percentage changes (e.g., '-2%', '+4.2% pre').",
      "Analyst actions (upgrades/downgrades) and price target adjustments are central to headlines in both datasets.",
      "Earnings reports (EPS/revenue beats/misses) and guidance revisions are common focal points.",
      "Company-specific catalysts (e.g., mergers, clinical trial results, partnerships) drive sentiment in both sets.",
      "Regulatory/legal developments (e.g., lawsuits, sanctions, licensing) are frequently cited as market movers.",
      "Sector-specific trends (energy, tech, retail) are highlighted with granular industry terminology.",
      "Forward-looking statements about growth, demand, or economic forecasts appear consistently.",
      "Market indices (e.g., SPY, XLE) and macroeconomic indicators (oil prices, GDP) contextualize stock moves.",
      "Technical trading terms (e.g., 'premarket', 'after hours', 'short', 'bull/bear') are used in both datasets."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "Both datasets include stock ticker symbols (e.g., $AAPL, $TSLA) paired with price movements (e.g., \"-2%\", \"+4%\").",
      "Headlines frequently reference earnings reports, guidance updates, or revenue results (e.g., \"misses on revenue\", \"Q3 Earnings Miss Estimate\").",
      "Analyst actions (e.g., \"downgrades\", \"price target cut\", \"upgrades\") are central to headlines in both datasets.",
      "Specific numerical metrics (e.g., EPS figures, sales percentages, dividend declarations) are consistently highlighted.",
      "Sector-specific developments (e.g., oil prices, pharmaceuticals, tech, EVs) are granularly detailed in both datasets.",
      "Market-moving events (e.g., Fed policy updates, trade data, regulatory decisions) are prominently featured.",
      "Forward-looking statements (e.g., \"guidance cut\", \"growth forecast downgraded\", \"economic uncertainty\") are common themes.",
      "Headlines incorporate direct quotes, analyst commentary, or institutional perspectives (e.g., \"Morgan Stanley says\", \"Goldman Sachs predicts\").",
      "Company-specific operational updates (e.g., product launches, mergers, litigation, leadership changes) are emphasized.",
      "Macroeconomic factors (e.g., inflation, GDP trends, recession risks) contextualize market reactions in both datasets."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Both datasets include stock tickers prefixed with a dollar symbol (e.g., $TSLA, $AAPL).",
      "Headlines frequently reference percentage-based stock price movements (e.g., -2%, +4%).",
      "Earnings reports (e.g., beats/misses on revenue, EPS) are a central focus in both datasets.",
      "Analyst actions (upgrades, downgrades, price target changes) are prominently featured (e.g., Morgan Stanley, Oppenheimer).",
      "Mentions of company-specific financial metrics (e.g., revenue growth, guidance cuts) are consistent across both datasets.",
      "Market sector focus (e.g., tech, energy, biotech) is granular and explicit in headlines.",
      "Corporate announcements (e.g., partnerships, product launches, regulatory issues) drive sentiment in both datasets.",
      "Macroeconomic factors (e.g., oil prices, interest rates, geopolitical events) influence headline narratives.",
      "Terminology related to trading strategies (e.g., bullish, bearish, mixed shelf) appears in both datasets.",
      "References to ETFs, indices (e.g., SPY, XLF), and market-wide volatility are common."
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Both datasets include headlines referencing stock tickers using symbols prefixed with '$' (e.g., $AAPL, $GE).",
      "Headlines frequently mention percentage changes in stock prices (e.g., '-2%', 'shares rise 5%').",
      "Analyst actions (e.g., downgrades, upgrades, price target adjustments) are a recurring theme in both datasets.",
      "Earnings reports (e.g., 'EPS beats/misses', 'revenue surprises') are prominently featured in both sets.",
      "Both datasets highlight company-specific financial metrics like revenue, EPS, and guidance revisions.",
      "Sector-specific news (e.g., energy, biotech, retail) is consistently addressed across all samples.",
      "Headlines often reference corporate events such as product launches, partnerships, or regulatory approvals.",
      "Market indices (e.g., S&P 500, Russell 2000) and ETFs (e.g., $SPY, $USO) are mentioned in both datasets.",
      "Forward-looking statements (e.g., 'guidance cut', 'reaffirms full-year outlook') are common to both sets.",
      "Both datasets use abbreviated financial terminology (e.g., 'Q3', 'FY23') and formatting conventions (e.g., hashtags like #MarketScreener)."
    ],
    "llama3.3-70b_few-shot_v1": [
      "Both datasets focus on stock price movements triggered by earnings reports, revenue results, or analyst expectations (e.g., 'Okta -2%' in A and 'Amazon Stock Plummets 10%' in B).",
      "Headlines frequently reference specific companies and ticker symbols (e.g., '$TSLA' in A and 'Tesla Stock Surges' in B).",
      "Earnings guidance misses or beats are emphasized as key drivers of market reactions (e.g., 'Vir Biotechnology EPS misses' in A and 'Tesla Surpasses Earnings Expectations' in B).",
      "Sector-specific trends (e.g., tech, energy, pharmaceuticals) are highlighted in both datasets (e.g., 'NVIDIA GPU-Powered Semi Simulation' in A and 'Tech Stocks Surge' in B).",
      "Macroeconomic factors like Federal Reserve policy, interest rates, and inflation are recurring themes (e.g., 'Fed Keeps Rates Steady' in A and 'Fed Cuts Interest Rates' in B).",
      "Market indices (e.g., Dow Jones, Nasdaq) are frequently cited as performance benchmarks (e.g., 'Downgrades 4/7' in A and 'Dow Jones Plunges 500 Points' in B).",
      "Analyst upgrades/downgrades and price target revisions are common catalysts (e.g., 'Cantor doubled price target' in A and 'Bank of America Upgrades Netflix' in B).",
      "Headlines often quantify financial metrics (e.g., percentages, revenue figures) to contextualize market moves (e.g., 'Myriad Genetics slips 27%' in A and '25% Revenue Increase' in B).",
      "Forward-looking statements about growth, risks, or guidance cuts are prominent (e.g., 'Honeywell says MAX production freeze' in A and 'Analysts Predict Growth for Tesla' in B).",
      "Both datasets blend breaking news with analytical commentary (e.g., 'Cash is king' quotes in A and 'Analysts Forecast Mixed Results' in B)."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include stock ticker symbols prefixed with a dollar sign (e.g., $AAPL, $TSLA).",
      "Headlines frequently mention earnings results (e.g., beats/misses on EPS/revenue) and guidance revisions.",
      "Analyst actions (upgrades, downgrades, price target changes) are prominently featured in both datasets.",
      "Specific percentage changes in stock prices (e.g., +4%, -5%) are consistently highlighted.",
      "References to sector-specific trends (e.g., energy, biotech, retail) are granular and recurring.",
      "Company-specific catalysts (clinical trials, partnerships, product launches) are emphasized in both sets.",
      "Macroeconomic factors (oil prices, inflation, OPEC decisions) are directly tied to stock movements.",
      "Dividend announcements, capital raises, and financial restructuring are common topics.",
      "Use of standardized financial terminology like \"guidance cut,\" \"FFO,\" and \"mixed shelf offering\" appears in both.",
      "Mentions of regulatory actions, lawsuits, and geopolitical risks (e.g., sanctions, trade data) are present in all samples."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "Both datasets include stock ticker symbols (e.g., $XYZ, $ABC) to identify companies.",
      "Headlines frequently reference percentage changes in stock prices (e.g., -2%, +4% premarket).",
      "Analyst actions (downgrades, upgrades, price target adjustments) are a central focus in both datasets.",
      "Earnings reports (e.g., misses, beats) and revenue outcomes are prominently featured.",
      "Specific financial metrics (EPS, revenue) and guidance revisions (cuts, raises) are explicitly mentioned.",
      "Mentions of investment banks and financial institutions (e.g., Barclays, Morgan Stanley, Goldman Sachs) are consistent.",
      "News includes forward-looking statements about company performance (e.g., production targets, growth outlook).",
      "Market reactions (e.g., stock movements premarket, after hours) are highlighted in real-time context.",
      "Sector-specific developments (oil, biotech, tech, retail) drive headlines in both datasets.",
      "Regulatory, legal, or operational challenges (e.g., FDA warnings, lawsuits, supply issues) are common themes."
    ],
    "llama3.1-8b_zero-shot_v1": [
      "Both datasets include headlines with explicit stock price movements (e.g., percentages, premarket/postmarket changes).",
      "Headlines frequently reference company earnings results (e.g., beats/misses on EPS/revenue).",
      "Central bank actions (e.g., Federal Reserve rate decisions, ECB policies) are a recurring theme.",
      "Sector-specific news (e.g., tech, energy, pharmaceuticals) is prominently featured in both datasets.",
      "Market-moving events (e.g., OPEC decisions, trade tensions, geopolitical risks) are highlighted.",
      "Analyst actions (e.g., upgrades/downgrades, price target adjustments) influence headline narratives.",
      "Economic indicators (e.g., GDP forecasts, unemployment data, inflation) are cited as market drivers.",
      "Company-specific catalysts (e.g., product launches, lawsuits, leadership changes) are emphasized.",
      "Mentions of guidance revisions (e.g., cuts, reaffirmations) appear in response to financial performance.",
      "Technical financial terminology (e.g., FFO, dividends, short interest) is consistently used."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Both datasets include headlines that reference specific stock tickers using symbols prefixed with '$' (e.g., $TSLA, $AAPL).",
      "Headlines in both datasets frequently mention percentage changes in stock prices (e.g., '-2%', '+4%', 'plunging 10%').",
      "Analyst actions (e.g., downgrades, upgrades, price target adjustments) are central to headlines in both datasets.",
      "Earnings reports (e.g., 'beats/misses on revenue', 'EPS results') are a recurring theme in both datasets.",
      "Company-specific operational events (e.g., production delays, partnerships, regulatory approvals) drive sentiment in headlines across both datasets.",
      "Both datasets emphasize granular financial metrics such as revenue, guidance revisions, and quarterly performance (e.g., 'Q4 earnings', 'FQ2 results').",
      "Market-moving external factors (e.g., OPEC decisions, inflation data, supply chain issues) are cited in headlines from both datasets.",
      "Headlines in both datasets attribute stock price movements to explicit causes (e.g., 'on valuation downgrade', 'due to supply chain challenges').",
      "References to institutional actors (e.g., Morgan Stanley, Barclays, the Federal Reserve) are common in both datasets.",
      "Sector diversity (e.g., tech, energy, pharmaceuticals, automotive) is reflected in headlines across both datasets."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Both datasets include stock ticker symbols (e.g., $NVDA, $TSLA, $AAPL) to identify companies.",
      "Headlines frequently reference percentage price movements (e.g., \"-2%\", \"+4% pre\", \"slips 27%\").",
      "Earnings reports (e.g., \"Q2 results\", \"preliminary Q1\") and financial metrics (EPS, revenue) are central themes.",
      "Analyst actions (upgrades/downgrades, price target revisions) are explicitly mentioned (e.g., \"J.P. Morgan downgrade\", \"Barclays cools\").",
      "Sector-specific focus on industries like energy (oil), biotech, tech, and consumer goods appears in both datasets.",
      "External market drivers (e.g., OPEC decisions, regulatory probes, COVID-19 impacts) are cited as catalysts.",
      "Forward-looking language (e.g., \"guidance cut\", \"growth potential\", \"awaiting further guidance\") is consistently used.",
      "Headlines incorporate financial jargon like \"FFO misses\", \"mixed shelf filing\", and \"capital raise.\"",
      "Mentions of institutional actors (e.g., Federal Reserve, Morgan Stanley, Goldman Sachs) contextualize market sentiment.",
      "Event-driven updates (e.g., clinical trial results, product launches, mergers) are reported with immediate price reactions."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Headlines in both datasets frequently mention specific stock tickers or company names alongside price movements or financial metrics.",
      "Both datasets include references to earnings reports (e.g., EPS beats/misses, revenue results) and guidance updates (e.g., cuts, raises).",
      "Regulatory or political impacts on markets (e.g., FBI probes, OPEC decisions, antitrust scrutiny) are common themes.",
      "Industry-specific developments (e.g., semiconductor shortages, oil production, biotech trials) drive headlines in both datasets.",
      "Forward-looking statements (e.g., growth forecasts, Fed rate decisions, trade tensions) are consistently highlighted.",
      "Market sentiment terms like \"surge,\" \"plunge,\" \"slips,\" and \"beats\" are used to describe price action in both sets.",
      "Mentions of macroeconomic factors (e.g., GDP, inflation, interest rates, job growth) appear in both datasets.",
      "Both include headlines about mergers, acquisitions, partnerships, or strategic investments affecting stock performance.",
      "Technical trading patterns (e.g., \"new 52-week highs,\" \"momentum,\" \"resistance levels\") are occasionally referenced.",
      "Headlines frequently use financial jargon like \"FFO,\" \"dividend declarations,\" \"short positions,\" and \"valuation downgrades.\"\n]"
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Headlines reference specific companies or financial instruments using ticker symbols (e.g., $TSLA, $AAPL) or explicit names.",
      "Mentions of stock price movements (e.g., percentage changes like -2%, +4%) or directional trends (e.g., \"surges,\" \"slips\").",
      "Includes analyst actions (e.g., upgrades, downgrades, price target adjustments) from institutions like Barclays, Morgan Stanley, or Oppenheimer.",
      "Discusses earnings results, revenue performance, or EPS metrics (e.g., \"beats/misses expectations\").",
      "Highlights company-specific catalysts (e.g., product launches, clinical trials, mergers, or regulatory decisions).",
      "References forward-looking statements (e.g., guidance revisions, future growth strategies, or macroeconomic forecasts).",
      "Uses sector-specific terminology (e.g., \"oil demand,\" \"cloud computing,\" \"clinical trial results\") to contextualize events.",
      "Balances positive and negative sentiment (e.g., \"strong quarterly earnings\" vs. \"disappointing sales\").",
      "Employs concise, structured phrasing (e.g., \"Company X [action] due to [event]\").",
      "Cites external factors influencing markets (e.g., competition, regulatory changes, geopolitical events)."
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "Both datasets include headlines referencing stock tickers prefixed with '$' (e.g., $MGI in A, $RRC in B).",
      "Price movements (e.g., '+4.2% pre' in A, 'Up 15%' in B) are explicitly quantified using percentages.",
      "Analyst actions (upgrades/downgrades) are cited as market catalysts (e.g., 'downgrades 4/7' in A, 'Downgraded by Oppenheimer' in B).",
      "Earnings metrics (EPS/revenue beats/misses) are a recurring focus (e.g., 'EPS misses by $0.52' in A, 'Q4 earnings miss' in B).",
      "Forward-looking guidance adjustments (e.g., 'guidance cut' in A, 'cuts full-year guidance' in B) are prominently featured.",
      "Company-specific developments (e.g., partnerships, investments) drive sentiment (e.g., 'Ripple Makes Investment' in A, 'collaboration with BioTech Innovations' in B).",
      "Sector-specific catalysts (e.g., oil prices in A, semiconductor trends in B) contextualize stock movements.",
      "Regulatory/legal risks (e.g., 'lawsuit' in A, 'fraud allegations' in B) are cited as market risks.",
      "Market indices/ETFs (e.g., 'S&P 500' in A, '$SPY' in B) anchor broader market context.",
      "Analyst firms (e.g., Cowen, Oppenheimer) and data sources (e.g., MarketScreener) are explicitly named in both datasets."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "All headlines reference specific companies, financial instruments, or economic indicators relevant to financial markets.",
      "Each headline includes financial terminology such as earnings (EPS), revenue, guidance, or market sectors.",
      "Mentions of stock price movements (e.g., percentage changes, price targets, or trends) are present in all samples.",
      "Analyst actions (e.g., upgrades, downgrades, price target adjustments) or institutional commentary are a consistent theme.",
      "Headlines focus on corporate events impacting valuations, such as earnings reports, partnerships, or regulatory developments.",
      "Numerical data (e.g., percentages, monetary figures, dates) is integrated to quantify financial performance or projections.",
      "Forward-looking statements (e.g., forecasts, guidance, strategic initiatives) are emphasized across all samples.",
      "References to market indices, ETFs, or sector-specific trends (e.g., energy, tech) are universally included.",
      "Standardized financial event descriptors (e.g., 'beats/misses estimates,' 'cuts guidance') structure the headlines.",
      "All samples are formatted to highlight immediate market impact, such as pre/post-market reactions or investor sentiment."
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Both datasets include headlines referencing stock tickers with symbols (e.g., $AAPL, $TSLA).",
      "Headlines frequently mention earnings reports (e.g., \"beats estimates,\" \"misses expectations\").",
      "Analyst actions (upgrades, downgrades, price target changes) are a recurring theme in both datasets.",
      "Revenue figures and growth metrics are prominently highlighted in headlines.",
      "Stock price movements (e.g., \"-2%,\" \"Shares Plummet\") are explicitly stated.",
      "References to specific fiscal quarters (e.g., Q3, Q4) are common in both datasets.",
      "Headlines frequently compare company performance to analyst expectations.",
      "Sector-specific focus (e.g., tech, energy, pharmaceuticals) is granularly addressed in both datasets.",
      "Guidance revisions (e.g., \"guidance cut,\" \"raises forecast\") are consistently mentioned.",
      "Terminology like \"downgrade,\" \"upgrade,\" and \"maintains\" is standardized across both datasets for analyst actions."
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "All headlines reference specific financial entities such as companies, stocks, or sectors using tickers (e.g., $TSLA) or explicit names.",
      "Each headline highlights immediate market-moving events (e.g., earnings reports, analyst actions, mergers, or macroeconomic developments).",
      "Analyst actions (upgrades, downgrades, price target changes) are explicitly mentioned in both datasets.",
      "Numerical data (e.g., percentage changes, revenue figures, EPS metrics) are included to quantify financial performance or market reactions.",
      "Forward-looking statements (guidance cuts, growth forecasts, outlook revisions) are a recurring theme in both datasets.",
      "Stock price movements are described using terms like \"slides,\" \"surges,\" or explicit percentage changes (e.g., \"-2%,\" \"+4.5%\").",
      "Headlines emphasize cause-effect relationships (e.g., \"X falls due to Y,\" \"Z rises after A\") to explain market reactions.",
      "Sector-specific terminology (e.g., \"oil prices,\" \"cloud computing,\" \"retail sales\") contextualizes the financial impact.",
      "Regulatory, geopolitical, or macroeconomic factors (e.g., trade tensions, OPEC decisions, interest rates) are cited as drivers of market shifts.",
      "Earnings metrics (EPS beats/misses, revenue comparisons) and quarterly performance are central to the narratives in both datasets."
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Headlines in both datasets prominently feature stock ticker symbols (e.g., $TSLA, $GOOG) alongside price action or corporate updates",
      "Earnings reports and guidance (beats/misses) are central narrative drivers in headlines from both datasets",
      "Analyst rating changes (upgrades/downgrades) and price target adjustments are frequently cited catalysts",
      "Mentions of specific financial metrics (EPS, revenue, net income) appear consistently across both datasets",
      "Sector-specific developments (energy, tech, pharma) are emphasized with industry jargon in both",
      "Regulatory actions (FAA approvals, SEC investigations) and legal developments impact sentiment in both",
      "Corporate strategic moves (partnerships, spinoffs, product launches) are highlighted as key drivers",
      "Market index references (S&P 500, Nasdaq) provide macroeconomic context in both datasets",
      "Mixed outcome scenarios (revenue beat with EPS miss) are commonly described using similar phrasing",
      "Temporal trading context (premarket, after hours) is consistently noted for price movements"
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include headlines referencing stock tickers with dollar signs (e.g., $TSLA, $TAP).",
      "Headlines in both datasets frequently mention price movements (e.g., \"-2%\", \"slips 27%\", \"surges\").",
      "Both datasets focus on earnings reports, revenue results, and guidance updates (e.g., \"EPS misses\", \"beats on revenue\").",
      "Analyst actions (e.g., downgrades, upgrades, price target revisions) are central to headlines in both datasets.",
      "Both include references to company-specific events impacting stock performance (e.g., product launches, partnerships, lawsuits).",
      "Sector-specific trends (e.g., energy, tech, healthcare) are highlighted in headlines across both datasets.",
      "Forward-looking statements (e.g., \"guidance cut\", \"await further guidance\") are common in both datasets.",
      "Regulatory or legal developments (e.g., FDA warnings, lawsuits) are cited as market catalysts in both datasets.",
      "Both datasets use concise, numerical-driven language (e.g., \"misses by $0.01\", \"revenue of $1.33B\").",
      "Headlines in both datasets reference external economic factors (e.g., OPEC decisions, inflation, trade data) affecting markets."
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Both datasets include headlines referencing specific stock price movements (e.g., percentage changes) tied to company-specific news or events.",
      "Headlines frequently mention earnings reports (EPS beats/misses) and revenue results impacting market performance.",
      "Guidance revisions (cuts or upgrades) from companies are highlighted as catalysts for stock reactions in both datasets.",
      "Regulatory, legal, or political developments (e.g., lawsuits, sanctions, antitrust concerns) are cited as market influencers.",
      "Sector-specific trends (e.g., energy, tech, biotech) dominate headlines, with granular updates on industry dynamics.",
      "Central bank policies (e.g., Federal Reserve interest rates, ECB actions) are frequently cited as drivers of market sentiment.",
      "Market indices (e.g., S&P 500, Nasdaq, Dow Jones) are referenced to contextualize broader market movements.",
      "Company-specific operational updates (e.g., mergers, clinical trials, product launches) are linked to stock performance.",
      "Economic indicators (e.g., GDP forecasts, unemployment rates, trade data) are used to explain macroeconomic impacts on markets.",
      "Headlines employ financial jargon (e.g., \"guidance cut,\" \"premarket,\" \"FFO,\" \"EPS\") and sector-specific terminology consistently."
    ]
  },
  "diffs_synth_from_real": {
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently specify the analyst/institution behind actions (e.g., 'Oppenheimer', 'Morgan Stanley'), while A often mentions analyst actions generically",
      "Dataset B maintains strict financial focus without political/entertainment tangents present in A (e.g., Trump probes, Taylor Swift mentions)",
      "Dataset B entries always directly connect stock movements to specific analyst actions, while A sometimes cites non-analyst catalysts (e.g., clinical trial results)",
      "Dataset B shows more frequent price target amount disclosures ($50, $55) compared to A's general mentions of target changes",
      "Dataset B uses standardized financial terminology consistently, while A includes casual language/phrases (e.g., 'woke up!!!', 'adding. Big catalyst coming up')",
      "Dataset B maintains neutral tone in earnings reports, while A includes stronger sentiment language (e.g., 'crushes', 'tumbles', 'plunges')",
      "Dataset B focuses exclusively on institutional analyst perspectives, while A includes retail investor perspectives (e.g., 'Love the news', 'Cantore doubled its price target')",
      "Dataset B headlines strictly follow '[Analyst] [Action] on [Ticker] [Rationale]' structure, while A uses varied sentence constructions",
      "Dataset B contains no social media tags/hashtags (#MarketScreener) present in multiple A samples",
      "Dataset B shows higher frequency of ETF coverage ($SPY, $GLD) compared to A's focus on individual equities"
    ],
    "llama3.1-8b_few-shot_v1": [
      "Dataset B headlines emphasize macroeconomic trends (e.g., GDP, inflation, recession) as primary drivers, while A focuses on granular company-specific catalysts (e.g., drug trials, mergers).",
      "B uses formal, structured phrasing (e.g., 'Economic growth slows to 2.1% in Q3') compared to A's casual tone (e.g., '$SINT woke up !!!!').",
      "B frequently references major indices (e.g., NASDAQ, Dow Jones) and broad market movements, whereas A highlights individual stock tickers and premarket/after-hours price changes.",
      "B integrates geopolitical events (e.g., trade wars) into macroeconomic narratives, while A ties them directly to sector/stock impacts (e.g., oil prices, sanctions).",
      "B headlines prioritize forward-looking economic forecasts (e.g., 'analysts warn of recession'), while A emphasizes real-time analyst actions (e.g., 'downgrade', 'target raised').",
      "B features explicit mentions of government policy impacts (e.g., Biden\u2019s stimulus, Fed rate cuts), unlike A\u2019s focus on regulatory outcomes (e.g., lawsuits, recalls).",
      "B avoids non-financial commentary (e.g., celebrity references, jokes) present in A (e.g., 'So Halsey needs a shower').",
      "B standardizes numerical precision for economic metrics (e.g., '6.1% GDP'), while A diversifies metrics (e.g., sales figures, dividend amounts).",
      "B headlines contextualize earnings/results within macroeconomic shifts (e.g., 'Amazon\u2019s Earnings Soar Amid Pandemic Boom'), whereas A isolates earnings as standalone catalysts.",
      "B consistently frames market movements as reactions to systemic risks (e.g., 'global recession fears'), while A attributes volatility to discrete events (e.g., 'guidance cut', 'recalls')."
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Dataset B headlines consistently reference specific analyst firms (e.g., Morgan Stanley, Bernstein) in downgrade/upgrade rationales, while A mentions analyst actions generically without naming institutions.",
      "Dataset B emphasizes formal economic indicator releases (e.g., CPI, GDP) and government policy outcomes, whereas A references macroeconomic factors anecdotally without structured data references.",
      "Dataset B headlines include explicit long-term growth projections (e.g., 'next 5 years', '2023 outlook'), while A focuses on immediate catalysts without extended timelines.",
      "Dataset B systematically pairs stock tickers with full company names (e.g., 'NVIDIA (NVDA)'), whereas A uses tickers exclusively or informally (e.g., '$NVDA -').",
      "Dataset B headlines feature granular price target adjustments (e.g., 'cut to $275', 'raised to $350'), while A omits specific numerical targets in favor of percentage movements.",
      "Dataset B prioritizes tech sector giants (e.g., Apple, Microsoft, NVIDIA) across most samples, while A distributes coverage more evenly across biotech, energy, and consumer sectors.",
      "Dataset B incorporates international institutional perspectives (e.g., EU economic losses, UK inflation) as core narrative drivers, whereas A's global references are secondary to company-specific news.",
      "Dataset B uses structured earnings preview/result frameworks ('Analysts Expect...', 'Reports Strong...'), while A employs fragmented commentary with colloquial phrases (e.g., 'slips', 'notches').",
      "Dataset B headlines maintain neutral, institutional language throughout, whereas A includes social media artifacts (hashtags, emojis) and trader slang (e.g., 'CaPre', 'HOD').",
      "Dataset B explicitly ties company developments to sector-wide competition dynamics (e.g., 'rising rivalry', 'slowing demand'), while A presents corporate news as isolated events."
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently include standardized ticker symbols in parentheses (e.g., ($PLAY), ($GM)), while Dataset A uses variable formats (e.g., $TSLA, standalone tickers without parentheses).",
      "All Dataset B samples reference specific calendar dates/times for events (e.g., 'July 15, 2023', '8:30 AM ET Conference Call'), whereas Dataset A uses relative time indicators (e.g., 'premarket', 'after hours') without fixed dates.",
      "Dataset B exclusively maintains formal sentence structure with complete financial reporting language, while Dataset A contains informal elements (e.g., Twitter-style abbreviations, emojis, conversational phrases like 'woke up !!!!').",
      "100% of Dataset B headlines focus on corporate financial activities (earnings, analyst actions, strategic initiatives), while Dataset A includes non-financial content (e.g., political news, celebrity references, shower mentions).",
      "Dataset B uniformly references analyst firms by full institutional names (e.g., Morgan Stanley, Barclays), whereas Dataset A uses casual references (e.g., 'Cantor', 'Rosenblatt') or omits sources.",
      "All Dataset B samples contain explicit forward guidance timelines (e.g., 'Q3 2023', 'Fall 2023'), while Dataset A uses vague future references (e.g., 'coming up very soon', 'new manufacturing facility').",
      "Dataset B headlines consistently quantify analyst actions with both rating changes and specific price targets (e.g., 'cuts price target to $38'), whereas Dataset A often mentions ratings without numerical targets.",
      "100% of Dataset B samples maintain English-only text with standardized financial terminology, contrasting with Dataset A's inclusion of multilingual content (e.g., Chinese characters) and non-standard abbreviations.",
      "Dataset B exclusively uses complete corporate names with industry descriptors (e.g., 'Chesapeake Energy', 'Vertex Pharmaceuticals'), while Dataset A frequently uses colloquial abbreviations (e.g., 'AMD', 'GM').",
      "All Dataset B headlines structure information in institutional reporting format (Company + Action + Quantitative Detail), whereas Dataset A includes social media elements (hashtags, URLs, tweet-style recaps)."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Dataset B headlines focus predominantly on a narrow set of major tech companies (e.g., NVDA, TSLA, GOOG), while Dataset A covers a broader range of industries and smaller firms.",
      "Dataset B headlines follow a rigid, repetitive template (e.g., '[Analyst Firm] [action] [ticker] - [reason]'), whereas Dataset A uses varied structures and narrative styles.",
      "Dataset B frequently places tickers inline with hyphens (e.g., '$nvda -'), a formatting convention less common in Dataset A.",
      "Dataset B emphasizes analyst actions (downgrades/upgrades) and price target adjustments as standalone events, while Dataset A integrates these actions with broader market drivers like lawsuits, dividends, or product launches.",
      "Dataset B cites the same few institutional analysts (e.g., Morgan Stanley, Barclays) repeatedly, whereas Dataset A references a wider array of sources, including non-analyst entities (e.g., companies, regulators).",
      "Dataset B headlines lack contextual details beyond immediate analyst actions (e.g., no mentions of legal cases, geopolitical events, or sector-specific developments), which are prevalent in Dataset A.",
      "Dataset B uses homogeneous rationale (e.g., 'valuation concerns') across headlines, while Dataset A provides diverse justifications (e.g., sales misses, guidance cuts, clinical trial results).",
      "Dataset B rarely quantifies financial metrics like EPS or revenue misses/beats numerically, whereas Dataset A explicitly includes specific figures (e.g., 'EPS misses by $0.52').",
      "Dataset B avoids non-earnings corporate updates (e.g., mergers, facility expansions) and macroeconomic themes, which are frequently highlighted in Dataset A.",
      "Dataset B omits social media tags, hyperlinks, and colloquial commentary (e.g., 'adding. Big catalyst coming up'), which are present in Dataset A headlines."
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Dataset B headlines predominantly focus on a narrow set of large tech companies (e.g., $TSLA, $NVDA, Alphabet) across all samples, while A covers diverse sectors like energy, biotech, retail, and industrials.",
      "All B samples explicitly name specific analyst firms (e.g., Morgan Stanley, Barclays) in their opening phrases, whereas A often omits institutional sources or mentions them less prominently.",
      "B consistently includes both the analyst action (downgrade/upgrade) and the resulting rating tier (e.g., 'underweight', 'overweight') in every relevant headline, while A sometimes mentions only the action without rating specifics.",
      "Price target figures (e.g., '$250', '$3,500') are explicitly stated in most B samples but rarely appear in A's headlines.",
      "B headlines show repetitive, formulaic structures (e.g., '[Firm] [action] [ticker] - [rationale]') across all samples, while A uses more varied sentence constructions.",
      "All B samples about earnings reports emphasize alignment with/misses against analyst expectations (e.g., 'in line with expectations'), whereas A often states raw results without explicit analyst benchmarking.",
      "B exclusively uses lowercase text formatting for non-ticker elements, while A maintains standard capitalization for company names and proper nouns.",
      "External factors cited in B are narrowly tied to tech-specific operational issues (e.g., 'production concerns', 'GPU demand'), whereas A references broader macroeconomic/political events (tariffs, pandemics, regulations).",
      "All B samples about market reactions use generalized descriptors like 'trades sideways' or 'unchanged', while A employs diverse directional verbs (e.g., 'slips', 'rallies', 'notches').",
      "B headlines consistently repeat identical phrases across multiple entries (e.g., 'citing production concerns'), indicating standardized templates, whereas A shows greater lexical variety in equivalent contexts."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Dataset B headlines predominantly focus on large-cap technology companies (e.g., Apple, Tesla, Nvidia, Amazon), while Dataset A covers a broader mix of sectors and includes small/mid-cap stocks.",
      "Dataset B consistently ties analyst actions (e.g., upgrades, downgrades) to specific institutional firms (e.g., Barclays, Morgan Stanley) in every relevant headline, whereas Dataset A sometimes omits institutional names.",
      "Dataset B emphasizes forward-looking timeframes with explicit references to quarterly/annual periods (e.g., 'Q2 2023,' '2024'), while Dataset A focuses on immediate or near-term impacts (e.g., 'premarket,' 'after hours').",
      "Dataset A includes informal elements like hashtags (#MarketScreener), social media-style abbreviations ('$TSLA short \ud83d\ude02'), and conversational language, whereas Dataset B maintains formal, structured headlines.",
      "Dataset B headlines frequently provide detailed rationales for analyst actions (e.g., 'citing weaker iPhone demand'), while Dataset A often states actions without explanatory context.",
      "Dataset A references international markets and non-U.S. companies (e.g., Naspers, Contact Energy) more extensively, whereas Dataset B centers on U.S.-traded equities even when covering global firms.",
      "Dataset A incorporates mentions of non-stock financial instruments (e.g., ETFs, options, dividends), while Dataset B exclusively focuses on individual stock performance.",
      "Dataset B headlines prioritize earnings outcomes and analyst price targets as standalone drivers, whereas Dataset A interweaves earnings with regulatory/geopolitical events (e.g., lawsuits, OPEC decisions).",
      "Dataset A includes real-time trading context (e.g., 'premarket,' 'post-market,' 'slides 7% after hours'), while Dataset B omits intraday timing markers in favor of quarterly/annual frameworks.",
      "Dataset B avoids colloquial phrases and opinionated language, maintaining a neutral tone, whereas Dataset A occasionally includes subjective remarks (e.g., 'Love the news,' 'Big catalyst coming up')."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Dataset B headlines consistently include explicit rationales for analyst actions (e.g., 'citing production delays and battery supply issues') while Dataset A often states analyst actions without detailed justification",
      "Dataset B shows greater focus on large-cap tech stocks (TSLA, AAPL, NVDA, META) as primary subjects compared to Dataset A's broader mix of mid/small caps and diverse sectors",
      "Dataset B headlines emphasize forward-looking corporate guidance and revenue projections more prominently than Dataset A's focus on immediate price reactions",
      "Dataset B uses full company names alongside tickers more frequently (e.g., 'Tesla Inc. (TSLA)') while Dataset A prioritizes ticker-first references",
      "Dataset A contains more casual language and social media-style commentary (e.g., '$SRNE Took some. Love the news') absent in B's formal tone",
      "Dataset B demonstrates tighter focus on institutional analyst actions (upgrades/downgrades) as primary drivers, whereas A includes more diverse catalysts like clinical trials and dividends",
      "Dataset A includes non-corporate financial elements (e.g., 'Native American tribes', 'Federal Reserve policy') that rarely appear in B's company-centric headlines",
      "Dataset B shows recurring pattern of price target figures in headlines (e.g., 'raises target to $400') while A more often mentions percentage movements without specific targets",
      "Dataset A contains more temporal specificity (e.g., 'premarket', 'after hours') in price movement reporting compared to B's general market-time focus",
      "Dataset B headlines frequently reference executive leadership impacts (e.g., 'Elon Musk faces shareholder lawsuit') whereas A focuses more on operational outcomes"
    ],
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently mention specific analyst firms (e.g., Morgan Stanley, Oppenheimer, Jefferies) as the primary source of rating actions, whereas Dataset A includes broader institutional references without always naming specific firms.",
      "Headlines in Dataset B uniformly structure sentences around analyst actions (e.g., 'upgrades/downgrades to [rating]') as the central focus, while Dataset A headlines often prioritize company-specific events or market reactions.",
      "Dataset B headlines frequently include explicit price target figures (e.g., 'raises price target to $50'), whereas Dataset A emphasizes percentage-based performance metrics (e.g., '-2%') without specifying numerical targets.",
      "Dataset B uses standardized analyst rating terminology (e.g., 'Outperform,' 'Underperform,' 'Neutral') consistently across all samples, while Dataset A employs more varied descriptors like 'downgrade' or 'cuts' without standardized rating labels.",
      "Headlines in Dataset B lack non-financial or political content (e.g., election updates, legal probes, cultural references) present in Dataset A, maintaining a strict focus on financial analyst actions.",
      "Dataset B headlines omit references to pre-market/after-hours price movements, which are recurrent in Dataset A (e.g., 'premarket,' 'after hours').",
      "Dataset B includes non-English characters (e.g., Chinese text) in some headlines, suggesting multilingual or regional targeting absent in Dataset A.",
      "Headlines in Dataset B avoid informal language, social media-style commentary, or subjective opinions (e.g., 'Love the news') common in Dataset A.",
      "Dataset B emphasizes sector-wide analyst outlooks (e.g., 'semiconductor sector upgraded') more uniformly, while Dataset A highlights granular sector-specific events (e.g., oil price cuts, clinical trials).",
      "Headlines in Dataset B rarely mention external macroeconomic or geopolitical factors (e.g., OPEC+ decisions, trade wars) that frequently contextualize market movements in Dataset A."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Dataset B headlines focus more broadly on macroeconomic policy decisions (e.g., Federal Reserve rate meetings) as primary market drivers, while A includes granular regulatory/policy impacts like lawsuits, sanctions, and specific operational restrictions",
      "Dataset A contains frequent references to premarket/postmarket price movements and specific trading session impacts (e.g., \"-4% after hours\"), while B focuses solely on general intraday market moves",
      "Dataset A includes detailed financial metric variances (e.g., \"EPS misses by $0.52\") and specific guidance adjustments, while B uses generalized performance descriptions like \"disappointing earnings\"",
      "Dataset B headlines emphasize sector-wide movements (e.g., \"Tech Stocks Plunge\") while A focuses on individual company actions and stock-specific catalysts",
      "Dataset A contains references to specific drug trials, medical devices, and clinical results impacting stocks, while B lacks biotech/pharma-specific drivers",
      "Dataset B uses repetitive structural templates focusing on economic fear/recession narratives, while A demonstrates more varied sentence structures including dividend announcements, M&A activity, and product launches",
      "Dataset A includes detailed references to alternative financial instruments (options activity, FFO metrics, short interest) absent in B's equity-focused headlines",
      "Dataset B headlines emphasize index-level movements (S&P 500, Dow Jones) while A focuses on individual stock price actions and microcap company news",
      "Dataset A contains forward-looking statements tied to specific clinical trial readouts, FDA decisions, and product launches, while B's forward-looking content focuses on macroeconomic forecasts",
      "Dataset A includes mixed-case ticker symbols with social media-style commentary (e.g., \"$SRNE Took some. Love the news.\"), while B maintains formal headline structure without ticker symbols or investor sentiment asides"
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently include both the company name and ticker symbol (e.g., 'Teva Pharmaceutical Industries Ltd. ($TEVA)'), while Dataset A often omits company names.",
      "Dataset B headlines explicitly reference analyst firms (e.g., 'Morgan Stanley', 'Barclays') in most samples, whereas Dataset A rarely names specific firms.",
      "Dataset B headlines frequently quantify analyst price target changes with exact figures (e.g., 'cuts price target to $235 from $280'), while Dataset A typically mentions downgrades/upgrades without numerical targets.",
      "Dataset B headlines maintain a strict focus on financial/stock-specific news, whereas Dataset A includes non-financial content (e.g., political updates, celebrity mentions).",
      "Dataset B headlines emphasize forward-looking corporate guidance revisions (e.g., 'expects further headwinds ahead') as primary drivers, while Dataset A mentions guidance more peripherally.",
      "Dataset B headlines structurally prioritize analyst actions (e.g., 'downgrades', 'maintains rating') as lead clauses, whereas Dataset A often leads with price movements.",
      "Dataset B headlines consistently tie stock reactions directly to specific events (e.g., 'stock jumps 4% on product launch'), while Dataset A sometimes decouples price changes from catalysts.",
      "Dataset B headlines include detailed merger/acquisition terms (e.g., 'acquires...for $350M'), whereas Dataset A mentions M&A more briefly without financial specifics.",
      "Dataset B avoids technical trading terminology (e.g., 'bull flags', 'support/resistance') present in Dataset A, focusing instead on fundamental analysis.",
      "Dataset B headlines systematically avoid social media markers (e.g., hashtags, @mentions) that appear in Dataset A samples."
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Headlines in Dataset B use more generic company references (e.g., 'Tech Giant') rather than explicitly naming specific companies as in Dataset A.",
      "Dataset B headlines often omit precise stock price percentages (e.g., 'Shares Plummet' vs. '-2%') compared to Dataset A\u2019s frequent numerical specificity.",
      "Dataset B emphasizes macroeconomic narratives (e.g., 'recession fears,' 'economic slowdown') more uniformly, while Dataset A includes granular events like clinical trials or dividend declarations.",
      "Dataset A headlines frequently include stock tickers (e.g., '$MRNA') and informal investor commentary, absent in Dataset B\u2019s formalized structure.",
      "Dataset B focuses heavily on sector-wide trends (e.g., 'renewable energy sector surges') rather than individual company actions prevalent in Dataset A.",
      "Dataset A incorporates mixed news types (e.g., legal disputes, cancellations, partnerships) beyond earnings/guidance, whereas Dataset B adheres narrowly to earnings, Fed actions, and policy impacts.",
      "Dataset B headlines lack explicit mentions of pre/post-market trading sessions (e.g., 'premarket') common in Dataset A.",
      "Dataset A includes analyst actions (e.g., 'Cantor doubled its price target') and institutional outlooks as standalone catalysts, which are rare or absent in Dataset B.",
      "Dataset B uses repetitive phrasing (e.g., 'Federal Reserve Announces...') and standardized templates, while Dataset A exhibits greater syntactic diversity.",
      "Dataset A features cross-sector granularity (e.g., energy, biotech, retail), whereas Dataset B disproportionately centers on tech giants and renewable energy themes."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Dataset B headlines predominantly focus on large-cap tech companies (e.g., Apple, Tesla, Alphabet, Microsoft, Nvidia), while Dataset A includes a broader range of sectors such as energy, healthcare, and consumer goods.",
      "Dataset B heavily emphasizes analyst actions from major financial institutions (e.g., Morgan Stanley, Goldman Sachs, Barclays), whereas Dataset A references a wider variety of firms, including niche analysts and research entities.",
      "Dataset A frequently cites exact financial metric deviations (e.g., 'EPS misses by $0.52'), while Dataset B uses generalized terms like 'beats estimates' without specific numerical precision.",
      "Dataset B consistently references major market indices (e.g., Dow Jones, Nasdaq Composite) for context, while Dataset A more often cites commodity prices (e.g., oil) and granular economic indicators.",
      "Dataset A includes operational updates related to non-tech sectors (e.g., cannabis licenses, pharmaceutical trials), whereas Dataset B focuses on tech-specific updates like electric vehicle production or AI chip demand.",
      "Dataset B headlines follow a formulaic structure (e.g., '[Firm] [action] $TICKER - [reason]'), while Dataset A features more varied phrasing, including non-analyst-driven news (e.g., legal issues, geopolitical events).",
      "Dataset A incorporates external factors like geopolitical tensions (e.g., OPEC+ cuts, China trade data) as primary drivers, while Dataset B ties market movements to corporate earnings and analyst sentiment.",
      "Dataset A highlights niche or smaller companies (e.g., Myriad Genetics, Eidos) alongside large firms, whereas Dataset B exclusively covers well-known, high-profile corporations.",
      "Dataset B emphasizes quarterly earnings reports and price target adjustments as central themes, while Dataset A includes forward-looking guidance cuts/raises with explicit percentages or timelines.",
      "Dataset A integrates non-corporate entities (e.g., governments, regulatory bodies, Native American tribes) into headlines, while Dataset B remains narrowly focused on corporate and analyst actions."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently specify the analyst firm (e.g., Morgan Stanley, Credit Suisse) driving the action, while Dataset A often omits explicit attribution to specific firms.",
      "Dataset B headlines systematically include explicit price target adjustments (e.g., 'sets $170 price target'), whereas Dataset A rarely mentions numerical price targets.",
      "Dataset B uses standardized, formal language focused on analyst actions and earnings outcomes, while Dataset A includes casual language, slang, and investor commentary (e.g., 'Big catalyst coming up').",
      "Dataset B headlines frequently cite supply chain issues, cost pressures, or macroeconomic factors as rationales for analyst actions, whereas Dataset A emphasizes company-specific events (e.g., product launches) without detailed explanations.",
      "Dataset B headlines explicitly state whether earnings/results 'beat,' 'missed,' or 'met' expectations, while Dataset A often lacks this binary outcome framing.",
      "Dataset A incorporates non-English text (e.g., Chinese characters) and social media elements (hashtags, mentions), while Dataset B avoids these entirely.",
      "Dataset A includes real-time market updates (e.g., 'premarket,' 'after hours') and price movement percentages in most headlines, whereas Dataset B focuses on forward-looking guidance revisions or analyst sentiment.",
      "Dataset B headlines pair company names with tickers consistently (e.g., 'Kirkland's Inc (KSU)'), while Dataset A often uses tickers alone (e.g., '$SRNE').",
      "Dataset A features non-analyst-driven news (e.g., regulatory actions, geopolitical events) affecting sentiment, whereas Dataset B is strictly confined to analyst ratings and financial metrics.",
      "Dataset B emphasizes sector-wide trends (e.g., 'energy supply concerns') as context for stock moves, while Dataset A leans into granular company milestones (e.g., 'nuclear power plant approval')."
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently begin with the company name or ticker followed by analyst actions, whereas Dataset A headlines often start with stock price movements or external events.",
      "Dataset B exclusively structures headlines to mention the specific financial institution (e.g., Barclays, Morgan Stanley) driving analyst actions upfront, while Dataset A sometimes omits or buries institutional references.",
      "Dataset A includes non-analyst-driven news (e.g., political events, cultural references, lawsuits) affecting stock sentiment, whereas Dataset B focuses strictly on analyst ratings, earnings, and institutional guidance.",
      "Dataset B headlines frequently specify exact earnings report dates (e.g., 'to report on January 25'), while Dataset A rarely includes explicit timelines for corporate events.",
      "Dataset A uses informal language, hashtags (#NVIDIA), and social media-style commentary (e.g., '$SRNE Took some. Love the news'), which are absent in Dataset B's formal tone.",
      "Dataset B explicitly states whether results meet/beat/miss expectations (e.g., 'revenue in line with analyst expectations') in every earnings-related headline, whereas Dataset A often omits such qualifiers.",
      "Dataset A incorporates geopolitical, environmental, and sector-specific operational updates (e.g., oil production freezes, pipeline risks), while Dataset B ties market reactions solely to analyst actions or earnings metrics.",
      "Dataset B systematically cites specific business rationales (e.g., 'due to supply chain concerns', 'citing slowing orthodontic demand') as reasons for downgrades/upgrades, whereas Dataset A highlights broader external factors (e.g., tariffs, pandemics).",
      "Dataset A includes percentage-based pre/post-market price movements (e.g., '+4.2% pre') in most headlines, while Dataset B rarely specifies trading session timing for price changes.",
      "Dataset B standardizes the mention of price target adjustments (e.g., 'raises price target to $6') across all analyst-related headlines, whereas Dataset A inconsistently includes or omits target figures."
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Dataset A headlines frequently include specific ticker symbols (e.g., $SRNE, $HON) and hashtags (e.g., #MarketScreener), while Dataset B uses generic terms like \"Tech Giant\" and avoids tickers or social media tags.",
      "Dataset A often references premarket/after-hours trading activity (e.g., \"premarket,\" \"after hours\"), whereas Dataset B focuses on standard market hours and broader trends.",
      "Dataset A includes granular financial metrics (e.g., FFO, EPS misses by $0.52) and niche jargon (e.g., \"ADC conditioning agent\"), while Dataset B uses simpler, high-level financial terminology.",
      "Dataset A headlines contain informal language, social media references, and humor (e.g., \"So Halsey needs a shower\"), whereas Dataset B maintains a formal, neutral tone throughout.",
      "Dataset A covers a wider range of sectors (e.g., biotech, oil, real estate) with hyper-specific updates (e.g., clinical trials, pipeline acquisitions), while Dataset B emphasizes broad sectors (e.g., tech, renewable energy) without detailed sub-industry focus.",
      "Dataset A frequently cites analyst firms (e.g., Cowen, BofAML) and proprietary research, while Dataset B refers to analysts generically (e.g., \"top analysts predict\").",
      "Dataset A includes real-time investor commentary (e.g., \"adding. Big catalyst coming up very soon\") and trading alerts, whereas Dataset B avoids first-person perspectives.",
      "Dataset A headlines often mention precise regulatory/legal developments (e.g., lawsuits, pipeline sanctions), while Dataset B discusses regulatory challenges in general terms (e.g., \"new regulatory challenges\").",
      "Dataset A uses fragmented sentence structures, abbreviations (e.g., \"FFO\"), and URL fragments, while Dataset B employs complete, polished sentences without technical truncation.",
      "Dataset A emphasizes immediate price reactions to events (e.g., \"slips 27% premarket\"), while Dataset B focuses on macro-level outcomes (e.g., \"Market Rebounds\") and forward-looking economic trends."
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently reference specific analyst firms (e.g., Morgan Stanley, Oppenheimer) in every sample, whereas Dataset A rarely includes analyst firm names.",
      "Dataset B emphasizes explicit price target adjustments (e.g., 'cuts Price Target to $12') in all headlines, while Dataset A focuses more broadly on earnings/results without explicit target figures.",
      "Dataset B headlines uniformly tie stock movements to analyst actions (upgrades/downgrades) as the primary driver, while Dataset A includes diverse catalysts like clinical trials, mergers, or macroeconomic factors.",
      "Dataset B uses structured phrases like 'Q3 Earnings on October 27th' to specify reporting timelines in all samples, whereas Dataset A mentions earnings without consistent date/quarter formatting.",
      "Dataset B headlines quantify growth/revenue projections numerically (e.g., '30% stock surge') across all samples, while Dataset A uses qualitative terms like 'growth potential' or 'under accumulation'.",
      "Dataset B exclusively cites institutional analyst actions (e.g., 'JPMorgan upgrades') as the source of market sentiment, while Dataset A includes non-institutional catalysts like partnerships or regulatory rulings.",
      "Dataset B consistently frames earnings/results in relation to analyst expectations (e.g., 'beats earnings forecast for Q3'), whereas Dataset A often states earnings outcomes independently (e.g., 'misses on revenue').",
      "Dataset B headlines prioritize forward-looking analyst opinions (e.g., 'sees 20% upside') in every sample, while Dataset A balances forward statements with retrospective event summaries (e.g., 'after weak FQ2').",
      "Dataset B uses standardized industry terminology for ratings (e.g., 'Neutral', 'Overweight') universally, whereas Dataset A employs informal terms like 'bull' or 'sympathy play'.",
      "Dataset B headlines structurally anchor every narrative to analyst actions (e.g., 'Downgraded By UBS'), while Dataset A includes non-analyst-driven news like geopolitical events or consumer trends."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "Dataset B headlines predominantly focus on large-cap technology companies (e.g., $AAPL, $TSLA, $NVDA), while Dataset A includes diverse sectors like energy, biotech, and retail.",
      "Dataset B emphasizes explicit numerical price target adjustments (e.g., \"cuts price target to $130\"), whereas Dataset A mentions analyst actions (e.g., \"downgrades\") without specific target figures.",
      "Dataset B frequently references competition as a primary driver of stock movements (e.g., \"rising EV competition\"), while Dataset A rarely ties price changes directly to competitive dynamics.",
      "Dataset B highlights tech-specific growth drivers (e.g., AI, cloud computing, self-driving tech) as forward-looking catalysts, whereas Dataset A focuses on broader operational updates (e.g., product launches).",
      "Dataset B headlines repetitively cite specific investment banks (e.g., Barclays, Morgan Stanley) for analyst actions, unlike Dataset A, which uses generic references (e.g., \"analysts say\").",
      "Dataset B integrates precise quarterly earnings metrics (e.g., \"Q3 Earnings Miss Estimate\") as central triggers, while Dataset A often combines earnings with non-financial factors (e.g., litigation, leadership changes).",
      "Dataset B uses dramatic directional verbs (e.g., \"plummet,\" \"skyrocket\") for price movements, whereas Dataset A employs more neutral terms (e.g., \"slips,\" \"rallies\").",
      "Dataset B explicitly links macroeconomic trends (e.g., global recession, inflation) to company-specific performance, whereas Dataset A treats macro factors as broader contextual backdrops.",
      "Dataset B headlines adopt a formulaic structure (e.g., \"[Ticker] - [Event] [Price Change%]\"), while Dataset A uses varied sentence constructions with embedded commentary.",
      "Dataset B recurrently addresses ad market dynamics (e.g., \"slowing ad growth\") and tech regulatory risks, absent as themes in Dataset A."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently include specific analyst firm names (e.g., Morgan Stanley, Deutsche Bank) in analyst actions, while Dataset A sometimes omits them or uses generic references.",
      "Dataset B emphasizes explicit forward-looking earnings dates (e.g., 'Q1 earnings on April 24'), whereas Dataset A uses vague temporal references (e.g., 'premarket,' 'FQ2').",
      "Dataset A includes non-corporate news (e.g., political events, regulatory probes, social issues), while Dataset B focuses exclusively on corporate/financial metrics and analyst actions.",
      "Dataset B headlines frequently cite precise price target adjustments (e.g., 'PT Trimmed to $116'), while Dataset A rarely specifies numerical targets.",
      "Dataset A references ETFs ($XLE) and broad market indices ($SPY) more frequently than Dataset B, which focuses narrowly on individual equities.",
      "Dataset B uses formal, structured language with standardized earnings terminology, whereas Dataset A incorporates casual phrasing, hashtags, and social media-style commentary.",
      "Dataset A includes dividend declarations (e.g., 'NZ$0.16 dividend'), while Dataset B lacks explicit mentions of dividend-related corporate actions.",
      "Dataset B consistently highlights mid-to-large-cap companies, whereas Dataset A features smaller/micro-cap stocks (e.g., $ACST, $SINT) more prominently.",
      "Dataset B headlines emphasize sector-specific growth narratives (e.g., 'renewable energy pipeline,' 'AI growth potential'), while Dataset A includes broader macroeconomic volatility drivers (e.g., oil prices, interest rates).",
      "Dataset B integrates explicit mentions of fiscal years (e.g., 'FY2024\u2019s quarterly reports'), whereas Dataset A uses generic quarter labels without year specificity."
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently include the full names of financial institutions (e.g., Morgan Stanley, Barclays) when referencing analyst actions, while A only mentions generic 'analysts'",
      "Dataset B specifically references upcoming conferences/events (e.g., CES 2024, JPMorgan Healthcare Conference) as market catalysts, absent in A",
      "Dataset B explicitly states price target figures (e.g., 'cuts price target to $45') in analyst actions, while A only mentions rating changes without specific numbers",
      "Dataset B contains complete company names alongside tickers in most headlines (e.g., 'General Electric ($GE)'), while A primarily uses standalone tickers",
      "Dataset B headlines emphasize strategic business initiatives (e.g., 'new sustainability initiatives', 'market expansion') more prominently than earnings metrics",
      "Dataset B consistently uses proper casing for company names and ETFs (e.g., 'SPDR S&P 500 ETF Trust ($SPY)'), while A maintains lowercase formatting throughout",
      "Dataset B contains explicit references to specific calendar years in future guidance (e.g., '2024', 'FY23'), while A uses relative time references like 'full-year'",
      "Dataset B headlines frequently mention supply chain disruptions as a specific growth challenge, while A references broader operational issues",
      "Dataset B includes detailed partnership announcements (e.g., 'Medtronic partners for new drug delivery system') as standalone news items, unlike A",
      "Dataset B consistently pairs earnings results with explicit guidance commentary (e.g., 'reaffirms FY23 outlook'), while A often reports earnings without forward projections"
    ],
    "llama3.3-70b_few-shot_v1": [
      "Dataset B headlines consistently structure stock movement and cause in the same sentence (e.g., 'Plummets 10% After...'), whereas A often separates these elements or omits explicit causation.",
      "Dataset B focuses predominantly on large-cap, widely recognized companies (e.g., Amazon, Tesla, Microsoft), while A includes smaller or niche firms like Myriad Genetics and InspireMD.",
      "Dataset B headlines use formal, standardized phrases (e.g., 'Quarterly Earnings Report,' 'Monetary Policy Statement'), while A includes casual language, hashtags, and fragmented syntax (e.g., '$SINT woke up !!!!').",
      "Dataset B emphasizes temporal specificity (e.g., 'Q2,' 'Monthly Jobs Report'), whereas A often omits explicit timeframes (e.g., 'guidance cut' without quarter details).",
      "Dataset B headlines quantify stock movements with percentages in every example (e.g., 'Plummets 10%'), while A sometimes uses absolute terms or omits metrics (e.g., 'Uber Declares an End to Growth at All Costs').",
      "Dataset B avoids non-financial news (e.g., political probes, cultural references), while A includes headlines unrelated to earnings or markets (e.g., 'So Halsey needs a shower').",
      "Dataset B headlines prioritize macroeconomic announcements (e.g., Federal Reserve decisions) as standalone topics, whereas A integrates macro themes with company-specific updates.",
      "Dataset B uses dramatic verbs like 'Plummets' and 'Surges' uniformly, while A employs varied, less hyperbolic terms (e.g., 'slips,' 'rallies').",
      "Dataset B avoids premarket/after-hours trading context (e.g., no 'premarket' mentions), while A explicitly references these periods in many headlines.",
      "Dataset B maintains a strict US-centric focus (e.g., Fed policies, Dow Jones), whereas A includes international entities like OPEC, Czech Republic, and South African markets."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently mention specific analyst firms (e.g., Barclays, Morgan Stanley) by name in their coverage actions, unlike Dataset A which uses generic references to analyst actions.",
      "Dataset B exclusively uses full company names alongside ticker symbols (e.g., 'Sorrento Therapeutics ($SRNE)'), whereas Dataset A often omits full names or uses informal shorthand.",
      "Dataset B emphasizes explicit phrases like 'beats analyst expectations' or 'misses estimates' in earnings reports, while Dataset A uses more varied terminology (e.g., 'guidance cut,' 'disappoint').",
      "Dataset B focuses heavily on quarterly earnings releases (Q2, Q3, Q4) with explicit timeframes, while Dataset A includes non-earnings catalysts like clinical trials or geopolitical events.",
      "Dataset B avoids non-English text and social media-style commentary (e.g., '$SINT woke up !!!!'), which appear frequently in Dataset A.",
      "Dataset B headlines prioritize corporate operational updates (supply chain issues, partnerships) over macroeconomic factors like oil prices or OPEC decisions prevalent in Dataset A.",
      "Dataset B consistently includes forward-looking corporate guidance statements (e.g., 'reaffirms guidance,' 'outlines fiscal 2024 guidance'), whereas Dataset A emphasizes retrospective performance metrics.",
      "Dataset B uses standardized phrases like 'sends shares soaring' or 'stock plunges' for price reactions, while Dataset A employs varied descriptors (e.g., 'slips,' 'notches,' 'sinks').",
      "Dataset B avoids references to regulatory/legal actions unrelated to financial performance (e.g., lawsuits, FBI probes) that appear in Dataset A.",
      "Dataset B maintains formal headline structures with complete sentences, contrasting with Dataset A's frequent use of fragmented alerts and tweet-like syntax (e.g., 'RECAP 12/10 +Pos Comments')."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify the analyst firm or investment bank (e.g., Barclays, Morgan Stanley) responsible for the action, while Dataset A sometimes omits institutional attribution.",
      "Dataset B headlines use standardized phrases like 'upgraded to overweight' or 'downgraded to underweight' for analyst actions, whereas Dataset A uses less formalized language (e.g., 'downgrade' without rating terminology).",
      "Dataset B headlines frequently include explicit price target adjustments (e.g., 'price target raised to $65'), whereas Dataset A rarely mentions specific numerical targets beyond percentage changes.",
      "Dataset B headlines focus narrowly on analyst actions (upgrades/downgrades) and earnings guidance, while Dataset A includes broader non-analyst-driven news (e.g., regulatory issues, geopolitical events).",
      "Dataset B headlines systematically include the rationale for analyst actions using 'citing' or 'on' (e.g., 'citing strong gold price outlook'), whereas Dataset A often states outcomes without explicit causal links.",
      "Dataset B headlines prioritize institutional analyst perspectives as the primary driver of stock movements, while Dataset A more often attributes price changes to operational results (e.g., 'chicken sales disappoint').",
      "Dataset B headlines emphasize upcoming earnings report dates and conference calls (e.g., 'to report quarterly earnings on August 9'), whereas Dataset A focuses on retrospective earnings outcomes.",
      "Dataset B uses lowercase formatting for analyst firm names (e.g., 'morgan stanley'), while Dataset A inconsistently capitalizes institutional references.",
      "Dataset B headlines avoid non-financial contextual details (e.g., political spending pledges, cultural events) that appear frequently in Dataset A.",
      "Dataset B maintains a strict structure of [Ticker] - [Action] + [Analyst Firm] + [Rationale], while Dataset A uses more variable sentence structures and narrative styles."
    ],
    "llama3.1-8b_zero-shot_v1": [
      "Dataset A headlines frequently include specific stock ticker symbols (e.g., $SRNE, $MGI) and premarket/postmarket timeframes, while B uses generic market terms (e.g., 'Stock Market', 'Dow').",
      "Dataset B headlines emphasize broad macroeconomic trends (e.g., 'Global Recession Fears', 'GDP Growth') more consistently than A, which focuses on granular company/sector catalysts.",
      "Dataset A contains fragmented/informal language (e.g., 'adding.', 'woke up !!!!') and social media references absent in B's polished, standardized headlines.",
      "Dataset B prioritizes major indices (S&P 500, NASDAQ) and household-name corporations (Apple, Amazon), while A highlights niche biotech/energy firms (Eidos, Enochian Biosciences).",
      "Dataset A references specialized financial metrics and instruments (FFO, short interest, option chains) more extensively than B, which focuses on conventional metrics (EPS, revenue, interest rates).",
      "Dataset B headlines maintain consistent tense/formality (e.g., 'Surpasses Expectations', 'Plummets 5%'), whereas A mixes tenses and includes trader slang (e.g., 'adding', 'catching Gap and Run').",
      "Dataset A contains explicit trader commentary/opinions (e.g., 'Love the news', 'Big catalyst coming up'), while B maintains objective reporting without editorialization.",
      "Dataset B headlines emphasize institutional analyst consensus (e.g., 'Analysts Bullish', 'Experts Predict'), whereas A highlights individual analyst actions/conflicts between firms.",
      "Dataset A includes non-financial content (e.g., political news, celebrity references) mixed with market data, while B maintains strict financial/economic focus.",
      "Dataset B uses standardized percentage formats (e.g., 'Plummets 5%') universally, while A employs varied formatting (e.g., '-2%', '+4.2% pre', 'sinks 2.7%')."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Dataset B headlines focus predominantly on large-cap tech companies (e.g., $TSLA, $AAPL, $AMZN) while Dataset A includes diverse sectors like energy, biotech, and retail with less tech concentration",
      "Dataset B contains repetitive mentions of identical companies/events (e.g., 15+ $TSLA downgrades) whereas Dataset A shows greater headline uniqueness across entities",
      "Dataset B includes multilingual headlines (Chinese text) while Dataset A is exclusively English-language",
      "Dataset B headlines follow rigid templating (e.g., '[Firm] [Action] at [Bank]') whereas Dataset A uses varied syntactic structures",
      "Dataset A frequently specifies premarket/postmarket price movements (e.g., '-2% premarket') while B focuses on regular session changes",
      "Dataset A references granular financial instruments (e.g., puts, dividends, FFO metrics) absent in B's earnings/price target focus",
      "Dataset B emphasizes institutional analyst consensus (e.g., 'Barclays cuts targets') while A includes non-analyst drivers like FDA approvals or production delays",
      "Dataset A contains explicit market index references (S&P, Nasdaq) while B discusses indices only generically",
      "Dataset A incorporates non-financial news hooks (political events, celebrity tweets) whereas B remains strictly finance-focused",
      "Dataset A details geopolitical/regulatory drivers (OPEC, trade wars) while B emphasizes company-specific analyst ratings"
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines frequently include exact dates (e.g., 'february 27', 'november 15') for upcoming earnings reports or events, while A uses vague time references like 'premarket' or 'awaiting further guidance'.",
      "Dataset B contains placeholder text (e.g., 'your first headline', 'some example', 'your neutral headline here') indicating template-based generation or incomplete entries, absent in A.",
      "Dataset A includes social media elements like hashtags (#MarketScreener), URLs, and tweet-style truncation markers (\u2026), which are completely absent in B.",
      "Dataset B shows formulaic repetition of phrases like 'cites strong quarterly earnings' and 'price target raised/cut,' whereas A uses more varied action descriptions (e.g., 'cools,' 'ramps output,' 'declares dividend').",
      "Dataset A contains headlines with non-financial political/cultural references (e.g., Trump probe, Native American tribes, Taylor Swift) that never appear in B's strictly earnings/analyst-focused content.",
      "Dataset B explicitly mentions conference call scheduling (e.g., 'management to discuss recent financial results') as a recurring element, while A focuses on immediate post-announcement price reactions.",
      "Dataset A includes forward-looking macroeconomic commentary (e.g., 'COVID-19 impacts,' 'OPEC decisions'), whereas B's external drivers are limited to sector-specific competition and demand fluctuations.",
      "Dataset B uses standardized ticker formatting with consistent spacing (e.g., '$NVDA -'), while A exhibits irregular formatting like parenthetical tickers '(NASDAQ:ZGNX)' and embedded URLs.",
      "Dataset A contains incomplete sentence fragments and casual language (e.g., '$SRNE Took some. Love the news.'), whereas B maintains full sentence structure throughout.",
      "Dataset B emphasizes institutional brokerage firms in upgrades/downgrades (e.g., 'Morgan Stanley' appears 18x in samples), while A references more diverse actors (regulators, hedge funds, researchers)."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Dataset A headlines frequently include specific percentage changes in stock prices (e.g., '-2%', '+4% premarket'), while Dataset B headlines omit granular price movement figures in favor of directional terms like 'surge' or 'plunge'.",
      "Dataset A contains more informal language, slang, or conversational phrases (e.g., 'adding. Big catalyst coming up', 'Took some. Love the news'), whereas Dataset B maintains formal, standardized financial reporting terminology.",
      "Dataset A includes explicit references to premarket/after-hours trading activity (e.g., 'premarket after weak FQ2'), while Dataset B headlines focus exclusively on regular trading session outcomes.",
      "Dataset A incorporates non-financial social/political commentary (e.g., 'Conservatives make modest spending pledges', 'Watchdog faults FBI probe'), whereas Dataset B remains strictly focused on corporate/economic developments.",
      "Dataset A shows frequent use of stock ticker symbols in headlines (e.g., '$MGI', '$TSLA'), while Dataset B predominantly uses full company names.",
      "Dataset B consistently references quarterly earnings periods with standardized labels (e.g., 'Q3', 'Q4'), whereas Dataset A uses inconsistent fiscal period abbreviations (e.g., 'FQ2', 'March 2020 SCE').",
      "Dataset A contains technical trading pattern alerts and options market specifics (e.g., 'Unusual Puts', 'momentum is coming back'), while Dataset B focuses on fundamental business performance metrics.",
      "Dataset B emphasizes sector-wide trends and macroeconomic narratives (e.g., 'global semiconductor shortage', 'supply chain disruptions'), whereas Dataset A prioritizes individual company-specific developments.",
      "Dataset A includes user-generated content markers like hashtags (#NVIDIA) and social media references ('On @MadMoneyOnCNBC'), while Dataset B maintains institutional reporting style without social elements.",
      "Dataset B headlines frequently contain explicit forward timeframes (e.g., 'tomorrow', '2024', 'next meeting'), whereas Dataset A focuses on immediate reactions to recent events without future dating."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently include the specific financial institution (e.g., Barclays, Morgan Stanley) responsible for analyst actions, whereas Dataset A sometimes omits institutional attribution.",
      "Dataset B frequently references scheduled future events (e.g., earnings release dates, investor conferences) as catalysts, while Dataset A focuses on immediate or past events.",
      "Dataset B uses formal analyst rating terminology (e.g., 'overweight,' 'underweight') in every relevant headline, whereas Dataset A often omits specific rating language.",
      "Dataset B headlines frequently note the stock's immediate reaction (e.g., 'shares remain steady,' 'trades flat'), while Dataset A emphasizes premarket/after-hours percentage changes.",
      "Dataset B avoids social media-style elements (e.g., hashtags, informal commentary) present in Dataset A headlines.",
      "Dataset B ties price target adjustments or downgrades to granular competitive or sector-specific trends (e.g., 'rising competition concerns'), while Dataset A cites broader external factors like geopolitical events.",
      "Dataset B places ticker symbols parenthetically after company names (e.g., 'expedia group ($EXPE)'), whereas Dataset A often leads with tickers or embeds them irregularly.",
      "Dataset B includes explicit price target figures (e.g., 'raises price target to $50') in analyst actions, absent in Dataset A.",
      "Dataset B attributes rationale directly to institutions using phrases like 'citing' or 'according to,' while Dataset A states reasons without consistent attribution.",
      "Dataset B headlines contextualize earnings results with qualifiers like 'in line with expectations' or 'mixed quarterly results,' whereas Dataset A uses binary terms like 'beats/misses.'"
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify the analyst firm (e.g., Oppenheimer, Citi) in every analyst action mention, while Dataset A sometimes omits explicit naming (e.g., 'Downgrades 4/7').",
      "Dataset B emphasizes precise price target adjustments (e.g., 'Price target cut to $15 from $20') in analyst actions, whereas Dataset A rarely quantifies price targets.",
      "Dataset B headlines frequently pair earnings results with forward-looking analyst guidance (e.g., 'Guides Lower for Q2'), while Dataset A often states earnings metrics without explicit linkage to guidance changes.",
      "Dataset B uses standardized phrases like 'initiates coverage,' 'maintains hold,' or 'reiterates Buy' to describe analyst actions, whereas Dataset A employs less formalized language (e.g., 'downgrades 4/7').",
      "Dataset B consistently ties sector-specific catalysts to named analyst perspectives (e.g., 'citing mixed signals in the semiconductor industry'), while Dataset A cites sector trends (e.g., oil prices) without direct analyst attribution.",
      "Dataset B headlines explicitly state credit rating changes (e.g., 'Moody\u2019s lowers credit rating'), whereas Dataset A references legal/regulatory risks (e.g., 'lawsuit') without credit agency actions.",
      "Dataset B includes collaborative developments framed as strategic business moves (e.g., 'partner to develop RNAi therapies'), while Dataset A highlights partnerships as standalone news (e.g., 'Ripple Makes Investment').",
      "Dataset B structures headlines with a ticker-first format (e.g., '$RRC - Range Resources Corp...'), whereas Dataset A often leads with company names or non-ticker identifiers (e.g., 'Myriad Genetics slips...').",
      "Dataset B integrates ETF/market index references into analytical predictions (e.g., 'predict stable performance for $SPY'), while Dataset A anchors indices as contextual markers (e.g., 'S&P cuts growth forecast').",
      "Dataset B emphasizes sequential analyst sentiment changes (e.g., 'downgrade [...] for third consecutive quarter'), while Dataset A lacks explicit tracking of recurring analyst stance revisions."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently use complete sentences and formal financial terminology, while Dataset A includes fragmented phrases, casual language, and internet slang (e.g., '$SRNE Took some. Love the news').",
      "All Dataset B samples explicitly name the analyst firm or institution driving the action (e.g., 'Goldman Sachs Downgrades'), whereas Dataset A often omits specific sources (e.g., 'Downgrades 4/7: $AAN...').",
      "Dataset B headlines focus narrowly on corporate financial metrics and analyst actions without diversions, while Dataset A includes non-financial contextual elements (e.g., political news like 'Conservatives make spending pledges').",
      "Dataset B universally structures headlines with standardized financial event sequencing (Entity \u2192 Action \u2192 Impact), unlike Dataset A's variable formats that sometimes prioritize price movements first (e.g., 'Okta -2%...').",
      "All Dataset B samples avoid social media conventions like hashtags (#NVIDIA) or @mentions seen consistently in Dataset A.",
      "Dataset B headlines emphasize quarterly results and forward guidance as primary drivers, while Dataset A frequently references immediate market mechanics (e.g., 'premarket', 'after hours').",
      "Dataset B exclusively uses conventional ticker notation ($NVDA), whereas Dataset A includes non-standard annotations (e.g., '+4.2% pre' in '$MGI (+4.2% pre)').",
      "All Dataset B samples maintain neutral tone in reporting analyst actions, while Dataset A incorporates speculative language (e.g., 'Big catalyst coming up', 'under accumulation').",
      "Dataset B headlines avoid cultural/pop-culture references entirely, unlike Dataset A's inclusions (e.g., Taylor Swift, Marvel comics).",
      "Dataset B consistently ties supply chain developments to financial outcomes, while Dataset A more frequently cites macroeconomic/political factors (e.g., 'Saudi-Russian oil price war')."
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Dataset B headlines consistently reference specific analyst firms (e.g., Morgan Stanley, Barclays) in their rationale, while Dataset A rarely names institutions.",
      "Dataset B focuses predominantly on large-cap tech companies (e.g., Apple, Tesla, Amazon), whereas Dataset A covers a broader sector diversity including energy, biotech, and retail.",
      "Dataset B headlines systematically pair stock tickers with full company names (e.g., '$AAPL - Apple'), while Dataset A often uses tickers alone or with partial identifiers.",
      "Dataset B emphasizes explicit price target numbers and percentage adjustments in analyst actions, while Dataset A typically mentions rating changes without specific target values.",
      "Dataset B maintains formal grammatical structure and complete sentences, whereas Dataset A includes fragmented phrases and social media-style annotations (e.g., 'adding. Big catalyst coming up').",
      "Dataset B consistently links analyst actions to specific fundamental drivers (e.g., 'supply chain woes', 'ad revenue concerns'), while Dataset A often states rating changes without explanatory context.",
      "Dataset B shows temporal focus on quarterly earnings cycles (Q3/Q4) and forward guidance, while Dataset A includes non-earnings catalysts like drug trial results and geopolitical events.",
      "Dataset B exclusively uses English-language content, whereas Dataset A contains multilingual elements (e.g., Chinese characters in analyst notes).",
      "Dataset B demonstrates standardized formatting for analyst actions ('[Firm] [Verb] [Action] on [Stock]'), while Dataset A varies in action description phrasing.",
      "Dataset B emphasizes institutional analyst perspectives as primary news drivers, while Dataset A incorporates retail investor commentary and non-professional market observations."
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify analyst/investment firms (e.g., Morgan Stanley, Deutsche Bank) when referencing rating changes, while A only mentions analyst actions generically",
      "B systematically quantifies price target adjustments with exact dollar figures (e.g., 'raises target to $160'), whereas A focuses on percentage movements without explicit target prices",
      "B emphasizes full fiscal year guidance updates (e.g., 'raises full-year outlook') as standalone narrative elements, while A embeds guidance changes within event-driven contexts",
      "B demonstrates formulaic headline structures prioritizing institutional actors ('[Firm] [action] [ticker] [rationale]'), whereas A uses more varied syntactic patterns",
      "B shows heightened focus on sequential quarterly performance comparisons (Q2 vs Q3) rather than A's emphasis on immediate pre/post-market reactions",
      "B incorporates explicit rating terminology ('underweight', 'outperform') in 92% of analyst action mentions vs 0% in A",
      "B contextualizes earnings within strategic business decisions (acquisitions, partnerships), while A ties earnings directly to stock price movements",
      "B features recurring mentions of sector-specific growth prospects (e.g., 'gaming content demand', 'datacenter boom'), whereas A emphasizes sector-specific risk factors",
      "B maintains formal register throughout, contrasting with A's occasional informal elements (e.g., 'So Halsey needs a shower', tweet-style commentary)",
      "B demonstrates systematic tracking of institutional price target revisions across multiple firms, while A focuses on single-analyst impact events"
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently include both company names and ticker symbols (e.g., 'novartis $NVS'), while Dataset A frequently uses tickers alone without explicit company names",
      "Dataset B headlines emphasize standardized analyst action phrasing ('cuts price target,' 'upgrades to overweight'), while Dataset A uses more varied stock movement descriptors ('slips,' 'sinks,' 'rallies')",
      "Dataset B exclusively focuses on corporate/financial developments, whereas Dataset A intermittently includes political/social news unrelated to markets (e.g., FBI probes, election impacts)",
      "Dataset B systematically references major financial institutions driving analysis (Barclays/Morgan Stanley in 90%+ samples), while Dataset A cites diverse sources including hedge funds and non-institutional analysts",
      "Dataset B headlines maintain formal structure without social media tags/commentary, contrasting with Dataset A's occasional hashtags (#NVIDIA) and conversational asides ('Cash is trash?')",
      "Dataset B consistently specifies future event timelines ('to report earnings on August 10'), while Dataset A focuses on immediate price reactions without forward-looking dates",
      "Dataset B prioritizes institutional rationale phrasing ('citing debt concerns,' 'due to competition'), whereas Dataset A states simpler catalysts ('after weak FQ2')",
      "Dataset B shows ticker symbol consistency (all caps $SYMBOL), unlike Dataset A's mix of uppercase/lowercase tickers ($MGI vs $goog)",
      "Dataset B headlines avoid truncated text/ellipses common in Dataset A ('Saudis Slash Oil Prices...') by using complete, self-contained statements",
      "Dataset B maintains uniform earnings call/reporting language ('releases quarterly earnings report'), while Dataset A varies terminology ('FFO misses,' 'EPS beats')"
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently specify the financial institution (e.g., Barclays, Morgan Stanley) behind analyst actions, while Dataset A often omits institutional attribution.",
      "Headlines in Dataset B follow a strict template: '[Company] [Action] by [Institution] due to [Reason]', whereas Dataset A uses more varied sentence structures.",
      "Dataset B exclusively focuses on analyst actions (upgrades/downgrades) and earnings reports, while Dataset A includes non-analyst-driven news (e.g., lawsuits, product launches, macroeconomic trends).",
      "Dataset B headlines systematically include precise earnings report dates (e.g., 'q2 earnings report on August 10'), whereas Dataset A rarely provides event timelines.",
      "Dataset A contains informal language and social media elements (e.g., hashtags, '!!!', 'woke up'), while Dataset B maintains formal, standardized financial reporting terminology.",
      "Dataset B emphasizes forward-looking analyst justifications (e.g., 'citing strong quarterly earnings growth'), whereas Dataset A more frequently states factual outcomes without rationale.",
      "Company names in Dataset B are always spelled out fully before ticker mentions (e.g., 'molson coors beverage company faces challenges as $TAP'), unlike Dataset A's frequent ticker-first references.",
      "Dataset B consistently uses lowercase formatting for full company names outside tickers, while Dataset A employs title case for proper nouns.",
      "Dataset A includes non-equity market references (e.g., cryptocurrencies, commodities like oil), whereas Dataset B strictly focuses on corporate equity analysis.",
      "Dataset B headlines universally pair analyst actions with explicit price target/rating changes, while Dataset A sometimes mentions downgrades/upgrades without numeric targets."
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Dataset B headlines use broader sector-level focus (e.g., 'Tech Sector') rather than granular company-specific tickers/symbols prevalent in A",
      "Dataset B headlines lack premarket/after-hours trading references and specific timing indicators present in A (e.g., 'premarket', 'postmarket')",
      "Dataset B headlines omit specific percentage change figures (+/- XX%) that are consistently quantified in A's price movement descriptions",
      "Dataset B headlines use generalized action verbs ('plunge', 'surge') rather than A's mix of financial jargon and casual language ('slips', 'rallies', 'woke up')",
      "Dataset B headlines avoid references to specific financial metrics (FFO, EPS beats/misses by $X.XX) that are detailed in A",
      "Dataset B headlines maintain formal sentence structure vs A's frequent use of social media-style fragments and hashtags (#MarketScreener)",
      "Dataset B headlines emphasize macroeconomic trends over operational specifics (fewer mentions of clinical trials/mergers compared to A)",
      "Dataset B headlines lack user-generated content markers present in A (e.g., 'CaPre (TRILOGY 1 and 2)', emojis, trader commentary)",
      "Dataset B headlines show consistent focus on quarterly earnings periodicity vs A's mix of earnings, dividends, and real-time event impacts",
      "Dataset B headlines avoid micro-updates on individual stock catalysts (e.g., 'new cannabis license') in favor of sector-wide trend analysis"
    ]
  },
  "diffs_real_from_synth": {
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Dataset B includes headlines without stock tickers (e.g., 'Okta -2% on valuation downgrade'), while Dataset A consistently uses '$'-prefixed tickers in all entries.",
      "Dataset B contains non-financial news (e.g., political updates like 'Conservatives make very modest spending pledges'), whereas Dataset A focuses exclusively on financial/analyst-driven content.",
      "Dataset B incorporates social media-style commentary (e.g., '$SINT woke up !!!! New HOD') and hashtags (#NVIDIA #Stock), which are absent in Dataset A's formal headlines.",
      "Dataset B includes international/non-English company names (e.g., 'Karrie International', 'Season Pacific'), while Dataset A predominantly features U.S.-centric firms and ETFs.",
      "Dataset B references non-institutional analysts/sources (e.g., '@MadMoneyOnCNBC', 'Bloomberg's Taylor Riggs'), whereas Dataset A exclusively cites established banks/research firms.",
      "Dataset B features headlines about dividends (e.g., 'Contact Energy declares NZ$0.16 dividend'), absent in Dataset A's earnings/price-target-focused entries.",
      "Dataset B includes legal/regulatory actions unrelated to markets (e.g., 'New York hits Juul with a lawsuit'), while Dataset A ties regulatory mentions directly to financial impacts.",
      "Dataset B uses technical trading jargon (e.g., 'pivot 50c', 'bull flags breaking out'), whereas Dataset A focuses on institutional analyst terminology.",
      "Dataset B contains macroeconomic commentary without stock links (e.g., 'Revised Mexican data paints...stagnant economy'), unlike Dataset A's company-specific economic analysis.",
      "Dataset B includes non-company-specific index/currency forecasts (e.g., 'AUD/USD Weekly Price Forecast'), while Dataset A maintains equity/fund-specific focus."
    ],
    "llama3.1-8b_few-shot_v1": [
      "Dataset B headlines frequently use stock ticker symbols prefixed with '$' (e.g., $MGI, $SRNE), while Dataset A uses company names or parenthetical tickers (e.g., 'Apple Inc. (AAPL)').",
      "Dataset B includes explicit references to premarket/after-hours trading sessions (e.g., 'premarket', 'after hours'), which are absent in Dataset A.",
      "Dataset B incorporates social media elements like hashtags (#NVIDIA) and mentions (@username), whereas Dataset A headlines are formal and lack these features.",
      "Dataset B often structures headlines with fragmented, concise formats (e.g., 'Okta -2% on valuation downgrade'), while Dataset A uses complete sentences with contextual explanations.",
      "Dataset B includes precise numerical deltas for financial metrics (e.g., 'EPS misses by $0.52'), whereas Dataset A mentions beats/misses without exact figures.",
      "Dataset B features non-traditional content types like trading recaps, analyst commentary, and live updates (e.g., 'RECAP 12/10 Unusual Puts'), which Dataset A lacks.",
      "Dataset B headlines occasionally include conversational phrases or informal language (e.g., 'Anyone catching any of this Gap and Run?'), unlike Dataset A's formal tone.",
      "Dataset B references specific financial instruments (e.g., options, futures) and technical terms (e.g., 'FFO misses'), while Dataset A focuses on broader market terms.",
      "Dataset B headlines frequently start with percentage price changes (e.g., 'Eidos up 13%...') followed by catalysts, whereas Dataset A integrates movements within context.",
      "Dataset B includes direct mentions of analyst firms and price target adjustments (e.g., 'target raised to $97'), while Dataset A generalizes analyst actions."
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Dataset B headlines use informal language, abbreviations, and emojis (e.g., '\ud83d\ude02', '...'), while A maintains formal, complete sentences.",
      "Dataset B includes direct social media references (e.g., hashtags, @mentions, tweet-style commentary), absent in A.",
      "Dataset B headlines frequently omit institutional sources (e.g., Morgan Stanley, Barclays) common in A.",
      "Dataset B emphasizes intraday trading actions (e.g., 'premarket', 'after-hours') more explicitly and repetitively than A.",
      "Dataset B includes niche or lesser-known companies (e.g., Myriad Genetics, Eidos) more often than A, which focuses on large-cap stocks.",
      "Dataset B headlines reference specific financial instruments (e.g., puts, options, 'FFO misses') not explicitly detailed in A.",
      "Dataset B features fragmented or incomplete sentences (e.g., 'So Halsey needs a shower'), whereas A uses structured news phrasing.",
      "Dataset B includes user-generated or crowdsourced data (e.g., 'RECAP 12/10 Unusual Puts', 'Big catalyst coming up very soon').",
      "Dataset B headlines often lack macroeconomic context (e.g., GDP, Fed policy) prevalent in A, focusing instead on immediate price reactions.",
      "Dataset B contains colloquial trading jargon (e.g., 'bull flags', 'momentum', 'sympathy play') more frequently than A."
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines often omit specific event times/dates (e.g., conference calls at specific times), while Dataset A consistently includes them",
      "Dataset B contains political/regulatory headlines unrelated to specific corporations (e.g., spending pledges, FBI probes), which are absent in Dataset A",
      "Dataset B includes social media-style formatting (hashtags, ellipses, tweets) and casual language not found in Dataset A's formal corporate communications",
      "Dataset B features currency/commodity pair analysis (e.g., AUD/USD, oil forecasts) absent from Dataset A's company-specific focus",
      "Dataset B contains cultural references (music performances, comic books) mixed with financial content, unlike Dataset A's pure corporate news",
      "Dataset B includes ETF/index-level commentary (e.g., $XLE, S&P cuts) while Dataset A focuses exclusively on individual company stocks",
      "Dataset B uses emojis and text message abbreviations (e.g., \ud83e\udd23) not present in Dataset A's professional tone",
      "Dataset B contains philosophical investment advice (e.g., diversification benefits) absent from Dataset A's factual reporting",
      "Dataset B includes non-equity instruments (mortgage REITs, crypto analysis) beyond Dataset A's stock-centric focus",
      "Dataset B features real-time trading commentary (e.g., \"new HOD\", \"volume Alert\") unlike Dataset A's post-event earnings analysis"
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Dataset B includes headlines with informal, social media-like language and user-generated content (e.g., hashtags, casual phrases like \"Love the news\"), absent in A.",
      "Dataset B covers broader geopolitical, regulatory, or non-financial events (e.g., lawsuits, political pledges) influencing markets, whereas A focuses strictly on company-specific financial metrics and analyst actions.",
      "Dataset B references commodities (e.g., oil prices), currencies (e.g., AUD/USD), and macroeconomic indicators (e.g., GDP forecasts), while A emphasizes individual equities and institutional analyst ratings.",
      "Dataset B includes fragmented or incomplete sentences (e.g., \"$ACST adding. Big catalyst coming up...\") and tweet-style formatting, unlike A\u2019s structured, full-sentence headlines.",
      "Dataset B highlights non-corporate entities (e.g., governments, NGOs) as market drivers (e.g., \"Saudis Slash Oil Prices\"), whereas A centers on institutional analyst firms (e.g., Morgan Stanley, Barclays).",
      "Dataset B contains headlines about non-earnings corporate actions (e.g., dividend declarations, mergers, license grants) not tied to analyst ratings, unlike A\u2019s focus on earnings reports and guidance.",
      "Dataset B features international or non-U.S. market developments (e.g., Czech Republic data, OPEC+ cuts), while A predominantly covers U.S.-centric companies and analysts.",
      "Dataset B includes qualitative market sentiment phrases (e.g., \"Cash is king!\") and opinion-driven statements, whereas A emphasizes quantified metrics (e.g., EPS figures, price targets).",
      "Dataset B references niche financial instruments (e.g., options, futures) and trading strategies (e.g., \"unusual puts\"), absent in A\u2019s equity-centric focus.",
      "Dataset B mentions retail investor activities (e.g., \"momentum is coming back\") and speculative trading, while A focuses on institutional analyst actions and formal earnings guidance."
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Dataset B includes headlines mentioning dividend declarations (e.g., 'Contact Energy declares NZ$0.16 dividend'), which are absent in Dataset A.",
      "Dataset B contains headlines focused on geopolitical or regulatory actions (e.g., 'Saudis Slash Oil Prices in Asia as Virus Causes Demand Shock'), whereas Dataset A focuses on analyst actions and earnings.",
      "Dataset B frequently references non-corporate entities (e.g., governments, OPEC) as primary news drivers, unlike Dataset A, which centers on analyst firms and companies.",
      "Dataset B includes headlines with non-English terms or currencies (e.g., 'NZ$0.16 dividend'), while Dataset A uses only U.S. tickers and English terminology.",
      "Dataset B features consumer product/service news (e.g., 'Uber Declares an End to Growth at All Costs'), whereas Dataset A focuses strictly on financial metrics and analyst ratings.",
      "Dataset B uses hashtags and social media-style annotations (e.g., '#NVIDIA #Stock #MarketScreener'), which are absent in Dataset A.",
      "Dataset B includes headlines about lawsuits or legal disputes (e.g., 'Bayer seeks reversal of $86M Roundup cancer verdict'), unlike Dataset A.",
      "Dataset B covers environmental or social issues impacting stocks (e.g., 'Largest coal plant in the West shuts down'), while Dataset A avoids non-financial narratives.",
      "Dataset B mentions niche financial instruments (e.g., 'FFO misses by $0.01') more frequently than Dataset A, which focuses on EPS/revenue.",
      "Dataset B includes fragmented or incomplete sentences (e.g., 'SINT volume Alert !! Big buy blocks'), whereas Dataset A uses formal, complete sentence structures."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Dataset B headlines are shorter and more concise, often omitting detailed contextual explanations (e.g., \"Okta -2% on valuation downgrade\"), while Dataset A uses longer, narrative-style sentences (e.g., \"Analysts at Morgan Stanley cut their price target...\").",
      "Dataset B includes non-corporate news impacting markets (e.g., geopolitical updates like \"Saudis Slash Oil Prices in Asia\"), whereas Dataset A focuses strictly on company-specific or sector-specific financial events.",
      "Dataset B headlines frequently integrate social media tags or platform-specific formatting (e.g., \"#NVIDIA #Stock #MarketScreener\"), while Dataset A avoids such elements.",
      "Dataset B references niche or less prominent institutional actors (e.g., \"SunTrust Robinson Humphrey\") alongside major firms, whereas Dataset A predominantly cites high-profile institutions like Morgan Stanley or Barclays.",
      "Dataset B includes headlines with mixed-case text and irregular punctuation (e.g., \"$SINT volume Alert !!!\"), while Dataset A maintains consistent capitalization and formal punctuation.",
      "Dataset B features granular market data updates (e.g., \"RECAP 12/10 Unusual Puts...\") resembling real-time trading alerts, whereas Dataset A focuses on post-event analysis.",
      "Dataset B highlights dividend declarations and operational milestones (e.g., \"Contact Energy declares NZ$0.16 dividend\"), while Dataset A emphasizes earnings outcomes and guidance revisions.",
      "Dataset B contains headlines with direct quotes or conversational phrases (e.g., \"Cash is trash? No, cash is king!\"), whereas Dataset A uses objective, third-party reporting language.",
      "Dataset B includes non-English characters or symbols only in tickers (e.g., \"$TSLA -2%\"), while Dataset A occasionally integrates multilingual text (e.g., Chinese characters in price target updates).",
      "Dataset B references localized or regional economic impacts (e.g., \"Canada's economy could end up being the big loser...\"), whereas Dataset A emphasizes global macroeconomic trends (e.g., \"Fed policies\")."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Dataset B headlines are shorter and more concise, often omitting detailed reasoning (e.g., 'Okta -2% on valuation downgrade') compared to Dataset A's explicit mentions of institutional sources and specific factors.",
      "Dataset B includes non-English characters/symbols and unformatted social media tags (e.g., Chinese text, #MarketScreener), while Dataset A maintains standardized English formatting.",
      "Dataset B references niche/non-traditional financial topics (e.g., comic book valuations, celebrity tweets) absent in Dataset A's strictly institutional/operational focus.",
      "Dataset B contains technical trading terminology (e.g., 'Unusual Puts', 'premarket', '52 week highs/lows') not prevalent in Dataset A's analyst-driven narratives.",
      "Dataset B headlines frequently cite cultural/political events (e.g., Trump-Powell meetings, Brexit) without direct financial metrics, unlike Dataset A's explicit ties to market impacts.",
      "Dataset B uses fragmented/informal structures (e.g., 'RECAP' lists, broken URLs, tweet-style commentary) vs. Dataset A's complete-sentence professionalism.",
      "Dataset B includes user-generated content/slang (e.g., 'Love the news', 'Stock has been under accumulation') absent in Dataset A's institutional tone.",
      "Dataset B covers non-equity assets (e.g., cryptocurrencies, oil futures, currencies) more prominently than Dataset A's stock-centric focus.",
      "Dataset B features speculative questions/rhetoric (e.g., 'Which way will the markets be headed?') unlike Dataset A's declarative analyst actions.",
      "Dataset B highlights granular legal/regulatory outcomes (e.g., fines for bribes in Tunisia) without connecting them to broader market sentiment, whereas Dataset A ties probes to stock impacts."
    ],
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines more frequently include immediate percentage changes in stock prices at the start (e.g., 'Okta -2%') without always explicitly tying them to analyst actions, while Dataset A emphasizes institutional analyst actions as the primary driver.",
      "Dataset B contains headlines with non-English characters, hashtags (e.g., #MarketScreener), and truncated text (e.g., '\u2026'), suggesting aggregation from social media or real-time feeds, unlike Dataset A's polished news format.",
      "Dataset B includes non-corporate/non-financial entities (e.g., governments, political parties, celebrities) in headlines (e.g., 'Conservatives make modest spending pledges'), whereas Dataset A focuses exclusively on companies, analysts, and investors.",
      "Dataset B references non-equity financial instruments (e.g., forex pairs like AUD/USD, commodities like oil) and macroeconomic trends (e.g., GDP forecasts) more frequently than Dataset A, which centers on equities and sector-specific news.",
      "Dataset B headlines often lack explicit stock ticker symbols (e.g., 'NVIDIA : GPU-Powered Semi Simulation...') or place them inconsistently, unlike Dataset A's systematic ticker-first structure (e.g., '$MRLN - Morgan Stanley...').",
      "Dataset B incorporates cultural/political events (e.g., 'Taylor Swift sings...', 'Trump campaign probe') with indirect market relevance, while Dataset A strictly ties news to corporate performance or analyst metrics.",
      "Dataset B includes fragmented updates (e.g., 'RECAP 12/10 Unusual Puts...') and investor commentary (e.g., 'If you are throwing deep...'), reflecting real-time trading forums, unlike Dataset A's formal analyst-driven narratives.",
      "Dataset B features headlines about litigation, regulatory fines, or geopolitical tensions (e.g., 'New York hits Juul with a lawsuit') as standalone market movers, whereas Dataset A ties external factors to analyst reactions.",
      "Dataset B uses colloquial language and non-technical phrasing (e.g., 'Cash is trash? No, cash is king!'), contrasting with Dataset A's consistent use of formal financial terminology (e.g., 'mixed shelf filings').",
      "Dataset B includes forward-looking statements attributed to vague sources (e.g., 'analysts say') or macroeconomic trends without naming specific firms, whereas Dataset A always cites institutional analysts (e.g., 'Morgan Stanley')."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Dataset B headlines frequently include precise premarket/after-hours stock price movements (e.g., 'slips 27% premarket'), while A focuses on general session changes without specific trading hour references.",
      "B consistently incorporates exact percentage changes and numerical metrics in headlines (e.g., 'beats by $0.52'), whereas A uses more qualitative descriptors like 'exceeds expectations' without specific figures.",
      "B contains explicit references to clinical trial results and drug development milestones (e.g., 'AG10 in ATTR-CM'), while A focuses on broader sector performance without biomedical specifics.",
      "Dataset B headlines frequently mention mixed shelf filings, equity offerings, and capital raise announcements absent in A's more general corporate updates.",
      "B includes detailed options market activity and specific contract references (e.g., '$AAPL Jan 240 P'), which never appear in A's macro-focused headlines.",
      "Dataset B incorporates direct quotes from analysts and specific price target adjustments (e.g., 'target raised to $97'), while A references analyst actions generally without attribution.",
      "B features granular commodity market updates (e.g., 'Oil held near $50') with specific price levels, whereas A mentions energy sectors without numerical benchmarks.",
      "Dataset B headlines regularly reference shelf registration statements and SEC filings (e.g., 'files for senior notes offering'), unlike A's regulatory mentions which focus on policy decisions.",
      "B contains explicit dividend declarations with exact amounts (e.g., 'NZ$0.16 dividend'), while A discusses shareholder value generically without payout specifics.",
      "Dataset B includes social media markers like hashtags (#MarketScreener) and Twitter-style commentary absent in A's formal news structures."
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines often omit company names, relying solely on stock tickers (e.g., 'Okta -2%' vs. A's '$CMD - Carnival Stock Plummets...')",
      "Dataset B includes non-financial political/social events (e.g., 'Conservatives make modest spending pledges', 'Watchdog faults FBI probe') absent in A",
      "Dataset B contains informal social media elements (hashtags, mentions like '#NVIDIA #Stock') unlike A's formal news style",
      "Dataset B features general market commentary/advice (e.g., 'Cash is trash? No, cash is king!') not tied to specific entities, unlike A's company-specific focus",
      "Dataset B includes non-English/non-market phrases (e.g., 'So Halsey needs a shower', song lyrics) irrelevant to finance, absent in A",
      "Dataset B references legal/geopolitical events without direct corporate ties (e.g., 'New York hits Juul with lawsuit') more frequently than A",
      "Dataset B uses partial sentences/clipped text (e.g., 'OPEC+ Extends Virus Crisis Talks as Russia Resists Oil Cuts...') with ellipses, unlike A's complete headlines",
      "Dataset B includes technical trading recaps (e.g., 'RECAP 12/10 Unusual Puts...') listing option trades, which A avoids",
      "Dataset B shows more international economic focus (e.g., 'Revised Mexican data', 'Saudis Slash Oil Prices in Asia') vs. A's US-centric corporate news",
      "Dataset B contains standalone macroeconomic forecasts (e.g., 'Gold will reach $1,600') without linking to specific stocks/ETFs, unlike A's granular ticker ties"
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Dataset B headlines frequently include stock ticker symbols (e.g., '$MGI', '$SRNE'), while Dataset A headlines omit them entirely.",
      "Dataset B headlines often reference premarket/after-hours price movements (e.g., 'slips 27% premarket'), whereas Dataset A focuses on regular trading session movements.",
      "Dataset B contains fragmented headlines with hashtags, social media handles (e.g., '#NVIDIA #Stock'), and non-standard formatting, unlike the complete sentences in Dataset A.",
      "Dataset B includes niche clinical trial updates (e.g., 'AG10 in ATTR-CM') and specific drug mechanism mentions, while Dataset A uses generic terms like 'Breakthrough Drug'.",
      "Dataset B headlines frequently cite granular guidance revisions (e.g., 'guidance cut') without broader economic context, whereas Dataset A ties guidance to macroeconomic themes like inflation.",
      "Dataset B incorporates opinion-driven phrases (e.g., 'Love the news', 'Unpopular Opinion') absent in Dataset A's neutral reporting style.",
      "Dataset B features trading-specific terminology (e.g., 'Unusual Puts', 'Momentum', 'HOD') not found in Dataset A's headlines.",
      "Dataset B includes non-financial political/cultural events (e.g., 'Watchdog faults FBI', 'Taylor Swift sings') as market context, unlike Dataset A's strict financial/economic focus.",
      "Dataset B headlines often quantify revenue/earnings misses/beats with exact dollar amounts (e.g., 'EPS misses by $0.52'), while Dataset A uses qualitative descriptors like 'beats expectations'.",
      "Dataset B references obscure/small-cap companies (e.g., 'Eidos', 'Apellis Pharmaceuticals') more frequently than Dataset A's focus on major corporations and sectors."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Dataset B headlines include specific legal actions, regulatory updates, and lawsuits (e.g., 'New York hits Juul with a lawsuit'), whereas Dataset A focuses on analyst actions without legal details.",
      "Dataset B references non-financial entities (e.g., Netflix, Disney) and cultural events (e.g., 'So Halsey needs a shower'), while Dataset A centers on financial institutions (e.g., Morgan Stanley, Goldman Sachs).",
      "Dataset B contains explicit mentions of options trading and derivatives (e.g., 'RECAP 12/10 Unusual Puts'), which are absent in Dataset A.",
      "Dataset B includes granular operational updates (e.g., drug trial results like 'AG10 in ATTR-CM') and product-specific news, while Dataset A highlights broader company performance metrics.",
      "Dataset B headlines frequently cite social media platforms, hashtags, or URLs (e.g., '@MadMoneyOnCNBC', '#MarketScreener'), unlike the formal tone of Dataset A.",
      "Dataset B covers geopolitical and macroeconomic events with direct country-specific impacts (e.g., 'China delays trade data'), whereas Dataset A contextualizes trends via indices (e.g., Dow Jones).",
      "Dataset B features mixed-sentiment guidance (e.g., 'modest bottom-line guidance' for Twilio), while Dataset A emphasizes clear analyst upgrades/downgrades.",
      "Dataset B includes niche financial terms (e.g., 'mixed shelf', 'FFO') and non-equity instruments (e.g., commodities), whereas Dataset A focuses on EPS, revenue, and stock targets.",
      "Dataset B integrates consumer-facing news (e.g., 'AMC rallies after Frozen 2 weekend'), unlike Dataset A's institutional focus.",
      "Dataset B headlines often reference precise premarket/postmarket price movements (e.g., 'Myriad Genetics slips 27% premarket'), while Dataset A emphasizes intraday trends."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently include social media elements (e.g., hashtags, @mentions, tweet-style formatting) absent in Dataset A",
      "Dataset B contains explicit references to geopolitical events, legal actions, and regulatory investigations not emphasized in Dataset A",
      "Dataset B headlines incorporate direct quotes, colloquial language, and conversational tones (e.g., \"Cash is trash?\") unlike the formal tone of Dataset A",
      "Dataset B includes non-English characters/unicode in stock commentary (e.g., Chinese text) not observed in Dataset A samples",
      "Dataset B features macroeconomic commentary (e.g., GDP forecasts, currency analysis) as standalone headlines, whereas Dataset A ties economic data strictly to company impacts",
      "Dataset B contains explicit COVID-19 pandemic references and pandemic-related market impacts absent from Dataset A headlines",
      "Dataset B headlines reference cryptocurrency/blockchain developments and technical analysis patterns (e.g., \"bull flags\") not present in Dataset A",
      "Dataset B includes consumer brand/cultural references (e.g., Taylor Swift, Marvel comics) unrelated to financial metrics, unlike Dataset A's strict corporate focus",
      "Dataset B samples show frequent use of urgency indicators like \"premarket\", \"after hours\", and real-time trading alerts less systematically noted in Dataset A",
      "Dataset B contains headlines structured as questions or speculative scenarios (\"Could Oil Markets Take A Hit?\") whereas Dataset A maintains declarative statements"
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently omit explicit mentions of institutional analysts/banks driving actions (e.g., 'Okta -2% on valuation downgrade' vs A's 'Barclays upgrades...')",
      "Dataset B contains direct social media formatting elements like hashtags (#NVIDIA) and truncated Twitter-style text ('...') not present in A",
      "Dataset B includes political/non-financial institutional references (elections, FBI, Trump campaign) absent from A's purely financial institutions focus",
      "Dataset B headlines more frequently use technical trading terminology ('bull flags breaking out','resistance level','premarket') vs A's fundamental analysis language",
      "Dataset B shows greater inclusion of macroeconomic policy statements (Fed rate decisions, OPEC+ meetings) as primary drivers vs A's company-specific event focus",
      "Dataset B contains incomplete sentence structures and tweet-style fragments ('RECAP 12/10 Unusual Puts...') unlike A's complete grammatical sentences",
      "Dataset B references non-corporate entities impacting markets (universities, film characters, musicians) that never appear in A's institutional focus",
      "Dataset B includes direct quotes from non-analyst sources (journalists, random traders) absent from A's institutional voice",
      "Dataset B shows frequent use of emojis and informal sentiment markers ('TSLA short \ud83e\udd23') not present in A's formal tone",
      "Dataset B contains explicit references to trading positions/strategies (puts, calls, short interest) missing from A's fundamental analysis focus"
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Dataset B headlines predominantly use ticker symbols (e.g., \"$AAPL\") instead of full company names, while Dataset A uses formal company names (e.g., \"Apple\").",
      "Dataset B includes premarket/after-hours price movements (e.g., \"slips 27% premarket\"), whereas Dataset A focuses on regular trading hours.",
      "Dataset B headlines are fragmented and often lack full sentence structure (e.g., \"$SRNE Took some. Love the news.\"), while Dataset A uses complete, formal sentences.",
      "Dataset B incorporates social media-style elements like hashtags (e.g., \"#NVIDIA #Stock\") and mentions of platforms (e.g., \"MarketScreener\"), absent in Dataset A.",
      "Dataset B references niche financial metrics (e.g., \"FFO misses by $0.01\") and trading-specific terms (e.g., \"unusual puts\"), while Dataset A uses broader terms like \"earnings miss.\"",
      "Dataset B includes direct quotes, conversational language (e.g., \"Cash is trash? No, cash is king!\"), and investor commentary, unlike the neutral tone of Dataset A.",
      "Dataset B features granular trading updates (e.g., \"RECAP 12/10 Unusual Puts\") and equity/debt offerings (e.g., \"files for senior notes\"), whereas Dataset A emphasizes macroeconomic trends.",
      "Dataset B headlines often omit explicit causal explanations (e.g., \"Okta -2% on valuation downgrade\"), while Dataset A provides contextual details (e.g., \"due to supply chain disruptions\").",
      "Dataset B includes mixed content types (e.g., dividend declarations, legal fines, non-financial news impacting stocks), whereas Dataset A focuses strictly on direct financial/economic events.",
      "Dataset B uses abbreviated or shorthand language (e.g., \"EPS beats by $0.01\"), while Dataset A spells out terms fully (e.g., \"earnings per share\")."
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines often omit specific analyst firm attributions (e.g., Morgan Stanley, JPMorgan), whereas Dataset A consistently names them.",
      "Dataset B includes non-corporate news (e.g., political events, regulatory watchdog actions) unrelated to direct analyst actions, while Dataset A focuses exclusively on company-specific analyst-driven events.",
      "Dataset B headlines frequently reference macroeconomic indicators (e.g., oil prices, GDP) without linking them to analyst commentary, whereas Dataset A ties such mentions to analyst insights.",
      "Dataset B uses casual, conversational phrases (e.g., 'Love the news,' 'Big catalyst coming up') absent in Dataset A\u2019s formal tone.",
      "Dataset B incorporates social media-style elements (e.g., hashtags like #MarketScreener, tweet-like syntax), while Dataset A maintains a structured, professional format.",
      "Dataset B headlines often lack granular financial metrics (e.g., '$2.8 billion revenue miss') seen in Dataset A, instead using vague terms like 'disappoints' or 'beats estimates.'",
      "Dataset B includes non-equity assets (e.g., currencies like AUD/USD, commodities like crude oil) as market movers, whereas Dataset A focuses solely on equities.",
      "Dataset B features geopolitical or sector-agnostic events (e.g., 'Saudis Slash Oil Prices in Asia') without tying them to specific companies, unlike Dataset A\u2019s company-centric catalysts.",
      "Dataset B headlines occasionally omit ticker symbols entirely (e.g., 'Okta -2%') despite the stated similarity, whereas Dataset A consistently includes both tickers and company names.",
      "Dataset B contains forward-looking statements about non-corporate entities (e.g., 'China delays trade data') without analyst linkage, while Dataset A ties such statements to company performance."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "Dataset B includes headlines referencing non-corporate entities (e.g., political parties, regulatory agencies, or legal actions), whereas A focuses strictly on companies and institutional analysts.",
      "Dataset B frequently omits the '$' prefix in stock tickers (e.g., 'NVIDIA :...' instead of '$NVDA'), while A consistently uses the ticker symbol format.",
      "Dataset B incorporates non-equity assets (e.g., oil prices, currency pairs like AUD/USD, or commodities) in headlines, which are absent in A.",
      "Dataset B contains social media elements (e.g., hashtags, @mentions, or truncated URLs) and informal language, unlike A's polished news headlines.",
      "Dataset B includes casual investor commentary or trading strategies (e.g., 'So Halsey needs a shower,' 'Love the news'), while A maintains a formal tone focused on factual updates.",
      "Dataset B explicitly details legal/regulatory actions (e.g., lawsuits, fines, probes) more frequently than A, which emphasizes analyst actions and earnings.",
      "Dataset B covers international/non-U.S. entities (e.g., European Commission, OPEC, Asian markets) more granularly, whereas A focuses on U.S.-centric or global macro trends.",
      "Dataset B highlights technical financial metrics (e.g., FFO, puts/calls, dividend declarations) specific to niche sectors, while A emphasizes broader metrics like EPS and revenue.",
      "Dataset B includes headlines structured as trading recaps (e.g., 'RECAP 12/10 Unusual Puts') or market mechanics, absent in A.",
      "Dataset B features mixed analyst sentiment within a single headline (e.g., 'beats on revenue but shares fall'), whereas A typically presents a unified analyst perspective per headline."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines frequently omit explicit mentions of specific financial institutions or analyst firms (e.g., Morgan Stanley, Oppenheimer) when describing rating actions or price target changes.",
      "Dataset B includes headlines focused on political, legal, or regulatory events unrelated to corporate strategy (e.g., FBI probe critiques, lawsuits against non-financial entities).",
      "Dataset B contains explicit references to cryptocurrencies and blockchain technologies (e.g., Ethereum, Stellar\u2019s Lumen), unlike Dataset A.",
      "Dataset B headlines often lack granular numerical specifics for earnings or financial metrics (e.g., 'misses on revenue' vs. 'Q4 revenue misses by $0.52').",
      "Dataset B features direct quotes, rhetorical questions, or colloquial language (e.g., 'Cash is trash? No, cash is king!'), absent in Dataset A.",
      "Dataset B includes headlines about non-equity financial instruments like commodities (e.g., oil, gold) and currency pairs (e.g., AUD/USD, EUR/USD).",
      "Dataset B headlines reference social media platforms (e.g., Twitter, CNBC) and user-generated content (e.g., tweets) as primary sources.",
      "Dataset B contains granular macroeconomic forecasts (e.g., 'China lowering growth target to ~6%') without tying them to specific corporate impacts.",
      "Dataset B features headlines about entertainment or cultural events directly impacting stocks (e.g., AMC rallying due to Frozen 2 performance).",
      "Dataset B uses hashtags (e.g., #markets, #finance) and external article references (e.g., 'via @bopinion') within headlines, unlike Dataset A."
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Dataset B includes headlines with social media-style elements such as hashtags (e.g., #MarketScreener) and informal language (e.g., 'Took some. Love the news'), absent in A.",
      "Headlines in B frequently reference geopolitical events (e.g., 'Saudis Slash Oil Prices in Asia') or regulatory actions (e.g., 'New York hits Juul with a lawsuit'), whereas A focuses on company-specific financial metrics.",
      "Dataset B contains truncated headlines with ellipses (e.g., 'Follow this and any other stock on\u2026'), suggesting excerpts from longer content, unlike A's complete, standalone headlines.",
      "B includes mentions of non-corporate entities (e.g., countries, political figures) and macroeconomic trends (e.g., 'China delays trade data'), while A centers on firms and their financial performance.",
      "Casual or conversational language (e.g., 'So Halsey needs a shower') appears in B but is absent in A, which uses formal, structured financial terminology.",
      "Dataset B references cryptocurrency/blockchain (e.g., 'Major blockchain developer ConsenSys announces job losses'), whereas A lacks crypto-related content.",
      "B features explicit mentions of options/futures (e.g., '$ITCI Jan 6 P') and commodity price forecasts, while A focuses on equities and ETFs.",
      "Headlines in B include user-generated content markers (e.g., 'RECAP 12/10 Unusual Puts') resembling forum posts, unlike A's institutional analyst-driven updates.",
      "B incorporates pop culture or unrelated tangents (e.g., 'Taylor Swift sings...'), diverging from A's strict focus on financial or corporate events.",
      "Dataset B frequently cites third-party media sources (e.g., 'via @bopinion') or external reports, whereas A relies on direct analyst actions or company disclosures."
    ],
    "llama3.3-70b_few-shot_v1": [
      "Dataset B headlines frequently include social media elements like hashtags (#NVIDIA), mentions (@BrianSozzi), and emojis (\ud83d\ude02), which are absent in Dataset A.",
      "Dataset B incorporates non-financial news (e.g., political updates, legal actions) that indirectly impact markets (e.g., 'Watchdog faults FBI in Trump campaign probe'), while Dataset A focuses strictly on direct financial events.",
      "Dataset B uses abbreviated formats and symbols (e.g., '-2% premarket', '$MGI (+4.2% pre)'), whereas Dataset A employs full-sentence structures (e.g., 'Amazon Stock Plummets 10%').",
      "Dataset B includes granular technical trading details (e.g., 'RECAP 12/10 Unusual Puts', 'bull flags breaking out'), while Dataset A emphasizes broader market trends and outcomes.",
      "Dataset B references niche industry developments (e.g., drug trials like 'AG10 in ATTR-CM') and sector-specific jargon, whereas Dataset A highlights general sector trends (e.g., 'tech stocks surge').",
      "Dataset B features international companies and regions (e.g., 'Naspers', 'Czech Republic') more prominently, while Dataset A is predominantly U.S.-centric (e.g., 'Dow Jones', 'Federal Reserve').",
      "Dataset B includes real-time updates and premarket/postmarket price movements (e.g., 'Myriad Genetics slips 27% premarket'), whereas Dataset A focuses on finalized market results.",
      "Dataset B mentions specific financial instruments (e.g., options, dividends, shelf filings) and regulatory actions (e.g., 'Moderna files for mixed shelf'), while Dataset A emphasizes earnings metrics and analyst ratings.",
      "Dataset B integrates editorialized commentary and quotes (e.g., 'Cash is king!'), whereas Dataset A maintains a neutral, reportorial tone in headlines.",
      "Dataset B covers a wider variety of asset classes (e.g., commodities, cryptocurrencies) and macroeconomic themes (e.g., 'OPEC+ oil cuts'), while Dataset A concentrates on equities and Fed policy."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently include concise stock price movements (e.g., '-2%', '+13%') in the opening phrase, whereas Dataset A typically embeds percentage changes later in the sentence or omits explicit numerical tags.",
      "Dataset B samples often reference premarket/postmarket price action (e.g., 'premarket', 'after hours'), while Dataset A focuses on regular trading hours or omits temporal trading context.",
      "Dataset B includes fragmented social media-style annotations (e.g., hashtags like #NVIDIA, truncated URLs, and tweet-like syntax), whereas Dataset A uses formal headline structures without platform-specific formatting.",
      "Dataset B contains non-equity asset references (e.g., crude oil prices, currency pairs like AUD/USD, and cryptocurrencies), while Dataset A focuses strictly on corporate equities and ETFs.",
      "Dataset B headlines frequently incorporate direct quotes from analysts or executives (e.g., \"Cash is trash? No, cash is king!\") rather than third-party reporting common in Dataset A.",
      "Dataset B includes granular technical analysis terminology (e.g., 'bull flags', 'pivot points', '52-week highs/lows') absent in Dataset A's fundamental-focused narratives.",
      "Dataset B samples show frequent use of parenthetical stock ticker references (e.g., '(NASDAQ:ZGNX)') alongside $ symbols, while Dataset A exclusively uses $ prefixes.",
      "Dataset B contains geopolitical/regulatory developments framed as standalone market forces (e.g., 'Saudis Slash Oil Prices'), whereas Dataset A ties these explicitly to specific ticker impacts.",
      "Dataset B includes retail investor vernacular (e.g., 'adding shares', 'sympathy play', 'under accumulation') absent from Dataset A's institutional tone.",
      "Dataset B features real-time earnings call annotations (e.g., 'preliminary Q1 - SA') and conference presentation alerts, while Dataset A focuses on finalized results and post-event analysis."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines include broader market factors (e.g., political events, regulatory actions) affecting stock movements, while A focuses solely on analyst actions and corporate financial metrics.",
      "Dataset B omits specific financial institutions when citing reasons for stock actions (e.g., \"downgrade\" without naming the firm), whereas A consistently attributes analyst actions to institutions like Barclays or Morgan Stanley.",
      "Dataset B contains social media elements (hashtags, URLs, casual language) absent in A\u2019s structured, institution-focused headlines.",
      "Dataset B highlights real-time percentage changes as primary triggers (e.g., \"-2%\"), while A emphasizes institutional rationale (e.g., \"downgraded due to demand outlook\").",
      "Dataset B covers non-corporate themes (legal issues, geopolitical events, consumer trends) absent in A\u2019s strictly financial/corporate focus.",
      "Dataset B uses informal phrases (e.g., \"slips,\" \"rallies\") for stock movements, whereas A employs standardized terms like \"downgraded\" or \"upgraded.\"",
      "Dataset B references non-equity instruments (commodities, currencies, ETFs), while A focuses exclusively on equities and related metrics.",
      "Dataset B often excludes ticker symbols, using company names alone (e.g., \"Okta\"), unlike A\u2019s consistent ticker-company pairing (e.g., \"$SAEX - saexploration\").",
      "Dataset B includes summaries/lists (e.g., \"RECAP 12/10 Unusual Puts\"), a format absent in A\u2019s individual event-focused headlines.",
      "Dataset B\u2019s forward-looking statements address general market trends (e.g., \"Goldman predicts gold prices\"), while A cites company-specific guidance (e.g., \"production targets\")."
    ],
    "llama3.1-8b_zero-shot_v1": [
      "Dataset B headlines frequently include specific stock ticker symbols (e.g., $MGI, $SRNE) and premarket/postmarket price movements, whereas A focuses on broader indices (e.g., Dow, NASDAQ) and general market trends.",
      "B emphasizes granular financial metrics (e.g., FFO, EPS misses/beats) and company-specific guidance revisions, while A highlights macroeconomic indicators (e.g., GDP, unemployment) as primary drivers.",
      "B incorporates mixed-content headlines blending financial updates with non-financial news (e.g., legal actions, regulatory probes), whereas A maintains a strict focus on financial/economic events.",
      "B uses fragmented, social media-style language (e.g., hashtags, abbreviated phrases like \"+4% pre\"), while A employs formal, complete sentences typical of news articles.",
      "B includes explicit mentions of options trading (e.g., puts, calls) and investor sentiment snippets (e.g., \"Love the news\"), which are absent in A.",
      "B frequently references small-cap or niche companies (e.g., Eidos, Myriad Genetics), whereas A focuses on large, well-known firms (e.g., Apple, Tesla, Amazon).",
      "B highlights dividend declarations and capital raises (e.g., \"Contact Energy declares NZ$0.16 dividend\"), while A prioritizes earnings beats/misses and sector-wide performance.",
      "B features analyst actions (e.g., target price adjustments, upgrades/downgrades) as standalone headlines, whereas A integrates them into broader market narratives.",
      "B includes forward-looking guidance details (e.g., \"2020 adj. currency-neutral EPS down 3%-4%\") with precise numerical thresholds, while A discusses guidance revisions qualitatively (e.g., \"reaffirmations\").",
      "B often combines technical trading terminology (e.g., \"momentum,\" \"pivot points\") with casual language, whereas A uses standardized financial jargon without colloquialisms."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Dataset B headlines frequently omit company names, relying solely on stock tickers (e.g., 'Okta -2%' vs. A's '$TSLA - Tesla's stock targets cut')",
      "Dataset B includes headlines with non-corporate/political entities (e.g., 'Conservatives make modest spending pledges', FBI probes) absent in A",
      "Dataset B contains casual language/social media elements (e.g., '$SRNE Took some. Love the news', hashtags) unlike A's formal news tone",
      "Dataset B more frequently cites exact metric deviations (e.g., 'misses by $0.52') while A uses generalized comparisons ('beats estimates')",
      "Dataset B includes non-equity financial instruments/terms (e.g., 'FFO misses', 'senior notes offering') not emphasized in A",
      "Dataset B features explicit premarket/after-hours price moves (e.g., 'Myriad Genetics slips 27% premarket') more than A",
      "Dataset B references non-financial cultural/niche events (e.g., comic book valuations, celebrity tweets) unrelated to A's corporate focus",
      "Dataset B includes raw trading data/order alerts (e.g., 'RECAP 12/10 Unusual Puts: $ITCI Jan 6 P') absent in A",
      "Dataset B headlines more frequently lack explicit attribution to analysts/institutions compared to A's specific citations (e.g., 'Morgan Stanley')",
      "Dataset B covers granular macroeconomic indicators (e.g., 'Building permits rise 5%') rather than A's company-driven economic narratives"
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines frequently omit company names, using only ticker symbols or standalone company references (e.g., 'Okta -2%' vs. A's 'Seadrill ($SDRL)').",
      "B includes headlines with non-financial political/social context (e.g., 'Watchdog faults FBI in Trump campaign probe') absent in A's pure business focus.",
      "B contains raw financial data outputs (e.g., 'Vir Biotechnology EPS misses by $0.52') while A contextualizes metrics within analyst narratives.",
      "B uses more specialized/niche financial terms (e.g., 'mixed shelf filing', 'C-band payout') compared to A's conventional earnings vocabulary.",
      "B incorporates social media-style commentary (e.g., '$SRNE Took some. Love the news') and tweet formatting not seen in A's formal headlines.",
      "B includes explicit references to non-equity assets (e.g., 'AUD/USD Weekly Price Forecast') unlike A's exclusive equity focus.",
      "B features event-driven updates tied to cultural phenomena (e.g., 'Frozen 2 weekend') vs. A's strict corporate developments.",
      "B shows macroeconomic guidance as primary drivers (e.g., 'Fed's Williams says... challenges') while A ties outlooks to specific company guidance.",
      "B contains legal/regulatory action mentions (e.g., 'New York hits Juul with lawsuit') as standalone catalysts absent in A's analyst-driven actions.",
      "B uses hashtags and social media markers (e.g., '#SP500 #index #MarketScreener') unlike A's clean financial reporting style."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Dataset B headlines frequently begin with stock tickers and immediate price changes (e.g., 'Okta -2%'), whereas A uses company names and contextualizes price movements within broader narratives.",
      "B includes explicit premarket/after-hours trading updates (e.g., 'premarket,' 'after hours'), while A focuses on regular trading hours and broader market sessions.",
      "B features fragmented, conversational language (e.g., '$SRNE Took some. Love the news.') and social media-style commentary, unlike A's formal, structured news tone.",
      "B often references specific trading instruments (e.g., options contracts like '$ITCI Jan 6 P'), whereas A rarely mentions derivatives or granular trading details.",
      "B highlights niche or smaller-cap companies (e.g., biotech firms like Eidos) and ETFs, while A emphasizes large-cap tech giants (e.g., Apple, Tesla) and macroeconomic trends.",
      "B includes granular analyst actions (e.g., 'valuation downgrade,' 'target lift') and earnings guidance cuts/raises as primary drivers, while A focuses on earnings results and sector-wide impacts.",
      "B incorporates technical trading jargon (e.g., 'bull flags,' 'resistance levels') more frequently and explicitly than A, which uses broader terms like 'new 52-week highs.'",
      "B contains legal/regulatory actions targeting specific companies (e.g., 'New York hits Juul with a lawsuit'), whereas A discusses regulatory impacts at an industry or policy level.",
      "B features cryptocurrency mentions (e.g., Bitcoin) and blockchain developments, which are absent in A's headlines.",
      "B includes partial sentences, hashtags (e.g., '#MarketScreener'), and non-English characters, suggesting aggregation from live feeds or social platforms, while A uses polished, complete sentences typical of traditional news outlets."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines more frequently omit explicit mention of specific financial institutions (e.g., Barclays, Morgan Stanley) when reporting analyst actions compared to Dataset A, which consistently names institutions.",
      "Dataset B includes headlines with non-financial/political context (e.g., election impacts, legal investigations) absent in Dataset A, which remains strictly market/company-focused.",
      "Dataset B uses abbreviated formatting for percentage changes (e.g., '-2%') without full sentences, while Dataset A consistently employs complete phrasal structures (e.g., 'reports mixed quarterly results').",
      "Dataset B contains social media-style elements like hashtags (#NVIDIA), URLs, and informal commentary not present in Dataset A's institutional tone.",
      "Dataset B includes technical trading terminology (e.g., 'bull flags breaking out', 'HOD') and retail investor slang absent from Dataset A's formal lexicon.",
      "Dataset B headlines frequently mention premarket/postmarket price movements (e.g., '+4.2% pre') while Dataset A focuses on intraday or general trends.",
      "Dataset B incorporates user-generated content and first-person commentary (e.g., 'Love the news', 'adding shares') whereas Dataset A maintains third-person objectivity.",
      "Dataset B shows more frequent use of standalone ticker symbols without company names (e.g., '$SINT') compared to Dataset A's consistent dual identification (company + ticker).",
      "Dataset B includes broader macroeconomic commentary (e.g., GDP forecasts, currency analysis) while Dataset A focuses narrowly on corporate-specific developments.",
      "Dataset B contains explicit references to technical chart patterns/resistance levels (e.g., 'slams into resistance') absent from Dataset A's fundamental analysis focus."
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines occasionally reference multiple stock tickers in a single headline (e.g., \"$XLE $NOV $XEC\"), while Dataset A focuses exclusively on individual tickers per headline.",
      "Dataset B includes macroeconomic or geopolitical context (e.g., \"China delays trade data; S&P cuts growth forecast\"), whereas Dataset A focuses solely on company-specific or sector-driven catalysts.",
      "Dataset B features informal language, social media-style commentary (e.g., \"$SRNE Took some. Love the news.\"), while Dataset A maintains formal, professional phrasing throughout.",
      "Dataset B incorporates hashtags and third-party data source tags (e.g., \"#MarketScreener\"), whereas Dataset A omits such annotations entirely.",
      "Dataset B includes live market updates or recaps (e.g., \"RECAP 12/10 Unusual Puts\"), while Dataset A headlines are standalone news items without temporal aggregation.",
      "Dataset B contains non-English characters or phrases (e.g., Chinese text in \"LVGO trades flat...\"), whereas Dataset A headlines are uniformly English-only.",
      "Dataset B uses interrogative or speculative phrasing (e.g., \"Can $MSFT head higher?\"), while Dataset A headlines are strictly declarative.",
      "Dataset B references external media programs or platforms (e.g., \"On @MadMoneyOnCNBC\"), whereas Dataset A avoids mentions of non-analyst entities.",
      "Dataset B includes non-financial event-driven news (e.g., \"Largest coal plant in the West shuts down\"), while Dataset A centers on earnings, guidance, or analyst actions.",
      "Dataset B mentions social media platforms or user engagement metrics (e.g., \"Snapchat usage remains flat\"), whereas Dataset A excludes social media context."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently include non-financial entities/events impacting markets (e.g., political parties, legal rulings, or geopolitical developments) absent in A",
      "Dataset B samples contain fragmented formatting with social media-style elements (hashtags, ticker callouts, URLs) not seen in A's standardized structure",
      "Dataset B incorporates casual conversational phrases (e.g., 'Love the news', 'Anyone catching...') contrasting with A's formal institutional tone",
      "Dataset B includes direct quotes/tweets and commentary attribution (e.g., 'says @BrianSozzi') while A maintains third-person reporting",
      "Dataset B headlines reference non-corporate market drivers like weather patterns or cultural events (e.g., 'Frozen 2 weekend') unlike A's strict corporate focus",
      "Dataset B shows mixed language entries with untranslated foreign text/characters (e.g., Chinese phrases) absent in A's English-only formatting",
      "Dataset B uses abbreviated stock movement descriptions (e.g., '-2%') without contextual metrics that A consistently provides (e.g., 'cuts target to $25')",
      "Dataset B includes speculative trading alerts/recaps (e.g., 'RECAP 12/10 Unusual Puts') not present in A's event-driven headlines",
      "Dataset B references macroeconomic policy tools (e.g., Fed rates, OPEC+ decisions) as primary drivers more frequently than A's company-specific catalysts",
      "Dataset B contains partial sentence structures and interjected opinions (e.g., 'CaPre (TRILOGY...)') contrasting with A's complete grammatical statements"
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Dataset B headlines frequently include explicit premarket/after-hours stock price movements (e.g., '-2% premarket'), while Dataset A typically describes price changes generically (e.g., 'Shares Plummet').",
      "Dataset B incorporates non-financial news (e.g., political events, legal actions, regulatory updates) alongside financial updates, whereas Dataset A focuses exclusively on corporate financial performance and analyst actions.",
      "Dataset B often omits the source of analyst actions (e.g., 'Downgrades 4/7: $AAN...') while Dataset A consistently names institutions (e.g., 'Morgan Stanley Cuts...').",
      "Dataset B includes granular trading data (e.g., options activity, dividend declarations, technical terms like 'FFO misses') absent in Dataset A's earnings-centric headlines.",
      "Dataset B headlines use truncated text, social media hashtags (e.g., '#MarketScreener'), and informal annotations (e.g., '...'), unlike Dataset A's complete, formal sentences.",
      "Dataset B references a wider variety of asset classes (e.g., oil prices, cryptocurrencies, commodities) compared to Dataset A's narrower focus on equities and corporate metrics.",
      "Dataset B contains real-time updates (e.g., 'Stock Market Live Updates') and event-driven alerts, while Dataset A emphasizes retrospective analysis of earnings or guidance.",
      "Dataset B headlines frequently lack explicit explanations for stock movements (e.g., 'Okta -2%...'), whereas Dataset A consistently cites reasons (e.g., 'due to supply chain issues').",
      "Dataset B includes international economic data (e.g., 'China delays trade data') and geopolitical events more prominently than Dataset A.",
      "Dataset B features technical trading terms (e.g., 'bull flags,' 'momentum,' 'pivot') and investor sentiment phrases absent in Dataset A's institutional tone."
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines frequently include social media elements (hashtags, mentions, emojis) absent in A",
      "B contains non-financial/political news (e.g., FBI probes, election impacts) while A remains strictly financial",
      "B uses truncated text indicators (\u2026) suggesting automated scraping/cutting, unlike complete sentences in A",
      "B includes general market commentary/opinions (\"Cash is trash?\") while A focuses on specific corporate actions",
      "B shows live trading updates (premarket/after hours moves) rather than A's post-event analysis format",
      "B incorporates casual language/internet slang (\"woke up!!!\", \"Love the news\") absent in A's formal tone",
      "B references cryptocurrency/blockchain topics (Ethereum, ConsenSys) not present in A's traditional sector focus",
      "B contains multi-ticker recaps/options trades (\"RECAP 12/10 Unusual Puts\") unlike A's single-entity focus",
      "B includes macroeconomic data without corporate ties (GDP reports) vs A's company-specific economic impacts",
      "B features user-generated content/community sentiment (\"$SRNE Took some\") while A uses institutional sources"
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently incorporate social media elements (hashtags, mentions, URLs) absent in Dataset A",
      "B includes explicit geopolitical/consumer event impacts (election results, virus outbreaks) while A focuses on corporate-specific catalysts",
      "B contains technical trading terminology (resistance levels, buy blocks, momentum) not prevalent in A's institutional analysis language",
      "Dataset B headlines regularly reference non-corporate economic indicators (GDP, housing starts, PMI) unlike A's earnings-focused metrics",
      "B features direct quotes from executives/analysts and tweet-style commentary missing in A's third-party reporting voice",
      "Dataset B includes dividend declarations and capital structure events (offerings, raises) not emphasized in A's earnings/ratings focus",
      "B shows higher frequency of preliminary/unaudited results announcements compared to A's finalized earnings reports",
      "Dataset B headlines contain more consumer brand/product references (Frozen 2, Netflix cancellations) versus A's pure financial entity focus",
      "B uses real-time trading platform notation (premarket %, halted trading alerts) where A specifies temporal context through full phrases",
      "Dataset B incorporates legal/regulatory outcomes (lawsuits, license decisions) as primary drivers vs A's mentions as secondary factors"
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines omit explicit mentions of specific analyst/investment firms (e.g., Morgan Stanley/Barclays) when reporting actions",
      "Dataset B emphasizes immediate price reactions (%) without contextualizing them with forward-looking analyst projections",
      "Dataset B includes non-financial political/regulatory developments (e.g., FBI probes, election impacts) as market movers",
      "Dataset B contains casual conversational phrases and social media slang (e.g., 'Took some. Love the news', 'woke up !!!!')",
      "Dataset B incorporates technical trading terminology (e.g., 'bull flags breaking out', 'HOD', 'pivot')",
      "Dataset B features direct quotes/statements from companies/executives without analyst interpretation (e.g., 'Uber Declares an End to Growth at All Costs')",
      "Dataset B includes hashtags and social media platform references (e.g., #MarketScreener, Twitter mentions)",
      "Dataset B shows more frequent references to macroeconomic commodities (oil/gold prices) as primary price drivers",
      "Dataset B contains multi-stock summaries/recaps (e.g., 'RECAP 12/10 Unusual Puts') rather than single-company focus",
      "Dataset B reports raw earnings metrics (EPS/revenue figures) without contextualization about investor anticipation patterns"
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Dataset B headlines prominently feature stock ticker symbols and premarket/after-hours price movements (e.g., '-2% premarket'), while A uses full company names without intra-session specificity",
      "B contains fragmented social media-style updates (e.g., hashtags, @mentions, partial sentences) compared to A's complete sentence structures and traditional journalism formatting",
      "B frequently cites exact financial metric variances (e.g., 'misses by $0.52') versus A's generalized references to earnings performance ('miss expectations')",
      "B includes detailed trading instrument references (options puts, mixed shelf offerings) absent in A's broader market commentary",
      "B showcases real-time investor commentary/textspeak (e.g., '$SRNE Took some. Love the news') unlike A's formal third-person reporting",
      "B emphasizes specific analyst actions (target price changes, rating upgrades) as immediate price catalysts, while A focuses on institutional policy changes",
      "B contains technical trading terminology ('bull flags', 'HOD', 'pivot points') absent in A's macroeconomic language",
      "B references niche financial instruments (FFO, ADC conditioning agents) versus A's standardized metrics (EPS, GDP)",
      "B includes international market-specific updates (OPEC+ cuts, Czech statistics) where A focuses on US indices/Fed actions",
      "B features granular corporate actions (patent agreements, clinical trial phases) while A emphasizes sector-wide trends"
    ]
  }
}