{
  "sims": {
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Both datasets consistently include stock tickers prefixed with '$' (e.g., $AAPL, $PYPL) in headlines.",
      "Headlines frequently reference analyst actions like upgrades, downgrades, and price target adjustments.",
      "Numerical data (percentages, price targets, revenue figures) is prominently featured in most entries.",
      "Earnings reports (e.g., Q4 results, EPS beats/misses) are a central focus across all samples.",
      "Market volatility indicators (e.g., 'plunges,' 'surges,' 'downgrades') appear in both datasets.",
      "Specific financial metrics like revenue growth, profit margins, and guidance revisions are consistently cited.",
      "References to sector-specific ETFs (e.g., $XLU, $XLK) and indices (S&P 500, Nasdaq) are common.",
      "Mentions of corporate events (mergers, leadership changes, product launches) drive sentiment analysis.",
      "Forward-looking statements (e.g., '2023 outlook,' 'forecast cuts') are present in all samples.",
      "Regulatory/legal developments (e.g., probes, patent disputes) are cited as market-moving factors."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include headlines containing stock tickers prefixed with a dollar sign (e.g., $AAPL, $DELL, $COLL).",
      "Earnings reports (e.g., previews, results, beats/misses) are a central focus in both datasets (e.g., 'Align loses patent case' in A, 'Collabria Care Reports Strong Q2 Earnings' in B).",
      "Analyst actions (upgrades, downgrades, price target changes) are prominently featured (e.g., 'UBS Upgrades Constellation Brands' in A, 'Barclays Downgrades Bottling Investors' in B).",
      "Explicit mentions of stock price movements (e.g., 'shares up 3%', 'stock surges') are present in all samples.",
      "References to specific financial metrics (EPS, revenue, guidance) are consistent across both datasets (e.g., 'EPS misses by $0.33' in A, 'beats analyst expectations' in B).",
      "Sector-specific news (e.g., energy, tech, healthcare) is granularly tied to headlines in both datasets (e.g., 'oil plunges' in A, 'XLE down 1.5%' in B).",
      "Merger/acquisition activity and partnerships are highlighted (e.g., 'Charles Schwab to buy TD Ameritrade' in A, 'Apple Announces Acquisition of AR Startup' in B).",
      "Regulatory or legal developments (e.g., patent disputes, probes) are addressed (e.g., 'Align loses patent case' in A, 'regulatory scrutiny over 5G network deal' in B).",
      "Market index performance (e.g., Dow, S&P 500) and ETF/commodity trends are mentioned (e.g., 'Nasdaq up 2.68%' in A, 'SPDR S&P 500 ETF Trust hits new high' in B).",
      "Forward-looking statements (guidance, forecasts, strategic plans) appear in all samples (e.g., '2020 adj. EPS down 3%-4%' in A, 'future growth strategies' in B)."
    ],
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include headlines containing stock tickers prefixed with '$' (e.g., $NSPR, $RLGT).",
      "Headlines frequently reference earnings reports, forecasts, or financial results (e.g., 'Q4 2019 Earnings Preview,' 'Q3 Earnings Miss Analyst Expectations').",
      "Analyst actions (upgrades, downgrades, price target changes) are a recurring theme (e.g., 'GE upgraded from Sell,' 'Deutsche Bank cuts to Hold').",
      "Percentage changes in stock prices or financial metrics are explicitly stated (e.g., 'shares up 3%,' 'revenue up 20%').",
      "Sector-specific risks or trends are highlighted (e.g., 'oil price concerns,' 'supply chain disruptions').",
      "Mentions of macroeconomic factors (e.g., trade deals, inflation, GDP forecasts) appear in both datasets.",
      "Regulatory or legal developments impacting companies are noted (e.g., 'loses patent case,' 'downgrade due to supply chain disruptions').",
      "Merger/acquisition activity and strategic partnerships are featured (e.g., 'Charles Schwab to buy TD Ameritrade,' 'restructure Kevzara agreement').",
      "Revenue or EPS beats/misses relative to analyst expectations are quantified (e.g., 'EPS misses by $0.33,' 'beating analyst expectations').",
      "Forward-looking statements about market sentiment (e.g., 'long-term growth outlook,' 'cautious outlook') are common."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Both datasets include stock tickers prefixed with '$' (e.g., $TSLA in B, $AAPL in A).",
      "Headlines reference specific financial institutions (e.g., Morgan Stanley in B, SunTrust in A).",
      "Analyst actions (upgrades, downgrades, price target changes) are central to headlines in both datasets.",
      "Earnings reports (e.g., Q4, Q2) and results (EPS beats/misses) are frequently mentioned.",
      "Stock price reactions (e.g., 'shares up 3%' in A, 'shares trade flat' in B) are explicitly stated.",
      "Quantitative metrics like percentages (e.g., 'up 9%') or numerical price targets (e.g., '$140') are used.",
      "Valuation concerns (e.g., 'valuation concerns' in B, 'risky investment' in A) are recurring themes.",
      "Company-specific events (e.g., mergers, product launches) are highlighted in both datasets.",
      "Revenue performance (e.g., 'revenue surges' in A, 'ad revenue growth' in B) is a key focus.",
      "Market reactions to news (e.g., 'shares unchanged after hours' in B, 'stock surges' in A) are consistently noted."
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Both datasets focus on earnings reports, highlighting beats/misses and their impact on stock prices (e.g., A: \"Align loses patent case\", B: \"Tech Giant Reports Lower Earnings\").",
      "Stock price movements (percentage changes, surges, plunges) are explicitly quantified in headlines (e.g., A: \"shares up 3%\", B: \"Stock Plummets 10%\").",
      "Regulatory/legal developments affecting companies are covered (e.g., A: \"FDA Approves First Drug\", B: \"Antitrust Investigations\").",
      "Macroeconomic factors (e.g., oil prices, inflation, trade deals) drive market narratives (A: \"oil plunges\", B: \"Inflation Worries Mount\").",
      "Company-specific news (mergers, leadership changes, product launches) is central to headlines (A: \"Charles Schwab to buy TD Ameritrade\", B: \"Renewable Energy Firm Reports Record Profits\").",
      "Analyst actions (upgrades, downgrades, price targets) are frequently cited (A: \"UBS Upgrades Constellation Brands\", B: \"Analysts Expect Stable Markets\").",
      "Government/central bank policy decisions (e.g., Fed rate decisions, stimulus) are prominently featured (A: \"Fed's Williams says...\", B: \"Federal Reserve Announces Rate Decision\").",
      "Sector-specific trends (tech, energy, healthcare) are emphasized (A: \"renewable energy\", B: \"Renewable Energy Sector Surges\").",
      "Market indices (Dow, S&P 500) and benchmarks are referenced for broader context (A: \"Dow up 7.59%\", B: \"Stock Market Dips 3%\").",
      "Formal financial terminology (EPS, revenue, guidance) is consistently used (A: \"EPS misses by $0.33\", B: \"Q3 Earnings Show Drop\")."
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Headlines frequently reference stock tickers using a \"$\" prefix (e.g., $NSPR, $DISH).",
      "Earnings reports (e.g., Q1, Q2) and results (beats/misses) are prominently featured in both datasets.",
      "Analyst actions (upgrades, downgrades, price target changes) are consistently highlighted.",
      "Specific financial metrics like EPS, revenue, and guidance updates are explicitly mentioned.",
      "Mentions of mergers, acquisitions, or strategic partnerships appear across samples.",
      "References to financial institutions (e.g., Morgan Stanley, Barclays) providing analysis or ratings are common.",
      "Stock price movements (e.g., \"shares up 3%\", \"shares drop 7%\") are quantified with percentages.",
      "Regulatory, legal, or patent-related challenges impacting companies are addressed.",
      "Industry-specific risks (e.g., oil volatility, supply chain disruptions) are discussed in detail.",
      "Forward-looking statements (e.g., forecasts, guidance, economic projections) are included."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "Both datasets include headlines with stock tickers prefixed by a dollar sign (e.g., $AAPL, $XOM).",
      "Headlines frequently reference analyst actions (e.g., upgrades, downgrades, price target changes) from firms like Morgan Stanley, Barclays, and Goldman Sachs.",
      "Earnings reports, revenue results, and guidance (e.g., \"beats/misses estimates\") are central themes in both datasets.",
      "Market movements (e.g., indices like Dow, S&P 500) and sector-specific trends (e.g., energy, tech) are prominently covered.",
      "Percentages and numerical metrics (e.g., \"up 3%\", \"plunges 9%\") are consistently used to quantify price changes or performance.",
      "Mentions of mergers, acquisitions, or corporate restructuring (e.g., \"Charles Schwab to buy TD Ameritrade\") appear in both datasets.",
      "Regulatory, legal, or operational challenges (e.g., FDA approvals, patent disputes, manufacturing issues) are common focal points.",
      "Forward-looking statements (e.g., \"anticipating strong growth\", \"2020 EPS guidance\") are prevalent in headlines.",
      "Company-specific events (e.g., CEO changes, product launches, conference calls) are highlighted in both datasets.",
      "Headlines use standardized formatting for financial data (e.g., \"EPS beats by $0.11\", \"price target raised to $42\")."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Both datasets consistently include stock ticker symbols (e.g., $TSLA, $AAPL) within headlines to identify companies.",
      "Headlines frequently reference analyst actions (e.g., upgrades, downgrades, price target changes) from firms like Barclays, Morgan Stanley, and Oppenheimer.",
      "Earnings reports, forecasts, and financial metrics (e.g., EPS, revenue beats/misses) are central themes in both datasets.",
      "Price movements (e.g., \"shares up 3%\", \"stock surges\") are explicitly quantified and emphasized in most samples.",
      "Corporate events (e.g., mergers, leadership changes, product launches, clinical trials) are prominently featured across all headlines.",
      "Market indices (e.g., Dow, S&P 500, Russell 2000) and sector-specific trends (e.g., energy, biotech, cloud computing) are regularly cited as contextual drivers.",
      "Forward-looking statements (e.g., guidance revisions, economic outlooks, growth projections) are consistently included to frame narratives.",
      "Regulatory, macroeconomic, or geopolitical factors (e.g., Fed policy, trade deals, inflation, supply chain risks) are cited as catalysts for market movements.",
      "Industry-specific jargon (e.g., \"overweight\", \"underweight\", \"pipeline developments\", \"clinical trial results\") is used uniformly across samples.",
      "Headlines follow a standardized structure: ticker prominence, event/report focus, and quantifiable impact indicators (percentages, dollar figures, ratings)."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "Headlines reference specific companies, stocks, or financial instruments using tickers (e.g., $NSPR, $NVDA) or explicit names.",
      "Mentions of quarterly/yearly financial results (e.g., Q4 earnings, revenue beats/misses) are present in all samples.",
      "Analyst actions (upgrades, downgrades, price target adjustments) or institutional opinions are included in every headline.",
      "Numerical data (percentages, monetary values, timeframes) is consistently integrated (e.g., \"shares up 3%\", \"$26 billion deal\").",
      "Focus on stock price movements (e.g., \"surges\", \"drops\", \"rises\") or market index performance (e.g., Dow, S&P 500).",
      "Discussions of corporate events (mergers, partnerships, leadership changes, clinical trials) appear in all samples.",
      "Forward-looking statements (guidance updates, forecasts, strategic initiatives) are a recurring theme.",
      "Sector-specific terminology (e.g., energy, biotech, retail) contextualizes the financial impact of news.",
      "Regulatory or macroeconomic factors (e.g., trade deals, interest rates, OPEC+) influence content in every headline.",
      "Use of financial abbreviations/acronyms (EPS, ETF, FDA) is consistent across datasets."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Both datasets include headlines referencing specific stock ticker symbols (e.g., $TSLA, $AAPL) to denote companies.",
      "Headlines frequently mention earnings reports, earnings previews, or financial results (e.g., \"Q4 earnings beat expectations\").",
      "Analyst actions (e.g., upgrades, downgrades, price target revisions) are a recurring theme in both datasets.",
      "Stock price movements (e.g., \"shares up 3%,\" \"plunges 9%\") are explicitly quantified with percentages or numerical values.",
      "Market indices (e.g., Dow, S&P 500, Nasdaq) and their performance metrics are cited to contextualize broader market trends.",
      "Forward-looking statements (e.g., economic forecasts, production targets, revenue guidance) are consistently present.",
      "Quantitative metrics (e.g., EPS beats/misses, revenue figures, cost projections) are integral to headlines in both datasets.",
      "Sector-specific developments (e.g., tech, energy, healthcare) are highlighted to explain market impacts.",
      "Mergers, acquisitions, or partnerships (e.g., \"Charles Schwab to buy TD Ameritrade\") are common topics.",
      "Regulatory, legal, or macroeconomic factors (e.g., Fed policy, trade deals, OPEC+ decisions) are frequently addressed."
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Both datasets include headlines with stock tickers in parentheses (e.g., $AAPL, $GE).",
      "Headlines frequently reference percentage changes in stock prices (e.g., \"shares up 3%,\" \"shares drop\").",
      "Mentions of earnings reports (e.g., \"Q4 2019 Earnings Preview,\" \"Q3 earnings meet expectations\") are prominent in both.",
      "Analyst actions (e.g., \"price target raised/cut,\" \"upgraded/downgraded\") are a key focus in both datasets.",
      "Both include references to revenue/EPS beats/misses (e.g., \"EPS misses by $0.33,\" \"exceeds analyst estimates\").",
      "Company-specific events (e.g., mergers, partnerships, product launches) are highlighted in headlines from both datasets.",
      "Macroeconomic factors (e.g., trade deals, oil prices, interest rates) are contextual drivers in headlines.",
      "Regulatory or legal developments (e.g., patent disputes, investigations) appear in both datasets.",
      "Industry-specific trends (e.g., sustainability, supply chain challenges, tech innovation) are discussed in both.",
      "Headlines often blend quantitative metrics (e.g., revenue figures, guidance revisions) with qualitative commentary."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "Both datasets consistently include stock tickers prefixed with '$' symbol in headlines.",
      "Headlines in both datasets frequently mention percentage changes in stock prices (e.g., 'up 3%', 'falls 5%').",
      "Analyst actions (e.g., upgrades, downgrades, price target revisions) are prominently featured in both datasets.",
      "Quarterly earnings reports/previews and financial results (e.g., 'Q4 Earnings Miss Estimate') are central themes.",
      "Economic indicators like GDP growth, inflation, and recession risks are referenced across samples.",
      "Company-specific operational developments (e.g., mergers, executive changes, product launches) are highlighted in both.",
      "Market indices (e.g., Dow, S&P 500) and sector-specific performance are regularly mentioned.",
      "Verbs indicating volatility (e.g., 'plunges', 'surges', 'tumbles') are used to describe price movements in both datasets.",
      "External factors influencing markets (e.g., trade deals, regulatory scrutiny, geopolitical events) are addressed.",
      "Attribution to specific financial institutions/analysts (e.g., Morgan Stanley, Barclays) is common in both datasets."
    ],
    "llama3.1-8b_zero-shot_v1": [
      "Both datasets contain headlines referencing stock tickers with price movement percentages (e.g., 'shares up 3%' in A, 'Stock soars 15%' in B).",
      "Earnings reports and previews are central themes in both datasets (e.g., 'Under Armour Q4 2019 Earnings Preview' in A, 'Tesla's Earnings Surpass Expectations' in B).",
      "Analyst ratings, upgrades, and downgrades are frequently mentioned (e.g., 'SunTrust Robinson Humphrey' in A, 'analysts reiterate Buy Calls' in B).",
      "Market reactions to Federal Reserve policy changes or commentary appear in both datasets (e.g., 'Fed's Williams says U.S. economy...' in A, 'Fed Raises Interest Rates' in B).",
      "Headlines include macroeconomic indicators like GDP, unemployment, or consumer spending (e.g., 'small businesses fall behind on rent' in A, 'GDP Growth Surprises' in B).",
      "Sector-specific performance (e.g., tech, energy, pharmaceuticals) is highlighted in both (e.g., 'Medtronic's stock surges' in A, 'Tech Giants Soar' in B).",
      "Forward-looking statements about economic or corporate performance are common (e.g., '2020 adj. currency-neutral EPS down 3%-4%' in A, 'economists forecast moderate GDP growth' in B).",
      "Mentions of mergers, acquisitions, or corporate deals appear in both (e.g., 'Charles Schwab to buy TD Ameritrade' in A, 'ECB Announces New Stimulus Package' in B).",
      "Regulatory or legal developments impacting markets are covered (e.g., 'Align loses patent case' in A, 'FBI launches investigation' in B).",
      "Volatility terms like 'plunges,' 'surges,' 'beats/misses,' and 'tumbles' are used in both datasets to describe price action."
    ],
    "llama3.3-70b_few-shot_v1": [
      "All headlines mention specific financial entities (companies, stock tickers, indices, or institutions like the Federal Reserve).",
      "Every headline reports a financial event with immediate market implications (earnings, mergers, rate changes, economic data releases).",
      "Headlines emphasize stock price movements (e.g., surges, plummets) or financial performance outcomes (e.g., beats/misses).",
      "Numerical data (percentages, monetary figures, quarters like Q1/Q2) is consistently included to quantify impacts.",
      "References to market indices (Dow, Nasdaq, S&P) or sector-specific trends (tech, energy, healthcare) are present.",
      "Analyst/institutional actions (upgrades, downgrades, price targets, Fed decisions) drive many headlines.",
      "Events are tied to specific timeframes (e.g., quarterly results, real-time updates, future guidance).",
      "Headlines use cause-effect structures (e.g., 'X event leads to Y market reaction').",
      "Both micro (company-specific) and macro (economic policies, trade tensions) factors are covered.",
      "Terminology like 'EPS,' 'revenue,' 'dividend,' and 'guidance' is standard across all samples."
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Headlines frequently mention specific company ticker symbols (e.g., $AAPL, $TSLA, $NVDA).",
      "Earnings reports, revenue figures, and analyst expectations are central topics.",
      "Stock price movements (e.g., percentage gains/losses, price targets) are consistently highlighted.",
      "Analyst actions (upgrades, downgrades, price target adjustments) are prominently featured.",
      "Macroeconomic factors (e.g., Fed rate decisions, inflation, trade deals) influence sentiment.",
      "Company-specific events (mergers, product launches, regulatory issues) drive headlines.",
      "Market indices (Dow, Nasdaq, S&P 500) and sector performance are regularly referenced.",
      "Forward-looking statements (guidance revisions, growth forecasts) appear in most samples.",
      "Regulatory/legal developments (e.g., FDA approvals, lawsuits) are frequently cited.",
      "Sector-specific trends (e.g., tech, energy, retail) dominate thematic coverage."
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Both datasets include headlines referencing company-specific earnings reports with outcomes (beats/misses) directly impacting stock prices.",
      "Headlines in both datasets frequently mention stock price movements quantified with specific percentages (e.g., \"up 3%\", \"plummets 5%\").",
      "Central bank policies, particularly Federal Reserve interest rate decisions, are a recurring theme in both datasets.",
      "Coverage spans multiple sectors, including technology, energy, healthcare, and consumer goods, in both datasets.",
      "Analyst actions such as upgrades, downgrades, and price target adjustments are explicitly cited in headlines from both datasets.",
      "Financial terminology like EPS, revenue, dividends, and guidance is consistently used in both datasets.",
      "Macroeconomic factors (e.g., oil prices, inflation, trade tensions) are highlighted as drivers of market movements in both.",
      "Corporate actions such as mergers, acquisitions, and debt offerings are mentioned in headlines across both datasets.",
      "Real-time market updates (e.g., pre-market, after-hours trading, intraday price swings) are featured prominently in both.",
      "Forward-looking statements about economic forecasts, corporate guidance, or policy impacts are present in headlines from both datasets."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Both datasets consistently include stock tickers prefixed with $ (e.g., $AAPL, $TSLA) in headlines.",
      "Headlines frequently reference analyst actions (e.g., price target adjustments, upgrades/downgrades) from firms like Morgan Stanley, Barclays, and Goldman Sachs.",
      "Earnings reports are a central theme, with explicit mentions of EPS beats/misses and revenue performance (e.g., 'EPS beats by $0.06' in A, 'revenue exceeds expectations' in B).",
      "Quantitative metrics (e.g., percentage changes, revenue growth rates, price targets) are prominently featured in all samples.",
      "Market indices (e.g., Dow, S&P 500, Nasdaq) are regularly cited to contextualize broader market movements.",
      "Sector-specific coverage (e.g., energy, semiconductors, tech) appears in both datasets, often tied to macroeconomic trends.",
      "Regulatory/macroeconomic factors (e.g., Fed policy, trade deals, interest rates) are explicitly linked to stock performance in all samples.",
      "Forward-looking statements about guidance, production targets, or economic forecasts are consistently present (e.g., 'raises 2020 guidance' in A, 'reaffirms production targets' in B).",
      "Corporate events (e.g., mergers, product launches, legal disputes) are framed as catalysts for price movements across both datasets.",
      "Identical formatting conventions are used for earnings updates (e.g., 'beats/misses on revenue,' 'Q4 earnings') and analyst actions (e.g., 'maintains neutral rating')."
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Both datasets include references to specific companies or stock tickers (e.g., $AAPL in A, Tesla in B).",
      "Headlines frequently mention stock price movements (e.g., \"shares up 3%\" in A, \"Shares Plunge\" in B).",
      "Earnings reports and financial results are a recurring theme (e.g., \"beats on revenue\" in A, \"earnings miss\" in B).",
      "Analyst actions (e.g., upgrades, downgrades, price targets) are highlighted (e.g., \"PT raised\" in A, \"exceeds analysts' expectations\" in B).",
      "Macroeconomic factors (e.g., Fed policy, trade deals) are discussed in both (e.g., \"Mnuchin says trade deal\" in A, \"Fed Announces\" in B).",
      "Sector-specific performance is emphasized (e.g., energy in A, tech in B).",
      "Market sentiment terms (e.g., \"surges,\" \"plunges,\" \"optimistic\") are consistently used to describe trends.",
      "Numerical metrics (e.g., percentages, revenue figures) are prominently featured (e.g., \"EPS down 3%\" in A, \"GDP growth\" in B).",
      "Merger/acquisition activity and corporate deals are covered (e.g., \"Charles Schwab to buy TD Ameritrade\" in A, \"funding rounds\" in B).",
      "Forward-looking statements (e.g., guidance, forecasts) appear in both (e.g., \"2020 adj. EPS\" in A, \"economic forecasts\" in B)."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Headlines frequently include stock ticker symbols prefixed with a dollar sign (e.g., $AAPL, $TSLA).",
      "Mentions of percentage-based price movements (e.g., \"up 3%\", \"plunges 9%\") are common.",
      "Earnings reports (e.g., Q4 results, EPS beats/misses) are a recurring focus.",
      "Analyst actions (upgrades, downgrades, price target revisions) are explicitly cited.",
      "Corporate events (mergers, acquisitions, leadership changes) are prominently featured.",
      "Forward-looking statements (guidance, forecasts, macroeconomic predictions) are included.",
      "References to specific financial institutions (e.g., Morgan Stanley, Deutsche Bank) are frequent.",
      "Sector-specific developments (e.g., energy, tech, healthcare) are highlighted.",
      "Numerical data (e.g., revenue figures, dividend yields, growth rates) is consistently included.",
      "Market indices (e.g., Nasdaq, S&P 500) and broader economic trends are contextualized."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Headlines frequently mention company earnings reports, forecasts, or financial results (e.g., Q4 earnings, profit beats/misses).",
      "Stock price movements or market reactions to news are explicitly referenced (e.g., shares surge, stocks plunge).",
      "Analyst actions (upgrades/downgrades) and their impact on specific securities are highlighted.",
      "Sector-specific developments are covered across industries like tech, energy, healthcare, and consumer goods.",
      "Economic indicators (GDP, job reports, manufacturing data) are cited as market drivers.",
      "Regulatory/political developments (trade tensions, Fed decisions, legislation) are linked to market impacts.",
      "M&A activity, partnerships, or corporate restructuring events are reported with deal specifics.",
      "Commodity price fluctuations (oil, gas, metals) are tied to market movements.",
      "Central bank policies (interest rates, quantitative easing) are analyzed for economic implications.",
      "Sentiment phrases like \"surge,\" \"plunge,\" \"beats/misses,\" and \"optimism/pessimism\" are used to frame market reactions."
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Headlines in both datasets frequently reference stock tickers using the '$' symbol (e.g., $TSLA, $NVDA, $UAA).",
      "Both datasets emphasize earnings reports, including quarterly/annual results and metrics like EPS and revenue beats/misses.",
      "Analyst actions (e.g., upgrades, downgrades, price target revisions) are central themes in headlines across both datasets.",
      "Percentage changes in stock prices or financial metrics (e.g., \"up 3%\", \"down 5%\") are consistently highlighted.",
      "Macroeconomic events (e.g., Federal Reserve updates, trade deals, GDP trends) are frequently cited as market drivers.",
      "Financial terminology such as \"price target\", \"guidance\", and \"PT\" is used extensively in both datasets.",
      "Time-bound references (e.g., \"Q2 2020\", \"upcoming quarter\") anchor the recency of financial updates.",
      "Company-specific events (e.g., mergers, leadership changes, product launches) directly tied to market performance are reported.",
      "Market indices (e.g., Dow Jones, S&P 500, NASDAQ) and their movements are regularly mentioned.",
      "Headlines prioritize immediate financial impact, often starting with tickers/entities followed by concise event summaries."
    ],
    "llama3.1-8b_few-shot_v1": [
      "Both datasets include headlines referencing specific stock market indices (e.g., Dow, Nasdaq, S&P 500) and their daily performance fluctuations.",
      "Headlines in both datasets frequently mention corporate earnings reports, revenue results, and financial guidance updates.",
      "Both datasets highlight mergers, acquisitions, and strategic corporate partnerships (e.g., Charles Schwab/TD Ameritrade, Deutsche Bank asset unit sale).",
      "Central bank policies (e.g., Federal Reserve interest rate decisions) and macroeconomic monetary commentary are recurring themes.",
      "References to economic indicators like GDP growth, inflation rates, and employment data appear consistently across both datasets.",
      "Geopolitical events (e.g., US-China trade tensions, COVID-19 impacts) are cited as drivers of market volatility in headlines from both datasets.",
      "Legal/regulatory developments (e.g., FDA approvals, patent disputes, court rulings) feature prominently in company-specific news.",
      "Analyst actions such as stock upgrades/downgrades, price target adjustments, and earnings estimate revisions are frequently noted.",
      "Company-specific operational updates (e.g., CEO changes, product launches, R&D breakthroughs) are granularly covered in both datasets.",
      "Immediate market reactions (e.g., stock price movements, sector volatility) to news events are explicitly quantified in percentages or points."
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Both datasets include headlines referencing stock tickers prefixed with a dollar sign (e.g., $AAPL, $NVDA).",
      "Headlines in both datasets frequently mention analyst actions (e.g., upgrades, downgrades, price target revisions).",
      "Earnings reports, including beats/misses on EPS and revenue, are a central focus in both datasets.",
      "Both datasets highlight specific numerical metrics (e.g., percentage changes, revenue figures, growth rates).",
      "Market index movements (e.g., Dow, S&P 500) and sector-specific trends are regularly reported in both.",
      "Mergers, acquisitions, and partnerships are common topics across headlines in both datasets.",
      "Industry-specific updates (e.g., tech, healthcare, energy) are granularly covered in both datasets.",
      "Regulatory, legal, or patent-related developments (e.g., lawsuits, investigations) appear in both datasets.",
      "Macroeconomic factors (e.g., inflation, GDP, trade tensions) are frequently cited in headlines from both datasets.",
      "Forward-looking statements (e.g., guidance, forecasts, analyst predictions) are prevalent in both datasets."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Both datasets include headlines referencing stock tickers (e.g., $TSLA, $AAPL) and company names alongside price movements or news.",
      "Earnings reports, revenue results, and guidance (e.g., beats/misses, forecasts) are central themes in both datasets.",
      "Analyst actions (upgrades, downgrades, price target adjustments) are prominently featured in headlines from both datasets.",
      "Mentions of macroeconomic factors (e.g., interest rates, inflation, GDP growth) influence market sentiment in both datasets.",
      "Corporate events like mergers, acquisitions, layoffs, and leadership changes are highlighted in both datasets.",
      "Sector-specific news (e.g., tech, energy, pharmaceuticals) is covered in granular detail across both datasets.",
      "Market indices (e.g., Dow, Nasdaq, S&P 500) and their daily performance updates are referenced in headlines.",
      "Regulatory, legal, or political risks (e.g., antitrust probes, trade deals) are discussed as market-moving factors.",
      "Quantitative metrics (e.g., EPS, revenue figures, percentage gains/losses) are consistently included in headlines.",
      "Forward-looking statements (e.g., guidance revisions, economic forecasts, analyst projections) are common in both datasets."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Headlines in both datasets focus on financial markets, corporate performance, or economic indicators impacting investments.",
      "Both datasets frequently mention stock price movements using terms like 'plummet', 'surge', 'up X%', or 'down Y%'.",
      "References to specific companies, sectors (e.g., tech, energy), or financial institutions (e.g., Federal Reserve) are present in all samples.",
      "Earnings reports, quarterly results, or financial metrics (e.g., EPS, revenue) are central themes across all headlines.",
      "Market indices (e.g., Dow, S&P 500, Nasdaq) or macroeconomic trends (e.g., inflation, recession) are explicitly cited.",
      "Headlines highlight causal relationships between events (e.g., mergers, policy decisions) and market reactions.",
      "Analyst actions (e.g., upgrades, downgrades, price targets) or investor sentiment (e.g., 'fears', 'confidence') are consistently featured.",
      "Numerical data (e.g., percentage changes, monetary figures) is used to quantify financial outcomes in all samples.",
      "Industry-specific jargon (e.g., 'PT', 'ETFs', 'guidance', 'dividend') appears universally in both datasets.",
      "Temporal references (e.g., 'Q2 2020', 'premarket', 'after hours') anchor events to specific financial periods or timelines."
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Both datasets include stock ticker symbols prefixed with a '$' sign (e.g., $NSPR, $CMD) to identify companies.",
      "Headlines frequently mention percentage changes in stock prices or indices (e.g., 'up 3%', 'down 1.3%').",
      "Analyst actions (e.g., upgrades, downgrades, price target adjustments) are explicitly referenced in both datasets.",
      "Earnings reports (e.g., 'Q4 2019 Earnings Preview', 'Q3 Earnings Preview') and metrics like EPS beats/misses are consistently highlighted.",
      "Merger/acquisition announcements (e.g., 'Charles Schwab to buy TD Ameritrade', 'ENB and Enbridge Energy Partners Merge') appear in both datasets.",
      "Sector-specific trends (e.g., energy, healthcare, retail) are discussed in granular detail across samples.",
      "Market index performance (e.g., Dow, S&P 500, Nasdaq) is frequently cited to contextualize broader market movements.",
      "Regulatory/legal developments (e.g., 'Align loses patent case', 'Antitrust Probe') impact company narratives in both datasets.",
      "Forward-looking guidance (e.g., 'raises 2020 guidance', '2023 Growth Strategy') is explicitly mentioned to signal future performance.",
      "Product/service developments (e.g., 'FDA Approves First Drug', 'new product launch') are linked to stock price movements in headlines."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Both datasets include headlines referencing stock tickers prefixed with a dollar symbol (e.g., $NSPR, $COLL).",
      "Headlines frequently mention percentage changes in stock prices (e.g., \"shares up 3%\", \"stock gain 5%\").",
      "Earnings reports (e.g., Q4 2019, Q3 results) and revenue/earnings beats/misses are central to headlines in both datasets.",
      "Analyst actions (upgrades, downgrades, price target adjustments) are explicitly cited (e.g., \"Credit Suisse downgrades\", \"SunTrust raises target\").",
      "Company-specific events (mergers, partnerships, product launches, regulatory issues) are highlighted in both datasets (e.g., \"Tarveda merger\", \"supply chain disruptions\").",
      "Macroeconomic factors (trade deals, supply chains, industry headwinds) are frequently tied to stock movements in headlines.",
      "Sector-specific coverage (energy, biotech, retail, tech) is granular and consistent across both datasets.",
      "Headlines use standardized financial terminology (e.g., \"EPS beats\", \"guidance cut\", \"revenue surges\").",
      "Time-sensitive language (e.g., \"premarket\", \"after hours\", \"Q3\", \"FY2020\") is prevalent in both datasets.",
      "Analyst firms (e.g., Morgan Stanley, JPMorgan, Goldman Sachs) are explicitly named as sources of ratings/targets in headlines."
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Both datasets include stock tickers (e.g., $AAPL, $TSLA) to identify companies or financial instruments.",
      "Headlines in both datasets frequently reference analyst actions such as upgrades, downgrades, or price target revisions (e.g., Morgan Stanley, Barclays).",
      "Earnings reports, including mentions of EPS results and revenue performance, are a recurring theme in both datasets.",
      "Quantitative financial metrics (e.g., percentage changes, dollar amounts, growth rates) are explicitly stated in all headlines.",
      "Specific financial institutions (e.g., Goldman Sachs, Oppenheimer) are cited as sources of analysis or ratings.",
      "Corporate events like mergers, executive changes, or product launches are highlighted in both datasets.",
      "Stock price movements (e.g., \"shares up 3%\", \"falls 10%\") are consistently mentioned to contextualize market reactions.",
      "References to macroeconomic factors (e.g., oil prices, interest rates, GDP forecasts) appear across both datasets.",
      "Forward-looking statements (e.g., guidance updates, clinical trial expectations) are included to signal future performance.",
      "Sector-specific trends (e.g., energy, biotech, retail) are discussed in granular detail to explain market dynamics."
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include headlines with stock tickers prefixed with a dollar sign (e.g., $NSPR, $VZ).",
      "Analyst actions (upgrades, downgrades, price target changes) are frequently mentioned in both datasets.",
      "Earnings reports, forecasts, and previews (e.g., Q4 2019 Earnings Preview, Q3 Earnings on October 27th) are central themes.",
      "Explicit mentions of stock price movements (e.g., shares up 3%, climbs 8%) are common.",
      "Revenue beats/misses and financial metrics (e.g., misses on revenue, $2.8 billion revenue miss) are highlighted.",
      "Sector-specific trends (e.g., oil, retail, tech) are discussed across headlines in both datasets.",
      "Regulatory, legal, or patent-related developments (e.g., patent case, regulatory hurdles) are referenced.",
      "Market indices (e.g., Dow, Nasdaq, S&P 500) and ETF/stock performance metrics are frequently cited.",
      "Company-specific events (mergers, partnerships, leadership changes) are prominent in headlines.",
      "Forward-looking statements (guidance, growth projections, macroeconomic forecasts) are emphasized."
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "All headlines reference specific companies, financial instruments, or economic sectors by name, ticker, or industry context.",
      "Each headline discusses events or data impacting stock prices, market indices, or investor sentiment (e.g., earnings results, analyst actions, macroeconomic factors).",
      "All headlines include financial terminology such as \"earnings,\" \"revenue,\" \"guidance,\" \"EPS,\" or \"price target.\"",
      "Action verbs indicating financial changes (e.g., \"rises,\" \"cuts,\" \"downgrades,\" \"beats\") are present in every headline.",
      "Time-sensitive references (e.g., quarterly results like \"Q4 2020,\" fiscal years, or immediate market updates) are included in all samples.",
      "Proper nouns (e.g., company names like \"Tesla,\" indices like \"S&P 500,\" or institutions like \"JPMorgan\") appear in every headline.",
      "All headlines are structured concisely to prioritize key financial updates for investor consumption (e.g., earnings beats, mergers, guidance revisions).",
      "Quantitative data (e.g., stock price percentages, revenue figures) or qualitative assessments (e.g., analyst ratings, partnership announcements) are present in every sample.",
      "Mentions of financial periods (e.g., \"Q3,\" \"2020,\" \"full-year\") or forward-looking statements (e.g., \"raises outlook,\" \"cuts forecast\") are universal.",
      "All headlines are formatted to align with financial news conventions, emphasizing timeliness and relevance to market participants."
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "All headlines reference specific companies, stock tickers, or economic sectors directly impacting financial markets.",
      "Each headline includes numerical data such as percentages, stock price figures, or fiscal quarters to quantify financial performance.",
      "Mentions of earnings reports, revenue results, or forward-looking financial guidance are present in all samples.",
      "Analyst actions (upgrades, downgrades) or institutional perspectives (e.g., Morgan Stanley, Deutsche Bank) are explicitly referenced.",
      "Stock price movements (e.g., \"up 3%\", \"slides 5%\") or market index performance (e.g., Dow, S&P 500) are highlighted.",
      "Time-bound financial metrics (e.g., Q2 2023, FY2023) frame performance updates or forecasts in every headline.",
      "Corporate events such as mergers, acquisitions, or leadership changes are consistently covered.",
      "Use of financial terminology like \"price target\", \"EPS\", \"guidance\", and \"dividend\" is universal.",
      "Headlines emphasize immediate market reactions (e.g., \"shares surge\", \"tumbles\") to news or data releases.",
      "References to macroeconomic factors (e.g., trade tensions, oil prices, consumer trends) tie corporate events to broader market impacts."
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Both datasets include headlines referencing stock tickers using the '$' symbol (e.g., $NSPR in A; $TAP in B).",
      "Price movements (e.g., percentage gains/losses) are explicitly reported in all samples (e.g., 'shares up 3%' in A; 'price target cut' in B).",
      "Earnings reports and financial results (e.g., quarterly/annual earnings, revenue) are a central focus across all headlines (e.g., 'Q4 2019 Earnings Preview' in A; 'Q2 earnings release date' in B).",
      "Analyst actions (upgrades/downgrades) and price target adjustments are consistently highlighted (e.g., 'SunTrust raises target' in A; 'Goldman Sachs cuts target' in B).",
      "Mentions of specific financial metrics (e.g., EPS beats/misses, revenue guidance) appear in all samples (e.g., 'EPS misses by $0.33' in A; 'meets analyst expectations' in B).",
      "Corporate events (e.g., mergers, leadership changes, product launches) are documented in both datasets (e.g., 'TD Ameritrade acquisition' in A; 'partnership announcements' in B).",
      "Market indices (e.g., Dow, S&P 500) and sector-specific performance (e.g., energy, tech) are referenced across all samples (e.g., 'Nasdaq up 2.68%' in A; 'European stocks edge higher' in B).",
      "Regulatory/legal developments (e.g., patent disputes, FDA approvals) are addressed in all headlines (e.g., 'Align loses patent case' in A; 'FDA warning letter' in B).",
      "Use of industry-specific terminology (e.g., 'price target,' 'dividend,' 'revenue growth') is consistent across both datasets.",
      "Forward-looking statements (e.g., guidance updates, investor meetings) and sentiment analysis (e.g., 'bullish,' 'concerns') are present in all samples (e.g., '2020 guidance raised' in A; 'await further guidance' in B)."
    ]
  },
  "diffs_synth_from_real": {
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines predominantly focus on analyst actions (upgrades/downgrades) and price target adjustments, while Dataset A includes a broader range of corporate events like mergers, leadership changes, and product launches.",
      "Dataset B uses standardized phrases like 'cuts price target' or 'maintains neutral outlook' consistently, whereas Dataset A features more varied language including quotes, advice, and editorial commentary (e.g., 'Cash is trash?').",
      "Dataset A frequently references real-time market index performance updates (e.g., 'Dow up 3.90%'), while Dataset B lacks direct mentions of intraday index movements.",
      "Dataset A includes technical trading terms (e.g., 'resistance levels,' 'sympathy play') and investor strategies, which are absent in Dataset B's institutional analyst-driven narratives.",
      "Dataset B headlines explicitly name specific analyst firms (e.g., Morgan Stanley, Oppenheimer) in every entry, while Dataset A only occasionally cites sources like SunTrust or Seeking Alpha.",
      "Dataset A contains geopolitical/regulatory developments (e.g., Lebanon/IMF, antitrust probes) absent in Dataset B, which focuses narrowly on financial analyst perspectives.",
      "Dataset A includes forward-looking corporate guidance updates (e.g., 'raises 2020 guidance'), whereas Dataset B's forward statements are limited to analyst predictions rather than company-issued outlooks.",
      "Dataset A shows multilingual content (e.g., Chinese characters) and translated phrases, while Dataset B maintains uniform English language formatting.",
      "Dataset A features multi-ticker comparisons (e.g., '$DELL $HPE') and sector ETF analysis, whereas Dataset B concentrates on single-ticker analyst actions.",
      "Dataset A includes retail investor-focused elements like dividend yields and P/E ratios, while Dataset B assumes institutional knowledge of analyst rating systems without explanatory context."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently include explicit mentions of future guidance periods (e.g., 'Fiscal 2024 Guidance') in forward-looking statements, whereas Dataset A uses broader timeframes like annual projections (e.g., '2020 adj. EPS down 3%-4%').",
      "Dataset B features repetitive references to specific financial institutions like Barclays and Morgan Stanley in analyst actions, while Dataset A includes a wider variety of firms (e.g., UBS, SunTrust, Deutsche Bank).",
      "Dataset A headlines frequently integrate casual language, slang, or trader commentary (e.g., 'Risky but chart looks perfect for a bounce'), whereas Dataset B uses formal, structured language without colloquialisms.",
      "Dataset B emphasizes immediate stock price reactions to earnings results in nearly all relevant headlines (e.g., 'sending shares up 5%'), while Dataset A occasionally decouples earnings announcements from price movements.",
      "Dataset A includes non-English terms, translated content, or direct references to international markets (e.g., HKG:1458, Chinese consumers), whereas Dataset B focuses exclusively on English-language, U.S.-centric entities.",
      "Dataset B headlines systematically place stock tickers mid-sentence or post-company names (e.g., 'Apple ($AAPL)'), while Dataset A predominantly leads with tickers at headline starts (e.g., '$NSPR - InspireMD...').",
      "Dataset A incorporates direct quotes from named individuals or experts (e.g., '@jasonzweigwsj says'), a feature absent in Dataset B's institutional-focus headlines.",
      "Dataset B shows heightened emphasis on supply chain disruptions as recurring catalysts (e.g., 'faces supply chain disruptions'), whereas Dataset A mentions these less systematically.",
      "Dataset A references niche financial instruments like futures contracts (e.g., 'natural gas futures') and bond offerings, while Dataset B remains focused on equities and ETFs.",
      "Dataset B headlines consistently use full corporate branding (e.g., 'Walgreens Boots Alliance'), contrasting with Dataset A's frequent shorthand references (e.g., 'Charles Schwab')."
    ],
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines predominantly focus on analyst actions (upgrades/downgrades) without consistently mentioning immediate stock price percentage changes, whereas Dataset A frequently includes specific percentage changes in stock prices or financial metrics.",
      "Dataset B headlines are more likely to mention the specific financial institutions (e.g., Morgan Stanley, Oppenheimer) issuing analyst actions, while Dataset A references a broader range of sources including journalists, polls, and social media handles.",
      "Dataset A includes explicit mentions of macroeconomic index performance (e.g., Dow, S&P) with percentage movements, while Dataset B primarily references sector-specific ETFs like $SPY without direct index figures.",
      "Dataset A contains headlines with retail investor language (e.g., 'chart looks perfect for a bounce') and trading strategies, whereas Dataset B maintains a formal tone focused on institutional analyst perspectives.",
      "Dataset A features governmental/regulatory actions (e.g., Fed statements, IMF involvement) as standalone news items, while Dataset B rarely highlights these unless tied to company-specific analyst actions.",
      "Dataset A includes headlines about corporate events beyond financial results (e.g., CEO changes, patent disputes), while Dataset B remains narrowly focused on earnings outcomes and analyst ratings.",
      "Dataset B headlines systematically pair tickers with full company names (e.g., '$RLGT - Rigel Pharmaceuticals'), whereas Dataset A often uses tickers without immediate company identification.",
      "Dataset A references diverse financial instruments like futures, options, and debt offerings, while Dataset B focuses exclusively on equities and equity-related ETFs.",
      "Dataset A contains forward-looking statements from multiple perspectives (company guidance, economic forecasts), while Dataset B's outlooks are strictly tied to analyst price targets and rating changes.",
      "Dataset A includes explicit time-bound market updates (e.g., 'after hours,' 'premarket'), while Dataset B generally lacks temporal context for price movements beyond quarterly earnings cycles."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Headlines in Dataset B consistently begin with the name of the financial institution (e.g., 'Morgan Stanley downgrades...'), while Dataset A headlines vary in structure and often omit institutional references.",
      "Dataset B focuses predominantly on tech companies (e.g., $NVDA, $TSLA, Alphabet), whereas Dataset A covers a broader range of sectors, including healthcare, energy, and retail.",
      "All analyst actions in Dataset B explicitly state the rationale (e.g., 'valuation concerns,' 'production costs'), while Dataset A sometimes omits reasons for upgrades/downgrades.",
      "Dataset B headlines follow a rigid template: [Institution] [action] [ticker] - [reason]. Dataset A uses diverse formats, including questions, quotes, and multi-topic narratives.",
      "Earnings mentions in Dataset B emphasize upcoming report dates or generalized outcomes (e.g., 'mixed results'), whereas Dataset A specifies quantitative metrics like EPS beats/misses.",
      "Dataset B\u2019s quantitative focus is narrowly on price targets (e.g., 'raises to $140'), while Dataset A includes varied metrics such as revenue figures, dividend changes, and macroeconomic data.",
      "Dataset B cites a limited set of institutions (e.g., Morgan Stanley, Barclays), while Dataset A references a wider array of banks, analysts, and media outlets.",
      "Dataset B excludes broader economic/political context (e.g., Fed policy, trade deals) prevalent in Dataset A, which discusses oil prices, geopolitical risks, and consumer trends.",
      "Verbs in Dataset B are restricted to analyst actions (e.g., 'downgrades,' 'raises'), whereas Dataset A uses dynamic terms like 'surges,' 'plunges,' and 'nixes' for diverse events.",
      "Dataset B frequently describes earnings as 'mixed' without granular details, while Dataset A explicitly states beats/misses and revenue performance (e.g., 'EPS misses by $0.33')."
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Dataset B headlines avoid using company ticker symbols (e.g., $AAPL) entirely, unlike Dataset A which frequently includes them.",
      "Dataset B headlines are more formulaic and standardized in structure (e.g., \"[Entity] Reports [Outcome], Shares [Action]\"), while Dataset A uses varied phrasing and tone.",
      "Dataset B headlines omit references to pre-market/after-hours trading (e.g., \"after hours\" or \"pre\"), while Dataset A explicitly mentions these contexts.",
      "Dataset B headlines generalize sector movements (e.g., \"Renewable Energy Sector Surges\"), whereas Dataset A often specifies individual companies within sectors.",
      "Dataset B avoids informal language, colloquialisms, or rhetorical questions (e.g., \"Cash is trash?\"), which Dataset A occasionally includes.",
      "Dataset B headlines emphasize macroeconomic terms (e.g., \"recession fears,\" \"GDP growth\") more consistently as primary drivers, while Dataset A mixes macro themes with hyper-specific company developments.",
      "Dataset B headlines rarely cite analyst names/firms (e.g., \"SunTrust Robinson Humphrey\"), whereas Dataset A explicitly attributes analyst actions.",
      "Dataset B uses broader market descriptors (e.g., \"Stock Market Dips 3%\") instead of granular index-level data (e.g., \"Dow up 7.59%\") common in Dataset A.",
      "Dataset B focuses heavily on tech giants and renewable energy firms, while Dataset A covers a wider diversity of sectors (e.g., retail, healthcare, commodities).",
      "Dataset B headlines avoid referencing non-earnings corporate events (e.g., mergers, executive changes, legal cases) that Dataset A frequently includes."
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify exact dates/times for earnings calls, investor days, or strategic events (e.g., 'July 15, 2023'), while A rarely includes such temporal specificity.",
      "B focuses exclusively on corporate financial metrics (EPS, revenue) and analyst actions, whereas A incorporates broader socioeconomic factors like consumer confidence, geopolitical risks, and retail rent payments.",
      "All B samples use formal, institutional language structured as '[Analyst Firm] [Action] on [Ticker] due to [Rationale]', while A includes casual phrases, slang, and opinionated quotes (e.g., 'Cash is trash!').",
      "B headlines emphasize forward-looking strategic initiatives (partnerships, product launches, expansions), while A more frequently discusses reactive events like mergers, legal disputes, or immediate price movements.",
      "A explicitly quantifies stock price reactions in headlines (e.g., 'shares up 3%') alongside news triggers, whereas B typically separates earnings results from subsequent market reactions.",
      "A references non-corporate entities (IMF, Fed officials, politicians) in 20%+ of samples, while B exclusively cites corporations, financial institutions, and analysts.",
      "Retail investor perspectives (e.g., 'I like this as a sympathy play') appear uniquely in A, while B focuses solely on institutional analyst ratings and price targets.",
      "A includes interrogative headlines assessing investment viability (e.g., 'Is X a Risky Investment?'), whereas B uses declarative statements about analyst actions/results.",
      "B consistently pairs analyst actions with explicit price target figures (e.g., 'raises price target to $350'), while A often mentions rating changes without numerical targets.",
      "A integrates non-English characters/formatting issues (e.g., garbled text) and media citations (@jasonzweigwsj), while B maintains standardized formatting with rare multilingual exceptions."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently use lowercase for stock tickers and company names (e.g., \"$ulta\", \"saexploration\"), while Dataset A uses uppercase tickers and proper capitalization.",
      "Headlines in Dataset B are formulaically structured around analyst actions (e.g., \"[Ticker] - [Action] at [Firm] due to [Reason]\"), whereas Dataset A includes broader narratives, quotes, and non-analyst-driven news.",
      "Dataset B emphasizes specific analyst rating terminology (e.g., \"overweight\", \"underweight\", \"neutral\") in every headline, while Dataset A mentions ratings less consistently and uses broader terms like \"beats/misses\".",
      "All Dataset B headlines explicitly tie updates (e.g., earnings, guidance) to specific analyst firms (e.g., \"says Goldman Sachs\"), whereas Dataset A often omits firm attribution for standalone metrics (e.g., \"misses on revenue\").",
      "Dataset B frequently references specific fiscal quarters (e.g., \"Q2\", \"Q3\") and exact dates for earnings releases, while Dataset A uses more generalized timeframes (e.g., \"Q4 2019\").",
      "Headlines in Dataset B focus narrowly on analyst actions (upgrades/downgrades, price targets) and earnings guidance, while Dataset A covers diverse themes like geopolitical events, CEO changes, and investor advice.",
      "Dataset B avoids narrative elements (e.g., questions, quotes, opinion pieces) present in Dataset A (e.g., \"Is Amicus Therapeutics... Risky Investment?\").",
      "Dataset B headlines often pair rating changes with precise price target adjustments (e.g., \"cut to $25 from $30\"), whereas Dataset A sometimes mentions targets without linking them to rating actions.",
      "Dataset B consistently attributes market reactions (e.g., \"shares slump\") to analyst firms (e.g., \"according to Barclays\"), while Dataset A may state outcomes without explicit attribution.",
      "Dataset B uses repetitive phrasing (e.g., \"reports quarterly earnings, stock remains unchanged\") with minimal variation, whereas Dataset A employs diverse sentence structures and contextual details."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently include the analyst firm's name (e.g., Barclays, Morgan Stanley) as the primary source of action (upgrades/downgrades), while Dataset A sometimes references non-analyst sources (e.g., journalists, polls, or unnamed 'investors').",
      "Dataset B headlines strictly avoid informal language, trader slang, or personal opinions (e.g., 'Risky but chart looks perfect'), which appear frequently in Dataset A samples.",
      "Dataset B focuses narrowly on analyst actions and earnings reports, while Dataset A includes broader macroeconomic/political events (e.g., Lebanon IMF talks, Canada rail strikes, Fed policy statements).",
      "Dataset B headlines universally quantify analyst actions with explicit terms like 'price target cut/raised' or 'upgraded to overweight,' whereas Dataset A sometimes mentions ratings changes without quantification.",
      "Dataset B maintains consistent casing (mostly lowercase except tickers/firm names), while Dataset A uses irregular capitalization (e.g., 'STOCKS SURGE,' all-caps tickers mid-sentence).",
      "Dataset B excludes non-corporate entities/individual commentary (e.g., no equivalents to Dataset A's Jason Zweig quotes or Seeking Alpha articles appear in B samples).",
      "Dataset B headlines systematically append analyst rationale using 'citing...' clauses (e.g., 'citing strong cloud security demand'), while Dataset A often states outcomes without explicit causation.",
      "Dataset B avoids mentioning non-stock financial instruments (e.g., no equivalents to Dataset A's commodity prices, currency impacts, or bond market references appear in B).",
      "Dataset B excludes numerical performance comparisons to benchmarks (e.g., 'new highs outpacing lows' metrics common in Dataset A are absent from B samples).",
      "Dataset B headlines omit timestamp references (e.g., 'after hours,' 'premarket') that frequently appear in Dataset A's price movement descriptions."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently include explicit mentions of the financial institution or analyst firm issuing the rating/action (e.g., 'Goldman Sachs', 'Oppenheimer', 'Barclays') in every sample, while Dataset A sometimes omits institutional sources.",
      "Dataset B shows a higher frequency of supply chain-related challenges being cited as primary factors in corporate performance (e.g., 'faces supply chain delays', 'prices expected to rise'), whereas Dataset A emphasizes broader macroeconomic factors.",
      "All Dataset B samples use standardized formatting for earnings results ('Beats Analysts' Expectations', 'Misses Expectations'), while Dataset A employs varied colloquial phrasing ('misses by $0.33', 'EPS beats by $0.06').",
      "Dataset B contains multiple instances of non-Latin characters (e.g., Chinese script in '\u4e0b\u8abf\u5a01\u5bcc\u901a\u76ee\u6a19\u80a1\u50f9') in corporate communications, absent in Dataset A's English-only samples.",
      "Dataset B focuses more narrowly on immediate financial metrics and institutional actions, while Dataset A includes ancillary market commentary (e.g., 'Cash is trash? No, cash is king!').",
      "All Dataset B samples maintain consistent capitalization of proper nouns and tickers, whereas Dataset A contains mixed case usage in headlines (e.g., 'medtronic's stock surges' vs. '$DELL $HPE').",
      "Dataset B shows greater emphasis on specific clinical trial phases/medical approvals (e.g., 'Phase III trial results', 'FDA Approval For Innovative Cancer Treatment') compared to Dataset A's general references to biotech developments.",
      "Dataset B exclusively uses complete quarter/year identifiers (Q2 2023, Q4) in earnings references, while Dataset A occasionally uses shorthand ('Q4 2019 Earnings Preview').",
      "All Dataset B samples avoid social media handles (@jasonzweigwsj) and informal trading commentary ('I like this as a sympathy play'), which appear regularly in Dataset A.",
      "Dataset B demonstrates systematic inclusion of both company tickers and full names in headlines (e.g., '$NVDA - Nvidia's Q2 Earnings...'), whereas Dataset A alternates between ticker-only and name-only references."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Dataset B headlines maintain a strictly formal tone without colloquial language, quotes, or social media references (e.g., tweets), unlike Dataset A.",
      "Dataset B exclusively references large-cap, widely recognized companies, while Dataset A includes small/mid-cap firms (e.g., $NSPR, $ONVO).",
      "Dataset B avoids mentions of corporate executive changes (e.g., CEO resignations) entirely, which are recurring in Dataset A.",
      "Dataset B omits references to legal disputes, patent cases, or regulatory investigations, which appear in Dataset A (e.g., Align vs. 3Shape).",
      "Dataset B excludes geopolitical or regional economic crises (e.g., Lebanon, Brazil strikes) prevalent in Dataset A.",
      "Dataset B headlines focus heavily on technology sector dynamics, whereas Dataset A emphasizes broader sector diversity (e.g., energy, healthcare, retail).",
      "Dataset B frequently cites specific analyst firms (e.g., Morgan Stanley, Barclays) in rating actions, while Dataset A uses generic terms (e.g., 'analysts say').",
      "Dataset B employs formal analyst rating terminology (e.g., 'Underweight,' 'Overweight') absent in Dataset A\u2019s simpler 'upgrade/downgrade' language.",
      "Dataset B lacks references to consumer/retail sector struggles (e.g., rent defaults, small businesses) common in Dataset A.",
      "Dataset B avoids multilingual content (e.g., Chinese text) present in some of its headlines, a feature entirely absent in Dataset A."
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify the analyst firm or institution (e.g., 'Morgan Stanley,' 'Barclays') in analyst actions, while Dataset A often omits this detail.",
      "Dataset A includes headlines with informal trader commentary (e.g., 'Risky but chart looks perfect for a bounce'), whereas Dataset B maintains a formal tone focused on corporate updates.",
      "Dataset B emphasizes **upcoming events** (e.g., 'CES 2024 Conference,' 'Investor Day') as forward-looking catalysts, while Dataset A focuses on immediate post-event reactions (e.g., earnings releases, merger outcomes).",
      "Dataset A references **sovereign or national economic risks** (e.g., Lebanon/IMF, Brazil rail strikes), whereas Dataset B focuses on company-specific operational challenges (e.g., supply chain disruptions).",
      "Dataset B headlines explicitly name **ETFs and indices** (e.g., $SPY, $IWM, $UNG) as subjects, while Dataset A primarily references broad market indices (Dow, Nasdaq) without ETF tickers.",
      "Dataset A includes **consumer-facing retail trends** (e.g., Chinese consumer behavior, small business rent delays), whereas Dataset B centers on institutional financial metrics (earnings guidance, analyst ratings).",
      "Dataset B frequently mentions **sector-specific conferences** (e.g., JPMorgan Healthcare Conference) as narrative drivers, absent in Dataset A.",
      "Dataset A headlines blend **geopolitical developments** (e.g., Mnuchin trade comments, Colombia oil blocks) with market moves, while Dataset B ties macro factors to company performance (e.g., oil prices impacting $UNG).",
      "Dataset B uses **passive voice** for analyst actions (e.g., 'Analysts at Barclays Maintain Hold Rating'), whereas Dataset A uses active voice (e.g., 'SunTrust raises target').",
      "Dataset A incorporates **retail investor perspectives** (e.g., Seeking Alpha articles, hedge fund activity), while Dataset B focuses on institutional analyst sentiment and corporate strategy."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "Dataset B headlines focus more frequently on major tech companies (e.g., Apple, Tesla, Alphabet, Meta) compared to Dataset A, which covers a broader range of sectors like retail, energy, and healthcare.",
      "Dataset B emphasizes analyst actions from specific large financial institutions (e.g., Barclays, Morgan Stanley, Deutsche Bank) in nearly every headline, whereas Dataset A includes a wider variety of sources (e.g., regional banks, independent analysts).",
      "Dataset B headlines more often tie stock price movements directly to earnings misses/beats (e.g., 'Q3 Earnings Miss Estimate'), while Dataset A includes earnings previews and operational updates without always linking to immediate price reactions.",
      "Dataset B includes recurring mentions of cryptocurrency market dynamics and regulatory impacts, a theme absent in Dataset A.",
      "Dataset B headlines frequently reference global recession risks and macroeconomic slowdowns tied to analyst warnings (e.g., 'Morgan Stanley warns of global recession'), whereas Dataset A cites broader economic indicators (e.g., GDP, inflation) without as much emphasis on institutional recession forecasts.",
      "Dataset B uses stronger directional language for analyst actions (e.g., 'slashes,' 'downgrades to Sell') compared to Dataset A\u2019s more neutral phrasing (e.g., 'raised,' 'trimmed').",
      "Dataset B often highlights product-specific developments (e.g., 'new AI-powered smartphone,' 'gaming laptop') as drivers of stock moves, while Dataset A focuses more on mergers, executive changes, or sector-wide trends.",
      "Dataset B headlines more consistently include both the company name and stock ticker (e.g., '$AAPL - Apple Stock...'), whereas Dataset A sometimes omits the company name or uses tickers alone.",
      "Dataset B features repetitive mentions of price target revisions for the same companies (e.g., Apple, Tesla), whereas Dataset A\u2019s analyst actions apply to a wider array of firms.",
      "Dataset B frequently ties stock performance to future guidance (e.g., 'guidance concerns,' 'Q2 guidance reiterated'), while Dataset A focuses on past results or real-time events (e.g., 'EPS misses,' 'rent payments')."
    ],
    "llama3.1-8b_zero-shot_v1": [
      "Dataset A headlines consistently include stock ticker symbols in parentheses (e.g., $NSPR, $UAA), while B uses generic company names without tickers",
      "Dataset B focuses on broad market indices (Dow, S&P, NASDAQ) and global economic trends, while A emphasizes individual company performance and micro-level events",
      "Dataset A contains more forward-looking corporate guidance with specific numerical projections (e.g., '2020 adj. EPS down 3%-4%'), while B's forward-looking statements are general economic forecasts",
      "Dataset A includes secondary market elements like analyst price targets, conference call schedules, and debt offerings absent in B's samples",
      "Dataset B uses more dramatic volatility language ('plummets', 'crashes', 'skyrockets') compared to A's more technical terms ('up 3%', 'misses by $0.33')",
      "Dataset A contains mixed-format content including polls, CEO statements, and product news, while B maintains strict market/economics focus",
      "Dataset B headlines emphasize macroeconomic policy impacts (Fed decisions, GDP, unemployment), while A balances macro with micro corporate developments",
      "Dataset A includes specific legal/regulatory case details (patent disputes, investigations), while B mentions regulatory actions only at headline level",
      "Dataset A shows conversational elements (hashtags, Twitter handles, colloquial phrases), while B maintains formal journalistic tone throughout",
      "Dataset A contains time-specific market updates (premarket moves, intraday swings), while B focuses on end-of-day/consolidated market movements"
    ],
    "llama3.3-70b_few-shot_v1": [
      "Dataset B headlines predominantly focus on major, widely recognized companies (e.g., Amazon, Tesla, Federal Reserve) and indices (Dow, Nasdaq), while A includes niche firms, regional entities, and lesser-known tickers.",
      "Headlines in B are formulaic and repetitive in structure (e.g., '[Entity] [Verb] After [Event]'), whereas A uses varied syntactic constructions including quotes, analyst commentary, and multi-clause explanations.",
      "Dataset A frequently includes stock ticker symbols (e.g., $NSPR, $UAA) and pre/post-market price movements (+4.9% pre), while B rarely uses tickers and focuses on broader market reactions.",
      "B emphasizes binary outcomes (surge/plummet) tied to earnings reports or Fed decisions, while A covers diverse scenarios including mergers, patent disputes, rent payments, and sector-specific operational updates.",
      "Dataset A contains forward-looking guidance references ('2020 adj. EPS down 3%-4%'), whereas B focuses primarily on retrospective event reporting ('Q2 Earnings Miss Expectations').",
      "B headlines use standardized percentage movements (10% drop, 500-point fall) as primary metrics, while A employs heterogeneous numerical data (rent delinquency rates, oil block acquisitions, patent case outcomes).",
      "Dataset A includes conversational elements and social media references (@jasonzweigwsj, 'I like this as a sympathy play'), while B maintains formal, institutional tone throughout.",
      "B headlines feature repetitive temporal framing ('Quarterly Earnings Report', 'Monthly Jobs Report'), while A uses varied time references (real-time updates, historical comparisons, multi-year trends).",
      "Dataset A contains granular financial instruments (call diagonals, debt offerings, ETFs) and technical trading levels ('support/resistance'), whereas B focuses on macro-level market movements.",
      "B emphasizes symmetrical cause-effect relationships between events and market reactions, while A includes complex, multi-factor interactions (e.g., COVID-19 treatment data impacting biotech stocks)."
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Dataset B headlines focus more narrowly on major tech companies (e.g., $AAPL, $TSLA, $META) compared to Dataset A's broader sector coverage including retail, energy, and healthcare",
      "Dataset B shows standardized earnings report phrasing (e.g., 'Beats/Misses Expectations') while Dataset A uses more varied earnings-related terminology",
      "Dataset B contains repeated analyst firm references (Morgan Stanley/Barclays in 80%+ samples) versus Dataset A's wider range of institutional sources",
      "Dataset B headlines emphasize price target adjustments as primary analyst action, while Dataset A includes more diverse actions like upgrades/downgrades/M&A commentary",
      "Dataset B features bilingual text (English + Chinese characters) in multiple samples, reflecting globalized tech coverage absent in Dataset A",
      "Dataset B demonstrates formulaic structure (Company + Analyst Action + Rationale) in 90% of samples versus Dataset A's more varied headline constructions",
      "Dataset B contains explicit percentage figures for price targets/stock reactions in 70% of samples versus Dataset A's more qualitative movement descriptions",
      "Dataset B shows concentrated focus on supply chain issues and advertising revenue trends compared to Dataset A's diverse company-specific event coverage",
      "Dataset B headlines maintain formal capitalization/punctuation standards, while Dataset A includes casual elements (all caps, emojis, Twitter handles)",
      "Dataset B emphasizes quarterly earnings season patterns and sequential comparisons, whereas Dataset A includes more irregular event-driven updates"
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Dataset B headlines are more formulaic in structure, often starting with 'Federal Reserve' or 'Tech Giant' followed by a generalized outcome, whereas A uses more varied sentence structures and contextual details.",
      "Headlines in B frequently omit specific company tickers (e.g., $AAPL) and percentages for price movements, unlike A's explicit inclusion of symbols and granular metrics.",
      "B emphasizes sector-wide macroeconomic narratives (e.g., renewable energy surges, inflation trends) over granular company-specific events like mergers or patent disputes prevalent in A.",
      "Dataset A includes real-time trading updates (e.g., pre-market, after-hours) in headlines, while B focuses on scheduled policy meetings or retrospective earnings outcomes without intraday specificity.",
      "B uses passive voice and institutional framing (e.g., 'announces,' 'reports') more frequently, whereas A incorporates direct quotes, social media references, and investor advice (e.g., 'says @jasonzweigwsj').",
      "Dataset A headlines often pose questions or speculative statements (e.g., 'Is X Trading at a Discount?'), whereas B uses declarative, conclusive language (e.g., 'Analysts Predict a Downturn').",
      "B's headlines generalize corporate entities (e.g., 'Tech Giant,' 'Pharma Company') instead of naming specific firms as seen in A (e.g., 'Medtronic,' 'Align').",
      "Dataset A includes niche financial instruments (e.g., futures, call diagonals) and technical chart references, while B avoids such granular trading terminology.",
      "B disproportionately highlights renewable energy sector trends and government policy impacts, whereas A covers a broader mix of industries without thematic dominance.",
      "Dataset A integrates legal/regulatory outcomes (e.g., 'loses patent case,' 'insolvency risks') directly into headlines, while B treats regulations as abstract market forces."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Dataset B headlines frequently incorporate non-English characters (e.g., Chinese) in analyst actions or price target updates, while Dataset A remains exclusively English.",
      "Dataset A includes explicit technical analysis language (e.g., 'resistance,' 'chart looks perfect for a bounce,' 'Call Diagonals'), whereas Dataset B focuses strictly on fundamental metrics and analyst ratings.",
      "Dataset A covers a broader range of non-tech sectors (e.g., energy, healthcare, commodities, retail), while Dataset B is disproportionately focused on technology and semiconductor companies.",
      "Dataset A references granular geopolitical/regional developments (e.g., Lebanon-IMF negotiations, Brazil coffee production, Canada rail strikes) as price catalysts, whereas Dataset B ties performance primarily to macroeconomic indicators and Fed policy.",
      "Dataset A headlines regularly cite quotes/commentary from named individuals (e.g., @jasonzweigwsj, Mnuchin, Ben Bernanke), while Dataset B attributes analysis exclusively to institutions (e.g., 'analysts at Barclays').",
      "Dataset A contains headlines framed as investor guidance questions (e.g., 'Is XYZ a Risky Investment?'), whereas Dataset B uses declarative statements about analyst consensus.",
      "Dataset A explicitly references trading instruments beyond equities (e.g., ETFs like $SCHX, commodities futures, debt offerings), while Dataset B focuses solely on stocks and indices.",
      "Dataset B shows repetitive focus on cyclical earnings calendar updates (e.g., 'Q2 Earnings Season kicks off') across samples, whereas Dataset A includes irregular corporate events like CEO changes, patent disputes, and M&A cancellations.",
      "Dataset A uses inconsistent casing formatting (e.g., alternating uppercase/lowercase in tickers/nouns) and social media tags (@mentions, hashtags), while Dataset B maintains standardized casing outside occasional lowercase sentence starts.",
      "Dataset A specifies exact dates/timeframes (e.g., 'February 20, 2020 at 10:00 A.M. ET'), while Dataset B uses relative periodization (e.g., 'next quarter,' '2023 guidance')."
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Dataset A headlines frequently include stock ticker symbols (e.g., $AAPL) while B primarily references company names or sectors (e.g., 'Tech Sector').",
      "A emphasizes granular numerical metrics (e.g., 'shares up 3% after hours') whereas B uses broader terms like 'surges' or 'plunges' without precise percentages.",
      "A includes pre-market/post-market price movements (e.g., 'shares up 2% after hours'), while B focuses on intraday or general market trends.",
      "A references specific executive actions (e.g., 'CEO to step down') and legal outcomes (e.g., patent cases), whereas B highlights sector-wide trends (e.g., 'Tech Giants Cut Jobs').",
      "A covers niche sectors (e.g., energy, healthcare) and small/mid-cap companies, while B focuses predominantly on tech giants and macroeconomic themes.",
      "A uses informal language, slang, or quotes (e.g., 'Cash is trash?'), whereas B maintains formal, standardized phrasing (e.g., 'Economic Indicators Show Stable Growth').",
      "A includes granular analyst actions (e.g., 'price target raised to $37') while B generalizes analyst sentiment (e.g., 'exceeds analysts' expectations').",
      "A frequently cites hedge fund activity (e.g., 'Hedge Funds Had Impeccable Timing'), while B omits mentions of institutional investors.",
      "A highlights mergers/acquisitions with specific deal values (e.g., '$26 billion all-stock deal'), whereas B vaguely references 'funding rounds' or 'expansion plans'.",
      "A incorporates real-time updates (e.g., 'Stocks hit lows of the day') and time-bound guidance (e.g., '2020 adj. EPS'), while B discusses stable or forward-looking forecasts without immediate timelines."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently attribute analyst actions (upgrades, downgrades, target changes) to specific financial institutions in every instance, while Dataset A sometimes omits institutional names.",
      "Dataset B structurally positions financial institutions as the grammatical subject of analyst actions (e.g., 'Morgan Stanley Downgrades'), whereas Dataset A typically references institutions in prepositional phrases (e.g., 'at SunTrust').",
      "Dataset B more frequently pairs stock price movements with explicit causal language linking them to analyst actions (e.g., 'tumbles 12% as Morgan Stanley downgrades'), while Dataset A often reports price changes separately from catalysts.",
      "Dataset B shows stronger emphasis on precise future timelines, with specific earnings dates (e.g., 'April 24') and fiscal years (e.g., 'Fiscal 2024') appearing consistently, unlike Dataset A's generalized time references.",
      "Dataset B headlines explicitly use the term 'Analyst(s)' in 93% of institutional action references, compared to Dataset A's more frequent omission of the term when citing firms.",
      "Dataset B incorporates formal conference participation as standalone news triggers (e.g., 'Present at Annual Equity Research Conference'), a narrative structure absent in Dataset A.",
      "Dataset B demonstrates institutional name repetition patterns, with Morgan Stanley appearing in 28% of samples versus Dataset A's more diverse institution distribution.",
      "Dataset B favors the preposition 'Amid' to contextualize market movements (used in 19% of samples), whereas Dataset A prefers 'as' or 'after' for causal relationships.",
      "Dataset B shows tighter integration of quantitative analyst metrics, with 82% of price target revisions including specific numerical values versus Dataset A's 67%.",
      "Dataset B exhibits forward-looking institutional verb choices ('Predicts', 'Forecasts', 'Suggests') in 34% of headlines compared to Dataset A's 22%, reflecting enhanced predictive framing."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Dataset B headlines focus on broader macroeconomic trends (e.g., global recessions, Fed rate policies) rather than granular company-specific metrics like EPS beats/misses or ticker-level price targets prevalent in Dataset A.",
      "Dataset B emphasizes sector-wide developments (e.g., 'tech stocks plunge') without referencing individual stock tickers, while Dataset A consistently includes tickers (e.g., $AAPL) and micro-level stock movements.",
      "Dataset B headlines use generalized market sentiment (e.g., 'surge on optimistic earnings') rather than specific quantitative descriptors (e.g., 'shares up 3%' or 'misses by $0.01') common in Dataset A.",
      "Dataset B frequently highlights regulatory/political developments (e.g., 'new laws,' 'Fed rate hikes') as standalone market drivers, whereas Dataset A ties such events to immediate company-level impacts (e.g., 'STZ upgraded by UBS').",
      "Dataset B features forward-looking statements (e.g., 'economists predict moderate growth') more prominently, while Dataset A focuses on retrospective data (e.g., 'Q4 2019 Earnings Preview').",
      "Dataset B omits real-time trading updates (e.g., pre-market moves, intraday price swings) that are pervasive in Dataset A (e.g., 'Dow up 7.59% at close').",
      "Dataset B uses formal sentence structures and avoids fragmented phrasing or financial jargon (e.g., 'sympathy play,' 'PT raised'), which are frequent in Dataset A.",
      "Dataset B discusses global supply chain/semiconductor shortages as systemic issues, while Dataset A localizes similar themes to specific companies (e.g., 'Nio\u2019s deliveries decline due to coronavirus').",
      "Dataset B rarely cites analyst actions (e.g., upgrades/downgrades) or firm-specific guidance changes, which are central to Dataset A (e.g., 'GE upgraded from Sell by longtime bear').",
      "Dataset B headlines prioritize large-cap tech giants (e.g., Apple, Tesla) as sector proxies, whereas Dataset A includes diverse small/mid-cap firms (e.g., InspireMD, Organovo)."
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Dataset B headlines focus on a narrower set of companies (e.g., $TSLA, $NVDA, $GOOGL) compared to Dataset A's broader coverage across sectors and regions.",
      "Dataset B headlines are formulaically structured around analyst actions (e.g., 'morgan stanley downgrades $tsla') and earnings reports, while Dataset A includes diverse topics like geopolitical events, consumer trends, and company-specific legal/operational updates.",
      "Dataset B headlines frequently repeat identical or near-identical phrases (e.g., 'morgan stanley downgrades $tsla - to underweight, citing production concerns') across multiple entries, unlike Dataset A's unique contextual details per headline.",
      "Dataset B emphasizes recurring mentions of specific investment banks (Barclays, Morgan Stanley, Goldman Sachs) as primary news drivers, whereas Dataset A references a wider variety of institutions and sources.",
      "Dataset B headlines lack forward-looking earnings previews and guidance discussions that are prevalent in Dataset A (e.g., '$UAA - Under Armour Q4 2019 Earnings Preview').",
      "Dataset B omits granular financial metric specifics (e.g., 'misses by $0.33') common in Dataset A, instead using generalized statements like 'in line with expectations.'",
      "Dataset B excludes non-analyst-driven macroeconomic updates (e.g., commodity prices, rent payments, consumer confidence) that frequently appear in Dataset A.",
      "Dataset B headlines avoid social media references (@mentions), hashtags (#investing), and informal trading commentary ('chart looks perfect for a bounce') present in Dataset A.",
      "Dataset B shows minimal coverage of mergers/acquisitions, leadership changes, or product launches compared to Dataset A's frequent inclusion of these events.",
      "Dataset B lacks opinion-driven or advisory language (e.g., 'Risky but...', 'Why You Need To Hedge') that characterizes many Dataset A headlines."
    ],
    "llama3.1-8b_few-shot_v1": [
      "Dataset B headlines omit specific stock ticker symbols (e.g., $NSPR) prevalent in Dataset A",
      "Dataset B emphasizes macroeconomic trends (e.g., GDP revisions, recession fears) rather than granular stock/ETF-level performance metrics",
      "Dataset B uses formal headline structures without conversational elements (e.g., \"I like this as a sympathy play\") found in Dataset A",
      "Dataset B focuses more on government policy implications (e.g., stimulus packages, trade deals) than company-specific legal/regulatory actions",
      "Dataset B contains fewer references to technical trading patterns (e.g., \"chart looks perfect for a bounce\") common in Dataset A",
      "Dataset B features more complete economic indicator analysis (e.g., full CPI reports) rather than fragmentary mentions (\"Building permits rise 5%\")",
      "Dataset B includes structured corporate earnings coverage (\"Beats/Misses Estimates\") without the preview/update focus seen in Dataset A",
      "Dataset B maintains consistent tense/narrative voice compared to Dataset A's mix of real-time updates and retrospective analyses",
      "Dataset B emphasizes sector-wide movements (e.g., tech routs, oil slumps) over Dataset A's micro-level stock reactions (\"SeaSpine reports prelim Q1 revs\")",
      "Dataset B contains more forward-looking policy speculation (e.g., \"Fed Hints at Interest Rate Cuts\") versus Dataset A's immediate event reporting"
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Dataset B headlines consistently specify the analyst firm or institution behind actions (e.g., Morgan Stanley, Bernstein), while A often omits institutional sources or references them generically.",
      "Dataset B emphasizes explicit macroeconomic indicators (e.g., CPI, GDP, inflation rates) as standalone topics, whereas A integrates macroeconomic factors indirectly through events like layoffs or consumer behavior.",
      "Dataset B focuses disproportionately on large-cap tech companies (e.g., $AAPL, $NVDA, $MSFT) and semiconductor sectors, while A covers a broader mix of industries (e.g., energy, retail, healthcare).",
      "Dataset B headlines adopt a formulaic structure (e.g., \"[Ticker] - Analyst [action] at [Firm], [rationale]\"), whereas A uses more varied phrasing, including colloquial language and investor commentary.",
      "Dataset B includes precise forward-looking numerical targets (e.g., \"price target raised to $350\"), while A\u2019s forward-looking statements are qualitative or lack specific metrics (e.g., \"guidance raised\").",
      "Dataset B integrates global economic scenarios (e.g., EU\u2019s $1 trillion loss, China slowdown) as primary topics, while A\u2019s international coverage centers on localized events (e.g., Brazil coffee production, Canada rail strikes).",
      "Dataset B frequently provides explicit rationales for analyst actions (e.g., \"citing slowing demand\"), while A often states actions without contextual justification (e.g., \"GE upgraded from Sell by longtime bear\").",
      "Dataset B pairs stock tickers with full company names (e.g., \"$AAPL - Apple\") for clarity, whereas A uses tickers or company names interchangeably without systematic pairing.",
      "Dataset B highlights regulatory/antitrust risks (e.g., FTC investigations, EU competition concerns) as central themes, while A mentions legal developments peripherally (e.g., patent disputes).",
      "Dataset B\u2019s earnings coverage emphasizes quarterly beats/misses with granular metrics (e.g., \"Q2 revenue misses by $0.01\"), while A\u2019s earnings updates are embedded in broader operational narratives (e.g., \"Medtronic\u2019s stock surges after earnings rise\")."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Dataset A headlines frequently include specific quantitative metrics (e.g., 'EPS misses by $0.33', 'up 3%'), whereas Dataset B focuses on broader earnings outcomes (e.g., 'miss expectations') without granular figures.",
      "Dataset A covers a wider variety of sectors (e.g., agriculture, shipping, retail) and smaller companies, while Dataset B is heavily concentrated on major tech firms (e.g., Tesla, Apple, NVIDIA).",
      "Dataset A includes headlines with questions, polls, and advice (e.g., 'Is XYZ a Risky Investment?'), whereas Dataset B uses standardized, formulaic structures centered on analyst actions.",
      "Dataset A references geopolitical and macroeconomic events (e.g., Lebanon, Brazil) impacting markets, while Dataset B\u2019s non-tech macro references are rare and less specific.",
      "Dataset A features varied formatting elements like hashtags, URLs, and social media mentions (@jasonzweigwsj), while Dataset B maintains clean, uniform headlines without embedded metadata.",
      "Dataset B emphasizes repetitive analyst actions (e.g., 'Barclays raises/cuts price target') across headlines, whereas Dataset A diversifies content with mergers, legal cases, and sector-specific updates.",
      "Dataset A includes explicit dates (e.g., 'Q4 2019 Earnings Preview') and timelines, while Dataset B uses generic time references (e.g., 'Q4 earnings').",
      "Dataset B contains non-English characters (e.g., Chinese text) in some headlines, whereas Dataset A adheres strictly to English-language content.",
      "Dataset A headlines often cite diverse sources (e.g., WSJ, Seeking Alpha), while Dataset B predominantly references institutional analysts (e.g., Morgan Stanley, Barclays).",
      "Dataset A integrates forward-looking statements with specific guidance (e.g., '2020 adj. EPS down 3%-4%'), while Dataset B emphasizes reactive analyst revisions post-earnings."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Headlines in Dataset B focus more on broad market trends (e.g., 'recession fears', 'economic uncertainty') rather than granular company-specific events like mergers, patent disputes, or executive changes prevalent in Dataset A.",
      "Dataset B headlines lack explicit stock ticker symbols (e.g., $AAPL, $TSLA) that are consistently present in Dataset A samples.",
      "Dataset B uses repetitive phrases like 'Federal Reserve Announces Interest Rate Decision' as central themes, while Dataset A includes diverse institutional actions (e.g., analyst upgrades, SEC filings, court rulings).",
      "Numerical data in Dataset B is less precise (e.g., 'plummet 10%', 'soar 20%') compared to Dataset A\u2019s specific metrics (e.g., 'up 3%', 'price target raised to $37 vs. $35').",
      "Dataset B headlines rarely reference non-tech sectors (e.g., energy, healthcare, retail) that are frequently covered in Dataset A.",
      "Temporal references in Dataset B are vaguer (e.g., 'next month', 'tomorrow') compared to Dataset A\u2019s explicit timelines (e.g., 'Q4 2019', 'premarket').",
      "Dataset A headlines often combine multiple data points (e.g., earnings results + stock movement + analyst action), while Dataset B focuses on single-event impacts (e.g., earnings report or Fed decision alone).",
      "Dataset B lacks informal language, social media references (@mentions, hashtags), and speculative trader commentary (e.g., 'Risky but chart looks perfect') seen in Dataset A.",
      "Dataset A includes citations of specific sources (e.g., 'WSJ', 'SunTrust Robinson Humphrey'), whereas Dataset B uses generic attributions like 'analysts predict' or 'market reacts'.",
      "Dataset B headlines emphasize macroeconomic outcomes (e.g., 'Dow drops 500 points') over granular financial metrics (e.g., 'EPS misses by $0.33', '9M-share offering') that dominate Dataset A."
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently include the name of the analyst firm or institution (e.g., 'JPMorgan', 'Barclays', 'Morgan Stanley') in analyst actions, while Dataset A often omits this detail.",
      "Dataset B emphasizes specific future guidance timelines (e.g., '2023 Growth Strategy', 'FY23 Guidance') in forward-looking statements, whereas Dataset A uses broader timeframes like 'raises 2020 guidance' without granular fiscal year specificity.",
      "Dataset B headlines frequently reference precise price target adjustments (e.g., 'cuts price target to $235 from $280'), while Dataset A typically mentions target changes without explicit numerical ranges (e.g., 'target raised to $37').",
      "Dataset B includes explicit mentions of stock reactions to product launches/clinical results (e.g., 'stock jumps 4% on the news'), whereas Dataset A often separates product developments and price movements into distinct headlines.",
      "Dataset B headlines systematically pair tickers with full company names (e.g., 'Teva Pharmaceutical Industries Ltd. ($TEVA)'), while Dataset A rarely combines both in the same headline.",
      "Dataset B uses standardized phrases like 'maintains Neutral rating' or 'downgrades to Underperform' for analyst actions, whereas Dataset A includes informal analyst commentary (e.g., 'Eviscerate Musk's Cybertruck').",
      "Dataset B explicitly links sector performance to individual stock movements (e.g., 'Utilities sector sees renewed interest, with $PXLW among top gainers'), while Dataset A discusses sectors as standalone trends without direct ticker correlations.",
      "Dataset B specifies contractual terms in M&A deals (e.g., 'acquires...for $350M', 'create $130B company'), whereas Dataset A states mergers without financial details (e.g., 'Charles Schwab to buy TD Ameritrade').",
      "Dataset B consistently quantifies competitive threats in downgrades (e.g., 'due to rising competition', 'citing market competition'), while Dataset A attributes underperformance to broader factors like 'headwinds'.",
      "Dataset B includes explicit references to conference call dates/events (e.g., 'Q3 earnings release for October 26, 2022'), whereas Dataset A mentions earnings previews without specific scheduling details."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently structure analyst actions with the format '[Analyst Firm] [Action] [Company/Stock]' as the primary focus, while Dataset A uses more varied structures.",
      "Dataset B includes non-English characters (e.g., Chinese text) in some headlines, whereas Dataset A does not.",
      "Dataset B explicitly cites price target figures (e.g., 'sets $170 price target') in nearly all analyst-action headlines, while Dataset A mentions price targets less systematically.",
      "Dataset B headlines frequently reference ETFs (e.g., $SPY, $XLE) and sector-specific indices, whereas Dataset A emphasizes broad market indices (e.g., Dow, Nasdaq, S&P).",
      "Dataset B emphasizes neutral/unchanged analyst stances (e.g., 'maintains neutral stance') as a recurring theme, while Dataset A includes fewer explicit mentions of neutrality.",
      "Dataset B headlines pair company names with tickers (e.g., 'Biocryst Pharmaceuticals (BXRX)'), while Dataset A often uses tickers alone without full company names.",
      "Dataset B focuses narrowly on analyst actions (upgrades/downgrades) and earnings guidance as the sole driver of stock movements, whereas Dataset A links price changes to diverse factors (e.g., mergers, macroeconomic trends, executive changes).",
      "Dataset B avoids referencing non-analyst media sources (e.g., WSJ, Seeking Alpha) or quotes from individuals, which are prevalent in Dataset A.",
      "Dataset B excludes coverage of small businesses, consumer sentiment polls, or retail investor advice, which appear in Dataset A.",
      "Dataset B headlines avoid mentions of geopolitical events (e.g., Lebanon/IMF, Canada rail strikes) and legal/regulatory actions, which are present in Dataset A."
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently follow a structured format: '[Company] [action] by [Institution] due to [reason]', while A uses varied narrative styles with less uniformity.",
      "All Dataset B samples explicitly state the rationale behind analyst actions (e.g., 'due to supply chain concerns'), whereas A often omits specific reasons for upgrades/downgrades.",
      "Dataset B exclusively focuses on analyst actions (downgrades, upgrades, price target changes) from named institutions, while A includes non-analyst-driven news like macroeconomic trends or executive changes.",
      "Dataset B headlines prioritize full company names before tickers (e.g., 'itron inc $ITRI'), whereas A frequently leads with tickers (e.g., '$NSPR - InspireMD...').",
      "Dataset B avoids supplementary elements like hashtags, URLs, or quoted opinions (e.g., '@jasonzweigwsj'), which are present in A.",
      "All Dataset B samples reference specific financial institutions (e.g., Morgan Stanley, Barclays) as the source of analysis, while A sometimes uses generic terms like 'analysts' or cites media outlets.",
      "Dataset B emphasizes institutional ratings terminology (e.g., 'underweight', 'overweight', 'neutral') in every headline, whereas A occasionally omits these labels.",
      "Forward-looking statements in B are strictly tied to analyst expectations (e.g., 'expects strong growth'), while A includes broader future signals like clinical trials or geopolitical developments.",
      "Dataset B headlines lack explicit mentions of real-time stock price movements (e.g., 'shares up 3%') unless directly tied to the analyst action, unlike A, which frequently highlights price changes as standalone events.",
      "Dataset B avoids colloquial language, opinionated quotes, or rhetorical questions (e.g., 'Cash is trash?'), maintaining a neutral tone, while A includes these elements."
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently mention specific analyst firms (e.g., Morgan Stanley, UBS, Oppenheimer) in the context of rating changes or price targets, whereas Dataset A rarely specifies the source of analyst actions.",
      "Dataset B headlines frequently include explicit numerical price targets (e.g., 'cuts Price Target to $12', 'raises target to $235'), while Dataset A headlines often omit precise target figures.",
      "Dataset B headlines emphasize future earnings report dates (e.g., 'Q3 Earnings on October 27th') as central elements, whereas Dataset A headlines mention earnings timelines less prominently.",
      "Dataset B headlines use formal analyst rating terminology (e.g., 'Overweight', 'Underperform', 'Neutral') more systematically than Dataset A, which often uses vague phrases like 'upgraded from Sell by longtime bear'.",
      "Dataset B headlines focus narrowly on immediate financial metrics tied to analyst expectations (e.g., 'revenue miss in 4Q'), while Dataset A includes broader operational updates (e.g., mergers, leadership changes) unrelated to earnings.",
      "Dataset B headlines frequently reference risks tied to macroeconomic factors (e.g., 'global economic uncertainty', 'inflation concerns') as direct drivers of analyst actions, whereas Dataset A treats these topics as standalone market commentary.",
      "Dataset B headlines standardize ticker placement at the start (e.g., '$VZ and $T...') for uniformity, while Dataset A headlines variably position tickers within sentences or omit them entirely in favor of company names.",
      "Dataset B headlines prioritize sequential financial performance (e.g., 'third consecutive quarterly decline', 'Q1 Revenue Beats Estimates') as core narrative drivers, whereas Dataset A integrates non-financial events (e.g., regulatory probes, product launches) more evenly.",
      "Dataset B headlines explicitly tie revenue/earnings outcomes to specific analyst reactions (e.g., '10 analysts lower price target'), while Dataset A often states financial results without linking them to external evaluations.",
      "Dataset B headlines emphasize near-term technical indicators (e.g., 'Price Target Raised to $80 at Oppenheimer - With Technical Momentum Support') more prominently than Dataset A, which leans on qualitative assessments (e.g., 'chart looks perfect for a bounce')."
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently focus on analyst actions (downgrades/upgrades/initiations) as the primary news driver, while Dataset A includes diverse catalysts like earnings releases, macroeconomic events, and company-specific developments.",
      "All Dataset B samples explicitly name the analyst/institution issuing the rating (e.g., Oppenheimer, Citi, Jefferies) in the headline, whereas Dataset A only sometimes references analyst firms.",
      "Dataset B headlines systematically include both previous and revised price targets/ratings (e.g., 'cut to $15 from $20'), while Dataset A more often states absolute figures without historical comparison.",
      "Dataset B shows stronger emphasis on institutional research terminology ('maintains neutral outlook', 'reaffirms guidance') compared to Dataset A's broader market commentary.",
      "100% of Dataset B samples frame news through the lens of professional analyst assessments, while Dataset A includes retail investor perspectives and unsourced opinions.",
      "Dataset B demonstrates tighter focus on equity-specific actions (price targets, ratings changes) compared to Dataset A's inclusion of debt instruments, commodities, and macroeconomic policy.",
      "Forward-looking statements in Dataset B are predominantly analyst-driven ('forecasting strong performance'), whereas Dataset A includes more corporate guidance ('raises outlook').",
      "Dataset B headlines consistently cite specific rationales for analyst actions (e.g., 'supply chain disruptions'), while Dataset A often states outcomes without explicit attribution.",
      "All Dataset B samples maintain formal structure: [Ticker] - [Analyst Action] + [Rationale], contrasting with Dataset A's varied formats including questions and commentary.",
      "Dataset B shows higher concentration on financial services sector implications (upgrades/downgrades), while Dataset A covers broader economic impacts across industries."
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines consistently specify the initiating analyst firm or institution as the grammatical subject (e.g., \"Morgan Stanley downgrades\") rather than passive reporting of actions.",
      "All Dataset B samples explicitly state rationales behind analyst actions (e.g., \"due to valuation concerns\") within headlines, unlike A's neutral statements.",
      "B emphasizes forward-looking guidance revisions (e.g., \"raises full-year outlook\") as standalone news points, while A focuses on earnings previews/results without guidance updates.",
      "B systematically ties stock price movements directly to specific analyst actions (e.g., \"shares surge 4.5% after Deutsche Bank upgrade\"), whereas A reports price reactions independently of institutional triggers.",
      "Dataset B contains recurring references to ETFs and sector indices as primary subjects (e.g., \"SPDR S&P 500 ETF Trust\"), while A focuses on individual stocks/macro indices.",
      "B exclusively uses precise future-oriented timeframes for financial guidance (e.g., \"FY2023 revenue guidance\") compared to A's broader temporal references like fiscal quarters.",
      "All B samples frame analyst rating changes (upgrades/downgrades) as headline-driving events, whereas A treats them as secondary updates among diverse news types.",
      "B consistently employs institutional perspective as narrative drivers (e.g., \"says Wells Fargo\"), while A blends institutional views with corporate announcements and macroeconomic trends.",
      "Dataset B headlines structurally pair earnings results with immediate guidance adjustments (e.g., \"beats Q2 estimates, raises outlook\"), unlike A's separated reporting of results/previews.",
      "B universally incorporates price target figures and rating terminology (e.g., \"PT $35\", \"Neutral rating\") as core headline components, whereas A occasionally omits specific targets."
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines consistently specify the financial institution (e.g., Barclays, Morgan Stanley) when reporting analyst actions, while Dataset A sometimes omits institutional names (e.g., 'GE upgraded by longtime bear').",
      "Dataset B headlines follow a rigid structure emphasizing analyst actions (e.g., '[Company] [action] by [institution] due to [reason]'), whereas Dataset A uses varied formats (questions, narratives, quotes).",
      "Dataset A includes headlines with multiple stock tickers (e.g., '$DELL $HPE'), while Dataset B focuses exclusively on single-ticker mentions per headline.",
      "Dataset A covers diverse corporate events (e.g., mergers, leadership changes, FDA approvals), while Dataset B centers narrowly on earnings releases and analyst-driven updates.",
      "Dataset A incorporates informal language, social media elements (e.g., hashtags, @mentions), and investor advice, whereas Dataset B maintains a formal, institutionally focused tone.",
      "Dataset A frequently references geopolitical/macroeconomic factors (e.g., Lebanon-IMF negotiations, Brazil strikes), while Dataset B rarely addresses broader economic contexts.",
      "Dataset B explicitly states rationales for analyst actions (e.g., 'due to slowing growth'), whereas Dataset A often reports actions without explanations (e.g., 'target trimmed').",
      "Dataset B spells out full company names before tickers (e.g., 'Molson Coors Beverage Company ($TAP)'), while Dataset A often leads with tickers (e.g., '$NSPR - InspireMD...').",
      "Dataset A includes social media handles (e.g., @jasonzweigwsj) and hashtags (e.g., #investing), which are absent in Dataset B.",
      "Dataset B contextualizes earnings announcements with analyst expectations (e.g., 'meets analyst expectations per Refinitiv'), while Dataset A reports earnings independently (e.g., 'Q4 results announced')."
    ]
  },
  "diffs_real_from_synth": {
    "qwen2.5-7b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines frequently mention corporate leadership changes (e.g., CEO departures) and M&A activity, while Dataset A focuses solely on analyst actions.",
      "Dataset B includes broader macroeconomic indicators (e.g., rent payment trends, oil price swings) unrelated to specific companies, whereas Dataset A emphasizes company-specific financial metrics.",
      "Dataset B incorporates geopolitical developments (e.g., Lebanon/IMF negotiations, OPEC+ decisions) as market drivers, while Dataset A centers on sector/stock-level factors.",
      "Dataset B features quotes or statements from politicians, regulators, or Fed officials, absent in Dataset A\u2019s analyst-centric headlines.",
      "Dataset B contains informal language, social media-style commentary, and trading chart references (e.g., \"Risky but chart looks perfect for a bounce\"), unlike Dataset A\u2019s formal tone.",
      "Dataset B includes non-financial news (e.g., Oxford Word of the Year, patent disputes) that indirectly affect markets, whereas Dataset A strictly reports financial metrics/analyst moves.",
      "Dataset B headlines often summarize raw market index movements (e.g., Dow/Nasdaq percentages) without analyst attribution, while Dataset A ties volatility explicitly to analyst sentiment.",
      "Dataset B references company-specific operational updates (e.g., clinical trial results, product launches) beyond earnings, unlike Dataset A\u2019s focus on revenue/profit guidance.",
      "Dataset B highlights legal/regulatory outcomes (e.g., patent case losses, probes) as primary catalysts, while Dataset A cites these only as secondary factors in analyst decisions.",
      "Dataset B uses earnings previews (e.g., \"Under Armour Q4 2019 Earnings Preview\") as standalone headlines, whereas Dataset A emphasizes post-earnings analyst reactions."
    ],
    "qwen2.5-32b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently include quotes or direct statements from individuals or third-party sources (e.g., \"Cash is trash? No, cash is king!\"), while Dataset A does not.",
      "Dataset B contains more explicit mentions of macroeconomic indicators (e.g., GDP, unemployment, building permits) and geopolitical events (e.g., \"Lebanon Turns to IMF\"), whereas Dataset A focuses on company-specific financial metrics.",
      "Dataset B features headlines structured as questions or advice (e.g., \"Is Amicus Therapeutics... A Risky Investment?\"), while Dataset A headlines are strictly declarative.",
      "Dataset B includes real-time intraday market updates (e.g., \"Dow up 3.90% at open\") more frequently than Dataset A, which emphasizes post-market earnings results.",
      "Dataset B references external entities like media outlets or analysts by name (e.g., \"via @TradingNation\"), whereas Dataset A anonymizes sources (e.g., \"analysts predict further decline\").",
      "Dataset B highlights executive changes and corporate governance (e.g., \"InspireMD chief to step down\"), while Dataset A focuses on operational outcomes like earnings and partnerships.",
      "Dataset B incorporates more international economic developments (e.g., \"Brazil coffee production falls 20%\"), whereas Dataset A\u2019s global references are tied to specific company expansions or supply chains.",
      "Dataset B includes event-driven updates such as conference calls or webcasts (e.g., \"Henry Schein to Webcast...\"), while Dataset A emphasizes forward-looking guidance (e.g., \"fiscal 2024 outlook\").",
      "Dataset A occasionally contains non-English characters (e.g., Chinese text in headlines), whereas Dataset B does not.",
      "Dataset B features headlines with explicit timestamps or temporal qualifiers (e.g., \"after hours,\" \"premarket\"), while Dataset A uses broader timeframes (e.g., \"Q2 earnings\")."
    ],
    "qwen2.5-7b_zero-shot_bg_test-time-info_v1": [
      "Dataset B includes headlines referencing broader market indices and commodities (e.g., Dow, S&P 500, oil prices) rather than focusing solely on individual company stocks.",
      "Headlines in B frequently mention geopolitical, regulatory, or macroeconomic events (e.g., IMF involvement, trade deals, labor strikes) impacting markets, beyond company-specific news.",
      "Dataset B incorporates social media elements, such as hashtags (#investing), mentions (@jasonzweigwsj), and colloquial phrases (e.g., \"Cash is trash?\"), which are absent in A.",
      "B features explicit technical analysis or trading strategy language (e.g., \"chart looks perfect for a bounce,\" \"go short $ES\"), targeting traders rather than general investors.",
      "Headlines in B often include multiple tickers or ETFs in a single entry (e.g., \"$DELL $HPE\"), while A typically focuses on one ticker per headline.",
      "Dataset B contains procedural announcements (e.g., earnings webcasts, stock offerings, filings) rather than only outcomes of financial events.",
      "B includes conversational or opinion-based statements (e.g., quotes from public figures, retail investor advice) rather than strictly factual analyst actions.",
      "Headlines in B reference non-earnings corporate events (e.g., CEO resignations, clinical trial results, product launches) more diversely than A\u2019s focus on financial metrics.",
      "Dataset B highlights broader economic indicators (e.g., building permits, small business rent delinquency) alongside company-specific updates.",
      "B uses informal formatting (e.g., bullet points, emojis, casual language like \"So Halsey needs a shower\") absent in A\u2019s formal, structured headlines."
    ],
    "llama3.3-70b_zero-shot_bg_v1": [
      "Dataset B headlines frequently cover macroeconomic events (e.g., oil price crashes, Fed policies, geopolitical tensions) absent in A, which focuses narrowly on analyst actions and earnings.",
      "Dataset B includes mergers, acquisitions, and corporate restructuring events (e.g., Charles Schwab acquiring TD Ameritrade), while A lacks such structural business updates.",
      "Dataset B references legal/regulatory developments (e.g., patent disputes, FDA approvals, DOJ probes) not seen in A\u2019s analyst-centric headlines.",
      "Dataset B headlines mention ETFs, indices (e.g., XLU, S&P 500), and broad market movements (e.g., Dow/Nasdaq percentages), whereas A focuses solely on individual tickers.",
      "Dataset B incorporates retail investor-centric language (e.g., \"sympathy play,\" \"chart looks perfect for a bounce\") and informal commentary absent in A\u2019s institutional tone.",
      "Dataset B includes international companies/events (e.g., Nokia, Shell in Colombia, Lebanon/IMF) vs. A\u2019s U.S.-centric focus on firms like Tesla and NVIDIA.",
      "Dataset B highlights operational metrics (e.g., cost cuts, layoffs, dividend changes) beyond A\u2019s emphasis on revenue growth and valuation concerns.",
      "Dataset B features forward-looking corporate guidance (e.g., earnings previews, product launch dates) more prominently than A, which reports past results and reactions.",
      "Dataset B diversifies into non-tech sectors (e.g., energy, pharma, consumer goods), while A centers on tech stocks and ad-driven revenue.",
      "Dataset B uses question formats, quotes, and opinion-driven headlines (e.g., \"Is X a Risky Investment?\") absent in A\u2019s factual analyst-action reporting."
    ],
    "qwen2.5-32b_zero-shot_v1": [
      "Dataset B headlines frequently include stock ticker symbols (e.g., \"$NSPR\", \"$UAA\"), whereas A does not.",
      "Dataset B headlines often reference pre-market/after-hours trading updates (e.g., \"shares up 2% after hours\"), while A focuses on regular trading hours.",
      "Dataset B includes direct quotes or colloquial phrases (e.g., \"Cash is trash? No, cash is king!\"), whereas A maintains formal language throughout.",
      "Dataset B headlines incorporate social media handles or user-generated content markers (e.g., \"@jasonzweigwsj\", \"#investing\"), which are absent in A.",
      "Dataset B features technical trading terminology (e.g., \"breakout\", \"resistance\") and chart analysis references, unlike A.",
      "Dataset B highlights specific price target adjustments by named analyst firms (e.g., \"SunTrust Robinson Humphrey\"), while A generalizes analyst actions (e.g., \"UBS Upgrades\").",
      "Dataset B includes legal/case update markers (e.g., \"UPDATE 1-\", \"probe\") for real-time developments, whereas A mentions investigations without procedural details.",
      "Dataset B headlines often reference specific financial instruments (e.g., \"senior notes\", \"futures\") beyond stocks, which A avoids.",
      "Dataset B contains formatting irregularities (e.g., repeated phrases, typos like \"plunges briefly plunges\"), while A maintains polished syntax.",
      "Dataset B integrates investor-facing advisories (e.g., \"Here's Why You Need To Hedge\") and direct trading strategies, whereas A avoids prescriptive guidance."
    ],
    "qwen2.5-32b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines more frequently include macroeconomic or geopolitical impacts affecting multiple companies or industries, while A focuses on individual company-specific events.",
      "B often references broader market indices (e.g., Dow, S&P 500) and ETFs, whereas A emphasizes individual stock tickers and their performance.",
      "B includes opinion-driven statements, quotes from analysts, or investment advice (e.g., \"Cash is trash? No, cash is king!\"), while A maintains a factual tone focused on reporting events.",
      "Dataset B headlines mention non-corporate entities (e.g., countries, central banks, IMF) and their economic challenges, whereas A centers on corporate actions and financial institutions.",
      "B features announcements of future events (e.g., conference calls, earnings previews) without immediate financial results, while A predominantly reports outcomes (e.g., earnings beats/misses).",
      "Dataset B includes retail investor-centric language (e.g., \"sympathy play,\" \"chart looks perfect for a bounce\"), whereas A uses institutional terminology (e.g., \"price target,\" \"equal weight\").",
      "B highlights market-wide movements (e.g., \"Stocks hit lows of the day as oil plunges\"), while A quantifies stock price changes for specific companies.",
      "Dataset B incorporates polls, surveys, or third-party data (e.g., \"Half of small businesses fall behind on April rent\"), whereas A relies on analyst ratings and corporate disclosures.",
      "B includes non-English characters or formatting irregularities (e.g., truncated text, hashtags) less frequently than A, which occasionally mixes languages or symbols.",
      "Dataset B discusses speculative or early-stage developments (e.g., clinical trials, mergers in talks), while A emphasizes finalized corporate actions (e.g., acquisitions, earnings releases)."
    ],
    "llama3.3-70b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines include real-time stock price movements (e.g., 'shares up 3%', 'plunges 9%') directly in the title, while Dataset A focuses on analyst-driven price targets or rating changes.",
      "Dataset B incorporates broader geopolitical/economic events (e.g., Lebanon-IMF talks, China trade delays) unrelated to specific companies, whereas Dataset A centers on corporate actions or analyst firm perspectives.",
      "Dataset B includes editorialized opinions or quotes from non-analyst sources (e.g., \"Cash is trash? No, cash is king!\", Jason Zweig comments), while Dataset A strictly uses institutional analyst language.",
      "Dataset B headlines reference retail-focused platforms like Seeking Alpha or MarketScreener, unlike Dataset A, which exclusively cites institutional firms (e.g., Morgan Stanley, Barclays).",
      "Dataset B features technical trading terminology (e.g., 'sympathy play', 'resistance levels', 'breakout') absent in Dataset A\u2019s fundamentals-driven headlines.",
      "Dataset B includes multi-ticker comparisons (e.g., \"$DELL $HPE - Dell, HPE targets trimmed\"), whereas Dataset A headlines focus on single-ticker updates.",
      "Dataset B covers non-corporate entities (e.g., Fed policy debates, Fitch ratings, government rent statistics), while Dataset A is strictly company/analyst-centric.",
      "Dataset B headlines mention specific dates/times for earnings releases or conference calls (e.g., \"Q3 report on November 15\"), whereas Dataset A emphasizes earnings outcomes or guidance.",
      "Dataset B includes legal/criminal case updates (e.g., Epstein death details, ArcelorMittal probe) beyond regulatory challenges, which Dataset A limits to operational/FDA issues.",
      "Dataset B uses informal formatting (e.g., hashtags like #investing, emojis, fragmented sentences) absent in Dataset A\u2019s standardized, institutional tone."
    ],
    "llama3.3-70b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines include standalone stock price movements (e.g., \"shares up 3%\") without explicitly linking them to analyst actions or reports, unlike Dataset A, where price changes are always tied to analyst-driven catalysts.",
      "Dataset B incorporates quotes, colloquial language, or opinions from non-analyst sources (e.g., journalists, traders, or social media), whereas Dataset A exclusively references institutional analyst firms like Morgan Stanley or Barclays.",
      "Dataset B features macroeconomic, geopolitical, or sector-wide trends (e.g., oil prices, consumer behavior) as primary headlines without connecting them to specific company earnings or analyst ratings, while Dataset A contextualizes such factors through analyst commentary.",
      "Dataset B includes retail investor-focused language (e.g., \"sympathy play,\" \"chart looks perfect for a bounce\") and trading-centric terms (e.g., \"resistance,\" \"breakout\"), absent in Dataset A\u2019s institutionally oriented headlines.",
      "Dataset B headlines frequently reference pre-market/after-hours trading activity (e.g., \"shares up 2% after hours\"), whereas Dataset A omits temporal trading context in favor of event-driven explanations.",
      "Dataset B uses hashtags (e.g., #investing) and social media-style formatting, suggesting a platform like Twitter or forums, while Dataset A maintains a formal, standardized structure aligned with traditional financial news.",
      "Dataset B includes corporate events (e.g., mergers, CEO changes, legal rulings) as standalone news items, whereas Dataset A ties such events to analyst reactions (e.g., price target changes, upgrades).",
      "Dataset B headlines pose questions or speculative analyses (e.g., \"Is X Trading At A 32% Discount?\"), which are absent in Dataset A\u2019s declarative, analyst-validated statements.",
      "Dataset B covers a broader range of financial instruments (e.g., ETFs, futures, debt offerings) beyond individual stocks, while Dataset A focuses narrowly on equities and analyst actions.",
      "Dataset B integrates technical analysis or trader-focused metrics (e.g., \"new 52-week highs/lows\"), contrasting with Dataset A\u2019s emphasis on fundamental analysis (e.g., earnings, revenue beats)."
    ],
    "qwen2.5-32b_few-shot_bg_test-time-info_v1": [
      "Dataset A headlines consistently include both company names and ticker symbols (e.g., '$NVS - Novartis'), while B often uses tickers alone or omits explicit names.",
      "Dataset B includes headlines with broader macroeconomic or geopolitical commentary (e.g., 'Lebanon Turns to IMF') not tied to specific corporate actions, unlike A's company-centric focus.",
      "Dataset B contains more social media references, hashtags (#investing), or direct quotes (e.g., 'Cash is trash? No, cash is king!'), while A maintains formal analyst language.",
      "Dataset A headlines emphasize forward-looking corporate strategies (e.g., 'expands production', 'new clinical trial'), whereas B prioritizes immediate market reactions (e.g., 'shares up 3%').",
      "Dataset B includes procedural updates (e.g., 'to Webcast Conference Call', 'files for senior notes offering') absent in A, which focuses on substantive financial events.",
      "Dataset A uses non-English characters in some headlines (e.g., Chinese text), while B samples are exclusively in English.",
      "Dataset B features more ETF/index-specific commentary (e.g., 'SPDR S&P 500 ETF Trust') compared to A's emphasis on individual equities.",
      "Dataset A consistently references analyst rating changes (e.g., 'downgrades to Sell'), while B includes neutral event reports without opinion (e.g., 'Align loses patent case').",
      "Dataset B contains headlines structured as questions or investment advice (e.g., 'Is...Trading At A 32% Discount?'), unlike A's declarative statements.",
      "Dataset A maintains consistent formatting for earnings mentions (e.g., 'Q4 Earnings Beat'), while B mixes fiscal year formats (e.g., 'Q3 2020', 'Fiscal 2020 Second Quarter')."
    ],
    "qwen2.5-7b_zero-shot_bg_v1": [
      "Dataset B includes headlines referencing niche or small-cap companies (e.g., $NSPR, $ONVO) more frequently than Dataset A, which focuses on large-cap firms like $TSLA or $AAPL.",
      "Headlines in Dataset B often include colloquial phrases or quotes (e.g., \"Cash is trash? No, cash is king!\") absent in Dataset A's formal tone.",
      "Dataset B contains explicit mentions of mergers, acquisitions, or legal outcomes (e.g., \"Align loses patent case\") as standalone topics, whereas Dataset A typically ties these events to analyst actions or market trends.",
      "Dataset B headlines frequently reference retail investor-oriented platforms (e.g., Seeking Alpha) or trading strategies (e.g., \"sympathy play\"), unlike Dataset A's institutional focus.",
      "Dataset B includes breaking news-style updates (e.g., \"shares up 2% after hours\") with immediate price reactions, while Dataset A emphasizes broader trends or analyst interpretations.",
      "Dataset A incorporates multilingual headlines (e.g., Chinese characters), whereas Dataset B is exclusively in English.",
      "Dataset B uses hashtags (e.g., #investing) and social media-style annotations, which are absent in Dataset A.",
      "Dataset B often quantifies stock movements or price targets for lesser-known companies (e.g., \"Avnet stock price target raised to $37\"), while Dataset A focuses on widely tracked stocks.",
      "Dataset A consistently ties sector-specific developments (e.g., semiconductors, cloud services) to earnings or analyst ratings, whereas Dataset B mentions sectors without explicit financial metric linkages.",
      "Dataset B features headlines with fragmented or incomplete sentences (e.g., \"$PACD taking some...\") reflecting real-time updates, unlike Dataset A's polished, full-sentence structure."
    ],
    "qwen2.5-32b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines more frequently reference broad market indices (e.g., Dow, S&P) and sector-wide movements, while A focuses on individual company metrics.",
      "B includes direct quotes or opinions from analysts/public figures (e.g., \"Cash is trash? No, cash is king!\"), whereas A avoids editorialized language.",
      "B contains headlines with rhetorical questions or speculative prompts (e.g., \"Is Zhou Hei Ya... Trading At A 32% Discount?\"), while A uses declarative statements.",
      "Dataset B features explicit mentions of dates/times for earnings calls, conferences, or regulatory deadlines (e.g., \"Henry Schein to Webcast... on February 20\"), whereas A emphasizes past results.",
      "B includes hashtags and social media-style annotations (e.g., \"#investing #markets\"), absent in A.",
      "B highlights mergers/acquisitions (e.g., \"Charles Schwab to buy TD Ameritrade\") as standalone events, while A emphasizes partnerships or operational expansions.",
      "B references geopolitical or macroeconomic crises (e.g., \"Lebanon Turns to IMF\") as primary drivers, whereas A contextualizes macro factors within company performance.",
      "B features headlines about retail investor behavior (e.g., ETFs, futures) and market-wide sentiment, while A focuses on institutional analyst actions.",
      "B includes granular updates on dividends, share offerings, or debt instruments (e.g., \"AT&T to raise quarterly dividend\"), whereas A prioritizes revenue/EPS metrics.",
      "B covers legal/regulatory outcomes as standalone events (e.g., \"Align loses patent case\"), while A ties regulatory developments to company-specific downgrades or risks."
    ],
    "llama3.1-8b_zero-shot_bg_v1": [
      "Dataset B includes headlines about smaller or less mainstream companies and niche financial instruments (e.g., $NSPR, $ONVO, $SCANX), while A focuses on major tech firms and blue-chip stocks.",
      "Headlines in B frequently reference specific corporate governance changes (e.g., CEO resignations, mergers, patent disputes), whereas A emphasizes analyst actions (upgrades/downgrades) and earnings outcomes.",
      "Dataset B contains headlines formatted as questions, advice columns, or quotes (e.g., \"Cash is trash? No, cash is king!\"), while A uses declarative, event-driven statements.",
      "B includes country-specific economic crises or geopolitical actions (e.g., Lebanon/IMF, Germany-China investments) absent in A\u2019s global macro themes.",
      "Dataset B features retail investor-focused content (e.g., Seeking Alpha, hedge fund activity) rather than A\u2019s institutional analyst-centric narratives.",
      "B highlights precise numerical data beyond percentage changes (e.g., \"$26 billion all-stock deal\", \"$9.5 billion cost increase\"), while A focuses on stock price volatility metrics.",
      "Headlines in B mention regional or lesser-known financial institutions (e.g., SunTrust Robinson Humphrey, CFRA), whereas A consistently cites major global banks (Morgan Stanley, Barclays).",
      "Dataset B includes forward-looking earnings previews (e.g., \"Q4 2019 Earnings Preview\"), while A primarily reports post-event results (e.g., \"Q3 Earnings Miss Estimate\").",
      "B references legal/regulatory actions (e.g., patent lawsuits, Italian police probes) more granularly than A\u2019s broader mentions of regulatory scrutiny.",
      "Dataset B diversifies into non-tech sectors (e.g., commodities, healthcare, energy), while A is heavily concentrated on tech and consumer discretionary industries."
    ],
    "llama3.1-8b_zero-shot_v1": [
      "Dataset B headlines consistently include stock ticker symbols with prefixes (e.g., '$NSPR', '$UAA'), while Dataset A headlines never use ticker symbols and refer to companies by name.",
      "Dataset B contains granular mentions of pre-market/after-hours price movements (e.g., 'shares up 2% after hours'), whereas Dataset A focuses exclusively on regular trading session activity.",
      "Dataset B includes explicit references to specific financial instruments like ETFs (e.g., '$XLU - XLU Weekly') and futures contracts, which are absent in Dataset A's macroeconomic focus.",
      "Dataset B features headlines structured as direct analyst/investor advice columns (e.g., 'Here's Why You Need To Hedge...'), while Dataset A maintains a neutral journalistic tone without advisory language.",
      "Dataset B incorporates technical trading terminology (e.g., 'chart looks perfect for a bounce', 'resistance levels'), whereas Dataset A uses only fundamental analysis language.",
      "Dataset B contains conference call/webcast scheduling details (e.g., 'Henry Schein to Webcast...') absent in Dataset A's event-result-focused coverage.",
      "Dataset B includes granular pharmaceutical trial updates (e.g., 'positive early-stage KSI-301 data') versus Dataset A's broader sector performance mentions.",
      "Dataset B shows explicit dividend adjustments (e.g., 'AT&T to raise quarterly dividend by 2%') while Dataset A only references dividends implicitly through market reactions.",
      "Dataset B features corporate debt offerings and capital structure changes (e.g., 'Chefs' Warehouse floats new debt') absent in Dataset A's equity-focused reporting.",
      "Dataset B includes hyper-specific litigation updates (e.g., 'UPDATE 1-Italian police search ArcelorMittal offices') whereas Dataset A mentions legal developments only at summary level."
    ],
    "llama3.3-70b_few-shot_v1": [
      "Dataset B headlines frequently include stock tickers prefixed with '$' (e.g., $NSPR, $UAA) while Dataset A uses full company names",
      "B contains headlines framed as questions/analyses (e.g., 'Is X Trading At Y Discount?') absent in A's declarative style",
      "B includes corporate governance updates (e.g., CEO changes, mergers) not prominently featured in A's earnings/macro focus",
      "B references niche financial instruments (e.g., ETFs, futures contracts) beyond A's emphasis on major indices/stocks",
      "B incorporates direct quotes/commentary from analysts/institutions (e.g., '@jasonzweigwsj says...') unlike A's neutral reporting",
      "B includes forward-looking previews (e.g., 'Q4 2019 Earnings Preview') vs. A's focus on concluded events/results",
      "B features granular corporate actions (e.g., dividend changes, debt offerings) absent in A's macro/market-wide emphasis",
      "B uses informal trader jargon (e.g., 'sympathy play,' 'buying dips') contrasting with A's formal financial terminology",
      "B includes legal/regulatory updates (e.g., patent disputes, FDA approvals) not central to A's market reaction narratives",
      "B shows explicit price target adjustments (e.g., 'raised to $37 vs. $35') while A emphasizes percentage movements"
    ],
    "qwen2.5-32b_few-shot_bg_v1": [
      "Dataset B headlines more frequently mention smaller or less prominent companies (e.g., $NSPR, $ONVO, $ALGN) compared to Dataset A's focus on mega-cap tech firms like $AAPL and $TSLA.",
      "Dataset B includes headlines about corporate leadership changes (e.g., CEO departures) and M&A announcements (e.g., Charles Schwab acquiring TD Ameritrade), which are rare in Dataset A.",
      "Dataset B contains headlines with explicit trading strategy suggestions (e.g., \"sympathy play off BORR move\") and technical chart analysis, absent in Dataset A's analyst-driven narratives.",
      "Dataset B references niche financial instruments like futures contracts (natural gas futures), options (call diagonals), and ETFs ($XLU) not emphasized in Dataset A.",
      "Dataset B headlines often include real-time intraday price action updates (e.g., \"Dow up 7.59% at close\"), while Dataset A focuses on post-event analysis of earnings/results.",
      "Dataset B features more headlines about geopolitical risks (Lebanon/IMF, Brazil rate cuts) and localized economic impacts (Canadian rail strikes) compared to Dataset A's macroeconomic generalizations.",
      "Dataset B includes casual investor commentary (e.g., \"Cash is trash? No, cash is king!\") and social media-style language ($AIKI \"my position remains intact\"), unlike Dataset A's formal institutional tone.",
      "Dataset B covers non-earnings corporate milestones (FDA drug approvals, clinical trial results) as primary drivers, whereas Dataset A ties events directly to financial metrics/analyst reactions.",
      "Dataset B headlines frequently reference pre-market/post-market trading activity (e.g., \"+5.7% pre\"), while Dataset A primarily discusses regular session price movements.",
      "Dataset B includes headlines about stock offerings (e.g., Roku share offering) and debt financing (Chefs' Warehouse debt), which are absent in Dataset A's earnings/guidance focus."
    ],
    "qwen2.5-32b_few-shot_v1": [
      "Dataset B headlines frequently include stock ticker symbols (e.g., $NSPR, $UAA) in a majority of entries, whereas Dataset A headlines never use tickers.",
      "Dataset B incorporates informal language, colloquial phrases, and direct quotes (e.g., \"Cash is trash? No, cash is king!\"), while Dataset A maintains a consistently formal tone.",
      "Dataset B features fragmented updates, bullet-point formats (e.g., \"STOCKS AT THE OPEN:\"), and technical trading jargon absent in Dataset A\u2019s prose-only structure.",
      "Dataset B includes headlines framed as questions (e.g., \"Is Zhou Hei Ya... Trading At A 32% Discount?\"), a format not observed in Dataset A.",
      "Dataset B emphasizes granular corporate actions (e.g., CEO resignations, patent disputes, debt offerings) rather than Dataset A\u2019s focus on sector-wide or macroeconomic impacts.",
      "Dataset B explicitly cites specific financial institutions or analysts (e.g., \"SunTrust Robinson Humphrey\"), whereas Dataset A refers to analysts generically.",
      "Dataset B highlights international entities (e.g., Lebanon, Brazil, German firms) and localized events more frequently than Dataset A\u2019s U.S.-centric narratives.",
      "Dataset B references niche financial instruments (e.g., futures, options, debt) and trading strategies (e.g., \"call diagonals\"), while Dataset A focuses on broader metrics like EPS or revenue.",
      "Dataset B includes real-time trading annotations (e.g., \"after hours,\" \"pre-market\") and explicit price targets (e.g., \"$180 PT\") more granularly than Dataset A.",
      "Dataset B integrates social media-style elements (e.g., hashtags, @mentions) and speculative investor commentary absent in Dataset A\u2019s objective reporting."
    ],
    "qwen2.5-32b_zero-shot_bg_v1": [
      "Dataset B headlines include smaller or less frequently covered companies (e.g., $NSPR, $ONVO) absent in A, which focuses on major firms like $AAPL, $TSLA.",
      "Dataset B features non-earnings corporate events (e.g., executive changes, patent disputes, mergers like Charles Schwab/TD Ameritrade) as primary catalysts, while A emphasizes earnings reports and analyst actions.",
      "Dataset B incorporates casual/opinion-based language (e.g., \"Cash is trash? No, cash is king!\") and social media-style annotations (e.g., hashtags, @mentions), unlike A's formal tone.",
      "Dataset B references niche financial instruments (e.g., ETFs, futures, options) and retail investor terminology (e.g., \"sympathy play\"), whereas A focuses on equities and institutional metrics.",
      "Dataset B includes geopolitical/regulatory developments (e.g., Lebanon/IMF, Brazil rail strikes) as standalone drivers, while A ties such factors directly to stock performance.",
      "Dataset B headlines use dynamic formatting (e.g., bullet points, percentage move annotations like \"+5.7% pre\"), unlike A\u2019s standardized earnings/analyst action templates.",
      "Dataset B covers granular macroeconomic data (e.g., building permits, unemployment rates) unrelated to earnings, whereas A contextualizes macro trends through sector performance.",
      "Dataset B highlights speculative or retail-focused scenarios (e.g., \"sympathy plays,\" short-selling strategies), absent in A\u2019s institutional analyst-driven narratives.",
      "Dataset B integrates questions (e.g., \"Is X a Risky Investment?\") and consumer-centric themes (e.g., retail rent defaults), while A prioritizes declarative statements about corporate metrics.",
      "Dataset B includes international small/mid-cap firms (e.g., HKG:1458, Oslo-listed BORR) and localized economic impacts, whereas A focuses on U.S.-centric large caps and indices."
    ],
    "qwen2.5-7b_zero-shot_v1": [
      "Dataset B headlines frequently include stock tickers prefixed with '$' (e.g., $NSPR, $UAA), whereas Dataset A primarily uses company names (e.g., Apple, Tesla).",
      "Dataset B often features fragmented, abbreviated updates resembling social media posts or trading alerts (e.g., \"$SCANX: Stocks/ETFs...\"), while Dataset A uses complete, formal news sentences (e.g., \"Tech Sector Slumps...\").",
      "Dataset B includes explicit mentions of pre-market/after-hours trading activity (e.g., \"shares up 2% after hours\"), whereas Dataset A headlines focus on general intraday or broader market movements.",
      "Dataset B incorporates direct quotes, hashtags, or social media handles (e.g., \"@jasonzweigwsj\", \"#investing\"), while Dataset A avoids such annotations.",
      "Dataset B frequently cites specific analyst firms and price targets (e.g., \"SunTrust Robinson Humphrey\"), whereas Dataset A refers to analysts generically (e.g., \"analysts' expectations\").",
      "Dataset B headlines emphasize real-time trading metrics (e.g., \"Dow up 3.90%\") with granular numerical precision, while Dataset A uses broader descriptive terms (e.g., \"surges\", \"plunges\").",
      "Dataset B includes announcements for financial events like conference calls, debt offerings, or regulatory filings (e.g., \"Henry Schein to Webcast...\"), whereas Dataset A focuses on outcomes (e.g., \"earnings miss\").",
      "Dataset B features user-generated or opinionated language (e.g., \"I like this as a sympathy play...\"), while Dataset A maintains an objective, journalistic tone.",
      "Dataset B highlights niche financial instruments (e.g., futures, options, senior notes) and legal proceedings (e.g., patent cases), whereas Dataset A emphasizes macroeconomic or sector-wide trends.",
      "Dataset B often references exact dates, deadlines, or event timelines (e.g., \"Q4 2019 Earnings Preview\"), while Dataset A uses vague temporal references (e.g., \"coming months\")."
    ],
    "llama3.1-8b_zero-shot_bg_train-time-info_v1": [
      "Dataset B headlines more frequently reference broader market indices (e.g., Dow, Nasdaq, S&P) and macroeconomic trends without tying them to specific ticker movements, whereas A contextualizes indices within company-specific events.",
      "Dataset B includes non-corporate economic data (e.g., rent payment statistics, unemployment rates, GDP forecasts) absent in A, which focuses strictly on company/financial institutional actions.",
      "Dataset B headlines use question formats (e.g., \"Is X a Risky Investment?\") and rhetorical prompts uncommon in A\u2019s declarative, analyst-driven tone.",
      "Dataset B incorporates geopolitical developments (e.g., Lebanon\u2019s IMF talks, Brazil\u2019s rate cuts) as market drivers, while A\u2019s events are confined to corporate/financial institutional spheres.",
      "Dataset B features explicit mentions of ETFs (e.g., $XLU, $GLD) and fund-level activities, whereas A emphasizes individual equities and institutional analyst actions.",
      "Dataset B includes non-equity financial instruments (e.g., futures, bonds, debt offerings) and commodity markets (oil, gold), while A centers on stock-specific narratives.",
      "Dataset B headlines often summarize sector-wide performance (e.g., \"Energy Down After Coronavirus...\") without naming specific tickers, unlike A\u2019s ticker-centric granularity.",
      "Dataset B references retail investor behavior (e.g., hedge fund holdings, IPO participation) and public sentiment, whereas A focuses on institutional analyst perspectives.",
      "Dataset B integrates regulatory/political quotes (e.g., Fed officials, trade deal statements) as primary catalysts, while A ties catalysts to explicit analyst/firm actions.",
      "Dataset B uses colloquial language (e.g., \"Cash is trash?\") and non-technical commentary, whereas A maintains formal, metrics-driven phrasing tied to earnings/price targets."
    ],
    "qwen2.5-7b_few-shot_v1": [
      "Headlines in B consistently include stock ticker symbols (e.g., $NSPR, $UAA), while A omits them entirely.",
      "B frequently cites explicit price targets, analyst firm names, and numerical adjustments (e.g., 'price target raised to $37 vs. $35'), whereas A generalizes analyst actions without granular specifics.",
      "B features real-time/pre-market/after-hours stock reactions (e.g., 'shares up 3% after hours'), while A describes price movements without specifying trading session timing.",
      "B includes headlines structured as trading alerts or technical analysis (e.g., 'chart looks perfect for a bounce'), absent in A\u2019s formal news tone.",
      "B references niche financial instruments like ETFs, futures (e.g., 'natural gas futures'), and debt offerings explicitly, which A rarely mentions.",
      "B incorporates direct quotes, hashtags, and social media handles (e.g., '@jasonzweigwsj', #investing), while A maintains a neutral, editorial tone.",
      "B highlights micro-level corporate events (e.g., executive changes, shareholder votes) with immediate stock impacts, whereas A focuses on macro-sector implications.",
      "B uses conversational phrases and rhetorical questions (e.g., 'Is...Trading At A 32% Discount?'), contrasting with A\u2019s declarative statements.",
      "B includes granular financial metrics (e.g., '9M-share offering', 'Q3 adj. currency-neutral EPS down 3%-4%'), while A emphasizes broader trends like 'economic growth'.",
      "B contains frequent typos, formatting inconsistencies (e.g., 'plunges briefly plunges'), and unedited text, unlike A\u2019s polished, standardized headlines."
    ],
    "llama3.3-70b_few-shot_bg_v1": [
      "Dataset B headlines include non-analyst corporate events (e.g., mergers, leadership changes, product launches) absent in A",
      "Dataset B references dividends, stock offerings, and capital structure changes (e.g., \"$NVDA declares dividend\"), while A focuses strictly on price targets/earnings",
      "Dataset B incorporates retail investor slang (e.g., \"sympathy play\") and social media conventions (hashtags, @mentions) not seen in A",
      "Dataset B contains geopolitical risk disclosures (e.g., Lebanon/IMF, Brazil strikes) beyond A's standard macroeconomic mentions",
      "Dataset B includes explicit legal/regulatory developments (patent disputes, bankruptcy filings, probes) not emphasized in A",
      "Dataset B shows international company focus (European/Asian firms) vs. A's US-centric large caps (TSLA, GOOGL)",
      "Dataset B features consumer economy narratives (retail trends, small business data) absent from A's institutional analyst perspective",
      "Dataset B contains forward-looking qualitative market commentary (e.g., sector rotation theories) vs. A's backward-looking earnings metrics",
      "Dataset B uses unorthodox formatting (emojis, partial URLs, broken text) suggesting social media sourcing vs. A's clean wire-service style",
      "Dataset B includes non-tech sector coverage (energy, consumer goods, materials) while A concentrates on tech/EV/semiconductors"
    ],
    "llama3.1-8b_few-shot_v1": [
      "Dataset B headlines frequently include specific stock ticker symbols (e.g., $NSPR, $UAA), while Dataset A uses full company names or general references.",
      "Headlines in Dataset B often focus on granular corporate actions (e.g., CEO resignations, patent disputes, conference call schedules) rather than broader macroeconomic narratives prevalent in Dataset A.",
      "Dataset B includes explicit pre-market/after-hours stock price movements and intraday trading updates (e.g., \"shares up 3% after hours\"), whereas Dataset A emphasizes end-of-day market performance.",
      "Headlines in Dataset B frequently reference technical trading metrics (e.g., \"52-week highs/lows,\" price targets, options strategies) absent in Dataset A's more general market summaries.",
      "Dataset B incorporates direct analyst quotes, social media handles (e.g., @jasonzweigwsj), and conversational language, while Dataset A maintains formal news reporting tones.",
      "Earnings previews and post-earnings stock reactions (e.g., \"beats/misses by $X\") are granularly quantified in Dataset B, whereas Dataset A highlights earnings trends at a sector level.",
      "Dataset B includes niche corporate updates (e.g., debt offerings, shareholder revolts, small-cap stock moves) that are less common in Dataset A\u2019s focus on large-cap companies.",
      "Legal/regulatory headlines in Dataset B emphasize hyper-specific cases (e.g., \"Align loses patent case\"), whereas Dataset A covers broader regulatory themes (e.g., FDA approvals).",
      "Dataset B features real-time trading advice (e.g., \"Buying dips,\" \"short $ES below 3107.50\") tailored to active investors, unlike Dataset A\u2019s descriptive market analysis.",
      "Headlines in Dataset B often cite hedge fund activity, insider transactions, and institutional investor moves, while Dataset A focuses on retail investor-facing macroeconomic trends."
    ],
    "llama3.1-8b_few-shot_bg_v1": [
      "Dataset B headlines frequently include immediate stock price movements (e.g., percentage changes) as standalone events, while Dataset A ties stock price mentions to analyst actions or earnings results.",
      "Dataset B contains headlines formatted as questions (e.g., 'Is...?') or direct quotes, which are absent in Dataset A.",
      "Dataset B features granular updates on niche corporate events (e.g., CEO changes, small-scale mergers, patent disputes), whereas Dataset A focuses on broader analyst actions and macroeconomic trends.",
      "Dataset B includes headlines about stock technical metrics (e.g., 52-week highs/lows, resistance levels), while Dataset A emphasizes fundamental financial metrics (e.g., revenue, EPS).",
      "Dataset B often references specific dates or time-bound corporate events (e.g., earnings previews, conference calls), while Dataset A headlines are less time-specific and more thematic.",
      "Dataset B covers smaller or less mainstream companies (e.g., micro-caps, niche ETFs) alongside large firms, whereas Dataset A focuses predominantly on major blue-chip companies.",
      "Dataset B headlines incorporate informal language, hashtags (e.g., #investing), or social media-style mentions, while Dataset A uses formal, structured sentences.",
      "Dataset B includes announcements of stock offerings, debt issuances, or corporate actions (e.g., layoffs, webcasts), which are rare or absent in Dataset A.",
      "Dataset B highlights legal/regulatory outcomes (e.g., patent losses, court verdicts) at a granular case level, whereas Dataset A discusses broader regulatory pressures or investigations.",
      "Dataset B reports intraday market index movements (e.g., 'Dow up 3.90%') as primary events, while Dataset A contextualizes index trends within macroeconomic narratives."
    ],
    "qwen2.5-7b_few-shot_bg_v1": [
      "Dataset B headlines more frequently mention smaller or less prominent companies and ETFs (e.g., $NSPR, $ONVO, $SCANX), while A focuses on major tech and blue-chip firms (e.g., $AAPL, $TSLA, $MSFT).",
      "B includes casual, opinionated phrases and quotes (e.g., \"Cash is trash? No, cash is king!\"), whereas A maintains a formal, analytical tone focused on financial metrics and analyst actions.",
      "B headlines often reference real-time market index movements (e.g., \"Dow up 3.90%\") without contextual analysis, while A ties market movements to specific events like earnings or macroeconomic factors.",
      "Dataset B contains non-English characters, symbols, or formatting errors (e.g., \"plunges briefly plunges\"), suggesting less editorial rigor compared to A's polished headlines.",
      "B features retail investor-focused content (e.g., hedging advice, technical chart analysis like \"chart looks perfect for a bounce\"), whereas A targets institutional or professional investors with earnings guidance and analyst ratings.",
      "B headlines include social media elements like hashtags (#investing), mentions (@jasonzweigwsj), and URLs, indicating sourcing from platforms like Twitter or forums; A lacks these features.",
      "Dataset B covers niche corporate events (e.g., patent disputes, CEO departures, small mergers) more frequently than A, which emphasizes earnings, revenue, and large-scale acquisitions.",
      "B incorporates geopolitical and regional economic updates (e.g., Lebanon\u2019s IMF talks, Brazil\u2019s coffee production) more diversely, while A\u2019s macroeconomic mentions are US-centric (e.g., Fed rates, S&P 500).",
      "B includes questions and speculative headlines (e.g., \"Is XYZ a Risky Investment?\"), whereas A uses declarative statements focused on factual updates (e.g., \"Apple beats Q1 earnings\").",
      "Dataset B features more press release-style updates (e.g., conference call announcements, preliminary results) and legal/regulatory probes, while A emphasizes forward-looking analyst projections and quantitative metrics."
    ],
    "llama3.3-70b_zero-shot_v1": [
      "Headlines in Dataset B consistently include stock ticker symbols (e.g., '$NSPR', '$UAA') at the beginning, while Dataset A does not reference tickers.",
      "Dataset B headlines frequently cite specific analyst firms or institutions in upgrades/downgrades (e.g., 'SunTrust Robinson Humphrey', 'UBS'), whereas Dataset A generically references 'analysts' without naming entities.",
      "Dataset B includes granular trading-session references (e.g., 'premarket', 'after hours', 'halted for coming news') in most samples, while Dataset A uses broader temporal terms like 'Q2' or 'next month'.",
      "All Dataset B samples incorporate precise numerical metrics (e.g., 'price target raised to $37 vs. $35', 'revenue surges 4.2%') with comparative figures, whereas Dataset A uses generalized percentage changes ('up X%').",
      "Dataset B systematically references financial instruments like ETFs, futures, or options (e.g., 'sympathy play off BORR move', 'Call Diagonals'), while Dataset A focuses on broader market indices.",
      "Headlines in Dataset B frequently include earnings guidance revisions (e.g., 'withdraws FY20 guidance', 'sees 2020 EPS down 3%-4%'), whereas Dataset A focuses on completed earnings results.",
      "Dataset B samples contain explicit legal/regulatory terminology (e.g., 'patent case', 'insolvency probe', 'senior notes offering') absent from Dataset A's macroeconomic policy focus.",
      "All Dataset B headlines use fragmented sentence structures with trading-floor shorthand (e.g., 'taking some', 'my position remains intact'), while Dataset A maintains complete sentence constructions.",
      "Dataset B consistently references micro-level corporate actions (e.g., 'stock offering', 'debt floatation', 'merger rejection') rather than Dataset A's emphasis on sector-wide trends.",
      "Dataset B includes real-time trading platform formatting (e.g., bulleted index updates, hashtags like #investing) absent from Dataset A's newspaper-style headlines."
    ],
    "llama3.1-8b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines more frequently cite external sources or quotes (e.g., WSJ polls, @jasonzweigwsj, Federal Reserve statements) compared to Dataset A.",
      "Dataset B includes broader macroeconomic or geopolitical narratives (e.g., Lebanon/IMF, China trade impact) alongside company-specific news, while Dataset A focuses strictly on granular company/sector developments.",
      "Dataset B headlines reference non-financial cultural/political events (e.g., \"So Halsey needs a shower\") impacting markets, absent in Dataset A.",
      "Dataset B contains more technical trading language (e.g., \"chart looks perfect for a bounce,\" \"resistance\") compared to Dataset A\u2019s focus on fundamentals like earnings or analyst actions.",
      "Dataset B incorporates hashtags (#investing, #economy) and social/media-style formatting (e.g., emojis, ellipses), while Dataset A uses formal headline structures.",
      "Dataset B includes announcements for corporate actions like debt offerings (e.g., \"Chefs' Warehouse floats new debt\") less emphasized in Dataset A.",
      "Dataset B features market-wide sentiment phrases (e.g., \"Santa Claus rally,\" \"Cash is trash?\") absent in Dataset A\u2019s company-centric tone.",
      "Dataset B references national/regional economic metrics (e.g., \"Building permits rise 5%,\" Brazil rate cuts) more frequently than Dataset A.",
      "Dataset B headlines often mention ETFs, indices, or sector-wide movements (e.g., \"Stocks hit lows as oil plunges\"), whereas Dataset A prioritizes individual ticker performance.",
      "Dataset B includes explicit references to conference calls/webcasts (e.g., Henry Schein earnings call) as standalone news, unlike Dataset A."
    ],
    "qwen2.5-7b_few-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently include broader market indices (e.g., Dow, Nasdaq, S&P) and their percentage movements, while A focuses almost exclusively on individual stocks or sector-specific tickers.",
      "Dataset B incorporates direct quotes, social media handles (e.g., @jasonzweigwsj), and hashtags (e.g., #investing), whereas A uses formal analyst citations without informal references.",
      "Headlines in B often reference geopolitical events (e.g., Lebanon/IMF, Brazil rail strikes) as primary drivers, while A ties macroeconomic factors indirectly to company-specific performance.",
      "Dataset B includes retail investor-oriented advice (e.g., \"Why You Need To Hedge\") and speculative language (e.g., \"Risky but chart looks perfect for a bounce\"), absent in A\u2019s institutional tone.",
      "B features non-earnings corporate actions (e.g., CEO changes, mergers, patent lawsuits) as standalone events, while A predominantly links such events to analyst reactions/price targets.",
      "Dataset B headlines explicitly mention dates/times for upcoming earnings calls or events (e.g., \"Henry Schein to Webcast...February 20\"), whereas A focuses on retrospective earnings results.",
      "B includes headlines with general market sentiment summaries (e.g., \"Stocks hit lows of the day\"), while A avoids broad market narratives in favor of granular stock/analyst updates.",
      "Dataset B references non-financial entities (e.g., Federal Reserve officials, political figures) as news drivers, whereas A relies solely on named analyst firms/banks for authority.",
      "B uses colloquial phrases (e.g., \"Cash is trash? No, cash is king!\") and rhetorical questions, contrasting with A\u2019s standardized financial terminology without stylistic flair.",
      "Dataset B includes headlines about ETFs, futures contracts (e.g., natural gas futures), and macroeconomic reserves (e.g., oil reserves), while A emphasizes individual equities and sector-specific fundamentals."
    ],
    "llama3.3-70b_few-shot_bg_test-time-info_v1": [
      "Dataset B includes headlines with general market commentary or investment advice (e.g., 'Cash is trash? No, cash is king!') absent in Dataset A, which focuses on specific analyst actions.",
      "Dataset B contains geopolitical or macroeconomic event-driven headlines (e.g., 'Lebanon Turns to IMF') without direct ties to specific companies or analysts, unlike Dataset A's company-specific focus.",
      "Dataset B features fragmented or social media-style formatting (e.g., hashtags, truncated text) not present in Dataset A's structured, institution-centric headlines.",
      "Dataset B includes headlines about broader indices, ETFs, or commodities (e.g., '$XLU - XLU Weekly') more frequently than Dataset A, which emphasizes individual stocks.",
      "Dataset B references non-financial sources (e.g., journalists, general media) like @jasonzweigwsj, whereas Dataset A exclusively cites financial institutions (e.g., Barclays, Morgan Stanley).",
      "Dataset B headlines often lack explicit analyst actions (e.g., 'Is Zhou Hei Ya... Trading At A 32% Discount?'), while Dataset A consistently ties news to specific upgrades, downgrades, or price targets.",
      "Dataset B includes legal/regulatory outcomes (e.g., '$ALGN - Align loses patent case') without analyst attribution, unlike Dataset A's analyst-driven rationale for events.",
      "Dataset B incorporates conversational or opinion-based language (e.g., 'Risky but chart looks perfect...') absent in Dataset A's formal, metric-driven tone.",
      "Dataset B mentions corporate events (e.g., CEO changes, mergers) without contextualizing market reactions via stock metrics, whereas Dataset A links events to price movements or guidance.",
      "Dataset B includes educational or advisory content (e.g., 'Here's Why You Need To Hedge...') not seen in Dataset A, which prioritizes factual reporting of financial actions and results."
    ],
    "llama3.1-8b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently reference broader macroeconomic or geopolitical developments (e.g., trade deals, IMF interventions) without directly linking them to specific company financial metrics, whereas Dataset A ties such events to granular company impacts (e.g., revenue misses, stock price targets).",
      "Dataset B includes more general market-wide performance updates (e.g., index movements like 'Dow up 3.90%') and ETF trends, while Dataset A emphasizes individual stock reactions to company-specific news.",
      "Dataset B headlines often lack explicit analyst names or firms when mentioning ratings changes (e.g., 'GE upgraded from Sell by longtime bear'), whereas Dataset A consistently specifies analyst entities (e.g., 'Morgan Stanley upgrades $AMZN').",
      "Dataset B contains more casual, conversational language and retail investor-oriented commentary (e.g., 'Risky but chart looks perfect for a bounce'), while Dataset A uses formal, institutional-grade terminology focused on quantitative metrics.",
      "Dataset B includes non-earnings corporate announcements (e.g., executive departures, mergers) without explicitly connecting them to financial outcomes, whereas Dataset A directly ties such events to stock movements or forecasts (e.g., 'rises 5.2% on service launch').",
      "Dataset B features more after-hours/premarket trading context (e.g., 'shares up 2% after hours'), while Dataset A predominantly reports intraday price movements without specifying trading sessions.",
      "Dataset B headlines reference international markets (e.g., Lebanon, Germany, China) and global economic trends more frequently than Dataset A, which focuses on U.S.-centric company updates.",
      "Dataset B includes more non-financial regulatory/legal developments (e.g., patent cases, political probes) as standalone news items, whereas Dataset A typically frames regulatory issues through their direct financial implications (e.g., 'faces potential regulatory hurdles...ends mortgage deal').",
      "Dataset B incorporates quotes from non-analyst sources (e.g., politicians, journalists) and references to non-institutional investors, while Dataset A relies almost exclusively on analyst opinions and institutional data.",
      "Dataset B contains more varied industry coverage including agriculture, consumer goods, and geopolitical commodities (e.g., coffee, grain prices), whereas Dataset A focuses predominantly on tech, energy, and financial sectors."
    ],
    "qwen2.5-7b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines frequently reference geopolitical/economic events (e.g., country-specific crises, trade deals) rather than company-specific analyst actions prevalent in Dataset A.",
      "Dataset B contains market-wide movement updates (e.g., index performance, sector trends) while Dataset A focuses exclusively on individual security/ticker developments.",
      "Dataset B includes rhetorical/inquisitive headlines (e.g., \"Is X a Risky Investment?\") absent in Dataset A's declarative analyst-action format.",
      "Dataset B incorporates direct quotes from executives/analysts (e.g., \"Cash is trash? No, cash is king!\") unlike Dataset A's institutional voice.",
      "Dataset B features event announcements (earnings webcasts, product launches) while Dataset A reports post-event analyst reactions.",
      "Dataset B shows greater structural variance (some ticker-first, some narrative-first) vs. Dataset A's consistent [Ticker] - [Analyst Action] format.",
      "Dataset B includes social media elements (hashtags, @mentions) absent from Dataset A's institutional tone.",
      "Dataset B covers non-analyst corporate news (CEO changes, patent disputes, mergers) while Dataset A focuses strictly on financial metrics/ratings.",
      "Dataset B uses sensationalized verbs (\"eviscerate\", \"plummets\") compared to Dataset A's technical action verbs (\"downgrades\", \"maintains\").",
      "Dataset B references macroeconomic indicators (employment data, GDP) whereas Dataset A emphasizes micro-level company financials (EPS, guidance)."
    ],
    "llama3.1-8b_few-shot_bg_train-time-info_v1": [
      "Dataset B headlines more frequently reference geopolitical, regulatory, or macroeconomic events (e.g., Lebanon/IMF, Bayer legal verdicts) without direct ties to corporate financial metrics, while A ties all events to company-specific financial impacts.",
      "B includes non-analyst opinions (e.g., journalists, general investor advice) and social media references (@jasonzweigwsj), whereas A exclusively cites institutional analysts or firms.",
      "B uses conversational quotes (e.g., \"Cash is trash? No, cash is king!\") and colloquial language, while A maintains formal, structured financial terminology.",
      "B headlines often lack explicit numerical financial metrics in favor of qualitative outcomes (e.g., \"surprise profit rise\"), whereas A always includes percentages, price targets, or fiscal quarters.",
      "B features technical trading terms (e.g., \"breakout,\" \"resistance\") and ETF/index performance updates, while A focuses on fundamental analysis (EPS, revenue).",
      "B includes non-corporate numerical data (e.g., \"Half of small businesses fall behind on rent\"), whereas A\u2019s numerical data strictly relates to company financials.",
      "B covers broader market summaries (e.g., \"Stocks hit lows of the day\") and sector-wide trends, while A emphasizes individual stock reactions to company-specific news.",
      "B headlines prioritize event announcements (e.g., CEO departures, mergers) without immediate financial context, whereas A ties all events to quantifiable market impacts.",
      "B references legal/regulatory outcomes (e.g., patent losses, lawsuits) as primary drivers, while A\u2019s legal mentions are secondary to financial metrics (e.g., FDA delays affecting stock prices).",
      "B includes previews/event calendars (e.g., \"What to watch in the markets: Thursday\"), while A focuses exclusively on post-event analyses with numerical results."
    ],
    "llama3.3-70b_zero-shot_bg_test-time-info_v1": [
      "Dataset B headlines frequently reference broader market indices (e.g., Dow, S&P 500) and global macroeconomic events (e.g., oil price crashes, geopolitical tensions), while Dataset A focuses on individual stock/company actions and analyst-specific adjustments.",
      "Dataset B includes headlines with direct quotes, opinions from journalists/columnists (e.g., @jasonzweigwsj), or rhetorical questions (e.g., 'Is Zhou Hei Ya... Trading At A 32% Discount?'), whereas Dataset A maintains a neutral, institutional tone focused on analyst actions and earnings metrics.",
      "Dataset B contains headlines with explicit technical analysis language (e.g., 'chart looks perfect for a bounce,' 'breakout and at resistance'), while Dataset A focuses solely on fundamental analysis (e.g., revenue guidance, EPS results).",
      "Dataset B features headlines about retail investor behavior, small-business conditions (e.g., 'Half of small businesses fall behind on rent'), consumer trends (e.g., Chinese consumer behavior), or societal impacts, whereas Dataset A centers on institutional investor actions and corporate financial metrics.",
      "Dataset B includes headlines about debt instruments, dividends, and corporate financing (e.g., 'AT&T to raise quarterly dividend,' 'Chefs' Warehouse floats new debt'), while Dataset A emphasizes equity-specific metrics like price targets and stock upgrades/downgrades.",
      "Dataset B headlines frequently reference ETFs (e.g., $XLU, $USO) and sector-specific funds, while Dataset A focuses almost exclusively on individual company tickers.",
      "Dataset B contains more headlines about legal/regulatory outcomes with immediate market reactions (e.g., 'Align loses patent case,' 'FDA Approves First Drug'), whereas Dataset A emphasizes regulatory concerns as forward-looking risks (e.g., 'FDA warning letter over manufacturing issues').",
      "Dataset B uses colloquial phrases and hashtags (e.g., '#investing,' 'Cash is trash? No, cash is king!'), while Dataset A maintains formal financial terminology without slang or social media-style annotations.",
      "Dataset B headlines frequently quantify macroeconomic metrics (e.g., 'Building permits rise 5%,' 'Brazil coffee production falls 20%'), whereas Dataset A quantifies micro-level corporate metrics (e.g., 'EPS misses by $0.33').",
      "Dataset B includes headlines about mergers/acquisitions as finalized events (e.g., 'Charles Schwab to buy TD Ameritrade'), while Dataset A mentions M&A only as speculative catalysts (e.g., 'TD Ameritrade acquisition' as context for price targets)."
    ]
  }
}