{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b1ed7b33-4f7f-43fb-8250-61555c26ee6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "7c2e9a9f-5514-450a-ba9c-4d715f6dc354",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "count_bins   \n",
      "Medium           3400\n",
      "Frequent         2640\n",
      "Rare             1641\n",
      "Very Frequent     752\n",
      "Very Rare          88\n",
      "Name: count, dtype: int64 continent\n",
      "Americas     3801\n",
      "Europe       3760\n",
      "Asia          586\n",
      "Oceania       247\n",
      "Africa        128\n",
      "Name: count, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Load the name metadata, skipping every column whose header starts with 'Unnamed'.\n",
    "df_meta = pd.read_csv('name_meta_final.csv', usecols=lambda col: not col.startswith('Unnamed'))\n",
    "\n",
    "# Keep only rows that have a continent, renumber them, and turn underscores in names into spaces.\n",
    "df_final = df_meta.dropna(subset=['continent']).reset_index(drop=True)\n",
    "df_final['Name'] = df_final['Name'].str.replace('_', ' ', regex=False)\n",
    "\n",
    "# Remove the listed names before anything is reported or saved.\n",
    "drop_names = ['George Brydges Rodney', 'Kiyohiko Azuma', 'EL (rapper)', 'Gardner Fox', 'Beatrice of Portugal']\n",
    "df_final = df_final[~df_final['Name'].isin(drop_names)]\n",
    "\n",
    "# Report the distributions of the rows that are actually written out.\n",
    "# One print per table: a single print() joins the two Series on one line.\n",
    "print(df_final['count_bins'].value_counts())\n",
    "print(df_final['continent'].value_counts())\n",
    "\n",
    "# The index is written as the first CSV column; it keeps its pre-drop labels.\n",
    "df_final.to_csv('../data/factscore_final.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "05c95871-0386-4900-8b85-5b230133a4aa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Elizabeth I',\n",
       " 'Jacob ben Asher',\n",
       " 'Harry Anderson',\n",
       " 'Nicolas Poussin',\n",
       " 'Lyndon B. Johnson',\n",
       " 'Memphis Bleek',\n",
       " 'James VI and I',\n",
       " 'Duke Special',\n",
       " 'Mary II of England',\n",
       " 'Johnny Kilbane',\n",
       " \"Grace O'Malley\",\n",
       " 'Kevin Barry',\n",
       " 'Heinrich Böll',\n",
       " 'Kevin Barry (author)',\n",
       " 'Allen Ginsberg',\n",
       " 'Irwin Allen',\n",
       " 'Peter Orlovsky',\n",
       " 'William S. Burroughs',\n",
       " 'Anne Waldman',\n",
       " 'Jack Kerouac',\n",
       " 'Helen Vendler',\n",
       " 'Walt Whitman',\n",
       " 'Banjo Paterson',\n",
       " 'Lionel Barrymore',\n",
       " 'Naomi Campbell',\n",
       " 'Emily Dickinson',\n",
       " 'Henry Wadsworth Longfellow',\n",
       " 'Steve Allen',\n",
       " 'Neal Cassady',\n",
       " 'John Clellon Holmes',\n",
       " 'Gregory Corso',\n",
       " 'Hal Greer',\n",
       " 'Joyce Johnson (author)',\n",
       " 'William Blake',\n",
       " 'William Carlos Williams',\n",
       " 'John F. Kelly',\n",
       " 'Philip Whalen',\n",
       " 'Gary Snyder',\n",
       " 'Tom McGrath (animator)',\n",
       " 'Jeff Nuttall',\n",
       " 'Peter Whitehead (racing driver)',\n",
       " 'George MacBeth',\n",
       " 'Anselm Hollo',\n",
       " 'Lawrence Ferlinghetti',\n",
       " 'Adrian Mitchell',\n",
       " 'Barbara Rubin',\n",
       " 'Sophie B. Hawkins',\n",
       " \"Frank O'Hara\",\n",
       " 'Amiri Baraka',\n",
       " 'Denise Levertov',\n",
       " 'David Amram',\n",
       " 'Zohib Islam Amiri',\n",
       " 'Robert Creeley',\n",
       " 'Diane di Prima',\n",
       " 'Timothy Leary',\n",
       " 'Langston Hughes',\n",
       " 'Bob Dylan',\n",
       " 'Ornette Coleman',\n",
       " 'Ken Kesey',\n",
       " 'Alan Watts',\n",
       " 'Janis Joplin',\n",
       " 'Richard Brookhiser',\n",
       " 'William F. Buckley Jr.',\n",
       " 'Mamata Banerjee',\n",
       " 'Akbar',\n",
       " 'Jalal-ud-din Khalji',\n",
       " 'Adrienne Rich',\n",
       " 'Herbert Huncke',\n",
       " 'Harry Everett Smith',\n",
       " 'Bulat Okudzhava',\n",
       " 'Andrei Voznesensky',\n",
       " 'Johnny Depp',\n",
       " 'Roy Lichtenstein',\n",
       " 'Patti Smith',\n",
       " 'William Sloane Coffin',\n",
       " 'Dwight Macdonald',\n",
       " 'Noam Chomsky',\n",
       " 'Norman Mailer',\n",
       " 'Robert Lowell',\n",
       " 'Fidel Castro',\n",
       " 'Jonah Raskin',\n",
       " 'Che Guevara',\n",
       " 'Lenny Bruce',\n",
       " 'Lemar',\n",
       " 'Richard Helms',\n",
       " 'Antonin Artaud',\n",
       " 'Lynd Ward',\n",
       " 'Dylan Thomas',\n",
       " 'Francis Drake',\n",
       " 'George III',\n",
       " 'Charlie Parker',\n",
       " 'Thomas Merton',\n",
       " 'John Keats',\n",
       " 'Federico García Lorca',\n",
       " 'Christopher Smart',\n",
       " 'Philip Lamantia',\n",
       " 'Jean Genet',\n",
       " 'André Breton',\n",
       " 'Paul Cézanne',\n",
       " 'Edgar Allan Poe',\n",
       " 'Herman Melville',\n",
       " 'Philip Glass',\n",
       " 'Phil Ochs',\n",
       " 'Paul McCartney',\n",
       " 'Arthur Rimbaud',\n",
       " 'Garry Kasparov',\n",
       " 'Anatoly Karpov',\n",
       " 'Leonid Stein',\n",
       " 'Lev Polugaevsky',\n",
       " 'Bobby Fischer',\n",
       " 'Bent Larsen',\n",
       " 'Larry Christiansen',\n",
       " 'Jan Timman',\n",
       " 'Mikhail Tal',\n",
       " 'Florencio Campomanes',\n",
       " 'Ulf Andersson',\n",
       " 'Joël Lautier',\n",
       " 'Judit Polgár',\n",
       " 'Gata Kamsky',\n",
       " 'Alexei Shirov',\n",
       " 'Nigel Lythgoe',\n",
       " 'Viswanathan Anand',\n",
       " 'Boris Gelfand',\n",
       " 'Vladimir Kramnik',\n",
       " 'Veselin Topalov',\n",
       " 'Magnus Carlsen',\n",
       " 'Vasyl Ivanchuk',\n",
       " 'Vladimir Putin',\n",
       " 'Kirsan Ilyumzhinov',\n",
       " 'Nigel Clough',\n",
       " 'José Ferrer',\n",
       " 'Bill France Sr.',\n",
       " 'Juan Manuel Fangio',\n",
       " 'Juan Pablo Montoya',\n",
       " 'Jonathan Palmer',\n",
       " 'Wally Parks',\n",
       " 'Klay Thompson',\n",
       " 'Robby Gordon',\n",
       " 'Wayne Rainey',\n",
       " 'Fernando Alonso',\n",
       " 'Michael Schumacher',\n",
       " 'Pierre Levegh',\n",
       " 'Murray Rothbard',\n",
       " 'Benjamin Tucker',\n",
       " 'Lysander Spooner',\n",
       " 'Benny Morris',\n",
       " 'Hans-Hermann Hoppe',\n",
       " 'Carl Menger',\n",
       " 'Eugen von Böhm-Bawerk',\n",
       " 'Linda McCartney',\n",
       " 'Gustave de Molinari',\n",
       " 'Frederic Tudor',\n",
       " 'Samuel Edward Konkin III',\n",
       " 'Frédéric Bastiat',\n",
       " 'John Locke',\n",
       " 'Henry David Thoreau',\n",
       " 'Ralph Raico',\n",
       " 'H. L. Mencken',\n",
       " 'Isabel Paterson',\n",
       " 'Frank Chodorov',\n",
       " 'Garet Garrett',\n",
       " 'Auberon Waugh',\n",
       " 'Albert Jay Nock',\n",
       " 'Herbert Spencer',\n",
       " 'Walter Benjamin',\n",
       " 'Bryan Caplan',\n",
       " 'Carl Linnaeus',\n",
       " 'Adam Smith',\n",
       " 'Lewis F. Powell Jr.',\n",
       " 'Albert Meltzer',\n",
       " 'Robert Nozick',\n",
       " 'Paul Birch (actor)',\n",
       " 'David Graeber',\n",
       " 'Vernor Vinge',\n",
       " 'Neal Stephenson',\n",
       " 'Matt Stone',\n",
       " 'Max Barry',\n",
       " 'Robert Falcon Scott',\n",
       " 'Robert E. Lee',\n",
       " 'Aristophanes',\n",
       " 'Plato',\n",
       " 'Dionysius of Halicarnassus',\n",
       " 'Philetaerus',\n",
       " 'Alcibiades',\n",
       " 'Hesiod',\n",
       " 'Jean Racine',\n",
       " 'Johann Wolfgang von Goethe',\n",
       " 'Albert Schweitzer',\n",
       " 'Louis Schweitzer (businessman)',\n",
       " 'Adela of Champagne',\n",
       " 'Charles-Marie Widor',\n",
       " 'William Church Osborn',\n",
       " 'Aristide Cavaillé-Coll',\n",
       " 'Ernst Cassirer',\n",
       " 'Max Reger',\n",
       " 'Hans Münch',\n",
       " 'Georg Wilhelm Friedrich Hegel',\n",
       " 'Jaroslav Pelikan',\n",
       " 'Martin Luther',\n",
       " 'Noel Gallagher',\n",
       " 'Dizzy Gillespie',\n",
       " 'John Gunther',\n",
       " 'Albert Ruskin Cook',\n",
       " 'David Cameron',\n",
       " 'James Cameron',\n",
       " 'Charles Darwin',\n",
       " 'Donald Winnicott',\n",
       " 'Pierre Fresnay',\n",
       " 'Jeanne Moreau',\n",
       " \"Hugh O'Brian\",\n",
       " 'Albert Einstein',\n",
       " 'Henry Fonda',\n",
       " 'Anne-Marie',\n",
       " 'Charles VII of France',\n",
       " 'Elizabeth II',\n",
       " 'Jean-Paul Sartre',\n",
       " 'Oskar Morgenstern',\n",
       " 'Leland B. Yeager',\n",
       " 'Walter Block',\n",
       " 'John Maynard Keynes',\n",
       " 'Edmund Burke',\n",
       " 'Jesús Huerta de Soto',\n",
       " 'Alan Greenspan',\n",
       " 'James Buchanan',\n",
       " 'Paul Krugman',\n",
       " 'Tyler Cowen',\n",
       " 'Jeffrey Sachs',\n",
       " 'Mark Blaug',\n",
       " 'Gordon Tullock',\n",
       " 'Paul Samuelson',\n",
       " 'John Milton',\n",
       " 'Valdemar II of Denmark',\n",
       " 'Harald Hardrada',\n",
       " 'Thomas Helmig',\n",
       " 'Hack Kampmann',\n",
       " 'Christian X of Denmark',\n",
       " 'Kay Fisker',\n",
       " 'Arne Jacobsen',\n",
       " 'Lenny White',\n",
       " 'Walther Flemming',\n",
       " 'Olafur Eliasson',\n",
       " 'Anne Linnet',\n",
       " 'Flemming Jørgensen',\n",
       " 'Steffen Brandt',\n",
       " 'Nils Malmros',\n",
       " 'Bill Nye',\n",
       " 'Syd Barrett',\n",
       " 'Francis Bacon',\n",
       " 'Isaac Newton',\n",
       " 'Gregor Strasser',\n",
       " 'Leonardo da Vinci',\n",
       " 'William Shakespeare',\n",
       " 'Pierre de Fermat',\n",
       " 'Grote Reber',\n",
       " 'Jacques Doriot',\n",
       " 'Alexis Carrel',\n",
       " 'Charles Lindbergh',\n",
       " 'Henry Drysdale Dakin',\n",
       " 'Leonard Woolf',\n",
       " 'Charles I of England',\n",
       " 'John Sutter',\n",
       " 'Vincent van Gogh',\n",
       " 'Robert Debré',\n",
       " 'Charles II of England',\n",
       " 'Pope Benedict XVI',\n",
       " 'Patricia Arquette',\n",
       " 'Anatole France',\n",
       " 'Marcel Proust',\n",
       " 'Louis XVIII',\n",
       " 'Émile Zola',\n",
       " 'Pierre Drieu La Rochelle',\n",
       " 'André Gide',\n",
       " 'Madeleine of Valois',\n",
       " 'Oscar Wilde',\n",
       " 'Marc Allégret',\n",
       " 'Théo van Rysselberghe',\n",
       " \"Catherine de' Medici\",\n",
       " 'Richard Brinsley Sheridan',\n",
       " 'Lester Bird',\n",
       " 'Mary I of England',\n",
       " 'Smokey Robinson',\n",
       " 'Emil Fackenheim',\n",
       " 'Avraham Harman',\n",
       " 'James Carroll (author)',\n",
       " 'Lewis Carroll',\n",
       " 'Bernard Lewis',\n",
       " 'Joseph Goebbels',\n",
       " 'Eric Kandel',\n",
       " 'William Nichols (architect)',\n",
       " 'Henry Ford',\n",
       " 'Abraham Foxman',\n",
       " 'Arthur de Gobineau',\n",
       " 'Israel Gutman',\n",
       " 'Ptolemy I Soter',\n",
       " 'Edward Gibbon',\n",
       " 'Maria Theresa',\n",
       " 'Joseph II',\n",
       " 'Stephenie Meyer',\n",
       " 'Maurice Barrès',\n",
       " 'Charles Maurras',\n",
       " 'Louis Veuillot',\n",
       " 'Alfred Rosenberg',\n",
       " 'Moses Montefiore',\n",
       " 'Jacob Grimm',\n",
       " 'Wilhelm Grimm',\n",
       " 'Ferdinand Walsin Esterhazy',\n",
       " 'Leo Frank',\n",
       " 'Charles Coughlin',\n",
       " 'Franklin D. Roosevelt',\n",
       " 'Joseph Stalin',\n",
       " 'Robert L. Bernstein',\n",
       " 'Vidal Sassoon',\n",
       " 'Shimon Peres',\n",
       " 'John Mayer',\n",
       " 'Ajmal Kasab',\n",
       " 'Recep Tayyip Erdoğan',\n",
       " 'Jonathan Sacks',\n",
       " 'Viatcheslav Moshe Kantor',\n",
       " 'Nicolas Sarkozy',\n",
       " 'Manuel Valls',\n",
       " 'Wolfgang Schäuble',\n",
       " 'Kristin Halvorsen',\n",
       " 'Johan Halvorsen',\n",
       " 'Antoni Macierewicz',\n",
       " 'Ronald Lauder',\n",
       " 'Mateusz Morawiecki',\n",
       " 'Yair Lapid',\n",
       " 'Göran Persson',\n",
       " 'Oleh Tyahnybok',\n",
       " 'Dmytro Yarosh',\n",
       " 'Ken Livingstone',\n",
       " 'Jeremy Corbyn',\n",
       " 'Tim Farron',\n",
       " 'Angus Robertson',\n",
       " 'Deborah Lipstadt',\n",
       " 'Jonathan Greenblatt',\n",
       " 'David Schoen',\n",
       " 'Donald Trump',\n",
       " 'William O. Douglas',\n",
       " 'Cesar Chavez',\n",
       " 'Henry Kissinger',\n",
       " 'James Baker',\n",
       " 'Nils Muižnieks',\n",
       " 'Zakir Hasanov',\n",
       " 'Ayaz Mutallibov',\n",
       " 'Jane Fonda',\n",
       " 'Roger Vadim',\n",
       " 'Ilham Aliyev',\n",
       " 'Vazgen Sargsyan',\n",
       " 'Robert Kocharyan',\n",
       " 'Aram Sargsyan',\n",
       " 'Karen Demirchyan',\n",
       " 'Andranik Margaryan',\n",
       " 'Serzh Sargsyan',\n",
       " 'Hrant Bagratyan',\n",
       " 'Ngozi Okonjo-Iweala',\n",
       " 'Ilya Klebanov',\n",
       " 'Mikhail Kasyanov',\n",
       " 'Nikol Pashinyan',\n",
       " 'Nikolay Bordyuzha',\n",
       " 'Seyran Ohanyan',\n",
       " 'Mikhail Fradkov',\n",
       " 'Anatoly Serdyukov',\n",
       " 'George W. Bush',\n",
       " 'Barack Obama',\n",
       " 'Colin Powell',\n",
       " 'Mike Gordon',\n",
       " 'Aisha Uqbah Malik',\n",
       " 'Peter Adamson (actor)',\n",
       " 'Ibn Taymiyyah',\n",
       " 'William of Ockham',\n",
       " 'Carl Benjamin Boyer',\n",
       " 'Dante Alighieri',\n",
       " 'George Sarton',\n",
       " 'Al-Zahrawi',\n",
       " 'Saladin',\n",
       " 'Galen',\n",
       " \"Louis L'Amour\",\n",
       " 'Ben Kingsley',\n",
       " 'Roger Bacon',\n",
       " 'William Bligh',\n",
       " 'A. N. Hornby',\n",
       " 'Harry Boyle (cricketer)',\n",
       " 'Ted Peate',\n",
       " 'Fred Spofforth',\n",
       " 'Charles Studd',\n",
       " 'The Game (rapper)',\n",
       " 'Robert Boyle',\n",
       " 'George Giffen',\n",
       " 'George Chapman',\n",
       " 'Delon Wright',\n",
       " 'Gary Barlow',\n",
       " 'Bobby Peel',\n",
       " 'Johnny Briggs (actor)',\n",
       " 'Syd Gregory',\n",
       " 'John Peel',\n",
       " 'Tom Richardson (cricketer)',\n",
       " 'Victor Trumper',\n",
       " 'Wilfred Rhodes',\n",
       " 'Monty Noble',\n",
       " 'George Hirst',\n",
       " 'Wilfrid Scawen Blunt',\n",
       " 'Jack Hobbs',\n",
       " 'Sydney Barnes',\n",
       " 'Sammy Carter',\n",
       " 'Vernon Ransford',\n",
       " 'Jack Gregory (cricketer)',\n",
       " 'Ted McDonald',\n",
       " 'Tibby Cotter',\n",
       " 'Herbert Sutcliffe',\n",
       " 'Warren Bardsley',\n",
       " 'Wally Hammond',\n",
       " 'Jack Ryder (cricketer)',\n",
       " 'Don Bradman',\n",
       " 'Herbie Collins',\n",
       " 'Charlie Macartney',\n",
       " 'Bill Voce',\n",
       " 'Clarrie Grimmett',\n",
       " 'Harold Larwood',\n",
       " 'Bill Ponsford',\n",
       " 'Stan McCabe',\n",
       " 'Archie Jackson',\n",
       " 'Jack Fingleton',\n",
       " 'Len Hutton',\n",
       " 'Keith Miller',\n",
       " 'Ray Lindwall',\n",
       " 'Eric Hollies',\n",
       " 'Tom Graveney',\n",
       " 'Lindsay Hassett',\n",
       " 'Alec Bedser',\n",
       " 'Denis Compton',\n",
       " 'Godfrey Evans',\n",
       " 'Fred Trueman',\n",
       " 'Trevor Bailey',\n",
       " 'Frank Tyson',\n",
       " 'Brian Statham',\n",
       " 'Peter May (cricketer)',\n",
       " 'Ian Meckiff',\n",
       " 'Richie Benaud',\n",
       " 'Bill Lawry',\n",
       " 'Geoffrey Boycott',\n",
       " 'Ray Illingworth',\n",
       " 'John Edrich',\n",
       " 'John Snow',\n",
       " 'Jeff Thomson',\n",
       " 'June Havoc',\n",
       " 'Kerry Packer',\n",
       " 'Mike Brearley',\n",
       " 'Ian Botham',\n",
       " 'Allan Border',\n",
       " 'Bob Willis',\n",
       " 'Tim Robinson (comedian)',\n",
       " 'Graham Gooch',\n",
       " 'David Gower',\n",
       " 'Gladstone Small',\n",
       " 'Ian Healy',\n",
       " 'David Boon',\n",
       " 'Merv Hughes',\n",
       " 'Steve Waugh',\n",
       " 'Graham Dilley',\n",
       " 'Mark Taylor (cricketer)',\n",
       " 'Neil Foster',\n",
       " 'Jason Gillespie',\n",
       " 'Glenn McGrath',\n",
       " 'Adam Gilchrist',\n",
       " 'Matthew Hayden',\n",
       " 'Damien Martyn',\n",
       " 'Shane Warne',\n",
       " 'Monty Panesar',\n",
       " 'Andrew Flintoff',\n",
       " 'Michael Clarke (cricketer)',\n",
       " 'Ricky Ponting',\n",
       " 'Ashton Agar',\n",
       " 'Phillip Hughes',\n",
       " 'Kevin Pietersen',\n",
       " 'Stuart Broad',\n",
       " 'Alastair Cook',\n",
       " 'Ian Bell',\n",
       " 'Mitchell Johnson',\n",
       " 'Ben Stokes',\n",
       " 'Ryan Harris (cricketer)',\n",
       " 'Graeme Swann',\n",
       " 'James Vince',\n",
       " 'Nathan Lyon',\n",
       " 'Anderson Cummins',\n",
       " 'Josh Hazlewood',\n",
       " 'Joe Root',\n",
       " 'Terence Rattigan',\n",
       " 'Jack L. Warner',\n",
       " 'Douglas Adams',\n",
       " 'Gary Sweet',\n",
       " 'Eudoxus of Cnidus',\n",
       " 'Florian Cajori',\n",
       " 'Abner Doubleday',\n",
       " 'Edward Doubleday',\n",
       " 'Hester Thrale',\n",
       " 'Henry Kent Hewitt',\n",
       " 'Nathaniel P. Banks',\n",
       " 'Robert Anderson (Civil War)',\n",
       " 'John Gibbon',\n",
       " 'James Longstreet',\n",
       " 'John Howard',\n",
       " 'John Newton',\n",
       " 'George Meade',\n",
       " 'Abraham Lincoln',\n",
       " 'Thomas Edison',\n",
       " 'John Turturro',\n",
       " 'Lee de Forest',\n",
       " 'Valdemar Poulsen',\n",
       " 'Edwin Howard Armstrong',\n",
       " 'Johnny Carson',\n",
       " 'Georges-Louis Leclerc',\n",
       " 'Augustin-Jean Fresnel',\n",
       " 'James Clerk Maxwell',\n",
       " 'Humphrey Llwyd',\n",
       " 'Prosper Mérimée',\n",
       " 'Adrien-Marie Legendre',\n",
       " 'James Bradley',\n",
       " 'Robert Hooke',\n",
       " 'Étienne-Louis Malus',\n",
       " 'Pierre Louis Maupertuis',\n",
       " 'François Arago',\n",
       " 'Jean-Baptiste Biot',\n",
       " 'Louis Poinsot',\n",
       " 'Louis-Mathieu Molé',\n",
       " 'Gaspard de Prony',\n",
       " 'John Worrall (philosopher)',\n",
       " 'Augustin-Louis Cauchy',\n",
       " 'William Hyde Wollaston',\n",
       " 'Ivor Grattan-Guinness',\n",
       " 'John Herschel',\n",
       " 'George Biddell Airy',\n",
       " 'Joseph von Fraunhofer',\n",
       " 'William Whewell',\n",
       " 'Franz Ernst Neumann',\n",
       " 'Hippolyte Fizeau',\n",
       " 'Michel Foucault',\n",
       " 'Gianluigi Buffon',\n",
       " 'Jacques Charles',\n",
       " 'Marquis de Condorcet',\n",
       " 'John Cena',\n",
       " 'Pierre Louis Dulong',\n",
       " 'Sir George Stokes',\n",
       " 'William Rowan Hamilton',\n",
       " 'Pope Alexander II',\n",
       " 'Pope Adrian IV',\n",
       " 'Richard A. Whiting',\n",
       " 'Henry VIII',\n",
       " 'Gerald of Wales',\n",
       " 'Taylor Jardine',\n",
       " 'Pope Leo X',\n",
       " 'Francis I of France',\n",
       " 'RZA',\n",
       " 'Jorge Luis Borges',\n",
       " 'Joseph Campbell',\n",
       " 'Carl Jung',\n",
       " 'George Bernard Shaw',\n",
       " 'Thomas Mann',\n",
       " 'Samuel Beckett',\n",
       " 'Adele Schopenhauer',\n",
       " 'Johanna van Gogh-Bonger',\n",
       " 'Adele',\n",
       " 'Karl August von Hardenberg',\n",
       " 'Gottlob Ernst Schulze',\n",
       " 'William Backhouse Astor Sr.',\n",
       " 'Johann Gottlieb Fichte',\n",
       " 'Martin Heinrich',\n",
       " 'Johann Elert Bode',\n",
       " 'Sigismund III Vasa',\n",
       " 'Albert Marquet',\n",
       " 'David Hume',\n",
       " 'Pedro Calderón de la Barca',\n",
       " 'Prince Felix of Bourbon-Parma',\n",
       " 'George Berkeley',\n",
       " 'Daniel Albright',\n",
       " 'Samuel R. Delany',\n",
       " 'Thomas Hobbes',\n",
       " 'Tina Thompson',\n",
       " 'Samuel von Pufendorf',\n",
       " 'Pericles',\n",
       " 'Abraham Hyacinthe Anquetil-Duperron',\n",
       " 'Joscelyn Godwin',\n",
       " 'Keiji Nishitani',\n",
       " 'William Godwin',\n",
       " 'Aleister Crowley',\n",
       " 'Marsilio Ficino',\n",
       " 'José Calderón (basketball)',\n",
       " 'George Lucas',\n",
       " 'Nicolaus Copernicus',\n",
       " 'Hans Vaihinger',\n",
       " 'Ettore Majorana',\n",
       " 'Michael Faraday',\n",
       " 'Sergei Prokofiev',\n",
       " 'George Santayana',\n",
       " 'Joris-Karl Huysmans',\n",
       " 'Roberto Baggio',\n",
       " 'Tiago Mendes',\n",
       " 'Tony Blair',\n",
       " 'Sarah Maldoror',\n",
       " 'Byron White',\n",
       " 'Geraldo Rivera',\n",
       " 'Jefferson Davis',\n",
       " 'Albert Sidney Johnston',\n",
       " 'Joseph E. Johnston',\n",
       " 'Joseph Smith',\n",
       " 'Henrietta of England',\n",
       " 'William G. Preston',\n",
       " 'Zachary Taylor',\n",
       " 'Franklin Pierce',\n",
       " 'Persifor Frazer Smith',\n",
       " 'Blake Griffin',\n",
       " 'Leonidas I',\n",
       " 'Ulysses S. Grant',\n",
       " 'James K. Polk',\n",
       " 'Simon Bolivar Buckner',\n",
       " 'Sterling Price',\n",
       " 'William Tecumseh Sherman',\n",
       " 'Don Carlos Buell',\n",
       " 'John J. Crittenden',\n",
       " 'John B',\n",
       " 'P. G. T. Beauregard',\n",
       " 'Lloyd Tilghman',\n",
       " 'Potter Stewart',\n",
       " 'Scott Dixon',\n",
       " 'Gideon Johnson Pillow',\n",
       " 'Nathan Bedford Forrest',\n",
       " 'Braxton Bragg',\n",
       " 'Henry Halleck',\n",
       " 'Bob Armstrong',\n",
       " 'William Inge',\n",
       " \"Auguste Villiers de l'Isle-Adam\",\n",
       " 'Ephraim Chambers',\n",
       " 'Emmy Rossum',\n",
       " 'Jack Williamson',\n",
       " 'Edmond de Goncourt',\n",
       " 'Carl Sigismund Kunth',\n",
       " 'Woodrow Wilson',\n",
       " 'Shoshichi Kobayashi',\n",
       " 'Walt Disney',\n",
       " 'Rachel Notley',\n",
       " 'John B. T. Campbell III',\n",
       " 'Queen Victoria',\n",
       " 'Lois Mitchell',\n",
       " 'Jim Prentice',\n",
       " 'Alison Redford',\n",
       " 'Dave Hancock',\n",
       " 'Ralph Klein',\n",
       " 'Pierre Trudeau',\n",
       " 'Hans Albert Einstein',\n",
       " 'Elsa Einstein',\n",
       " 'Henri Poincaré',\n",
       " 'Walther Nernst',\n",
       " 'Arthur Eddington',\n",
       " 'John Francis Hylan',\n",
       " 'Richard Haldane',\n",
       " 'Alexis de Tocqueville',\n",
       " 'Herbert Samuel',\n",
       " 'Santiago Ramón y Cajal',\n",
       " 'Eric Drummond',\n",
       " 'Hendrik Lorentz',\n",
       " 'Giuseppe Motta',\n",
       " 'Jimmy Walker',\n",
       " 'Harry Emerson Fosdick',\n",
       " 'Robert Andrews Millikan',\n",
       " 'Charlie Chaplin',\n",
       " 'Upton Sinclair',\n",
       " 'Walter Isaacson',\n",
       " 'Gerald Holton',\n",
       " 'Max Born',\n",
       " 'Frederick Lindemann',\n",
       " 'David Lloyd George',\n",
       " 'Winston Churchill',\n",
       " 'Bernhard Neumann',\n",
       " 'Kurt Gödel',\n",
       " 'Linus Pauling',\n",
       " 'Thurgood Marshall',\n",
       " 'Abba Eban',\n",
       " 'Leon Botstein',\n",
       " 'Alfred Einstein',\n",
       " 'Rudolph Nissen',\n",
       " 'J. Robert Oppenheimer',\n",
       " 'Hermann Minkowski',\n",
       " 'Edwin Hubble',\n",
       " 'Fred Hoyle',\n",
       " 'Hermann Bondi',\n",
       " 'Lev Landau',\n",
       " 'Nathan Rosen',\n",
       " 'Roy Kerr',\n",
       " 'E. T. A. Hoffmann',\n",
       " 'Niels Bohr',\n",
       " 'Eric Allin Cornell',\n",
       " 'Carl Wieman',\n",
       " 'Otto Stern',\n",
       " 'Louis de Broglie',\n",
       " 'Peter Bergmann',\n",
       " 'Margot Asquith',\n",
       " 'Margot Frank',\n",
       " 'Barbara Bush',\n",
       " 'Jonathan Richman',\n",
       " 'Alfred Nobel',\n",
       " 'Frederic Leighton',\n",
       " 'Ashoka',\n",
       " 'Ahmad Shah Durrani',\n",
       " 'Ranjit Singh',\n",
       " 'Abdul Ghaffar Khan',\n",
       " 'Hari Singh Nalwa',\n",
       " 'Abdur Rahman Khan',\n",
       " 'Mortimer Durand',\n",
       " 'Abdul Khaliq (athlete)',\n",
       " 'Mahmud Tarzi',\n",
       " 'Zulfikar Ali Bhutto',\n",
       " 'Babrak Karmal',\n",
       " 'Ahmed Rashid',\n",
       " 'Osama bin Laden',\n",
       " 'Abdul Rashid Ghazi',\n",
       " 'Aino Sibelius',\n",
       " 'Zalmai Rassoul',\n",
       " 'Indira Gandhi',\n",
       " 'Salman Khan',\n",
       " 'Zareen Khan',\n",
       " 'Shah Rukh Khan',\n",
       " 'Celina Jaitly',\n",
       " 'Aamir Khan',\n",
       " 'Saif Ali Khan',\n",
       " 'Naseeruddin Shah',\n",
       " 'Rahman Baba',\n",
       " 'Khushal Khattak',\n",
       " 'Köprülüzade Fazıl Ahmed Pasha',\n",
       " 'Muhammad Ali of Egypt',\n",
       " 'Ismail Qemali',\n",
       " 'Turhan Pasha Përmeti',\n",
       " 'Prenk Bib Doda',\n",
       " 'Essad Pasha Toptani',\n",
       " 'Sali Berisha',\n",
       " 'Aleksandër Meksi',\n",
       " 'Edi Rama',\n",
       " 'Pope Francis',\n",
       " 'Naim Frashëri',\n",
       " 'Bleona',\n",
       " 'Ermonela Jaho',\n",
       " 'Inva Mula',\n",
       " 'Saimir Pirgu',\n",
       " 'Elvana Gjata',\n",
       " 'Bebe Rexha',\n",
       " 'Dua Lipa',\n",
       " 'Anri Sala',\n",
       " 'Senhit (singer)',\n",
       " 'Marin Barleti',\n",
       " 'Dhimitër Anagnosti',\n",
       " 'Nik Xhelilaj',\n",
       " 'Eliza Dushku',\n",
       " 'Masiela Lusha',\n",
       " 'John Belushi',\n",
       " 'Julius Wellhausen',\n",
       " 'Thomas Carlyle',\n",
       " 'Strabo',\n",
       " 'Heydar Aliyev',\n",
       " 'Surat Huseynov',\n",
       " 'Nikki Bella',\n",
       " 'Muslim Magomayev (musician)',\n",
       " 'Shovkat Alakbarova',\n",
       " 'Bahadur Shah Zafar',\n",
       " 'Sattar Bahlulzade',\n",
       " 'Tahir Salahov',\n",
       " 'Thor Heyerdahl',\n",
       " 'Mariya Stadnik',\n",
       " 'Sharif Sharifov',\n",
       " 'Toghrul Asgarov',\n",
       " 'Zabit Samedov',\n",
       " 'Magomedrasul Majidov',\n",
       " 'Karl Guthe Jansky',\n",
       " 'Akira Tozawa',\n",
       " 'Morihei Ueshiba',\n",
       " 'Masahiro Nakai',\n",
       " 'Takeda Sōkaku',\n",
       " 'Hajime Tanabe',\n",
       " 'Minoru Mochizuki',\n",
       " 'Masamichi Noro',\n",
       " 'Koichi Tohei',\n",
       " 'Gozo Shioda',\n",
       " 'Kisshomaru Ueshiba',\n",
       " 'Hitohiro Saito',\n",
       " 'Morihiro Saito',\n",
       " 'Roger Fry',\n",
       " 'R. G. Collingwood',\n",
       " 'Benedetto Croce',\n",
       " 'Theodor W. Adorno',\n",
       " 'Pablo Picasso',\n",
       " 'Henri Matisse',\n",
       " 'Jean-Auguste-Dominique Ingres',\n",
       " 'John Singer Sargent',\n",
       " 'Damien Hirst',\n",
       " 'Tracey Emin',\n",
       " 'Joseph Beuys',\n",
       " 'Swoon (artist)',\n",
       " 'Théodore Géricault',\n",
       " 'Michelangelo',\n",
       " 'Andres Serrano',\n",
       " 'Leon Golub',\n",
       " 'J. M. W. Turner',\n",
       " 'John Ruskin',\n",
       " 'Andy Warhol',\n",
       " 'Ferdinand de Saussure',\n",
       " 'Julia Kristeva',\n",
       " 'Ferdinand VII of Spain',\n",
       " 'Luce Irigaray',\n",
       " 'Judith Butler',\n",
       " 'Nelson Goodman',\n",
       " 'Hayden White',\n",
       " 'Roger Wolcott Sperry',\n",
       " 'Arthur Danto',\n",
       " 'Francisco Goya',\n",
       " 'Madame Restell',\n",
       " 'Dwyane Wade',\n",
       " 'Eric Rudolph',\n",
       " 'Barnett Slepian',\n",
       " 'Claude Shannon',\n",
       " 'George Tiller',\n",
       " 'David L. Gunn',\n",
       " 'Robert Sanderson (theologian)',\n",
       " 'Paul Jennings Hill',\n",
       " 'Charles Cornwallis',\n",
       " 'Thomas Gage',\n",
       " 'Thomas Paine',\n",
       " 'Bill Clinton',\n",
       " 'Vicente Aranda',\n",
       " 'Alfred Thayer Mahan',\n",
       " 'François Claude Amour',\n",
       " 'George Brydges Rodney',\n",
       " 'Bernardo de Gálvez',\n",
       " 'John Polson',\n",
       " 'Hyder Ali',\n",
       " 'Tipu Sultan',\n",
       " 'John André',\n",
       " 'Henry Hamilton (colonial administrator)',\n",
       " 'Banastre Tarleton',\n",
       " 'François Joseph Paul de Grasse',\n",
       " 'Richard Howe',\n",
       " 'Matías de Gálvez y Gallardo',\n",
       " 'Pierre André de Suffren',\n",
       " 'Billy Hughes',\n",
       " 'John Braithwaite (engineer)',\n",
       " 'Eyre Coote (East India Company officer)',\n",
       " 'Charles Evans Hughes',\n",
       " 'Joseph Ellis',\n",
       " 'James II of England',\n",
       " 'Oliver Cromwell',\n",
       " 'Clement Attlee',\n",
       " 'Monta Ellis',\n",
       " 'John E. Ferling',\n",
       " 'Albert Ellis',\n",
       " 'Thomas Brown (philosopher)',\n",
       " 'Bob McDonnell',\n",
       " 'Muhammad ibn Musa al-Khwarizmi',\n",
       " 'Alan Turing',\n",
       " 'Geoffrey Chaucer',\n",
       " 'George Boolos',\n",
       " 'Marvin Minsky',\n",
       " 'Joachim Lambek',\n",
       " 'Brahmagupta',\n",
       " 'Gottfried Wilhelm Leibniz',\n",
       " 'Charles Babbage',\n",
       " 'Ada Lovelace',\n",
       " 'John Venn',\n",
       " 'Herman Hollerith',\n",
       " 'George Stibitz',\n",
       " 'George Boole',\n",
       " 'J. Barkley Rosser',\n",
       " 'Jacques Herbrand',\n",
       " 'Alfred North Whitehead',\n",
       " 'Stephen Cole Kleene',\n",
       " 'Charles Barkley',\n",
       " 'Richard Seifert',\n",
       " 'Philip II of Macedon',\n",
       " 'Arrian',\n",
       " 'Alexander Held',\n",
       " 'Septimius Severus',\n",
       " 'Caligula',\n",
       " 'Augustus',\n",
       " 'Pompey',\n",
       " 'Lucius Verus',\n",
       " 'Peter Green (musician)',\n",
       " 'Lysippos',\n",
       " 'Chandragupta Maurya',\n",
       " 'Menander I',\n",
       " 'Macrinus',\n",
       " 'Aubrey Thomas de Vere',\n",
       " 'Wendell Johnson',\n",
       " 'S. I. Hayakawa',\n",
       " 'Leroy Burgess',\n",
       " 'Ed Miliband',\n",
       " 'Ed Joyce',\n",
       " 'Richard Ernest William Turner',\n",
       " 'Carl Nielsen',\n",
       " 'Tommy Cassidy',\n",
       " 'Matt Casamassina',\n",
       " 'Craig Harris',\n",
       " 'Walter Day',\n",
       " 'Liezel Huber',\n",
       " 'Gonville Bromhead',\n",
       " 'Johann Heinrich Friedrich Link',\n",
       " 'Augustin Pyramus de Candolle',\n",
       " 'John Lindley',\n",
       " 'John Edward Gray',\n",
       " 'George Bentham',\n",
       " 'Stephen Hawking',\n",
       " 'Zach Parise',\n",
       " 'William Herschel',\n",
       " 'Karl Ludwig Hencke',\n",
       " 'Carl Friedrich Gauss',\n",
       " 'N. R. Pogson',\n",
       " 'Victor Goldschmidt',\n",
       " 'Max Wolf',\n",
       " 'James Watson',\n",
       " 'Eduard Suess',\n",
       " 'David Morrison',\n",
       " 'David J',\n",
       " 'Johann Franz Encke',\n",
       " 'Hipparchus',\n",
       " 'John Flamsteed',\n",
       " 'Abd al-Rahman al-Sufi',\n",
       " 'Johann Bayer',\n",
       " 'Brian G. Marsden',\n",
       " 'Kenzō Masaoka',\n",
       " 'Yoshiyuki Tomino',\n",
       " 'Go Nagai',\n",
       " 'Tadahito Mochinaga',\n",
       " 'Kihachirō Kawamoto',\n",
       " 'Makoto Shinkai',\n",
       " 'Akira Nogami',\n",
       " 'Valentinian I',\n",
       " 'Constantine V',\n",
       " \"Al-Ma'mun\",\n",
       " 'Gregory of Nyssa',\n",
       " 'Mansur Yavaş',\n",
       " 'Mimar Sinan',\n",
       " 'Pietro Canonica',\n",
       " 'Rahmi Koç',\n",
       " 'Talaat Pasha',\n",
       " 'Abdi İpekçi',\n",
       " 'Derek Bickerton',\n",
       " 'Louis Massignon',\n",
       " 'Salama Moussa',\n",
       " 'Muhammad Mustafa Azmi',\n",
       " 'Ahmed Lutfi el-Sayed',\n",
       " 'Sahil Anand',\n",
       " 'Hassan Massoudy',\n",
       " 'Alfred Hitchcock',\n",
       " 'Robin Wood (critic)',\n",
       " 'Ellen Terry',\n",
       " 'Orson Welles',\n",
       " 'Emma Darwin',\n",
       " 'Kathleen Garman',\n",
       " 'Tom Snyder',\n",
       " 'Peter Bogdanovich',\n",
       " 'François Truffaut',\n",
       " 'John Russell Taylor',\n",
       " 'Don Henley',\n",
       " 'Buster Keaton',\n",
       " 'Donald Spoto',\n",
       " 'Michael Balcon',\n",
       " 'Graham Cutts',\n",
       " 'Alma Reville',\n",
       " 'Virginia Valli',\n",
       " 'Cary Grant',\n",
       " 'Charles Champlin',\n",
       " 'Pat Hitchcock',\n",
       " 'Francis Bourne',\n",
       " 'Madeleine Carroll',\n",
       " 'Philip French',\n",
       " 'Robert Towne',\n",
       " 'W. Somerset Maugham',\n",
       " 'May Whitty',\n",
       " 'Daphne du Maurier',\n",
       " 'Laurence Olivier',\n",
       " 'Walter Wanger',\n",
       " 'Joel McCrea',\n",
       " 'Lina Heydrich',\n",
       " 'Anthony Berkeley Cox',\n",
       " 'Monty Woolley',\n",
       " 'Norman Lloyd',\n",
       " 'Priscilla Lane',\n",
       " 'Joseph Cotten',\n",
       " 'Guy Pearce',\n",
       " 'John Steinbeck',\n",
       " 'Charles Oakley',\n",
       " 'Teresa Wright',\n",
       " 'William Bendix',\n",
       " 'Ingrid of Sweden',\n",
       " 'David O. Selznick',\n",
       " 'Ingrid Bergman',\n",
       " 'Gregory Peck',\n",
       " 'Salvador Dalí',\n",
       " 'Ben Hecht',\n",
       " 'James Stewart',\n",
       " 'Jane Wyman',\n",
       " 'Alastair Sim',\n",
       " 'Michael Wilding',\n",
       " 'Patricia Highsmith',\n",
       " 'Raymond Chandler',\n",
       " ...]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read '../data/factscore_final.csv', using its first column as the index.\n",
    "df = pd.read_csv('../data/factscore_final.csv', index_col=0)\n",
    "# Display every name with leading and trailing whitespace removed.\n",
    "list(map(lambda name: name.strip(), df['Name']))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
