import pandas as pd
import zipfile
import numpy as np
import re
import datetime
import matplotlib.pyplot as plt
import geopandas as gpd
from scipy import stats
from urllib.request import urlopen
pd.options.mode.chained_assignment = None


# Unpack every member of archive.zip into the current working directory.
with zipfile.ZipFile('archive.zip') as archive:
    archive.extractall()


# Load the artists and artworks tables from their CSV files.
moma_artists = pd.read_csv('Artists.csv')
moma_artworks = pd.read_csv('Artworks.csv')
# Show which artist columns are available.
moma_artists.columns

Index(['Artist ID', 'Name', 'Nationality', 'Gender', 'Birth Year',
       'Death Year'],
      dtype='object')


# Attach the artist attributes to every artwork row, matching on the artist name.
artist_columns = ['Gender', 'Name', 'Nationality', 'Birth Year', 'Death Year']
gender = moma_artists[artist_columns]
moma = pd.merge(moma_artworks, gender, how='left', on=['Name'])
# Fold the lower-case spelling into the capitalised one so both count as one category.
moma['Gender'] = moma['Gender'].replace({'male': 'Male'})
moma.head()


# Inspect the column types before converting anything.
display(moma.dtypes)
# Parse 'Acquisition Date' into datetimes; values that cannot be parsed become NaT
# instead of raising.
acquisition_dates = pd.to_datetime(moma['Acquisition Date'], errors='coerce')
moma['Acquisition Date'] = acquisition_dates
# Re-check the types after the conversion.
moma.dtypes

Artwork ID              int64
Title                  object
Artist ID              object
Name                   object
Date                   object
Medium                 object
Dimensions             object
Acquisition Date       object
Credit                 object
Catalogue              object
Department             object
Classification         object
Object Number          object
Diameter (cm)         float64
Circumference (cm)    float64
Height (cm)           float64
Length (cm)           float64
Width (cm)            float64
Depth (cm)            float64
Weight (kg)           float64
Duration (s)          float64
Gender                 object
Nationality            object
Birth Year            float64
Death Year            float64
dtype: object

Artwork ID                     int64
Title                         object
Artist ID                     object
Name                          object
Date                          object
Medium                        object
Dimensions                    object
Acquisition Date      datetime64[ns]
Credit                        object
Catalogue                     object
Department                    object
Classification                object
Object Number                 object
Diameter (cm)                float64
Circumference (cm)           float64
Height (cm)                  float64
Length (cm)                  float64
Width (cm)                   float64
Depth (cm)                   float64
Weight (kg)                  float64
Duration (s)                 float64
Gender                        object
Nationality                   object
Birth Year                   float64
Death Year                   float64
dtype: object


moma['Date'].unique()[:100]
#a small sample of the unique types and their differences

array(['1896', '1987', '1903', '1980', '1976-77', '1968', '1900', '1978',
       '1905', '1906', '1979', '1980-81', '1918', '1970', '1975', '1984',
       '1986', '1974', 'n.d.', 'c. 1917', '1917', '1923', 'Unknown',
       '1930', '1936', '1935', '1937', '1938', '1977', '1958', '1985',
       '1989', '1949', '1958–1964', 'c. 1935', '1991', '1941', '1965',
       '1981', '1983', '1985–1988', 'c. 1989-91', '1992', '1915-17',
       'c. 1915-17', '1953', '1910', 'c.1985', '1982–1986', '1982-86',
       '1945', '1923–1924', '.1-3 1987; .4 1990', '1990', '1976', '1995',
       '1927–1931', 'c. 1929-30', '1964', '1959', 'c. 1918-20',
       'c.1918-1920', '1939', 'c.1976', '1975-79', '1993', '1996', '1988',
       '1982-83', '1982–1983', '1952-53', '1921', '1957', '1972',
       '1956-57', '1924', '1962', '1925', '1960', '1969', '1963', '1994',
       '1961', '1960-61', '1952', 'c. 1978-84', '1927', '1979–1985',
       'before 1933', '1929', 'c. 1960-62', '1967', '1956', 'c. 1961',
       '1934-36', '1981–1982', '1979–1981', '1940', 'after 1938', '1946'],
      dtype=object)


# Labels in the raw 'Date' column that mean "no usable date".
# 'n.d.' (with the trailing period) is the spelling that actually occurs in the
# data; the bare 'n.d' is kept so nothing that matched before stops matching.
# NOTE(review): the string "nan" does not match genuinely missing values (NaN),
# so rows with a missing Date are still kept here.
missing_date_labels = ["nan", "n.d", "n.d.", "Unkn", "Unknown", "Various", "unknown"]
n_d = moma[moma["Date"].isin(missing_date_labels)].index
moma.drop(n_d, inplace=True)


# Store the first run of four digits found in each free-text date in a helper column.
# The pattern is a raw string so that \d is not read as an (invalid) string escape.
moma['date_edit'] = moma['Date'].str.extract(r'(\d{4})', expand=False)
# List the raw values for which no four-digit year could be extracted even though
# a 'Date' value is present: these are the dates that could not be converted.
unparsed = moma['date_edit'].isnull() & moma['Date'].notnull()
moma.loc[unparsed, 'Date'].unique()

array(['n.d.', '8th-9th century C.E.', '7th-8th century C.E.', 'Unkown',
       '(London?, published in aid of the Comforts Fund  for Women and Children of Sovie',
       '(n.d.)', 'New York', 'November 10', '(19)71', '(19)69',
       'date of publicati', 'nd', 'no date',
       '(newspaper published March 30)', 'n. d.', 'c. 196?', 'TBC', 'TBD'],
      dtype=object)


# Overwrite the free-text dates with the extracted years; pop() removes the
# helper column in the same step.
moma['Date'] = moma.pop('date_edit')
# Final check that the remaining values are all years (or missing).
moma['Date'].unique()

array(['1896', '1987', '1903', '1980', '1976', '1968', '1900', '1978',
       '1905', '1906', '1979', '1918', '1970', '1975', '1984', '1986',
       '1974', nan, '1917', '1923', '1930', '1936', '1935', '1937',
       '1938', '1977', '1958', '1985', '1989', '1949', '1991', '1941',
       '1965', '1981', '1983', '1992', '1915', '1953', '1910', '1982',
       '1945', '1990', '1995', '1927', '1929', '1964', '1959', '1939',
       '1993', '1996', '1988', '1952', '1921', '1957', '1972', '1956',
       '1924', '1962', '1925', '1960', '1969', '1963', '1994', '1961',
       '1933', '1967', '1934', '1940', '1946', '1955', '1997', '1922',
       '1942', '1954', '1916', '1973', '1926', '1932', '1947', '1943',
       '1944', '1966', '1971', '1999', '1951', '1913', '1928', '1886',
       '1920', '1950', '1931', '1901', '1948', '1912', '1908', '1902',
       '1904', '1998', '1898', '1875', '1880', '1909', '1501', '1897',
       '1907', '1895', '1914', '1885', '1768', '1878', '1808', '1865',
       '1899', '1876', '1873', '1860', '1866', '1919', '1830', '1840',
       '1884', '1883', '2000', '1894', '1893', '1879', '1890', '1892',
       '1877', '1911', '1891', '1889', '1818', '1852', '1837', '1825',
       '1828', '1854', '1797', '1799', '1810', '1863', '1816', '1874',
       '1887', '1881', '1882', '1888', '1868', '1871', '1858', '1853',
       '1850', '1855', '1846', '1862', '1856', '1843', '1872', '1861',
       '1864', '1869', '1870', '1857', '1826', '1829', '1859', '1844',
       '1851', '1842', '1845', '1867', '1839', '1838', '1805', '1786',
       '1809', '1849', '1841', '1832', '1811', '1847', '1800', '2001',
       '2002', '2003', '2004', '1821', '2005', '2006', '2011', '2009',
       '2007', '2008', '1848', '2010', '2012', '2013', '2016', '2014',
       '2015', '2017'], dtype=object)


# Convert the four-digit year strings to numbers (NaN where no year was found).
# pd.to_numeric is used instead of a round trip through pd.to_datetime because
# nanosecond timestamps cannot represent years before 1677, so a year such as
# '1501' would be coerced to NaT and silently lost.
moma['Date'] = pd.to_numeric(moma['Date'], errors="coerce")


# Remove every artwork whose acquisition date is missing (including values that
# could not be parsed into a date).
no_date = moma.index[moma['Acquisition Date'].isnull()]
moma.drop(no_date, inplace=True)


# Artist's age in the year the work was made.
moma['age_made'] = moma['Date'].sub(moma['Birth Year'])
# Create the 'alive?' column, initially holding the creation year.
moma['alive?'] = moma['Date']


def alive(acquisition, death):
    """Return the string 'False' if *acquisition* is later than *death*, else 'True'.

    The result is a string, not a bool. Any comparison with NaN is false, so a
    missing death year (or acquisition year) yields 'True'.
    """
    return 'False' if acquisition > death else 'True'
# Year part of the acquisition date.
moma['Acquisition Year'] = moma['Acquisition Date'].dt.year
# Flag each work with alive() using its acquisition year and the artist's death year.
moma['alive?'] = [
    alive(acquired, died)
    for acquired, died in zip(moma['Acquisition Year'], moma['Death Year'])
]


moma['Nationality'].unique()

array(['Austrian', 'French', nan, 'American', 'German', 'Swedish',
       'British', 'Japanese', 'Italian', 'Argentine', 'Swiss',
       'Brazilian', 'Luxembourgish', 'Spanish', 'Dutch', 'Russian',
       'Iranian', 'Finnish', 'Danish', 'Belgian', 'Nationality unknown',
       'Mexican', 'Czech', 'Romanian', 'Polish', 'Cuban', 'Chilean',
       'Puerto Rican', 'Uruguayan', 'Venezuelan', 'Moroccan', 'Colombian',
       'Australian', 'Chinese', 'Yugoslav', 'Hungarian', 'Canadian',
       'Slovenian', 'Latvian', 'Nationality Unknown', 'Various', 'Greek',
       'Haitian', 'Israeli', 'Icelandic', 'Czechoslovakian', 'Croatian',
       'Norwegian', 'Thai', 'Algerian', 'Guatemalan', 'Indian',
       'Ukrainian', 'Irish', 'Costa Rican', 'Korean', 'Ethiopian',
       'Kuwaiti', 'Scottish', 'South African', 'Zimbabwean', 'Portuguese',
       'Panamanian', 'Ecuadorian', 'Peruvian', 'Congolese', 'Malian',
       'Turkish', 'Cambodian', 'Bosnian', 'Canadian Inuit', 'Slovak',
       'Estonian', 'Pakistani', 'Bolivian', 'Taiwanese', 'Paraguayan',
       'Nicaraguan', 'Tunisian', 'Sudanese', 'Tanzanian', 'Guyanese',
       'Senegalese', 'Bahamian', 'Bulgarian', 'Lebanese', 'Kenyan',
       'Nigerian', 'Georgian', 'Egyptian', 'Albanian', 'Azerbaijani',
       'Ivorian', 'Malaysian', 'Singaporean', 'Serbian', 'Lithuanian',
       'Tajik', 'New Zealander', 'Namibian', 'Native American',
       'Ghanaian', 'Afghan', 'nationality unknown', 'Ugandan',
       'Cameroonian', 'Welsh', 'Mauritanian', 'Palestinian', 'Syrian',
       'Saudi Arabian', 'Kazakhstani', 'Rwandan', 'Iraqi', 'Indonesian',
       'Vietnamese', 'Burkinabe', 'Macedonian', 'Kyrgyzstani', 'Filipino',
       'Mozambican', 'Angolan'], dtype=object)


# Collapse every "unknown" spelling into the placeholder 'no'.
unknown_labels = [
    'nan',
    'Nationality unknown',
    'Nationality Unknown',
    'nationality unknown',
    'Various',
]
moma.loc[moma['Nationality'].isin(unknown_labels), 'Nationality'] = 'no'
# Genuinely missing values get the same placeholder. An equality test against
# np.nan can never match (NaN != NaN), so fillna is the only step that handles them.
moma['Nationality'] = moma['Nationality'].fillna('no')


# Nationalities that the nation() and nationality() helpers classify as
# not European / non-Western.
# NOTE(review): membership is a hand-made judgement call (e.g. 'Welsh' is listed
# while 'Scottish' is not) - confirm the intended grouping.
nationalities_moma = [
    'Japanese', 'Argentine', 'Brazilian', 'Russian', 'Iranian', 'Mexican',
    'Cuban', 'Chilean', 'Puerto Rican', 'Uruguayan', 'Venezuelan', 'Moroccan',
    'Colombian', 'Australian', 'Chinese', 'Yugoslav', 'Latvian', 'Haitian',
    'Israeli', 'Icelandic', 'Thai', 'Algerian', 'Guatemalan', 'Indian',
    'Ukrainian', 'Costa Rican', 'Korean', 'Ethiopian', 'Kuwaiti',
    'South African', 'Zimbabwean', 'Panamanian', 'Ecuadorian', 'Peruvian',
    'Congolese', 'Malian', 'Turkish', 'Cambodian', 'Bosnian', 'Canadian Inuit',
    'Estonian', 'Pakistani', 'Bolivian', 'Taiwanese', 'Paraguayan',
    'Nicaraguan', 'Tunisian', 'Sudanese', 'Tanzanian', 'Guyanese',
    'Senegalese', 'Bahamian', 'Lebanese', 'Kenyan', 'Nigerian', 'Georgian',
    'Egyptian', 'Albanian', 'Azerbaijani', 'Ivorian', 'Malaysian',
    'Singaporean', 'Serbian', 'Lithuanian', 'Tajik', 'New Zealander',
    'Namibian', 'Native American', 'Ghanaian', 'Afghan', 'Ugandan',
    'Cameroonian', 'Welsh', 'Mauritanian', 'Palestinian', 'Syrian',
    'Saudi Arabian', 'Kazakhstani', 'Rwandan', 'Iraqi', 'Indonesian',
    'Vietnamese', 'Burkinabe', 'Macedonian', 'Kyrgyzstani', 'Filipino',
    'Mozambican', 'Angolan',
]


# Create the 'European?' column with the string 'False' in every row.
moma['European?'] = 'False'


def nation(nationality):
    """Classify a nationality label for the 'European?' column.

    Returns NaN for the placeholder 'no', the string 'False' when the label is
    listed in nationalities_moma, and the string 'True' for anything else.
    """
    if nationality == 'no':
        return np.nan
    return 'False' if nationality in nationalities_moma else 'True'
# Classify every row's nationality with nation().
moma['European?'] = moma['Nationality'].map(nation)
# Turn the 'no' placeholder back into a real missing value.
moma['Nationality'] = moma['Nationality'].mask(moma['Nationality'] == 'no')


# Split the wide table into narrower, topic-specific frames.
ids = moma[['Artwork ID', 'Artist ID']]

names_dates_columns = [
    'Title', 'Name', 'Date', 'Acquisition Date', 'Artwork ID', 'Gender',
    'Credit', 'Nationality', 'age_made', 'alive?', 'European?',
]
names_dates_moma = moma[names_dates_columns]

dimension_columns = [
    'Artwork ID', 'Dimensions', 'Diameter (cm)', 'Circumference (cm)',
    'Height (cm)', 'Length (cm)', 'Width (cm)', 'Depth (cm)', 'Weight (kg)',
    'Duration (s)',
]
dimensions = moma[dimension_columns]

credit = moma[['Artwork ID', 'Credit']]
extra = moma[['Medium', 'Department', 'Classification', 'Artwork ID']]


names_dates_moma.head()


# number of works acquired each year
acquisitions_per_year = (
    moma.groupby(['Acquisition Year'])["Artwork ID"]
    .count()
    .to_frame()
    .reset_index()
    .rename({"Artwork ID": "num_works_acquired"}, axis=1)
)

# number of works by female artists acquired each year
female_counts = (
    moma[moma.Gender == 'Female']
    .groupby(['Acquisition Year'])["Artwork ID"]
    .count()
    .to_frame()
    .reset_index()
)

# left merge so that years without any work by a female artist are kept;
# their missing count is a true zero
result = pd.merge(acquisitions_per_year, female_counts, how='left', on=['Acquisition Year'])
result["Artwork ID"] = result["Artwork ID"].fillna(0)

# Divide inside `result`, where the female count and the yearly total sit on
# the same row. Assigning the ratio to a frame that only lists the years with
# female artists would line the values up by row position and attach them to
# the wrong years.
result["percent_female"] = result["Artwork ID"] / result["num_works_acquired"]
female_per_year = result[["Acquisition Year", "Artwork ID", "percent_female"]].copy()


# Bar chart of the share of works by female artists, one bar per acquisition year.
female_per_year = female_per_year.set_index("Acquisition Year")
female_graph = female_per_year["percent_female"].plot.bar(
    figsize=(16, 5),
    title="Proportion of Works by Female Artists Acquired",
)
female_graph.figure.tight_layout()
female_graph

<AxesSubplot:title={'center':'Proportion of Works by Female Artists Acquired'}, xlabel='Acquisition Year'>


# number of works by non-American artists acquired each year
# NOTE(review): rows with a missing Nationality also satisfy `!= 'American'`
# and are therefore counted here - confirm that this is intended.
nonAmerican_by_year = moma[moma.Nationality != 'American'].groupby(['Acquisition Year'])["Artwork ID"].count().to_frame().reset_index()
# proportion of works by non-American artists for each year; the yearly total
# is looked up by year so that a year missing from one table cannot shift the
# remaining rows out of alignment
works_per_year = moma.groupby('Acquisition Year')["Artwork ID"].count()
nonAmerican_by_year["percent_nonAmerican"] = (
    nonAmerican_by_year["Artwork ID"] / nonAmerican_by_year["Acquisition Year"].map(works_per_year)
)


# Bar chart of the share of works by non-American artists, one bar per acquisition year.
nonAmerican_by_year = nonAmerican_by_year.set_index("Acquisition Year")
na_graph = nonAmerican_by_year["percent_nonAmerican"].plot.bar(
    figsize=(16, 5),
    title="Proportion of Works by nonAmerican Artists Acquired",
)
na_graph.figure.tight_layout()
na_graph

<AxesSubplot:title={'center':'Proportion of Works by nonAmerican Artists Acquired'}, xlabel='Acquisition Year'>


names_dates_moma['acquisitionYear'] = names_dates_moma['Acquisition Date'].dt.year


def _yearly_status_counts(status, label):
    """Count works acquired after 1975, per year, whose 'alive?' flag equals *status*.

    Returns a one-column DataFrame named *label*, indexed by acquisitionYear.
    groupby().size() is used because it does not depend on how value_counts()
    names its result, which changed in pandas 2.0.
    """
    selected = names_dates_moma[
        (names_dates_moma['alive?'] == status)
        & (names_dates_moma['acquisitionYear'] > 1975)
    ]
    return selected.groupby('acquisitionYear').size().to_frame(label)


# The intermediate frames are not bound to the names `dead` / `alive`, so the
# alive() helper function is no longer shadowed by a DataFrame.
dead_table = _yearly_status_counts('False', 'dead')
alive_table = _yearly_status_counts('True', 'alive')

# merging the two tables together on acquisitionYear (only years present in both are kept)
new = dead_table.merge(alive_table, on='acquisitionYear')

# plotting the graph
new.loc[:, ['alive', 'dead']].plot.bar(stacked=True, figsize=(10, 7), title="Acquistion of Living vs Dead Artists Over a 38 year period")

<AxesSubplot:title={'center':'Acquistion of Living vs Dead Artists Over a 38 year period'}, xlabel='acquisitionYear'>


# Label each work with a known nationality as American, Western, or non-Western.
# A = American, NA-W = Non-American Western, NWNA = Non-Western Non-American
moma_nation = names_dates_moma[names_dates_moma['Nationality'].notna()].copy()


def nationality(nation):
    """Return 'A' for 'American', 'NWNA' if listed in nationalities_moma, else 'NA-W'."""
    if nation == 'American':
        return 'A'
    return 'NWNA' if nation in nationalities_moma else 'NA-W'


moma_nation['Western/American'] = moma_nation['Nationality'].map(nationality)

# year part of the acquisition date
moma_nation['acquisitionYear'] = moma_nation['Acquisition Date'].dt.year

def _yearly_group_counts(group, label):
    """Count works acquired after 1975, per year, whose 'Western/American' label is *group*.

    Returns a one-column DataFrame named *label*, indexed by acquisitionYear.
    groupby().size() is used because it does not depend on how value_counts()
    names its result, which changed in pandas 2.0.
    """
    selected = moma_nation[
        (moma_nation['Western/American'] == group)
        & (moma_nation['acquisitionYear'] > 1975)
    ]
    return selected.groupby('acquisitionYear').size().to_frame(label)


# yearly counts of American, non-American Western, and non-American non-Western art
A_table = _yearly_group_counts('A', 'A')
NAW_table = _yearly_group_counts('NA-W', 'NAW')
# labelled 'NWNA' (not 'NAW' again), so the merged columns keep distinct,
# meaningful names instead of the 'NAW_x' / 'NAW_y' merge suffixes
NWNA_table = _yearly_group_counts('NWNA', 'NWNA')

# merging the three tables together on acquisitionYear (only years present in all are kept)
new1 = A_table.merge(NAW_table, on='acquisitionYear')
new2 = new1.merge(NWNA_table, on='acquisitionYear')
new2 = new2.reset_index()

# plot a stacked bar graph of number of works acquired from the 3 demographics;
# each series starts on top of the previous ones via `bottom`, otherwise the
# bars are drawn over each other and hide the smaller values
years = new2['acquisitionYear']
plt.bar(years, new2['A'], color='r', edgecolor='white', label='American')
plt.bar(years, new2['NAW'], bottom=new2['A'], color='b', edgecolor='white', label='Non-American Western')
plt.bar(years, new2['NWNA'], bottom=new2['A'] + new2['NAW'], color='g', edgecolor='white', label='Non-American Non-Western')

plt.ylabel("Number of Works")
plt.xlabel("Year")
plt.title("Number of works from each demographic by year")
plt.legend()
plt.show()


# read the Natural Earth low-resolution shape file as a GeoDataFrame
# NOTE(review): gpd.datasets ships only with older geopandas releases; newer
# releases need the shape file to be downloaded separately - confirm the
# installed version.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# drop Antarctica because it takes up too much space; select it by name rather
# than by a hard-coded row position, which silently removes a different
# country if the row order of the data set ever changes
world = world[world['name'] != 'Antarctica']

# here's the world map we will use
world.plot()

<AxesSubplot:>


# Load the demonym table.
demonyms = pd.read_csv("demonyms.csv")


# Inner-join it with the MoMA rows on 'Nationality'; rows without a match on
# either side are dropped.
moma_nationalities = demonyms.merge(names_dates_moma, how='inner', on="Nationality")


# df of all works acquired before 2012
before_2012 = moma_nationalities[moma_nationalities["acquisitionYear"] < 2012]

# get counts of the number of works from each country.
# The index and value names are set explicitly: value_counts() names its result
# differently across pandas versions, and renaming "name" -> "count" afterwards
# would produce two 'count' columns on pandas >= 2.0.
count = (
    before_2012["name"]
    .value_counts()
    .astype(int)
    .rename_axis("name")
    .reset_index(name="count")
)

# merge this with the geographic data to graph it and to fill in NaNs for countries not in MoMA
map_pre_2012 = pd.merge(world, count, how="left", on="name")


# Choropleth of works per country for acquisitions before 2012.
ax1 = map_pre_2012.plot(
    column='count',
    cmap='GnBu',
    figsize=(15, 9),
    k=5,
    legend=True,
    missing_kwds={'color': 'lightgray'},
    scheme="User_Defined",
    classification_kwds={'bins': [1, 10, 1000, 10000, 50000]},
)
ax1.set_title("Number of works per country 1929-2011")

Text(0.5, 1.0, 'Number of works per country 1929-2011')


# per-country counts over all of the artworks, regardless of acquisition year.
# The index and value names are set explicitly: value_counts() names its result
# differently across pandas versions, and renaming "name" -> "count" afterwards
# would produce two 'count' columns on pandas >= 2.0.
count = (
    moma_nationalities["name"]
    .value_counts()
    .astype(int)
    .rename_axis("name")
    .reset_index(name="count")
)
map_data = pd.merge(world, count, how="left", on="name")


# Choropleth of works per country over all acquisitions.
ax = map_data.plot(
    column='count',
    cmap='OrRd',
    figsize=(15, 9),
    k=5,
    legend=True,
    missing_kwds={'color': 'lightgray'},
    scheme="User_Defined",
    classification_kwds={'bins': [1, 10, 1000, 10000, 50000]},
)
ax.set_title("Number of works per country 1929-2017")

Text(0.5, 1.0, 'Number of works per country 1929-2017')


# number of countries with at most one work before 2012; the mask is built from
# map_pre_2012's own counts (not from the all-years table)
map_pre_2012[map_pre_2012["count"] <= 1].name.count()

17


map_data[map_data["count"]<=1].name.count()

17


# countries in the present dataset that are not in the before 2012 dataset 
set(moma_nationalities.name) - set(before_2012.name)

{'Burkina Faso',
 'Indonesia',
 'Iraq',
 'Kazakhstan',
 'Macedonia',
 'Mozambique',
 'Rwanda',
 'Saudi Arabia',
 'Syria'}


# sort the data by acquisition date
moma_by_year = moma.sort_values(by=['Acquisition Date'])

# Split the sorted rows into 10 (roughly) equal deciles. Row positions are split
# rather than the DataFrame itself: np.array_split on a DataFrame is deprecated
# in recent pandas releases. When the row count is not a multiple of 10, the
# leading parts get one extra row each.
df_split = []
position_chunks = np.array_split(np.arange(len(moma_by_year)), 10)
for decile_num, positions in enumerate(position_chunks, start=1):
    # independent copy, so adding columns never writes into a view of moma_by_year
    part = moma_by_year.iloc[positions].copy()

    # add decile number to DataFrame
    part["decile"] = decile_num

    # add decile start and end year to DataFrame
    part["dec start year"] = part["Acquisition Date"].min().year
    part["dec end year"] = part["Acquisition Date"].max().year
    df_split.append(part)

# combine all of the split DataFrames into one big DataFrame with their corresponding decile number
decile_df = pd.concat(df_split)
decile_df.head()


# Number of works by female artists in each decile, in decile order 1..10.
females = decile_df[decile_df["Gender"] == "Female"]
female_list = [
    females.loc[females["decile"] == decile_num, "Acquisition Date"].count()
    for decile_num in range(1, 11)
]


# Running total of the per-decile counts, starting from 0 (11 entries in all).
female_count = [0]
for decile_total in female_list[:10]:
    female_count.append(female_count[-1] + decile_total)
total = female_count[-1]


# Running total of works per decile, with a leading 0 standing for 0% of the
# population.
pop_count = [0]
for part in df_split[:10]:
    pop_count.append(pop_count[-1] + part["Acquisition Year"].count())
total = pop_count[-1]


# Cumulative share (in percent) of all works by female artists reached at the
# end of each decile.
total_female = females["Acquisition Year"].count()
female_prop = [(running / total_female) * 100 for running in female_count]

# Cumulative share of the population: 0, 10, ..., 100.
population_prop = list(range(0, 101, 10))


# x and y values of the Lorenz curve, side by side.
lorenz_curve = pd.DataFrame({
    'cumulative_prop_of_artists': population_prop,
    'cumulative_proportion_of_female_artists': female_prop,
})


# Start and end year of each decile, in decile order. One value is taken per
# decile (rather than the unique values of the whole column) so the lists always
# have one entry per decile, even when two deciles share the same year.
decile_bounds = decile_df.groupby("decile")[["dec start year", "dec end year"]].first()

# prepend 0 to stand for 0% of the population
start_years = [0] + decile_bounds["dec start year"].tolist()
end_years = [0] + decile_bounds["dec end year"].tolist()
start_years

[0, 1929, 1953, 1964, 1968, 1974, 1986, 1998, 2005, 2009, 2013]


# add the decile boundaries to a copy of the Lorenz table
lorenz_year = lorenz_curve.copy()
lorenz_year["decile start"] = start_years
lorenz_year["decile end"] = end_years
# build the "start-end" labels from the computed years instead of typing them
# by hand, so they cannot drift from the data or pick up a wrong separator
lorenz_year["date range"] = [
    "{}-{}".format(start, end) for start, end in zip(start_years, end_years)
]
lorenz_year


# plot the Lorenz curve together with the perfect-equality line (y = x).
# x and y are passed explicitly; plotting the whole DataFrame would draw both
# columns against the row index (0..10) instead of against the population share.
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
population_share = lorenz_curve['cumulative_prop_of_artists']
ax.plot(population_share, lorenz_curve['cumulative_proportion_of_female_artists'], label="Lorenz curve")
ax.plot(population_share, population_share, label="Perfect equality")
ax.legend()
ax.set_xlabel("cumulative proportion of population")
ax.set_ylabel("cumulative proportion of female artists")
ax.set_title("Lorenz Curve and Perfect Equality line")

Text(0.5, 1.0, 'Lorenz Curve and Perfect Equality line')


# np.trapz was renamed to np.trapezoid in NumPy 2.0 and the old name is
# deprecated; use whichever one the installed NumPy provides
_trapezoid = getattr(np, "trapezoid", None) or np.trapz

# calculate the area under the Lorenz curve
lorenz_area = _trapezoid(female_prop, x=population_prop)

# calculate the area under the perfect equality line (y=x)
pe_area = _trapezoid(population_prop, x=population_prop)

# use those values to calculate the gini coefficient
gini_coeff = (pe_area - lorenz_area) / pe_area
gini_coeff

0.2180062388000266


# proportion of art by female artists for each year
female_proportion_by_year = female_per_year["percent_female"]

# df of year and number of acquisitions (computed once; an earlier count over
# every column was immediately overwritten and has been removed)
acquisitons_per_year = (
    moma.groupby(['Acquisition Year'])["Artwork ID"]
    .count()
    .to_frame()
    .reset_index()
    .rename({"Artwork ID": "num_works_acquired"}, axis=1)
)

# get a df of year and percent female
all_data = pd.merge(acquisitons_per_year, female_proportion_by_year, on="Acquisition Year").fillna(0)


# Least-squares line of the yearly female share against the acquisition year.
x = all_data["Acquisition Year"]
y = all_data["percent_female"]
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

# Scatter of the yearly values with the fitted line on top.
plt.figure(figsize=(20, 10))
plt.plot(x, y, 'o')
plt.plot(x, slope * x + intercept, '#EB6E1F')
plt.title("1929-2016")

# Label every point with its year.
x_list = x.tolist()
y_list = y.tolist()
label = all_data["Acquisition Year"]
for txt, x_pos, y_pos in zip(label, x_list, y_list):
    plt.annotate(txt, (x_pos, y_pos), xytext=(10, 5), textcoords='offset points')

plt.xlabel("year")
plt.ylabel("percent_female")

plt.show()

# Collect the fit statistics, formatted to 7 decimals, in a one-column frame.
fit_values = (slope, intercept, r_value, p_value, std_err)
summary_stats = ["{:.7f}".format(value) for value in fit_values]
summary_frame = pd.DataFrame(
    {"summary stats": summary_stats},
    index=["slope", "intercept", "r_value", "p_value", "std_err"],
)
summary_frame


# get only the pre-2012 percent of female artists
pre_2012 = acquisitons_per_year[acquisitons_per_year["Acquisition Year"] < 2012]
pre_2012 = pd.merge(pre_2012, female_proportion_by_year, on="Acquisition Year")
# (the merged series is already named 'percent_female', so no rename is needed)

# fill any missing share with 0 and drop the raw count column if it is present.
# `columns=` is used because pandas >= 2.0 no longer accepts the axis as a
# positional argument of drop().
pre_2012 = pre_2012.fillna(0).drop(columns="Artwork ID", errors="ignore")


# the feature we are looking at is the year a work was acquired
X = pre_2012["Acquisition Year"]

# we are using this to predict the percent of artworks by female artists
y = pre_2012["percent_female"]

# run a linear regression on the pre-2012 data only
slope, intercept, r_value, p_value, std_err = stats.linregress(X, y)

# use the slope to graph the best fit line
line = [slope*p + intercept for p in X]

fig, ax = plt.subplots(figsize=(20, 10))

# graph all observed points (every year) and the line fitted on pre-2012 data
ax.scatter(all_data["Acquisition Year"], all_data["percent_female"], s = 20, color = 'blue')
ax.plot(X, line, color = 'orange', linewidth=2)

# years to extend the fitted line over
test_years = np.arange(2011, 2017, 1)     # 2011..2016: overlaps the held-out observed years
predict_years = np.arange(2016, 2025, 1)  # 2016..2024: extrapolation past the data

# calculate the extended lines
t_line = [slope*year + intercept for year in test_years]
p_line = [slope*year + intercept for year in predict_years]

# plot test line and prediction line
ax.plot(test_years, t_line, color = 'red', linewidth=2, ls='--')
ax.plot(predict_years, p_line, color = 'green', linewidth=2, ls='--')

# mark the points the regression was fitted on (drawn once)
ax.scatter(X, y, s = 5)

# format plot
plt.xticks(range(1930, 2021, 10))
plt.xlabel("year")
plt.ylabel("percent_female")
plt.title("Linear regression showing values and predictions for 2012 onward")
plt.axvline(x=2011, ls=':', color='#add8e6')
plt.show()


# Mean squared error and its root, computed from the residuals of the fitted
# line on the data it was fitted to. (linregress's std_err is the standard
# error of the slope estimate, not an error of the predictions, so it cannot
# stand in for the MSE.)
residuals = y - (slope * X + intercept)
mse = (residuals ** 2).mean()
rmse = np.sqrt(mse)
print("MSE is " + str(mse))
print("RMSE is " + str(rmse))

# put summary stats into a df column
summary_frame = pd.DataFrame(index=["slope", "intercept", "r_value", "p_value", "std_err"])
summary_stats = ["{:.7f}".format(slope),"{:.7f}".format(intercept),"{:.7f}".format(r_value),"{:.7f}".format(p_value), "{:.7f}".format(std_err)]
summary_frame["summary stats"] = summary_stats
summary_frame

MSE is 0.0003427077824517334
RMSE is 0.01851236836419731


# Load the Tate artworks and artists tables.
tate_artworks = pd.read_csv("artwork_data-tate.csv", low_memory=False)
tate_artists = pd.read_csv("artist_data-tate.csv", low_memory=False)

# Peek at both tables.
display(tate_artworks.head())
tate_artists.head()

# Keep only the columns used in the analysis.
artist_fields = ['name', 'gender', 'placeOfBirth', 'yearOfBirth', 'yearOfDeath']
tate_newartist = tate_artists[artist_fields]

artwork_fields = [
    'id', 'accession_number', 'artist', 'artistRole', 'artistId', 'title',
    'medium', 'year', 'acquisitionYear', 'dimensions',
]
tate_newartworks = tate_artworks[artwork_fields]

# Attach the artist attributes to each artwork by matching artist name, then
# drop the duplicated name column.
tate = pd.merge(
    tate_newartworks,
    tate_newartist,
    how='left',
    left_on=['artist'],
    right_on=['name'],
)
tate = tate.drop(columns=['name'])

# Remove artworks without an acquisition year.
no_date = tate.index[tate['acquisitionYear'].isnull()]
tate.drop(no_date, inplace=True)


tate['year'].unique()

array([nan, '1785', '1826', '1828', '1825', '1803', '1794', '1789',
       '1786', '1800', '1790', '1805', '1779', '1870', '1871', '1866',
       '1877', '1873', '1872', '1879', '1875', '1862', '1863', '1861',
       '1880', '1821', '1840', '1846', '1874', '1860', '1852', '1850',
       '1845', '1844', '1827', '1829', '1831', '1830', '1812', '1814',
       '1869', '1881', '1851', '1843', '1864', '1865', '1788', '1876',
       '1858', '1859', '1849', '1885', '1818', '1893', '1806', '1837',
       '1810', '1807', '1808', '1809', '1811', '1816', '1822', '1833',
       '1817', '1931', '1959', '1916', '1917', '1951', '1900', '1899',
       '1902', '1953', '1933', '1974', '1819', '1820', '1967', '1969',
       '1979', '1983', '1989', '1992', '1993', '1994', '1997', '1998',
       '1999', '2000', '1981', '1985', '1996', '1991', '1971', '2004',
       '2002', '2005', '2006', '1975', '2001', '1973', '2003', '1963',
       '1976', '1977', '1960', '1990', '1988', '2007', '1964', '1970',
       '1949', '1952', '1954', '1955', '1956', '1958', '1961', '1965',
       '1966', '1986', '1982', '1978', '1980', '1962', '1984', '1972',
       '1957', '1948', '1987', '1968', '1946', '1945', '2009', '1995',
       '2010', '2008', '2011', '1787', '1791', '1792', '1793', '1795',
       '1796', '1797', '1802', '1798', '1799', '1780', '1804', '1801',
       '1740', '1815', '1835', '1824', '1813', '1856', '1823', '1839',
       '1832', '1834', '1836', '1842', '1841', '1847', '1777', '1680',
       '1770', '1773', '1771', '1756', '1745', '1768', '1766', '1784',
       '1760', '1776', '1783', '1781', '1838', '1696', '1765', '1775',
       '1782', '1758', '1848', '1854', '1855', '1764', '1778', '1772',
       '1753', '1670', '1759', '1752', '1738', '1868', '1750', '1731',
       '1642', '1747', '1644', '1919', '1923', '1853', '1905', '1922',
       '1857', '1762', '1737', '1748', '1755', '1746', '1545', '1888',
       '1896', '1882', '1886', '1892', '1887', '1889', '1883', '1884',
       '1891', '1878', '1897', '1895', '1890', '1894', '1769', '1741',
       '1898', '1867', '1749', '1901', '1763', '1903', '1739', '1761',
       '1904', '1906', '1907', '1908', '1646', '1774', '1909', '1910',
       '1911', '1655', '1912', '1913', '1914', '1720', '1915', '1695',
       '1623', '1920', '1743', '1918', '1715', '1937', '1921', '1751',
       '1924', '1925', '1926', '1927', '1928', '1929', '1930', '1932',
       '1645', '1744', '1934', '1576', '1935', '1936', '1938', '1939',
       '1940', '1941', '1942', '1943', '1944', '1732', '1947', '1699',
       '1735', '1659', '1950', '1733', '1573', '1710', '1672', '1625',
       '2012', '1660', '1690', '1684', '1615', '1600', '1679', '1619',
       '1569', '1586', '1650', '1742', '1557', '1629', '1725', '1648',
       '1674', '1730', '1754', '1563', '1757', '1592', '1636', '1633',
       '1700', '1594', '1635', '1620', '1640', '1767', '1724', '1610',
       '1605', '1565', '1707', '1639', '1676', '1661', '1596', '1630',
       '1701', '1681', '1728', '1675', '1654', '1689', '1662', '1685',
       '1723', '1683', '1734', '1721', '1595', '1638', '1713', '1716',
       '1719', '1692', '1726', '1718', '1637', '1647', '1658', '1736',
       'no date', '1628', 'c.1997-9'], dtype=object)


# Map the two non-numeric year labels to usable values. The result is assigned
# back to the column: an inplace replace on a column selection may act on a
# temporary copy and leave `tate` unchanged (pandas copy-on-write).
tate['year'] = tate['year'].replace({'no date': np.nan, 'c.1997-9': 1997})


# Artist's age in the year the work was made.
tate['age_made'] = pd.to_numeric(tate['year']).sub(tate['yearOfBirth'])
# Create the 'alive?' column, initially holding the creation year.
tate['alive?'] = tate['year']


def alive(acquisition, death):
    """Return 'True' or 'False' (as strings) for "artist alive at acquisition".

    The answer is 'False' only when ``acquisition`` is strictly greater than
    ``death``. Every other case yields 'True', including comparisons that
    involve NaN (which evaluate to False).
    """
    return 'False' if acquisition > death else 'True'
# Evaluate alive() for every work, pairing its acquisition year with the artist's year of death.
tate['alive?'] = [
    alive(acquired, died)
    for acquired, died in zip(tate['acquisitionYear'], tate['yearOfDeath'])
]


def country(place):
    """Pick the country-like part out of a comma-separated place string.

    ``place`` is converted with ``str()`` first. If it contains at least one
    comma, the second comma-separated piece is returned with leading
    whitespace removed; otherwise the whole string is returned unchanged
    (so a missing value comes back as the text 'nan').
    """
    parts = str(place).split(',')
    if len(parts) <= 1:
        return parts[0]
    return parts[1].lstrip()
# Derive each artist's origin from the birthplace text, one value at a time.
tate['countryOfOrigin'] = tate['placeOfBirth'].map(country)


# Inspect the distinct origins that were extracted.
tate['countryOfOrigin'].unique()

array(['United Kingdom', 'nan', 'Bharat', 'France', 'Italia', 'Viet Nam',
       'Deutschland', 'Rossiya', 'United States', 'Bahamas', 'Australia',
       'Ellás', 'België', 'Latvija', 'Canada', 'Éire', 'Schweiz',
       'Plymouth', 'Edinburgh', 'España', 'Nederland', 'Beckington',
       'Perth', 'Wimbledon', 'Canterbury', 'Danmark', 'Türkiye',
       'Österreich', 'Blackheath', 'Charlieu', 'Isle of Man',
       'Magyarország', 'Hrvatska', 'Chile', 'Epsom', 'Auteuil',
       'Kensington', 'South Africa', 'Mauritius', 'Rochdale', 'Melmerby',
       'Ukrayina', 'Sverige', 'Département de la', 'New Zealand',
       'România', 'Otok', 'Charlotte Amalie', 'Schlesien', 'Saint Hélier',
       'Polska', 'Misr', 'Bermondsey', 'Egremont', 'Norge', 'Braintree',
       'Liverpool', 'Belarus', 'Stoke on Trent', 'Zhonghua', 'Portugal',
       'Cuba', 'Moldova', 'Singapore', 'Nihon', 'Brasil', 'Staten Island',
       'México', 'Bosna i Hercegovina', 'Slovenija',
       'Slovenská Republika', 'Ceská Republika', 'Myanmar', "Yisra'el",
       'Chung-hua Min-kuo', 'D.C.', 'Colombia', 'Sri Lanka', 'Indonesia',
       'Jugoslavija', 'Bulgaria', 'Samoa', 'Argentina', 'Barbados',
       "Taehan Min'guk", 'Îran', 'Niederschlesien', 'Al-Lubnan',
       'Pakistan', 'Panamá', 'Zimbabwe', 'Zambia', 'Suriyah', 'Cameroun',
       'Nigeria', 'Bangladesh', "Al-Jaza'ir", 'Lietuva', 'Solothurn',
       'Makedonija', 'Venezuela', 'Malaysia', 'Douglas', 'London',
       'Tunis', 'Bristol', 'Armenia', 'Eesti', 'Jamaica', 'Guyana',
       'Montserrat', 'Tanzania', 'Ísland', 'Mehoz', 'Suomi',
       'Hertfordshire', 'Stockholm', 'Luxembourg', 'Shqipëria', 'Uganda',
       'Perú', 'Pilipinas', 'Malta', 'Kenya', 'Lao', 'Al-‘Iraq',
       'Nicaragua', 'Prathet Thai', "Choson Minjujuui In'min Konghwaguk",
       'As-Sudan'], dtype=object)


# country() turns a missing birthplace into the text 'nan'; convert it back to a
# real NaN, then keep only the works that have a country of origin.
tate['countryOfOrigin'] = tate['countryOfOrigin'].replace({'nan': np.nan})
has_origin = tate['countryOfOrigin'].notna()
tate_country = tate[has_origin]


# Start every work off as 'False' (a string flag); the real values are filled in below.
index = tate_country['countryOfOrigin'].index
tate_country['European?'] = 'False'


# Origins treated as "Western" for the Tate data. The list mixes English
# country names, native-language country names, and a number of towns/regions
# that country() returns when the birthplace text has no country part.
countries_tate = [
    'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic',
    'Denmark', 'Estonia', 'Finland', 'France', 'Germany',
    'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Luxembourg',
    'Lithuania', 'Malta', 'Netherlands', 'Poland', 'Portugal',
    'Romania', 'Slovak Republic', 'Slovenia', 'Spain', 'Sweden',
    'United Kingdom', 'Deutschland', 'Italia', 'London', 'Ceská Republika',
    'D.C.', 'Staten Island', 'Liverpool', 'Polska', 'România', 'España',
    'Edinburgh', 'Canada', 'United States', 'Stoke on Trent',
    'Ellás', 'België', 'Latvija', 'Nederland', 'Wimbledon', 'Canterbury',
    'Danmark', 'Österreich', 'Blackheath',
    'Bermondsey', 'Egremont', 'Norge', 'Braintree', 'Liverpool', 'Bristol',
    'Eesti', 'Hertfordshire', 'Stockholm', 'Luxembourg',
]
# Filled by nation(): every distinct origin that was classified as non-Western.
non_european = []
european = []


def nation(nation):
    """Classify an origin as Western ('True') or not ('False').

    Parameters
    ----------
    nation : str or missing value
        Origin label to classify.

    Returns
    -------
    'True' if the label is in ``countries_tate``; NaN if the label is
    missing; 'False' otherwise. The flags are strings, not booleans.

    Side effect: each distinct label classified 'False' is appended once to
    the module-level ``non_european`` list.
    """
    # Missing values must be detected with pd.isna: `x == np.nan` is always
    # False, so the original check never fired and NaN was recorded as a
    # non-European "country".
    if pd.isna(nation):
        return np.nan
    if nation in countries_tate:
        return 'True'
    if nation not in non_european:
        non_european.append(nation)
    return 'False'
            
# Classify every work's country of origin with nation(), in row order.
tate_country['European?'] = tate_country['countryOfOrigin'].map(nation)


tate_country


# Latest acquisition year present in the Tate data.
tate_country['acquisitionYear'].max()

2013.0


# Split MoMA's works at the end of 2013 (the latest acquisition year found in
# the Tate data above) so both collections can be compared over the same period.
d = datetime.datetime(2014, 1, 1)
# Works acquired up to and including 31 December 2013.
moma_before_2013 = names_dates_moma[names_dates_moma['Acquisition Date'] < d]
# Works acquired from 1 January 2014 onwards. `>=` is used so that a work dated
# exactly 2014-01-01 is not silently dropped from both subsets.
moma_after_2013 = names_dates_moma[names_dates_moma['Acquisition Date'] >= d]


# MoMA (acquisitions before the 2014-01-01 cutoff): number of works per 'European?' flag
moma_flags = moma_before_2013['European?']
moma_European = moma_flags.value_counts()


# Tate: number of works per 'European?' flag
tate_flags = tate_country['European?']
tate_European = tate_flags.value_counts()

# Draw the Lorenz curve on a single set of axes.
# `lorenz_curve` is defined elsewhere in the file.
fig, ax = plt.subplots()
ax.plot(lorenz_curve)
ax.set_ylabel("cumulative proportion of female artists")
ax.set_xlabel("cumulative proportion of population")
ax.set_title("Lorenz Curve and Perfect Equality line")

Text(0.5, 1.0, 'Lorenz Curve and Perfect Equality line')


# Side-by-side pie charts: MoMA on the left, Tate on the right.
fig, (ax1, ax2) = plt.subplots(1, 2)

moma_European.plot.pie(ax=ax1, autopct='%.2f')
tate_European.plot.pie(ax=ax2, autopct='%.2f')

ax1.set_title("MoMA Western vs Non Western")
ax2.set_title("Tate Western vs Non Western")

# Leave room between the two pies and make the figure 12 x 12 inches.
fig.subplots_adjust(wspace=.5)
fig.set_size_inches(12, 12)


# MoMA (acquisitions before the 2014-01-01 cutoff): number of works per artist gender
moma_gender_column = moma_before_2013['Gender']
moma_gender = moma_gender_column.value_counts()


# Tate: number of works per artist gender
tate_gender_column = tate_country['gender']
tate_gender = tate_gender_column.value_counts()


# Side-by-side pie charts of the gender split: MoMA on the left, Tate on the right.
fig, (ax3, ax4) = plt.subplots(1, 2)

moma_gender.plot.pie(ax=ax3, autopct='%.2f')
tate_gender.plot.pie(ax=ax4, autopct='%.2f')

ax3.set_title("MoMA Male vs Female Artists")
ax4.set_title("Tate Male vs Female Artists")

# Leave room between the two pies and make the figure 12 x 12 inches.
fig.subplots_adjust(wspace=.5)
fig.set_size_inches(12, 12)


# MoMA (before the cutoff): gender counts for the works flagged 'False' in
# 'European?', i.e. the non-Western works only (despite the variable name).
moma_non_western = moma_before_2013['European?'] == 'False'
moma_western_gender = moma_before_2013.loc[moma_non_western, 'Gender'].value_counts()


# Tate: gender counts for the works flagged 'False' in 'European?'
# (non-Western works only).
tate_non_western = tate_country['European?'] == 'False'
tate_western_gender = tate_country.loc[tate_non_western, 'gender'].value_counts()


# Side-by-side pie charts of the gender split among non-Western works.
fig, (ax5, ax6) = plt.subplots(1, 2)

moma_western_gender.plot.pie(ax=ax5, autopct='%.2f')
tate_western_gender.plot.pie(ax=ax6, autopct='%.2f')

ax5.set_title("MoMA Non Western Male vs Female Artists")
ax6.set_title("Tate Non Western Male vs Female Artists")

# Leave room between the two pies and make the figure 12 x 12 inches.
fig.subplots_adjust(wspace=.5)
fig.set_size_inches(12, 12)


# Tate: flag works whose country of origin is the United Kingdom, then take
# the share of flagged works (mean of a boolean column).
tate_is_british = tate_country['countryOfOrigin'].eq("United Kingdom").to_numpy()
tate_country = tate_country.assign(British=tate_is_british)
tate_British_prop = tate_country["British"].eq(True).mean()


# MoMA: flag works whose artist nationality is "American" and take their share.
moma_is_american = names_dates_moma['Nationality'].eq("American").to_numpy()
names_dates_moma = names_dates_moma.assign(American=moma_is_american)
moma_American_prop = names_dates_moma["American"].eq(True).mean()


# MoMA: flag works whose artist nationality is "British" and take their share.
moma_is_british = names_dates_moma['Nationality'].eq("British").to_numpy()
names_dates_moma = names_dates_moma.assign(British=moma_is_british)
moma_British_prop = names_dates_moma["British"].eq(True).mean()


# Tate: flag works whose country of origin is the United States and take their share.
tate_is_american = tate_country['countryOfOrigin'].eq("United States").to_numpy()
tate_country = tate_country.assign(American=tate_is_american)
tate_American_prop = tate_country["American"].eq(True).mean()


# Horizontal bar chart comparing the four proportions computed above.
# Values are fractions between 0 and 1, even though the column is labelled "%".
bar_labels = [
    "Proportion of American Art in the Tate",
    "Proportion of American Art in MoMA",
    "Proportion of British Art in the Tate",
    "Proportion of British Art in MoMA",
]
bar_values = [tate_American_prop, moma_American_prop, tate_British_prop, moma_British_prop]
plotdata = pd.DataFrame({"% of artworks": bar_values}, index=bar_labels)
plotdata.plot(kind="barh", title = "Examining National Biases in MoMA and the Tate")

<AxesSubplot:title={'center':'Examining National Biases in MoMA and the Tate'}>


# Each *_prop below is the share of ALL works in the collection that fall in
# the given (home-country?, gender) group: the mean of a boolean mask.

# --- MoMA: American vs non-American, by gender -----------------------------
_moma_american = names_dates_moma['American'].eq(True)
_moma_foreign = names_dates_moma['American'].eq(False)
_moma_male = names_dates_moma['Gender'].eq("Male")
_moma_female = names_dates_moma['Gender'].eq("Female")

# Male American artists
american_males_moma = _moma_american & _moma_male
american_males_moma_prop = american_males_moma.mean()

# Male non-American artists
nonamerican_males_moma = _moma_foreign & _moma_male
nonamerican_males_moma_prop = nonamerican_males_moma.mean()

# Female American artists
american_females_moma = _moma_american & _moma_female
american_females_moma_prop = american_females_moma.mean()

# Female non-American artists
nonamerican_females_moma = _moma_foreign & _moma_female
nonamerican_females_moma_prop = nonamerican_females_moma.mean()


# --- Tate: British vs non-British, by gender -------------------------------
_tate_british = tate_country['British'].eq(True)
_tate_foreign = tate_country['British'].eq(False)
_tate_male = tate_country['gender'].eq("Male")
_tate_female = tate_country['gender'].eq("Female")

# Male British artists
british_males_tate = _tate_british & _tate_male
british_males_tate_prop = british_males_tate.mean()

# Male non-British artists
nonbritish_males_tate = _tate_foreign & _tate_male
nonbritish_males_tate_prop = nonbritish_males_tate.mean()

# Female British artists
british_females_tate = _tate_british & _tate_female
british_females_tate_prop = british_females_tate.mean()

# Female non-British artists
nonbritish_females_tate = _tate_foreign & _tate_female
nonbritish_females_tate_prop = nonbritish_females_tate.mean()


# MoMA: horizontal bars for the four (American?, gender) shares.
moma_bar_labels = [
    "Proportion of American Male Artists",
    "Proportion of non-American Male Artists",
    "Proportion of American Female Artists",
    "Proportion of Non-American Female Artists",
]
moma_bar_values = [
    american_males_moma_prop,
    nonamerican_males_moma_prop,
    american_females_moma_prop,
    nonamerican_females_moma_prop,
]
plotdata_moma = pd.DataFrame({"% of artworks": moma_bar_values}, index=moma_bar_labels)
plotdata_moma.plot(kind="barh", title = "Examining National Biases MoMA")

# Tate: horizontal bars for the four (British?, gender) shares.
tate_bar_labels = [
    "Proportion of British Male Artists",
    "Proportion of non-British Male Artists",
    "Proportion of British Female Artists",
    "Proportion of Non-British Female Artists",
]
tate_bar_values = [
    british_males_tate_prop,
    nonbritish_males_tate_prop,
    british_females_tate_prop,
    nonbritish_females_tate_prop,
]
plotdata_tate = pd.DataFrame({"% of artworks": tate_bar_values}, index=tate_bar_labels)
plotdata_tate.plot(kind="barh", title = "Examining National Biases in the Tate")

<AxesSubplot:title={'center':'Examining National Biases in the Tate'}>


# Load the Met objects table. low_memory=False makes pandas read the file in
# one pass so each column's dtype is inferred from the whole column.
met = pd.read_csv("MetObjects.csv",  low_memory=False)


# Peek at the first rows to see which columns are available.
met.head()


# Columns that the analysis below does not use; they are removed from `met` in place.
# NOTE(review): 'Constiuent ID' is spelled exactly as written here on purpose --
# presumably it matches the header in the CSV; confirm before "fixing" it.
unused_met_columns = [
    'Object Number', 'Is Highlight', 'Is Timeline Work', 'Is Public Domain', 'Object ID',
    'Gallery Number', 'Dynasty', 'Reign',
    'Portfolio', 'Constiuent ID', 'Artist Role', 'Artist Prefix', 'Artist Display Bio',
    'Artist Suffix', 'Artist Alpha Sort', 'Artist ULAN URL',
    'Artist Wikidata URL', 'Object Begin Date',
    'Object End Date', 'Medium', 'Dimensions', 'Credit Line',
    'Geography Type', 'City', 'State', 'County', 'Country', 'Region',
    'Subregion', 'Locale', 'Locus', 'Excavation', 'River', 'Classification',
    'Rights and Reproduction', 'Link Resource', 'Object Wikidata URL',
    'Metadata Date', 'Repository', 'Tags', 'Tags AAT URL',
    'Tags Wikidata URL',
]
met.drop(columns=unused_met_columns, inplace=True)


# Restrict the Met data to the Modern and Contemporary Art department.
in_modern_department = met['Department'] == 'Modern and Contemporary Art'
met_modern = met[in_modern_department]


# Drop works without an accession year, make the year an integer, and keep
# the works accessioned before 2017.
met_modern = met_modern.dropna(subset=['AccessionYear'])
accession_year = met_modern['AccessionYear'].astype(int)
met_modern['AccessionYear'] = accession_year
met_before_2017 = met_modern[met_modern['AccessionYear'] < 2017]


# Replace missing artist dates with the placeholder string 'no' so that the
# string operations further down can be applied to every row.
for date_column in ('Artist Begin Date', 'Artist End Date'):
    met_before_2017[date_column] = met_before_2017[date_column].replace(np.nan, 'no')


# Show the raw nationality values first, then apply the same 'no' placeholder.
display(met_before_2017['Artist Nationality'].unique())
nationality_filled = met_before_2017['Artist Nationality'].replace(np.nan, 'no')
met_before_2017['Artist Nationality'] = nationality_filled

array(['American', 'Spanish', 'German|German', 'French', 'French|French',
       'British|British', 'British', nan, 'Austrian', 'Hungarian',
       'American ', 'British, Scottish', 'American, born Luxembourg',
       'American, born Poland', 'Mexican', 'American, born Germany',
       'Italian, born Greece', 'Italian', 'French, born Switzerland',
       'German', 'British, born United States', 'French, born Russia',
       'Israeli', 'American, born Hungary', 'Czech',
       'American, born Russia', 'Russian, born Romania', 'Australian',
       'Japanese', 'American, born Denmark', 'French, born Germany',
       'Russian', 'French, born Poland|American, born Lithuania',
       'Norwegian', 'American, born Canada', 'Swedish',
       'American|American, born England', 'German, born Switzerland',
       'French|French|French|French', 'American, born Spain',
       'American, born Japan', 'American|Italian', 'Swedish|Swedish',
       'American, born Lithuania', 'American, born The Netherlands',
       'American, born Ukraine', 'American, born France',
       'Austrian|Austrian', 'American|American, born Lithuania',
       'American|American', 'American|Japanese', 'Spanish|Italian',
       'Venezuelan', 'American, born Bulgaria', 'Chilean', 'Finnish',
       'Belgian', 'Indian', 'British, born Germany', 'Austrian|German',
       'American, born Philippines', 'American, born China',
       'American, born Austria', 'American, born Germany|American',
       'Brazilian', 'Canadian', 'American, born Sweden',
       'American|American|American', 'American, born Italy',
       'American|French', 'Dutch', 'American, born England',
       'Austrian|Czech|Austrian', 'Romanian', 'Danish',
       'British, Scottish|British', 'American, born England|American',
       'British, born Canada', 'Swiss', 'Lebanese',
       'French, born Hungary|French', 'Uruguayan', 'Rumanian',
       'American, born Armenia', 'American, born Panama',
       'Italian, born Argentina', 'French, born Japan', 'Czechoslovakian',
       'Australian, born India', 'Colombian',
       'American, born Puerto Rico', 'American, born Cuba',
       'Austrian|Czech', 'American, born Switzerland',
       'Israeli, born Romania', 'American|Dutch', 'Finnish|Finnish',
       'American|American|German', 'Austrian, born Germany',
       'Danish|Danish', 'French|French|French', 'Irish',
       'American, born Hungary|German', 'German|Russian',
       'American, born Ireland',
       'American, born Austria|American, born Austria', 'Polish',
       'French|Dutch', 'American, born Greece', 'French|Italian',
       'Italian|Italian', 'German|German|German', 'Dutch|Dutch',
       'Italian|Italian|Italian', 'British|American',
       'Italian|Italian|Italian|Italian', 'Spanish|French',
       'Italian, born Austria', 'American|Israeli',
       'Italian, born Austria|Italian', 'American, born Romania',
       'Japanese|Japanese', 'Japanese|Italian|Japanese',
       'American, born Italy|American|Italian',
       'American, born Italy|American', 'Argentinian', 'Spanish|Spanish',
       'Italian|Argentinian', 'American|American, born Italy',
       'Australian|American', 'Yugoslavian', 'Czech|Czechoslovakian',
       'British|British|British', 'Israeli, born Palestine',
       'American|Mexico', 'American|American|American|American',
       'American|American, born Russia|American, born Russia', 'Korean',
       'New Zealander', 'Mexican, born United States',
       'Austrian, born Germany|Austrian|Austrian', 'Saint Lucian',
       'Chinese', 'British, born Austria', 'Cuban',
       'French, born Lithuania', 'Taiwanese', 'Danish|Italian',
       'American|German', 'Kuwaiti', 'French, born Romania',
       'French|American', 'American, born Egypt', 'Iranian',
       'Spanish|French|French', 'Italian|Italian, born Austria',
       'French|American and French', 'American and French|French',
       'French, born Switzerland|French', 'Andorran|French',
       'American, born Norway', 'American|Swedish', 'French|German',
       'German|French', 'American, born Luxembourg|American',
       'American|American, born Luxembourg', 'American German',
       'Austrian|Austrian, born Hungary', 'Peruvian', 'Italian|American',
       'Swedish|Danish', 'American, born Belgium',
       'American, born Finland', 'France', 'Portuguese',
       'French|French, born Switzerland', 'Chinese|American',
       'British, born United States|American', 'British, born France',
       'British, born China', 'Denmark|Danish', 'Dominican',
       'French, born Poland', 'American, born Germany|German',
       'American|Turkish', 'American|British',
       'American, born Austria-Hungary|American', 'American|Hungarian',
       'French|American|American', 'Swiss|German/Swiss',
       'French, born Switzerland|French|French|Swiss', 'Danish|Swiss',
       'American|American, born Austria-Hungary', 'Hungarian|American',
       'American, born Russia|American',
       'Czech|Czech|Czechoslovakian|Czech', 'Dutch|Dutch|German',
       'Dutch|Italian', 'South African', 'German|American, born Austria',
       'German|Italian', 'Swedish|Italian', 'British, born Scotland',
       'German, born Poland', 'Mexican, born England',
       'British, Scottish|Italian', 'Italian|British, Scottish',
       'French|Romanian|French', 'Romanian|French', 'Japanese|French',
       'French, born Switzerland|German', 'French, born Hungary',
       'Tajikistan', 'Swiss|Swiss', 'French, born China', 'French|Czech',
       'Turkish', 'Austrian|Austrian|Austrian', 'Slovakian', 'Egyptian',
       'Ghanaian', 'Kashmir', 'British, born Argentina',
       'Swedish, born Austria', 'Danish-Icelandic', 'Ukrainian',
       'Pakistani', 'Israeli|Italian', 'Mexican|French', 'French|Swiss',
       'Guatemalan', 'Icelandic', 'American|American|American '],
      dtype=object)


def _first_artist_origin(value):
    """Reduce one raw 'Artist Nationality' entry to a single label.

    Raw entries list one item per artist separated by '|', and an item may
    look like 'American, born Poland'. The first listed artist is used; if
    that item names a birthplace ('born X'), the birthplace is returned,
    otherwise the item itself. Surrounding whitespace is removed.

    Non-string values and the 'no' placeholder are returned unchanged.
    """
    if not isinstance(value, str) or value == 'no':
        return value
    first_artist = value.split('|')[0]
    # Take everything after 'born ' rather than the last space-separated
    # token, so multi-word places survive ('United States', 'Puerto Rico'
    # previously became 'States' and 'Rico').
    _, marker, birthplace = first_artist.partition('born ')
    return (birthplace if marker else first_artist).strip()


# Collapse every entry to one label, always based on the first listed artist
# (the previous loop used the last artist's birthplace when any 'born'
# appeared, but the first artist otherwise). Finally turn the 'no'
# placeholder back into a real missing value.
met_before_2017['Artist Nationality'] = (
    met_before_2017['Artist Nationality']
    .map(_first_artist_origin)
    .replace('no', np.nan)
)


# Entries for works with several artists hold one value per artist separated
# by '|'; keep only the first artist's value.
# The string conversion is now actually applied (its result used to be
# discarded), and the old `!= np.nan` guard is gone because that comparison is
# always True. Missing values therefore come out as text ('no' or 'nan').
met_before_2017['Artist Begin Date'] = (
    met_before_2017['Artist Begin Date'].astype(str).str.split('|').str[0]
)


# Entries for works with several artists hold one value per artist separated
# by '|'; keep only the first artist's value.
# Done as one vectorised string operation. The old per-row `!= np.nan` guard
# is dropped because that comparison is always True and filtered nothing.
met_before_2017['Artist End Date'] = (
    met_before_2017['Artist End Date'].astype(str).str.split('|').str[0]
)


# List the distinct raw values of 'Artist Gender' in the full Met table.
met['Artist Gender'].unique()

array([nan, '|', '|Female', '||', '||||', 'Female|', 'Female', '|||',
       'Female|Female', '||Female', '|Female|Female', 'Female||',
       'Female|Female|Female|Female|Female||Female|Female||||||Female|Female|Female|Female|||Female|||Female|Female|',
       '|Female|', 'Female||Female', 'Female|Female|Female|', '||||||||',
       '|||||||||||||||||||||||||||', 'Female|Female|', 'Female|Female||',
       'Female|||', '|Female||', '|Female||Female', '||Female|',
       '|Female|Female|Female', '|||Female', 'Female|Female|Female',
       '||||||', '|Female|||', '|||||', '||||Female|', '||||||||||',
       '||||||||||||||||', '|||||||', '|||||||||||||', '|||||||||',
       '||||||||||||||||||||',
       'Female|||||||||||||||||||||||||||||Female||Female||||',
       'Female||||Female|||Female|||||||||||||||||Female|',
       '|Female||||||||||||||Female', '|||||||Female|||||',
       'Female||||Female|||||Female|||',
       '||||Female||||||Female|||||Female||||Female', '||||Female',
       '||||||||||||||||||||||||||||||', '||Female||',
       'Female|||||||Female|Female||||Female|',
       'Female|||Female|||Female|||Female||||', '|||||||||||||||||',
       '||||||||||||||||||', '||Female|Female|Female|||||Female||||',
       '||||||||||||', '|||||||||||||||',
       '||||||||||||||||||||||||||||||||||||||', '||Female||||||',
       '||||||||||||||||||||||||||', 'Female|||Female|', '|||||||||||',
       '||Female|||||||||||Female|||||||', '|||||||||Female',
       '||||||||Female||||||||', '|||||||||||||||Female||||',
       '|||Female|', '|||||||||||||||||||',
       '|||||||||||||||||||||||||||||', '|||||||Female|',
       '||||||||||||||Female|||||||', '||||||||||||||',
       '||||||||||Female', '||Female||Female||',
       '|Female||||||||||||||||Female|||||||||||||||',
       '|||||Female||||||||||||Female|||Female|||||||||',
       '||||||||||||Female|||||Female||||||||||||||',
       '|||||||||||||||||Female||||||||||||||||',
       '|Female|||||||||||||||||||||||',
       '||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||',
       '|Female||||||||||||||||||||||||||Female||||Female|||||||||||||||||',
       '||||||||||||||||Female|||||||||||||', 'Female||||||',
       '||||||||||||||||||||||Female||||Female|Female|Female|||||||Female|||Female|||||||||||||||||||||||||||||Female',
       'Female||||||||||||', '||||||||||||||||||||||||||||||||',
       '|||||||||||||||||||||||||', '|||||Female|||||||',
       '|||||||||||||||||||||||||||||||||', '|||||Female||',
       '|||Female|||||', '|||||Female', '||||||||||||||||||||||||',
       '|||||||Female|||||||||||||||||||||||||',
       '|||||||||||||||||||||||||||||||||||||', '|||||Female||||Female|',
       '||||||||Female|Female|||||||||||||||||||',
       '|||||||||Female|||||||||||||||||||||||', '|||Female||',
       '|||||||||||||||||||||||||||||||||||||||||||||||||',
       '||||||||Female||||', '||||||||||||||||||||||',
       '|||||||||||||||||||||||||||||||||||||||||||||||', 'Female||||',
       '||||||||||||Female|||||', '|Female||||||||',
       '||||||||||||Female||||Female||||Female|||||||||||',
       '||Female||||', '|||||||||||||||||||||||||||||||||||',
       '||||Female|||', 'Female||Female|Female', 'Female|Female||Female',
       '||||||||Female|', '|Female|Female|||Female', '||||||Female',
       '||||||||||||||||||||||||||||', '|||Female|||',
       '||Female|Female|Female|||||||Female||',
       '|||||||||||||||||Female|||||', '||||Female|||||||',
       '||||||||||||||||||||||||||||||Female|||||',
       '|||||||||||||||||||||Female||||||||||||||||||||',
       '|||||||||||||||||||||', '||||||||||||||||||||||||||Female|',
       '||Female|||', '||||||Female||', '|||Female||Female|',
       '||||||||||||||Female|||||Female||',
       '||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||',
       '|||||||Female', '|||||||||||||||||||||||||||||||||||||||||',
       '||||||||Female|||||||',
       '|||Female|||Female|Female||||||Female|||||||||||||||||',
       '||||||||||||||||||||Female|||||||||||||',
       '|||||||||||||||||||||||||||||||||||||||', '|||||Female|',
       '||||||||||||||Female|||||', '|||||Female||||||', '|Female|||||',
       '||||Female|Female||||Female||||||||Female|||',
       '|||||||||||||Female|||', '||||||Female|||||',
       '||Female||||||Female||||||||',
       '||||||||||||||||||||||||||||||||||||||||||||||||||',
       '|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||',
       '||Female||||Female|Female',
       '|||||||||||||||Female||Female||||||||Female||||',
       'Female||Female||Female|||||||||||||', 'Female|||||||',
       'Female|||Female||||||||||Female|||||',
       'Female||Female||||||Female|', '|||||Female|Female||||',
       '||||Female||||||||', '|||||Female||||||||||||',
       '||||||||||||||||||||||||||||||||||Female||||||||||Female',
       '||||||||||||Female||||', '||||||||||Female||', '||||Female||',
       '|Female||||||||Female|||||||Female|||||||||Female||||',
       '||||Female|||||||||||',
       '||||||||||||||||||||||||||||||Female|Female||||||||||||||||||||||||||||||||||||||||||Female||||Female|||||||||Female|Female|||||||||||||||||Female||Female||||||||||||||||||||||||||||',
       '|Female||||',
       '|||||Female||||||||||||||Female|||||||||||||||||||||||||',
       '||||||||Female||||||||||||||||||||', 'Female|||||',
       '||||||||Female|||||Female||||||||||||Female||Female|Female|||||||||||||||Female|||||||',
       '|||Female||||||||||||||||', '|||Female|||||Female',
       '||Female|||Female|||', '|Female|||||||||Female|',
       '|||Female||||||Female|||||||Female||||',
       'Female|||||Female||||||||||||||||Female|',
       '||Female|Female||||Female|Female|||||||||Female|',
       '||Female|Female||||||Female||Female||||',
       '|||||||||||||||Female|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||Female||||||||',
       '|||Female||||||||', '|||||||||Female|||||',
       '|||||||Female|||||||', '||Female|Female||',
       '||||Female||Female||||Female|||||', '|||||||||||||||||||||||',
       '|||||||||||Female||||||||||Female||||||',
       '||||||||||||||||||||Female||', '||||||||||||||||||Female||',
       '|Female|||||||||||Female||||||',
       '||Female||Female|Female||Female|Female||||Female||Female|Female||Female||Female|||',
       'Female||||Female||||||Female||||Female||Female|||||Female||||||Female|Female|||Female|Female',
       '||||Female|||||', '|||Female|||||||||||Female||||||||',
       '|||||||||||||||Female|||', '|||Female|||Female|||',
       '||||||||||||||||||||||||||||||||||||||||||||',
       '||||||||||||||Female|||||||||||||||||||||||', '|Female|Female||',
       'Female||||||Female', '||||||Female|||', '||Female||||||||',
       '||Female|||||', '||||||||||Female|||||||Female|||||||||||||',
       '||||||||||||Female|||||||||',
       '||||||||||||||||||Female|||||||||||||||||||||||||||||',
       '||||||||||||||||||||||||||||||Female||||||||||||||Female|||',
       '||||||||||||Female||||||||Female|',
       '|Female||||Female|Female||||||',
       '|||||||||||||Female|||||||Female||',
       '||||||||||||Female|||Female||',
       '|Female||Female||||Female|Female|Female|Female|Female|Female|Female|Female|',
       '||||||||||||Female|||||||||||Female||Female|Female||||||||||||Female||',
       '|||||||||||||||||||||||||||||||||||||||||||',
       'Female|Female||Female|',
       'Female||||||||||||||Female|||||||||||||||||', '||Female|||||||',
       'Female|||Female|||Female|', '|Female||||||||||||||||||||||||||||',
       '|Female|||||||', '|||||Female||||||||||||||',
       '||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||',
       'Female||||Female|||Female|Female|Female|Female||||||Female||Female||Female|Female|||Female||Female|Female||Female|Female|Female||||Female||||',
       '|||||||||Female||||||||', '|||||||||||||||||Female',
       '||Female|||||||||', 'Female|Female|||',
       '||||||Female|||||||||||||||||||Female|||||||||',
       '|||||||Female|||||||||||||||',
       '||||||||||||||||||||||||||||||||||||||||||||||||||||||||Female||||||||||||||||Female|||||||||||||||||||||||||||||||||||',
       '||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||Female|||||||||||||Female||||||||||||||||||||||||||||||||||||||||||||||||||||||||||',
       '|||||||||||||||||||||||||||||||Female||||||||',
       '|Female||Female|Female|||||',
       '|||||||||||||||||||||||||Female||||',
       '|||||||||||||||||||||Female||||||', 'Female|||||||||||',
       '||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||Female',
       '|Female|||Female', 'Female||||Female', 'Female|||Female',
       'Female||||Female|', '|Female|Female|',
       '|||||||||||||||||Female||Female|||',
       '||Female||||||||||||||||||||||||',
       '|||||||||||||Female||||||||||||', 'Female||||||||||||||',
       'Female|||||||||||||', 'Female|||||Female',
       '||||||||||||||Female||Female|||||||', '|||||Female||||',
       '||||||||||||||||||||Female||||||', '||Female|Female|',
       'Female||Female|Female|Female||Female|Female||',
       'Female|Female|Female|Female|Female|Female|Female||Female|Female|Female|Female|Female|Female|Female|Female|Female|Female||Female|Female|Female',
       '|||Female|||||Female|', '|||||||||||Female', '|||Female|Female',
       '|||Female|||Female||||Female||', '||||||Female|',
       '||||Female|||Female|Female|', 'Female|||Female||Female||||',
       '|Female|Female|Female||||||', 'Female|Female|Female|||||||',
       '|Female||Female||||Female||', '|||Female||Female|Female',
       '|Female|Female||Female|||Female||Female', '|Female||Female|',
       '||Female||Female', 'Female||Female||', '||Female|Female',
       '||||||Female||||||||||||||||||||||', '||Female||Female|||',
       '||||||||Female||||||',
       'Female|Female|Female|Female|Female|Female|Female',
       '||Female|||||||Female|', '|||Female||||',
       'Female|||||Female||||Female|', '|Female||Female|||',
       'Female||Female|', 'Female||Female|||', '||||Female|Female',
       '|||||||||||||||||||||||||||||||Female||||||||Female|||||||||||Female||||||||||||||||||||||||||||||||||||',
       '|||||Female|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||',
       '|||||||||||||||||||||||||||Female|||||Female|||||||||||||||||Female|||||||Female||',
       '||||||||||Female|||||||||||||||||||||||||||||Female|||||||||',
       '||||||||Female|||Female||'], dtype=object)


# Keep only the works whose artist nationality is known. By this point the
# 'no' placeholder has been converted back to NaN, so comparing against 'no'
# alone removes nothing; test for missing values as well.
nationality_known = (
    met_before_2017['Artist Nationality'].notna()
    & (met_before_2017['Artist Nationality'] != 'no')
)
# .copy() so the columns added below go onto an independent frame.
met_country_before2017 = met_before_2017[nationality_known].copy()


met_country_before2017['Artist Nationality'].unique()

array(['American', 'Spanish', 'German', 'French', 'British', nan,
       'Austrian', 'Hungarian', 'American ', 'British, Scottish',
       'Luxembourg', 'Poland', 'Mexican', 'Germany', 'Greece', 'Italian',
       'Switzerland', 'States', 'Russia', 'Israeli', 'Hungary', 'Czech',
       'Romania', 'Australian', 'Japanese', 'Denmark', 'Russian',
       'Lithuania', 'Norwegian', 'Canada', 'Swedish', 'England', 'Spain',
       'Japan', 'Netherlands', 'Ukraine', 'France', 'Venezuelan',
       'Bulgaria', 'Chilean', 'Finnish', 'Belgian', 'Indian',
       'Philippines', 'China', 'Austria', 'Brazilian', 'Canadian',
       'Sweden', 'Italy', 'Dutch', 'Romanian', 'Danish', 'Swiss',
       'Lebanese', 'Uruguayan', 'Rumanian', 'Armenia', 'Panama',
       'Argentina', 'Czechoslovakian', 'India', 'Colombian', 'Rico',
       'Cuba', 'Irish', 'Ireland', 'Polish', 'Argentinian', 'Yugoslavian',
       'Palestine', 'Korean', 'New Zealander', 'Saint Lucian', 'Chinese',
       'Cuban', 'Taiwanese', 'Kuwaiti', 'Egypt', 'Iranian',
       'American and French', 'Andorran', 'Norway', 'American German',
       'Peruvian', 'Belgium', 'Finland', 'Portuguese', 'Dominican',
       'Austria-Hungary', 'South African', 'Scotland', 'Tajikistan',
       'Turkish', 'Slovakian', 'Egyptian', 'Ghanaian', 'Kashmir',
       'Danish-Icelandic', 'Ukrainian', 'Pakistani', 'Guatemalan',
       'Icelandic'], dtype=object)


# Nationality labels treated as "Western" for the Met data.
countries_met = [
    'American', 'Spanish', 'German', 'French', 'British', 'Austrian',
    'Hungarian', 'American ', 'British, Scottish', 'Italian', 'Czech',
    'Norwegian', 'Swedish', 'Finnish',
    'Belgian', 'Canadian', 'Dutch', 'Romanian', 'Danish', 'Swiss', 'Rumanian',
    'Czechoslovakian', 'Irish', 'Polish', 'Yugoslavian', 'American and French',
    'American German',
    'France', 'Portuguese', 'Denmark', 'Slovakian', 'Danish-Icelandic', 'Icelandic',
]

nationality = met_country_before2017['Artist Nationality']
index = nationality.index
is_known = nationality.notna()
is_western = nationality.isin(countries_met)

# String flags 'True' / 'False', written straight to 'European?'. The old code
# first created a differently named 'European' column that was never updated.
# Missing nationalities get NaN: `== np.nan` never matched, so they used to be
# counted as non-Western.
western_flag = pd.Series(
    np.where(is_western, 'True', 'False'), index=index, dtype=object
)
met_country_before2017['European?'] = western_flag.where(is_known, np.nan)

# Distinct known labels classified as non-Western, in order of first appearance.
non_european = nationality[is_known & ~is_western].unique().tolist()


# MoMA: number of works per 'European?' flag.
# NOTE: this counts every row of names_dates_moma; no acquisition-date cutoff
# is applied here.
moma_flag_column = names_dates_moma['European?']
moma_European = moma_flag_column.value_counts()


# Met (Modern and Contemporary, accessioned before 2017): works per 'European?' flag.
met_flag_column = met_country_before2017['European?']
met_European = met_flag_column.value_counts()


# Side-by-side pie charts: MoMA on the left, Met on the right.
fig, (ax1, ax2) = plt.subplots(1, 2)

moma_European.plot.pie(ax=ax1, autopct='%.2f')
met_European.plot.pie(ax=ax2, autopct='%.2f')

ax1.set_title("MoMA Western vs Non Western")
ax2.set_title("Met Western vs Non Western")

# Leave room between the two pies and make the figure 12 x 12 inches.
fig.subplots_adjust(wspace=.5)
fig.set_size_inches(12, 12)


met_country_before2017


def _is_american(nationality):
    """Return a boolean Series that is True where *nationality* is 'American'.

    Leading and trailing whitespace is ignored, so a padded label such as
    'American ' (a spelling listed in ``countries_met``) is counted as well.
    Missing values compare as False.
    """
    return nationality.str.strip().eq("American")


# Add a boolean 'American' column to each frame: True when the work's recorded
# nationality is American, False otherwise (including missing nationality).
names_dates_moma = names_dates_moma.assign(
    American=_is_american(names_dates_moma['Nationality']))
met_country_before2017 = met_country_before2017.assign(
    American=_is_american(met_country_before2017['Artist Nationality']))

# MoMA: the mean of a boolean mask is the share of rows where it is True, so
# these are the American and non-American shares of all MoMA rows.
american_moma = names_dates_moma['American']
american_moma_num = american_moma.mean()
nonamerican_moma = ~names_dates_moma['American']
nonamerican_moma_num = nonamerican_moma.mean()

# Met: the same two shares computed over the Met rows.
america_met_before_2017 = met_country_before2017['American']
american_met_before_2017 = america_met_before_2017.mean()
nonamerica_met_before_2017 = ~met_country_before2017['American']
nonamerican_met_before_2017 = nonamerica_met_before_2017.mean()


# Horizontal bar chart of the American / non-American shares for MoMA.
# The plotted values are means of boolean flags, i.e. fractions between 0 and
# 1, so the series is labelled as a proportion rather than a percentage.
plotdata_moma = pd.DataFrame(
    {"Proportion of artworks": [american_moma_num, nonamerican_moma_num]},
    index=['Proportion of American Artists in the MoMA',
           'Proportion of Non-American Artists in the MoMA'])
plotdata_moma.plot(kind="barh", title="Examining National Biases MoMA")

# Same chart for the Met. The frame is named for the Met instead of reusing
# the `plotdata_tate` name, which describes a different museum.
plotdata_met = pd.DataFrame(
    {"Proportion of artworks": [american_met_before_2017, nonamerican_met_before_2017]},
    index=['Proportion of American Artists in the Met',
           'Proportion of Non-American Artists in the Met'])
plotdata_met.plot(kind="barh", title="Examining National Biases in the Met")

<AxesSubplot:title={'center':'Examining National Biases in the Met'}>

	Artwork ID	Title	Artist ID	Name	Date	Medium	Dimensions	Acquisition Date	Credit	Catalogue	...	Height (cm)	Length (cm)	Width (cm)	Depth (cm)	Weight (kg)	Duration (s)	Gender	Nationality	Birth Year	Death Year
0	2	Ferdinandsbrücke Project, Vienna, Austria, Ele...	6210	Otto Wagner	1896	Ink and cut-and-pasted painted pages on paper	19 1/8 x 66 1/2" (48.6 x 168.9 cm)	1996-04-09	Fractional and promised gift of Jo Carole and ...	Y	...	48.6000	NaN	168.9000	NaN	NaN	NaN	Male	Austrian	1841.0	1918.0
1	3	City of Music, National Superior Conservatory ...	7470	Christian de Portzamparc	1987	Paint and colored pencil on print	16 x 11 3/4" (40.6 x 29.8 cm)	1995-01-17	Gift of the architect in honor of Lily Auchinc...	Y	...	40.6401	NaN	29.8451	NaN	NaN	NaN	Male	French	1944.0	NaN
2	4	Villa near Vienna Project, Outside Vienna, Aus...	7605	Emil Hoppe	1903	Graphite, pen, color pencil, ink, and gouache ...	13 1/2 x 12 1/2" (34.3 x 31.8 cm)	1997-01-15	Gift of Jo Carole and Ronald S. Lauder	Y	...	34.3000	NaN	31.8000	NaN	NaN	NaN	Male	Austrian	1876.0	1957.0
3	5	The Manhattan Transcripts Project, New York, N...	7056	Bernard Tschumi	1980	Photographic reproduction with colored synthet...	20 x 20" (50.8 x 50.8 cm)	1995-01-17	Purchase and partial gift of the architect in ...	Y	...	50.8000	NaN	50.8000	NaN	NaN	NaN	Male	NaN	1944.0	NaN
4	6	Villa, project, outside Vienna, Austria, Exter...	7605	Emil Hoppe	1903	Graphite, color pencil, ink, and gouache on tr...	15 1/8 x 7 1/2" (38.4 x 19.1 cm)	1997-01-15	Gift of Jo Carole and Ronald S. Lauder	Y	...	38.4000	NaN	19.1000	NaN	NaN	NaN	Male	Austrian	1876.0	1957.0

	Artwork ID	Title	Artist ID	Name	Date	Medium	Dimensions	Acquisition Date	Credit	Catalogue	...	Nationality	Birth Year	Death Year	age_made	alive?	Acquisition Year	European?	decile	dec start year	dec end year
61694	59708	Max Reinhardt (Head) (Kopf)	3197	Oskar Kokoschka	1919.0	Lithograph	composition (irreg. ): 15 3/16 x 11 15/16" (38...	1929-11-19	Gift of Paul J. Sachs	Y	...	Austrian	1886.0	1980.0	33.0	True	1929	True	1	1929	1953
61744	59762	The Lords of the World (Die Herren der Welt )	5260	Georg Scholz	1922.0	Lithograph	composition: 11 3/4 x 15 3/4" (29.8 x 40 cm); ...	1929-11-19	Gift of Paul J. Sachs	Y	...	German	1890.0	1945.0	32.0	True	1929	True	1	1929	1953
61711	59728	Dialogue (Zwiesprache)	4533	Max Pechstein	1920.0	Woodcut	composition: 15 13/16 x 12 9/16" (40.2 x 31.9 ...	1929-11-19	Gift of Paul J. Sachs	Y	...	German	1881.0	1955.0	39.0	True	1929	True	1	1929	1953
61660	59673	Woman, Standing in the Garden (Frau, im Garten...	3197	Oskar Kokoschka	1916.0	Lithograph	composition (irreg.): 9 13/16 x 5 11/16" (24.9...	1929-11-19	Gift of Paul J. Sachs	Y	...	Austrian	1886.0	1980.0	30.0	True	1929	True	1	1929	1953
61635	59646	Marine	1832	Lyonel Feininger	1918.0	Woodcut	composition (irreg.): 6 5/8 x 8 7/8" (16.8 x 2...	1929-11-19	Gift of Paul J. Sachs	Y	...	American	1871.0	1956.0	47.0	True	1929	True	1	1929	1953

	id	accession_number	artist	artistRole	artistId	title	dateText	medium	creditLine	year	acquisitionYear	dimensions	width	height	depth	units	inscription	thumbnailCopyright	thumbnailUrl	url
0	1035	A00001	Blake, Robert	artist	38	A Figure Bowing before a Seated Old Man with h...	date not known	Watercolour, ink, chalk and graphite on paper....	Presented by Mrs John Richmond 1922	NaN	1922.0	support: 394 x 419 mm	394	419	NaN	mm	NaN	NaN	http://www.tate.org.uk/art/images/work/A/A00/A...	http://www.tate.org.uk/art/artworks/blake-a-fi...
1	1036	A00002	Blake, Robert	artist	38	Two Drawings of Frightened Figures, Probably f...	date not known	Graphite on paper	Presented by Mrs John Richmond 1922	NaN	1922.0	support: 311 x 213 mm	311	213	NaN	mm	NaN	NaN	http://www.tate.org.uk/art/images/work/A/A00/A...	http://www.tate.org.uk/art/artworks/blake-two-...
2	1037	A00003	Blake, Robert	artist	38	The Preaching of Warning. Verso: An Old Man En...	?c.1785	Graphite on paper. Verso: graphite on paper	Presented by Mrs John Richmond 1922	1785	1922.0	support: 343 x 467 mm	343	467	NaN	mm	NaN	NaN	http://www.tate.org.uk/art/images/work/A/A00/A...	http://www.tate.org.uk/art/artworks/blake-the-...
3	1038	A00004	Blake, Robert	artist	38	Six Drawings of Figures with Outstretched Arms	date not known	Graphite on paper	Presented by Mrs John Richmond 1922	NaN	1922.0	support: 318 x 394 mm	318	394	NaN	mm	NaN	NaN	http://www.tate.org.uk/art/images/work/A/A00/A...	http://www.tate.org.uk/art/artworks/blake-six-...
4	1039	A00005	Blake, William	artist	39	The Circle of the Lustful: Francesca da Rimini...	1826–7, reprinted 1892	Line engraving on paper	Purchased with the assistance of a special gra...	1826	1919.0	image: 243 x 335 mm	243	335	NaN	mm	NaN	NaN	http://www.tate.org.uk/art/images/work/A/A00/A...	http://www.tate.org.uk/art/artworks/blake-the-...

	id	name	gender	dates	yearOfBirth	yearOfDeath	placeOfBirth	placeOfDeath	url
0	10093	Abakanowicz, Magdalena	Female	born 1930	1930.0	NaN	Polska	NaN	http://www.tate.org.uk/art/artists/magdalena-a...
1	0	Abbey, Edwin Austin	Male	1852–1911	1852.0	1911.0	Philadelphia, United States	London, United Kingdom	http://www.tate.org.uk/art/artists/edwin-austi...
2	2756	Abbott, Berenice	Female	1898–1991	1898.0	1991.0	Springfield, United States	Monson, United States	http://www.tate.org.uk/art/artists/berenice-ab...
3	1	Abbott, Lemuel Francis	Male	1760–1803	1760.0	1803.0	Leicestershire, United Kingdom	London, United Kingdom	http://www.tate.org.uk/art/artists/lemuel-fran...
4	622	Abrahams, Ivor	Male	born 1935	1935.0	NaN	Wigan, United Kingdom	NaN	http://www.tate.org.uk/art/artists/ivor-abraha...

	id	accession_number	artist	artistRole	artistId	title	medium	year	acquisitionYear	dimensions	gender	placeOfBirth	yearOfBirth	yearOfDeath	age_made	alive?	countryOfOrigin	European?
0	1035	A00001	Blake, Robert	artist	38	A Figure Bowing before a Seated Old Man with h...	Watercolour, ink, chalk and graphite on paper....	NaN	1922.0	support: 394 x 419 mm	Male	London, United Kingdom	1762.0	1787.0	NaN	False	United Kingdom	True
1	1036	A00002	Blake, Robert	artist	38	Two Drawings of Frightened Figures, Probably f...	Graphite on paper	NaN	1922.0	support: 311 x 213 mm	Male	London, United Kingdom	1762.0	1787.0	NaN	False	United Kingdom	True
2	1037	A00003	Blake, Robert	artist	38	The Preaching of Warning. Verso: An Old Man En...	Graphite on paper. Verso: graphite on paper	1785	1922.0	support: 343 x 467 mm	Male	London, United Kingdom	1762.0	1787.0	23.0	False	United Kingdom	True
3	1038	A00004	Blake, Robert	artist	38	Six Drawings of Figures with Outstretched Arms	Graphite on paper	NaN	1922.0	support: 318 x 394 mm	Male	London, United Kingdom	1762.0	1787.0	NaN	False	United Kingdom	True
4	1039	A00005	Blake, William	artist	39	The Circle of the Lustful: Francesca da Rimini...	Line engraving on paper	1826	1919.0	image: 243 x 335 mm	Male	London, United Kingdom	1757.0	1827.0	69.0	False	United Kingdom	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
69313	122959	T13864	P-Orridge, Genesis	artist	16646	It’s That Time Of The Month (from Tampax Romana)	Wood, Perspex, clock case, tampons and human b...	1975	2013.0	object: 305 x 305 x 135 mm	Male	Manchester, United Kingdom	1950.0	NaN	25.0	True	United Kingdom	True
69314	122960	T13865	P-Orridge, Genesis	artist	16646	Larvae (from Tampax Romana)	Perspex, Wood, hairpiece, tampon and human blood	1975	2013.0	object: 305 x 305 x 135 mm	Male	Manchester, United Kingdom	1950.0	NaN	25.0	True	United Kingdom	True
69315	122961	T13866	P-Orridge, Genesis	artist	16646	Living Womb (from Tampax Romana)	Wood, Perspex, plastic, photograph on paper, t...	1976	2013.0	object: 305 x 305 x 135 mm	Male	Manchester, United Kingdom	1950.0	NaN	26.0	True	United Kingdom	True
69316	121181	T13867	Hatoum, Mona	artist	2365	Present Tense	Soap and glass beads	1996	2013.0	displayed: 45 x 2410 x 2990 mm	Female	Bayrut, Al-Lubnan	1952.0	NaN	44.0	True	Al-Lubnan	False
69317	112306	T13868	Creed, Martin	artist	2760	Work No. 227: The lights going on and off	Gallery lighting	2000	2013.0	Overall display dimensions variable	Male	Wakefield, United Kingdom	1968.0	NaN	32.0	True	United Kingdom	True

Evaluating the changes in art museums ¶

Lindsay Hardy and Emily O'Connell ¶

Goals¶

The questions that we plan to answer:¶

Comparison with other museums¶

Outline¶

Collaboration Plan¶

MoMA Data Extraction, Cleaning, and Loading¶

Analysis: Trends in the MoMA Dataset¶

Has MoMA acquired more diverse (based on gender and nationality) works of art in recent years?¶

Has the MoMA strived to acquire art by more recent artists?¶

Examining National and Regional Diversity¶

Geographical Analysis¶

Comparing the Map of works per country pre-2012 and at the present¶

Comparing pre-2012 statistics with the present¶

Lorenz Curve and Gini Coefficient¶

Lorenz Curve¶

Gini Coefficient¶

Linear regression predicting proportion of art by female artists¶

Tate Data Extraction, Cleaning, and Loading¶

Analysis: Tate vs MoMA¶

How does the diversity of the two museums compare?¶

Comparing Western vs non-Western Art in each museum¶

Comparing Art by Male vs Female artists in each museum¶

Comparing Art by Male vs Female artists of non-Western art in each museum¶

Do countries favor their own art over foreign art? Could this be a bias towards American or British art across many museums?¶

Examining gender and home country bias in the two museums¶

Met Data Extraction, Cleaning, and Loading¶

Analysis: Met vs MoMA¶

Comparing Western vs non-Western Art in each museum¶

Proportion of American vs Non American Works for the MoMA and the Met¶

Final Conclusion¶

	Title	Name	Date	Acquisition Date	Artwork ID	Gender	Credit	Nationality	age_made	alive?	European?
0	Ferdinandsbrücke Project, Vienna, Austria, Ele...	Otto Wagner	1896.0	1996-04-09	2	Male	Fractional and promised gift of Jo Carole and ...	Austrian	55.0	False	True
1	City of Music, National Superior Conservatory ...	Christian de Portzamparc	1987.0	1995-01-17	3	Male	Gift of the architect in honor of Lily Auchinc...	French	43.0	True	True
2	Villa near Vienna Project, Outside Vienna, Aus...	Emil Hoppe	1903.0	1997-01-15	4	Male	Gift of Jo Carole and Ronald S. Lauder	Austrian	27.0	False	True
3	The Manhattan Transcripts Project, New York, N...	Bernard Tschumi	1980.0	1995-01-17	5	Male	Purchase and partial gift of the architect in ...	NaN	36.0	True	NaN
4	Villa, project, outside Vienna, Austria, Exter...	Emil Hoppe	1903.0	1997-01-15	6	Male	Gift of Jo Carole and Ronald S. Lauder	Austrian	27.0	False	True

	cumulative_prop_of_artists	cumulative_proportion_of_female_artists	decile start	decile end	date range
0	0	0.000000	0	0	0-0
1	10	4.831752	1929	1953	1929-1953
2	20	8.083892	1953	1964	1953-1964
3	30	11.833809	1964	1968	1964,1968
4	40	17.773943	1968	1974	1968-1974
5	50	26.866662	1974	1986	1974-1986
6	60	45.503418	1986	1998	1986-1998
7	70	58.711090	1998	2005	1998-2005
8	80	76.431937	2005	2009	2005-2009
9	90	90.960377	2009	2013	2009-2013
10	100	100.000000	2013	2016	2013-2016

	summary stats
slope	0.0024934
intercept	-4.8223284
r_value	0.6689979
p_value	0.0000000
std_err	0.0003059

	summary stats
slope	0.0024978
intercept	-4.8310949
r_value	0.6389450
p_value	0.0000000
std_err	0.0003427

	Object Number	Is Highlight	Is Timeline Work	Is Public Domain	Object ID	Gallery Number	Department	AccessionYear	Object Name	Title	...	River	Classification	Rights and Reproduction	Link Resource	Object Wikidata URL	Metadata Date	Repository	Tags	Tags AAT URL	Tags Wikidata URL
0	1979.486.1	False	False	False	1	NaN	The American Wing	1979	Coin	One-dollar Liberty Head Coin	...	NaN	Metal	NaN	http://www.metmuseum.org/art/collection/search/1	NaN	NaN	Metropolitan Museum of Art, New York, NY	NaN	NaN	NaN
1	1980.264.5	False	False	False	2	NaN	The American Wing	1980	Coin	Ten-dollar Liberty Head Coin	...	NaN	Metal	NaN	http://www.metmuseum.org/art/collection/search/2	NaN	NaN	Metropolitan Museum of Art, New York, NY	NaN	NaN	NaN
2	67.265.9	False	False	False	3	NaN	The American Wing	1967	Coin	Two-and-a-Half Dollar Coin	...	NaN	Metal	NaN	http://www.metmuseum.org/art/collection/search/3	NaN	NaN	Metropolitan Museum of Art, New York, NY	NaN	NaN	NaN
3	67.265.10	False	False	False	4	NaN	The American Wing	1967	Coin	Two-and-a-Half Dollar Coin	...	NaN	Metal	NaN	http://www.metmuseum.org/art/collection/search/4	NaN	NaN	Metropolitan Museum of Art, New York, NY	NaN	NaN	NaN
4	67.265.11	False	False	False	5	NaN	The American Wing	1967	Coin	Two-and-a-Half Dollar Coin	...	NaN	Metal	NaN	http://www.metmuseum.org/art/collection/search/5	NaN	NaN	Metropolitan Museum of Art, New York, NY	NaN	NaN	NaN

	Department	AccessionYear	Object Name	Title	Culture	Period	Artist Display Name	Artist Nationality	Artist Begin Date	Artist End Date	Artist Gender	Object Date	European	European?
8708	Modern and Contemporary Art	1976	Vase	Vase	American	NaN	George E. Ohr\|Biloxi Art Pottery	American	1857	1918	\|	1894–1906	False	True
87592	Modern and Contemporary Art	1986	Painting	Berthe David-Weill	Spanish	NaN	Salvador Dalí	Spanish	1904	1989	NaN	1952	False	True
117644	Modern and Contemporary Art	1991	Chess set and board	Chess Set	German (Weimar)	NaN	Josef Hartwig\|Heinz Nösselt	German	1880	1955	\|	ca. 1923	False	True
138438	Modern and Contemporary Art	1975	Figure	Woodcock	American	NaN	Edward Marshall Boehm	American	1912	1969	NaN	1940s	False	True
243271	Modern and Contemporary Art	2007	Drawing	Head of a Man	French	NaN	Roger de la Fresnaye	French	1885	1925	NaN	1925	False	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
473774	Modern and Contemporary Art	2002	Sugar bowl and lid	"Stratoware" Sugar bowl with lid	NaN	NaN	Eva Zeisel\|Francis Blod\|Universal Potteries In...	NaN	1906	2011	Female\|\|	designed ca. 1940; manufactured 1942–43	False	False
473775	Modern and Contemporary Art	2002	Creamer	"Stratoware" Creamer	NaN	NaN	Eva Zeisel\|Francis Blod\|Universal Potteries In...	NaN	1906	2011	Female\|\|	designed ca. 1940; manufactured 1942–43	False	False
473776	Modern and Contemporary Art	2002	Salt shaker	"Stratoware" Salt shaker	NaN	NaN	Universal Potteries Inc. (Cambridge, Ohio)\|Fra...	NaN	1906	2011	\|\|Female	designed ca. 1940; manufactured 1942–43	False	False
473777	Modern and Contemporary Art	2002	Pepper shaker	"Stratoware" Pepper shaker	NaN	NaN	Universal Potteries Inc. (Cambridge, Ohio)\|Eva...	NaN	1906	2011	\|Female\|	designed ca. 1940; manufactured 1942–43	False	False
473778	Modern and Contemporary Art	2002	Pitcher and lid	"Stratoware" Pitcher with lid	NaN	NaN	Eva Zeisel\|Francis Blod\|Universal Potteries In...	NaN	1906	2011	Female\|\|	designed ca. 1940; manufactured 1942–43	False	False