# install yfinance and upgrade dependencies
!pip install yfinance --quiet 2> /dev/null 
!pip install --upgrade pandas --quiet 2> /dev/null 
!pip install --upgrade pandas-datareader --quiet 2> /dev/null


# data structures and quality of life tools
import pandas as pd
import numpy as np
import re
import scipy.optimize
import datetime as dt
from collections import defaultdict
import copy

# libraries to collect data
import yfinance as yf
import requests
from bs4 import BeautifulSoup
import pandas_datareader.data as pdr

# libraries for visualization
import matplotlib.pyplot as plt
import matplotlib as mpl
# from matplotlib.pyplot import figure #Is this used?
import seaborn as sns


# download the cleaned senator disclosure records
url = 'https://raw.githubusercontent.com/danielwei816/Logrollers/main/SenatorCleaned.csv'
disclosures = pd.read_csv(url)

# the CSV carries a redundant index column; discard it
disclosures = disclosures.drop(columns=["Unnamed: 0"])
disclosures.head()


# inspect the row labelled 4825 (its Amount is overwritten manually further down)
disclosures.loc[4825]

Name                 James M Inhofe
Transaction.Date          10/5/2015
Owner                          Self
Ticker                          WFM
Asset.Name                      WFM
Asset.Type                    Stock
Type                    Sale (Full)
Amount                          NaN
Comment                          --
Name: 4825, dtype: object


# isolate stock trades; take an explicit copy so the assignment below writes to
# an independent frame instead of a filtered slice of `disclosures`
trades = disclosures[disclosures['Asset.Type'] == "Stock"].copy()
# manually mark Inhofe's row (label 4825)
trades.at[4825, 'Amount'] = 50000000
# remove all trades without a stock ticker; a missing ticker counts as
# "no ticker" too, which is what the previous `== False` comparison did
lacks_ticker = trades['Ticker'].str.contains('--', regex=False, na=True)
trades = trades[~lacks_ticker]
# renumber the rows 0..n-1 and discard the previous index
trades = trades.reset_index(drop=True)
len(trades)

12345


# switch to shorter, underscore-separated column names
column_names = {
  'Transaction.Date': 'Date',
  'Asset.Name': 'Asset_Name',
  'Asset.Type': 'Asset_Type',
  'Type': 'Transaction_Type',
  'Amount': 'Min_Value',
}
trades = trades.rename(columns=column_names)
trades.head()


# list the distinct transaction types present in the data
trades['Transaction_Type'].unique()

array(['Purchase', 'Sale (Full)', 'Sale (Partial)', 'Exchange'],
      dtype=object)


# shorten the two sale labels; every other transaction type is kept as it is
short_sale_labels = {"Sale (Full)": "Full", "Sale (Partial)": "Partial"}
trades['Transaction_Type'] = trades['Transaction_Type'].apply(
    lambda label: short_sale_labels.get(label, label)
)
trades['Transaction_Type'].unique()

array(['Purchase', 'Full', 'Partial', 'Exchange'], dtype=object)


# count the transactions recorded as exchanges
is_exchange = trades["Transaction_Type"] == "Exchange"
len(trades[is_exchange])

86


# retrieve the index labels of the rows that record an entire exchange
indexes = trades.index[trades["Transaction_Type"] == "Exchange"]
# regexes that retrieve the old and new tickers + asset names from the
# two-line "old\r\nnew" cells; raw strings keep the backslashes from being
# read as (invalid) Python string escapes
ticker_regex = re.compile(r"\^?(.*)\r\n\^?(.*)")
name_regex = re.compile(r"(.*) \(.*\r\n(.*) \(.*")

# replacement rows, collected first and added in a single concat because
# DataFrame.append no longer exists in pandas 2.x
split_rows = []
for i in indexes:
  # match the expected format
  ticker_match = ticker_regex.match(trades.at[i, "Ticker"])
  if ticker_match is None:
    # unexpected format: the exchange is dropped below without a replacement
    continue
  combined_name = trades.at[i, "Asset_Name"]
  name_match = name_regex.match(combined_name) if isinstance(combined_name, str) else None

  # break the transaction into two matching trades
  sale = trades.loc[i].copy()
  purchase = trades.loc[i].copy()
  sale['Ticker'] = ticker_match.group(1)
  purchase['Ticker'] = ticker_match.group(2)
  if name_match is not None:
    sale['Asset_Name'] = name_match.group(1)
    purchase['Asset_Name'] = name_match.group(2)
  # otherwise both halves keep the combined asset name instead of crashing
  sale['Transaction_Type'] = "Full"
  purchase['Transaction_Type'] = "Purchase"
  split_rows.append(sale)
  split_rows.append(purchase)

# remove the exchanges from the data set regardless of format, then add the
# purchase/sale pairs at the end of the table
trades = trades.drop(indexes)
if split_rows:
  trades = pd.concat([trades, pd.DataFrame(split_rows)])

# reset the table indices
trades = trades.reset_index(drop=True)
trades.tail()


# check the column types before converting dates and amounts
trades.dtypes

Name                 object
Date                 object
Owner                object
Ticker               object
Asset_Name           object
Asset_Type           object
Transaction_Type     object
Min_Value           float64
Comment              object
dtype: object


# parse the transaction dates and make sure the amounts are numeric
trades = trades.assign(
    Date=pd.to_datetime(trades['Date']),
    Min_Value=pd.to_numeric(trades['Min_Value']),
)
trades.dtypes

Name                        object
Date                datetime64[ns]
Owner                       object
Ticker                      object
Asset_Name                  object
Asset_Type                  object
Transaction_Type            object
Min_Value                  float64
Comment                     object
dtype: object


# time between the earliest and the latest trade in the data
trades['Date'].max()-trades['Date'].min()

Timedelta('3531 days 00:00:00')


# keep only the comments that are not the "--" placeholder
comments = trades.loc[trades['Comment'] != "--", "Comment"]
distinct_comments = comments.unique()
print(distinct_comments[0:10])
print(
    f"Total comments: {len(comments)}      \n"
    f"Unique comments: {len(distinct_comments)}      \n"
    f"Total rows: {len(trades)}"
)

['Agilent Tech common stock completed a spinoff on 10/22/14 to Keysight Tech'
 'KMB completed a spinoff of its health care business business now known as HYH on 11/01/14'
 'Dividend Reinvestment'
 '2015 Issued IR RSUs vested and actual shares issued; part of shares sold w/ remainder in UBS CSSP'
 '2016 Issued IR RSUs vested and actual shares issued; part of shares sold w/ remainder in UBS CSSP'
 'Transaction is due to sale of a security from redemption of incentive units vested on April 21, 2017'
 'Complete Disposition of Ingersoll Rand Stock'
 'Complete Disposition of Vulcan Materials Stock' 'Mandatory Cash Merger'
 'WTR-Aqua America, Inc name change to WTRG-Essential Utilities, Inc on 2/3/2020.']
Total comments: 842      
Unique comments: 124      
Total rows: 12325


# lower bound -> upper bound (US dollars) of each disclosure amount range
# NOTE(review): the last key is 50000000 rather than 50000001; it matches the
# value written manually into row 4825 -- confirm this is intended
range_map = {
    1: 1000,
    1001: 15000,
    15001: 50000,
    50001: 100000,
    100001: 250000,
    250001: 500000,
    500001: 1000000,
    1000001: 5000000,
    5000001: 25000000,
    25000001: 50000000,
    50000000: 175000000 
}

# correct amount lower bound for trades under $1000 as determined by the comment
# (a missing comment is treated as not mentioning "less than")
mentions_less_than = trades['Comment'].str.lower().str.contains(
    'less than', regex=False, na=False)
trades.loc[mentions_less_than, 'Min_Value'] = 1

# compute maximum and average transaction amounts with a column-wise lookup
trades['Max_Value'] = trades['Min_Value'].map(range_map)
# an amount that is not a known lower bound is still reported as a KeyError
unmapped = trades.loc[trades['Max_Value'].isna(), 'Min_Value'].unique()
if len(unmapped) > 0:
  raise KeyError(f"no disclosure range defined for amounts: {list(unmapped)}")
trades['Avg_Value'] = (trades['Min_Value'] + trades['Max_Value']) / 2
trades.head()


# find the frequency of each transaction amount in a single pass, ordered by
# the lower bound of the range
amount_tally = trades['Min_Value'].value_counts().sort_index()

# sorted list of transaction amounts and the matching trade counts
ranges = amount_tally.index.tolist()
counts = amount_tally.tolist()

# fit exponential models to this data including and excluding the $1-$1000 range
# (xs are the bar positions, one per amount range)
xs = np.arange(0, len(counts))
def fit_exp(x, a, b):
    """Evaluate the exponential model ``a * exp(-b * x)``.

    ``x`` may be a scalar or a NumPy array; ``a`` is the value at ``x = 0``
    and ``b`` is the decay rate.
    """
    decay = np.exp(-b * x)
    return a * decay
init = (1e4, 1) # start with values near those we expect
# params1 leaves out the first bar, params2 uses every bar; the covariance
# matrix returned by curve_fit is not used afterwards
params1, cv = scipy.optimize.curve_fit(fit_exp, xs[1:], counts[1:], init)
params2, cv = scipy.optimize.curve_fit(fit_exp, xs, counts, init)

# create x axis labels; the backslash before each dollar sign is written as
# "\\$" so the text is unchanged (backslash + $, presumably to keep matplotlib
# from reading $ as a math delimiter) without an invalid "\$" string escape
labels = [f"\\${x:,} - \\${range_map[x]:,}" for x in ranges]

f, ax = plt.subplots(figsize=(14, 8))

# plot bars and regression lines
plt.bar(xs, counts, tick_label = labels, log = True)
plt.plot(xs, fit_exp(xs, *params1), label = "Above $1000", color = "orange")
plt.plot(xs, fit_exp(xs, *params2), label = "Full Model", color = "green")

# format and label the graph
plt.xticks(ticks = xs, rotation = 85)
plt.ylim(0.5, 1e5)
plt.title("Frequency of Trades by Amount")
plt.xlabel("Amount Range (US Dollars)")
plt.ylabel("Frequency")
plt.legend()
plt.show()

# print equations
print(f"Modeling all of the data gives\n\t{params2[0]:.2f} * e ^ (-{params2[1]:.2f}x)")
print(f"\nModeling required disclosures gives\n\t{params1[0]:.2f} * e ^ (-{params1[1]:.2f}x)")

Modeling all of the data gives
	3774.76 * e ^ (-0.29x)

Modeling required disclosures gives
	42389.44 * e ^ (-1.51x)


# find the frequency of each senator in the data in a single pass;
# value_counts orders the result from largest frequency to smallest
trade_freq = trades['Name'].value_counts().to_dict()

# unique senator names and their trade counts, in matching order
names = list(trade_freq)
counts = list(trade_freq.values())

# plot bars
f, ax = plt.subplots(figsize=(14, 8))
plt.bar(trade_freq.keys(), trade_freq.values())

# format and label the graph
plt.xticks(ticks = list(trade_freq.keys()), rotation = 85)
plt.title("Frequency of Trades by Senator (All Senators)")
plt.xlabel("Senator")
plt.ylabel("Number of Trades")
plt.show()

# print key statistics
print(f"{len(names)} unique senators")
print(f"The most frequent traders have made as many as {max(counts)} trades")

55 unique senators
The most frequent traders have made as many as 3537 trades


# number of senators shown in the high-frequency chart
top_traders = 7
busiest = list(trade_freq.items())[:top_traders]
busiest_names = [senator for senator, _ in busiest]
busiest_counts = [n_trades for _, n_trades in busiest]

# draw one bar per high-frequency senator
f, ax = plt.subplots(figsize=(14, 8))
ax.bar(busiest_names, busiest_counts)

# tick labels, title and axis labels
plt.xticks(ticks = busiest_names, rotation = 85)
ax.set_title("Frequency of Trades by Senator (High Frequency)")
ax.set_xlabel("Senator")
ax.set_ylabel("Number of Trades")
plt.show()

# show the top traders together with their number of trades
busiest

[(' Robert P Corker Jr.', 3537),
 (' David A Perdue Jr', 3410),
 (' Thad Cochran', 619),
 (' Shelley M Capito', 598),
 (' Sheldon Whitehouse', 560),
 (' Thomas R Carper', 553),
 (' Pat Roberts', 528)]


# everyone after the first `top_traders` senators in trade_freq
remaining_names = list(trade_freq.keys())[top_traders:]
remaining_counts = list(trade_freq.values())[top_traders:]

# draw one bar per remaining senator
f, ax = plt.subplots(figsize=(14, 8))
ax.bar(remaining_names, remaining_counts)

# tick labels, title and axis labels
plt.xticks(ticks = remaining_names, rotation = 85)
ax.set_title("Frequency of Trades by Senator (Low Frequency)")
ax.set_xlabel("Senator")
ax.set_ylabel("Number of Trades")
plt.show()


# every senator that appears in the trade table
names = list(set(trades['Name']))

# conservative volume: add up the lower bound of each of a senator's trades
volumes = []
for senator_name in names:
  belongs_to_senator = trades['Name'] == senator_name
  volumes.append(trades.loc[belongs_to_senator, 'Min_Value'].sum())

# order the senators from largest volume to smallest volume
ranked_volumes = sorted(zip(names, volumes), key = lambda pair: pair[1], reverse=True)
trade_vol = dict(ranked_volumes)

# draw one bar per senator
f, ax = plt.subplots(figsize=(14, 8))
ax.bar(trade_vol.keys(), trade_vol.values(), log = False)

# tick labels, plain comma-separated dollar amounts on the y axis, titles
plt.xticks(ticks = list(trade_vol.keys()), rotation = 85)
ax.ticklabel_format(axis = 'y', style = 'plain')
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_title("Volume of Trades by Senator (Conservative Transaction Amount Estimates)")
ax.set_xlabel("Senator")
ax.set_ylabel("Total US Dollars Transacted")
plt.show()

# report the largest single-senator volume and the overall total
print(f"Single senators have traded as much as ${max(volumes):,.2f}")
print(f"Senators traded a total of ${sum(volumes):,.2f}")

Single senators have traded as much as $58,042,259.00
Senators traded a total of $207,349,324.00


# every senator that appears in the trade table
names = list(set(trades['Name']))

# moderate volume: add up the midpoint value of each of a senator's trades
volumes = []
for senator_name in names:
  belongs_to_senator = trades['Name'] == senator_name
  volumes.append(trades.loc[belongs_to_senator, 'Avg_Value'].sum())

# order the senators from largest volume to smallest volume
ranked_volumes = sorted(zip(names, volumes), key = lambda pair: pair[1], reverse=True)
trade_vol = dict(ranked_volumes)

# draw one bar per senator
f, ax = plt.subplots(figsize=(14, 8))
ax.bar(trade_vol.keys(), trade_vol.values(), log = False)

# tick labels, plain comma-separated dollar amounts on the y axis, titles
plt.xticks(ticks = list(trade_vol.keys()), rotation = 85)
ax.ticklabel_format(axis = 'y', style = 'plain')
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_title("Volume of Trades by Senator (Moderate Transaction Amount Estimates)")
ax.set_xlabel("Senator")
ax.set_ylabel("Total US Dollars Transacted")
plt.show()

# report the largest single-senator volume and the overall total
print(f"Single senators have traded as much as ${max(volumes):,.2f}")
print(f"Senators traded a total of ${sum(volumes):,.2f}")

Single senators have traded as much as $125,998,629.50
Senators traded a total of $497,987,162.00


# every senator that appears in the trade table
names = list(set(trades['Name']))

# liberal volume: add up the upper bound of each of a senator's trades
volumes = []
for senator_name in names:
  belongs_to_senator = trades['Name'] == senator_name
  volumes.append(trades.loc[belongs_to_senator, 'Max_Value'].sum())

# order the senators from largest volume to smallest volume
ranked_volumes = sorted(zip(names, volumes), key = lambda pair: pair[1], reverse=True)
trade_vol = dict(ranked_volumes)

# draw one bar per senator
f, ax = plt.subplots(figsize=(14, 8))
ax.bar(trade_vol.keys(), trade_vol.values(), log = False)

# tick labels, plain comma-separated dollar amounts on the y axis, titles
plt.xticks(ticks = list(trade_vol.keys()), rotation = 85)
ax.ticklabel_format(axis = 'y', style = 'plain')
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_title("Volume of Trades by Senator (Liberal Transaction Amount Estimates)")
ax.set_xlabel("Senator")
ax.set_ylabel("Total US Dollars Transacted")
plt.show()

# report the largest single-senator volume and the overall total
print(f"Single senators have traded as much as ${max(volumes):,}")
print(f"Senators traded a total of ${sum(volumes):,}")

Single senators have traded as much as $193,955,000
Senators traded a total of $788,625,000


# stock_list = list(trades["Ticker"].unique())
# data = yf.download(stock_list, start=trades["Date"].min(), end=trades["Date"].max() + dt.timedelta(days=1))
# data['Adj Close'].reset_index().to_csv("tickers.csv")


# load the saved price table (presumably the adjusted closes written by the
# commented-out download above -- confirm)
prices_url = "https://raw.githubusercontent.com/danielwei816/Logrollers/main/tickers.csv"
price_history = pd.read_csv(prices_url)
# discard the saved index column and parse the dates
price_history = price_history.drop(columns=['Unnamed: 0'])
price_history['Date'] = pd.to_datetime(price_history['Date'])
price_history.tail()


# start with no share counts; they stay NaN wherever no price is available
trades["Min_Shares"] = np.nan
trades["Avg_Shares"] = np.nan
trades["Max_Shares"] = np.nan

# index the prices by date once so each trade is a direct lookup instead of a
# scan of the whole price table; dates that occur more than once are left out,
# which keeps the previous "exactly one matching row" requirement
prices_by_date = price_history.set_index('Date')
prices_by_date = prices_by_date[~prices_by_date.index.duplicated(keep=False)]

for i, row in trades.iterrows():
  trade_date = row["Date"]
  ticker = row["Ticker"]

  # skip trades without a usable date or without a price row for that date
  if pd.isna(trade_date) or trade_date not in prices_by_date.index:
    continue
  # skip tickers that have no price column instead of raising a KeyError
  if ticker not in prices_by_date.columns:
    continue

  # retrieve price for the stock on the transaction date
  price = prices_by_date.at[trade_date, ticker]

  # if found, use this price to compute {min, avg, max} shares
  if not np.isnan(price):
    trades.at[i, "Min_Shares"] = row["Min_Value"] / price
    trades.at[i, "Avg_Shares"] = row["Avg_Value"] / price
    trades.at[i, "Max_Shares"] = row["Max_Value"] / price


# number of trades for which a share count could be computed
has_share_count = trades['Max_Shares'].notnull()
len(trades[has_share_count])

10019


# realized gains per senator and ticker, e.g. min_real_gains[senator][ticker];
# any (senator, ticker) pair that was never written reads as 0.0
min_real_gains = defaultdict(lambda: defaultdict(float))
max_real_gains = defaultdict(lambda: defaultdict(float))
avg_real_gains = defaultdict(lambda: defaultdict(float))

# total dollars each senator spent on purchases (0.0 until first written)
min_cost = defaultdict(float)
max_cost = defaultdict(float)
avg_cost = defaultdict(float)

value_columns = ('Min_Value', 'Max_Value', 'Avg_Value')

for (senator, stock), group in trades.groupby(['Name', 'Ticker']):
  # split the group once into purchases and (full or partial) sales
  purchases = group[group['Transaction_Type'] == 'Purchase']
  sales = group[group['Transaction_Type'].isin(['Full', 'Partial'])]

  # dollars bought / sold under each of the three value estimates
  bought = {column: sum(purchases[column]) for column in value_columns}
  sold = {column: sum(sales[column]) for column in value_columns}

  # profit for this (senator, stock): the minimum pairs the smallest sale
  # value with the largest purchase value, the maximum does the opposite
  min_real_gains[senator][stock] = sold['Min_Value'] - bought['Max_Value']
  max_real_gains[senator][stock] = sold['Max_Value'] - bought['Min_Value']
  avg_real_gains[senator][stock] = sold['Avg_Value'] - bought['Avg_Value']

  # accumulate the dollars this senator invested
  min_cost[senator] += bought['Min_Value']
  max_cost[senator] += bought['Max_Value']
  avg_cost[senator] += bought['Avg_Value']


# shares retained per senator and ticker after replaying every trade, e.g.
# min_shares_held[senator][ticker]; unseen pairs read as 0.0
min_shares_held = defaultdict(lambda: defaultdict(float))
max_shares_held = defaultdict(lambda: defaultdict(float))
avg_shares_held = defaultdict(lambda: defaultdict(float))

for row_label, row in trades.iterrows():
  holder = row['Name']
  symbol = row['Ticker']

  # a trade without a computed share count contributes nothing
  fewest = 0 if np.isnan(row['Min_Shares']) else row['Min_Shares']
  most = 0 if np.isnan(row['Max_Shares']) else row['Max_Shares']
  middle = 0 if np.isnan(row['Avg_Shares']) else row['Avg_Shares']

  # purchases add shares; every other transaction type removes them, taking
  # the largest count off the minimum holding and the smallest off the maximum
  if row['Transaction_Type'] == 'Purchase':
    min_change, max_change, avg_change = fewest, most, middle
  else:
    min_change, max_change, avg_change = -most, -fewest, -middle

  min_shares_held[holder][symbol] += min_change
  max_shares_held[holder][symbol] += max_change
  avg_shares_held[holder][symbol] += avg_change


start_date = trades['Date'].min()
end_date = trades['Date'].max()

# the price rows for the first and last trade dates do not change inside the
# loops below, so they are looked up once here
start_prices = price_history[price_history['Date'] == start_date]
end_prices = price_history[price_history['Date'] == end_date]

# total gains start from the realized gains; unrealized gains are added below
min_total_gains = copy.deepcopy(min_real_gains)
max_total_gains = copy.deepcopy(max_real_gains)
avg_total_gains = copy.deepcopy(avg_real_gains)

for senator in min_shares_held:
  min_shares = min_shares_held[senator]
  max_shares = max_shares_held[senator]
  avg_shares = avg_shares_held[senator]
  
  # loop through the tickers. These are the same for min, avg, max shares
  for stock in min_shares:
    # retrieve start and end prices of the stock over the disclosure period
    curr_price = end_prices[stock].values[0]
    start_price = start_prices[stock].values[0]

    min_retained = min_shares[stock]
    max_retained = max_shares[stock]
    avg_retained = avg_shares[stock]
    # calculate value of current/initial assets: a positive balance is valued
    # at the end price, a non-positive one (more sold than bought) at the
    # start price
    min_unreal_gains = min_retained * curr_price if min_retained > 0 else min_retained * start_price
    max_unreal_gains = max_retained * curr_price if max_retained > 0 else max_retained * start_price
    avg_unreal_gains = avg_retained * curr_price if avg_retained > 0 else avg_retained * start_price
    
    # adjust each senator's total cost if they have an initial asset
    # (a NaN value compares False with < 0 and therefore adds nothing)
    min_cost[senator] += -min_unreal_gains if min_unreal_gains < 0 else 0
    max_cost[senator] += -max_unreal_gains if max_unreal_gains < 0 else 0
    avg_cost[senator] += -avg_unreal_gains if avg_unreal_gains < 0 else 0

    # sum realized and unrealized gains to get total gains
    min_total_gains[senator][stock] += min_unreal_gains if not np.isnan(min_unreal_gains) else 0
    max_total_gains[senator][stock] += max_unreal_gains if not np.isnan(max_unreal_gains) else 0
    avg_total_gains[senator][stock] += avg_unreal_gains if not np.isnan(avg_unreal_gains) else 0


# sum all senator stock prices for all dates and compute percent return;
# the derived columns are left out of the sum so that re-running this cell
# does not add agg_price / percent_gain into the aggregate
ticker_prices = price_history.drop(columns=['agg_price', 'percent_gain'], errors='ignore')
price_history['agg_price'] = ticker_prices.sum(axis=1, numeric_only=True)
# NOTE(review): the start value is read from the second row (position 1), not
# the first -- confirm this is intended
start_agg_price = price_history.iloc[1]['agg_price']
end_agg_price = price_history.iloc[-1]['agg_price']
(end_agg_price - start_agg_price) / start_agg_price * 100

102.25719828397546


# download the S&P 500 index from the first trade date up to the day after
# the last trade date
first_trade_day = trades["Date"].min()
day_after_last_trade = trades["Date"].max() + dt.timedelta(days=1)
sp500 = yf.download(['^GSPC'], start=first_trade_day, end=day_after_last_trade)

# keep the date and the adjusted close, stored under the name 'Price'
sp500 = sp500.reset_index()[['Date', 'Adj Close']]
sp500 = sp500.rename(columns={'Adj Close': 'Price'})

# percent change between row 1 and the last row
# NOTE(review): the start price is read from row 1, not row 0 -- confirm
last_row = len(sp500) - 1
start_sp500_price = sp500.loc[1, 'Price']
end_sp500_price = sp500.loc[last_row, 'Price']
sp500_change = (end_sp500_price - start_sp500_price) / start_sp500_price * 100
sp500_change

[*********************100%***********************]  1 of 1 completed

252.47801880667592


# calculate percent gain for each group on each date
price_history['percent_gain'] = (price_history['agg_price'] - start_agg_price) / start_agg_price * 100
sp500['percent_gain'] = (sp500['Price'] - start_sp500_price) / start_sp500_price * 100

# name each series before merging so the result does not depend on the
# automatic _x / _y suffixes pandas adds to clashing column names
senator_pct = price_history[['Date', 'percent_gain']].rename(
    columns={'percent_gain': 'Senator Stocks'})
sp500_pct = sp500[['Date', 'percent_gain']].rename(
    columns={'percent_gain': 'S&P 500'})
pct_merged = pd.merge(senator_pct, sp500_pct, on=['Date'])

# plot lines: melt to long form so seaborn draws one line per series
sns.set(rc = {'figure.figsize':(15,8)},font_scale=1.5)
pct_plot = sns.lineplot(x='Date', y='value', hue='variable', 
             data=pd.melt(pct_merged, ['Date']))

# label plot
pct_plot.set_xlabel("Year", fontsize = 20)
pct_plot.set_ylabel("Percentage Gain", fontsize = 20)
pct_plot.set_title('Percent Gain of Senator Stocks vs S&P 500 Over Time', fontsize = 20)

Text(0.5, 1.0, 'Percent Gain of Senator Stocks vs S&P 500 Over TIme')


# total return of each senator: the sum of their gains over every stock
# purchased/sold, under the minimum, maximum and average estimates
min_total_returns = {
    senator: sum(min_total_gains[senator].values()) for senator in min_total_gains
}
max_total_returns = {
    senator: sum(max_total_gains[senator].values()) for senator in min_total_gains
}
avg_total_returns = {
    senator: sum(avg_total_gains[senator].values()) for senator in min_total_gains
}


# percent return of every senator across all investments; the minimum return
# is measured against the maximum cost and the maximum return against the
# minimum cost
min_roi = {
    senator: min_total_returns[senator]/max_cost[senator] * 100
    for senator in min_total_returns
}
max_roi = {
    senator: max_total_returns[senator]/min_cost[senator] * 100
    for senator in min_total_returns
}
avg_roi = {
    senator: avg_total_returns[senator]/avg_cost[senator] * 100
    for senator in min_total_returns
}

# order all three dictionaries by the average roi, best first
senators_by_avg_roi = sorted(avg_roi, key=avg_roi.get, reverse=True)
sorted_avg_roi = {senator: avg_roi[senator] for senator in senators_by_avg_roi}
sorted_min_roi = {senator: min_roi[senator] for senator in senators_by_avg_roi}
sorted_max_roi = {senator: max_roi[senator] for senator in senators_by_avg_roi}


# overlay horizontal bars for the maximum, average and minimum ROI of every
# senator, with the S&P 500 as the first row for comparison
f, ax = plt.subplots(figsize=(15, 20))
sns.set_color_codes("muted")

# drawn in this order, so later layers are painted over earlier ones
roi_layers = [
    (sorted_max_roi, 'Maximum ROI', 'orange'),
    (sorted_avg_roi, 'Average ROI', "g"),
    (sorted_min_roi, 'Minimum ROI', 'r'),
]
for roi_by_senator, legend_label, bar_color in roi_layers:
  # the S&P 500 row gets a zero-length bar in the senator layers
  sns.barplot(y=['S&P 500'] + list(roi_by_senator.keys()),
              x=[0] + list(roi_by_senator.values()),
              label=legend_label, orient='h', color=bar_color)

# final layer: only the S&P 500 row has a bar, every senator row is zero
sns.barplot(y=['S&P 500'] + list(sorted_min_roi.keys()),
            x=[sp500_change] + [0]*len(sorted_min_roi),
            orient='h', color="b")
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set(ylabel="Senator", xlabel="Percent Return on Investments", title='Percent Return on Investments by Senator')

[Text(0, 0.5, 'Senator'),
 Text(0.5, 0, 'Percent Return on Investments'),
 Text(0.5, 1.0, 'Percent Return on Investments by Senator')]


# number of senators whose maximum ROI is above the S&P 500 change
sum(1 for roi in max_roi.values() if roi > sp500_change)

38


# senators whose average ROI is above the S&P 500 change, with that ROI
[(senator, roi) for senator, roi in avg_roi.items() if roi > sp500_change]

[(' A. Mitchell McConnell Jr.', 392.7092354234115),
 (' Barbara A Mikulski', 498.8457609991058),
 (' Daniel S Sullivan', 257.79433766608923),
 (' James M Inhofe', 1624.869850122107),
 (' Mark R Warner', 868.0109398806682),
 (' Michael F Bennet', 1999.9880000799994),
 (' Tina Smith', 533.2333737994525)]


# senators whose minimum ROI is above the S&P 500 change, with that ROI
[(senator, roi) for senator, roi in min_roi.items() if roi > sp500_change]

[(' James M Inhofe', 553.846650964982),
 (' Mark R Warner', 530.4690056376435),
 (' Michael F Bennet', 950.003),
 (' Tina Smith', 310.2210358690841)]


# sum the profits over all senators
min_senate_returns = sum(min_total_returns.values())
max_senate_returns = sum(max_total_returns.values())
avg_senate_returns = sum(avg_total_returns.values())

# sum the costs over all senators
min_senate_cost = sum(min_cost.values())
max_senate_cost = sum(max_cost.values())
avg_senate_cost = sum(avg_cost.values())

# print percent returns; the minimum return is measured against the maximum
# cost and the maximum return against the minimum cost
print(f"Minimum (percent) return: {min_senate_returns/max_senate_cost:.2%}")
print(f"Average (percent) return: {avg_senate_returns/avg_senate_cost:.2%}")
print(f"Maximum (percent) return: {max_senate_returns/min_senate_cost:.2%}")

Minimum (percent) return: -70.46%
Average (percent) return: 130.67%
Maxmum (percent) return: 469.65%


# list every distinct comment attached to a trade
trades['Comment'].unique()

array(['--',
       'Agilent Tech common stock completed a spinoff on 10/22/14 to Keysight Tech',
       'KMB completed a spinoff of its health care business business now known as HYH on 11/01/14',
       'Dividend Reinvestment',
       '2015 Issued IR RSUs vested and actual shares issued; part of shares sold w/ remainder in UBS CSSP',
       '2016 Issued IR RSUs vested and actual shares issued; part of shares sold w/ remainder in UBS CSSP',
       'Transaction is due to sale of a security from redemption of incentive units vested on April 21, 2017',
       'Complete Disposition of Ingersoll Rand Stock',
       'Complete Disposition of Vulcan Materials Stock',
       'Mandatory Cash Merger',
       'WTR-Aqua America, Inc name change to WTRG-Essential Utilities, Inc on 2/3/2020.',
       "The filer's portfolio is managed by a third party investment professional. The filer was advised of these transactions on July 7, 2020. This activity is part of an ongoing long term portfolio diversification strategy that regularly includes similar transactions.",
       "The filer's portfolio is managed by a third party investment professional. The filer was advised of these transactions on April 7, 2020. This activity is part of an ongoing long-term portfolio diversification strategy that regularly includes similar transactions.",
       "The filer's portfolio is managed by a third party investment professional. The filer was advised of these transactions on June 2, 2020. This activity is part of an ongoing long-term portfolio diversification strategy that regularly includes similar transactions.",
       'Filer notified of transaction >45 days after it occurred.',
       "The filer's portfolio is managed by a third party investment professional. The filer was advised of these transactions on April 13, 2021. This activity is part of an ongoing long-term portfolio diversification strategy that regularly includes similar transactions",
       'This purchase is a reflection of Perdue exercising his stock option listed on previous disclosures',
       'Full sale from Wells Fargo Acct', 'Full sale of Wells Fargo Acct',
       'Full sale from Wells Fargo account.',
       'Quarterly dividend reinvestment',
       'Quarterly Dividend Reinvestment', 'R', 'SEP', 'Sep', 'Fr', 'FR',
       'Full sale from Wells Fargo Acct. This full sale was less than $1,000 but disclosing anyway for full transparency.',
       'sep',
       'This comment was incorrectly labeled as "partial" when it should have been labeled "full"',
       'This transaction was incorrectly labeled "partial" when it should have been labeled "full"',
       'This purchase initially listed the wrong ticker symbol and is now corrected',
       'Corrected the owner of the asset from "spouse" to "joint"',
       'Rate: 6.25% Maturation Date: 6/1/2035',
       'This asset was acquired through the reverse stock split from DowDuPont on June 3.',
       'Transaction incorrectly noted as partial sale. Corrected to display a purchase.',
       'Transaction incorrectly filed as a partial sale. Corrected to display purchase.',
       '20% Share in Brombach Family LP - Merrill Lynch',
       'The wrong ticker was used in the initial reporting of this transaction. The Vanguard Emerging Markets ticker was used instead of the Vanguard Ultra-Short Term Bond ticker.',
       'Less than $1001.', 'Purchase was less than $1001.',
       'Transaction was less than $1000',
       'Transaction was less than $1000.', 'Sale was less than $1000.',
       'Less than $1000', 'Less than $1000.',
       'Note: Transaction was less than $1,000.',
       'Note: This purchase was less than $1,000.',
       'Changed transaction date from 1/4/17 (Settlement date) to 12/29/16 (Trade date).',
       'LINN ENERGY LLC PUBLICLY TRADED PARTNERSHIP',
       'VANGUARD NATURAL RESOURCES PUBLICLY TRADED PARTNERSHIP',
       'LEGACY RESERVES LTD PUBLICLY TRADED PARTNERSHIP',
       'ENERGY TRANSFER PARTNERS PUBLICLY TRADED PARTNERSHIP',
       'CARLYLE GROUP LTD PUBLICLY TRADED PARTNERSHIP',
       'Shares acquired result from a stock dividend by UA - Under Armour Inc.',
       'Transaction within Wells Fargo Advisors IRA - Clearbridge Managed Equities account.',
       'Transaction within Wells Fargo Advisors IRA - NWQ Managed Equities account.',
       "Stock inherited from deceased mother's estate. Transfer paperwork signed 4/21/2017.",
       'Notified to filer on or after March 16, 2020',
       'Timing and method of April transactions implemented at discretion of third party advisers.',
       'Transactions on lines 148 through 175 notified to filer on or after April 1, 2020',
       'Notified to filer on or after March 1, 2020',
       'Equity compensation by former employer with predetermined sale dates pursuant to SEC Rule 10b5-1 plan filed with such company',
       'Equity pursuant to the company he founded with predetermined sale dates pursuant to SEC Rule 10b5-1 plan filed with such company',
       'Equity pursuant to the company he founded with predetermined dates (exercise of stock options) pursuant to SEC Rule 10b5-1 plan filed with such company',
       'Exercise of stock options prior to expiration date',
       'Sale of exercised options (sold to cover taxes and cost of prior exercise of options); shares sold through a 10b5-1 plan, which is an SEC compliant pre-determined plan',
       'Partial exercise of stock options',
       'Sale of exercised options and shares received as compensation; shares sold through a 10b5-1 plan, which is an SEC compliant pre determined plan',
       'Shares sold through a 10b5-1 plan, which is an SEC compliant pre-determined plan',
       'Lines 1-19 notified to filer on or after March 1, 2020; lines 20-42 notified to filer on or after March 18, 2020',
       'Originally reported as Purchase in error', 'Sale of two blocks',
       'Underlying asset of MRW Biotech Investors, LLC. Previously held in New River Management V.',
       'Underlying asset of MRW Biotech Investors, LLC',
       'Underlying asset of MRW Biotech Investors, LLC. Previously held in New River V',
       'r', 'Jt', '555', 'seo', 'jt', 'roth', 'ugma--lily', 'fran45',
       'Child #2',
       'Name change - Old Florida Bank became IberiaBank Corp. due to acquisition',
       'Shares acquired through special dividend issued by AIV in the form of stock.',
       'same transaction using settlement date was deleted in 11/18/14 report',
       'Formerly reported as Isis Pharmaceuticals',
       'LW received as a split from CAG on 11/09/16',
       'Acquired as a result of a 9/21/16 inheritance',
       'Acquired as a result of a 9/21/16 inheritance.',
       "Rec'd in spinoff from United Technologies & Raytheon Technologies merger //Due to standing separation agreement with financial advisor, Senator first notified of activity 5/26/20 in compliance with Senate disclosure process.",
       "Rec'd in spinoff from United Technologies & Raytheon Technologies merger // Due to standing separation agreement with financial advisor, Senator first notified of activity 5/26/20 in compliance with Senate disclosure process.",
       "Rec'd in spinoff from United Technologies & Raytheon Technologies merger// Due to standing separation agreement with financial advisor, Senator first notified of activity 5/26/20 in compliance with Senate disclosure process.",
       'KLXI IS A SPINOFF OF BEAV', 'Overlooked in PTR report',
       'Inadvertently omitted on original report',
       'inadvertently omitted on original report',
       'Split into 2 share classes A & C. He sold the A shares and kept the C shares.',
       'KMB completed spinoff of healthcare business now known as HYH on 11/01/14.',
       'Asset acquired through UBS accounts in small pieces(less than $1000) from Jan - Apr 2014.',
       'This is NOT a purchase, it is a spinoff from the old DLPH.',
       'This is a capital gain reinvestment.',
       'The filer’s financial advisors actively manage his accounts without the filer’s day to day involvement. The filer was unaware that the specific transactions in this report occurred. Upon becoming aware of the transactions, the filer expeditiously prepared and submitted this report to the Committee.',
       'Report amended to reflect correct ticker symbol.',
       'Spin off from asset still held (Pentair plc) and then sold.',
       'Asset held in Truchas Capital 401K Plan',
       'Asset held in Charles Schwab - Rollover IRA',
       'Remainder of stock was sold later in the year and under the threshold.',
       'Asset owned by Roundstone Ventures LLC',
       'Asset held by Roundstone Ventures, LLC',
       'This transaction was filed on October 2 but was re-filed with the new system',
       'ESRX merged with CI, Cigna.',
       'Alcon Inc. (ALC) shares received in exchange for Novartis AG (NVS) shares as a result of spin-off.',
       'Exchange occurred as a result of the Sprint/T-Mobile merger.',
       'Shares of CTL received as a result of merger.',
       'Exchange of stock due to corporate merger.',
       'Exchange of stock due to corporate spin-off of the legacy assets and liabilities of NTRP not merged into PTPI.',
       'Exchange of stock due to partial corporate spin-off and renaming.',
       'Due to standing separation agreement with financial advisor, Senator first notified of activity 5/26/20 in compliance with Senate disclosure process.',
       "Post merger UTX changed it's name from United Technologies Corporation to Raytheon Technologies Corporation // Due to standing separation agreement with financial advisor, Senator first notified of activity 5/26/20 in compliance with Senate disclosure process.",
       'Merger - stock of Access National Corporation exchanged for Union Bankshares Corporation',
       'LIN acquired from PX due to mandatory stock merger.',
       'FLIR Systems Merger With Teledyne Asset Held in Roundstone Ventures, LLC'],
      dtype=object)

	Name	Date	Owner	Ticker	Asset_Name	Asset_Type	Transaction_Type	Min_Value	Comment
12320	Thad Cochran	12/5/2017	Self	APTV	Aptiv PLC	Stock	Purchase	1001.0	--
12321	Thomas R Carper	2/5/2021	Spouse	DD	DuPont de Nemours, Inc.	Stock	Full	1001.0	--
12322	Thomas R Carper	2/5/2021	Spouse	IFF	International Flavors & Fragrances Inc.	Stock	Purchase	1001.0	--
12323	William F Hagerty IV	5/17/2021	Self	FL4.SG	FLIR SYSTEMS INC. Registered Sh	Stock	Full	15001.0	FLIR Systems Merger With Teledyne Asset Held i...
12324	William F Hagerty IV	5/17/2021	Self	TDY	Teledyne Technologies Incorporated	Stock	Purchase	15001.0	FLIR Systems Merger With Teledyne Asset Held i...

	Date	A	AA	AAGIY	AAL	AAN	AAON	AAPL	AAT	ABB	...	YUM	YUMC	ZAYO	ZBH	ZION	ZIOP	ZM	ZMH	ZNGA	ZTS
2495	2021-07-26	149.865753	38.875099	47.913841	22.080000	27.719999	60.000000	148.143631	35.804115	36.950001	...	121.938988	64.216637	NaN	154.444763	50.569202	NaN	373.140015	NaN	10.35	200.258850
2496	2021-07-27	149.566513	38.186604	45.772606	21.459999	30.129999	59.869999	145.936234	35.960251	36.830002	...	123.944145	60.990372	NaN	157.530762	50.765667	NaN	367.540009	NaN	10.14	201.305908
2497	2021-07-28	151.072662	38.146690	45.673016	21.629999	28.639999	60.840000	144.156403	36.253006	36.840000	...	121.109268	62.105621	NaN	157.259888	51.433636	NaN	369.489990	NaN	10.25	202.702011
2498	2021-07-29	152.279572	39.284206	47.127060	21.170000	28.520000	62.110001	144.812653	36.321320	37.009998	...	128.715027	62.135498	NaN	157.434021	51.905140	NaN	386.019989	NaN	10.21	203.549606
2499	2021-07-30	152.838135	40.062508	47.744534	20.379999	28.870001	62.150002	145.031403	36.038322	36.619999	...	129.781799	61.926384	NaN	158.091843	51.227352	NaN	378.100006	NaN	10.10	202.133591

	Name	Transaction.Date	Owner	Ticker	Asset.Name	Asset.Type	Type	Amount	Comment
0	Sheldon Whitehouse	11/25/2014	Self	M	Macy's, Inc. (NYSE)	Stock	Purchase	1001.0	--
1	Sheldon Whitehouse	12/18/2014	Self	BEAV	B/E Aerospace Inc. (NASDAQ)	Stock	Sale (Full)	15001.0	--
2	Sheldon Whitehouse	12/18/2014	Self	KORS	Michael Kors Holdings Limited (NYSE)	Stock	Sale (Partial)	1001.0	--
3	Sheldon Whitehouse	12/18/2014	Self	T	AT&T, Inc. (NYSE)	Stock	Purchase	1001.0	--
4	Sheldon Whitehouse	12/18/2014	Self	KEYS	Keysight Technologies, Inc. (NYSE)	Stock	Sale (Full)	1001.0	Agilent Tech common stock completed a spinoff ...

	Name	Date	Owner	Ticker	Asset_Name	Asset_Type	Transaction_Type	Min_Value	Comment	Max_Value	Avg_Value
0	Sheldon Whitehouse	2014-11-25	Self	M	Macy's, Inc. (NYSE)	Stock	Purchase	1001.0	--	15000	8000.5
1	Sheldon Whitehouse	2014-12-18	Self	BEAV	B/E Aerospace Inc. (NASDAQ)	Stock	Full	15001.0	--	50000	32500.5
2	Sheldon Whitehouse	2014-12-18	Self	KORS	Michael Kors Holdings Limited (NYSE)	Stock	Partial	1001.0	--	15000	8000.5
3	Sheldon Whitehouse	2014-12-18	Self	T	AT&T, Inc. (NYSE)	Stock	Purchase	1001.0	--	15000	8000.5
4	Sheldon Whitehouse	2014-12-18	Self	KEYS	Keysight Technologies, Inc. (NYSE)	Stock	Full	1001.0	Agilent Tech common stock completed a spinoff ...	15000	8000.5

Logrolling: Congressional Trading¶

Importing Data¶

Trades¶

Data Cleansing¶

Isolating Stock Trades and Jim Inhofe¶

Jim Inhofe missing data¶

Column Names and Data Types¶

Renaming Attributes and Values¶

Exchanging Stocks¶

Translating Exchanges Into Purchases/Sales¶

Data Types¶

Comments on Comments¶

Class 1: New Company Formation¶

Class 2: Stock Manager¶

Class 3: Error¶

Class 4: Not Required¶

Class 5: Capital Source¶

Challenges¶

Clarifying Transaction Ranges¶

Exploratory Analysis¶

Frequency of Trades¶

By Amount¶

By Senator¶

Top Traders¶

Other Traders¶

Volume of Trades¶

Analyzing Returns¶

Accessing Stock Price Data¶

Calculating Number of Shares Per Trade¶

Computing Total Returns¶

Computing Shares Retained¶

Value of Holdings¶

Do Senators Pick Better Stocks?¶

Comparing Percent Returns¶

Visualizing Senator Stocks vs. S&P 500¶

Individual Senators vs. S&P 500¶

All Senators vs. S&P 500¶

Recommendations and Concluding Remarks¶

Policy Recommendations¶

Future Work¶

Appendix¶

Unique Comments¶