{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import scipy.stats\n", "import numpy as np\n", "import random\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Drug Experiments" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "amw1 = pd.read_csv('/Need to Download for Supplemental/Drug Experiments/aMW/1.csv') #Read csv with R-Bias values from the aMW trial 1\n", "amw2 = pd.read_csv('/Need to Download for Supplemental/Drug Experiments/aMW/2.csv') #Read csv with R-Bias values from the aMW trial 2\n", "amw3 = pd.read_csv('/Need to Download for Supplemental/Drug Experiments/aMW/3.csv') #Read csv with R-Bias values from the aMW trial 3\n", "\n", "control1 = pd.read_csv('/Need to Download for Supplemental/Drug Experiments/Control of Drug Experiments/1.csv') #Read csv with R-Bias values from the control trial 1\n", "control2 = pd.read_csv('/Need to Download for Supplemental/Drug Experiments/Control of Drug Experiments/2.csv') #Read csv with R-Bias values from the control trial 2\n", "control3 = pd.read_csv('/Need to Download for Supplemental/Drug Experiments/Control of Drug Experiments/3.csv') #Read csv with R-Bias values from the control trial 3\n", "\n", "spring5htp1 = pd.read_csv('/Need to Download for Supplemental/Drug Experiments/5-HTP/1.csv') #Read csv with R-Bias values from the 5htp trial 1\n", "spring5htp2 = pd.read_csv('/Need to Download for Supplemental/Drug Experiments/5-HTP/2.csv') #Read csv with R-Bias values from the 5htp trial 2\n", "\n", "amw1.columns = [1,2,3,8,9,10,15,16,17] #Label csv columns by Day number\n", "amw2.columns = [1,2,3,8,9,10,15,16,17] #Label csv columns by Day number\n", "amw3.columns = [1,2,3,8,9,10,15,16,17] #Label csv columns by Day number\n", "\n", "control1.columns = [1,2,3,8,9,10,15,16,17] #Label csv columns by Day number\n", "control2.columns = [1,2,3,8,9,10,15,16,17] #Label csv columns by Day number\n", "control3.columns = [1,2,3,8,9,10,15,16,17] #Label csv columns by Day number\n", "\n", "spring5htp1.columns = [1,2,3,8,9,10,15,16,17] #Label csv columns by Day number\n", "spring5htp2.columns = [1,2,3,8,9,10,15,16,17] #Label csv columns by Day number" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "amw = pd.concat([amw2,amw1,amw3], axis = 0) #Concatenate the 3 aMW trials into one DataFrame\n", "control = pd.concat([control2,control1,control3], axis = 0) #Concatenate the 3 control trials into one DataFrame\n", "spring5htp = pd.concat([spring5htp1, spring5htp2], axis = 0) #Concatenate the 2 5-HTP trials into one DataFrame\n", "\n", "amw.index = range(len(amw)) #Reset the DataFrame index\n", "control.index = range(len(control)) #Reset the DataFrame index\n", "spring5htp.index = range(len(spring5htp)) #Reset the DataFrame index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Preliminary Clean-Up" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Control" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "controlday1 = pd.DataFrame(control.iloc[:,0]) #Create a DataFrame with only Day 1's R-Bias Values\n", "controlday2 = pd.DataFrame(control.iloc[:,1]) #Create a DataFrame with only Day 2's R-Bias Values\n", "controlday3 = pd.DataFrame(control.iloc[:,2]) #Create a DataFrame with only Day 3's R-Bias Values\n", "controlday8 = pd.DataFrame(control.iloc[:,3]) #Create a DataFrame with only Day 8's R-Bias Values\n", "controlday9 = pd.DataFrame(control.iloc[:,4]) #Create a DataFrame with only Day 9's R-Bias Values\n", "controlday10 = pd.DataFrame(control.iloc[:,5]) #Create a DataFrame with only Day 10's R-Bias Values\n", "controlday15 = pd.DataFrame(control.iloc[:,6]) #Create a DataFrame with only Day 15's R-Bias Values\n", "controlday16 = pd.DataFrame(control.iloc[:,7]) #Create a DataFrame with only Day 16's R-Bias Values\n", "controlday17 = pd.DataFrame(control.iloc[:,8]) #Create a DataFrame with only Day 17's R-Bias Values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "nanscontrol1nanvalues = controlday1.loc[pd.isna(controlday1[1]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 1 R-Bias values\n", "controlrvalue = [] # Creates an empty list to store the R-values\n", "\n", "#Day 2\n", "nanscontrol2nanvalues = controlday2.loc[pd.isna(controlday2[2]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 2 R-Bias values\n", "nanscontrolday1to2wonan = pd.concat([nanscontrol2nanvalues, nanscontrol1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlday2wonan = controlday2.drop(nanscontrolday1to2wonan[0]) #Drop the list of NaN values from Control Day 2 R-Bias values\n", "controlday1to2wonan = controlday1.drop(nanscontrolday1to2wonan[0]) #Drop the list of NaN values from Control Day 2 R-Bias values\n", "\n", "#Day 3\n", "nanscontrol3nanvalues = controlday3.loc[pd.isna(controlday3[3]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 3 R-Bias values\n", "nanscontrolday1to3wonan = pd.concat([nanscontrol3nanvalues, nanscontrol1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlday3wonan = controlday3.drop(nanscontrolday1to3wonan[0]) #Drop the list of NaN values from Control Day 3 R-Bias values\n", "controlday1to3wonan = controlday1.drop(nanscontrolday1to3wonan[0]) #Drop the list of NaN values from Control Day 3 R-Bias values\n", "\n", "#Day 8\n", "nanscontrol8nanvalues = controlday8.loc[pd.isna(controlday8[8]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 8 R-Bias values\n", "nanscontrolday1to8wonan = pd.concat([nanscontrol8nanvalues, nanscontrol1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlday8wonan = controlday8.drop(nanscontrolday1to8wonan[0]) #Drop the list of NaN values from Control Day 8 R-Bias values\n", "controlday1to8wonan = controlday1.drop(nanscontrolday1to8wonan[0]) #Drop the list of NaN values from Control Day 8 R-Bias values\n", "\n", "#Day 9\n", "nanscontrol9nanvalues = controlday9.loc[pd.isna(controlday9[9]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 9 R-Bias values\n", "nanscontrolday1to9wonan = pd.concat([nanscontrol9nanvalues, nanscontrol1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlday9wonan = controlday9.drop(nanscontrolday1to9wonan[0]) #Drop the list of NaN values from Control Day 9 R-Bias values\n", "controlday1to9wonan = controlday1.drop(nanscontrolday1to9wonan[0]) #Drop the list of NaN values from Control Day 9 R-Bias values\n", "\n", "#Day 10\n", "nanscontrol10nanvalues = controlday10.loc[pd.isna(controlday10[10]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 10 R-Bias values\n", "nanscontrolday1to10wonan = pd.concat([nanscontrol10nanvalues, nanscontrol1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlday10wonan = controlday10.drop(nanscontrolday1to10wonan[0]) #Drop the list of NaN values from Control Day 10 R-Bias values\n", "controlday1to10wonan = controlday1.drop(nanscontrolday1to10wonan[0]) #Drop the list of NaN values from Control Day 10 R-Bias values\n", "\n", "#Day 15\n", "nanscontrol15nanvalues = controlday15.loc[pd.isna(controlday15[15]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 15 R-Bias values\n", "nanscontrolday1to15wonan = pd.concat([nanscontrol15nanvalues, nanscontrol1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlday15wonan = controlday15.drop(nanscontrolday1to15wonan[0]) #Drop the list of NaN values from Control Day 15 R-Bias values\n", "controlday1to15wonan = controlday1.drop(nanscontrolday1to15wonan[0]) #Drop the list of NaN values from Control Day 15 R-Bias values\n", "\n", "#Day 16\n", "nanscontrol16nanvalues = controlday16.loc[pd.isna(controlday16[16]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 16 R-Bias values\n", "nanscontrolday1to16wonan = pd.concat([nanscontrol16nanvalues, nanscontrol1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlday16wonan = controlday16.drop(nanscontrolday1to16wonan[0]) #Drop the list of NaN values from Control Day 16 R-Bias values\n", "controlday1to16wonan = controlday1.drop(nanscontrolday1to16wonan[0]) #Drop the list of NaN values from Control Day 16 R-Bias values\n", "\n", "#Day 17\n", "nanscontrol17nanvalues = controlday17.loc[pd.isna(controlday17[17]), :].index.to_frame() #Collect the index numbers of NaN values in the Control Day 17 R-Bias values\n", "nanscontrolday1to17wonan = pd.concat([nanscontrol17nanvalues, nanscontrol1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlday17wonan = controlday17.drop(nanscontrolday1to17wonan[0]) #Drop the list of NaN values from Control Day 17 R-Bias values\n", "controlday1to17wonan = controlday1.drop(nanscontrolday1to17wonan[0]) #Drop the list of NaN values from Control Day 17 R-Bias values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### AMW" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "amwday1 = pd.DataFrame(amw.iloc[:,0]) #Create a DataFrame with only Day 1's R-Bias Values\n", "amwday2 = pd.DataFrame(amw.iloc[:,1]) #Create a DataFrame with only Day 2's R-Bias Values\n", "amwday3 = pd.DataFrame(amw.iloc[:,2]) #Create a DataFrame with only Day 3's R-Bias Values\n", "amwday8 = pd.DataFrame(amw.iloc[:,3]) #Create a DataFrame with only Day 8's R-Bias Values\n", "amwday9 = pd.DataFrame(amw.iloc[:,4]) #Create a DataFrame with only Day 9's R-Bias Values\n", "amwday10 = pd.DataFrame(amw.iloc[:,5]) #Create a DataFrame with only Day 10's R-Bias Values\n", "amwday15 = pd.DataFrame(amw.iloc[:,6]) #Create a DataFrame with only Day 15's R-Bias Values\n", "amwday16 = pd.DataFrame(amw.iloc[:,7]) #Create a DataFrame with only Day 16's R-Bias Values\n", "amwday17 = pd.DataFrame(amw.iloc[:,8]) #Create a DataFrame with only Day 17's R-Bias Values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "nansamw1nanvalues = amwday1.loc[pd.isna(amwday1[1]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 1 R-Bias values\n", "amwrvalue = [] # Creates an empty list to store the R-values\n", "\n", "#Day 2\n", "nansamw2nanvalues = amwday2.loc[pd.isna(amwday2[2]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 2 R-Bias values\n", "nansamwday1to2wonan = pd.concat([nansamw2nanvalues, nansamw1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "amwday2wonan = amwday2.drop(nansamwday1to2wonan[0]) #Drop the list of NaN values from amw Day 2 R-Bias values\n", "amwday1to2wonan = amwday1.drop(nansamwday1to2wonan[0]) #Drop the list of NaN values from amw Day 2 R-Bias values\n", "\n", "#Day 3\n", "nansamw3nanvalues = amwday3.loc[pd.isna(amwday3[3]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 3 R-Bias values\n", "nansamwday1to3wonan = pd.concat([nansamw3nanvalues, nansamw1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "amwday3wonan = amwday3.drop(nansamwday1to3wonan[0]) #Drop the list of NaN values from amw Day 3 R-Bias values\n", "amwday1to3wonan = amwday1.drop(nansamwday1to3wonan[0]) #Drop the list of NaN values from amw Day 3 R-Bias values\n", "\n", "#Day 8\n", "nansamw8nanvalues = amwday8.loc[pd.isna(amwday8[8]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 8 R-Bias values\n", "nansamwday1to8wonan = pd.concat([nansamw8nanvalues, nansamw1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "amwday8wonan = amwday8.drop(nansamwday1to8wonan[0]) #Drop the list of NaN values from amw Day 8 R-Bias values\n", "amwday1to8wonan = amwday1.drop(nansamwday1to8wonan[0]) #Drop the list of NaN values from amw Day 8 R-Bias values\n", "\n", "#Day 9\n", "nansamw9nanvalues = amwday9.loc[pd.isna(amwday9[9]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 9 R-Bias values\n", "nansamwday1to9wonan = pd.concat([nansamw9nanvalues, nansamw1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "amwday9wonan = amwday9.drop(nansamwday1to9wonan[0]) #Drop the list of NaN values from amw Day 9 R-Bias values\n", "amwday1to9wonan = amwday1.drop(nansamwday1to9wonan[0]) #Drop the list of NaN values from amw Day 9 R-Bias values\n", "\n", "#Day 10\n", "nansamw10nanvalues = amwday10.loc[pd.isna(amwday10[10]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 10 R-Bias values\n", "nansamwday1to10wonan = pd.concat([nansamw10nanvalues, nansamw1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "amwday10wonan = amwday10.drop(nansamwday1to10wonan[0]) #Drop the list of NaN values from amw Day 10 R-Bias values\n", "amwday1to10wonan = amwday1.drop(nansamwday1to10wonan[0]) #Drop the list of NaN values from amw Day 10 R-Bias values\n", "\n", "#Day 15\n", "nansamw15nanvalues = amwday15.loc[pd.isna(amwday15[15]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 15 R-Bias values\n", "nansamwday1to15wonan = pd.concat([nansamw15nanvalues, nansamw1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "amwday15wonan = amwday15.drop(nansamwday1to15wonan[0]) #Drop the list of NaN values from amw Day 15 R-Bias values\n", "amwday1to15wonan = amwday1.drop(nansamwday1to15wonan[0]) #Drop the list of NaN values from amw Day 15 R-Bias values\n", "\n", "#Day 16\n", "nansamw16nanvalues = amwday16.loc[pd.isna(amwday16[16]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 16 R-Bias values\n", "nansamwday1to16wonan = pd.concat([nansamw16nanvalues, nansamw1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "amwday16wonan = amwday16.drop(nansamwday1to16wonan[0]) #Drop the list of NaN values from amw Day 16 R-Bias values\n", "amwday1to16wonan = amwday1.drop(nansamwday1to16wonan[0]) #Drop the list of NaN values from amw Day 16 R-Bias values\n", "\n", "#Day 17\n", "nansamw17nanvalues = amwday17.loc[pd.isna(amwday17[17]), :].index.to_frame() #Collect the index numbers of NaN values in the amw Day 17 R-Bias values\n", "nansamwday1to17wonan = pd.concat([nansamw17nanvalues, nansamw1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "amwday17wonan = amwday17.drop(nansamwday1to17wonan[0]) #Drop the list of NaN values from amw Day 17 R-Bias values\n", "amwday1to17wonan = amwday1.drop(nansamwday1to17wonan[0]) #Drop the list of NaN values from amw Day 17 R-Bias values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5HTP" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "spring5htpday1 = pd.DataFrame(spring5htp.iloc[:,0]) #Create a DataFrame with only Day 1's R-Bias Values\n", "spring5htpday2 = pd.DataFrame(spring5htp.iloc[:,1]) #Create a DataFrame with only Day 2's R-Bias Values\n", "spring5htpday3 = pd.DataFrame(spring5htp.iloc[:,2]) #Create a DataFrame with only Day 3's R-Bias Values\n", "spring5htpday8 = pd.DataFrame(spring5htp.iloc[:,3]) #Create a DataFrame with only Day 8's R-Bias Values\n", "spring5htpday9 = pd.DataFrame(spring5htp.iloc[:,4]) #Create a DataFrame with only Day 9's R-Bias Values\n", "spring5htpday10 = pd.DataFrame(spring5htp.iloc[:,5]) #Create a DataFrame with only Day 10's R-Bias Values\n", "spring5htpday15 = pd.DataFrame(spring5htp.iloc[:,6]) #Create a DataFrame with only Day 15's R-Bias Values\n", "spring5htpday16 = pd.DataFrame(spring5htp.iloc[:,7]) #Create a DataFrame with only Day 16's R-Bias Values\n", "spring5htpday17 = pd.DataFrame(spring5htp.iloc[:,8]) #Create a DataFrame with only Day 17's R-Bias Values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Filter out the NaN values from the datasets that we will be comparing and calculate the R-values of each day compared to the first\n", "\n", "nansspring5htp1nanvalues = spring5htpday1.loc[pd.isna(spring5htpday1[1]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 1 R-Bias values\n", "spring5htprvalue = [] # Creates an empty list to store the R-values\n", "\n", "#Day 2\n", "nansspring5htp2nanvalues = spring5htpday2.loc[pd.isna(spring5htpday2[2]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 2 R-Bias values\n", "nansspring5htpday1to2wonan = pd.concat([nansspring5htp2nanvalues, nansspring5htp1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "spring5htpday2wonan = spring5htpday2.drop(nansspring5htpday1to2wonan[0]) #Drop the list of NaN values from spring5htp Day 2 R-Bias values\n", "spring5htpday1to2wonan = spring5htpday1.drop(nansspring5htpday1to2wonan[0]) #Drop the list of NaN values from spring5htp Day 2 R-Bias values\n", "\n", "res = scipy.stats.linregress(spring5htpday1to2wonan.iloc[:,0], spring5htpday2wonan.iloc[:,0]) #Calculates a linear least-squares regression between the R-Bias values of Day 1 and the R-Bias values of Day 2.\n", "spring5htprvalue.append(res.rvalue) #Adds the r-value of the linear regression to a list\n", "\n", "#Day 3\n", "nansspring5htp3nanvalues = spring5htpday3.loc[pd.isna(spring5htpday3[3]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 3 R-Bias values\n", "nansspring5htpday1to3wonan = pd.concat([nansspring5htp3nanvalues, nansspring5htp1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "spring5htpday3wonan = spring5htpday3.drop(nansspring5htpday1to3wonan[0]) #Drop the list of NaN values from spring5htp Day 3 R-Bias values\n", "spring5htpday1to3wonan = spring5htpday1.drop(nansspring5htpday1to3wonan[0]) #Drop the list of NaN values from spring5htp Day 3 R-Bias values\n", "\n", "res = scipy.stats.linregress(spring5htpday1to3wonan.iloc[:,0], spring5htpday3wonan.iloc[:,0]) #Calculates a linear least-squares regression between the R-Bias values of Day 1 and the R-Bias values of Day 3.\n", "spring5htprvalue.append(res.rvalue) #Adds the r-value of the linear regression to a list\n", "\n", "#Day 8\n", "nansspring5htp8nanvalues = spring5htpday8.loc[pd.isna(spring5htpday8[8]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 8 R-Bias values\n", "nansspring5htpday1to8wonan = pd.concat([nansspring5htp8nanvalues, nansspring5htp1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "spring5htpday8wonan = spring5htpday8.drop(nansspring5htpday1to8wonan[0]) #Drop the list of NaN values from spring5htp Day 8 R-Bias values\n", "spring5htpday1to8wonan = spring5htpday1.drop(nansspring5htpday1to8wonan[0]) #Drop the list of NaN values from spring5htp Day 8 R-Bias values\n", "\n", "res = scipy.stats.linregress(spring5htpday1to8wonan.iloc[:,0], spring5htpday8wonan.iloc[:,0]) #Calculates a linear least-squares regression between the R-Bias values of Day 1 and the R-Bias values of Day 8.\n", "spring5htprvalue.append(res.rvalue) #Adds the r-value of the linear regression to a list\n", "\n", "#Day 9\n", "nansspring5htp9nanvalues = spring5htpday9.loc[pd.isna(spring5htpday9[9]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 9 R-Bias values\n", "nansspring5htpday1to9wonan = pd.concat([nansspring5htp9nanvalues, nansspring5htp1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "spring5htpday9wonan = spring5htpday9.drop(nansspring5htpday1to9wonan[0]) #Drop the list of NaN values from spring5htp Day 9 R-Bias values\n", "spring5htpday1to9wonan = spring5htpday1.drop(nansspring5htpday1to9wonan[0]) #Drop the list of NaN values from spring5htp Day 9 R-Bias values\n", "\n", "res = scipy.stats.linregress(spring5htpday1to9wonan.iloc[:,0], spring5htpday9wonan.iloc[:,0]) #Calculates a linear least-squares regression between the R-Bias values of Day 1 and the R-Bias values of Day 9.\n", "spring5htprvalue.append(res.rvalue) #Adds the r-value of the linear regression to a list\n", "\n", "#Day 10\n", "nansspring5htp10nanvalues = spring5htpday10.loc[pd.isna(spring5htpday10[10]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 10 R-Bias values\n", "nansspring5htpday1to10wonan = pd.concat([nansspring5htp10nanvalues, nansspring5htp1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "spring5htpday10wonan = spring5htpday10.drop(nansspring5htpday1to10wonan[0]) #Drop the list of NaN values from spring5htp Day 10 R-Bias values\n", "spring5htpday1to10wonan = spring5htpday1.drop(nansspring5htpday1to10wonan[0]) #Drop the list of NaN values from spring5htp Day 10 R-Bias values\n", "\n", "res = scipy.stats.linregress(spring5htpday1to10wonan.iloc[:,0], spring5htpday10wonan.iloc[:,0]) #Calculates a linear least-squares regression between the R-Bias values of Day 1 and the R-Bias values of Day 10.\n", "spring5htprvalue.append(res.rvalue) #Adds the r-value of the linear regression to a list\n", "\n", "#Day 15\n", "nansspring5htp15nanvalues = spring5htpday15.loc[pd.isna(spring5htpday15[15]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 15 R-Bias values\n", "nansspring5htpday1to15wonan = pd.concat([nansspring5htp15nanvalues, nansspring5htp1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "spring5htpday15wonan = spring5htpday15.drop(nansspring5htpday1to15wonan[0]) #Drop the list of NaN values from spring5htp Day 15 R-Bias values\n", "spring5htpday1to15wonan = spring5htpday1.drop(nansspring5htpday1to15wonan[0]) #Drop the list of NaN values from spring5htp Day 15 R-Bias values\n", "\n", "res = scipy.stats.linregress(spring5htpday1to15wonan.iloc[:,0], spring5htpday15wonan.iloc[:,0]) #Calculates a linear least-squares regression between the R-Bias values of Day 1 and the R-Bias values of Day 15.\n", "spring5htprvalue.append(res.rvalue) #Adds the r-value of the linear regression to a list\n", "\n", "#Day 16\n", "nansspring5htp16nanvalues = spring5htpday16.loc[pd.isna(spring5htpday16[16]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 16 R-Bias values\n", "nansspring5htpday1to16wonan = pd.concat([nansspring5htp16nanvalues, nansspring5htp1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "spring5htpday16wonan = spring5htpday16.drop(nansspring5htpday1to16wonan[0]) #Drop the list of NaN values from spring5htp Day 16 R-Bias values\n", "spring5htpday1to16wonan = spring5htpday1.drop(nansspring5htpday1to16wonan[0]) #Drop the list of NaN values from spring5htp Day 16 R-Bias values\n", "\n", "res = scipy.stats.linregress(spring5htpday1to16wonan.iloc[:,0], spring5htpday16wonan.iloc[:,0]) #Calculates a linear least-squares regression between the R-Bias values of Day 1 and the R-Bias values of Day 16.\n", "spring5htprvalue.append(res.rvalue) #Adds the r-value of the linear regression to a list\n", "\n", "#Day 17\n", "nansspring5htp17nanvalues = spring5htpday17.loc[pd.isna(spring5htpday17[17]), :].index.to_frame() #Collect the index numbers of NaN values in the spring5htp Day 17 R-Bias values\n", "nansspring5htpday1to17wonan = pd.concat([nansspring5htp17nanvalues, nansspring5htp1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "spring5htpday17wonan = spring5htpday17.drop(nansspring5htpday1to17wonan[0]) #Drop the list of NaN values from spring5htp Day 17 R-Bias values\n", "spring5htpday1to17wonan = spring5htpday1.drop(nansspring5htpday1to17wonan[0]) #Drop the list of NaN values from spring5htp Day 17 R-Bias values\n", "\n", "res = scipy.stats.linregress(spring5htpday1to17wonan.iloc[:,0], spring5htpday17wonan.iloc[:,0]) #Calculates a linear least-squares regression between the R-Bias values of Day 1 and the R-Bias values of Day 17.\n", "spring5htprvalue.append(res.rvalue) #Adds the r-value of the linear regression to a list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Bootstrapping of the R-values Throughout Time Compared to Day 1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### In order to understand the correlation of variability between Day 1 of testing and the rest of our experimental days, we bootstrapped the R-bias values from each day and found the r-values compared to Day 1. We performed this 1000 times and graphed the mean of the r-values (solid line) and the 95% CI." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dayxdataframes = [controlday2wonan,controlday3wonan,controlday8wonan,controlday9wonan,controlday10wonan,controlday15wonan,controlday16wonan,controlday17wonan] #Create a list of Day x DataFrames\n", "day1dataframes = [controlday1to2wonan,controlday1to3wonan,controlday1to8wonan,controlday1to9wonan,controlday1to10wonan,controlday1to15wonan,controlday1to16wonan,controlday1to17wonan] #Create a list of Day 1 DataFrames\n", "controlbootstrap = pd.DataFrame() #Create an empty DataFrame that will be populated with the 1000 bootstrapped r-values from each Day x\n", "\n", "for w in range(len(dayxdataframes)): #Repeat the bootstrapping process for however many days are in dayxdataframes list (8 days)\n", " numdraws=len(dayxdataframes[w]) #Draw the same number of values as the selected Day x DataFrame for each bootstrapping sample\n", " numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", " controldayxbootstrap = [] #Create an empty list that will be populated with the r-values of each simulation\n", " lst = list(range(0,len(dayxdataframes[w]))) #Create a list from 0 to the length of Day x's index\n", " dayxdataframes[w].index = range(0,len(dayxdataframes[w])) #Reset Day x's index\n", " day1dataframes[w].index = range(0,len(day1dataframes[w])) #Reset Day 1's index\n", " for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of Day x's index\n", " controldayxwonansamp = dayxdataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " controlday1toxwonansamp = day1dataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day 1's DataFrame\n", " res = scipy.stats.linregress(controldayxwonansamp.iloc[:,0], controlday1toxwonansamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day 1 samples\n", " controldayxbootstrap.append(res.rvalue) #Save the r-value of the samples to the controldayxbootstrap list\n", " controldayxbootstrap.sort() #Sort the 1000 r-values in ascending order\n", " controldayxbootstrap = pd.DataFrame(controldayxbootstrap) #Turn the list into a DataFrame for concatenation\n", " controlbootstrap = pd.concat([controlbootstrap,controldayxbootstrap], axis = 1) #Concatenate the sorted r-values from each day into a DataFrame\n", "controlbootstrap.columns = [2,3,8,9,10,15,16,17] #Rename the column names with Day x's number" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dayxdataframes = [amwday2wonan,amwday3wonan,amwday8wonan,amwday9wonan,amwday10wonan,amwday15wonan,amwday16wonan,amwday17wonan] #Create a list of Day x DataFrames\n", "day1dataframes = [amwday1to2wonan,amwday1to3wonan,amwday1to8wonan,amwday1to9wonan,amwday1to10wonan,amwday1to15wonan,amwday1to16wonan,amwday1to17wonan] #Create a list of Day 1 DataFrames\n", "amwbootstrap = pd.DataFrame() #Create an empty DataFrame that will be populated with the 1000 bootstrapped r-values from each Day x\n", "\n", "for w in range(len(dayxdataframes)): #Repeat the bootstrapping process for however many days are in dayxdataframes list (8 days)\n", " numdraws=len(dayxdataframes[w]) #Draw the same number of values as the selected Day x DataFrame for each bootstrapping sample\n", " numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", " amwdayxbootstrap = [] #Create an empty list that will be populated with the r-values of each simulation\n", " lst = list(range(0,len(dayxdataframes[w]))) #Create a list from 0 to the length of Day x's index\n", " dayxdataframes[w].index = range(0,len(dayxdataframes[w])) #Reset Day x's index\n", " day1dataframes[w].index = range(0,len(day1dataframes[w])) #Reset Day 1's index\n", " for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of Day x's index\n", " amwdayxwonansamp = dayxdataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " amwday1toxwonansamp = day1dataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day 1's DataFrame\n", " res = scipy.stats.linregress(amwdayxwonansamp.iloc[:,0], amwday1toxwonansamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day 1 samples\n", " amwdayxbootstrap.append(res.rvalue) #Save the r-value of the samples to the amwdayxbootstrap list\n", " amwdayxbootstrap.sort() #Sort the 1000 r-values in ascending order\n", " amwdayxbootstrap = pd.DataFrame(amwdayxbootstrap) #Turn the list into a DataFrame for concatenation\n", " amwbootstrap = pd.concat([amwbootstrap,amwdayxbootstrap], axis = 1) #Concatenate the sorted r-values from each day into a DataFrame\n", "amwbootstrap.columns = [2,3,8,9,10,15,16,17] #Rename the column names with Day x's number" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dayxdataframes = [spring5htpday2wonan,spring5htpday3wonan,spring5htpday8wonan,spring5htpday9wonan,spring5htpday10wonan,spring5htpday15wonan,spring5htpday16wonan,spring5htpday17wonan] #Create a list of Day x DataFrames\n", "day1dataframes = [spring5htpday1to2wonan,spring5htpday1to3wonan,spring5htpday1to8wonan,spring5htpday1to9wonan,spring5htpday1to10wonan,spring5htpday1to15wonan,spring5htpday1to16wonan,spring5htpday1to17wonan] #Create a list of Day 1 DataFrames\n", "spring5htpbootstrap = pd.DataFrame() #Create an empty DataFrame that will be populated with the 1000 bootstrapped r-values from each Day x\n", "\n", "for w in range(len(dayxdataframes)): #Repeat the bootstrapping process for however many days are in dayxdataframes list (8 days)\n", " numdraws=len(dayxdataframes[w]) #Draw the same number of values as the selected Day x DataFrame for each bootstrapping sample\n", " numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", " spring5htpdayxbootstrap = [] #Create an empty list that will be populated with the r-values of each simulation\n", " lst = list(range(0,len(dayxdataframes[w]))) #Create a list from 0 to the length of Day x's index\n", " dayxdataframes[w].index = range(0,len(dayxdataframes[w])) #Reset Day x's index\n", " day1dataframes[w].index = range(0,len(day1dataframes[w])) #Reset Day 1's index\n", " for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of Day x's index\n", " spring5htpdayxwonansamp = dayxdataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " spring5htpday1toxwonansamp = day1dataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day 1's DataFrame\n", " res = scipy.stats.linregress(spring5htpdayxwonansamp.iloc[:,0], spring5htpday1toxwonansamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day 1 samples\n", " spring5htpdayxbootstrap.append(res.rvalue) #Save the r-value of the samples to the spring5htpdayxbootstrap list\n", " spring5htpdayxbootstrap.sort() #Sort the 1000 r-values in ascending order\n", " spring5htpdayxbootstrap = pd.DataFrame(spring5htpdayxbootstrap) #Turn the list into a DataFrame for concatenation\n", " spring5htpbootstrap = pd.concat([spring5htpbootstrap,spring5htpdayxbootstrap], axis = 1) #Concatenate the sorted r-values from each day into a DataFrame\n", "spring5htpbootstrap.columns = [2,3,8,9,10,15,16,17] #Rename the column names with Day x's number" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "plt.plot(controlbootstrap.mean(), c = '#b92024', marker = '.') #Plot the mean r-value per day of the bootstrapped samples of the control with a solid line\n", "plt.fill_between([2,3,8,9,10,15,16,17],(controlbootstrap.iloc[50]), (controlbootstrap.iloc[950]), color='#b92024', alpha=.1) #Plot the 95% CI of the of the bootstrapped samples of the control through the shaded regions\n", "\n", "plt.plot(amwbootstrap.mean(), c = '#3e5daa', marker = '.') #Plot the mean r-value per day of the bootstrapped samples of the aMW with a solid line\n", "plt.fill_between([2,3,8,9,10,15,16,17],(amwbootstrap.iloc[50]), (amwbootstrap.iloc[950]), color='#3e5daa', alpha=.1) #Plot the 95% CI of the of the bootstrapped samples of the aMW through the shaded regions\n", "\n", "plt.plot(spring5htpbootstrap.mean(),c = '#CF9FFF', marker = '.') #Plot the mean r-value per day of the bootstrapped samples of the 5-HTP with a solid line\n", "plt.fill_between([2,3,8,9,10,15,16,17],(spring5htpbootstrap.iloc[50]), (spring5htpbootstrap.iloc[950]), color = '#CF9FFF', alpha=.1) #Plot the 95% CI of the of the bootstrapped samples of the 5-HTP through the shaded regions\n", "\n", "plt.title('Bootstrapped R-values Throughout Time Compared to Day 1') #Set the title\n", "plt.xlabel('Day') #Set the x-axis label\n", "plt.legend([ 'control', 'aMW','5-HTP']) #Create a legend\n", "\n", "plt.savefig('/Users/athenaye/Desktop/Drug_Bootstrapped R-values Throughout Time Compared to Day 1.pdf', dpi=300)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 1-Day Intervals" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Scatter Plot" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### To create a scatterplot of the R-bias values between Day x and Day x+1, we must remove all NaNs from each data point" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "controlsamelabels = control #Create a DataFrame identical to the control DataFrame \n", "controlsamelabels.columns = [0,0,0,0,0,0,0,0,0] #Rename all columns as 0 (same column names makes concatenating possible)\n", "\n", "#Create separate DataFrames for each day of the experiment\n", "controlsamelabelsday1 = pd.DataFrame(controlsamelabels.iloc[:,0])\n", "controlsamelabelsday2 = pd.DataFrame(controlsamelabels.iloc[:,1])\n", "controlsamelabelsday3 = pd.DataFrame(controlsamelabels.iloc[:,2])\n", "controlsamelabelsday8 = pd.DataFrame(controlsamelabels.iloc[:,3])\n", "controlsamelabelsday9 = pd.DataFrame(controlsamelabels.iloc[:,4])\n", "controlsamelabelsday10 = pd.DataFrame(controlsamelabels.iloc[:,5])\n", "controlsamelabelsday15 = pd.DataFrame(controlsamelabels.iloc[:,6])\n", "controlsamelabelsday16 = pd.DataFrame(controlsamelabels.iloc[:,7])\n", "controlsamelabelsday17 = pd.DataFrame(controlsamelabels.iloc[:,8])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "titleofday1 = [controlsamelabelsday1,controlsamelabelsday2,controlsamelabelsday8,controlsamelabelsday9,controlsamelabelsday15,controlsamelabelsday16] #Create a list of DataFrames of all possible Day x's\n", "titleofday2 = [controlsamelabelsday2,controlsamelabelsday3,controlsamelabelsday9,controlsamelabelsday10,controlsamelabelsday16,controlsamelabelsday17] #Create a list of DataFrames of all possible Day x+1's\n", "\n", "dayxdf = pd.DataFrame() #Create an empty DataFrame for the Day x R-bias values\n", "dayxplusonedf = pd.DataFrame() #Create an empty DataFrame for the Day x+1 R-bias values\n", "\n", "for w in range(len(titleofday1)): #Repeat this process for as many possible Day x's there are\n", " nanslist = [] #Create an empty list for all NaN values to be logged\n", " \n", " nanslist = titleofday1[w].loc[titleofday1[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x DataFrame \n", " nanslist1 = titleofday2[w].loc[titleofday2[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x+1 DataFrame\n", " nanslist.extend(nanslist1) #Create a list that combines all the NaN index values from Day x to Day x+1\n", "\n", " controlsamelabelsday1wonan = titleofday1[w].drop(nanslist) #Drop all rows that have a NaN from the Day x DataFrame\n", " controlsamelabelsday2wonan = titleofday2[w].drop(nanslist) #Drop all rows that have a NaN from the Day x+1 DataFrame\n", " dayxdf = pd.concat([dayxdf, controlsamelabelsday1wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", " dayxplusonedf = pd.concat([dayxplusonedf, controlsamelabelsday2wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", "control1daydifferences = pd.concat([dayxdf, dayxplusonedf], axis = 1) #Concatenate the Day x and Day x+1 DataFrames into one, creating a DataFrame with two columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "amwsamelabels = amw #Create a DataFrame identical to the amw DataFrame \n", "amwsamelabels.columns = [0,0,0,0,0,0,0,0,0] #Rename all columns as 0 (same column names makes concatenating possible)\n", "\n", "#Create separate DataFrames for each day of the experiment\n", "amwsamelabelsday1 = pd.DataFrame(amwsamelabels.iloc[:,0])\n", "amwsamelabelsday2 = pd.DataFrame(amwsamelabels.iloc[:,1])\n", "amwsamelabelsday3 = pd.DataFrame(amwsamelabels.iloc[:,2])\n", "amwsamelabelsday8 = pd.DataFrame(amwsamelabels.iloc[:,3])\n", "amwsamelabelsday9 = pd.DataFrame(amwsamelabels.iloc[:,4])\n", "amwsamelabelsday10 = pd.DataFrame(amwsamelabels.iloc[:,5])\n", "amwsamelabelsday15 = pd.DataFrame(amwsamelabels.iloc[:,6])\n", "amwsamelabelsday16 = pd.DataFrame(amwsamelabels.iloc[:,7])\n", "amwsamelabelsday17 = pd.DataFrame(amwsamelabels.iloc[:,8])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "titleofday1 = [amwsamelabelsday1,amwsamelabelsday2,amwsamelabelsday8,amwsamelabelsday9,amwsamelabelsday15,amwsamelabelsday16] #Create a list of DataFrames of all possible Day x's\n", "titleofday2 = [amwsamelabelsday2,amwsamelabelsday3,amwsamelabelsday9,amwsamelabelsday10,amwsamelabelsday16,amwsamelabelsday17] #Create a list of DataFrames of all possible Day x+1's\n", "\n", "dayxdf = pd.DataFrame() #Create an empty DataFrame for the Day x R-bias values\n", "dayxplusonedf = pd.DataFrame() #Create an empty DataFrame for the Day x+1 R-bias values\n", "\n", "for w in range(len(titleofday1)): #Repeat this process for as many possible Day x's there are\n", " nanslist = [] #Create an empty list for all NaN values to be logged\n", " \n", " nanslist = titleofday1[w].loc[titleofday1[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x DataFrame \n", " nanslist1 = titleofday2[w].loc[titleofday2[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x+1 DataFrame\n", " nanslist.extend(nanslist1) #Create a list that combines all the NaN index values from Day x to Day x+1\n", "\n", " amwsamelabelsday1wonan = titleofday1[w].drop(nanslist) #Drop all rows that have a NaN from the Day x DataFrame\n", " amwsamelabelsday2wonan = titleofday2[w].drop(nanslist) #Drop all rows that have a NaN from the Day x+1 DataFrame\n", " dayxdf = pd.concat([dayxdf, amwsamelabelsday1wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", " dayxplusonedf = pd.concat([dayxplusonedf, amwsamelabelsday2wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", "amw1daydifferences = pd.concat([dayxdf, dayxplusonedf], axis = 1) #Concatenate the Day x and Day x+1 DataFrames into one, creating a DataFrame with two columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "spring5htpsamelabels = spring5htp #Create a DataFrame identical to the spring5htp DataFrame \n", "spring5htpsamelabels.columns = [0,0,0,0,0,0,0,0,0] #Rename all columns as 0 (same column names makes concatenating possible)\n", "\n", "#Create separate DataFrames for each day of the experiment\n", "spring5htpsamelabelsday1 = pd.DataFrame(spring5htpsamelabels.iloc[:,0])\n", "spring5htpsamelabelsday2 = pd.DataFrame(spring5htpsamelabels.iloc[:,1])\n", "spring5htpsamelabelsday3 = pd.DataFrame(spring5htpsamelabels.iloc[:,2])\n", "spring5htpsamelabelsday8 = pd.DataFrame(spring5htpsamelabels.iloc[:,3])\n", "spring5htpsamelabelsday9 = pd.DataFrame(spring5htpsamelabels.iloc[:,4])\n", "spring5htpsamelabelsday10 = pd.DataFrame(spring5htpsamelabels.iloc[:,5])\n", "spring5htpsamelabelsday15 = pd.DataFrame(spring5htpsamelabels.iloc[:,6])\n", "spring5htpsamelabelsday16 = pd.DataFrame(spring5htpsamelabels.iloc[:,7])\n", "spring5htpsamelabelsday17 = pd.DataFrame(spring5htpsamelabels.iloc[:,8])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "titleofday1 = [spring5htpsamelabelsday1,spring5htpsamelabelsday2,spring5htpsamelabelsday8,spring5htpsamelabelsday9,spring5htpsamelabelsday15,spring5htpsamelabelsday16] #Create a list of DataFrames of all possible Day x's\n", "titleofday2 = [spring5htpsamelabelsday2,spring5htpsamelabelsday3,spring5htpsamelabelsday9,spring5htpsamelabelsday10,spring5htpsamelabelsday16,spring5htpsamelabelsday17] #Create a list of DataFrames of all possible Day x+1's\n", "\n", "dayxdf = pd.DataFrame() #Create an empty DataFrame for the Day x R-bias values\n", "dayxplusonedf = pd.DataFrame() #Create an empty DataFrame for the Day x+1 R-bias values\n", "\n", "for w in range(len(titleofday1)): #Repeat this process for as many possible Day x's there are\n", " nanslist = [] #Create an empty list for all NaN values to be logged\n", " \n", " nanslist = titleofday1[w].loc[titleofday1[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x DataFrame \n", " nanslist1 = titleofday2[w].loc[titleofday2[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x+1 DataFrame\n", " nanslist.extend(nanslist1) #Create a list that combines all the NaN index values from Day x to Day x+1\n", "\n", " spring5htpsamelabelsday1wonan = titleofday1[w].drop(nanslist) #Drop all rows that have a NaN from the Day x DataFrame\n", " spring5htpsamelabelsday2wonan = titleofday2[w].drop(nanslist) #Drop all rows that have a NaN from the Day x+1 DataFrame\n", " dayxdf = pd.concat([dayxdf, spring5htpsamelabelsday1wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", " dayxplusonedf = pd.concat([dayxplusonedf, spring5htpsamelabelsday2wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", "spring5htp1daydifferences = pd.concat([dayxdf, dayxplusonedf], axis = 1) #Concatenate the Day x and Day x+1 DataFrames into one, creating a DataFrame with two columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sns.regplot(x=control1daydifferences.iloc[:,0], y=control1daydifferences.iloc[:,1], color = '#b92024', marker = '.') #Plot the linear regression fit of the control DataFrame\n", "sns.regplot(x=amw1daydifferences.iloc[:,0], y=amw1daydifferences.iloc[:,1], color = '#3e5daa', marker = '.') #Plot the linear regression fit of the aMW DataFrame\n", "sns.regplot(x=spring5htp1daydifferences.iloc[:,0], y=spring5htp1daydifferences.iloc[:,1], color = '#b56dff', marker = '.') #Plot the linear regression fit of the 5-HTP DataFrame\n", "\n", "plt.savefig('/Users/athenaye/Desktop/Drug_Scatterplot.pdf', dpi=300)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Violin Plot" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "controlbootstrap" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.DataFrame.iteritems = pd.DataFrame.items #Pandas 2.0 uses .items instead of .iteritems. Because this was written with a previous Pandas version, this line remedies an Attribute Error\n", "\n", "#Control\n", "numdraws=len(control1daydifferences) #Draw the same number of values as the selected DataFrame for each bootstrapping sample\n", "numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", "controlbootstrap = [] #Create an empty list to populate with r-values of the bootstrapped samples\n", "controldayx = pd.DataFrame(control1daydifferences.iloc[:,0]) #Create a DataFrame of all control Day x R-Bias values\n", "controldayxplusone = pd.DataFrame(control1daydifferences.iloc[:,1]) #Create a DataFrame of all control Day x+1 R-Bias values\n", "lst = list(range(len(control1daydifferences))) #Create a list of values from 0 to the length of Day x's index\n", "for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of the index from control1daydifferences DataFrame\n", " controldayxplusonesamp = controldayxplusone.iloc[oneset] #Pull the randomly chosen index values from Day x+1's DataFrame \n", " controldayxsamp = controldayx.iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " res = scipy.stats.linregress(controldayxsamp.iloc[:,0], controldayxplusonesamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day x+1 samples\n", " controlbootstrap.append(res.rvalue) #Save the r-value of the samples to the controlbootstrap list\n", " \n", "controlbootstrap.sort() #Sort the 1000 r-values in ascending order\n", "controlbootstrap = pd.DataFrame(controlbootstrap) #Convert controlbootstrap list to a DataFrame for further manipulation\n", "\n", "#aMW\n", "numdraws=len(amw1daydifferences) #Draw the same number of values as the selected DataFrame for each bootstrapping sample\n", "numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", "amwbootstrap = [] #Create an empty list to populate with r-values of the bootstrapped samples\n", "amwdayx = pd.DataFrame(amw1daydifferences.iloc[:,0]) #Create a DataFrame of all amw Day x R-Bias values\n", "amwdayxplusone = pd.DataFrame(amw1daydifferences.iloc[:,1]) #Create a DataFrame of all amw Day x+1 R-Bias values\n", "lst = list(range(len(amw1daydifferences))) #Create a list of values from 0 to the length of Day x's index\n", "for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of the index from amw1daydifferences DataFrame\n", " amwdayxplusonesamp = amwdayxplusone.iloc[oneset] #Pull the randomly chosen index values from Day x+1's DataFrame \n", " amwdayxsamp = amwdayx.iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " res = scipy.stats.linregress(amwdayxsamp.iloc[:,0], amwdayxplusonesamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day x+1 samples\n", " amwbootstrap.append(res.rvalue) #Save the r-value of the samples to the amwbootstrap list\n", " \n", "amwbootstrap.sort() #Sort the 1000 r-values in ascending order\n", "amwbootstrap = pd.DataFrame(amwbootstrap) #Convert amwbootstrap list to a DataFrame for further manipulation\n", "\n", "#spring5htp\n", "numdraws=len(spring5htp1daydifferences) #Draw the same number of values as the selected DataFrame for each bootstrapping sample\n", "numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", "spring5htpbootstrap = [] #Create an empty list to populate with r-values of the bootstrapped samples\n", "spring5htpdayx = pd.DataFrame(spring5htp1daydifferences.iloc[:,0]) #Create a DataFrame of all spring5htp Day x R-Bias values\n", "spring5htpdayxplusone = pd.DataFrame(spring5htp1daydifferences.iloc[:,1]) #Create a DataFrame of all spring5htp Day x+1 R-Bias values\n", "lst = list(range(len(spring5htp1daydifferences))) #Create a list of values from 0 to the length of Day x's index\n", "for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of the index from spring5htp1daydifferences DataFrame\n", " spring5htpdayxplusonesamp = spring5htpdayxplusone.iloc[oneset] #Pull the randomly chosen index values from Day x+1's DataFrame \n", " spring5htpdayxsamp = spring5htpdayx.iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " res = scipy.stats.linregress(spring5htpdayxsamp.iloc[:,0], spring5htpdayxplusonesamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day x+1 samples\n", " spring5htpbootstrap.append(res.rvalue) #Save the r-value of the samples to the spring5htpbootstrap list\n", " \n", "spring5htpbootstrap.sort() #Sort the 1000 r-values in ascending order\n", "spring5htpbootstrap = pd.DataFrame(spring5htpbootstrap) #Convert spring5htpbootstrap list to a DataFrame for further manipulation\n", "\n", "my_pal = {\"aMW\": \"#3e5daa\", \"Control\": \"#b92024\", \"5-HTP\": \"#CF9FFF\"} #Set color palette\n", "differences1day = pd.concat([amwbootstrap,controlbootstrap,spring5htpbootstrap], axis = 1) #Concatenate the bootstrapped r-values into one DataFrame that has 3 columns\n", "differences1day.columns = ['aMW', 'Control', '5-HTP'] #Name the columns\n", "sns.violinplot(data = differences1day, palette = my_pal).set_title('1 Day Difference') #Output a violin plot of bootstrapped r-values\n", "\n", "plt.savefig('/Users/athenaye/Desktop/Drug_ViolinPlot.pdf', dpi=300)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "differences1day.mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# CRISPR Experiments" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "controlofTrH = pd.read_csv('/Need to Download for Supplemental/CRISPR Experiments/Control of TrH/1.csv') #Load R-Bias control values as DataFrame\n", "controlofTrH.columns = [1,2,3,8,9,10,15,16,17] #Name columns based on experimental day\n", "\n", "trh = pd.read_csv('/Need to Download for Supplemental/CRISPR Experiments/TrH/1.csv') #Load R-Bias TrH values as DataFrame\n", "trh.columns = [1,2,3,8,9,10,15,16,17] #Name columns based on experimental day" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Preliminary Clean-Up" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Control" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Create a DataFrame for each experimental day of the controls\n", "controlofTrHday1 = pd.DataFrame(controlofTrH.iloc[:,0])\n", "controlofTrHday2 = pd.DataFrame(controlofTrH.iloc[:,1])\n", "controlofTrHday3 = pd.DataFrame(controlofTrH.iloc[:,2])\n", "controlofTrHday8 = pd.DataFrame(controlofTrH.iloc[:,3])\n", "controlofTrHday9 = pd.DataFrame(controlofTrH.iloc[:,4])\n", "controlofTrHday10 = pd.DataFrame(controlofTrH.iloc[:,5])\n", "controlofTrHday15 = pd.DataFrame(controlofTrH.iloc[:,6])\n", "controlofTrHday16 = pd.DataFrame(controlofTrH.iloc[:,7])\n", "controlofTrHday17 = pd.DataFrame(controlofTrH.iloc[:,8])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "nanscontrolofTrH1nanvalues = controlofTrHday1.loc[pd.isna(controlofTrHday1[1]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 1 R-Bias values\n", "controlofTrHrvalue = [] # Creates an empty list to store the R-values\n", "\n", "#Day 2\n", "nanscontrolofTrH2nanvalues = controlofTrHday2.loc[pd.isna(controlofTrHday2[2]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 2 R-Bias values\n", "nanscontrolofTrHday1to2wonan = pd.concat([nanscontrolofTrH2nanvalues, nanscontrolofTrH1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlofTrHday2wonan = controlofTrHday2.drop(nanscontrolofTrHday1to2wonan[0]) #Drop the list of NaN values from controlofTrH Day 2 R-Bias values\n", "controlofTrHday1to2wonan = controlofTrHday1.drop(nanscontrolofTrHday1to2wonan[0]) #Drop the list of NaN values from controlofTrH Day 2 R-Bias values\n", "\n", "#Day 3\n", "nanscontrolofTrH3nanvalues = controlofTrHday3.loc[pd.isna(controlofTrHday3[3]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 3 R-Bias values\n", "nanscontrolofTrHday1to3wonan = pd.concat([nanscontrolofTrH3nanvalues, nanscontrolofTrH1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlofTrHday3wonan = controlofTrHday3.drop(nanscontrolofTrHday1to3wonan[0]) #Drop the list of NaN values from controlofTrH Day 3 R-Bias values\n", "controlofTrHday1to3wonan = controlofTrHday1.drop(nanscontrolofTrHday1to3wonan[0]) #Drop the list of NaN values from controlofTrH Day 3 R-Bias values\n", "\n", "#Day 8\n", "nanscontrolofTrH8nanvalues = controlofTrHday8.loc[pd.isna(controlofTrHday8[8]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 8 R-Bias values\n", "nanscontrolofTrHday1to8wonan = pd.concat([nanscontrolofTrH8nanvalues, nanscontrolofTrH1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlofTrHday8wonan = controlofTrHday8.drop(nanscontrolofTrHday1to8wonan[0]) #Drop the list of NaN values from controlofTrH Day 8 R-Bias values\n", "controlofTrHday1to8wonan = controlofTrHday1.drop(nanscontrolofTrHday1to8wonan[0]) #Drop the list of NaN values from controlofTrH Day 8 R-Bias values\n", "\n", "#Day 9\n", "nanscontrolofTrH9nanvalues = controlofTrHday9.loc[pd.isna(controlofTrHday9[9]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 9 R-Bias values\n", "nanscontrolofTrHday1to9wonan = pd.concat([nanscontrolofTrH9nanvalues, nanscontrolofTrH1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlofTrHday9wonan = controlofTrHday9.drop(nanscontrolofTrHday1to9wonan[0]) #Drop the list of NaN values from controlofTrH Day 9 R-Bias values\n", "controlofTrHday1to9wonan = controlofTrHday1.drop(nanscontrolofTrHday1to9wonan[0]) #Drop the list of NaN values from controlofTrH Day 9 R-Bias values\n", "\n", "#Day 10\n", "nanscontrolofTrH10nanvalues = controlofTrHday10.loc[pd.isna(controlofTrHday10[10]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 10 R-Bias values\n", "nanscontrolofTrHday1to10wonan = pd.concat([nanscontrolofTrH10nanvalues, nanscontrolofTrH1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlofTrHday10wonan = controlofTrHday10.drop(nanscontrolofTrHday1to10wonan[0]) #Drop the list of NaN values from controlofTrH Day 10 R-Bias values\n", "controlofTrHday1to10wonan = controlofTrHday1.drop(nanscontrolofTrHday1to10wonan[0]) #Drop the list of NaN values from controlofTrH Day 10 R-Bias values\n", "\n", "#Day 15\n", "nanscontrolofTrH15nanvalues = controlofTrHday15.loc[pd.isna(controlofTrHday15[15]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 15 R-Bias values\n", "nanscontrolofTrHday1to15wonan = pd.concat([nanscontrolofTrH15nanvalues, nanscontrolofTrH1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlofTrHday15wonan = controlofTrHday15.drop(nanscontrolofTrHday1to15wonan[0]) #Drop the list of NaN values from controlofTrH Day 15 R-Bias values\n", "controlofTrHday1to15wonan = controlofTrHday1.drop(nanscontrolofTrHday1to15wonan[0]) #Drop the list of NaN values from controlofTrH Day 15 R-Bias values\n", "\n", "#Day 16\n", "nanscontrolofTrH16nanvalues = controlofTrHday16.loc[pd.isna(controlofTrHday16[16]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 16 R-Bias values\n", "nanscontrolofTrHday1to16wonan = pd.concat([nanscontrolofTrH16nanvalues, nanscontrolofTrH1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlofTrHday16wonan = controlofTrHday16.drop(nanscontrolofTrHday1to16wonan[0]) #Drop the list of NaN values from controlofTrH Day 16 R-Bias values\n", "controlofTrHday1to16wonan = controlofTrHday1.drop(nanscontrolofTrHday1to16wonan[0]) #Drop the list of NaN values from controlofTrH Day 16 R-Bias values\n", "\n", "#Day 17\n", "nanscontrolofTrH17nanvalues = controlofTrHday17.loc[pd.isna(controlofTrHday17[17]), :].index.to_frame() #Collect the index numbers of NaN values in the controlofTrH Day 17 R-Bias values\n", "nanscontrolofTrHday1to17wonan = pd.concat([nanscontrolofTrH17nanvalues, nanscontrolofTrH1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "controlofTrHday17wonan = controlofTrHday17.drop(nanscontrolofTrHday1to17wonan[0]) #Drop the list of NaN values from controlofTrH Day 17 R-Bias values\n", "controlofTrHday1to17wonan = controlofTrHday1.drop(nanscontrolofTrHday1to17wonan[0]) #Drop the list of NaN values from controlofTrH Day 17 R-Bias values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### TrH" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Create a DataFrame for each experimental day of the TrH experimental group\n", "trhday1 = pd.DataFrame(trh.iloc[:,0])\n", "trhday2 = pd.DataFrame(trh.iloc[:,1])\n", "trhday3 = pd.DataFrame(trh.iloc[:,2])\n", "trhday8 = pd.DataFrame(trh.iloc[:,3])\n", "trhday9 = pd.DataFrame(trh.iloc[:,4])\n", "trhday10 = pd.DataFrame(trh.iloc[:,5])\n", "trhday15 = pd.DataFrame(trh.iloc[:,6])\n", "trhday16 = pd.DataFrame(trh.iloc[:,7])\n", "trhday17 = pd.DataFrame(trh.iloc[:,8])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "nanstrh1nanvalues = trhday1.loc[pd.isna(trhday1[1]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 1 R-Bias values\n", "trhrvalue = [] # Creates an empty list to store the R-values\n", "\n", "#Day 2\n", "nanstrh2nanvalues = trhday2.loc[pd.isna(trhday2[2]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 2 R-Bias values\n", "nanstrhday1to2wonan = pd.concat([nanstrh2nanvalues, nanstrh1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "trhday2wonan = trhday2.drop(nanstrhday1to2wonan[0]) #Drop the list of NaN values from trh Day 2 R-Bias values\n", "trhday1to2wonan = trhday1.drop(nanstrhday1to2wonan[0]) #Drop the list of NaN values from trh Day 2 R-Bias values\n", "\n", "#Day 3\n", "nanstrh3nanvalues = trhday3.loc[pd.isna(trhday3[3]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 3 R-Bias values\n", "nanstrhday1to3wonan = pd.concat([nanstrh3nanvalues, nanstrh1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "trhday3wonan = trhday3.drop(nanstrhday1to3wonan[0]) #Drop the list of NaN values from trh Day 3 R-Bias values\n", "trhday1to3wonan = trhday1.drop(nanstrhday1to3wonan[0]) #Drop the list of NaN values from trh Day 3 R-Bias values\n", "\n", "#Day 8\n", "nanstrh8nanvalues = trhday8.loc[pd.isna(trhday8[8]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 8 R-Bias values\n", "nanstrhday1to8wonan = pd.concat([nanstrh8nanvalues, nanstrh1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "trhday8wonan = trhday8.drop(nanstrhday1to8wonan[0]) #Drop the list of NaN values from trh Day 8 R-Bias values\n", "trhday1to8wonan = trhday1.drop(nanstrhday1to8wonan[0]) #Drop the list of NaN values from trh Day 8 R-Bias values\n", "\n", "#Day 9\n", "nanstrh9nanvalues = trhday9.loc[pd.isna(trhday9[9]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 9 R-Bias values\n", "nanstrhday1to9wonan = pd.concat([nanstrh9nanvalues, nanstrh1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "trhday9wonan = trhday9.drop(nanstrhday1to9wonan[0]) #Drop the list of NaN values from trh Day 9 R-Bias values\n", "trhday1to9wonan = trhday1.drop(nanstrhday1to9wonan[0]) #Drop the list of NaN values from trh Day 9 R-Bias values\n", "\n", "#Day 10\n", "nanstrh10nanvalues = trhday10.loc[pd.isna(trhday10[10]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 10 R-Bias values\n", "nanstrhday1to10wonan = pd.concat([nanstrh10nanvalues, nanstrh1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "trhday10wonan = trhday10.drop(nanstrhday1to10wonan[0]) #Drop the list of NaN values from trh Day 10 R-Bias values\n", "trhday1to10wonan = trhday1.drop(nanstrhday1to10wonan[0]) #Drop the list of NaN values from trh Day 10 R-Bias values\n", "\n", "#Day 15\n", "nanstrh15nanvalues = trhday15.loc[pd.isna(trhday15[15]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 15 R-Bias values\n", "nanstrhday1to15wonan = pd.concat([nanstrh15nanvalues, nanstrh1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "trhday15wonan = trhday15.drop(nanstrhday1to15wonan[0]) #Drop the list of NaN values from trh Day 15 R-Bias values\n", "trhday1to15wonan = trhday1.drop(nanstrhday1to15wonan[0]) #Drop the list of NaN values from trh Day 15 R-Bias values\n", "\n", "#Day 16\n", "nanstrh16nanvalues = trhday16.loc[pd.isna(trhday16[16]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 16 R-Bias values\n", "nanstrhday1to16wonan = pd.concat([nanstrh16nanvalues, nanstrh1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "trhday16wonan = trhday16.drop(nanstrhday1to16wonan[0]) #Drop the list of NaN values from trh Day 16 R-Bias values\n", "trhday1to16wonan = trhday1.drop(nanstrhday1to16wonan[0]) #Drop the list of NaN values from trh Day 16 R-Bias values\n", "\n", "#Day 17\n", "nanstrh17nanvalues = trhday17.loc[pd.isna(trhday17[17]), :].index.to_frame() #Collect the index numbers of NaN values in the trh Day 17 R-Bias values\n", "nanstrhday1to17wonan = pd.concat([nanstrh17nanvalues, nanstrh1nanvalues]).drop_duplicates() #Create a DataFrame that combines these NaN values and drops the duplicates between the two days\n", "\n", "trhday17wonan = trhday17.drop(nanstrhday1to17wonan[0]) #Drop the list of NaN values from trh Day 17 R-Bias values\n", "trhday1to17wonan = trhday1.drop(nanstrhday1to17wonan[0]) #Drop the list of NaN values from trh Day 17 R-Bias values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## R-values Throughout Time Compared to Day 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dayxdataframes = [controlofTrHday2wonan,controlofTrHday3wonan,controlofTrHday8wonan,controlofTrHday9wonan,controlofTrHday10wonan,controlofTrHday15wonan,controlofTrHday16wonan,controlofTrHday17wonan] #Create a list of Day x DataFrames\n", "day1dataframes = [controlofTrHday1to2wonan,controlofTrHday1to3wonan,controlofTrHday1to8wonan,controlofTrHday1to9wonan,controlofTrHday1to10wonan,controlofTrHday1to15wonan,controlofTrHday1to16wonan,controlofTrHday1to17wonan] #Create a list of Day 1 DataFrames\n", "controlofTrHbootstrap = pd.DataFrame() #Create an empty DataFrame that will be populated with the 1000 bootstrapped r-values from each Day x\n", "\n", "for w in range(len(dayxdataframes)): #Repeat the bootstrapping process for however many days are in dayxdataframes list (8 days)\n", " numdraws=len(dayxdataframes[w]) #Draw the same number of values as the selected Day x DataFrame for each bootstrapping sample\n", " numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", " controlofTrHdayxbootstrap = [] #Create an empty list that will be populated with the r-values of each simulation\n", " lst = list(range(0,len(dayxdataframes[w]))) #Create a list from 0 to the length of Day x's index\n", " dayxdataframes[w].index = range(0,len(dayxdataframes[w])) #Reset Day x's index\n", " day1dataframes[w].index = range(0,len(day1dataframes[w])) #Reset Day 1's index\n", " for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of Day x's index\n", " controlofTrHdayxwonansamp = dayxdataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " controlofTrHday1toxwonansamp = day1dataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day 1's DataFrame\n", " res = scipy.stats.linregress(controlofTrHdayxwonansamp.iloc[:,0], controlofTrHday1toxwonansamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day 1 samples\n", " controlofTrHdayxbootstrap.append(res.rvalue) #Save the r-value of the samples to the controlofTrHdayxbootstrap list\n", " controlofTrHdayxbootstrap.sort() #Sort the 1000 r-values in ascending order\n", " controlofTrHdayxbootstrap = pd.DataFrame(controlofTrHdayxbootstrap) #Turn the list into a DataFrame for concatenation\n", " controlofTrHbootstrap = pd.concat([controlofTrHbootstrap,controlofTrHdayxbootstrap], axis = 1) #Concatenate the sorted r-values from each day into a DataFrame\n", "controlofTrHbootstrap.columns = [2,3,8,9,10,15,16,17] #Rename the column names with Day x's number" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dayxdataframes = [trhday2wonan,trhday3wonan,trhday8wonan,trhday9wonan,trhday10wonan,trhday15wonan,trhday16wonan,trhday17wonan] #Create a list of Day x DataFrames\n", "day1dataframes = [trhday1to2wonan,trhday1to3wonan,trhday1to8wonan,trhday1to9wonan,trhday1to10wonan,trhday1to15wonan,trhday1to16wonan,trhday1to17wonan] #Create a list of Day 1 DataFrames\n", "trhbootstrap = pd.DataFrame() #Create an empty DataFrame that will be populated with the 1000 bootstrapped r-values from each Day x\n", "\n", "for w in range(len(dayxdataframes)): #Repeat the bootstrapping process for however many days are in dayxdataframes list (8 days)\n", " numdraws=len(dayxdataframes[w]) #Draw the same number of values as the selected Day x DataFrame for each bootstrapping sample\n", " numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", " trhdayxbootstrap = [] #Create an empty list that will be populated with the r-values of each simulation\n", " lst = list(range(0,len(dayxdataframes[w]))) #Create a list from 0 to the length of Day x's index\n", " dayxdataframes[w].index = range(0,len(dayxdataframes[w])) #Reset Day x's index\n", " day1dataframes[w].index = range(0,len(day1dataframes[w])) #Reset Day 1's index\n", " for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of Day x's index\n", " trhdayxwonansamp = dayxdataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " trhday1toxwonansamp = day1dataframes[w].iloc[oneset] #Pull the randomly chosen index values from Day 1's DataFrame\n", " res = scipy.stats.linregress(trhdayxwonansamp.iloc[:,0], trhday1toxwonansamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day 1 samples\n", " trhdayxbootstrap.append(res.rvalue) #Save the r-value of the samples to the trhdayxbootstrap list\n", " trhdayxbootstrap.sort() #Sort the 1000 r-values in ascending order\n", " trhdayxbootstrap = pd.DataFrame(trhdayxbootstrap) #Turn the list into a DataFrame for concatenation\n", " trhbootstrap = pd.concat([trhbootstrap,trhdayxbootstrap], axis = 1) #Concatenate the sorted r-values from each day into a DataFrame\n", "trhbootstrap.columns = [2,3,8,9,10,15,16,17] #Rename the column names with Day x's numbe" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.plot(controlofTrHbootstrap.mean(), c = '#b92024', marker = '.') \n", "plt.fill_between([2,3,8,9,10,15,16,17],(controlofTrHbootstrap.iloc[50]), (controlofTrHbootstrap.iloc[950]), color='#b92024', alpha=.1) \n", "\n", "plt.plot(trhbootstrap.mean(), c = '#656565', marker = '.') #Plot the mean r-value per day of the bootstrapped samples of the TrH group with a solid line\n", "plt.fill_between([2,3,8,9,10,15,16,17],(trhbootstrap.iloc[50]), (trhbootstrap.iloc[950]), color='#3e5daa', alpha=.1) #Plot the 95% CI of the of the bootstrapped samples of the TrH group through the shaded regions\n", "\n", "plt.title('Bootstrapped R-Values Compared to Day 1') #Set the title\n", "plt.xlabel('Day') #Set the x-axis label\n", "plt.legend([ 'control', 'TrH']) #Create a legend\n", "\n", "plt.savefig('/Users/athenaye/Desktop/CRISPR_Bootstrapped R-Values Compared to Day 1.pdf', dpi=300)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Scatter Plot" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "controlofTrHsamelabels = controlofTrH #Create a DataFrame identical to the controlofTrH DataFrame \n", "controlofTrHsamelabels.columns = [0,0,0,0,0,0,0,0,0] #Rename all columns as 0 (same column names makes concatenating possible)\n", "\n", "#Create separate DataFrames for each day of the experiment\n", "controlofTrHsamelabelsday1 = pd.DataFrame(controlofTrHsamelabels.iloc[:,0])\n", "controlofTrHsamelabelsday2 = pd.DataFrame(controlofTrHsamelabels.iloc[:,1])\n", "controlofTrHsamelabelsday3 = pd.DataFrame(controlofTrHsamelabels.iloc[:,2])\n", "controlofTrHsamelabelsday8 = pd.DataFrame(controlofTrHsamelabels.iloc[:,3])\n", "controlofTrHsamelabelsday9 = pd.DataFrame(controlofTrHsamelabels.iloc[:,4])\n", "controlofTrHsamelabelsday10 = pd.DataFrame(controlofTrHsamelabels.iloc[:,5])\n", "controlofTrHsamelabelsday15 = pd.DataFrame(controlofTrHsamelabels.iloc[:,6])\n", "controlofTrHsamelabelsday16 = pd.DataFrame(controlofTrHsamelabels.iloc[:,7])\n", "controlofTrHsamelabelsday17 = pd.DataFrame(controlofTrHsamelabels.iloc[:,8])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "titleofday1 = [controlofTrHsamelabelsday1,controlofTrHsamelabelsday2,controlofTrHsamelabelsday8,controlofTrHsamelabelsday9,controlofTrHsamelabelsday15,controlofTrHsamelabelsday16] #Create a list of DataFrames of all possible Day x's\n", "titleofday2 = [controlofTrHsamelabelsday2,controlofTrHsamelabelsday3,controlofTrHsamelabelsday9,controlofTrHsamelabelsday10,controlofTrHsamelabelsday16,controlofTrHsamelabelsday17] #Create a list of DataFrames of all possible Day x+1's\n", "\n", "dayxdf = pd.DataFrame() #Create an empty DataFrame for the Day x R-bias values\n", "dayxplusonedf = pd.DataFrame() #Create an empty DataFrame for the Day x+1 R-bias values\n", "\n", "for w in range(len(titleofday1)): #Repeat this process for as many possible Day x's there are\n", " nanslist = [] #Create an empty list for all NaN values to be logged\n", " \n", " nanslist = titleofday1[w].loc[titleofday1[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x DataFrame \n", " nanslist1 = titleofday2[w].loc[titleofday2[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x+1 DataFrame\n", " nanslist.extend(nanslist1) #Create a list that combines all the NaN index values from Day x to Day x+1\n", "\n", " controlofTrHsamelabelsday1wonan = titleofday1[w].drop(nanslist) #Drop all rows that have a NaN from the Day x DataFrame\n", " controlofTrHsamelabelsday2wonan = titleofday2[w].drop(nanslist) #Drop all rows that have a NaN from the Day x+1 DataFrame\n", " dayxdf = pd.concat([dayxdf, controlofTrHsamelabelsday1wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", " dayxplusonedf = pd.concat([dayxplusonedf, controlofTrHsamelabelsday2wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", "controlofTrH1daydifferences = pd.concat([dayxdf, dayxplusonedf], axis = 1) #Concatenate the Day x and Day x+1 DataFrames into one, creating a DataFrame with two columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "trhsamelabels = trh #Create a DataFrame identical to the trh DataFrame \n", "trhsamelabels.columns = [0,0,0,0,0,0,0,0,0] #Rename all columns as 0 (same column names makes concatenating possible)\n", "\n", "#Create separate DataFrames for each day of the experiment\n", "trhsamelabelsday1 = pd.DataFrame(trhsamelabels.iloc[:,0])\n", "trhsamelabelsday2 = pd.DataFrame(trhsamelabels.iloc[:,1])\n", "trhsamelabelsday3 = pd.DataFrame(trhsamelabels.iloc[:,2])\n", "trhsamelabelsday8 = pd.DataFrame(trhsamelabels.iloc[:,3])\n", "trhsamelabelsday9 = pd.DataFrame(trhsamelabels.iloc[:,4])\n", "trhsamelabelsday10 = pd.DataFrame(trhsamelabels.iloc[:,5])\n", "trhsamelabelsday15 = pd.DataFrame(trhsamelabels.iloc[:,6])\n", "trhsamelabelsday16 = pd.DataFrame(trhsamelabels.iloc[:,7])\n", "trhsamelabelsday17 = pd.DataFrame(trhsamelabels.iloc[:,8])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "titleofday1 = [trhsamelabelsday1,trhsamelabelsday2,trhsamelabelsday8,trhsamelabelsday9,trhsamelabelsday15,trhsamelabelsday16] #Create a list of DataFrames of all possible Day x's\n", "titleofday2 = [trhsamelabelsday2,trhsamelabelsday3,trhsamelabelsday9,trhsamelabelsday10,trhsamelabelsday16,trhsamelabelsday17] #Create a list of DataFrames of all possible Day x+1's\n", "\n", "dayxdf = pd.DataFrame() #Create an empty DataFrame for the Day x R-bias values\n", "dayxplusonedf = pd.DataFrame() #Create an empty DataFrame for the Day x+1 R-bias values\n", "\n", "for w in range(len(titleofday1)): #Repeat this process for as many possible Day x's there are\n", " nanslist = [] #Create an empty list for all NaN values to be logged\n", " \n", " nanslist = titleofday1[w].loc[titleofday1[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x DataFrame \n", " nanslist1 = titleofday2[w].loc[titleofday2[w].isnull().any(axis=1)].index.to_list() #Create a list of all NaN index positions in Day x+1 DataFrame\n", " nanslist.extend(nanslist1) #Create a list that combines all the NaN index values from Day x to Day x+1\n", "\n", " trhsamelabelsday1wonan = titleofday1[w].drop(nanslist) #Drop all rows that have a NaN from the Day x DataFrame\n", " trhsamelabelsday2wonan = titleofday2[w].drop(nanslist) #Drop all rows that have a NaN from the Day x+1 DataFrame\n", " dayxdf = pd.concat([dayxdf, trhsamelabelsday1wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", " dayxplusonedf = pd.concat([dayxplusonedf, trhsamelabelsday2wonan]) #Concatenate new Day x DataFrames without the NaN values to each other, adding more rows\n", "trh1daydifferences = pd.concat([dayxdf, dayxplusonedf], axis = 1) #Concatenate the Day x and Day x+1 DataFrames into one, creating a DataFrame with two columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sns.regplot(x=controlofTrH1daydifferences.iloc[:,0], y=controlofTrH1daydifferences.iloc[:,1], color = '#b92024', marker = '.')\n", "sns.regplot(x=trh1daydifferences.iloc[:,0], y=trh1daydifferences.iloc[:,1], color = '#656565', marker = '.')\n", "plt.legend([ 'control', 'TrH'])\n", "\n", "plt.savefig('/Users/athenaye/Desktop/CRISPR_ScatterPlot.pdf', dpi=300)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Violin Plot" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.DataFrame.iteritems = pd.DataFrame.items #Pandas 2.0 uses .items instead of .iteritems. Because this was written with a previous Pandas version, this line remedies an Attribute Error\n", "\n", "#controlofTrH\n", "numdraws=len(controlofTrH1daydifferences) #Draw the same number of values as the selected DataFrame for each bootstrapping sample\n", "numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", "controlofTrHbootstrap = [] #Create an empty list to populate with r-values of the bootstrapped samples\n", "controlofTrHdayx = pd.DataFrame(controlofTrH1daydifferences.iloc[:,0]) #Create a DataFrame of all controlofTrH Day x R-Bias values\n", "controlofTrHdayxplusone = pd.DataFrame(controlofTrH1daydifferences.iloc[:,1]) #Create a DataFrame of all controlofTrH Day x+1 R-Bias values\n", "lst = list(range(len(controlofTrH1daydifferences))) #Create a list of values from 0 to the length of Day x's index\n", "for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of the index from controlofTrH1daydifferences DataFrame\n", " controlofTrHdayxplusonesamp = controlofTrHdayxplusone.iloc[oneset] #Pull the randomly chosen index values from Day x+1's DataFrame \n", " controlofTrHdayxsamp = controlofTrHdayx.iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " res = scipy.stats.linregress(controlofTrHdayxsamp.iloc[:,0], controlofTrHdayxplusonesamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day x+1 samples\n", " controlofTrHbootstrap.append(res.rvalue) #Save the r-value of the samples to the controlofTrHbootstrap list\n", " \n", "controlofTrHbootstrap.sort() #Sort the 1000 r-values in ascending order\n", "controlofTrHbootstrap = pd.DataFrame(controlofTrHbootstrap) #Convert controlofTrHbootstrap list to a DataFrame for further manipulation\n", "\n", "#TrH\n", "numdraws=len(trh1daydifferences) #Draw the same number of values as the selected DataFrame for each bootstrapping sample\n", "numsimulation=1000 #Simulate 1000 different iterations of Day x R-bias values\n", "trhbootstrap = [] #Create an empty list to populate with r-values of the bootstrapped samples\n", "trhdayx = pd.DataFrame(trh1daydifferences.iloc[:,0]) #Create a DataFrame of all trh Day x R-Bias values\n", "trhdayxplusone = pd.DataFrame(trh1daydifferences.iloc[:,1]) #Create a DataFrame of all trh Day x+1 R-Bias values\n", "lst = list(range(len(trh1daydifferences))) #Create a list of values from 0 to the length of Day x's index\n", "for x in range(numsimulation): #Repeat the r-value simulation 1000 times (since numsimulation=1000)\n", " oneset = (random.choices(lst, k = numdraws)) #Randomly choose a value of the index from trh1daydifferences DataFrame\n", " trhdayxplusonesamp = trhdayxplusone.iloc[oneset] #Pull the randomly chosen index values from Day x+1's DataFrame \n", " trhdayxsamp = trhdayx.iloc[oneset] #Pull the randomly chosen index values from Day x's DataFrame \n", " res = scipy.stats.linregress(trhdayxsamp.iloc[:,0], trhdayxplusonesamp.iloc[:,0]) #Perform linear regression statistics on the Day x and Day x+1 samples\n", " trhbootstrap.append(res.rvalue) #Save the r-value of the samples to the trhbootstrap list\n", "\n", "trhbootstrap.sort() #Sort the 1000 r-values in ascending order\n", "trhbootstrap = pd.DataFrame(trhbootstrap) #Convert controlofTrHbootstrap list to a DataFrame for further manipulation\n", "\n", " \n", "my_pal = {\"controlofTrH\": \"#b92024\", \"TrH\": \"#656565\", } #Set color palette\n", "differences1day = pd.concat([controlofTrHbootstrap, trhbootstrap], axis = 1) #Concatenate the bootstrapped r-values into one DataFrame that has 2 columns\n", "differences1day.columns = ['controlofTrH', 'TrH'] #Name the columns\n", "sns.violinplot(data = differences1day, palette = my_pal).set_title('1 Day Difference') #Output a violin plot of bootstrapped r-values\n", "\n", "plt.savefig('/Users/athenaye/Desktop/CRISPR_ViolinPlot.pdf', dpi=300)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "differences1day.mean() #Find mean of bootstrapped r-values" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }