import numpy as np import glob import h5py import re import xarray as xr import continuousanalysis as ca from astropy.timeseries import LombScargle from joblib import Parallel, delayed import astropy.units as u import pandas as pd import time import scipy as sci from pandarallel import pandarallel import seaborn.objects as so from seaborn import axes_style def Test(): print('test') def load_raw_data(path): centroid_path=glob.glob(path+'*centroid.bin') centroid=np.fromfile(centroid_path[0], dtype='single') c_length=centroid.shape[0] c_length_t=int(c_length/2/42) centroid=centroid.reshape([c_length_t, 2, 42]) time_path=glob.glob(path+'*time.bin') time=np.fromfile(time_path[0], dtype='single') # l=glob.glob(path) # print(l) return centroid, time def load_24hrmatfiles(path, num=2): tpath=glob.glob(path+str(num)+'TimeArray24h_u*.mat') apath=glob.glob(path+str(num)+'*AngleArray*.mat') tfile=h5py.File(tpath[0], 'r') # t_varname=tfile.keys() # print(t_varname) tmatrix=tfile.get('timearray') afile=h5py.File(apath[0], 'r') # a_varname=afile.keys() amatrix=afile.get('angle') return tmatrix, amatrix def convertmattonc(path, duration=24, savepath='../Data', calcpower=True, vector=False, circumvert=False): print("Getting all the stuff") tpaths=np.sort(glob.glob(path+'*TimeArray'+str(duration)+'h_u.mat')) if vector: apaths=np.sort(glob.glob(path+'*VectorArray'+str(duration)+'h.mat')) else: apaths=np.sort(glob.glob(path+'*AngleArray'+str(duration)+'h.mat')) cpaths=np.sort(glob.glob(path+'*CentroidArray'+str(duration)+'h.mat')) if len(tpaths)==len(apaths) & len(tpaths)==len(cpaths): for i, tpath in enumerate(tpaths): print(i) batchi=int(re.search('([0-9]*)Time',tpath).group(1)) th5=h5py.File(tpath) tmat=np.array(th5['timearray']).squeeze() print(th5['timearray'].shape) print(tmat.shape) # tmat if len(tmat.shape)>1: numtrials=tmat.shape[0] numt=tmat.shape[1] else: numtrials=1 numt=tmat.shape[0] print("numtrials: ", numtrials) print("numt: ", numt) tmat=np.reshape(tmat,[numtrials, numt]) tar=xr.DataArray(tmat, dims=['Trial', 't'], coords={"Trial":np.arange(0,numtrials), "t":np.arange(0,numt)}, name="Timestamps") tar.coords["Batch"]=batchi tar=tar.expand_dims("Batch") # tsave=str(batchi)+"_Timestamps"+str(duration)+"h.nc" # tar.to_netcdf(tsave) # if i==0: # tar_all=tar # else: # tar_all=xr.concat([tar_all,tar], dim="Batch") ah5=h5py.File(apaths[i]) print(apaths[i]) if vector: amat=np.array(ah5['vector']).squeeze() else: amat=np.array(ah5['angle']).squeeze() print(amat.shape) if len(amat.shape)==3: numtrials=amat.shape[0] numt=amat.shape[2] numflies=amat.shape[1] elif len(amat.shape)==2: numtrials=1 numt=amat.shape[1] numflies=amat.shape[0] amat=np.reshape(amat,[numtrials, numflies, numt]) aar=xr.DataArray(amat, dims=['Trial', 'Fly', 't'], coords={"Trial":np.arange(0,numtrials), "t":np.arange(0,numt)}, name="Angles") aar.coords["Batch"]=batchi aar=aar.expand_dims("Batch") asave=str(batchi)+"_Angles_"+str(duration)+"h.nc" # print(aar) Warning("Error") # print(asave) # print(aar.shape) # aar.to_netcdf(asave) bar=aar.to_dataset().merge(tar) # Version without Trial # print("crashes right after here?") # print(asave) # bar.to_netcdf(asave) # return bar anglesxtimestamp=xr.concat(Parallel(n_jobs=-1, verbose=4)(delayed(removetrials)(bar, fi) for fi, f in enumerate(bar["Fly"])), dim="Fly") # anglesxtimestamp=bar.to_dataframe().dropna(subset=["Angles", "Timestamps"]).reset_index().drop(columns=["t", "Trial"]).set_index(["Timestamps", "Fly", "Batch"]).to_xarray() print(asave) # return anglesxtimestamp anglesxtimestamp.to_netcdf(asave) # downsamplesave=str(batchi)+"_DownsampleMeans"+str(duration)+"h.csv" # angledf_resampled=dailyaverage(anglesxtimestamp) # angledf_resampled.to_csv(downsamplesave) # lsarray[:,:,batchi-1]=np.array(calculatepowerall(anglesxtimestamp, batchi=1)).T # return anglesxtimestamp # return bar if calcpower: # f=ca.calcfreq(fres=1000, log=True, fmax=5, fmin=1/60/60/24/30) lsarray=calculatepowerall(anglesxtimestamp, batchi=batchi, computeshuffled=True, circumvert=circumvert) if vector: lssave=str(batchi)+"_VectorPower"+str(duration)+"h.nc" else: lssave=str(batchi)+"_Power"+str(duration)+"h.nc" lsarray.to_netcdf(lssave) return def removetrials(bar, fly): return bar.sel(Fly=fly).to_dataframe().dropna(subset=["Angles", "Timestamps"]).reset_index().drop(columns=["t", "Trial"]).set_index(["Timestamps", "Batch"]).to_xarray() # def calcpower def dailyaverage(anglexarray, circling=False): tic=time.perf_counter() anglexarray["Timestamps"]=pd.to_datetime(anglexarray["Timestamps"], origin="unix", unit='s') if circling: anglexarray["Angles"]=np.sin(anglexarray["Angles"]) angledf=anglexarray["Angles"].to_dataframe().reset_index().set_index("Timestamps") angledf["Fly"]=pd.Categorical(angledf["Fly"]) toc=time.perf_counter() print(toc-tic) # return angledf angledf_pivot=angledf.pivot(columns=["Batch", "Fly"], values="Angles") angledf_pivot_resampled=angledf_pivot.resample('1D').mean() toc=time.perf_counter() print(toc-tic) angledf_pivot_resampled_error=angledf_pivot.resample('1D').sem() toc=time.perf_counter() print(toc-tic) angledf_resampled=angledf_pivot_resampled.melt(ignore_index=False, value_name="Angles").reset_index() angledf_resampled_error=angledf_pivot_resampled_error.melt(ignore_index=False, value_name="Angles").reset_index() angledf_resampled["Fly"]=pd.Categorical(angledf_resampled["Fly"]) angledf_resampled["Angle SEM"]=angledf_resampled_error["Angles"] angledf_resampled["A_lb"]=angledf_resampled["Angles"]-angledf_resampled["Angle SEM"] angledf_resampled["A_ub"]=angledf_resampled["Angles"]+angledf_resampled["Angle SEM"] # return angledf_resampled angledf_resampled["Day"]=np.array(angledf_resampled["Timestamps"].values, dtype=int) angledf_resampled["Day"]=angledf_resampled["Day"]-angledf_resampled["Day"][0] angledf_resampled["Day"]=angledf_resampled["Day"]//86400000000000 angledf_resampled=angledf_resampled[~np.isnan(angledf_resampled["Angles"])] toc=time.perf_counter() print(toc-tic) return angledf_resampled def calculatepowerall(anglesxtimestamp, batchi=1, computeshuffled=False, circumvert=True): f=ca.calcfreq(fres=100, log=True, fmax=5, fmin=1/60/60/24/30) # lsarray=xr.DataArray(np.zeros([f.shape[0], bar["Fly"].values.shape[0], bar["Trial"].values.shape[0],1]), # coords={"Freq":f, "Fly":bar["Fly"].values, "Trial":bar["Trial"].values, "Batch":[batchi]}, # dims=["Freq", "Fly", "Trial", "Batch"], # name="Power") if computeshuffled: lsarray=xr.DataArray(np.zeros([f.shape[0], anglesxtimestamp["Fly"].values.shape[0],1, 2]), coords={"Freq":f, "Fly":anglesxtimestamp["Fly"].values, "Batch":[batchi], "Shuffled":["Unshuffled", "Shuffled"]}, dims=["Freq", "Fly","Batch", "Shuffled"], name="Power") else: lsarray=xr.DataArray(np.zeros([f.shape[0], anglesxtimestamp["Fly"].values.shape[0],1]), coords={"Freq":f, "Fly":anglesxtimestamp["Fly"].values, "Batch":[batchi]}, dims=["Freq", "Fly","Batch"], name="Power") # print(lsarray) # return # for trial in bar["Trial"].values: output=Parallel(n_jobs=1, verbose=4)(delayed(calculatepowerforonefly)(f, anglesxtimestamp, fly, circumvert=circumvert) for fly, flynum in enumerate(anglesxtimestamp["Fly"])) if computeshuffled: lsarray[:,:,0, 0]=np.array(output).T # print("Computing Shuffled Version Now") output=Parallel(n_jobs=-1, verbose=4)(delayed(calculatepowerforonefly)(f, anglesxtimestamp, fly, shuffle=True, circumvert=circumvert) for fly, flynum in enumerate(anglesxtimestamp["Fly"])) lsarray[:,:,0, 1]=np.array(output).T else: lsarray[:,:,0]=np.array(output).T # lsarray["Fly"]=anglesxtimestamp["Fly"] return lsarray def calculatepowerforonefly(f, anglesxtimestamp, fly, shuffle=False, circumvert=True): # print(anglesxtimestamp) y=anglesxtimestamp["angle"].squeeze()[fly,:] if circumvert: y=np.sin(y) # print("Power Shapes") # print(y.shape) # print(fly) yi=np.isnan(y) #Problem is here, wrong dimensions (42 instead of) # print(yi.shape) t=anglesxtimestamp["timestamps"][~yi]*u.second y=anglesxtimestamp["angle"].squeeze()[fly,~yi] if shuffle: rng = np.random.default_rng() rng.shuffle(y.values) ls=LombScargle(t=t, y=y) # slicesize=lsarray[:,fly,:].shape try: p=ls.power(f) except: # print(trial, fly) # when there isn't enough non nan flies p=np.empty(f.shape) p.fill(np.NaN) # return([t,y]) # print(p.shape) # print(slicesize) # print("Calculated Power for Fly "+str(int(fly))) return p def lombscarglelowpass(dataframe, maxsamples=-1, plotres=1000, lp=60*60*24): x=dataframe.dropna().index.to_numpy() y=dataframe.dropna()["Angles"] # min(x) yi=~np.isnan(y) yi if maxsamples<1: imax=x.shape[0]-1 else: imax=min(x.shape[0]-1, maxsamples) # yi=np.arange(0,imax) x=np.array(x,dtype=float) t=x*10**-9 t-=t[0] t=t*u.second print("Total time is", t[-1].to(u.day)) print("Processing until", t[imax].to(u.day)) ls=LombScargle(t[0:imax], y[0:imax], nterms=1) f, p=ls.autopower() print(imax) tmax=t[imax] print(tmax) t_fit=np.linspace(0, np.array(tmax), plotres)*u.second if lp>0: numlowpass=sum(f<(1/(lp*u.second))) else: numlowpass=len(f)-1 fsum=np.zeros([t_fit.shape[0], numlowpass]) fweighted=np.zeros([t_fit.shape[0], numlowpass]) for i, fi in enumerate(f[0:numlowpass]): # print(i) fsum[:,i]=np.array(ls.model(t_fit, f[i])) fweighted[:,i]=fsum[:,i]*p[i] fweighted/=np.sum(np.array(p[0:numlowpass])) # fweighted=fsum*p pdfsum=pd.DataFrame(fsum[:,0:numlowpass]) pdfsum.index=pd.TimedeltaIndex(np.array(t_fit), unit='s', name="Time") pdfsum.columns=pd.Index(f[0:numlowpass], name="Frequency (1/s)") pdfsum=pd.DataFrame(pdfsum.mean(axis=1), columns=["Averaged"]) pdfsum["Weighted"]=np.sum(fweighted, axis=1) return pdfsum def exponentialnan(x, sigma=24): mp=int(np.floor(x.shape[0]/2)) # sigma=60*24 win=1/(sigma*np.sqrt(2)*np.pi)*np.exp(-.5*np.abs(np.arange(x.shape[0])-mp)/sigma)**2 win*=~np.isnan(x) return np.nansum(x*win/np.nansum(win)) def nandownsample(x): return np.nanmean(x) def ntermslombscargle(dataframe, nterms=30, maxsamples=-1, basefreq=1/60/60/24/30): x=dataframe.dropna().index.to_numpy() y=dataframe.dropna()["Angles"] # min(x) yi=~np.isnan(y) yi if maxsamples<1: imax=x.shape[0]-1 else: imax=min(x.shape[0]-1, maxsamples) # yi=np.arange(0,imax) x=np.array(x,dtype=float) t=x*10**-9 t-=t[0] t=t*u.second print("Total time is", t[-1].to(u.day)) print("Processing until", t[imax].to(u.day)) ls=LombScargle(t[0:imax], y[0:imax], nterms=nterms) t_fit=np.linspace(0,14,1000)*u.day baseperiod=(1/(basefreq/u.second).to(1/u.day)).to_string() fit=ls.model(t_fit, basefreq/u.second) method_str="Fit of "+str(nterms)+" terms, Bp="+baseperiod fitdf=pd.DataFrame({"Turning":fit, "Method":method_str}, index=pd.TimedeltaIndex(t_fit.value, unit='D', name="Time")).reset_index() # plt.plot(fit) return fitdf def loadncdata(batch, hour): a=xr.load_dataset("1_Angles24h.nc") a["Fly"] b=xr.load_dataset("1_Timestamps24h.nc") c=b.merge(a) c["Timestamps"] d=c.to_dataframe().dropna(subset=["Angles", "Timestamps"]).reset_index().drop(columns=["t", "Trial"]).set_index(["Timestamps", "Fly", "Batch"]).to_xarray() def loadallpowerdata(path="./", hour='2'): # powerpath=glob.glob(path+'*.Power'+hour+'h.nc') allpower=xr.open_mfdataset(path+'*Power'+hour+'h.nc') return allpower def plotallpowers(allpower): # Expects an xarray with Power x hours x fly x batch return def blacknan(x, threshold=.5): window=sci.signal.windows.blackman(x.shape[0]) if np.nansum(window[~np.isnan(x).squeeze()]/np.sum(window))>(threshold): return np.nanmean(np.multiply(window,x.squeeze()/np.mean(window[~np.isnan(x).squeeze()]))) else: return np.nan def blacknanerror(x): if np.nansum(sci.signal.windows.blackman(x.shape[0])[~np.isnan(x).squeeze()])>(threshold): weights=sci.signal.windows.blackman(x.shape[0])*x rng=rng return np.nanmean(sci.signal.windows.blackman(x.shape[0])*x) else: return np.nan def blacknanscramble(x,nbootstrap=1000): #Scramble the data to make a smoothed white noise version to show what's going on dfx=x.resample('60s').mean().rolling(window=3001, min_periods=1, center=True).apply(blacknan) rng = np.random.default_rng() dfx["Method"]=Blacknan for i in range(nbootstrap): rng.shuffle(x.values) return def getpercentilesfromshuffleddata(shuffleddata, raw, threshold=.5): numshuffles=shuffleddata.shape[1] p50t=int(numshuffles*.5) p5t=int(numshuffles*.05) p95t=int(numshuffles*.95) shuffleddatasort=np.sort(shuffleddata, axis=1) mean=shuffleddatasort.mean(axis=1) p50=shuffleddatasort[:,p50t] p5=shuffleddatasort[:,p5t] p95=shuffleddatasort[:,p95t] resampled1h=raw.resample('1H').mean() percentiledf=pd.DataFrame({"Shuffled":mean, "Median":p50, "lb":p5, "ub":p95}, index=pd.Index(data=resampled1h.index, name="Hours")) percentiledf=percentiledf.resample('1H').mean() percentiledf.reset_index(inplace=True) percentiledf["Raw"]=resampled1h.values percentiledf["Low Pass Filtered"]=resampled1h.rolling(window=51, min_periods=10, center=True).apply(blacknan, kwargs={"threshold":threshold}).values percentiledf["Chunk"]=0 percentiledf.loc[1:,"Chunk"]=np.cumsum(np.diff(np.isnan(percentiledf["Low Pass Filtered"]))) percentiledf=percentiledf.drop(columns=["Median",]).melt(id_vars=["Hours", "lb", "ub", "Chunk", "Raw"]) percentiledf.Hours=np.array(np.array(percentiledf.Hours, dtype='timedelta64[h]'), dtype="int") # percentiledf=percentiledf[[]] return percentiledf def parallelshuffle(x): rng = np.random.default_rng() return rng.permutation(x).squeeze() def parallelchunkandshuffle(x, chunksize=1000): x=x.squeeze() rng = np.random.default_rng() chunks=np.array_split(x, chunksize) def parallelshuffleandfilter(x_df, threshold=.5): # rng = np.random.default_rng() x=x_df.sample(frac=1).reset_index(drop=True) x.index=x_df.index x=x.resample('1H').mean() return x.rolling(window=51, min_periods=10, center=True).apply(blacknan, kwargs={"threshold":threshold}) def shuffleandfilter(rawdata, numshuffles=1000, threshold=.5, parallel=True): time_len=rawdata.shape[0] shuffleddata=np.zeros([numshuffles, time_len]) if parallel: njobs=-1 else: njobs=1 shuffleddata[:,:]=Parallel(n_jobs=1)(delayed(parallelshuffle)(rawdata) for i in range(numshuffles)) # return shuffleddata shuffleddata=pd.DataFrame(shuffleddata.T, index=rawdata.index) #s index=nonanindex shuffleddata=shuffleddata.resample('1H').mean() if parallel: pandarallel.initialize(progress_bar=False) shuffleddata=shuffleddata.rolling(window=51, min_periods=10, center=True, ).parallel_apply(blacknan, kwargs={"threshold":threshold}) else: shuffleddata=shuffleddata.rolling(window=51, min_periods=10, center=True, ).apply(blacknan, kwargs={"threshold":threshold}) # shuffledata. percentiledf=getpercentilesfromshuffleddata(shuffleddata, rawdata, threshold) return percentiledf def getallflies(xarray, numshuffles=1000, maxflies=50, save=False): #get the xarray with xr.load_dataset flyindex=pd.MultiIndex.from_product([xarray["Batch"].values, xarray["Fly"].values]) if save: Parallel(n_jobs=1, verbose=5)(delayed(getonefly)(xarray=xarray, f=f, b=b, numshuffles=numshuffles, save=save) for b, f in flyindex) return else: pdall=Parallel(n_jobs=4, verbose=5)(delayed(getonefly)(xarray=xarray, f=f, b=b, numshuffles=numshuffles, save=save) for b, f in flyindex) pdall=pd.concat(pdall) return pdall.reset_index() def getonefly(xarray, f, b, numshuffles, save=False): # print("F",f,"B",b, end="") name=str('F'+str(f)+'B'+str(b)+'.csv') if save & len(glob.glob(name))>0: return ddf=xarray.sel(Fly=f, Batch=b).squeeze().to_dataframe().drop(columns="Batch") ddf.index=pd.to_datetime(ddf.index, origin="unix", unit='s') ddfs=np.sin(ddf) ddfs0=ddfs ddfs0.index=ddfs.index-ddfs.index[0] nonanraw=ddfs0.dropna() pdsubset=shuffleandfilter(nonanraw, numshuffles=numshuffles, parallel=False) pdsubset["Fly"]=f pdsubset["Batch"]=b if save: pdsubset.to_csv(name) return else: return pdsubset.dropna() def getallfliesbatch(): pdall=Parallel(n_jobs=-1, verbose=5)(delayed(getonefly)(xarray=xarray, f=f, numshuffles=numshuffles) for f in xarray["Fly"].values[0:maxflies]) pdall=pd.concat(pdall) return def nonchunkedmemoryhelper(list): for l in list: print(l) xarray=xr.load_dataset(l) flyindex=pd.MultiIndex.from_product([xarray["Batch"].values, xarray["Fly"].values]) print(flyindex) Parallel(n_jobs=1, verbose=5, temp_folder='partemp')(delayed(getonefly)(xarray=xarray, f=f, b=b, numshuffles=1000, save=True) for b, f in flyindex) def daskcompute(xarraychunk): xarraychunk=xarraychunk.dropna(dim="Timestamps").to_dataframe().drop(columns="Batch") xarraychunk.index=pd.to_datetime(xarraychunk.index, origin="unix", unit='s') xarraychunk=np.sin(xarraychunk) if len(xarraychunk)>0: xarraychunk.index=xarraychunk.index-xarraychunk.index[0] list=Parallel(n_jobs=-1, verbose=5)(delayed(parallelshuffleandfilter)(xarraychunk) for i in range(1000)) percentiledata=getpercentilesfromshuffleddata(listruns, xarraychunk) def getmeanandstdofalltimestamps(xarraydataset): xarraydataset=xarraydataset.to_dataframe().reset_index().drop(columns=["Trial","t"]) xarraydataset["Timestamps"]=pd.to_datetime(xarraydataset["Timestamps"], origin="unix", unit='s') xarraydataset["batchxfly"]="f"+xarraydataset["Fly"].astype(str)+"b"+xarraydataset["Batch"].astype(str) xarraydataset=xarraydataset.drop(columns=["Fly","Batch"]) xarraydataset["Timestamps"]=xarraydataset.groupby("batchxfly")["Timestamps"].transform(lambda x: x-x.iloc[0]).dropna() return xarraydataset, xarraydataset.groupby("batchxfly").agg(["mean","std", "count"]) def import_matlab_netcdf_files(file_path): imported_xarray=xr.load_dataset(file_path, engine="netcdf4") imported_xarray['timestamp'].values=pd.to_datetime(imported_xarray['timestamp'].values.squeeze(),unit='s', origin='unix').values.reshape(1,1,max(imported_xarray['timestamp'].values.shape)) return imported_xarray def import_dask_matlab_files(file_path="", duration=[2, 24], batch=np.arange(1,8), fly=np.arange(1,43), override=False): # Loop through all the files and calculate the summary statistics. Parallize the loop iterables=[duration, batch, fly] index=pd.MultiIndex.from_product(iterables, names=["Duration", "Batch", "Fly"]) # index Parallel(n_jobs=8, verbose=5)(delayed(importonefile)(file_path, i[0],i[1],i[2], override=override) for i in index) # return # def nestedload(path): # xr.open_mfdataset # returnb def importonefile(file_path, d, b, f, save_timeseries=False, save_summary=True, bins_and_power=True, override=False): #load the raw data and create a summary file that has the relevant data we're interested in oneflylist=glob.glob(file_path+"CirclingData_"+str(d)+"h_B"+str(b)+"_F"+str(f)+"_T*.nc") print(d,b, f) savename="Circling_Summary_"+str(d)+"h_B"+str(b)+"_F"+str(f)+".nc" if len(glob.glob("./Summary Data/"+savename))>0: if not override: # print(override) print(savename+" already exists") return # print(file_path+"CirclingData_"+str(d)+"h_B"+str(b)+"_F"+str(f)+"_T*.nc") # xarraysubset=[ if len(oneflylist)>0: if len(oneflylist)==1: print("Only One trial detected for ", savename) xarraysubset=xr.open_mfdataset(oneflylist, combine="nested", concat_dim="t").squeeze().compute().stack({"thack":["t"]}).swap_dims({"thack":"timestamps"}) elif len(oneflylist)>0: # xarraysubset=xr.open_mfdataset(oneflylist, combine="nested", concat_dim="t", chunks=-1).squeeze().compute().stack({"t_hack":["t", "Trial"]}).swap_dims({"t_hack":"timestamps"}) # return xarraysubset # xarraysubset=xr.open_mfdataset(oneflylist, combine="nested", concat_dim="t", chunks=-1).squeeze() # return xarraysubset # print("Trial Testing:", d,b, f) xarraysubset=xr.open_mfdataset(oneflylist, combine="nested", concat_dim="t").squeeze().compute().stack({"thack":["Trial", "t"]}).swap_dims({"thack":"timestamps"}) # return xarraysubset # xarraysubset=xr.open_mfdataset(oneflylist, combine="nested", concat_dim="t").compute().squeeze().swap_dims({"t":"timestamps"}) xarraysubset["timestamps"]=pd.to_datetime(xarraysubset["timestamps"], unit='s', origin='unix') # print("Calculated datetime") xarraysubset=xarraysubset.sortby("timestamps") # print("Sortedtimestamp") xarraysubset["timestamps"]=xarraysubset["timestamps"]-xarraysubset["timestamps"][0] # print("Zeroed Timestamps") # xarraysubset["timestamps"].expand_dims(["Fly", "Batch"]) # xarraysubset=xarraysubset.swap_dims({"timestamps":"t_hack"}) # print("reswapped dims") xarraysubset=xarraysubset.unstack().expand_dims(["Fly", "Batch"]).drop("thack") # return xarraysubset if save_timeseries: # print(d, b, f) # print("CirclingData_"+str(d)+"h_B"+str(b)+"_F"+str(f)+".nc") pathname="CirclingData_"+str(d)+"h_B"+str(b)+"_F"+str(f)+".nc" # print(type(pathname)) xarraysubset.to_netcdf(pathname) print("CirclingData_"+str(d)+"h_B"+str(b)+"_F"+str(f)+".nc") if save_summary: xarraysubset=xarrayone_processing(xarraysubset) if bins_and_power: savename="Circling_Summary_"+str(d)+"h_B"+str(b)+"_F"+str(f)+".nc" print(savename) xarraysubset=get_bins(xarraysubset, save_name=savename) else: print("Couldn't Find any files here: "+file_path+"CirclingData_"+str(d)+"h_B"+str(b)+"_F"+str(f)+"_T*.nc") # xarraysubset=[] print("End", d, b, f) # return xarraysubset #for debugging, def xarrayone_processing(xarraysubset): varnames=list(xarraysubset) # print(varnames) while 'inx' in varnames: varnames.remove('inx') while 'iny' in varnames: varnames.remove('iny') if 'speed' in varnames: varnames.append('speed_logged') xarraysubset["speed_logged"]=np.log(xarraysubset["speed"]) # print("Speed Logged") for foobar in varnames: # print(foobar) xarraysubset[foobar+"_summary"]=xr.concat([xarraysubset[foobar].mean(dim="timestamps", skipna=True).compute().expand_dims(dim={"Measure":["Mean"]}), xarraysubset[foobar].std(dim="timestamps").compute().expand_dims(dim={"Measure":["Std"]}), xarraysubset[foobar].min(dim="timestamps").compute().expand_dims(dim={"Measure":["Min"]}), xarraysubset[foobar].max(dim="timestamps").compute().expand_dims(dim={"Measure":["Max"]}), ], dim="Measure") # if foobar="speed_logged": # for bin #set to time delta # xarraysubset["timestamps"]=xarraysubset["timestamps"]-xarraysubset["timestamps"][0] return xarraysubset def get_bins(x_array, numbins=201, save_name="", savepath="./Summary Data/"): f=ca.calcfreq(fres=1000, log=True, fmax=5, fmin=1/60/60/24/30)/u.second t=x_array["timestamps"] bin_ds=xr.Dataset() a=x_array["direction"].plot.hist(bins=np.linspace(-np.pi, np.pi, numbins)) # print("Got here") bin_ds["direction_bins"]=xr.DataArray(np.expand_dims(a[0], axis=[1,2]), coords={"Bins_direction":np.mean([a[1][0:-1], a[1][1:]], axis=0),"Fly":x_array.Fly, "Batch":x_array.Batch}) # x_array["direction_bins"].expand_dims(dim={"Fly":x_array.Fly}) p=calcpowerforfly(x_array["direction"],x_array["timestamps"]) ps=calcpowerforfly(x_array["direction"],x_array["timestamps"],shuffle=True) bin_ds["direction_psd"]=xr.concat([xr.DataArray(np.expand_dims(p, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":["Non-shuffled Data"]}), xr.DataArray(np.expand_dims(ps, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":['Shuffled']})], dim="Shuffled") a=x_array["angle"].plot.hist(bins=np.linspace(-1, 1, numbins)) bin_ds["angle_bins"]=xr.DataArray(np.expand_dims(a[0], axis=[1,2]), {"Bins_angle":np.mean([a[1][0:-1], a[1][1:]], axis=0),"Fly":x_array.Fly, "Batch":x_array.Batch}) p=calcpowerforfly(x_array["angle"],x_array["timestamps"]) ps=calcpowerforfly(x_array["angle"],x_array["timestamps"],shuffle=True) bin_ds["angle_psd"]=xr.concat([xr.DataArray(np.expand_dims(p, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":["Non-shuffled Data"]}), xr.DataArray(np.expand_dims(ps, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":['Shuffled']})], dim="Shuffled") # bin_ds["angle_psd"]=xr.DataArray(np.expand_dims(ps, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":['Shuffled']}) a=x_array["theta"].plot.hist(bins=np.linspace(-np.pi, np.pi, numbins)) bin_ds["theta_bins"]=xr.DataArray(np.expand_dims(a[0], axis=[1,2]), {"Bins_theta":np.mean([a[1][0:-1], a[1][1:]], axis=0),"Fly":x_array.Fly, "Batch":x_array.Batch}) bin_ds["theta_summary"]=x_array["theta_summary"] p=calcpowerforfly(x_array["theta"],x_array["timestamps"]) ps=calcpowerforfly(x_array["theta"],x_array["timestamps"],shuffle=True) bin_ds["theta_psd"]=xr.concat([xr.DataArray(np.expand_dims(p, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":["Non-shuffled Data"]}), xr.DataArray(np.expand_dims(ps, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":['Shuffled']})], dim="Shuffled") a=x_array["turning"].plot.hist(bins=np.linspace(-2*np.pi, 2*np.pi, numbins)) bin_ds["turning_bins"]=xr.DataArray(np.expand_dims(a[0], axis=[1,2]), {"Bins_turning":np.mean([a[1][0:-1], a[1][1:]], axis=0),"Fly":x_array.Fly, "Batch":x_array.Batch}) bin_ds["turning_summary"]=x_array["turning_summary"] p=calcpowerforfly(x_array["turning"],x_array["timestamps"]) ps=calcpowerforfly(x_array["turning"],x_array["timestamps"],shuffle=True) bin_ds["turning_psd"]=xr.concat([xr.DataArray(np.expand_dims(p, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":["Non-shuffled Data"]}), xr.DataArray(np.expand_dims(ps, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":['Shuffled']})], dim="Shuffled") a=x_array["speed_logged"].plot.hist(bins=np.linspace(-7, 7, numbins)) bin_ds["speed_logged_bins"]=xr.DataArray(np.expand_dims(a[0], axis=[1,2]), {"Bins_speed_logged":np.mean([a[1][0:-1], a[1][1:]], axis=0),"Fly":x_array.Fly, "Batch":x_array.Batch}) bin_ds["speed_logged_summary"]=x_array["speed_logged_summary"] p=calcpowerforfly(x_array["speed"],x_array["timestamps"]) ps=calcpowerforfly(x_array["speed"],x_array["timestamps"],shuffle=True) bin_ds["speed_psd"]=xr.concat([xr.DataArray(np.expand_dims(p, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":["Non-shuffled Data"]}), xr.DataArray(np.expand_dims(ps, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":['Shuffled']})], dim="Shuffled") # print(bin_ds["speed_psd"]) # a=x_array["turning"].plot.hist(bins=np.linspace(-1, 1, 201)) # x_array["turning_bins"]=xr.DataArray(a[0], {"Bins_turning":np.mean([a[1][0:-1], a[1][1:]], axis=0)}) a=x_array["r"].plot.hist(bins=np.linspace(0, 1.2, numbins)) bin_ds["r_bins"]=xr.DataArray(np.expand_dims(a[0], axis=[1,2]), {"Bins_r":np.mean([a[1][0:-1], a[1][1:]], axis=0),"Fly":x_array.Fly, "Batch":x_array.Batch}) p=calcpowerforfly(x_array["r"],x_array["timestamps"]) ps=calcpowerforfly(x_array["r"],x_array["timestamps"],shuffle=True) bin_ds["r_psd"]=xr.concat([xr.DataArray(np.expand_dims(p, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":["Non-shuffled Data"]}), xr.DataArray(np.expand_dims(ps, axis=[1,2,3]), coords={"Freq":f,"Fly":x_array.Fly, "Batch":x_array.Batch, "Shuffled":['Shuffled']})], dim="Shuffled") for varitem in list(x_array.variables): if "summary" in varitem: bin_ds[varitem]=x_array[varitem] # return bin_ds if len(save_name): bin_ds.to_netcdf(savepath+save_name) return bin_ds # def makehistogram() def calcpowerforfly(y, t, f=ca.calcfreq(fres=1000, log=True, fmax=5, fmin=1/60/60/24/30)/u.second, shuffle=False): y=y.squeeze().values yi=np.isnan(y) # f=ca.calcfreq(fres=1000, log=True, fmax=5, fmin=1/60/60/24/30)/u.second # print(y.shape) t=np.array(t.squeeze()[~yi], dtype=float)*u.second*10**-9 # return yi t=t.squeeze() y=y[~yi] # return y,t if shuffle: rng = np.random.default_rng() rng.shuffle(y) ls=LombScargle(t=t, y=y) try: p=ls.power(f) except: # print(trial, fly) # when there isn't enough non nan flies p=np.empty(f.shape) p.fill(np.NaN) # return([t,y]) # print(p.shape) # print(slicesize) # print("Calculated Power for Fly "+str(int(fly))) return p def loadallsummarydata(path="", duration=2): # loads all summary data, as produced by get_bins # nestedlist_batch.append(glob.glob("CirclingData_2h_B"+str(batch)+"_F*.nc")) nestedlist_batch=[] batch=1 all_xarray=xr.open_mfdataset(glob.glob(path+"Circling_Summary_"+str(duration)+"h_B"+str(batch)+"_F*.nc"), combine="nested", concat_dim=["Fly"]).stack(flyid=["Batch", "Fly"]) # sum1=test1.stack(flykey=["Fly", "Batch"]) print(all_xarray["flyid"].shape) for batch in np.arange(2,7): # nestedlist_batch=[] print(batch) # nestedlist_batch.append(glob.glob("CirclingData_2h_B"+str(batch)+"_F*.nc")) if len(glob.glob(path+"Circling_Summary_"+str(duration)+"h_B"+str(batch)+"_F*.nc"))>0: new_xarray=xr.open_mfdataset(glob.glob(path+"Circling_Summary_"+str(duration)+"h_B"+str(batch)+"_F*.nc"), combine="nested", concat_dim=["Fly"]).stack(flyid=["Batch", "Fly"]) all_xarray=xr.concat([all_xarray,new_xarray], dim="flyid") print(all_xarray["flyid"].shape) return all_xarray def makehistogram(summary_xarray, data_category, xlabel="", ylabel="Counts", title=""): if "_bins" in data_category and len(xlabel)<1: # print(data_label) data_category=data_category[:-5] if len(xlabel)<1: xlabel=data_category .capitalize() if len(title)<1: title="Distribution of "+data_category.capitalize()+" across all Fly Behavior" summary_df=summary_xarray[data_category+"_bins"].sum(dim="flyid").to_dataframe().reset_index() summary_df["recording_length"]=pd.Categorical(summary_df["recording_length"]) # return summary_df summary_df.sort_values(by="recording_length", ascending=False, inplace=True) # return summary_df histogram=( so.Plot(data=summary_df, x="Bins_"+data_category, y=data_category+"_bins", color="recording_length").add(so.Bars()) .label(x=xlabel, y=ylabel, title=title, color="Recording Length") .theme({**axes_style("ticks")}) ) return histogram def plotsummary(summary_xarray): return def calculate_bouts(xarray, target_var="speed"): speed_series=xarray['speed'].squeeze().to_series() target_series=xarray[target_var].squeeze().to_series() bouts=(((np.log(speed_series)>.5).rolling(10, center=True).sum())>1) boutnumber=((np.round(bouts).diff())>0).cumsum() groups=boutnumber*bouts # return [target_series, groups] chunkmeans=target_series.reset_index().groupby(np.array(groups)).mean(numeric_only=False).set_index('timestamps') # return chunkmeans chunkmeans.dropna(inplace=True) # valuename=target_var valuename="value" chunkmeans.columns=["Chunk "+target_var] chunkDB=chunkmeans.melt(ignore_index=False, value_name=valuename) # return chunkDB # # turning_all=pd.concat([chunkDB.melt(),allDB.melt(), dayDB.melt(), hourDB.melt(), minDB.melt()], axis=0) byframe=pd.DataFrame({"Frame "+target_var:target_series[bouts]}).melt(ignore_index=False, value_name=valuename) rollingaverage_hour=(target_series[bouts].resample('1H').mean()).dropna() hourDB=pd.DataFrame({"Hour "+target_var:rollingaverage_hour}).melt(ignore_index=False, value_name=valuename) rollingaverage_hour=(target_series[bouts].resample('1D').mean()).dropna() dayDB=pd.DataFrame({"Day "+target_var:rollingaverage_hour}).melt(ignore_index=False, value_name=valuename) # dayDB rollingaverage_min=(target_series[bouts].resample('60s').mean()).dropna() minDB=pd.DataFrame({"Minute "+target_var:rollingaverage_min}).melt(ignore_index=False, value_name=valuename) dataframe=pd.concat([chunkDB, byframe, dayDB, hourDB, minDB], axis=0) dataframe["Fly"]=int(xarray["Fly"]) dataframe["Batch"]=int(xarray["Batch"]) dataframe["Fly"]=pd.Categorical(dataframe["Fly"]) dataframe["Batch"]=pd.Categorical(dataframe["Batch"]) dataframe["Recording Length"]=int(xarray["recording_length"]) dataframe["Recording Length"]=pd.Categorical(dataframe["Recording Length"]) return dataframe def bootstrapintervals(xarray, target_var="speed"): speed_series=xarray['speed'].squeeze().to_series() if target_var=="log_speed": target_series=np.log(speed_series) else: target_series=xarray[target_var].squeeze().to_series() flynum=xarray["Fly"] batchnum=xarray["Batch"] recordinglength=int(xarray["recording_length"]) bouts=(((np.log(speed_series)>.5).rolling(10, center=True).sum())>1) boutnumber=((np.round(bouts).diff())>0).cumsum() groups=boutnumber*bouts # return [target_series, groups] chunkmeans=target_series.reset_index().groupby(np.array(groups)).mean(numeric_only=False).set_index('timestamps') # return chunkmeans chunkmeans.dropna(inplace=True) # valuename=target_var valuename="value" chunkmeans.columns=["Chunk "+target_var] # return chunkmeans # return target_series[bouts] chunkpd=bootstrap_helper(chunkmeans, "Activity Bout", target_var, fly=flynum, batch=batchnum, recording_length=recordinglength) # return chunkpd # ci=sci.stats.bootstrap((chunkmeans,), np.mean, method='basic').confidence_interval # return pd.DataFrame({"Variable":target_var, "Method":"Chunked", "Fly":"Fly1", "Batch":"BatchX", "CI_low":ci[0], "Mean":np.mean(chunkmeans), "CI_high":ci[1]}) # chunkDB=chunkmeans.melt(ignore_index=False, value_name=valuename) # return chunkDB # turning_all=pd.concat([chunkDB.melt(),allDB.melt(), dayDB.melt(), hourDB.melt(), minDB.melt()], axis=0) # return target_series[bouts].resample('1H').mean().dropna() hourpd=bootstrap_helper((target_series[bouts].resample('1H').mean()).dropna(), "Hour", target_var, fly=flynum, batch=batchnum, recording_length=recordinglength) framepd=bootstrap_helper(target_series[bouts].dropna()[1:], "Frame", target_var, fly=flynum, batch=batchnum, recording_length=recordinglength) minutepd=bootstrap_helper((target_series[bouts].resample('60s').mean()).dropna(), "Minute", target_var, fly=flynum, batch=batchnum, recording_length=recordinglength) daypd=bootstrap_helper((target_series[bouts].resample('1D').mean()).dropna(), "Day", target_var, fly=flynum, batch=batchnum, recording_length=recordinglength) dfall=pd.concat([framepd, chunkpd, minutepd, hourpd, daypd], ignore_index=True) dfall["FlyID"]=pd.Categorical("B"+dfall["Batch"].astype(str)+"-F"+dfall["Fly"].astype(str)) return dfall byframe=pd.DataFrame({"Frame "+target_var:target_series[bouts]}).melt(ignore_index=False, value_name=valuename) rollingaverage_hour=(target_series[bouts].resample('1H').mean()).dropna() hourDB=pd.DataFrame({"Hour "+target_var:rollingaverage_hour}).melt(ignore_index=False, value_name=valuename) rollingaverage_hour=(target_series[bouts].resample('1D').mean()).dropna() dayDB=pd.DataFrame({"Day "+target_var:rollingaverage_hour}).melt(ignore_index=False, value_name=valuename) # dayDB rollingaverage_min=(target_series[bouts].resample('60s').mean()).dropna() minDB=pd.DataFrame({"Minute "+target_var:rollingaverage_min}).melt(ignore_index=False, value_name=valuename) dataframe=pd.concat([chunkDB, byframe, dayDB, hourDB, minDB], axis=0) dataframe["Fly"]=int(xarray["Fly"]) dataframe["Batch"]=int(xarray["Batch"]) dataframe["Fly"]=pd.Categorical(dataframe["Fly"]) dataframe["Batch"]=pd.Categorical(dataframe["Batch"]) dataframe["Recording Length"]=int(xarray["recording_length"]) dataframe["Recording Length"]=pd.Categorical(dataframe["Recording Length"]) return dataframe def bootstrap_helper(series, method_name, variable_name, fly=0, batch=0, recording_length=9): ci=sci.stats.bootstrap((series.values,), np.mean, method='basic', batch=100).confidence_interval return pd.DataFrame({"Variable":variable_name, "Block Method":method_name, "Fly":fly, "Batch":batch, "Recording Length":recording_length, "CI_low":ci[0], "Mean":np.mean(series.values), "CI_high":ci[1]}, index=[0]) def compareflies(filelist, range='all', variables=['direction']): if range=='all': range=np.arange(len(filelist)) # needs_initialization=True # for fly in range: b=Parallel(n_jobs=1, verbose=10)(delayed(compareflies_parallel)(file, variables) for file in filelist) return pd.concat(b, ignore_index=True ) # return all def compareflies_parallel(filepath, variables=['direction']): # print(variables) xarray=xr.open_dataset(filepath) a=Parallel(n_jobs=1, verbose=0)(delayed(bootstrapintervals)(xarray, v) for v in variables) return pd.concat(a, ignore_index=True)