clear all %close all rng('default') load ORN_PN_colors load analysis_dir_path manualLabelHome=fullfile(analysis_dir_path, 'PN_analysis_oct_vs_air_choice/train'); publishedOdorPath=fullfile(analysis_dir_path, 'utilities/odorPanel_12_DoORData.mat'); load(publishedOdorPath); manualLabelledFolders=dir(manualLabelHome); manualLabelledFolders=manualLabelledFolders(3:end); labels=cell(1,length(manualLabelledFolders)); glomsannotated=zeros(1,length(manualLabelledFolders)); totalPutativeGloms=zeros(1,length(manualLabelledFolders)); nodors=13; odortimes=[6:9]; % hard-coded specific time interval for summarizing odor response odortimesfortimecourse=[1:18]; responsesTensorFullyOrthogonal=NaN*zeros(27,2,39,nodors,length(odortimesfortimecourse)); % flynum HARD CODED responsesGlomByOdor=[]; responsesTimeCourseGlomByOdor=[]; flyodors=[]; flygloms=[]; glombyodorflies=[]; behaviorOcc=[]; behaviorpreOcc=[]; leftRightSplitVal=1000; firstSecondPanelSplitVal=500; flyNum=0; glomfound=zeros(1,39); oldFlyNum=9000; oldDate=[]; oldLobe=[]; for i=1:length(manualLabelledFolders) currname=manualLabelledFolders(i).name; if ~strcmp(currname(1),'.') if strcmp(currname(end),'1') currVol='Volumes'; else currVol='Volumes2'; end if strcmp(currname(end-2),'r') currLobe='rightLobe'; cL=2; else currLobe='leftLobe'; cL=1; end if strcmp(currname(1),'H') currDate=currname(8:13); else currDate=currname(1:6); end underscores=strfind(currname,'_'); currFlyNum=currname((underscores(1)+4):(underscores(2)-1)); if ~strcmp(currDate,oldDate) || ~strcmp(currFlyNum,oldFlyNum) flyNum=flyNum+1; end currFiles=dir([manualLabelHome '/' manualLabelledFolders(i).name]); currFiles=currFiles(3:end); for j=1:length(currFiles) cfcname=currFiles(j).name; if ~strcmp(cfcname(1),'.') % load manual label file, response data, and behavior data load([manualLabelHome '/' manualLabelledFolders(i).name '/' currFiles(j).name]) end end behaviorOcc=[behaviorOcc occ-preocc]; %behaviorOcc=[behaviorOcc occ]; behaviorpreOcc=[behaviorpreOcc preocc]; manualClusterLabels=clusterLabels; totalPutativeGloms(i)=length(manualClusterLabels); gs=median(grnResponse(:,:,odortimes),3); % use median gs=zeros(13,size(grnResponse,2)); for j=1:size(grnResponse,2) temp = squeeze(grnResponse(:,j,odortimes)-nanmedian(grnResponse(:,j,1:5),3)); gs(:,j)= transpose(max(temp')); end %gs=prctile(grnResponse(:,:,odortimes),75); % use percentile responseTemp=NaN*zeros(length(publishedOR.gh146glomerulusNames),nodors); responseTempTimeCourse=NaN*zeros(length(publishedOR.gh146glomerulusNames)*length(odortimesfortimecourse),size(grnResponse,1)); responsesTensorTemp=NaN*zeros(length(publishedOR.gh146glomerulusNames),size(grnResponse,1),length(odortimesfortimecourse)); labels{i}.manual=cell(1,length(manualClusterLabels)); % record manual labels for j=1:length(manualClusterLabels) labels{i}.manual{j}=manualClusterLabels{j}; end % fill in response matrix for j=1:length(manualClusterLabels) for k=1:length(publishedOR.gh146glomerulusNames) if strcmp(labels{i}.manual{j},publishedOR.gh146glomerulusNames{k}) responseTemp(k,:)=gs(:,j); glomsannotated(i)=glomsannotated(i)+1; glomfound(k)=glomfound(k)+1; for oo=1:nodors responseTempTimeCourse((((k-1)*length(odortimesfortimecourse)+1):k*length(odortimesfortimecourse)),oo)=grnResponse(oo,j,odortimesfortimecourse); responsesTensorTemp(k,oo,:)=grnResponse(oo,j,odortimesfortimecourse); end break end end end responseTempT=responseTemp'; responseTempTimeCourseT=responseTempTimeCourse'; responsesTensorFullyOrthogonal(flyNum,cL,:,:,:)=responsesTensorTemp; responsesGlomByOdor=[responsesGlomByOdor responseTempT(:)]; responsesTimeCourseGlomByOdor=[responsesTimeCourseGlomByOdor responseTempTimeCourseT(:)]; glombyodorflies=[glombyodorflies (flyNum+(cL-1)*leftRightSplitVal)]; oldFlyNum=currFlyNum; oldDate=currDate; oldLobe=currLobe; end end % % remove antenna lobes with fewer than X glomeruli annotated % glomthreshold=5; % toremove=find(glomsannotated<=glomthreshold); % responsesGlomByOdor(:,toremove)=[]; % responsesTimeCourseGlomByOdor(:,toremove)=[]; % glombyodorflies(toremove)=[]; % data points that correspond to each fly flyindices=cell(1,flyNum); flyindicesL=cell(1,flyNum); flyindicesR=cell(1,flyNum); for i=1:flyNum [temp temp2]=find(glombyodorflies==i); [tem3 temp4]=find(glombyodorflies==(i+leftRightSplitVal)); flyindices{i}=[temp2 temp4]; flyindicesL{i}=temp2; flyindicesR{i}=temp4; end % shuffle data points that correspond to each fly flyindicesShuffled=cell(1,flyNum); j=1; for i=randperm(flyNum) [temp temp2]=find(glombyodorflies==i); [tem3 temp4]=find(glombyodorflies==(i+leftRightSplitVal)); flyindicesShuffled{j}=[temp2 temp4]; j=j+1; end mycmap=distinguishable_colors(flyNum); %% perform pca on responses clear responsesNoResponseRemoved fracIn=0.1; % best results when fracIn is high, ~0.5, only using high confidence glomeruli medianResponseOrTimeCourse=1; % 1 for median response only, 0 for full time course if medianResponseOrTimeCourse responsesNoResponseRemoved=responsesGlomByOdor; else responsesNoResponseRemoved=responsesTimeCourseGlomByOdor; end % remove fully empty rows %totallyempty=~any(isfinite(responsesNoResponseRemoved),2); %responsesNoResponseRemoved(totallyempty,:)=[]; gNames=publishedOR.gh146glomerulusNames; glomsFound=glomfound; numFinite=sum(isfinite(responsesNoResponseRemoved),2); toRemove=find(numFinite/size(responsesNoResponseRemoved,2)<=fracIn); responsesNoResponseRemoved(toRemove,:)=[]; % remove air and ethanol responsesNoResponseRemoved(1:13:end)=0; responsesNoResponseRemoved(8:13:end)=0; if medianResponseOrTimeCourse temp=nodors; fp=toRemove(find(mod(toRemove,temp)==1)); glomsremoved=((fp-1)/temp)+1; gNames(glomsremoved)=[]; glomsFound(glomsremoved)=[]; else temp=length(odortimesfortimecourse)*nodors; fp=toRemove(find(mod(toRemove,temp)==1)); glomsremoved=((fp-1)/temp)+1; gNames(glomsremoved)=[]; end % REMOVE D gNames(1)=[]; responsesNoResponseRemoved(1:13,:)=[]; % % fill nans with mean for i=1:size(responsesNoResponseRemoved,1) for j=1:size(responsesNoResponseRemoved,2) if isnan(responsesNoResponseRemoved(i,j)) responsesNoResponseRemoved(i,j)=nanmean(responsesNoResponseRemoved(i,:)); end end end % fill missing values with linear interpolation % data=responsesNoResponseRemoved'; % dz=data; % dataFilled=fillWithRegressedValues(dz); % responsesNoResponseRemoved=dataFilled'; yesZscore=0; if yesZscore responsesNoResponseRemoved=responsesNoResponseRemoved'; responsesNoResponseRemoved=(responsesNoResponseRemoved-mean(responsesNoResponseRemoved))./std(responsesNoResponseRemoved); responsesNoResponseRemoved=responsesNoResponseRemoved'; end opt = statset('pca'); opt.Display='iter'; [COEFF, SCORE, LATENT, TSQUARED, EXPLAINED] = pca(responsesNoResponseRemoved','Options',opt); figure; %1 plot(cumsum(EXPLAINED),'o-','LineWidth',3) ylabel('Variance Explained (%)') xlabel('PC #') box off set(gca,'FontSize',15) % calculate glom contribution to each pc glomcontributionAbsolute=zeros(length(gNames),size(COEFF,2)); glomcontributionMean=zeros(length(gNames),size(COEFF,2)); odorcontributionAbsolute=zeros(nodors,size(COEFF,2)); odorcontributionMean=zeros(nodors,size(COEFF,2)); for i=1:length(gNames) if medianResponseOrTimeCourse temp=nodors; else temp=length(odortimesfortimecourse)*nodors; end currind=((i-1)*temp)+1; for j=1:size(COEFF,2) glomcontributionMean(i,j)=mean(COEFF(currind:(currind+temp-1),j)); glomcontributionAbsolute(i,j)=mean(abs(COEFF(currind:(currind+temp-1),j))); end end if medianResponseOrTimeCourse for i=1:nodors for j=1:size(COEFF,2) odorcontributionMean(i,j)=mean(COEFF(i:nodors:end,j)); odorcontributionAbsolute(i,j)=mean(abs(COEFF(i:nodors:end,j))); end end end figure; %2 imagesc(glomcontributionMean) set(gca,'ytick',1:length(gNames),'yticklabel',string(gNames),'FontSize',10) ytickangle(30) xlabel('PC #') set(gca,'FontSize',15) %% varianceToKeep=100; % percent of variance to keep co = SCORE; totalVarianceExplained=cumsum(EXPLAINED); pcsWithinVariance=find(totalVarianceExplained1 withinLLobe(j)=sqrt(sum((co(lcurr(1),1:pcstouse)-co(lcurr(2),1:pcstouse)).^2)); end if length(rcurr)>1 withinRLobe(j)=sqrt(sum((co(rcurr(1),1:pcstouse)-co(rcurr(2),1:pcstouse)).^2)); end if length(lcurr)>0 && length(rcurr)>0 withinDifferentLobe(j)=0; for kk=1:length(lcurr) for jj=1:length(rcurr) withinDifferentLobe(j)=withinDifferentLobe(j)+sqrt(sum((co(lcurr(kk),1:pcstouse)-co(rcurr(jj),1:pcstouse)).^2)); end end withinDifferentLobe(j)=withinDifferentLobe(j)/(length(lcurr)*length(rcurr)); end % calculate across-fly distances ltempsAcross=find(glombyodorflies~=(j)); ltempsAcross=ltempsAcross(find(glombyodorflies(ltempsAcross)~=(j+leftRightSplitVal))); ltempsAcross=ltempsAcross(find(glombyodorflies(ltempsAcross)leftRightSplitVal)); rcurrAcross=rtempsAcross; if length(lcurr)>0 acrossLLobe(j)=0; for jj=1:length(lcurrAcross) for kk=1:length(lcurr) acrossLLobe(j)=acrossLLobe(j)+sqrt(sum((co(lcurr(kk),1:pcstouse)-(co(lcurrAcross(jj),1:pcstouse))).^2)); end end acrossLLobe(j)=(acrossLLobe(j))/(length(lcurrAcross)*length(lcurr)); end if length(rcurr)>0 acrossRLobe(j)=0; for jj=1:length(rcurrAcross) for kk=1:length(rcurr) acrossRLobe(j)=acrossRLobe(j)+sqrt(sum((co(rcurr(kk),1:pcstouse)-(co(rcurrAcross(jj),1:pcstouse))).^2)); end end acrossRLobe(j)=(acrossRLobe(j))/(length(rcurrAcross)*length(rcurr)); end allAcross=[lcurrAcross rcurrAcross]; allCurr=[lcurr rcurr]; if length(lcurr)>0 && length(rcurr)>0 acrossAllLobe(j)=0; for jj=1:length(allAcross) for kk=1:length(allCurr) acrossAllLobe(j)=acrossAllLobe(j)+sqrt(sum((co(allCurr(kk),1:pcstouse)-(co(allAcross(jj),1:pcstouse))).^2)); end end acrossAllLobe(j)=acrossAllLobe(j)/(length(allAcross)*length(allCurr)); end end msize=10; lsize=3; % % % plot average across all odors for flies with both lobe data figure %3 hold on % plot(0,0,'Marker',m{1},'Color','k') % plot(0,0,'Marker',m{2},'Color','k') %legend('Left Lobe','Right Lobe') for j=1:(flyNum) ltemps=find(glombyodorflies==(j)); lcurr=ltemps(1:end); rtemps=find(glombyodorflies==(j+1000)); rcurr=rtemps(1:end); if length(lcurr)>0 && length(rcurr)>0 plot(co(lcurr(1:size(lcurr,2)),1),co(lcurr(1:size(lcurr,2)),2),'Color',mycmap(j,:),'Marker','*','LineWidth',lsize,'MarkerSize',msize); plot(co(rcurr(1:size(rcurr,2)),1),co(rcurr(1:size(rcurr,2)),2),'Color',mycmap(j,:),'Marker','o','LineWidth',lsize,'MarkerSize',msize); plot([co(lcurr(1:size(lcurr,2)),1)' co(rcurr(1:size(rcurr,2)),1)' co(lcurr(1),1)],[co(lcurr(1:size(lcurr,2)),2)' co(rcurr(1:size(rcurr,2)),2)' co(lcurr(1),2)],'Color',mycmap(j,:),'LineStyle','--','LineWidth',lsize,'MarkerSize',msize); end end xlabel('PC 1 Score') ylabel('PC 2 Score') set(gca,'FontSize',15) figure %4 hold on for j=1:(flyNum) ltemps=find(glombyodorflies==(j)); lcurr=ltemps(1:end); rtemps=find(glombyodorflies==(j+1000)); rcurr=rtemps(1:end); if length(lcurr)>0 && length(rcurr)>0 h3=plot(mean(co(lcurr(1:size(lcurr,2)),1)),mean(co(lcurr(1:size(lcurr,2)),2)),'Color',mycmap(j,:),'Marker','*','LineWidth',lsize,'MarkerSize',msize); %text(mean(co(lcurr(1:size(lcurr,2)),1)),mean(co(lcurr(1:size(lcurr,2)),2)),[num2str(j)],'FontSize',15) h4=plot(mean(co(rcurr(1:size(rcurr,2)),1)),mean(co(rcurr(1:size(rcurr,2)),2)),'Color',mycmap(j,:),'Marker','o','LineWidth',lsize,'MarkerSize',msize); h5=plot([mean(co(lcurr(1:size(lcurr,2)),1))' mean(co(rcurr(1:size(rcurr,2)),1))'],[mean(co(lcurr(1:size(lcurr,2)),2))' mean(co(rcurr(1:size(rcurr,2)),2))'],'Color',mycmap(j,:),'LineStyle','--','LineWidth',lsize,'MarkerSize',1); end end h1=plot(-100,-100,'Marker','o','MarkerSize',msize,'LineStyle','none','LineWidth',lsize,'Color','k'); h2=plot(-100,-100,'Marker','*','MarkerSize',msize,'LineStyle','none','LineWidth',lsize,'Color','k'); legend([h1 h2],{'Left Lobe','Right Lobe'}) legend boxoff box off axis([min(co(:,1)) max(co(:,1)) min(co(:,2)) max(co(:,2))]) xlabel('PC 1 Score') ylabel('PC 2 Score') set(gca,'FontSize',15) %Boxplots of distances % summarize across flies withinleft=withinLLobe; withinright=withinRLobe; withinacross=withinDifferentLobe; acrossleft=acrossLLobe; acrossright=acrossRLobe; acrossall=acrossAllLobe; figure %5 boxplot([withinleft(:) withinright(:) withinacross(:) acrossleft(:) acrossright(:) acrossall(:)]) ylabel('Distance in Coding Space') xlabels{1}='Within Fly (Left Lobe)'; xlabels{2}='Within Fly (Right Lobe)'; xlabels{3}='Within Fly (Opposite Lobes)'; xlabels{4}='Across Fly (Left Lobe)'; xlabels{5}='Across Fly (Right Lobe)'; xlabels{6}='Across Fly (Both Lobes)'; set(gca,'xtick',1:6,'xticklabel',xlabels,'FontSize',10) xtickangle(30) box off set(gca,'FontSize',15) %% use fitlm pcstouse=[1]; behaviorprediction=(SCORE(:,pcstouse)); flyTruePref=zeros(1,flyNum); flyPredictedPref=zeros(1,flyNum); ally=behaviorOcc'; linmodel=fitlm(behaviorprediction,ally); myprediction=predict(linmodel,behaviorprediction); figure %6 plot(myprediction,ally,'o','LineWidth',3) xlabel('Predicted Preference') ylabel('Measured Preference') set(gca,'FontSize',15) box off linmodel nactivity=zeros(flyNum,length(pcstouse)); for i=1:flyNum flyTruePref(i)=mean(ally(flyindices{i})); flyPredictedPref(i)=mean(mean(behaviorprediction(flyindices{i},:))); nactivity(i,:)=mean(behaviorprediction(flyindices{i},:)); end linmodel=fitlm(nactivity,flyTruePref); myprediction=predict(linmodel,nactivity); figure %7 plot(myprediction,flyTruePref,'.','Color',pcolor, 'LineWidth',3) for i=1:flyNum hold on %text(myprediction(i)+0.01,flyTruePref(i),num2str(i),'FontSize',15) end [r p]=corrcoef(myprediction,flyTruePref); text(-.25,-.25,['r = ' num2str(r(1,2),'%2.2f')],'FontSize',15) text(-.3,-.25,['p = ' num2str(p(1,2),'%2.2f')],'FontSize',15) xlabel('Predicted Preference') ylabel('Measured Preference') set(gca,'FontSize',15) box on axis([-.6 .3 -.6 .3]) axis square linmodel % FIG 1l figure %8 plot((myprediction-mean(myprediction))/(std(myprediction)),(flyTruePref-mean(flyTruePref))/(std(flyTruePref)),'.','Color',pcolor, 'LineWidth',3) for i=1:flyNum hold on %text(myprediction(i)+0.01,flyTruePref(i),num2str(i),'FontSize',15) end [r p]=corrcoef((myprediction-mean(myprediction))/(std(myprediction)),(flyTruePref-mean(flyTruePref))/(std(flyTruePref))); text(-.25,-.25,['r = ' num2str(r(1,2),'%2.2f')],'FontSize',15) text(-.3,-.25,['p = ' num2str(p(1,2),'%2.2f')],'FontSize',15) set(gca,'FontSize',15) box on axis([-2 3 -2 3]) xlabel('predicted preference (z-scored)') ylabel('measured preference (z-scored)') set(gca,'xtick','') set(gca,'ytick','') axis square beta=linmodel.Coefficients.Estimate; PCContribution=COEFF(:,pcstouse); figure; %9 plot(PCContribution,'*','LineWidth',2,'MarkerSize',8) hold on plot(zeros(1,length(PCContribution(:,1))),'k--','LineWidth',3) j=1; for i=1:nodors:length(PCContribution) plot((i-0.5)*ones(1,5), linspace(min(PCContribution),max(PCContribution),5),'k--','LineWidth',2) %text(i+floor(nodors/3),min(PCContribution),num2str(glomsFound(j)),'FontSize',15) j=j+1; end set(gca,'xtick',(1:nodors:length(PCContribution))+floor(nodors/2),'xticklabel',string(gNames),'FontSize',10) xtickangle(30) ylabel('PC loadings') box off set(gca,'FontSize',15) currpc=PCContribution; %save trainpc currpc gNames %save trainDataModel linmodel currpc gNames %% use average across all dimensions behaviorprediction=transpose(mean(responsesNoResponseRemoved)); flyTruePref=zeros(1,flyNum); flyPredictedPref=zeros(1,flyNum); ally=behaviorOcc'; linmodel=fitlm(behaviorprediction,ally); myprediction=predict(linmodel,behaviorprediction); figure %10 plot(myprediction,ally,'o','LineWidth',3) xlabel('Predicted Preference') ylabel('Measured Preference') set(gca,'FontSize',15) box off linmodel nactivity=zeros(flyNum,1); for i=1:flyNum flyTruePref(i)=mean(ally(flyindices{i})); flyPredictedPref(i)=mean(mean(behaviorprediction(flyindices{i},:))); nactivity(i,:)=mean(behaviorprediction(flyindices{i},:)); end linmodel=fitlm(nactivity,flyTruePref); myprediction=predict(linmodel,nactivity); figure %11 plot(myprediction,flyTruePref,'o','Color',pcolor, 'LineWidth',3) for i=1:flyNum hold on %text(myprediction(i)+0.01,flyTruePref(i),num2str(i),'FontSize',15) end [r p]=corrcoef(myprediction,flyTruePref); text(-.25,-.25,['r = ' num2str(r(1,2),'%2.2f')],'FontSize',15) text(-.3,-.25,['p = ' num2str(p(1,2),'%2.2f')],'FontSize',15) xlabel('Predicted Preference') ylabel('Measured Preference') set(gca,'FontSize',15) box off linmodel beta=linmodel.Coefficients.Estimate; %save trainpc currpc gNames save trainDataModel_averageAllDimensions linmodel gNames %% bootstrap warning('off') iters=250; bootstrapsamples=flyNum; highestPCtouse=5; yesZscore=0; % zscore data matrix before performing PCA (0=no, 1=yes) fracIn=0.1; % best results when fracIn is high, ~0.5, only using high confidence glomeruli medianResponseOrTimeCourse=1; % 1 for median response only, 0 for full time course if medianResponseOrTimeCourse responsesNoResponseRemoved=responsesGlomByOdor; else responsesNoResponseRemoved=responsesTimeCourseGlomByOdor; end gNames=publishedOR.gh146glomerulusNames; glomsFound=glomfound; numFinite=sum(isfinite(responsesNoResponseRemoved),2); toRemove=find(numFinite/size(responsesNoResponseRemoved,2)<=fracIn); responsesNoResponseRemoved(toRemove,:)=[]; if medianResponseOrTimeCourse temp=nodors; fp=toRemove(find(mod(toRemove,temp)==1)); glomsremoved=((fp-1)/temp)+1; gNames(glomsremoved)=[]; glomsFound(glomsremoved)=[]; else temp=length(odortimesfortimecourse)*nodors; fp=toRemove(find(mod(toRemove,temp)==1)); glomsremoved=((fp-1)/temp)+1; gNames(glomsremoved)=[]; end % remove D gNames(1)=[]; responsesNoResponseRemoved(1:13,:)=[]; % remove air and ethanol %responsesNoResponseRemoved(1:nodors:end,:)=0; %responsesNoResponseRemoved(8:nodors:end,:)=0; % % fill nans with mean for i=1:size(responsesNoResponseRemoved,1) for j=1:size(responsesNoResponseRemoved,2) if isnan(responsesNoResponseRemoved(i,j)) responsesNoResponseRemoved(i,j)=nanmean(responsesNoResponseRemoved(i,:)); end end end testR=zeros(highestPCtouse,iters); testRshuffled=zeros(highestPCtouse,iters); testR2=zeros(highestPCtouse,iters); testR2shuffled=zeros(highestPCtouse,iters); testNRSS=zeros(highestPCtouse,iters); % normalized residual sum of squares averagePredictor=cell(highestPCtouse,1); bestPredictorLoading=cell(iters,1); % loadings for the best predictor bestPredictor=zeros(iters,1); % PC which is the best predictor bestPredictorR=zeros(iters,1); pcPredictorRank=zeros(iters,highestPCtouse); vExplained=cell(1,highestPCtouse); relativeOctMchactivation=zeros(iters,highestPCtouse); rawRelativeOctactivation=zeros(iters,highestPCtouse,length(gNames)); rawRelativeMchactivation=zeros(iters,highestPCtouse,length(gNames)); rawPC=cell(1,highestPCtouse); bestPCOctMchactivation=zeros(1,iters); bestPCOctMchcorrelation=zeros(1,iters); octmchloadingcorr=zeros(iters,highestPCtouse); % correlation between oct and mch loading for each pc for jjj=1:iters % select bootstrap sample flies trainflies=zeros(1,bootstrapsamples); for i=1:bootstrapsamples trainflies(i)=1+round((flyNum-1)*rand(1)); end flyindicestrain=cell(1,length(trainflies)); datapts=cell(1,length(trainflies)); lastdatapt=0; for i=1:length(trainflies) [temp temp2]=find(glombyodorflies==trainflies(i)); [tem3 temp4]=find(glombyodorflies==(trainflies(i)+leftRightSplitVal)); flyindicestrain{i}=[temp2 temp4]; datapts{i}=[(lastdatapt+1):(lastdatapt+length(flyindicestrain{i}))]; lastdatapt=(lastdatapt+length(flyindicestrain{i})); end traindata=[]; for i=1:length(trainflies) for j=1:length(flyindicestrain{i}) traindata=[traindata responsesNoResponseRemoved(:,flyindicestrain{i}(j))]; end end [COEFF, SCORE, LATENT, TSQUARED, EXPLAINED] = pca(traindata'); vExplained{jjj}=EXPLAINED; co = SCORE; rawActivation{jjj}=zeros(size(COEFF,1),highestPCtouse); for pcstouse=1:highestPCtouse currpc=COEFF(:,pcstouse); if jjj==1 rawPC{pcstouse}=zeros(size(COEFF,1),iters); end rawPC{pcstouse}(:,jjj)=currpc; % generate linear model based on current PC behaviorprediction=(traindata'*currpc); flyTruePref=zeros(1,length(trainflies)); flyTruePrefShuffled=zeros(1,length(trainflies)); ally=behaviorOcc; nactivity=zeros(length(trainflies),length(pcstouse)); shuffledindtrain=randperm(length(trainflies)); % for comparing to shuffled null model for i=1:length(trainflies) flyTruePref(i)=mean(ally(flyindicestrain{i})); flyTruePrefShuffled(i)=mean(ally(flyindicestrain{shuffledindtrain(i)})); nactivity(i,:)=mean(behaviorprediction(datapts{i},:)); end linmodel=fitlm(nactivity,flyTruePref); linmodelShuffled=fitlm(nactivity,flyTruePrefShuffled); beta=linmodel.Coefficients.Estimate; PCContribution=currpc*beta(2:end); mycorr=corrcoef(nactivity,flyTruePref); mycorrShuffled=corrcoef(nactivity,flyTruePrefShuffled); myr2=linmodel.Rsquared.Ordinary; myr2shuffled=linmodelShuffled.Rsquared.Ordinary; if jjj==1 %averagePredictor{pcstouse}=PCContribution'; averagePredictor{pcstouse}=currpc'; else %averagePredictor{pcstouse}=averagePredictor{pcstouse}+PCContribution'; averagePredictor{pcstouse}=averagePredictor{pcstouse}+currpc'; end testR(pcstouse,jjj)=mycorr(1,2); testRshuffled(pcstouse,jjj)=mycorrShuffled(1,2); testR2(pcstouse,jjj)=myr2; testR2shuffled(pcstouse,jjj)=myr2shuffled; end % find best predictor [val ind]=max(testR(:,jjj)); bestPredictor(jjj)=ind; bestPredictorLoading{jjj}=COEFF(:,ind); bestPredictorR(jjj)=val; [vals inds]=sort(testR(:,jjj),'descend'); pcPredictorRank(jjj,:)=inds; bestpc=COEFF(:,ind); bestPCOctMchactivation(jjj)=mean((bestpc(2:13:end)-bestpc(11:13:end))./(1+(abs(bestpc(2:13:end))+abs(bestpc(11:13:end))))); bestPCOctMchcorrelation(jjj)=octmchloadingcorr(jjj,ind); if mod(jjj,10)==0 disp(['iteration ' num2str(jjj)]) end % if mod(jjj,20)==0 % boxplot(testR(:,1:jjj)') % xlabel('PC #') % ylabel('Test Data r value') % box off % set(gca,'FontSize',15) % drawnow % end %catch %end end for i=1:highestPCtouse averagePredictor{i}=averagePredictor{i}/iters; end % % figure % subplot(2,1,1) % distributionPlot(testR','histOpt',1,'colormap',1-gray(64),'showMM',0) % xlabel('PC used for linear model') % ylabel('Unshuffled R') % box off % set(gca,'FontSize',15) % subplot(2,1,2) % distributionPlot(testRshuffled','histOpt',1,'colormap',1-gray(64),'showMM',0) % xlabel('PC used for linear model') % ylabel('Shuffled R') % box off % set(gca,'FontSize',15) % % figure % subplot(1,2,1) % distributionPlot(testR2','histOpt',1,'colormap',1-gray(64),'showMM',0) % xlabel('PC used for linear model') % ylabel('Unshuffled R^2') % axis([0 highestPCtouse+1 0 0.8]) % box off % set(gca,'FontSize',15) % subplot(1,2,2) % distributionPlot(testR2shuffled','histOpt',1,'colormap',1-gray(64),'showMM',0) % xlabel('PC used for linear model') % ylabel('Shuffled R^2') % axis([0 highestPCtouse+1 0 0.8]) % box off % set(gca,'FontSize',15) % figure 12 violinPlot(testR2',pcolor) xlabel('PC used for linear model') ylabel('Unshuffled R^2') axis([0 highestPCtouse+1 0 1]) box off set(gca,'FontSize',15) % figure 13 violinPlot(testR2shuffled',pcolor) xlabel('PC used for linear model') ylabel('Shuffled R^2') axis([0 highestPCtouse+1 0 1]) box off set(gca,'FontSize',15) testR2t=testR2'; testR2shuffledt=testR2shuffled'; labs=[ones(1,iters) 2*ones(1,iters) 3*ones(1,iters) 4*ones(1,iters) 5*ones(1,iters)]; % FIG 1k PN OCT-AIR preference prediction figure %14 boxplot(testR2t(:),labs,'plotstyle','compact','BoxStyle','filled','Colors',pcolor,'medianstyle','target','symbol','','outliersize',1) xlabel('PC used for linear model') ylabel('Unshuffled R^2') axis([0 highestPCtouse+1 0 0.75]) set(gca,'FontSize',15) set(gca,'xtick','') set(gca,'ytick','') figure %15 boxplot(testR2shuffledt(:),labs,'plotstyle','compact','BoxStyle','filled','Colors',pcolor,'medianstyle','target','symbol','','outliersize',1) xlabel('PC used for linear model') ylabel('Shuffled R^2') axis([0 highestPCtouse+1 0 0.75]) set(gca,'FontSize',15) set(gca,'xtick','') set(gca,'ytick','') %% run train/test many times and save replicate data warning('off') iters=50; highestPCtouse=5; yesZscore=0; % zscore data matrix before performing PCA (0=no, 1=yes) normalizeResponse=0; fracIn=0.4; % best results when fracIn is high, ~0.5, only using high confidence glomeruli medianResponseOrTimeCourse=1; % 1 for median response only, 0 for full time course if medianResponseOrTimeCourse responsesNoResponseRemoved=responsesGlomByOdor; else responsesNoResponseRemoved=responsesTimeCourseGlomByOdor; end gNames=publishedOR.gh146glomerulusNames; glomsFound=glomfound; numFinite=sum(isfinite(responsesNoResponseRemoved),2); toRemove=find(numFinite/size(responsesNoResponseRemoved,2)<=fracIn); responsesNoResponseRemoved(toRemove,:)=[]; if medianResponseOrTimeCourse temp=nodors; fp=toRemove(find(mod(toRemove,temp)==1)); glomsremoved=((fp-1)/temp)+1; gNames(glomsremoved)=[]; glomsFound(glomsremoved)=[]; else temp=length(odortimesfortimecourse)*nodors; fp=toRemove(find(mod(toRemove,temp)==1)); glomsremoved=((fp-1)/temp)+1; gNames(glomsremoved)=[]; end % remove air and ethanol %responsesNoResponseRemoved(1:nodors:end,:)=0; %responsesNoResponseRemoved(8:nodors:end,:)=0; % normalize responsesNoResponseRemoved if normalizeResponse for i=1:size(responsesNoResponseRemoved,2) responsesNoResponseRemoved(:,i)=responsesNoResponseRemoved(:,i)/nanmean(abs(responsesNoResponseRemoved(:,i))); end end testR=zeros(highestPCtouse,iters); testRshuffled=zeros(highestPCtouse,iters); testNRSS=zeros(highestPCtouse,iters); % normalized residual sum of squares averagePredictor=cell(highestPCtouse,1); bestPredictorLoading=cell(iters,1); % loadings for the best predictor bestPredictor=zeros(iters,1); % PC which is the best predictor bestPredictorR=zeros(iters,1); pcPredictorRank=zeros(iters,highestPCtouse); vExplained=cell(1,highestPCtouse); relativeOctMchactivation=zeros(iters,highestPCtouse); rawRelativeOctactivation=zeros(iters,highestPCtouse,length(gNames)); rawRelativeMchactivation=zeros(iters,highestPCtouse,length(gNames)); rawPC=cell(1,highestPCtouse); bestPCOctMchactivation=zeros(1,iters); bestPCOctMchcorrelation=zeros(1,iters); octmchloadingcorr=zeros(iters,highestPCtouse); % correlation between oct and mch loading for each pc for jjj=1:iters % Split data into randomly assigned train/test sets testsize=50; trainsize=100-testsize; randomizedOrder=randperm(flyNum); holdoutflies=randomizedOrder(1:round((length(randomizedOrder)*testsize/100))); trainflies=setxor(1:flyNum,holdoutflies); traintoremove=[]; for i=1:length(holdoutflies) temp=find(glombyodorflies==(holdoutflies(i))); temp2=find(glombyodorflies==(holdoutflies(i)+leftRightSplitVal)); traintoremove=[traintoremove temp temp2]; end testtoremove=setxor(1:length(glombyodorflies),traintoremove); flyindicestrain=cell(1,length(trainflies)); glombyodorfliestrain=glombyodorflies; glombyodorfliestrain(traintoremove)=[]; for i=1:length(trainflies) [temp temp2]=find(glombyodorfliestrain==trainflies(i)); [tem3 temp4]=find(glombyodorfliestrain==(trainflies(i)+leftRightSplitVal)); flyindicestrain{i}=[temp2 temp4]; end flyindicestest=cell(1,length(holdoutflies)); glombyodorfliestest=glombyodorflies; glombyodorfliestest(testtoremove)=[]; for i=1:length(holdoutflies) [temp temp2]=find(glombyodorfliestest==holdoutflies(i)); [tem3 temp4]=find(glombyodorfliestest==(holdoutflies(i)+leftRightSplitVal)); flyindicestest{i}=[temp2 temp4]; end % % fill nans with mean for i=1:size(responsesNoResponseRemoved,1) for j=1:size(responsesNoResponseRemoved,2) if isnan(responsesNoResponseRemoved(i,j)) responsesNoResponseRemoved(i,j)=nanmean(responsesNoResponseRemoved(i,:)); end end end testdata=responsesNoResponseRemoved; testdata(:,testtoremove)=[]; traindata=responsesNoResponseRemoved; traindata(:,traintoremove)=[]; if yesZscore % z-score train and test data ztrain=traindata'; ztrain=(ztrain-mean(ztrain))./(std(ztrain)); ztrain=ztrain'; ztest=testdata'; ztest=(ztest-mean(ztest))./(std(ztest)); ztest=ztest'; traindata=ztrain; testdata=ztest; end [COEFF, SCORE, LATENT, TSQUARED, EXPLAINED] = pca(traindata'); vExplained{jjj}=EXPLAINED; co = SCORE; rawActivation{jjj}=zeros(size(COEFF,1),highestPCtouse); for pcstouse=1:highestPCtouse currpc=COEFF(:,pcstouse); if jjj==1 rawPC{pcstouse}=zeros(size(COEFF,1),iters); end rawPC{pcstouse}(:,jjj)=currpc; % generate linear model based on current PC behaviorprediction=(traindata'*currpc); flyTruePref=zeros(1,length(trainflies)); flyTruePrefShuffled=zeros(1,length(trainflies)); ally=behaviorOcc'; ally(traintoremove)=[]; nactivity=zeros(length(trainflies),length(pcstouse)); shuffledindtrain=randperm(length(trainflies)); % for comparing to shuffled null model for i=1:length(trainflies) flyTruePref(i)=mean(ally(flyindicestrain{i})); flyTruePrefShuffled(i)=mean(ally(flyindicestrain{shuffledindtrain(i)})); nactivity(i,:)=mean(behaviorprediction(flyindicestrain{i},:)); end linmodel=fitlm(nactivity,flyTruePref); beta=linmodel.Coefficients.Estimate; PCContribution=currpc*beta(2:end); % apply predictor to testdata and evaluate how held out points fit in % model %behaviorprediction=testdata'*PCContribution; behaviorprediction=testdata'*currpc; testflyTruePref=zeros(1,length(holdoutflies)); testflyTruePrefShuffled=zeros(1,length(holdoutflies)); flyPredictedPref=zeros(1,length(holdoutflies)); ally=behaviorOcc'; ally(testtoremove)=[]; testnactivity=zeros(length(holdoutflies),1); shuffledindtest=randperm(length(holdoutflies)); % for comparing to shuffled null model for i=1:length(holdoutflies) testflyTruePref(i)=mean(ally(flyindicestest{i})); testflyTruePrefShuffled(i)=mean(ally(flyindicestest{shuffledindtest(i)})); testnactivity(i,:)=mean(behaviorprediction(flyindicestest{i},:)); end mytestprediction=predict(linmodel,testnactivity); myr=corrcoef(mytestprediction,testflyTruePref); myrshuffled=corrcoef(mytestprediction,testflyTruePref(randperm(length(testflyTruePref)))); if jjj==1 %averagePredictor{pcstouse}=PCContribution'; averagePredictor{pcstouse}=currpc'; else %averagePredictor{pcstouse}=averagePredictor{pcstouse}+PCContribution'; averagePredictor{pcstouse}=averagePredictor{pcstouse}+currpc'; end testR(pcstouse,jjj)=myr(1,2); testRshuffled(pcstouse,jjj)=myrshuffled(1,2); testNRSS(pcstouse,jjj)=nanmean((((testflyTruePref-mytestprediction')).^2)); relativeOctMchactivation(jjj,pcstouse)=mean((currpc(2:13:end)-currpc(11:13:end))./(1+(abs(currpc(2:13:end))+abs(currpc(11:13:end))))); rawRelativeOctactivation(jjj,pcstouse,:)=currpc(2:13:end); rawRelativeMchactivation(jjj,pcstouse,:)=currpc(11:13:end); [octmchcorr asd]=corr(currpc(2:13:end),currpc(11:13:end)); octmchloadingcorr(jjj,pcstouse)=octmchcorr; end % find best predictor [val ind]=max(testR(:,jjj)); bestPredictor(jjj)=ind; bestPredictorLoading{jjj}=COEFF(:,ind); bestPredictorR(jjj)=val; [vals inds]=sort(testR(:,jjj),'descend'); pcPredictorRank(jjj,:)=inds; bestpc=COEFF(:,ind); bestPCOctMchactivation(jjj)=mean((bestpc(2:13:end)-bestpc(11:13:end))./(1+(abs(bestpc(2:13:end))+abs(bestpc(11:13:end))))); bestPCOctMchcorrelation(jjj)=octmchloadingcorr(jjj,ind); if mod(jjj,10)==0 disp(['iteration ' num2str(jjj)]) end % if mod(jjj,20)==0 % boxplot(testR(:,1:jjj)') % xlabel('PC #') % ylabel('Test Data r value') % box off % set(gca,'FontSize',15) % drawnow % end %catch %end end for i=1:highestPCtouse averagePredictor{i}=averagePredictor{i}/iters; end myr2=(testR.^2').*sign(testR'); myr2shuffled=(testRshuffled.^2').*sign(testRshuffled'); % figure % subplot(1,2,1) % distributionPlot(myr2,'histOpt',1,'colormap',1-gray(64),'showMM',0) % xlabel('PC used for linear model') % ylabel('Unshuffled R^2') % axis([0 highestPCtouse+1 -0.95 0.95]) % box off % set(gca,'FontSize',15) % subplot(1,2,2) % distributionPlot(myr2shuffled,'histOpt',1,'colormap',1-gray(64),'showMM',0) % xlabel('PC used for linear model') % ylabel('Shuffled R^2') % axis([0 highestPCtouse+1 -0.95 0.95]) % box off % set(gca,'FontSize',15) % % figure %16 distributionPlot(myr2,'histOpt',1,'colormap',pcolor,'showMM',0) xlabel('PC used for linear model') ylabel('Unshuffled R^2') axis([0 highestPCtouse+1 -1 1]) box off set(gca,'FontSize',15) figure %17 distributionPlot(myr2shuffled,'histOpt',1,'colormap',pcolor,'showMM',0) xlabel('PC used for linear model') ylabel('Shuffled R^2') axis([0 highestPCtouse+1 -1 1]) box off set(gca,'FontSize',15) % figure 18 violinPlot(myr2,pcolor) xlabel('PC used for linear model') ylabel('Unshuffled R^2') axis([0 highestPCtouse+1 -1 1]) box off set(gca,'FontSize',15) % figure 19 violinPlot(myr2shuffled,pcolor) xlabel('PC used for linear model') ylabel('Shuffled R^2') axis([0 highestPCtouse+1 -1 1]) box off set(gca,'FontSize',15) % % figure % subplot(2,2,1) % boxplot(testR') % xlabel('PC used for linear model') % ylabel('Correlation between predicted vs. true preference (test data)') % box off % set(gca,'FontSize',15) % % subplot(2,2,2) % hist(bestPredictor) % xlabel('Best predictor PC #') % boxplot(pcPredictorRank) % xlabel('PC #') % ylabel('PC Predictor Rank') % % subplot(2,2,3) % hist(bestPredictorR) % xlabel('Best predictor''s r-value') % % bploadings=zeros(1,size(COEFF,1)); % allbpl=zeros(iters,size(COEFF,1)); % for j=1:iters % bploadings=bploadings+bestPredictorLoading{j}'; % allbpl(j,:)=bestPredictorLoading{j}'; % end % bploadings=bploadings/iters; % % % subplot(2,2,4) % plot(bploadings,'*','LineWidth',3,'MarkerSize',10) % hold on % plot(zeros(1,size(COEFF,1)),'k--','LineWidth',3) % j=1; % for i=1:nodors:length(averagePredictor{4}) % plot((i-0.5)*ones(1,100), linspace(min(bploadings),max(bploadings)),'k--','LineWidth',2) % text(i+floor(nodors/3),min(bploadings),num2str(glomsFound(j)),'FontSize',15) % j=j+1; % end % set(gca,'xtick',(1:nodors:length(bploadings))+floor(nodors/2),'xticklabel',string(gNames),'FontSize',10) % xtickangle(30) % ylabel('Best predictor''s loadings') % box off % set(gca,'FontSize',15) % % % % figure % % boxplot(relativeOctMchactivation) % % xlabel('PC #') % % ylabel('Abs(Oct-MCH) Loading') % % set(gca,'FontSize',15) % % % % figure % % hist(relativeOctMchactivation(:,4),50) % % xlabel('PC #') % % ylabel('Abs(Oct-MCH) Loading') % % set(gca,'FontSize',15) % % % % figure % % distributionPlot(relativeOctMchactivation,'histOpt',1,'colormap',1-gray(64)); % % xlabel('PC #') % % ylabel('Average OCT-MCH Loading') % % set(gca,'FontSize',15) % % % show best predictor loadings for each iteration % bpls=zeros(length(bestPredictorLoading{1}),iters); % oa=zeros(1,iters); % for j=1:iters % bpls(:,j)=bestPredictorLoading{j}'; % oa(j)=corr(bestPredictorLoading{j}(2:13:end),bestPredictorLoading{j}(11:13:end)); % end % figure; % imagesc(bpls) % xlabel('iteration') % ylabel('dimension') % % figure; hist(oa) % % ex=zeros(1,30); % for j=1:iters % ex=ex+vExplained{j}(1:30)'; % end % ex=ex/iters; % figure % plot(log10(ex),'LineWidth',3) % xlabel('PC #') % ylabel('log10(Variance Explained)') % set(gca,'FontSize',20) % % % run kmeans on allbpl (how many "best pcs" are there) % [IDX, bestpredictorC, SUMD]= kmeans(allbpl, 20); % figure; % imagesc(bestpredictorC) % xlabel('Dimensions') % ylabel('Predictor Class (from kmeans)') % title('Best PC predictor loadings (all)') % set(gca,'FontSize',15) % % % make a "best predictor matrix" % bestpredictors=bestpredictorC(1,:); % for i=2:size(bestpredictorC,1) % for j=1:size(bestpredictors,1) % temp=corrcoef(bestpredictorC(i,:),bestpredictors(j,:)); % temp2=temp(1,2); % if abs(temp2)>0.4 % if temp2>0 % bestpredictors(j,:)=(bestpredictors(j,:)+bestpredictorC(i,:))/2; % else % bestpredictors(j,:)=(bestpredictors(j,:)-bestpredictorC(i,:))/2; % end % break % elseif j==size(bestpredictors,1) % bestpredictors=[bestpredictors; bestpredictorC(i,:)]; % end % % end % end % % figure; % imagesc(bestpredictors) % xlabel('Dimensions') % ylabel('Predictor Class (from kmeans)') % title('Best PC predictor loadings (clustered)') % set(gca,'FontSize',15) % % % % run kmeans on each rawPC sweep to find possibilities % maxK=20; % for i=1:highestPCtouse % [IDX, C, SUMD]= kmeans(rawPC{i}', maxK); % kidx{i}=IDX; % kc{i}=C; % end % figure % for i=1:highestPCtouse % subplot(2,5,i) % boxplot(testR(i,:),kidx{i}) % ylabel('Measured-Predicted Fit Correlation') % title(['PC ' num2str(i)]) % end % % figure % for i=1:highestPCtouse % subplot(2,5,i) % % boxplot(octmchloadingcorr(:,i),kidx{i}) % ylabel('OCT-MCH correlation') % title(['PC ' num2str(i)]) % end % % % go through all PCs and select each one which led to a behavior prediction % predcorr=0.2; % select PCs with behavior prediction correlation higher than this number % bestOfPc=[]; % for i=1:highestPCtouse % for j=1:maxK % if median(testR(i,kidx{i}==j))>predcorr % if bestOfPc % bestOfPc=kc{i}(j,:); % else % bestOfPc=[bestOfPc; kc{i}(j,:)]; % end % disp(['saving PC# ' num2str(i) ' cluster ' num2str(j)]) % end % end % end % % % make a "best of predictor matrix" % bestpredictorsAll=bestOfPc(1,:); % for i=2:size(bestOfPc,1) % for j=1:size(bestpredictorsAll,1) % temp=corrcoef(bestOfPc(i,:),bestpredictorsAll(j,:)); % temp2=temp(1,2); % if abs(temp2)>0.6 % if temp2>0 % bestpredictorsAll(j,:)=(bestpredictorsAll(j,:)+bestOfPc(i,:))/2; % else % bestpredictorsAll(j,:)=(bestpredictorsAll(j,:)-bestOfPc(i,:))/2; % end % break % elseif j==size(bestpredictorsAll,1) % bestpredictorsAll=[bestpredictorsAll; bestOfPc(i,:)]; % end % % end % end % % figure; % imagesc(bestpredictorsAll) % xlabel('Dimensions') % ylabel('Predictor Class (from kmeans)') % title('Best PC predictor loadings (clustered)') % set(gca,'FontSize',15)