% Plot panels from the boostrapped KEGG enrichment analysis. % 1) Create bootstrapped shuffled and unshuffled average minimum p-value bar plots. % 2) Create bootstrapped average number metrics hit by average minimum p-value scatter plots. % 3) Create average minimum p-value paired dot plots. % 4) Create KEGG pathway by gene boostrapped heatmaps split by a priori groups. fdir = uigetdir(pwd,'Select decathlon_paper_data directory'); D = load_decathlon_structs(fdir,'D_enrichment_results'); % generate p-value bar plots for shuffled and unshuffled data figure; for i=1:numel(D) subplot(numel(D),1,i); hold on; h1 = barh(flip(D(i).avg_min_pval),'FaceColor',[.6 .6 .6], 'EdgeColor', 'none'); h2 = barh(flip(D(i).shuffled_avg_min_pval),'FaceColor',[0 0 0], 'EdgeColor', 'none'); set(gca,'YTick',1:numel(D(i).cat_labels),... 'YTickLabel',flip(D(i).cat_labels),'TickLength',[0 0],'XLim',[0 15]); legend([h1;h2],{'unshuffled','shuffled'},'Location','SouthEast'); title(sprintf('decathlon %i',i)); xlabel('-log_{10}[p-value]'); ylabel('enrichment category'); ax = gca; ax.YAxis.FontSize = 6; ax.YAxis.Label.FontSize = 10; end % enrichment p-value x num metrics scatter plot figure; for i=1:numel(D) % scatter unshuffled data subplot(numel(D),1,i); hold on; lh1 = pretty_scatter(D(i).avg_metrics_hit,D(i).avg_min_pval,[.6 .6 .6],'MarkerSize',3); title(sprintf('decathlon %i',i)); xlabel('num metrics'); ylabel('-log_{10}[p]'); % apply text labels to significant nodes p_filt = find(D(i).avg_min_pval>2); x = D(i).avg_metrics_hit; y = D(i).avg_min_pval; for j=1:numel(p_filt) text(x(p_filt(j))-.2,y(p_filt(j)),D(i).cat_labels(p_filt(j)),... 'HorizontalAlignment','right','FontSize',6); end end % pair dot plot pairs = unique_idx_pairs(numel(D),true); D_p = D; for i=1:size(pairs,1) [p_a,p_b] = ismember(D_p(pairs(i,1)).cat_labels,D_p(pairs(i,2)).cat_labels); D_p(pairs(i,1)).avg_min_pval = D_p(pairs(i,1)).avg_min_pval(p_a); D_p(pairs(i,2)).avg_min_pval = D_p(pairs(i,2)).avg_min_pval(p_b(p_b>0)); D_p(pairs(i,1)).cat_labels = D_p(pairs(i,1)).cat_labels(p_a); D_p(pairs(i,2)).cat_labels = D_p(pairs(i,2)).cat_labels(p_b(p_b>0)); end figure; for i=1:size(pairs,1) subplot(1,size(pairs,1),i); a = D_p(pairs(i,1)).avg_min_pval; b = D_p(pairs(i,2)).avg_min_pval; plot_pair_dot(a,b,'k',[]); xticks = arrayfun(@(ii) sprintf('D%i',ii),pairs(i,:),'UniformOutput',false); set(gca,'XTick',[1 2],'XTickLabel',xticks,'YLim',[0 15]); title('unshuffled'); ylabel('-log_{10}[p-value]'); hold on; for j=1:numel(D_p(pairs(i,2)).cat_labels) text(2.1,b(j),D_p(pairs(i,2)).cat_labels{j},'FontSize',6); end end % ---- PLOT KEGG PATHWAY x BEHAVIORAL METRIC HEATMAPS ---- % % load behavioral data D_b = load_decathlon_structs(fdir,'D_als_filled'); % get field sorting order for i=1:numel(D_b) fields = D_b(i).fields; [a,m,d] = parse_fieldnames(fields); fields = cellfun(@(a,m) sprintf('%s %s',a,m),a,m,'UniformOutput',false); is_circ = strcmp(a,'Circadian'); fields(is_circ) = cellfun(@(s,d) sprintf('%s (%i)',s,d),... fields(is_circ),num2cell(d(is_circ)),'UniformOutput',false); D_b(i).fields = fields; % sort by apriori group for d1, then match d2 to d1 [~, grp_name, grp_idx] = group_apriori_fields(D_b(i)); D_b(i).fields = D_b(i).fields(cat(1,grp_idx{:})); D_b(i).data = D_b(i).data(:,cat(1,grp_idx{:})); end for i=1:numel(D) [~, ~, grp_idx] = group_apriori_fields(D_b(i)); figure; imagesc(D(i).p_metric); colormap(flip(bone)); caxis([0 1]); cb = colorbar; cb.Label.String = 'Bootstrap probability'; title(sprintf('decathlon batch #%i',i)); set(gca,'YTick',1:numel(D(i).cat_labels),'YTickLabels',... D(i).cat_labels,'FontSize',8,'TickLength',[0 0],... 'XTick',1:numel(D(i).metric_labels),'XTickLabels',... pretty_labels(D(i).metric_labels),... 'XTickLabelRotation',90); id_group = cellfun(@(gi,i) repmat(i,numel(gi),1),... grp_idx, num2cell(1:numel(grp_idx))', 'UniformOutput', false); id_group = cat(1,id_group{:}); % create patch coordinates for groups % create patches for a priori groups and assays vx_groups = repmat([0;0;1;1;0],1,numel(grp_idx)); vy_groups = NaN(5,numel(grp_idx)); color_groups = jet(numel(grp_idx)); for j = 1:numel(grp_idx) idx = [find(id_group==j,1) find(id_group==j,1,'Last')]; if ~isempty(idx) y = [idx(1) idx([2 2])+1 idx([1 1])]'; y = y + 0.2.*[1;-1;-1;1;1]; vy_groups(:,j) = y; end end max_y = numel(cat(1,grp_idx{:})); hold on; patch('YData',vx_groups-1,'XData',vy_groups-0.5,... 'FaceColor','flat','CData',linspace(0,1,numel(grp_idx))); axis(gca,'equal','tight'); end % ---- PLOT KEGG PATHWAY BY GENE HEATMAPS ---- % figure; for i = 1:numel(D) subplot(numel(D),1,i); imagesc(D(i).prob_gene_given_cat); caxis([0 1]); colormap(flip(bone,1)); set(gca,'YTick',1:numel(D(i).cat_labels),'YTickLabel',D(i).cat_labels,'TickLength',[0 0]); ax = gca; ah.XAxis.FontSize = 6; ax.YAxis.FontSize = 5; ax.YAxis.Label.FontSize = 8; cb = colorbar; cb.Label.FontSize = 6; cb.FontSize = 6; end % ---- PLOT KEGG PATHWAY BY GENE HEATMAPS PARTITIONED BY APRIORI GRP ---- % for j = 1:numel(D) f = figure; fn = fieldnames(D(j).apriori_cat_x_gene); for i = 1:numel(fn) subplot(ceil(numel(fn)/2),2,i); imagesc(D(j).apriori_cat_x_gene.(fn{i})); colormap(flip(bone)); colorbar; caxis([0 1]); title(sprintf('Decathlon %i - %s',j,fn{i})); ylabel('KEGG enrichment'); xlabel('gene'); set(gca,'YTick',1:numel(D(j).cat_labels),'YTickLabel',D(j).cat_labels,'TickLength',[0 0],'XTick',[]); ax = gca; ax.YAxis.FontSize = 5; ax.YAxis.Label.FontSize = 10; end end