Source code for brainlit.BrainLine.analyze_results

import numpy as np
from cloudreg.scripts.transform_points import NGLink
from cloudvolume import CloudVolume, exceptions
from tqdm import tqdm
from skimage import io, measure
from brainlit.BrainLine.util import (
    _find_atlas_level_label,
    _fold,
    _setup_atlas_graph,
    _get_atlas_level_nodes,
    _get_corners,
)
import napari
import scipy.ndimage as ndi
from scipy.stats import ttest_ind
from scipy.signal import convolve2d
import seaborn as sns
from statannotations.Annotator import Annotator
from statannotations.stats.StatTest import StatTest
from statannotations import stats
import pandas as pd
import matplotlib.pyplot as plt
import os
from joblib import Parallel, delayed
import pickle
import brainrender
from brainrender import Scene
from brainrender.actors import Points, Volume
import json
from cloudvolume.exceptions import OutOfBoundsError
from pathlib import Path
import random


class BrainDistribution:
    """Helps generate plots and visualizations for brain connection distributions across subtyeps.

    Arguments:
        brain_ids (list): List of brain IDs (keys of data json file).

    Attributes:
        brain_ids (list): List of brain IDs (keys of data json file).
    """

    def __init__(
        self,
        brain_ids: list,
        data_file: str,
        ontology_file: str,
        fixes_file: str = None,
    ):
        self.brain_ids = brain_ids
        with open(data_file) as f:
            data = json.load(f)
        self.brain2paths = data["brain2paths"]
        self.subtype_counts = self._get_subtype_counts()
        self.ontology_file = ontology_file

        if fixes_file == None:
            self.ontology_fixes = None
        else:
            with open(fixes_file) as f:
                data = json.load(f)
            self.ontology_fixes = data

    def _slicetolabels(self, slice, fold_on: bool = False, atlas_level: int = 5):
        region_graph = _setup_atlas_graph(self.ontology_file)
        atlas_level_nodes = _get_atlas_level_nodes(atlas_level, region_graph)
        newslice = np.copy(slice)
        new_labels = {}

        for label in tqdm(np.unique(slice), desc=f"Relabeling slice"):
            atlas_level_label = _find_atlas_level_label(
                label, atlas_level_nodes, atlas_level, region_graph
            )
            newslice[slice == label] = atlas_level_label
            if atlas_level_label not in new_labels.keys():
                if atlas_level_label in region_graph.nodes:
                    name = region_graph.nodes[atlas_level_label]["name"]
                else:
                    name = "??"
                new_labels[atlas_level_label] = name

        labels = measure.label(newslice)
        borders = 0 * labels
        for label in tqdm(np.unique(labels), desc=f"Processing labels"):
            if label != 0:
                mask = np.array(labels == label, dtype="int")
                erode = np.array(ndi.binary_erosion(mask))
                outline = mask - erode
                borders += outline

        if fold_on:
            half_width = np.round(borders.shape[1] / 2).astype(int)
            borders = borders[:, :half_width]
            newslice = newslice[:, :half_width]
        else:
            half_width = -1
        return newslice, borders, half_width

    def _get_subtype_counts(self):
        brain2paths = self.brain2paths
        brain_ids = self.brain_ids
        counts = {}
        for brain_id in brain_ids:
            subtype = brain2paths[brain_id]["subtype"]
            if subtype in counts.keys():
                counts[subtype] = counts[subtype] + 1
            else:
                counts[subtype] = 1
        return counts


[docs]class SomaDistribution(BrainDistribution): """Object to generate various analysis images for results from a set of brain IDs. An implementation of BrainDistribution class. Arguments: brain_ids (list): List of brain IDs (keys of data json file). data_files (str): Path to json file that contains information about samples. show_plots (bool): Whether to show plots, only works if you have graphics access. Attributes: brain2paths (dict): Information about different data samples. show_plots (bool): Whether to show plots, only works if you have graphics access. atlas_points (dict): Key - sample ID, Value - coordinates of soma detection. id_to_regioncounts (dict): Key - sample ID, Value - dictionary of detection counts by region. region_graph (nx.DiGraph): Graph of region hierarchy with soma detection counts as node attributes. """ def __init__( self, brain_ids: list, data_file: str, ontology_file: str, fixes_file: str = None, show_plots: bool = True, bootstrap: int = 0, shuffle: bool = False, ): super().__init__(brain_ids, data_file, ontology_file, fixes_file) self.show_plots = show_plots atlas_points = self._retrieve_soma_coords(brain_ids) self.atlas_points = atlas_points if shuffle: subtypes = [] for brain_id in brain_ids: subtypes.append(self.brain2paths[brain_id]["subtype"]) random.shuffle(subtypes) for subtype, brain_id in zip(subtypes, brain_ids): self.brain2paths[brain_id]["subtype"] = subtype if bootstrap > 0: new_ids = [] for brain_id in tqdm(brain_ids, desc="bootstrapping..."): subtype = self.brain2paths[brain_id]["subtype"] for bootstrap_iter in range(bootstrap): name = f"{brain_id}-{bootstrap_iter}" n = atlas_points[brain_id].shape[0] sample = np.random.randint(0, n, size=n) pts = atlas_points[brain_id][sample, :] self.atlas_points[name] = pts new_ids.append(name) self.subtype_counts[subtype] += 1 self.brain2paths[name] = {"subtype": subtype} self.brain_ids += new_ids id_to_regioncounts_l = self._get_regions(atlas_points, side="l") self.id_to_regioncounts = id_to_regioncounts_l region_graph_l = self._setup_regiongraph() self.region_graph_l = region_graph_l id_to_regioncounts_r = self._get_regions(atlas_points, side="r") self.id_to_regioncounts = id_to_regioncounts_r region_graph_r = self._setup_regiongraph() self.region_graph_r = region_graph_r id_to_regioncounts = self._get_regions(atlas_points) self.id_to_regioncounts = id_to_regioncounts region_graph = self._setup_regiongraph() self.region_graph = region_graph def _retrieve_soma_coords(self, brain_ids: list): brain2paths = self.brain2paths atlas_points = {} for brain_id in brain_ids: if "somas_atlas_path" in brain2paths[brain_id].keys(): json_dir = brain2paths[brain_id]["somas_atlas_path"] jsons = os.listdir(json_dir) jsons = [json for json in jsons if ".json" in json] points = [] for json_file in jsons: json_path = json_dir + json_file print( f"Brain {brain_id}: Collecting atlas space soma points from file: {json_path}" ) with open(json_path) as f: data = json.load(f) for pt in data: points.append(pt["point"]) atlas_points[brain_id] = np.array(points) elif "somas_atlas_url" in brain2paths[brain_id].keys(): viz_link = brain2paths[brain_id]["somas_atlas_url"] viz_link = NGLink(viz_link.split("json_url=")[-1]) ngl_json = viz_link._json for layer in ngl_json["layers"]: if layer["type"] == "annotation": points = [] for annot in layer["annotations"]: points.append(annot["point"]) print( f'Brain {brain_id}: Collecting atlas space soma points from layer: {layer["name"]}' ) atlas_points[brain_id] = np.array(points) else: raise ValueError(f"No somas_atlas_url layer for brain: {brain_id}") return atlas_points def _get_regions(self, points: dict, side: str = None): brain2paths = self.brain2paths ontology_fixes = self.ontology_fixes if "filepath" in brain2paths["atlas"].keys(): vol_atlas = io.imread(brain2paths["atlas"]["filepath"]) else: vol_atlas = CloudVolume(brain2paths["atlas"]["url"]) id_to_regioncounts = {} for brain_id in tqdm(points.keys(), desc="Finding soma regions of brains"): brain_dict = {} for point in tqdm( points[brain_id], desc="Finding soma regions", leave=False ): point_int = [int(np.round(p)) for p in point] if side == "l" and point_int[2] >= 570: continue elif side == "r" and point_int[2] < 570: continue try: region = int(vol_atlas[point_int[0], point_int[1], point_int[2]]) if ontology_fixes != None and str(region) in ontology_fixes.keys(): region = ontology_fixes[str(region)]["replacement"] except IndexError: continue except OutOfBoundsError: continue if region in brain_dict.keys(): brain_dict[region] = brain_dict[region] + 1 else: brain_dict[region] = 1 id_to_regioncounts[brain_id] = brain_dict return id_to_regioncounts
[docs] def napari_coronal_section( self, z: int, subtype_colors: dict, symbols: list = ["o", "+", "^", "vbar"], fold_on: bool = False, plot_type: str = "napari", ): """Generate napari view with allen atlas and points of soma detections. Args: z (int): index of coronal slice in Allen atlas. subtype_colors (dict): Mapping of subtypes (in soma_data.py file) to colors for soma plotting. symbols (list): Napari point symbols to use for different samples of the same subtype. Defaults to ["o", "+", "^", "vbar"]. fold_on (bool, optional): Whether views should be a hemisphere, in which case detections from the other side are mirrored. Defaults to False. plot_type (str, optional): Whether to plot via napari ('napari') or pyplot ('plt'). Returns: pyplot.figure.Figure: pyplot figure only if plot_type=='plt'. """ brain2paths = self.brain2paths atlas_points = self.atlas_points subtype_counts = self._get_subtype_counts() depth_radius = 10 if "filepath" in brain2paths["atlas"].keys(): vol_atlas = io.imread(brain2paths["atlas"]["filepath"]) else: vol_atlas = CloudVolume(brain2paths["atlas"]["url"]) slice = np.squeeze(vol_atlas[z, :, :]) newslice, borders, half_width = self._slicetolabels(slice, fold_on=fold_on) heatmap = np.zeros( [borders.shape[0], borders.shape[1], depth_radius * 2 + 1, 3] ) if self.show_plots: if plot_type == "napari": v = napari.Viewer() v.add_labels(newslice, scale=[10, 10]) elif plot_type == "plt": fig = plt.figure() gtype_counts = {} for key in subtype_colors.keys(): gtype_counts[key] = 0 for key in tqdm(atlas_points.keys(), desc=f"Processing brains in z={z}"): ra = atlas_points[key] gtype = brain2paths[key]["subtype"] ra = atlas_points[key] points = ra[(ra[:, 0] <= z + depth_radius) & (ra[:, 0] >= z - depth_radius)] # only select points that fall on an ROI fg_points = [] channel_map = {"red": 0, "green": 1, "blue": 2} channel = channel_map[subtype_colors[gtype]] for point in points: depth = int(point[0]) - z + depth_radius im_coord = [int(point[1]), int(point[2])] if ( im_coord[0] in range(0, newslice.shape[0]) and im_coord[1] in range(0, newslice.shape[1]) and newslice[im_coord[0], im_coord[1]] != 0 ): if fold_on and im_coord[1] >= half_width: im_coord[1] = 2 * half_width - im_coord[1] fg_points.append([im_coord[0], im_coord[1]]) heatmap[int(im_coord[0]), int(im_coord[1]), depth, channel] += ( 1 / subtype_counts[gtype] ) if self.show_plots: if plot_type == "napari": v.add_points( fg_points, symbol=symbols[gtype_counts[gtype]], face_color=subtype_colors[gtype], size=10, name=f"{key}: {gtype}", scale=[10, 10], ) elif plot_type == "plt": fg_points = np.array(fg_points) plt.scatter( fg_points[:, 1], fg_points[:, 0], c=subtype_colors[gtype], marker=symbols[gtype_counts[gtype]], label=f"{key}: {gtype}", ) gtype_counts[gtype] = gtype_counts[gtype] + 1 if self.show_plots: if plot_type == "napari": for c in range(3): heatmap[:, :, :, c] = ndi.gaussian_filter( heatmap[:, :, :, c], sigma=3 ) v.add_image( heatmap[:, :, depth_radius, :], scale=[10, 10], name=f"Heatmap" ) # , rgb=True) v.add_labels(borders * 2, scale=[10, 10], name=f"z={z}") v.scale_bar.unit = "um" v.scale_bar.visible = True napari.run() return v elif plot_type == "plt": plt.imshow(borders, alpha=borders.astype("float")) plt.legend() return fig
[docs] def brainrender_somas(self, subtype_colors: dict, brain_region: str = "DR"): """Generate brainrender viewer with soma detections. Args: subtype_colors (_type_): Mapping of subtypes (in soma_data.py file) to colors for soma plotting. brain_region (str, optional): Brain region to display with the detections (e.g. dorsal raphe nucleus). Defaults to "DR". """ brain_ids = self.brain_ids brain2paths = self.brain2paths brainrender.settings.WHOLE_SCREEN = False scene = Scene(atlas_name="allen_mouse_50um", title="Input Somas") scene.add_brain_region(brain_region, alpha=0.15) for brain_id in brain_ids: if "somas_atlas_path" in brain2paths[brain_id].keys(): continue viz_link = brain2paths[brain_id]["somas_atlas_url"] viz_link = NGLink(viz_link.split("json_url=")[-1]) ngl_json = viz_link._json for layer in ngl_json["layers"]: if layer["type"] == "annotation": points = [] for annot in tqdm( layer["annotations"], desc="Going through points...", leave=False, ): struct_coord = np.array(annot["point"]) / 5 try: region = scene.atlas.structure_from_coords(struct_coord) if region != 0 and np.any(struct_coord < 0) == False: points.append(annot["point"]) except IndexError: continue points = np.array(points) * 10 scene.add( Points( points, name="CELLS", colors=subtype_colors[brain2paths[brain_id]["subtype"]], ) ) scene.content if self.show_plots: scene.render()
[docs] def region_barchart( self, regions: list, composite_regions: dict = {}, normalize_region: int = -1 ): """Generate bar charts comparing soma detection counts between regions. Args: regions (list): List of Allen atlas brain region IDs to display data for (ID's found here: http://api.brain-map.org/api/v2/structure_graph_download/1.json) composite_regions (dict, optional): Mapping from a custom composite region (str, e.g. "Amygdala") to a set of regions that compose it (list of ints e.g. [131, 295, 319, 780]). Defaults to {}. normalize_region (int, optional): Region ID to normalize data for the normalized bar chart. Defaults to -1. """ subtype_counts = self._get_subtype_counts() id_to_somatotals = self._count_somas() df = self._make_bar_df( regions, composite_regions, id_to_somatotals, subtype_counts, normalize_region, ) subtypes = df["Subtype"].unique() if normalize_region >= 0: fig, axes = plt.subplots(2, 3, figsize=(39, 40), dpi=300) else: fig, axes = plt.subplots(2, 2, figsize=(26, 40), dpi=300) sns.set(font_scale=2) # first panel fig_args = { "x": "Somas (#)", "y": "Region", "hue": "Subtype", "data": df, } sns.set(font_scale=2) bplot = sns.scatterplot(ax=axes[0, 0], **fig_args) fig_args["boxprops"] = {"facecolor": "none"} bplot = sns.boxplot(ax=axes[0, 0], orient="h", **fig_args) bplot.set_xscale("log") fig_args.pop("boxprops") fig_args["style"] = "Brain ID" sns.scatterplot(ax=axes[1, 0], **fig_args) bplot.set_xscale("log") fig_args.pop("style") if len(subtypes) > 1: annotator = self._configure_annotator(df, axes[0, 0], "Somas (#)") annotator.new_plot(bplot, orient="h", plot="boxplot", **fig_args) annotator.apply_and_annotate() # second panel fig_args["x"] = "Percent of Total Somas (%)" bplot = sns.scatterplot(ax=axes[0, 1], **fig_args) fig_args["boxprops"] = {"facecolor": "none"} bplot = sns.boxplot(ax=axes[0, 1], orient="h", **fig_args) bplot.set_xscale("log") fig_args.pop("boxprops") fig_args["style"] = "Brain ID" sns.scatterplot(ax=axes[1, 1], **fig_args) bplot.set_xscale("log") fig_args.pop("style") if len(subtypes) > 1: annotator = self._configure_annotator( df, axes[0, 1], "Percent of Total Somas (%)" ) annotator.new_plot(bplot, orient="h", plot="boxplot", **fig_args) annotator.apply_and_annotate() # third panel if normalize_region >= 0: fig_args["x"] = "Normalized Somas" sns.set(font_scale=2) bplot = sns.scatterplot(ax=axes[0, 2], **fig_args) fig_args["boxprops"] = {"facecolor": "none"} bplot = sns.boxplot(ax=axes[0, 2], orient="h", **fig_args) bplot.set_xscale("log") fig_args.pop("boxprops") fig_args["style"] = "Brain ID" sns.scatterplot(ax=axes[1, 2], **fig_args) bplot.set_xscale("log") fig_args.pop("style") if len(subtypes) > 1: annotator = self._configure_annotator( df, axes[0, 2], "Normalized Somas" ) annotator.new_plot(bplot, orient="h", plot="boxplot", **fig_args) annotator.apply_and_annotate() fig.tight_layout() if self.show_plots: plt.show()
def _setup_regiongraph(self): brain_ids = self.brain_ids id_to_regioncounts = self.id_to_regioncounts region_graph = _setup_atlas_graph(self.ontology_file) max_level = 0 # set to 0 for node in region_graph.nodes: if region_graph.nodes[node]["level"] > max_level: max_level = region_graph.nodes[node]["level"] for brain_id in brain_ids: region_graph.nodes[node][brain_id] = 0 # add counts for brain_id in brain_ids: regioncounts = id_to_regioncounts[brain_id] for region in regioncounts.keys(): if region in region_graph.nodes: region_graph.nodes[region][brain_id] = ( region_graph.nodes[region][brain_id] + id_to_regioncounts[brain_id][region] ) # propagate counts up the hierarchy for brain_id in brain_ids: for lvl in range(max_level, 0, -1): for node in region_graph.nodes: if region_graph.nodes[node]["level"] == lvl: parent = list(region_graph.in_edges(node))[0][0] region_graph.nodes[parent][brain_id] = ( region_graph.nodes[parent][brain_id] + region_graph.nodes[node][brain_id] ) return region_graph def _count_somas(self): id_to_somatotals = {} brain_ids = self.brain_ids atlas_points = self.atlas_points for brain_id in brain_ids: points = atlas_points[brain_id] id_to_somatotals[brain_id] = points.shape[0] return id_to_somatotals def _make_bar_df( self, regions: list, composite_regions, id_to_somatotals: dict, subtype_counts: dict, normalize_region, ): region_graph = self.region_graph brain_ids = self.brain_ids subtypes = [] somas = [] somas_norm = [] somas_pct = [] region_name = [] brain_ids_data = [] for region in regions: print(f"Populating: {region_graph.nodes[region]['name']}") for brain_id in brain_ids: subtype = self.brain2paths[brain_id]["subtype"] soma_count = region_graph.nodes[region][brain_id] somas.append(soma_count) if ( normalize_region >= 0 and region_graph.nodes[normalize_region][brain_id] > 0 ): somas_norm.append( soma_count / region_graph.nodes[normalize_region][brain_id] ) else: print(f"Warning: brain {brain_id} has no inputs from DRN") somas_norm.append(0) somas_pct.append( region_graph.nodes[region][brain_id] / id_to_somatotals[brain_id] * 100 ) subtypes.append(subtype + f" (n={subtype_counts[subtype]})") region_name.append(region_graph.nodes[region]["name"]) brain_ids_data.append(brain_id) for region_component_name in composite_regions.keys(): print(f"Populating: " + region_component_name) region_components = composite_regions[region_component_name] for brain_id in brain_ids: subtype = self.brain2paths[brain_id]["subtype"] soma_count = 0 for region_component in region_components: soma_count += region_graph.nodes[region_component][brain_id] somas.append(soma_count) if ( normalize_region >= 0 and region_graph.nodes[normalize_region][brain_id] > 0 ): somas_norm.append( soma_count / region_graph.nodes[normalize_region][brain_id] ) else: print(f"Warning: brain {brain_id} has no inputs from DRN") somas_norm.append(0) somas_pct.append(soma_count / id_to_somatotals[brain_id] * 100) subtypes.append(subtype + f" (n={subtype_counts[subtype]})") region_name.append(region_component_name) brain_ids_data.append(brain_id) d = { "Somas (#)": somas, "Percent of Total Somas (%)": somas_pct, "Subtype": subtypes, "Region": region_name, "Brain ID": brain_ids_data, } if normalize_region >= 0: d["Normalized Somas"] = somas_norm df = pd.DataFrame(data=d) return df def _configure_annotator(self, df, axis, ind_variable: str): subtype_counts = self._get_subtype_counts() test = "Mann-Whitney" test = StatTest( _log_ttest_ind, test_long_name="Log t-test_ind", test_short_name="log-t" ) # test = "t-test_ind" correction = "fdr_by" pairs = [] unq_subregions = df["Region"].unique() subtypes = df["Subtype"].unique() subtype_pairs = [ (a, b) for idx, a in enumerate(subtypes) for b in subtypes[idx + 1 :] if subtype_counts[a.split("(")[0].strip()] > 1 and subtype_counts[b.split("(")[0].strip()] > 1 ] for subtype_pair in subtype_pairs: for subregion in unq_subregions: pairs.append( ( (subregion, subtype_pair[0]), (subregion, subtype_pair[1]), ) ) fig_args = { "y": ind_variable, "x": "Region", "hue": "Subtype", "data": df, # "jitter": False, "dodge": True, } annotator = Annotator(axis, pairs, **fig_args) annotator.configure( test=test, text_format="star", loc="outside", comparisons_correction=correction, ) return annotator def dcorr_test(self, subtype1, subtype2, bin_size=[2000, 2000, 2000]): brain2paths = self.brain2paths brain_ids = self.brain_ids atlas_points = self.atlas_points bin_size = np.array(bin_size) atlas_vol = CloudVolume(brain2paths["atlas"]["url"]) shp = np.array(atlas_vol.shape[:3]) res = np.array(atlas_vol.resolution) / 1000 bin_size_vox = np.divide(bin_size, res) subtypes = [] vectors = [] for brain_id in tqdm(brain_ids, desc="Building feature vectors..."): if brain2paths[brain_id]["subtype"] == subtype1: subtypes.append(0) elif brain2paths[brain_id]["subtype"] == subtype2: subtypes.append(1) else: continue blocks = np.zeros([int(s / b) + 1 for s, b in zip(shp, bin_size_vox)]) points = atlas_points[brain_id] for xi, x in enumerate(np.arange(0, shp[0], bin_size_vox[0])): for yi, y in enumerate(np.arange(0, shp[1], bin_size_vox[1])): for zi, z in enumerate(np.arange(0, shp[2], bin_size_vox[2])): bin_points = points[ (points[:, 0] >= x) & (points[:, 0] < x + bin_size_vox[0]) & (points[:, 1] >= y) & (points[:, 1] < y + bin_size_vox[1]) & (points[:, 2] >= z) & (points[:, 2] < z + bin_size_vox[2]), :, ] blocks[xi, yi, zi] = bin_points.shape[0] # raise ValueError() vector = np.array(blocks.flatten(), dtype="float") vector /= np.sum(vector) vectors.append(vector) subtypes = np.array(subtypes) vectors = np.array(vectors) print(dcor.independence.distance_covariance_test(subtypes, vectors)) z = 800 if "filepath" in brain2paths["atlas"].keys(): vol_atlas = io.imread(brain2paths["atlas"]["filepath"]) else: vol_atlas = CloudVolume(brain2paths["atlas"]["url"]) slice = np.squeeze(vol_atlas[z, :, :]) newslice, borders, half_width = self._slicetolabels(slice, fold_on=False) v = napari.Viewer() v.add_image( np.squeeze(blocks[int(10 * z / bin_size[0]), :, :]), scale=[bin_size[1], bin_size[2]], name="block", ) v.add_labels( borders * 2, scale=[10, 10], name=f"{subtype1} vs {subtype2} z={z}" ) v.scale_bar.unit = "um" v.scale_bar.visible = True napari.run()
def _log_ttest_ind(group_data1, group_data2, verbose=1, **stats_params): group_data1_log = np.log(group_data1) group_data2_log = np.log(group_data2) return ttest_ind(group_data1_log, group_data2_log, **stats_params) def _get_corners_collection( vol_mask, vol_reg, block_size, max_coords: list = [-1, -1, -1], min_coords: list = [-1, -1, -1], ): corners = _get_corners( vol_mask.shape, chunk_size=block_size, max_coords=max_coords, min_coords=min_coords, ) new_corners = [] for corner in corners: x = corner[0][0] x2 = corner[1][0] x_reg = int(x / 8) x2_reg = np.amin([int(x2 / 8), vol_reg.shape[0]]) y = corner[0][1] y2 = corner[1][1] y_reg = int(y / 8) y2_reg = np.amin([int(y2 / 8), vol_reg.shape[1]]) z = corner[0][2] z2 = corner[1][2] new_corners.append( [[x_reg, y_reg, z], [x2_reg, y2_reg, z2], corner[0], corner[1]] ) return new_corners def _compute_composition_corner(corners, outdir, dir_base_mask, dir_base_s3): l_c1 = corners[0] l_c2 = corners[1] m_c1 = corners[2] m_c2 = corners[3] fname = str(l_c1[0]) + "_" + str(l_c1[1]) + "_" + str(l_c1[2]) + ".pickle" fname = outdir / fname if os.path.exists(fname): return dir = dir_base_mask + "axon_mask" vol_mask = CloudVolume(dir, parallel=1, mip=0, fill_missing=True) dir = dir_base_s3 + "atlas_to_target" vol_reg = CloudVolume(dir, parallel=1, mip=0, fill_missing=True) try: labels = vol_reg[l_c1[0] : l_c2[0], l_c1[1] : l_c2[1], l_c1[2] : l_c2[2]] labels = np.repeat(np.repeat(labels, 8, axis=0), 8, axis=1) if np.all(labels == 0): return mask = vol_mask[m_c1[0] : m_c2[0], m_c1[1] : m_c2[1], m_c1[2] : m_c2[2]] except exceptions.EmptyVolumeException: return width = np.amin([mask.shape[0], labels.shape[0]]) height = np.amin([mask.shape[1], labels.shape[1]]) mask = mask[:width, :height, :] labels = labels[:width, :height, :] labels_unique = np.unique(labels[labels > 0]) volumes = {} for unq in labels_unique: cur_vol = np.sum(mask[labels == unq]) cur_total = np.sum(labels == unq) volumes[unq] = [cur_total, cur_vol] with open(fname, "wb") as f: pickle.dump(volumes, f) def _combine_regional_segmentations(outdir): files = os.listdir(outdir) volumes = {} for file in tqdm(files, desc="Assembling results"): if "pickle" in file: filename = outdir / file with open(filename, "rb") as f: result = pickle.load(f) for key in result.keys(): addition = result[key] if key in volumes.keys(): cur_vol = volumes[key][1] cur_total = volumes[key][0] else: cur_vol = 0 cur_total = 0 cur_vol += addition[1] cur_total += addition[0] volumes[key] = [cur_total, cur_vol] return volumes def collect_regional_segmentation( brain_id: str, data_file: str, outdir: str, ncpu: int = 1, min_coords: list = [-1, -1, -1], max_coords: list = [-1, -1, -1], s3_reg: bool = False, ): """Combine segmentation and registration to generate counts of axon voxels across brain regions. Note this scripts writes a file for every chunk, which might be many. Args: brain_id (str): Brain ID to process, from axon_data.py data_file (str): path to json file with data information. outdir (str): Path to directory to write files. ncpu (int, optional): Number of cpus to use for parallel processing. Defaults to 1. min_coords (list, optional): Lower limits of brain to complete processing, -1 will lead to processing from the beginning of the axis. Defaults to [-1, -1, -1]. max_coords (list, optional): Upper limits of brain to complete processing, -1 will lead to processing to the end of the axis. Defaults to [-1, -1, -1]. """ isExist = os.path.exists(outdir) if not isExist: print(f"Creating directory: {outdir}") os.makedirs(outdir) else: print(f"Downloaded data will be stored in {outdir}") outdir = Path(outdir) with open(data_file) as f: data = json.load(f) brain2paths = data["brain2paths"] dir_base_mask = brain2paths[brain_id]["base_s3"] if s3_reg: dir_base_s3 = brain2paths[brain_id]["base_s3"] else: dir_base_s3 = dir_base_mask dir = os.path.join(dir_base_mask, "axon_mask") vol_mask = CloudVolume(dir, parallel=1, mip=0, fill_missing=True) print(f"Mask shape: {vol_mask.shape}") dir = os.path.join(dir_base_s3, "atlas_to_target") vol_reg = CloudVolume(dir, parallel=1, mip=0, fill_missing=True) print(f"Atlas shape: {vol_reg.shape}") corners = _get_corners_collection( vol_mask, vol_reg, block_size=[512, 512, 512], max_coords=max_coords, min_coords=min_coords, ) Parallel(n_jobs=ncpu)( delayed(_compute_composition_corner)(corner, outdir, dir_base_mask, dir_base_s3) for corner in tqdm(corners, desc="Finding labels") ) volumes = _combine_regional_segmentations(outdir) fname = f"wholebrain_{brain_id}.pkl" outpath = outdir / fname with open(outpath, "wb") as f: pickle.dump(volumes, f)
[docs]class AxonDistribution(BrainDistribution): """Generates visualizations of results of axon segmentations of a set of brains. Implements BrainDistribution. Arguments: brain_ids (list): List of brain IDs (keys of data json file). data_files (str): Path to json file that contains information about samples. regional_distribution_dir (str): Path to directory with pkl files that countain segmentation results by region. show_plots (bool): Whether to show plots, only works if you have graphics access. Attributes: regional_distribution_dir (str): Path to directory with pkl files that countain segmentation results by region. region_graph (nx.DiGraph): Graph of region hierarchy with segmentation results as node attributes. total_axon_vols (dict): Key - sample ID Value - Total volume of segmented axon. brain2paths (dict): Information about different data samples. show_plots (bool): Whether to show plots, only works if you have graphics access. """ def __init__( self, brain_ids: list, data_file: str, regional_distribution_dir: str, ontology_file: str, fixes_file: str = None, show_plots: bool = True, ): super().__init__(brain_ids, data_file, ontology_file, fixes_file) self.regional_distribution_dir = regional_distribution_dir region_graph, total_axon_vols = self._setup_regiongraph( regional_distribution_dir ) self.region_graph, self.total_axon_vols = region_graph, total_axon_vols self.show_plots = show_plots def _setup_regiongraph(self, regional_distribution_dir): regional_distribution_dir = self.regional_distribution_dir brain_ids = self.brain_ids ontology_fixes = self.ontology_fixes region_graph = _setup_atlas_graph(self.ontology_file) max_level = 0 # set to 0 for node in region_graph.nodes: if region_graph.nodes[node]["level"] > max_level: max_level = region_graph.nodes[node]["level"] for brain_id in brain_ids: region_graph.nodes[node][brain_id + " axon"] = 0 region_graph.nodes[node][brain_id + " total"] = 0 # add data for brain_id in brain_ids: region_distribution = ( regional_distribution_dir + "wholebrain_" + brain_id + ".pkl" ) with open(region_distribution, "rb") as f: quantification_dict = pickle.load(f) for region in quantification_dict.keys(): if ontology_fixes != None and str(region) in ontology_fixes.keys(): region_idx = ontology_fixes[str(region)]["replacement"] else: region_idx = region if region_idx in region_graph.nodes: region_graph.nodes[region_idx][ brain_id + " axon" ] = region_graph.nodes[region_idx][brain_id + " axon"] + float( quantification_dict[region_idx][1] ) region_graph.nodes[region_idx][ brain_id + " total" ] = region_graph.nodes[region_idx][brain_id + " total"] + float( quantification_dict[region_idx][0] ) total_axon_vols = {} for brain in brain_ids: total = 0 for node in region_graph.nodes: total += region_graph.nodes[node][brain + " axon"] total_axon_vols[brain] = total # propagate counts up the hierarchy for brain_id in brain_ids: for lvl in range(max_level, 0, -1): for node in region_graph.nodes: if region_graph.nodes[node]["level"] == lvl: parent = list(region_graph.in_edges(node))[0][0] region_graph.nodes[parent][brain_id + " axon"] = ( region_graph.nodes[parent][brain_id + " axon"] + region_graph.nodes[node][brain_id + " axon"] ) region_graph.nodes[parent][brain_id + " total"] = ( region_graph.nodes[parent][brain_id + " total"] + region_graph.nodes[node][brain_id + " total"] ) return region_graph, total_axon_vols
[docs] def napari_coronal_section( self, z: int, subtype_colors: dict, fold_on: bool = False ): """Generate napari viewer with allen parcellation and heat map of axon segmentations. Args: z (int): Index of coronal slice of allen atlas. subtype_colors (dict): Mapping of subtype to color to be used in visualization. fold_on (bool, optional): Whether to plot a single hemisphere, with results from other side mirrored. Defaults to False. """ brain2paths = self.brain2paths if "filepath" in brain2paths["atlas"].keys(): vol_atlas = io.imread(brain2paths["atlas"]["filepath"]) else: vol_atlas = CloudVolume(brain2paths["atlas"]["url"]) slice = np.squeeze(np.array(vol_atlas[z, :, :])) newslice, borders, half_width = self._slicetolabels(slice, fold_on=fold_on) if self.show_plots: v = napari.Viewer() v.add_labels(newslice, scale=[10, 10]) heatmaps = {subtype: 0 * newslice for subtype in subtype_colors.keys()} for brain_id in self.brain_ids: print(brain_id) subtype = brain2paths[brain_id]["subtype"] transformed_mask_vol = CloudVolume( brain2paths[brain_id]["transformed_mask"], fill_missing=True ) mask_slice = np.squeeze(transformed_mask_vol[z - 10 : z + 10, :, :]) mask_slice = ndi.gaussian_filter(mask_slice.astype(float), sigma=(3, 3, 3))[ 10, :, : ] if fold_on: mask_slice = _fold(mask_slice) mask_slice[newslice == 0] = 0 heatmaps[subtype] = heatmaps[subtype] + mask_slice for subtype in heatmaps.keys(): heatmaps[subtype] = heatmaps[subtype] / np.amax(heatmaps[subtype]) rgb_heatmap = [-1, -1, -1] for subtype in heatmaps.keys(): if subtype_colors[subtype] == "red": rgb_heatmap[0] = heatmaps[subtype] elif subtype_colors[subtype] == "green": rgb_heatmap[1] = heatmaps[subtype] elif subtype_colors[subtype] == "blue": rgb_heatmap[2] = heatmaps[subtype] rgb_heatmap = [0 * newslice if type(i) == int else i for i in rgb_heatmap] rgb_heatmap = np.stack(rgb_heatmap, axis=-1) if self.show_plots: v.add_image(rgb_heatmap, rgb=True, scale=[10, 10], name=f"{subtype_colors}") v.add_labels(borders * 2, scale=[10, 10], name=f"z={z}") v.scale_bar.unit = "um" v.scale_bar.visible = True napari.run() return v
[docs] def brainrender_axons(self, subtype_colors: dict, brain_region: str = "DR"): """Generate brainrender view to show axon segmentations. Args: subtype_colors (dict): Mapping of subtype to color to be used in visualization. """ brain_ids = self.brain_ids brain2paths = self.brain2paths brainrender.settings.WHOLE_SCREEN = False scene = Scene(atlas_name="allen_mouse_50um", title="Input Somas") scene.add_brain_region(brain_region, alpha=0.15) for subtype in subtype_colors.keys(): im_total = None for i, brain_id in enumerate(brain_ids): if brain2paths[brain_id]["subtype"] == subtype: print(f"Downloading transformed_mask from brain: {brain_id}") vol = CloudVolume( brain2paths[brain_id]["transformed_mask"], fill_missing=True ) if im_total == None: im_total = np.array(vol[:, :, :, :]) else: im_total += np.array(vol[:, :, :, :]) im_total = np.squeeze(im_total) im_total = np.swapaxes(im_total, 0, 2) # make a volume actor and add actor = Volume( im_total, voxel_size=20, # size of a voxel's edge in microns as_surface=False, # if true a surface mesh is rendered instead of a volume c=subtype_colors[ subtype ], # use matplotlib colormaps to color the volume ) scene.add(actor) # render scene.content if self.show_plots: scene.render()
[docs] def region_barchart( self, regions: list, composite_regions: dict = {}, normalize_region: int = -1 ): """Generate bar charts with statistical tests to compare segmentations between brains. Args: regions (list): List of Allen atlas brain region IDs to display data for (ID's found here: http://api.brain-map.org/api/v2/structure_graph_download/1.json) composite_regions (dict, optional): Mapping from a custom composite region (str, e.g. "Amygdala") to a set of regions that compose it (list of ints e.g. [131, 295, 319, 780]). Defaults to {}. normalize_region (int, optional): Region ID to normalize data for the normalized bar chart. Defaults to -1. """ subtype_counts = self._get_subtype_counts() print(subtype_counts) df = self._make_bar_df( regions, composite_regions, subtype_counts, normalize_region ) subtypes = df["Subtype"].unique() if normalize_region >= 0: fig, axes = plt.subplots(1, 3, figsize=(39, 20)) else: fig, axes = plt.subplots(1, 2, figsize=(26, 20)) sns.set(font_scale=2) # first panel fig_args = { "x": "Axon Density (%)", "y": "Region", "hue": "Subtype", "data": df, } sns.set(font_scale=2) bplot = sns.scatterplot(ax=axes[0], **fig_args) fig_args["boxprops"] = {"facecolor": "none"} bplot = sns.boxplot(ax=axes[0], orient="h", **fig_args) bplot.set_xscale("log") if len(subtypes) > 1: annotator = self._configure_annotator(df, axes[0], "Axon Density (%)") annotator.new_plot(bplot, orient="h", plot="boxplot", **fig_args) annotator.apply_and_annotate() # second panel fig_args = { "x": "Percent Total Axon Volume (%)", "y": "Region", "hue": "Subtype", "data": df, } bplot = sns.scatterplot(ax=axes[1], **fig_args) fig_args["boxprops"] = {"facecolor": "none"} bplot = sns.boxplot(ax=axes[1], orient="h", **fig_args) bplot.set_xscale("log") if len(subtypes) > 1: annotator = self._configure_annotator( df, axes[1], "Percent Total Axon Volume (%)" ) annotator.new_plot(bplot, orient="h", plot="boxplot", **fig_args) annotator.apply_and_annotate() if normalize_region >= 0: # third panel fig_args = { "x": "Normalized Axon Density", "y": "Region", "hue": "Subtype", "data": df, } sns.set(font_scale=2) bplot = sns.scatterplot(ax=axes[2], **fig_args) fig_args["boxprops"] = {"facecolor": "none"} bplot = sns.boxplot(ax=axes[2], orient="h", **fig_args) bplot.set_xscale("log") if len(subtypes) > 1: annotator = self._configure_annotator( df, axes[2], "Normalized Axon Density" ) annotator.new_plot(bplot, orient="h", plot="boxplot", **fig_args) annotator.apply_and_annotate() fig.tight_layout() if self.show_plots: plt.show()
def _make_bar_df( self, regions, composite_regions, subtype_counts, normalize_region ): region_graph = self.region_graph brain_ids = self.brain_ids total_axon_vols = self.total_axon_vols subtypes = [] axon_vols = [] axon_denss_norm = [] axon_denss = [] region_name = [] brain_ids_data = [] for region in regions: print(f"Populating: {region_graph.nodes[region]['name']}") for brain_id in brain_ids: subtype = self.brain2paths[brain_id]["subtype"] axon_vol = region_graph.nodes[region][brain_id + " axon"] total_vol = region_graph.nodes[region][brain_id + " total"] if ( normalize_region >= 0 and region_graph.nodes[normalize_region][brain_id + " axon"] > 0 ): norm_factor = ( region_graph.nodes[normalize_region][brain_id + " axon"] / region_graph.nodes[normalize_region][brain_id + " total"] ) else: norm_factor = 1 if normalize_region >= 0: print( f"Warning: brain {brain_id} has no projection in normalizing region: {normalize_region}" ) if total_vol == 0 and axon_vol == 0: axon_denss.append(0) axon_denss_norm.append(0) elif total_vol == 0: raise ValueError("positive axon volume in zero volume region?") else: dens = axon_vol / total_vol axon_denss.append(dens * 100) axon_denss_norm.append(dens / norm_factor) axon_vols.append(axon_vol / total_axon_vols[brain_id] * 100) subtypes.append(subtype + f" (n={subtype_counts[subtype]})") region_name.append(region_graph.nodes[region]["name"]) brain_ids_data.append(brain_id) for region_component_name in composite_regions.keys(): print(f"Populating: " + region_component_name) region_components = composite_regions[region_component_name] for brain_id in brain_ids: subtype = self.brain2paths[brain_id]["subtype"] axon_vol = 0 total_vol = 0 for region_component in region_components: axon_vol += region_graph.nodes[region_component][brain_id + " axon"] total_vol += region_graph.nodes[region_component][ brain_id + " total" ] if ( normalize_region >= 0 and region_graph.nodes[normalize_region][brain_id + " axon"] > 0 ): norm_factor = ( region_graph.nodes[normalize_region][brain_id + " axon"] / region_graph.nodes[normalize_region][brain_id + " total"] ) else: norm_factor = 1 if normalize_region >= 0: print( f"Warning: brain {brain_id} has no projection in normalizing region: {normalize_region}" ) if total_vol == 0 and axon_vol == 0: axon_denss.append(0) axon_denss_norm.append(0) elif total_vol == 0: raise ValueError("positive axon volume in zero volume region?") else: dens = axon_vol / total_vol axon_denss.append(dens * 100) axon_denss_norm.append(dens / norm_factor) axon_vols.append(axon_vol / total_axon_vols[brain_id] * 100) subtypes.append(subtype + f" (n={subtype_counts[subtype]})") region_name.append(region_component_name) brain_ids_data.append(brain_id) d = { "Percent Total Axon Volume (%)": axon_vols, "Axon Density (%)": axon_denss, "Subtype": subtypes, "Region": region_name, "Brain ID": brain_ids_data, } if normalize_region >= 0: d["Normalized Axon Density"] = axon_denss_norm df = pd.DataFrame(data=d) return df def _configure_annotator(self, df, axis, ind_variable: str): subtype_counts = self.subtype_counts test = StatTest( _log_ttest_ind, test_long_name="Log t-test_ind", test_short_name="log-t" ) # test = "t-test_ind" # test = "Mann-Whitney" correction = "fdr_by" pairs = [] unq_subregions = df["Region"].unique() subtypes = df["Subtype"].unique() subtypes = [ subtype for subtype in subtypes if subtype_counts[subtype.split(" (n=")[0]] > 1 ] subtype_pairs = [ (a, b) for idx, a in enumerate(subtypes) for b in subtypes[idx + 1 :] ] print(subtype_pairs) for subtype_pair in subtype_pairs: for subregion in unq_subregions: pairs.append( ( (subregion, subtype_pair[0]), (subregion, subtype_pair[1]), ) ) fig_args = { "y": ind_variable, "x": "Region", "hue": "Subtype", "data": df, } annotator = Annotator(axis, pairs, **fig_args) annotator.configure( test=test, text_format="star", loc="outside", comparisons_correction=correction, ) return annotator