import os
import shutil
from tqdm import tqdm
import subprocess
from joblib import Parallel, delayed
import multiprocessing
from brainlit.BrainLine.util import _find_sample_names, _get_corners
from datetime import date
from cloudvolume import CloudVolume, exceptions, Bbox
import numpy as np
import h5py
from skimage import io, measure
from pathlib import Path
from os.path import isfile, join
from os import listdir
import random
from cloudreg.scripts.transform_points import NGLink
from cloudreg.scripts.visualization import create_viz_link_from_json
import pandas as pd
from brainlit.BrainLine.imports import *
import json
from typing import Union
import igneous.task_creation as tc
from taskqueue import LocalTaskQueue
class ApplyIlastik:
    """Applies ilastik to subvolumes for the purpose of validating machine learning algorithms.

    Arguments:
        ilastik_path (str): Path to ilastik executable.
        project_path (str): Path to ilastik project.
        brains_path (str): Path to directory that contains brain sample subdirectories.
        brains (list): List of brain sample names.

    Attributes:
        ilastik_path (str): Path to ilastik executable.
        project_path (str): Path to ilastik project.
        brains_path (Path): Path to directory that contains brain sample subdirectories.
        brains (list): List of brain sample names.
    """
def __init__(
self, ilastik_path: str, project_path: str, brains_path: str, brains: list
):
self.ilastik_path = ilastik_path
self.project_path = project_path
self.brains_path = Path(brains_path)
self.brains = brains
def _apply_ilastik(self, fnames):
command = [
self.ilastik_path,
"--headless",
f"--project={self.project_path}",
] + fnames
subprocess.run(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
    def process_subvols(self, dset: str = "val", ncpu: int = 6):
        """Apply ilastik to all subvolumes of the specified brain IDs in the specified directory.

        Args:
            dset (str, optional): Name of the subvolume subdirectory to process (e.g. "val"). Defaults to "val".
            ncpu (int, optional): Number of cpus to use for segmentation. Defaults to 6.
        """
items_total = []
for brain in tqdm(self.brains, desc="Gathering brains..."):
if brain == "8557":
brain_name = "r1"
elif brain == "8555":
brain_name = "r2"
else:
brain_name = brain
path = self.brains_path / f"brain{brain_name}" / dset
items_total += _find_sample_names(path, dset="", add_dir=True)
print(f"Applying ilastik to {items_total}")
        # Split the file list into roughly equal chunks, one per worker. Guard
        # against a zero chunk size when there are fewer files than CPUs.
        n_images = max(len(items_total) // ncpu, 1)
        items_chunks = [
            items_total[i : i + n_images]
            for i in range(0, len(items_total), n_images)
        ]
Parallel(n_jobs=ncpu)(
delayed(self._apply_ilastik)(items_chunk)
for items_chunk in tqdm(items_chunks, desc="running ilastik...")
)
    def move_results(self):
        """Move results from process_subvols to a new subfolder."""
for brain in tqdm(self.brains, desc="Moving results"):
# if brain == "8557":
# brain_name = "r1"
# elif brain == "8555":
# brain_name = "r2"
# else:
# brain_name = brain
brain_dir = self.brains_path / f"brain{brain}" / "val"
results_dir = brain_dir / f"results{date.today()}"
if not os.path.exists(results_dir):
print(f"Creating directory: {results_dir}")
os.makedirs(results_dir)
items = _find_sample_names(brain_dir, dset="", add_dir=False)
for item in items:
prob_fname = f"{item[:-3]}_Probabilities.h5"
result_path = brain_dir / prob_fname
shutil.move(result_path, results_dir / prob_fname)
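# Illustrative usage sketch for ApplyIlastik (not part of the library API).
# The ilastik install path, project file, data directory, and sample IDs below
# are hypothetical placeholders; substitute real locations before running.
def _example_apply_ilastik_validation():  # pragma: no cover - illustrative only
    applier = ApplyIlastik(
        ilastik_path="/opt/ilastik/run_ilastik.sh",  # hypothetical executable path
        project_path="/models/soma_detector.ilp",  # hypothetical ilastik project
        brains_path="/data/subvolumes",  # hypothetical directory containing brain<ID>/val folders
        brains=["8557", "8650"],  # hypothetical sample IDs
    )
    applier.process_subvols(dset="val", ncpu=4)  # writes *_Probabilities.h5 next to each subvolume
    applier.move_results()  # moves predictions into a dated results subfolder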
def plot_results(
data_dir: str,
brain_ids: list,
object_type: str,
positive_channel: int,
dset: str = "val",
doubles: list = [],
show_plot: bool = True,
):
"""Plot precision recall curve for a specified brain.
Args:
data_dir (str): Path to directory where brain subvolumes are stored.
brain_id (str): Brain id to examine (brain2paths key from _data.py file).
object_type (str): soma or axon, the type of data to examine.
positive_channel (int): Channel that represents neuron in the predictions.
doubles (list, optional): Filenames of soma subvolumes that contain two somas, if applicable. Defaults to [].
show_plot (bool, optional): Whether to run pyplot, useful for pytests when figures should not be displayed. Defaults to True.
Raises:
ValueError: _description_
Returns:
float: Best f-score across all thresholds.
float: Threshold that yields the best validation f-score.
"""
recalls = []
precisions = []
brain_ids_data = []
best_fscores = {}
best_precisions = []
best_recalls = []
size_thresh = 500
thresholds = list(np.arange(0.0, 1.0, 0.02))
for brain_id in tqdm(brain_ids, desc="Processing Brains"):
if brain_id == "8557":
brain_name = "r1"
elif brain_id == "8555":
brain_name = "r2"
else:
brain_name = brain_id
base_dir = data_dir + f"/brain{brain_name}/{dset}/"
data_files = _find_sample_names(base_dir, dset="", add_dir=True)
test_files = [
file[: file.rfind(".")] + "_Probabilities.h5" for file in data_files
]
        best_fscore = 0
        best_thresh = -1
        # Initialize so the per-brain summary below is defined even if no
        # threshold yields a positive f-score.
        best_prec = 0
        best_recall = 0
for threshold in thresholds:
tot_pos = 0
tot_neg = 0
true_pos = 0
false_pos = 0
for filename in tqdm(test_files, disable=True):
f = h5py.File(filename, "r")
pred = f.get("exported_data")
channel_dim = np.argmin(pred.shape)
if channel_dim == 0:
pred = pred[positive_channel, :, :, :]
elif channel_dim == 3:
pred = pred[:, :, :, positive_channel]
else:
raise ValueError(
f"Channel dimension should be first or last, not {channel_dim}"
)
mask = pred > threshold
cntr = [s // 2 for s in mask.shape]
if object_type == "soma":
if filename.split("/")[-1].split("_Probabilities")[0] in doubles:
newpos = 2
else:
newpos = 1
labels = measure.label(mask)
props = measure.regionprops(labels)
if "pos" in filename:
num_detected = 0
tot_pos += newpos
no_cntr_yet = True
for prop in props:
if prop["area"] > size_thresh:
if labels[cntr[0], cntr[1], cntr[2]] == prop["label"]:
true_pos += 1
num_detected += 1
no_cntr_yet = False
elif num_detected < newpos - no_cntr_yet:
true_pos += 1
num_detected += 1
else:
false_pos += 1
elif "neg" in filename:
tot_neg += 1
for prop in props:
if prop["area"] > size_thresh:
false_pos += 1
elif object_type == "axon":
filename_lab_3 = filename[:-17] + "-image_3channel_Labels.h5"
filename_lab_2 = (
filename[:-17] + "-image_2channel_Labels.h5"
) # backward compatibility
                    if os.path.isfile(filename_lab_3):
                        filename_lab = filename_lab_3
                    elif os.path.isfile(filename_lab_2):
                        filename_lab = filename_lab_2
                    else:
                        raise FileNotFoundError(f"No pixel labels file found for {filename}")
f = h5py.File(filename_lab, "r")
gt = f.get("exported_data")
if channel_dim == 0:
gt = gt[0, :, :, :]
elif channel_dim == 3:
gt = gt[:, :, :, 0]
pos_labels = gt == 2
neg_labels = gt == 1
tot_pos += np.sum(pos_labels)
tot_neg += np.sum(neg_labels)
true_pos += np.sum(np.logical_and(mask, pos_labels))
false_pos += np.sum(np.logical_and(mask, neg_labels))
else:
raise ValueError(
f"object_type must be axon or soma, not {object_type}"
)
brain_ids_data.append(brain_id)
recall = true_pos / tot_pos
recalls.append(recall)
if true_pos + false_pos == 0:
precision = 0
else:
precision = true_pos / (true_pos + false_pos)
precisions.append(precision)
if precision == 0 and recall == 0:
fscore = 0
else:
fscore = 2 * precision * recall / (precision + recall)
if fscore > best_fscore:
best_fscore = fscore
best_thresh = threshold
best_prec = precision
best_recall = recall
best_fscores[brain_id] = (best_fscore, best_thresh)
best_precisions.append(best_prec)
best_recalls.append(best_recall)
for i, brain_id in enumerate(brain_ids_data):
brain_ids_data[i] = (
brain_id
+ f" - FS: {best_fscores[brain_id][0]:.2f} @ {best_fscores[brain_id][1]:.2f}"
)
    plot_data = {
        "ID": brain_ids_data,
        "Recall": recalls,
        "Precision": precisions,
    }
    df = pd.DataFrame(plot_data)
print("If this performance is not adequate, improve model and try again")
if show_plot:
sns.set(font_scale=2)
fig = plt.figure(figsize=(8, 8))
sns.lineplot(
data=df, x="Recall", y="Precision", hue="ID", estimator=np.amax, ci=False
)
        plt.scatter(
            best_recalls,
            best_precisions,
        )
plt.xlim([0, 1.1])
plt.ylim([0, 1.1])
print(f"Brain {brain_id} Validation: {tot_pos}+ {tot_neg}-")
plt.legend()
return fig, best_fscore, best_thresh # plt.show()
return best_fscore, best_thresh
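# Illustrative sketch of evaluating predictions with plot_results (not part of
# the library API); the directory, brain IDs, and channel below are hypothetical
# placeholders and depend on the ilastik project's export order.
def _example_plot_results():  # pragma: no cover - illustrative only
    fig, best_fscore, best_thresh = plot_results(
        data_dir="/data/subvolumes",  # hypothetical directory of brain<ID>/val folders
        brain_ids=["8557", "8650"],  # hypothetical sample IDs
        object_type="soma",
        positive_channel=0,  # hypothetical; depends on the ilastik project
    )
    print(f"Best f-score {best_fscore:.2f} at threshold {best_thresh:.2f}")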
def examine_threshold(
data_dir: str,
brain_id: str,
threshold: float,
object_type: str,
positive_channel: int,
dset: str = "val",
doubles: list = [],
show_plot: bool = True,
):
"""Display results in napari of all subvolumes that were below some performance threshold, at a given threshold.
Args:
data_dir (str): Path to directory where brain subvolumes are stored.
brain_id (str): Brain ID to examine (from _data.py file).
threshold (float): Threshold to examine.
object_type (str): soma or axon, the data type being examined.
positive_channel (int): 0 or 1, Channel that represents neuron in the predictions.
doubles (list, optional): Filenames of soma subvolumes that contain two somas, if applicable. Defaults to [].
show_plot (bool, optional): Whether to run napari, useful for pytests when figures should not be displayed. Defaults to True.
Raises:
ValueError: If object_type is neither axon nor soma
ValueError: If positive_channel is not 0 or 1.
"""
base_dir = data_dir + f"/brain{brain_id}/{dset}/"
data_files = _find_sample_names(base_dir, dset="", add_dir=True)
test_files = [file[: file.rfind(".")] + "_Probabilities.h5" for file in data_files]
size_thresh = 500
for im_fname, filename in tqdm(
zip(data_files, test_files), disable=True, total=len(data_files)
):
print(f"*************File: {im_fname}*********")
f = h5py.File(filename, "r")
pred = f.get("exported_data")
channel_dim = np.argmin(pred.shape)
if channel_dim == 0:
pred = pred[positive_channel, :, :, :]
elif channel_dim == 3:
pred = pred[:, :, :, positive_channel]
else:
raise ValueError(
f"Channel dimension should be first or last, not {channel_dim}"
)
mask = pred > threshold
cntr = [s // 2 for s in mask.shape]
if object_type == "soma":
if filename.split("/")[-1].split("_Probabilities")[0] in doubles:
newpos = 2
else:
newpos = 1
labels = measure.label(mask)
props = measure.regionprops(labels)
if "pos" in filename:
no_cntr_yet = True
num_detected = 0
for prop in props:
area = prop["area"]
if area > size_thresh:
print(f"area of detected object: {area}")
if labels[cntr[0], cntr[1], cntr[2]] == prop["label"]:
num_detected += 1
no_cntr_yet = False
elif num_detected < newpos - no_cntr_yet:
num_detected += 1
elif show_plot:
print(f"Soma false positive Area: {area}")
f = h5py.File(im_fname, "r")
im = f.get("image_3channel")
viewer = napari.Viewer(ndisplay=3)
viewer.add_image(
im[0, :, :, :], name=filename.split("/")[-1]
)
viewer.add_image(im[1, :, :, :], name="bg")
viewer.add_image(im[2, :, :, :], name="endo")
viewer.add_labels(mask)
viewer.add_labels(
labels == prop["label"],
name=f"soma false positive area: {area}",
)
if num_detected < newpos and show_plot:
print(f"Soma false negative")
f = h5py.File(im_fname, "r")
im = f.get("image_3channel")
viewer = napari.Viewer(ndisplay=3)
viewer.add_image(im[0, :, :, :], name=filename.split("/")[-1])
viewer.add_image(im[1, :, :, :], name="bg")
viewer.add_image(im[2, :, :, :], name="endo")
viewer.add_labels(mask, name="Soma false negative")
elif "neg" in filename:
for prop in props:
area = prop["area"]
if area > size_thresh and show_plot:
print(f"Nonsoma false positive Area: {area}")
f = h5py.File(im_fname, "r")
im = f.get("image_3channel")
viewer = napari.Viewer(ndisplay=3)
viewer.add_image(im[0, :, :, :], name=filename.split("/")[-1])
viewer.add_image(im[1, :, :, :], name="bg")
viewer.add_image(im[2, :, :, :], name="endo")
viewer.add_labels(mask)
viewer.add_labels(
labels == prop["label"],
name=f"nonsoma false positive area: {area}",
)
elif object_type == "axon":
filename_lab_3 = filename[:-17] + "-image_3channel_Labels.h5"
filename_lab_2 = (
filename[:-17] + "-image_2channel_Labels.h5"
) # backward compatibility
            if os.path.isfile(filename_lab_3):
                fname_lab = filename_lab_3
            elif os.path.isfile(filename_lab_2):
                fname_lab = filename_lab_2
            else:
                raise FileNotFoundError(f"No pixel labels file found for {filename}")
f = h5py.File(fname_lab, "r")
gt = f.get("exported_data")
if channel_dim == 0:
gt = gt[0, :, :, :]
elif channel_dim == 3:
gt = gt[:, :, :, 0]
if positive_channel == 1:
pos_labels = gt == 2
neg_labels = gt == 1
elif positive_channel == 0:
pos_labels = gt == 1
neg_labels = gt == 2
else:
raise ValueError(
f"positive_channel expected to be 0 or 1 not {positive_channel}"
)
true_pos = np.sum(np.logical_and(mask, pos_labels))
false_pos = np.sum(np.logical_and(mask, neg_labels))
true_labels = np.sum(pos_labels)
if true_labels == 0:
recall = 1
else:
recall = true_pos / true_labels
if true_pos + false_pos == 0:
precision = 1
else:
precision = true_pos / (true_pos + false_pos)
if (precision < 0.8 or recall < 0.8) and show_plot:
f = h5py.File(im_fname, "r")
keys = list(f.keys())
if len(keys) > 1:
raise ValueError(f"Multiple keys in image file: {keys}")
else:
key = keys[0]
im = f.get(key)
print(f"prec{precision} recall: {recall}")
viewer = napari.Viewer(ndisplay=3)
if len(im.shape) == 3:
viewer.add_image(im, name=f"{im_fname}")
else:
for layer in range(im.shape[0]):
viewer.add_image(im[layer, :, :, :], name=f"{layer}-{im_fname}")
viewer.add_labels(mask, name="mask")
viewer.add_labels(pos_labels + 2 * neg_labels, name="pos labels")
else:
print(f"Precision: {precision}, recall: {recall}")
else:
raise ValueError(f"object_type must be axon or soma, not {object_type}")
class ApplyIlastik_LargeImage:
    """Apply ilastik to a large image, where chunking is necessary.

    Arguments:
        ilastik_path (str): Path to ilastik executable.
        ilastik_project (str): Path to ilastik project.
        ncpu (int): Number of cpus to use for applying ilastik in parallel.
        data_file (str): Path to data JSON that specifies the object_type (soma or axon) and brain2paths.
        results_dir (str or Path): For soma detection, the directory to write detection results.

    Attributes:
        ilastik_path (str): Path to ilastik executable.
        ilastik_project (str): Path to ilastik project.
        ncpu (int): Number of cpus to use for applying ilastik in parallel.
        object_type (str): soma for soma detection or axon for axon segmentation.
        results_dir (Path): For soma detection, the directory to write detection results.
    """
def __init__(
self,
ilastik_path: str,
ilastik_project: str,
ncpu: int,
data_file: str,
results_dir: Union[str, Path] = None,
):
with open(data_file) as f:
data = json.load(f)
object_type = data["object_type"]
self.brain2paths = data["brain2paths"]
self.ilastik_path = ilastik_path
self.ilastik_project = ilastik_project
self.ncpu = ncpu
self.object_type = object_type
if object_type == "axon":
            if results_dir is not None:
raise ValueError(
f"cannot give results_dir for object type {object_type}"
)
elif object_type == "soma":
if isinstance(results_dir, str):
results_dir = Path(results_dir)
else:
raise ValueError(f"object_type must be soma or axon not {object_type}")
self.results_dir = results_dir
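    # Sketch of the data_file JSON layout this class expects, inferred from the
    # keys accessed in this module (values below are hypothetical placeholders):
    #
    # {
    #     "object_type": "axon",
    #     "brain2paths": {
    #         "8557": {
    #             "base_local": "precomputed://file:///data/brain8557/",
    #             "base_s3": "precomputed://s3://bucket/brain8557/",
    #             "val_info": {"url": "https://viz.neurodata.io/?json_url=..."}
    #         }
    #     }
    # }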
    def apply_ilastik_parallel(
self,
brain_id: str,
layer_names: list,
threshold: float,
data_dir: str,
chunk_size: list,
min_coords: list = [-1, -1, -1],
max_coords: list = [-1, -1, -1],
):
"""Apply ilastik to large brain, in parallel.
Args:
brain_id (str): Brain ID (key in brain2paths in _data.py file).
layer_names (list): Precomputed layer names to be appended to the base path.
threshold (float): Threshold for the ilastik predictor.
data_dir (str or Path): Path to directory where downloaded data will be temporarily stored.
chunk_size (list): Size of chunks to be used for parallel application of ilastik.
            min_coords (list, optional): Lower bound of bounding box on which to apply ilastik (i.e. ilastik is not applied before these bounds). Defaults to [-1, -1, -1].
            max_coords (list, optional): Upper bound of bounding box on which to apply ilastik (i.e. ilastik is not applied beyond these bounds). Defaults to [-1, -1, -1].
"""
results_dir = self.results_dir
volume_base_dir_read = self.brain2paths[brain_id]["base_local"]
volume_base_dir_write = self.brain2paths[brain_id]["base_s3"]
sample_path = volume_base_dir_read + layer_names[1]
vol = CloudVolume(sample_path, parallel=True, mip=0, fill_missing=True)
shape = vol.shape
print(f"Processing: {sample_path} with shape {shape} at threshold {threshold}")
isExist = os.path.exists(data_dir)
if not isExist:
print(f"Creating directory: {data_dir}")
os.makedirs(data_dir)
else:
print(f"Downloaded data will be stored in {data_dir}")
        if isinstance(data_dir, str):
            data_dir = Path(data_dir)
        elif not isinstance(data_dir, Path):
            raise ValueError(f"data_dir must be str or Path, not {type(data_dir)}")
if self.object_type == "soma":
isExist = os.path.exists(results_dir)
if not isExist:
print(f"Creating directory: {results_dir}")
os.makedirs(results_dir)
else:
print(f"Soma detections data will be stored in {results_dir}")
elif self.object_type == "axon":
mask_dir = volume_base_dir_write + "axon_mask"
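            # If the axon_mask layer does not exist yet, create its info file.
            # The processing chunk_size must be a multiple of the mask layer's
            # underlying chunk size ([128, 128, 2]) so parallel writes do not
            # collide on shared chunks.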
try:
CloudVolume(mask_dir)
except:
assert np.all(
[
c_ilastik % c_vol == 0
for c_ilastik, c_vol in zip(chunk_size, [128, 128, 2])
]
)
self._make_mask_info(mask_dir, vol, [128, 128, 2])
corners = _get_corners(
shape, chunk_size, max_coords=max_coords, min_coords=min_coords
)
if self.ncpu == 1:
for corner in tqdm(corners):
self._process_chunk(
corner[0],
corner[1],
volume_base_dir_read,
volume_base_dir_write,
layer_names,
threshold,
data_dir,
self.object_type,
results_dir,
)
else:
Parallel(n_jobs=self.ncpu)(
delayed(self._process_chunk)(
corner[0],
corner[1],
volume_base_dir_read,
volume_base_dir_write,
layer_names,
threshold,
data_dir,
self.object_type,
results_dir,
)
for corner in tqdm(corners, leave=False)
)
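    # Illustrative call sketch (layer names, chunk size, and bounds below are
    # hypothetical placeholders; "applier" is an ApplyIlastik_LargeImage instance):
    #
    #     applier.apply_ilastik_parallel(
    #         brain_id="8557",
    #         layer_names=["antibody", "background", "endogenous"],
    #         threshold=0.5,
    #         data_dir="/scratch/ilastik_chunks",
    #         chunk_size=[256, 256, 128],
    #         min_coords=[2000, 1000, 500],
    #         max_coords=[4000, 3000, 1500],
    #     )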
def _make_mask_info(self, mask_dir: str, vol: CloudVolume, chunk_size: list):
info = CloudVolume.create_new_info(
num_channels=1,
layer_type="segmentation",
data_type="uint64", # Channel images might be 'uint8'
encoding="raw", # raw, jpeg, compressed_segmentation, fpzip, kempressed
resolution=vol.resolution, # Voxel scaling, units are in nanometers
voxel_offset=vol.voxel_offset, # x,y,z offset in voxels from the origin
# mesh = 'mesh',
# Pick a convenient size for your underlying chunk representation
# Powers of two are recommended, doesn't need to cover image exactly
chunk_size=chunk_size, # units are voxels
volume_size=vol.volume_size, # e.g. a cubic millimeter dataset
)
vol_mask = CloudVolume(mask_dir, info=info, compress=False)
vol_mask.commit_info()
def _process_chunk(
self,
c1: list,
c2: list,
volume_base_dir_read: str,
volume_base_dir_write: str,
layer_names: list,
threshold: float,
data_dir: Path,
object_type: str,
results_dir: str = None,
):
mip = 0
area_threshold = 500
vols = []
        for layer_name in layer_names:
            if layer_name == "zero":
                vol = "zero"
            else:
                layer_path = volume_base_dir_read + layer_name
                vol = CloudVolume(layer_path, parallel=1, mip=mip, fill_missing=True)
                dtype = vol.dtype
            vols.append(vol)
try:
ims = []
for vol in vols:
if vol == "zero":
im = np.zeros([c2[i] - c1[i] for i in range(3)], dtype=dtype)
else:
im = np.squeeze(vol[c1[0] : c2[0], c1[1] : c2[1], c1[2] : c2[2]])
ims.append(im)
image_3channel = np.squeeze(np.stack(ims, axis=0))
except:
print(f"File read error at: {c1}")
return
fname = f"image_{c1[0]}_{c1[1]}_{c1[2]}.h5"
fname = data_dir / fname
with h5py.File(fname, "w") as f:
dset = f.create_dataset("image_3channel", data=image_3channel)
for attempt in range(3):
subprocess.run(
[
f"{self.ilastik_path}",
"--headless",
f"--project={self.ilastik_project}",
fname,
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
            fname_prob = str(fname.with_suffix("")) + "_Probabilities.h5"
try:
with h5py.File(fname_prob, "r") as f:
pred = f.get("exported_data")
if object_type == "soma":
pred = pred[0, :, :, :]
mask = pred > threshold
labels = measure.label(mask)
props = measure.regionprops(labels)
elif object_type == "axon":
pred = pred[1, :, :, :]
mask = np.array(pred > threshold).astype("uint64")
break
except:
if attempt >= 2:
raise ValueError(f"Tried to evaluate thrice and failed")
if os.path.isfile(fname_prob):
os.remove(fname_prob)
continue
if object_type == "soma":
fname_results = f"image_{c1[0]}_{c1[1]}_{c1[2]}_somas.txt"
fname_results = results_dir / fname_results
results = []
for prop in props:
if prop["area"] > area_threshold:
location = list(np.add(c1, prop["centroid"]))
results.append(location)
if len(results) > 0:
with open(fname_results, "w") as f2:
for location in results:
f2.write(str(location))
f2.write("\n")
elif object_type == "axon":
dir_mask = volume_base_dir_write + "axon_mask"
vol_mask = CloudVolume(
dir_mask, parallel=1, mip=mip, fill_missing=True, compress=False
)
vol_mask[c1[0] : c2[0], c1[1] : c2[1], c1[2] : c2[2]] = mask
os.remove(fname)
os.remove(str(fname.with_suffix("")) + "_Probabilities.h5")
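    # Each chunk that contains detections produces a text file named
    # image_<x>_<y>_<z>_somas.txt in results_dir, with one "[x, y, z]" centroid
    # (in voxel coordinates of the processed volume) per line;
    # collect_soma_results below parses these files back into point annotations.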
    def collect_soma_results(self, brain_id: str):
"""Combine all soma detections and post to neuroglancer. Intended for use after apply_ilastik_parallel.
Args:
brain_id (str): ID to process.
"""
coords = []
coords_target_space = []
results_dir = self.results_dir
onlyfiles = [
join(results_dir, f)
for f in listdir(results_dir)
if isfile(join(results_dir, f))
]
onlyfiles = [f for f in onlyfiles if ".txt" in f]
onlyfiles = [f for f in onlyfiles if "all_somas" not in f]
div_factor = [8, 8, 1]
for file in tqdm(onlyfiles, desc="reading files"):
print(file)
            with open(file, "r") as file1:
                lines = file1.readlines()
for line in tqdm(lines, desc="parsing coordinates", leave=False):
if line != "\n":
line = " ".join(line.split())
elements = line.split(",")
coord = [elements[0][1:], elements[1], elements[2][:-1]]
coords_target_space.append([float(e.strip()) for e in coord])
coord = [
int(round(float(e.strip()) / f))
for e, f in zip(coord, div_factor)
]
coords.append(coord)
print(f"{len(coords)} somas detected, first is: {coords_target_space[0]}")
all_somas_path = results_dir / f"all_somas_{brain_id}.txt"
print(f"Writing {all_somas_path}...")
with open(all_somas_path, "w") as f:
for coord in coords_target_space:
f.write(f"{coord}")
f.write("\n")
if len(coords_target_space) > 10000:
random.shuffle(coords_target_space)
point_chunks = [
coords_target_space[i : i + 10000]
for i in range(0, len(coords_target_space), 10000)
]
name = "detected_somas_partial"
else:
point_chunks = [coords_target_space]
name = "detected_somas"
        for point_chunk in point_chunks:
            ng_link = self.brain2paths[brain_id]["val_info"]["url"]
            viz_link = NGLink(ng_link.split("json_url=")[-1])
            ngl_json = viz_link._json
            ngl_json["layers"] = [
                layer for layer in ngl_json["layers"] if layer["type"] != "annotation"
            ]
            ngl_json["layers"].append(
                {"type": "annotation", "points": point_chunk, "name": name}
            )
viz_link = create_viz_link_from_json(
ngl_json, neuroglancer_link="https://viz.neurodata.io/?json_url="
)
print(f"Viz link with detections: {viz_link}")
    def collect_axon_results(self, brain_id: str, ng_layer_name: str):
        """Generate neuroglancer link with the axon_mask segmentation. Intended for use after apply_ilastik_parallel.
Args:
brain_id (str): ID to process.
ng_layer_name (str): Name of neuroglancer layer in val_info URL with image data.
"""
ng_link = self.brain2paths[brain_id]["val_info"]["url"]
viz_link = NGLink(ng_link.split("json_url=")[-1])
ngl_json = viz_link._json
ngl_json["layers"] = [
layer for layer in ngl_json["layers"] if layer["type"] != "annotation"
]
for layer in ngl_json["layers"]:
if layer["name"] == ng_layer_name:
source_pieces = layer["source"]["url"].split("/")
source = ""
for piece in source_pieces[:-1]:
source += piece
source += "/"
source += "axon_mask"
ngl_json["layers"].append(
{"type": "segmentation", "source": source, "name": "axon_mask"}
)
break
print(ngl_json)
viz_link = create_viz_link_from_json(
ngl_json, neuroglancer_link="https://viz.neurodata.io/?json_url="
)
print(f"Viz link with segmentation: {viz_link}")
def downsample_mask(brain: str, data_file: str, ncpu: int = 1, bounds: list = None):
    """Use Igneous pipeline to downsample new axon_mask segmentation. Necessary to transform the mask into atlas space.

    Args:
        brain (str): ID key whose axon_mask will be downsampled.
        data_file (str): Path to data JSON.
        ncpu (int, optional): Number of cores to use for downsampling. Defaults to 1.
        bounds (list, optional): List of two lists of length 3 specifying corners of bounding box to restrict downsampling to. Defaults to None.

    Raises:
        ValueError: If the data file does not describe axon data (object_type is not axon).
    """
with open(data_file) as f:
data = json.load(f)
object_type = data["object_type"]
brain2paths = data["brain2paths"]
if object_type != "axon":
raise ValueError(f"Entered non-axon data file")
dir_base = brain2paths[brain]["base_s3"]
layer_path = dir_base + "axon_mask"
tq = LocalTaskQueue(parallel=ncpu)
    if bounds is not None:
bounds = Bbox(bounds[0], bounds[1])
tasks = tc.create_downsampling_tasks(
layer_path, # e.g. 'gs://bucket/dataset/layer'
mip=0, # Start downsampling from this mip level (writes to next level up)
fill_missing=True, # Ignore missing chunks and fill them with black
axis="z",
num_mips=2, # number of downsamples to produce. Downloaded shape is chunk_size * 2^num_mip
chunk_size=None, # manually set chunk size of next scales, overrides preserve_chunk_size
preserve_chunk_size=True, # use existing chunk size, don't halve to get more downsamples
sparse=False, # for sparse segmentation, allow inflation of pixels against background
bounds=bounds, # mip 0 bounding box to downsample
encoding=None, # e.g. 'raw', 'compressed_segmentation', etc
delete_black_uploads=True, # issue a delete instead of uploading files containing all background
background_color=0, # Designates the background color
compress="gzip", # None, 'gzip', and 'br' (brotli) are options
factor=(2, 2, 2), # common options are (2,2,1) and (2,2,2)
)
tq.insert(tasks)
tq.execute()
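# Illustrative sketch of downsampling an axon_mask after segmentation (not part
# of the library API); the data file path, brain ID, and bounds are hypothetical.
def _example_downsample_mask():  # pragma: no cover - illustrative only
    downsample_mask(
        brain="8557",  # hypothetical sample ID
        data_file="/configs/axon_data.json",  # hypothetical data JSON (object_type must be "axon")
        ncpu=4,
        bounds=[[0, 0, 0], [2048, 2048, 1024]],  # hypothetical mip-0 bounding box
    )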