This notebook is part of the deepcell-spots documentation: https://deepcell-spots.readthedocs.io/

Singleplex FISH Analysis

[1]:
import glob
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from deepcell_toolbox.processing import histogram_normalization, normalize
from deepcell_toolbox.deep_watershed import deep_watershed
from deepcell.applications import CytoplasmSegmentation, NuclearSegmentation
from deepcell_spots.applications import Polaris, SpotDetection
from deepcell_spots.singleplex import *

class OverrideCytoplasmSegmentation(CytoplasmSegmentation):

    def __init__(self, model=None):
        # Skip CytoplasmSegmentation.__init__ and call Application.__init__ directly
        # so that model_image_shape can be overridden
        super(CytoplasmSegmentation, self).__init__(model, model_image_shape=(512, 512, 1))
        # Override pre- and post-processing functions
        self.preprocessing_fn = histogram_normalization
        self.postprocessing_fn = deep_watershed

Import images

[2]:
from tensorflow.keras.utils import get_file
from deepcell.datasets import Dataset

# Override Dataset.load_data to download the file and return the opened image stack
def load_data(self, path=None, test_size=0.2, seed=0):
    path = path if path else self.path

    basepath = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
    prefix = path.split(os.path.sep)[:-1]
    data_dir = os.path.join(basepath, *prefix) if prefix else basepath
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    elif not os.path.isdir(data_dir):
        raise IOError('{} exists but is not a directory'.format(data_dir))

    path = get_file(path,
                    origin=self.url,
                    file_hash=self.file_hash)

    data = Image.open(path)

    return data

Dataset.load_data = load_data
[3]:
from deepcell.datasets import Dataset

datafile = Dataset(
    path='scGFP_Maximum intensity projection.tif',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/singleplex/scGFP_Maximum+intensity+projection.tif',
    file_hash='a0e5c38ef3b26e521d3837f1098393c4',
    metadata={})
image_stack = datafile.load_data()
[4]:
# Each frame of the multi-page TIFF holds one channel
image_stack.seek(0)
fluor_image = np.expand_dims(np.array(image_stack), axis=[0,-1])
image_stack.seek(1)
cyto_image = np.expand_dims(np.array(image_stack), axis=[0,-1])
image_stack.seek(2)
nuc_image = np.expand_dims(np.array(image_stack), axis=[0,-1])
image_stack.seek(3)
spots_image = np.expand_dims(np.array(image_stack), axis=[0,-1])
[5]:
fig,ax = plt.subplots(2,2,figsize=(10,10))

ax[0,0].imshow(nuc_image[0,...,0],cmap='gray')
ax[0,0].set_title('Nuclear label')
ax[0,1].imshow(cyto_image[0,...,0],cmap='gray')
ax[0,1].set_title('Cytoplasmic label')
ax[1,0].imshow(fluor_image[0,...,0],cmap='gray')
ax[1,0].set_title('Fluorescent channel')
ax[1,1].imshow(spots_image[0,...,0],cmap='gray')
ax[1,1].set_title('FISH channel')

for i in range(np.shape(ax)[0]):
    for ii in range(np.shape(ax)[1]):
        ax[i,ii].set_xticks([])
        ax[i,ii].set_yticks([])

plt.show()
_images/notebooks_Singleplex-FISH-Analysis_7_0.png

Cytoplasmic segmentation

[6]:
# Polaris app performs cytoplasmic segmentation by default
app = Polaris()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py:1059: UserWarning: deepcell_spots.dotnet is not loaded, but a Lambda layer uses it. It may cause errors.
  , UserWarning)
Downloading data from https://deepcell-data.s3-us-west-1.amazonaws.com/saved-models/CytoplasmSegmentation-2.tar.gz
95117312/95115934 [==============================] - 3s 0us/step
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
[7]:
# image_mpp is an estimate of the pixel size in microns
cyto_pred = app.predict(spots_image=spots_image, segmentation_image=fluor_image, image_mpp=0.2)
[8]:
# app returns a list of dictionaries
# dictionary keys are 'spots_assignment', 'cell_segmentation', 'spot_locations'
spot_dict_cyto = cyto_pred[0]['spots_assignment']
labeled_im_cyto = cyto_pred[0]['cell_segmentation']

coords_new,cmap_list = process_spot_dict(spot_dict_cyto)

fig,ax = plt.subplots(2,2,figsize=(15,15))

ax[0,0].imshow(fluor_image[0,...,0],cmap='gray',vmax=200)
ax[0,0].set_title('Cytoplasmic label')
ax[0,1].imshow(labeled_im_cyto[0,...,0],cmap='jet')
ax[0,1].set_title('Cytoplasmic segmentation')
ax[1,0].imshow(spots_image[0,...,0],cmap='gray',vmax=100)
ax[1,0].set_title('SeqFISH')
ax[1,1].imshow(spots_image[0,...,0],cmap='gray',vmax=100)
ax[1,1].scatter(coords_new[:,1],coords_new[:,0],c=cmap_list,cmap='jet', s=8)
ax[1,1].set_title('Spot assignment to cells')

for i in range(np.shape(ax)[0]):
    for ii in range(np.shape(ax)[1]):
        ax[i,ii].set_xticks([])
        ax[i,ii].set_yticks([])

plt.tight_layout()
plt.show()
_images/notebooks_Singleplex-FISH-Analysis_11_0.png

Nuclear segmentation

[9]:
# the `segmentation_type` argument can be set to 'cytoplasm', 'nucleus', or 'no segmentation'
app = Polaris(segmentation_type='nucleus')
Downloading data from https://deepcell-data.s3-us-west-1.amazonaws.com/saved-models/NuclearSegmentation-3.tar.gz
95150080/95148111 [==============================] - 2s 0us/step
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
[10]:
nuc_pred = app.predict(spots_image=spots_image, segmentation_image=nuc_image, image_mpp=0.15)
[11]:
spot_dict_nuc = nuc_pred[0]['spots_assignment']
labeled_im_nuc = nuc_pred[0]['cell_segmentation']

coords_new,cmap_list = process_spot_dict(spot_dict_nuc)

fig,ax = plt.subplots(2,2,figsize=(15,15))

ax[0,0].imshow(nuc_image[0,...,0],cmap='gray',vmax=100)
ax[0,0].set_title('Nuclear label')
ax[0,1].imshow(labeled_im_nuc[0,...,0],cmap='jet')
ax[0,1].set_title('Nuclear segmentation')
ax[1,0].imshow(spots_image[0,...,0],cmap='gray',vmax=100)
ax[1,0].set_title('SeqFISH')
ax[1,1].imshow(spots_image[0,...,0],cmap='gray',vmax=100)
ax[1,1].scatter(coords_new[:,1],coords_new[:,0],c=cmap_list,cmap='jet', s=8)
ax[1,1].set_title('Spot assignment to cells')

for i in range(np.shape(ax)[0]):
    for ii in range(np.shape(ax)[1]):
        ax[i,ii].set_xticks([])
        ax[i,ii].set_yticks([])

plt.tight_layout()
plt.show()
_images/notebooks_Singleplex-FISH-Analysis_15_0.png

No segmentation

[13]:
app = Polaris(segmentation_type='no segmentation')
/usr/local/lib/python3.6/dist-packages/deepcell_spots/applications/polaris.py:97: UserWarning: No segmentation application instantiated.
  warnings.warn('No segmentation application instantiated.')
[14]:
# if `segmentation_type` is 'no segmentation', the app returns a list of coordinate arrays (one per batch image)
spots_pred = app.predict(spots_image=spots_image)
[15]:
fig,ax = plt.subplots(1,2,figsize=(15,15))

ax[0].imshow(spots_image[0,...,0],cmap='gray',vmax=100)
ax[0].set_title('SeqFISH')
ax[1].imshow(spots_image[0,...,0],cmap='gray',vmax=100)
ax[1].scatter(spots_pred[0][:,1], spots_pred[0][:,0], c='m', s=8)
ax[1].set_title('Detected spots')

for i in range(np.shape(ax)[0]):
    ax[i].set_xticks([])
    ax[i].set_yticks([])

plt.tight_layout()
plt.show()
_images/notebooks_Singleplex-FISH-Analysis_19_0.png

This notebook is part of the deepcell-spots documentation: https://deepcell-spots.readthedocs.io/

Multiplex FISH Analysis

[1]:
import glob
import numpy as np
import pandas as pd
import scipy.io
import os
import time
import cv2
import sys
import random
import collections
import matplotlib.pyplot as plt
from scipy.spatial import distance
from skimage import transform
from skimage.feature import register_translation
from sklearn.cluster import DBSCAN
from deepcell_spots.applications import SpotDetection
from deepcell_spots.spot_em import define_edges
from deepcell_spots.point_metrics import *
from deepcell_spots.singleplex import *
from deepcell_spots.multiplex import *
from deepcell_spots.image_alignment import *
from deepcell.applications import CytoplasmSegmentation
import tensorflow as tf
from deepcell_toolbox.processing import histogram_normalization, normalize
from deepcell_toolbox.deep_watershed import deep_watershed
[2]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only allocate 5GB of memory on the first GPU
  try:
    tf.config.set_logical_device_configuration(
        gpus[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=5000)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)
1 Physical GPUs, 1 Logical GPUs

Import data

We can define the directory that contains all of the data and information for the experiment, including the image set, the codebook, and a .csv file describing the organization of the data.

[3]:
from tensorflow.keras.utils import get_file
from deepcell.datasets import Dataset

# Override Dataset.load_data to download the file and return a DataFrame
def load_csv(self, path=None, test_size=0.2, seed=0):
    path = path if path else self.path

    basepath = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
    prefix = path.split(os.path.sep)[:-1]
    data_dir = os.path.join(basepath, *prefix) if prefix else basepath
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    elif not os.path.isdir(data_dir):
        raise IOError('{} exists but is not a directory'.format(data_dir))

    path = get_file(path,
                    origin=self.url,
                    file_hash=self.file_hash)

    data = pd.read_csv(path)

    return data
[4]:
Dataset.load_data = load_csv

codebook_files = Dataset(
    path='codebook-example.csv',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/codebook-example.csv',
    file_hash='81eacbc9558b7e1b5dfbeb9cb34b1a59',
    metadata={})
dataorg_files = Dataset(
    path='dataorg-example.csv',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/dataorg-example.csv',
    file_hash='691ec7efcd31c37aedf1253b5e8d7cdc',
    metadata={})

codebook = codebook_files.load_data()
dataorg = dataorg_files.load_data()
[5]:
codebook.head()
[5]:
name Spots 1 Spots 2 Spots 3 Spots 4 Spots 5 Spots 6 Spots 7 Spots 8 Spots 10 Spots 11
0 Gabbr1 1 1 0 0 0 1 0 0 0 1
1 Gabbr2 0 0 0 1 1 1 0 0 0 1
2 Tmem119 1 0 1 0 1 0 0 1 0 0
3 Gpr34 0 0 1 1 1 0 0 0 1 0
4 Fcrls 0 0 0 1 1 0 1 1 0 0
[6]:
dataorg
[6]:
readoutName fileName imagingRound color frame zPos
0 Spots 1 hal-config-749z7-638z7-546z7-477z9-405z7_00.npy -1 546 [14, 15, 16, 17, 18, 19, 20] [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5]
1 Spots 2 hal-config-749z7-638z7-546z7-477z9-405z7_00.npy -1 638 [7, 8, 9, 10, 11, 12, 13] [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5]
2 Spots 3 hal-config-749z7-638z7-546z7-477z9-405z7_00.npy -1 749 [0, 1, 2, 3, 4, 5, 6] [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5]
3 Spots 4 hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy 0 546 [14, 15, 16, 17, 18, 19, 20] [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5]
4 Spots 5 hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy 0 638 [7, 8, 9, 10, 11, 12, 13] [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5]
5 Spots 6 hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy 0 749 [0, 1, 2, 3, 4, 5, 6] [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5]
6 Spots 7 hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy 1 546 [14, 15, 16, 17, 18, 19, 20] [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5]
7 Spots 8 hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy 1 638 [7, 8, 9, 10, 11, 12, 13] [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5]
8 Spots 9 hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy 1 749 [0, 1, 2, 3, 4, 5, 6] [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5]
9 Spots 10 hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy 2 546 [14, 15, 16, 17, 18, 19, 20] [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5]
10 Spots 11 hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy 2 638 [7, 8, 9, 10, 11, 12, 13] [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5]
11 Spots 12 hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy 2 749 [0, 1, 2, 3, 4, 5, 6] [1.5, 3.0, 4.5, 6.0, 7.5, 9.0, 10.5]
12 Cytoplasm NaN -1 477 [21, 22, 23, 24, 25, 26, 27] [10.5, 9.0, 7.5, 6.0, 4.5, 3.0, 1.5]
13 Reference NaN -1 477 [29] [6.0]
[7]:
def load_data(self, path=None, test_size=0.2, seed=0):
    path = path if path else self.path

    basepath = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
    prefix = path.split(os.path.sep)[:-1]
    data_dir = os.path.join(basepath, *prefix) if prefix else basepath
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    elif not os.path.isdir(data_dir):
        raise IOError('{} exists but is not a directory'.format(data_dir))

    path = get_file(path,
                    origin=self.url,
                    file_hash=self.file_hash)

    # Return the local file path rather than loading the array into memory
    return path
[8]:
image_files0 = Dataset(
    path='hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/hal-config-749z7-638z7-546z7-477z9-405z7_00_0.npy',
    file_hash='65e67e1012ee6cfcb29f4a574473969c',
    metadata={})
image_files1 = Dataset(
    path='hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/hal-config-749z7-638z7-546z7-477z9-405z7_00_1.npy',
    file_hash='0822b3af6f45a6187beab1286e212e81',
    metadata={})
image_files2 = Dataset(
    path='hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/hal-config-749z7-638z7-546z7-477z9-405z7_00_2.npy',
    file_hash='a4aa06ba6c0b92931ea013e655bea290',
    metadata={})
image_files = Dataset(
    path='hal-config-749z7-638z7-546z7-477z9-405z7_00.npy',
    url='https://deepcell-data.s3.us-west-1.amazonaws.com/spot_detection/multiplex/hal-config-749z7-638z7-546z7-477z9-405z7_00.npy',
    file_hash='17e00336b3a4bec123de1f6df6c39ae5',
    metadata={})
[9]:
Dataset.load_data = load_data

image_files0.load_data()
image_files1.load_data()
image_files2.load_data()
image_files.load_data()
[9]:
'/root/.keras/datasets/hal-config-749z7-638z7-546z7-477z9-405z7_00.npy'

Image alignment

[10]:
# Read in the images according to the .csv file defining the organization of the data
# Performs a maximum projection of spot images
# Takes middle z-slice for reference images and cytoplasm images

datadir = '/root/.keras/datasets/'

max_im_dict, reference_dict, cytoplasm_dict = read_images(datadir, dataorg)
Working on: Spots 1
Working on: Spots 2
Working on: Spots 3
Working on: Spots 4
Working on: Spots 5
Working on: Spots 6
Working on: Spots 7
Working on: Spots 8
Working on: Spots 9
Working on: Spots 10
Working on: Spots 11
Working on: Spots 12
[11]:
# Align the spot and cytoplasm images according to reference images
aligned = align_images(max_im_dict, reference_dict)
aligned_cytoplasm = align_images(cytoplasm_dict, reference_dict)
[12]:
# Visualize a subset of images from different rounds
name_list = ['Spots 1','Spots 2','Spots 7','Spots 8']

fig,ax = plt.subplots(1,4,figsize=(20,20))
for i in range(len(name_list)):
    ax[i].imshow(aligned[name_list[i]][0,:,:,0],vmax=10000)
_images/notebooks_Multiplex-FISH-Analysis_16_0.png
[13]:
# Crop zero-value pixels after alignment
crop_dict = crop_images(aligned)
crop_cytoplasm_dict = crop_images(aligned_cytoplasm)

# Visualize a subset of images from different rounds
fig,ax = plt.subplots(1,4,figsize=(20,20))

for i in range(len(name_list)):
    ax[i].imshow(crop_dict[name_list[i]][0,:,:,0])
_images/notebooks_Multiplex-FISH-Analysis_17_0.png

Spot detection

[14]:
# Instantiate the spot detection application
spots_app = SpotDetection()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py:1059: UserWarning: deepcell_spots.dotnet is not loaded, but a Lambda layer uses it. It may cause errors.
  , UserWarning)
[15]:
# Predict spot locations
# if prediction is slow (over 5 min) try downgrading to TF 2.4
tic = time.perf_counter()
# Keep raw pixelwise predictions; barcode decoding consumes the classification output directly
spots_app.postprocessing_fn = None

cp_dict = {}
for i,item in enumerate(crop_dict.keys()):
    if item in codebook.columns:
        cp_dict[item] = spots_app.predict(crop_dict[item][:,:,:,:])
    else:
        # Dummy near-zero predictions for channels that do not appear in the codebook
        cp_dict[item] = np.random.random((2, 1, np.shape(crop_dict[item])[1], np.shape(crop_dict[item])[2], 2)) * 0.01

toc = time.perf_counter()
print(f"Analyzed {i} rounds in {toc - tic:0.4f} seconds")
Analyzed 11 rounds in 49.7890 seconds

Cell segmentation

[16]:
# Instantiate cytoplasm segmentation model
cyto_app = CytoplasmSegmentation()
cyto_app.preprocessing_fn = histogram_normalization
cyto_app.postprocessing_fn = deep_watershed
WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.
[17]:
# Predict cytoplasm segmentation
im = crop_cytoplasm_dict['Spots 1']
labeled_im_cyto = cyto_app.predict(im, image_mpp=0.25)

# Visualize
fig,ax = plt.subplots(1, 2, figsize=(12,6))

ax[0].imshow(im[0,:,:,0], vmax=10000, cmap='gray')
ax[1].imshow(labeled_im_cyto[0,:,:,0], cmap='plasma')
[17]:
<matplotlib.image.AxesImage at 0x7f508a1406a0>
_images/notebooks_Multiplex-FISH-Analysis_23_1.png

Assign gene barcodes

[18]:
decoded_spots_df = assign_gene_identities(cp_dict, dataorg, threshold=0.95, codebook=codebook)
decoded_spots_df.head()
100%|██████████| 60/60 [00:07<00:00,  7.93it/s]
100%|██████████| 106/106 [00:00<00:00, 611.60it/s]
[18]:
Name Code Probability X Y
0 Laptm4a 18 0.563712 1944 1602
1 Blank-9 29 0.852931 1944 1414
2 background 31 0.627804 1944 382
3 Blank-4 24 0.961169 1944 368
4 background 31 0.523489 1944 339
[19]:
thr = 0.7
# Count decoded barcodes above the probability threshold;
# pool all below-threshold spots into a single 'thr' bar
df = pd.concat([decoded_spots_df.Name[decoded_spots_df.Probability > thr].value_counts(),
                decoded_spots_df.Name[decoded_spots_df.Probability <= thr].replace(
                    np.unique(decoded_spots_df.Name), 'thr').value_counts()]).sort_index(axis=0)
fig, ax = plt.subplots(1, 1, figsize=(14,3), dpi=100, facecolor='w', edgecolor='k')
df.plot(kind='bar', width=0.7, rot=90, logy=True, fontsize=6, ax=ax)
num_decoded_barcodes = sum((decoded_spots_df.Name != 'background')
                           & (decoded_spots_df.Name != 'infeasible')
                           & (decoded_spots_df.Name != 'NaN')
                           & (decoded_spots_df.Probability > thr))
for p in ax.patches:
    ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005), size=6)
plt.title('Histogram of decoded barcodes after thresholding with {}: \n in total {} spots detected while {} spots decoded ({:.02f}%)'.format(
    thr, decoded_spots_df.shape[0], num_decoded_barcodes, 100 * num_decoded_barcodes / decoded_spots_df.shape[0]), fontsize=10)
plt.show()
_images/notebooks_Multiplex-FISH-Analysis_26_0.png

Assign spots to cells

[20]:
output_df = assign_spots_to_cells(decoded_spots_df, labeled_im_cyto)
output_df.head()
[20]:
Name Code Probability X Y Cell
0 Laptm4a 18 0.563712 1944 1602 0
1 Blank-9 29 0.852931 1944 1414 148
2 background 31 0.627804 1944 382 0
3 Blank-4 24 0.961169 1944 368 0
4 background 31 0.523489 1944 339 137
[21]:
# Visualize
plot_threshold = 0.7

fig,ax = plt.subplots(1, 3, figsize=(24,9))
ax[0].imshow(labeled_im_cyto[0,:,:,0], cmap='plasma')
ax[0].set_title('Cell Segmentation')
ax[1].imshow(crop_dict['Spots 2'][0,:,:,0], cmap='gray')
ax[1].set_title('Spots Image')
ax[2].imshow(crop_dict['Spots 2'][0,:,:,0], cmap='gray')
ax[2].scatter(output_df.loc[output_df['Probability'] > plot_threshold]['Y'],
              output_df.loc[output_df['Probability'] > plot_threshold]['X'],
              c=output_df.loc[output_df['Probability'] > plot_threshold]['Cell'],
              cmap='plasma', s=20)
ax[2].set_title('Assigned Spots to Cells')
for i in range(len(ax)):
    ax[i].set_xlim([0,500])
    ax[i].set_ylim([1500,1000])
    ax[i].set_xticks([])
    ax[i].set_yticks([])

plt.tight_layout()
plt.show()
_images/notebooks_Multiplex-FISH-Analysis_29_0.png

deepcell_spots package

Applications

deepcell_spots.applications package

deepcell_spots.applications.polaris

Singleplex FISH analysis application

class deepcell_spots.applications.polaris.Polaris(segmentation_model=None, segmentation_type='cytoplasm', spots_model=None)[source]

Bases: object

Loads spot detection and cell segmentation applications from deepcell_spots and deepcell_tf, respectively.

The predict method calls the predict method of each application.

Example:

import numpy as np
from skimage.io import imread
from deepcell_spots.applications import Polaris

# Load the images
spots_im = imread('spots_image.png')
cyto_im = imread('cyto_image.png')

# Expand image dimensions to rank 4
spots_im = np.expand_dims(spots_im, axis=[0,-1])
cyto_im = np.expand_dims(cyto_im, axis=[0,-1])

# Create the application
app = Polaris()

# Find the spot locations
result = app.predict(spots_image=spots_im,
                     segmentation_image=cyto_im)
spots_dict = result[0]['spots_assignment']
labeled_im = result[0]['cell_segmentation']
coords = result[0]['spot_locations']
Parameters
  • segmentation_model (tf.keras.Model) – The model to load. If None, a pre-trained model will be downloaded.

  • segmentation_type (str) – The cellular compartment for generating segmentation predictions. Valid values are ‘cytoplasm’, ‘nucleus’, ‘no segmentation’. Defaults to ‘cytoplasm’.

  • spots_model (tf.keras.Model) – The model to load. If None, a pre-trained model will be downloaded.

predict(spots_image, segmentation_image=None, image_mpp=None, spots_threshold=0.95, spots_clip=False)[source]

Generates prediction output consisting of a labeled cell segmentation image, detected spot locations, and a dictionary of spot locations assigned to labeled cells of the input.

Input images are required to have 4 dimensions [batch, x, y, channel]. Channel dimension should be 2.

Additional empty dimensions can be added using np.expand_dims.

Parameters
  • spots_image (numpy.array) – Input image for spot detection with shape [batch, x, y, channel].

  • segmentation_image (numpy.array) – Input image for cell segmentation with shape [batch, x, y, channel]. Defaults to None.

  • image_mpp (float) – Microns per pixel for image.

  • spots_threshold (float) – Probability threshold for a pixel to be considered as a spot.

  • spots_clip (bool) – Determines if pixel values will be clipped by percentile. Defaults to false.

Raises
  • ValueError – Threshold value must be between 0 and 1.

  • ValueError – Segmentation application must be instantiated if segmentation image is defined.

Returns

List of dictionaries, length equal to batch dimension.

Return type

list
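
A minimal usage sketch (the input shapes and parameter values here are illustrative assumptions, not taken from the library):

import numpy as np
from deepcell_spots.applications import Polaris

# Hypothetical rank-4 inputs: one 256x256 single-channel image each
spots_im = np.random.random((1, 256, 256, 1))
cyto_im = np.random.random((1, 256, 256, 1))

app = Polaris()
result = app.predict(spots_image=spots_im,
                     segmentation_image=cyto_im,
                     image_mpp=0.2,           # assumed pixel size in microns
                     spots_threshold=0.95)    # probability cutoff for spot pixels
spots_per_cell = result[0]['spots_assignment']  # one dictionary per batch image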

deepcell_spots.applications.spot_detection

Spot detection application

class deepcell_spots.applications.spot_detection.SpotDetection(model=None)[source]

Bases: deepcell.applications.Application

Loads a deepcell.model_zoo.featurenet.FeatureNet model for fluorescent spot detection with pretrained weights.

The predict method handles pre- and post-processing steps to return a list of spot locations.

Example:

import numpy as np
from skimage.io import imread
from deepcell_spots.applications import SpotDetection

# Load the image
im = imread('spots_image.png')

# Expand image dimensions to rank 4
im = np.expand_dims(im, axis=-1)
im = np.expand_dims(im, axis=0)

# Create the application
app = SpotDetection()

# Find spot locations
coords = app.predict(im)
Parameters

model (tf.keras.Model) – The model to load. If None, a pre-trained model will be downloaded.

dataset_metadata = {'name': 'general_train', 'other': 'Pooled FISH data including MERFISH data\n and SunTag viral RNA data'}

Metadata for the dataset used to train the model

model_metadata = {'batch_size': 1, 'lr': 0.01, 'lr_decay': 0.99, 'n_epochs': 10, 'training_seed': 0, 'training_steps_per_epoch': 552}

Metadata for the model and training process

predict(image, batch_size=4, pad_mode='reflect', preprocess_kwargs=None, postprocess_kwargs=None, threshold=0.95, clip=False)[source]

Generates a list of coordinate spot locations of the input running prediction with appropriate pre and post processing functions.

Input images are required to have 4 dimensions [batch, x, y, channel].

Additional empty dimensions can be added using np.expand_dims.

Parameters
  • image (numpy.array) – Input image with shape [batch, x, y, channel].

  • batch_size (int) – Number of images to predict on per batch.

  • pad_mode (str) – The padding mode, one of “constant” or “reflect”.

  • preprocess_kwargs (dict) – Keyword arguments to pass to the pre-processing function.

  • postprocess_kwargs (dict) – Keyword arguments to pass to the post-processing function.

  • threshold (float) – Probability threshold for a pixel to be considered as a spot.

  • clip (bool) – Determines if pixel values will be clipped by percentile.

Raises
  • ValueError – Input data must match required rank of the application, calculated as one dimension more (batch dimension) than expected by the model.

  • ValueError – Input data must match required number of channels.

  • ValueError – Threshold value must be between 0 and 1.

Returns

Coordinate locations of detected spots.

Return type

numpy.array
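
A minimal usage sketch (the input shape and the per-image format of the returned coordinates are assumptions based on the notebooks above):

import numpy as np
from deepcell_spots.applications import SpotDetection

im = np.random.random((1, 128, 128, 1))  # hypothetical rank-4 input
app = SpotDetection()
# A lower threshold keeps more candidate spots; clip=True clips intensities by percentile
coords = app.predict(im, threshold=0.9, clip=True)
print(coords[0])  # (N, 2) array of [y, x] spot locations for the first image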

deepcell_spots.applications.spot_detection.output_to_dictionary(output_images, output_names)[source]

Formats model output from list to dictionary.

Parameters
  • output_images (list) – Model output list of length 2 containing classification prediction and regression prediction

  • output_names (list) – Model output names

Returns

Dictionary with output names as keys and output images as values
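
A sketch of the intended call (the output names and dummy shapes here are assumptions based on the annotation types named elsewhere in these docs):

import numpy as np
from deepcell_spots.applications.spot_detection import output_to_dictionary

# Dummy stand-ins for the two model outputs
output_images = [np.zeros((1, 128, 128, 2)), np.zeros((1, 128, 128, 2))]
output_dict = output_to_dictionary(output_images, ['classification', 'offset_regression'])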

deepcell_spots.cluster_vis

Visualization tools for spot expectation maximization

deepcell_spots.cluster_vis.jitter(coords, size)[source]

Add Gaussian noise to a list of coordinates for plotting when coordinates overlap.

Parameters
  • coords (matrix) – Matrix with dimensions (number of detections) x 2

  • size (int) – Standard deviation of the Gaussian noise distribution in pixels.

Returns

Coords with noise added to locations

Return type

array
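
For example (a small sketch; the coordinate values are arbitrary):

import numpy as np
from deepcell_spots.cluster_vis import jitter

coords = np.array([[10.0, 10.0], [10.0, 10.0], [20.0, 30.0]])  # first two detections overlap exactly
noisy = jitter(coords, size=1)  # Gaussian noise with a 1-pixel standard deviation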

deepcell_spots.cluster_vis.label_graph_ann(G, coords_df, exclude_last=False)[source]

Labels the annotator associated with each node in the graph

Parameters
  • G (networkx.Graph) – Graph with edges indicating clusters of points assumed to be derived from the same ground truth detection

  • coords_df (DataFrame) – Data frame with columns ‘x’ and ‘y’ which encode the spot locations and ‘Algorithm’ which encodes the algorithm that corresponds with that spot

  • exclude_last (bool) – Only set as True to exclude a point that has been included for the purpose of normalization

Returns

Labeled graph

Return type

networkx.Graph
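
A minimal sketch of the expected inputs (the node-to-row correspondence is an assumption; the column names follow the description above):

import networkx as nx
import pandas as pd
from deepcell_spots.cluster_vis import label_graph_ann

G = nx.Graph()
G.add_nodes_from([0, 1, 2])  # one node per detection
coords_df = pd.DataFrame({'x': [1.0, 1.1, 5.0],
                          'y': [2.0, 2.1, 6.0],
                          'Algorithm': ['A', 'B', 'A']})
G_labeled = label_graph_ann(G, coords_df)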

deepcell_spots.cluster_vis.label_graph_gt(G, detection_data, gt)[source]

Labels the ground truth identity of each node in the graph.

Intended for simulated data.

Parameters
  • G (networkx.Graph) – Graph with edges indicating clusters of points assumed to be derived from the same ground truth detection

  • detection_data (numpy.array) – Matrix with dimensions (number of clusters) x (number of algorithms) with value of 1 if an algorithm detected the cluster and 0 if it did not.

  • gt (numpy.array) – Array with length (number of cluster) with value of 1 if cluster is a true positive detection and 0 if it is a false positive.

Returns

Labeled graph

Return type

networkx.Graph

deepcell_spots.cluster_vis.label_graph_prob(G, detection_data, p_matrix)[source]

Labels the EM output probability of being a ground truth true detection for each cluster in the graph.

Parameters
  • G (networkx.Graph) – Graph with edges indicating clusters of points assumed to be derived from the same ground truth detection

  • detection_data (numpy.array) – Matrix with dimensions (number of clusters) x (number of algorithms) with value of 1 if an algorithm detected the cluster and 0 if it did not.

  • p_matrix (matrix) – Matrix with dimensions (number of clusters) x 2 where first column is the probability that a cluster is a true positive and second column is the probability that it is a false positive.

Returns

Labeled graph

Return type

networkx.Graph

deepcell_spots.data_utils

Functions for making training data sets

deepcell_spots.data_utils.get_data(file_name, test_size=0.2, seed=0, allow_pickle=False)[source]

Load data from .npz file and split into train and test sets.

This is a copy of deepcell.utils.data_utils.get_data, with allow_pickle added and mode removed.

Parameters
  • file_name (str) – path to .npz file to load.

  • test_size (float) – percent of data to leave as testing holdout.

  • seed – seed number for random train/test split repeatability.

  • allow_pickle (bool) – if True, allow loading pickled object arrays stored in npz files (via numpy.load).

Returns

Dictionary of training data and a dictionary of testing data.

Return type

(dict, dict)
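
A usage sketch (the file name is hypothetical; the 'X'/'y' dictionary keys follow the deepcell convention):

from deepcell_spots.data_utils import get_data

train_dict, test_dict = get_data('spot_training_data.npz',
                                 test_size=0.2, seed=0,
                                 allow_pickle=True)  # needed for ragged point annotations
X_train, y_train = train_dict['X'], train_dict['y']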

deepcell_spots.data_utils.slice_annotated_image(X, y, reshape_size, overlap=0)[source]

Slice images in X into smaller parts.

Similar to deepcell.utils.data_utils.reshape_matrix.

Parameters
  • X (np.array) – array containing images with size (img_number, y, x, channel).

  • reshape_size (list) – Shape of reshaped image [y_size, x_size].

  • overlap (int) – Number of pixels overlapping in each row/column with the pixels from the same row/column in the neighboring slice.

  • y – List or array containing coordinate annotations. Has length (img_number), each element of the list is a (N, 2) np.array where N=the number of points in the image.

Returns

Two outputs: (1) a stack of reshaped images in order of small to large y position, then small to large x position in the original image; an np.array of size (n*img_number, y_size, x_size, channel), where n is the number of slices each image in X was cut into. If the original image lengths aren’t divisible by y_size, x_size, the last image in each row / column overlaps with the one before. (2) A list of length n*img_number containing the corresponding point annotations for each slice.

Return type

(array, list)

deepcell_spots.data_utils.slice_image(X, reshape_size, overlap=0)[source]

Slice images in X into smaller parts.

Similar to deepcell.utils.data_utils.reshape_matrix.

Parameters
  • X (np.array) – array containing images with size (img_number, y, x, channel).

  • reshape_size (list) – Shape of reshaped image [y_size, x_size].

  • overlap (int) – Number of pixels overlapping in each row/column with the pixels from the same row/column in the neighboring slice.

Returns

Stack of reshaped images in order of small to large y, then small to large x position in the original image; an np.array of size (n*img_number, y_size, x_size, channel), where n is the number of slices each image in X was cut into. If the original image lengths aren’t divisible by y_size, x_size, the last image in each row / column overlaps with the one before.

Return type

np.array
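
A quick sketch of slicing a stack of images into overlapping tiles (shapes are illustrative):

import numpy as np
from deepcell_spots.data_utils import slice_image

X = np.zeros((2, 512, 512, 1))  # two 512x512 single-channel images
tiles = slice_image(X, [128, 128], overlap=16)
# tiles is a stack of 128x128 slices from both images, ordered by y then x position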

deepcell_spots.dotnet

CNN architecture with classification and regression outputs for dot center detection

deepcell_spots.dotnet.classification_head(input_shape, n_features=2, n_dense_filters=128, reg=1e-05, init='he_normal', name='classification_head')[source]

Creates a classification head.

Parameters
  • input_shape (tuple) – Shape of input image.

  • n_features (int) – Number of output features (number of possible classes for each pixel). Default is 2 (contains point / does not contain point).

  • n_dense_filters (int) – Number of dense filters.

  • reg (float) – Regularization value

  • init (str) – Method for initalizing weights.

Returns

tensorflow.keras.Model for classification (softmax output).

deepcell_spots.dotnet.default_heads(input_shape, num_classes)[source]

Create a list of the default heads for spot detection.

Parameters
  • input_shape (tuple) – Shape of input image.

  • num_classes (int) – Number of output features (number of possible classes for each pixel).

Returns

A list of tuples, where the first element is the name of the submodel and the second element is the submodel itself.

Return type

list(tuple)

deepcell_spots.dotnet.dot_net_2D(receptive_field=13, input_shape=(256, 256, 1), inputs=None, n_skips=3, norm_method='std', padding_mode='reflect', **kwargs)[source]

Creates a 2D featurenet with prediction heads for spot detection.

Model architecture based on deepcell.model_zoo.bn_feature_net_skip_2D.

Parameters
  • receptive_field (int) – the receptive field of the neural network.

  • input_shape (tuple) – Shape of input image.

  • inputs (tensor) – optional input tensor

  • n_skips (int) – The number of skip-connections.

  • norm_method (str) – Normalization method to use with the :mod:deepcell.layers.normalization.ImageNormalization2D layer.

  • padding_mode (str) – Type of padding, one of ('reflect' or 'zero').

  • kwargs (dict) – Other model options defined in ~bn_feature_net_2D.

Returns

2D FeatureNet with prediction heads for spot detection.

Return type

tensorflow.keras.Model
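
A short sketch building the network with its documented defaults:

from deepcell_spots.dotnet import dot_net_2D

model = dot_net_2D(receptive_field=13,
                   input_shape=(256, 256, 1),
                   n_skips=3)
model.summary()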

deepcell_spots.dotnet.offset_regression_head(input_shape, regression_feature_size=256, name='offset_regression_head')[source]

Creates an offset regression head.

Parameters
  • input_shape (tuple) – Shape of input image.

  • regression_feature_size (int) – Number of filters in the regression head layers.

Returns

tensorflow.keras.Model for offset regression.

deepcell_spots.dotnet_losses

Custom loss functions for DeepCell spots

class deepcell_spots.dotnet_losses.DotNetLosses(alpha=0.25, gamma=2.0, sigma=3.0, n_classes=2, focal=False, d_pixels=1, mu=0, beta=0)[source]

Bases: object

classification_loss(y_true, y_pred)[source]
Parameters
  • y_true – numpy array of size (batch, Ly, Lx, 2) one hot encoded pixel classification.

  • y_pred – numpy array of size (batch, Ly, Lx, 2) one hot encoded pixel classification.

Returns

focal / weighted categorical cross entropy loss

Return type

float

classification_loss_regularized(y_true, y_pred)[source]

Regularized classification loss.

Parameters
  • y_true – numpy array of size (batch, Ly, Lx, 2) one hot encoded pixel classification.

  • y_pred – numpy array of size (batch, Ly, Lx, 2) one hot encoded pixel classification.

  • mu (float) – weight of regularization term.

Returns

focal / weighted categorical cross entropy loss

Return type

float

regression_loss(y_true, y_pred)[source]

Calculates the regression loss of the shift from pixel center, only for pixels containing a dot (true regression shifts smaller in absolute value than 0.5).

Parameters
  • y_true – tensor of shape (batch, Ly, Lx, 2).

  • y_pred – tensor of shape (batch, Ly, Lx, 2). Ly, Lx are the dimensions of a single image. Dimension 3 contains delta_y and delta_x.

  • d_pixels (int) – the number of pixels on each side of a point containing pixels over which to calculate the regression loss for the offset image (0 = calculate for point containing pixels only, 1 = calculate for 8-nearest neighbors, …).

Returns

the normalized smooth L1 loss over all the input pixels with a regressed point within the same pixel, i.e. delta_y = y[..., 0] and delta_x = y[..., 1] are <= 0.5 in absolute value.

Return type

float
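
A minimal sketch of computing the regression loss on dummy tensors (the shapes follow the parameter descriptions above; the values are arbitrary):

import tensorflow as tf
from deepcell_spots.dotnet_losses import DotNetLosses

losses = DotNetLosses(sigma=3.0, d_pixels=1)
# Offset images of zeros place every regressed point at its pixel center
y_true = tf.zeros((1, 8, 8, 2))
y_pred = tf.fill((1, 8, 8, 2), 0.1)
loss = losses.regression_loss(y_true, y_pred)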

deepcell_spots.dotnet_losses.smooth_l1(y_true, y_pred, sigma=3.0)[source]

Compute the smooth L1 loss of y_pred w.r.t. y_true.

Similar to deepcell.losses.smooth_l1 without summation over channel axis.

Parameters
  • y_true – Tensor from the generator of shape (B, ?, ?). The last value for each box is the state of the anchor (ignore, negative, positive).

  • y_pred – Tensor from the network of shape (B, ?, ?). Same shape as y_true.

  • sigma – The point where the loss changes from L2 to L1.

Returns

The pixelwise smooth L1 loss of y_pred w.r.t. y_true. Has same shape as each of the inputs: (B, ?, ?).
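
With x = y_pred - y_true and the default sigma = 3.0, this is the conventional smooth L1 (as in deepcell.losses), which can be written in LaTeX as:

L(x) = \begin{cases} \frac{1}{2}(\sigma x)^2 & \text{if } |x| < 1/\sigma^2 \\ |x| - \frac{1}{2\sigma^2} & \text{otherwise} \end{cases}

i.e. the loss is quadratic (L2) for small residuals and linear (L1) beyond |x| = 1/sigma^2.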

deepcell_spots.image_alignment

deepcell_spots.image_alignment.align_images(image_dict, reference_dict)[source]

Aligns input images with alignment transformation learned from reference images.

Parameters
  • image_dict (dict) – Dictionary where keys are image IDs ('readoutName') and values are images to be aligned for each readout name.

  • reference_dict (dict) – Dictionary where keys are image IDs ('readoutName') and values are fiducial channel (image used for alignment) for each readout name (multiple readout names may have the same reference image).

Returns

Dictionary where keys are image IDs ('readoutName') and values are images from image_dict that have been aligned by transformations learned from images in reference_dict.

Return type

aligned_dict (dict)

deepcell_spots.image_alignment.crop_images(aligned_dict)[source]

Crops images to remove zero-value pixels resulting from image alignment.

Parameters

aligned_dict (dict) – Dictionary where keys are image IDs ('readoutName') and values are images from image_dict that have been aligned with align_images.

Returns

Dictionary where keys are image IDs ('readoutName') and values are images from image_dict that have been aligned with align_images, with zero-value pixels cropped out.

Return type

crop_dict (dict)

deepcell_spots.image_alignment.read_images(root_dir, dataorg, verbose=True)[source]

Reads in image files from given directories and parses them into dictionaries of different types.

Parameters
  • root_dir (str) – Directory containing all image files

  • image_files (list) – List of image file names (str) in the root directory. Images must be saved in .npy format.

  • dataorg (pandas.DataFrame) – Data frame with required columns 'fileName' (item in image_files), 'readoutName' (unique ID name given to each channel in each image), 'fiducialFrame' (frame number for image to be used for alignment), 'cytoplasmFrame' (frame number for image to be used for cell segmentation).

  • verbose (bool, optional) – Boolean determining if file names are printed as they are processed. Defaults to True.

Returns

max_im_dict is a dictionary where keys are image IDs ('readoutName') and values are maximum intensity projections of frames associated with that readout name. fiducial_dict is a dictionary where keys are image IDs ('readoutName') and values are the fiducial channel (image used for alignment) for each readout name (multiple readout names may have the same fiducial image). cytoplasm_dict is a dictionary where keys are image IDs ('readoutName') and values are the cytoplasm label image for each readout name (multiple readout names may have the same cytoplasm image).

Return type

(dict, dict, dict)

deepcell_spots.image_generators

Spot detection image generators

class deepcell_spots.image_generators.ImageFullyConvDotDataGenerator(*args, **kwargs)[source]

Bases: tensorflow.keras.preprocessing.image.ImageDataGenerator

Generates batches of tensor image data with real-time data augmentation.

The data will be looped over in batches.

Parameters
  • featurewise_center (bool) – Whether to set input mean to 0 over the dataset, feature-wise.

  • samplewise_center (bool) – Whether to set each sample mean to 0.

  • featurewise_std_normalization (bool) – Whether to divide inputs by std of the dataset, feature-wise.

  • samplewise_std_normalization (bool) – Whether to divide each input by its std.

  • zca_epsilon (float) – Epsilon for ZCA whitening. Default is 1e-6.

  • zca_whitening (bool) – Whether to apply ZCA whitening.

  • rotation_range (int) – Degree range for random rotations.

  • width_shift_range

    float, 1-D array-like or int. Values for the following formats:

    • float: fraction of total width, if < 1, or pixels if >= 1.

    • 1-D array-like: random elements from the array.

    • int: integer number of pixels from interval (-width_shift_range, +width_shift_range) with width_shift_range=2 possible values are ints [-1, 0, +1], same as with width_shift_range=[-1, 0, +1], while with width_shift_range=1.0 possible values are floats in the interval [-1.0, +1.0).

  • shear_range (float) – Shear angle in counter-clockwise direction in degrees

  • zoom_range – float or [lower, upper], Range for random zoom. If a float, [lower, upper] = [1-zoom_range, 1+zoom_range].

  • channel_shift_range (float) – Range for random channel shifts.

  • fill_mode (str) –

    One of ("constant", "nearest", "reflect" or "wrap"). Default is "nearest". Points outside the boundaries of the input are filled according to the given mode:

    • 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)

    • 'nearest': aaaaaaaa|abcd|dddddddd

    • 'reflect': abcddcba|abcd|dcbaabcd

    • 'wrap': abcdabcd|abcd|abcdabcd

  • cval – float or int, value used for points outside the boundaries when fill_mode = "constant".

  • horizontal_flip (bool) – Whether to randomly flip inputs horizontally.

  • vertical_flip (bool) – Whether to randomly flip inputs vertically.

  • rescale – rescaling factor. Defaults to None. If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation).

  • preprocessing_function – function that will be implied on each input. The function will run after the image is resized and augmented. The function should take one image (Numpy tensor with rank 3) as argument and should output a Numpy tensor with the same shape.

  • data_format – One of ("channels_first", "channels_last"). "channels_last" means that the images should have shape (samples, height, width, channels), "channels_first" means that the images should have shape (samples, channels, height, width). It defaults to the image_data_format value found in your Keras config file at ~/.keras/keras.json. If you never set it, then it will be "channels_last".

  • validation_split (float) – Fraction of images reserved for validation (strictly between 0 and 1).

apply_points_transform(y, transform_parameters, image_shape)[source]

Applies an affine transformation to a list of point coordinates according to given parameters.

Parameters
  • y (np.array) – Array of shape (N, 2) which contains points in the format [y, x] or list of such arrays. (y Cartesian coordinate before the x, as in matrix/image indexing convention. Not to be confused with the variables X,y as in data and labels)

  • transform_parameters (dict) –

    Dictionary with string - parameter pairs describing the transformation.

    • 'theta': Float. Rotation angle in degrees.

    • 'tx': Float. Shift in the x direction.

    • 'ty': Float. Shift in the y direction.

    • 'shear': Float. Shear angle in degrees.

    • 'zx': Float. Zoom in the x direction.

    • 'zy': Float. Zoom in the y direction.

    • 'flip_horizontal': Boolean. Horizontal flip.

    • 'flip_vertical': Boolean. Vertical flip.

    • 'channel_shift_intensity': Float. Channel shift intensity.

    • 'brightness': Float. Brightness shift intensity. (taken from: keras ImageDataGenerator documentation)

  • image_shape (tuple) – shape of 2-dimensional image.

flow(train_dict, batch_size=1, skip=None, transform=None, transform_kwargs={}, shuffle=True, seed=None, save_to_dir=None, save_prefix='', save_format='png')[source]

Generates batches of augmented/normalized data with given arrays.

Parameters
  • train_dict (dict) – dictionary of X and y tensors. Both should be rank 4.

  • batch_size (int) – Size of a batch.

  • shuffle (bool) – Whether to shuffle the data between epochs.

  • seed (int) – Random seed for data shuffling.

  • save_to_dir (str) – Optional directory where to save the pictures being yielded, in a viewable format. This is useful for visualizing the random transformations being applied, for debugging purposes.

  • save_prefix (str) – Prefix to use for saving sample images (if save_to_dir is set).

  • save_format (str) – Format to use for saving sample images (if save_to_dir is set).

Returns

An Iterator yielding tuples of (x, y) where x is a numpy array of image data and y is a numpy array of labels of the same shape.
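
A minimal sketch of generating augmented batches (the array shapes and the per-image point-list format for y are assumptions based on the iterator description below):

import numpy as np
from deepcell_spots.image_generators import ImageFullyConvDotDataGenerator

datagen = ImageFullyConvDotDataGenerator(rotation_range=180,
                                         horizontal_flip=True,
                                         vertical_flip=True)
# X: rank-4 image stack; y: one (N, 2) array of [y, x] point labels per image
train_dict = {'X': np.random.random((8, 128, 128, 1)),
              'y': [np.random.random((5, 2)) * 128 for _ in range(8)]}
iterator = datagen.flow(train_dict, batch_size=1, seed=0)
x_batch, y_batch = next(iterator)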

random_transform(x, y=None, seed=None)[source]

Applies a random transformation to an image

Parameters
  • x – 3D tensor or list of 3D tensors.

  • y (np.array) – Array of shape (N, 2) which contains points in the format [y, x] or list of such arrays referring to coordinates in the image x, optional.

  • seed – Random seed.

  • fill_mode (str) –

    One of ("constant", "nearest", "reflect" or "wrap") for type of padding used for points outside of the input image which correspond to points inside the output image. Points outside the boundaries of the input are filled according to the given mode:

    • 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)

    • 'nearest': aaaaaaaa|abcd|dddddddd

    • 'reflect': abcddcba|abcd|dcbaabcd

    • 'wrap': abcdabcd|abcd|abcdabcd

Returns

A randomly transformed version of the input (same shape). If y is passed, it is transformed if necessary and returned. The transformed y contains input and padding (for fill_mode='reflect' or 'wrap') points mapped to output image space, which are inside the output image (transformed points mapped to outside of the output image boundaries are deleted).

class deepcell_spots.image_generators.ImageFullyConvDotIterator(train_dict, image_data_generator, batch_size=1, skip=None, shuffle=False, transform=None, transform_kwargs={}, seed=None, data_format='channels_last', save_to_dir=None, save_prefix='', save_format='png')[source]

Bases: tensorflow.keras.preprocessing.image.Iterator

Iterator yielding data from Numpy arrays (X and y).

Parameters
  • train_dict (dict) – dictionary consisting of numpy arrays for X and y. X has dimensions (batch, Ly, Lx, channel) and y has length batch containing np.arrays of shape (N, 2) where N is the number of points in the image.

  • image_data_generator – Instance of ImageDataGenerator to use for random transformations and normalization.

  • batch_size (int) – Size of a batch.

  • shuffle (bool) – Whether to shuffle the data between epochs.

  • seed (int) – Random seed for data shuffling.

  • data_format (str) – One of (channels_first, channels_last).

  • save_to_dir (str) – Optional directory where to save the pictures being yielded, in a viewable format. This is useful for visualizing the random transformations being applied, for debugging purposes.

  • save_prefix (str) – Prefix to use for saving sample images (if save_to_dir is set).

  • save_format (str) – Format to use for saving sample images (if save_to_dir is set).

Raises
  • ValueError – Training batches and labels should have the same length.

  • ValueError – Input data in ImageFullyConvIterator should have rank 4.

next()[source]

For python 2.x. Returns the next batch.

point_list_to_annotations(points, image_shape, dy=1, dx=1)[source]

Generate label images used in loss calculation from point labels.

Parameters
  • points (np.array) – array of size (N, 2) which contains points in the format [y, x].

  • image_shape (tuple) – shape of 2-dimensional image.

  • dy – pixel y width.

  • dx – pixel x width.

Returns

Dictionary with two keys, detections and offset.
  • detections is array of shape (image_shape,2) with pixels one hot encoding spot locations.

  • offset is array of shape (image_shape,2) with pixel values equal to signed distance to nearest spot in x- and y-directions.

Return type

annotations (dict)
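
An illustrative reconstruction of the two annotation images for a single spot at [y, x] = [3, 4] (a sketch of the documented output, not the library implementation; the one-hot channel order and the sign convention are assumptions):

import numpy as np

image_shape = (8, 8)
detections = np.zeros((*image_shape, 2))
detections[..., 0] = 1       # channel 0: pixel does not contain a point
detections[3, 4] = [0, 1]    # channel 1: pixel contains a point
yy, xx = np.mgrid[0:8, 0:8]
offset = np.stack([3 - yy, 4 - xx], axis=-1)  # signed y/x distance to the nearest spot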

deepcell_spots.multiplex

deepcell_spots.multiplex.assign_gene_identities(cp_dict, dataorg, threshold, codebook)[source]

Assigns gene identity to barcoded spots.

Parameters
  • cp_dict (dict) – Dictionary where keys are image IDs ('readoutName') and values are classification prediction output from the spot detection model.

  • dataorg (pandas.DataFrame) – Dataframe containing information about organization of image files.

  • threshold (float) – value for the probability threshold a spot must exceed to be considered a spot.

  • codebook (pandas.DataFrame) – Dataframe with columns for each imaging round; rows are barcodes for genes. Values in the data frame are 1 if that barcode includes that imaging round and 0 if it does not.

Returns

Dataframe with the spot locations, gene identity, and probability of assignment.

Return type

pandas.DataFrame

deepcell_spots.multiplex.assign_spots_to_cells(decoded_spots_df, cytoplasm_pred)[source]

Adds column to spots DataFrame with identity of cell for each spot

Parameters
  • decoded_spots_df (pandas.DataFrame) – Dataframe with the spot locations, gene identity, and probability of assignment

  • cytoplasm_pred (array) – Image where pixel values are labels for segmented cell cytoplasms.

Returns

Dataframe with the spot locations, gene identity, probability of assignment, and cell identity.

Return type

pandas.DataFrame

deepcell_spots.multiplex.cluster_points(spots_to_cells_dict, cell_id, threshold=1.5, match_method='min_dist')[source]

Clusters points between rounds with one of two methods: 'min_dist' or 'mutual_nearest_neighbor'.

Parameters
  • spots_to_cells_dict (dict) – Dict of dicts, keys are image IDs ('readoutName'), values are dictionaries where keys are cell cytoplasm labels and values are detected spots associated with that cell label, there is one item in list for each image in coords_dict.

  • cell_id (int) – Integer key in spots_to_cells_dict.

  • threshold (float, optional) – Distance threshold in pixels for matching points between rounds. Defaults to 1.5.

  • match_method (str, optional) – Method for matching spots between rounds. Options are 'min_dist' and 'mutual_nearest_neighbor'. Defaults to 'min_dist'.

deepcell_spots.multiplex.error_correction(barcode, codebook_dict)[source]

Corrects barcodes that have no match in codebook.

To be assigned, a codebook barcode may have a maximum of one bit flipped (Hamming distance of one) from the input barcode.

Parameters
  • barcode (str) – String of binary barcode list, where values are 1 or 0 depending on whether transcripts with that barcode are labeled in a particular round.

  • codebook_dict (dict) – Codebook converted into a dictionary where the keys are the binary barcode and the values are the gene names.
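
An illustrative sketch of the documented behavior (not the library code): flip each bit of the unmatched barcode and look for a codebook entry within a Hamming distance of one.

def correct_barcode(barcode, codebook_dict):
    # Try every single-bit flip of the input barcode
    for i in range(len(barcode)):
        flipped = barcode[:i] + ('0' if barcode[i] == '1' else '1') + barcode[i + 1:]
        if flipped in codebook_dict:
            return codebook_dict[flipped]
    return None  # no codebook barcode within one bit flip

codebook_dict = {'1100': 'GeneA', '0011': 'GeneB'}  # hypothetical codebook
print(correct_barcode('1101', codebook_dict))  # 'GeneA' (one bit from '1100')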

deepcell_spots.multiplex.gene_counts(spots_to_cells_dict, codebook, threshold=1.5, match_method='min_dist', error_corr=True)[source]

Assigns combinatorial barcodes corresponding to gene identities.

Matches spots between rounds with one of two methods: 'min_dist' or 'mutual_nearest_neighbor'.

Parameters
  • spots_to_cells_dict (dict) – Dict of dicts, keys are image IDs ('readoutName'), values are dictionaries where keys are cell cytoplasm labels and values are detected spots associated with that cell label, there is one item in list for each image in coords_dict.

  • codebook (Pandas DataFrame) – DataFrame with columns for each imaging round; rows are barcodes for genes. Values in the data frame are 1 if that barcode includes that imaging round and 0 if it does not.

  • threshold (float, optional) – Distance threshold in pixels for matching points between rounds

  • match_method (str, optional) – Method for matching spots between rounds. Options are 'min_dist' and 'mutual_nearest_neighbor'. Defaults to 'min_dist'.

  • error_corr (bool, optional) – Boolean that determines whether error correction is performed on barcodes that don’t have an exact match. Defaults to True.

Returns

DataFrame containing gene counts for each cell.

Return type

pandas.DataFrame

deepcell_spots.multiplex.gene_counts_DBSCAN(spots_to_cells_dict, codebook, threshold, error_corr=True)[source]

Assigns combinatorial barcodes corresponding to gene identities. Matches spots between rounds with DBSCAN clustering.

Parameters
  • spots_to_cells_dict (dict) – Dictionary of dictionaries, keys are image IDs ('readoutName'), values are dictionaries where keys are cell cytoplasm labels and values are detected spots associated with that cell label, there is one item in list for each image in coords_dict.

  • codebook (pandas.DataFrame) – DataFrame with columns for each imaging round; rows are barcodes for genes. Values in the data frame are 1 if that barcode includes that imaging round and 0 if it does not.

  • threshold (float) – Distance threshold in pixels for matching points between rounds.

  • error_corr (bool, optional) – Boolean that determines whether error correction is performed on barcodes that don’t have an exact match. Defaults to True.

Returns

DataFrame containing gene counts for each cell.

Return type

pandas.DataFrame

deepcell_spots.multiplex.multiplex_match_spots_to_cells(coords_dict, cytoplasm_pred)[source]

Matches detected spots to labeled cell cytoplasms.

Parameters
  • coords_dict (dict) – Dictionary where keys are image IDs ('readoutName') and values are coordinates of detected spots

  • cytoplasm_pred (matrix) – Image where pixel values are labels for segmented cell cytoplasms.

Returns

Dict of dicts: keys are image IDs ('readoutName'), values are dictionaries where keys are cell cytoplasm labels and values are detected spots associated with that cell label; there is one item for each image in coords_dict.

Return type

dict

deepcell_spots.point_metrics

Custom metrics for comparison of sets of points. A set of points is an unordered collection of points, defined here by a list of the point coordinates. The metrics defined here quantify the similarity between two sets of points, taking into account their spatial structure. Specifically, the distance between points is taken into account (as opposed to the Jaccard distance, for example).

deepcell_spots.point_metrics.get_mean_stats(y_test, y_pred, threshold=0.98, d_thresh=1)[source]

Calculates the precision, recall, F1 score, and sum of min distances for stack of predictions.

Parameters
  • y_test (array) – Array of shape (N1,d), set of N1 points in d dimensions.

  • y_pred (array) – A batch of predictions, of the format: y_pred[annot_type][ind] is an annotation for image ind in the batch where annot_type = 0 or 1: 0 - classification (from classification head), 1 - offset_regression (from regression head).

  • threshold (float) – Probability threshold for determining spot locations.

  • d_thresh (float) – A distance threshold used in the definition of tp and fp.

deepcell_spots.point_metrics.match_points_min_dist(pts1, pts2, threshold=None)[source]

Find a pairing between two sets of points that minimizes the sum of the Euclidean distances between matched points from each set.

Parameters
  • pts1 (array) – Array of shape (N1,d), set of N1 points in d dimensions.

  • pts2 (array) – Array of shape (N2,d), set of N2 points in d dimensions. Each row of pts1 and pts2 should be the coordinates of a single d-dimensional point.

  • threshold (float) – A distance threshold for matching two points. Points that are more than the threshold distance apart, cannot be matched.

Returns

An array of row indices and one of corresponding column indices giving the optimal assignment, as described in: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html

Return type

(numpy.array, numpy.array)
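
For example (a sketch; that unmatched points are simply absent from the returned index arrays is an assumption based on the threshold description above):

import numpy as np
from deepcell_spots.point_metrics import match_points_min_dist

pts1 = np.array([[0.0, 0.0], [5.0, 5.0]])
pts2 = np.array([[0.2, 0.1], [9.0, 9.0]])
# Only the first pair lies within the threshold distance of each other
row_ind, col_ind = match_points_min_dist(pts1, pts2, threshold=1.0)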

deepcell_spots.point_metrics.match_points_mutual_nearest_neighbor(pts1, pts2, threshold=None)[source]

Find a pairing between two sets of points that ensures that each pair of points are mutual nearest neighbors.

Parameters
  • pts1 (array) – Array of shape (N1,d), set of N1 points in d dimensions.

  • pts2 (array) – Array of shape (N2,d), set of N2 points in d dimensions. Each row of pts1 and pts2 should be the coordinates of a single d-dimensional point.

  • threshold (float) – A distance threshold for matching two points. Points that are more than the threshold distance apart, cannot be matched.

Returns

An array of row indices and one of corresponding column indices giving the optimal assignment, as described in: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html

Return type

(numpy.array, numpy.array)

deepcell_spots.point_metrics.model_benchmarking(pred, coords, threshold, min_distance)[source]

Calculates the precision, recall, F1 score, Jaccard index, root mean square error, and sum of min distances for a stack of predictions.

Parameters
  • pred (array) – A batch of predictions, of the format: y_pred[annot_type][ind] is an annotation for image ind in the batch where annot_type = 0 or 1: 0 - classification (from classification head), 1 - offset_regression (from regression head).

  • coords (list) – Nested list of coordinate locations for ground truth spots from a single annotator.

  • threshold (float) – A number in [0, 1]. Pixels with classification score > threshold are considered containing a spot center, and their corresponding regression values will be used to create a final spot position prediction which will be added to the output spot center coordinates list.

  • min_distance (float) – The minimum distance between detected spots in pixels

Returns

Six lists:
  • values for the precision of the predicted spots

  • values for the recall of the predicted spots

  • values for the F1 score of the predicted spots

  • values for the Jaccard index of the predicted spots

  • values for the root mean square error of the spot locations

  • values for the sum of min distances of the spot locations

Return type

list

deepcell_spots.point_metrics.point_F1_score(points_true, points_pred, threshold, match_points_function=<function match_points_mutual_nearest_neighbor>)[source]

Calculates the F1 score of dot detection using the following definitions:

The F1 score is equal to: F1 score = 2*p*r / (p+r), where the precision is equal to: (the number of true positives) / (total number of predicted points), and the recall is equal to: (the number of true positives) / (total number of true points). A true positive (tp) is a predicted dot p with a matching true dot t, where the matching between predicted and true points is such that the total distance between matched points is minimized, and points can be matched only if the distance between them is smaller than the threshold. Otherwise, the predicted dot is a false positive (fp) and the unmatched true dot is a false negative (fn).

Parameters
  • points_true (array) – Array of shape (N1,d), set of N1 true points in d dimensions.

  • points_pred (array) – Array of shape (N2,d), set of N2 predicted points in d dimensions. Each row of points_true and points_pred should be the coordinates of a single d-dimensional point.

  • threshold (float) – A distance threshold used in the definition of tp and fp.

  • match_points_function – A function that matches points in two sets and has three parameters: pts1, pts2, threshold – two sets of points and a threshold distance for allowing a match. Supported matching functions are match_points_min_dist and match_points_mutual_nearest_neighbor.

Returns

the F1 score as defined above (a number between 0 and 1)

Return type

float
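
Example (an illustrative usage sketch; the point values are arbitrary):

import numpy as np
from deepcell_spots.point_metrics import point_F1_score

points_true = np.array([[10.0, 12.0], [40.0, 8.0], [25.0, 30.0]])
points_pred = np.array([[10.4, 11.7], [24.9, 30.2]])

# One true point has no prediction within the threshold, so recall < 1
f1 = point_F1_score(points_true, points_pred, threshold=2.0)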

deepcell_spots.point_metrics.point_precision(points_true, points_pred, threshold, match_points_function=<function match_points_mutual_nearest_neighbor>)[source]

Calculates the precision, tp/(tp + fp), of point detection using the following definitions:

A true positive (tp) is a predicted dot p with a matching true dot t, where the matching between predicted and true points is such that the total distance between matched points is minimized, and points can be matched only if the distance between them is smaller than the threshold. Otherwise, the predicted dot is a false positive (fp).

The precision is equal to: (the number of true positives) / (total number of predicted points)

Parameters
  • points_true (array) – Array of shape (N1,d), set of N1 true points in d dimensions.

  • points_pred (array) – Array of shape (N2,d), set of N2 predicted points in d dimensions. Each row of points_true and points_pred should be the coordinates of a single d-dimensional point.

  • threshold (float) – A distance threshold used in the definition of tp and fp.

  • match_points_function – A function that matches points in two sets and has three parameters: pts1, pts2, threshold – two sets of points and a threshold distance for allowing a match. Supported matching functions are match_points_min_dist and match_points_mutual_nearest_neighbor.

Returns

the precision as defined above (a number between 0 and 1).

Return type

float

deepcell_spots.point_metrics.point_recall(points_true, points_pred, threshold, match_points_function=<function match_points_mutual_nearest_neighbor>)[source]

Calculates the recall, tp/(tp + fn), of point detection using the following definitions:

A true positive (tp) is a predicted dot p with a matching true dot t, where the matching between predicted and true points is such that the total distance between matched points is minimized, and points can be matched only if the distance between them is smaller than the threshold. Otherwise, the unmatched true dot is a false negative (fn).

The recall is equal to: (the number of true positives) / (total number of true points)

Parameters
  • points_true (array) – Array of shape (N1,d), set of N1 true points in d dimensions.

  • points_pred (array) – Array of shape (N2,d), set of N2 predicted points in d dimensions. Each row of points_true and points_pred should be the coordinates of a single d-dimensional point.

  • threshold (float) – A distance threshold used in the definition of tp and fn.

  • match_points_function – A function that matches points in two sets and has three parameters: pts1, pts2, threshold – two sets of points and a threshold distance for allowing a match. Supported matching functions are match_points_min_dist and match_points_mutual_nearest_neighbor.

Returns

the recall as defined above (a number between 0 and 1).

Return type

float

deepcell_spots.point_metrics.stats_points(points_true, points_pred, threshold, match_points_function=<function match_points_mutual_nearest_neighbor>)[source]

Calculates point-based statistics (precision, recall, F1, JAC, RMSE, d_md).

Parameters
  • points_true (array) – Array of shape (N1,d), set of N1 true points in d dimensions.

  • points_pred (array) – Array of shape (N2,d), set of N2 predicted points in d dimensions. Each row of points_true and points_pred should be the coordinates of a single d-dimensional point.

  • threshold (float) – A distance threshold used in the definition of tp and fp.

Returns

Dictionary containing the calculated statistics.

Return type

dictionary
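
Example (an illustrative usage sketch; the exact dictionary keys are determined by the library):

import numpy as np
from deepcell_spots.point_metrics import stats_points

points_true = np.array([[10.0, 12.0], [40.0, 8.0]])
points_pred = np.array([[10.4, 11.7], [39.6, 8.2], [80.0, 80.0]])

stats = stats_points(points_true, points_pred, threshold=2.0)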

deepcell_spots.point_metrics.sum_of_min_distance(pts1, pts2, normalized=False)[source]

Calculates the sum of minimal distance measure between two sets of d-dimensional points as suggested by Eiter and Mannila in: https://link.springer.com/article/10.1007/s002360050075

Parameters
  • pts1 (array) – Array of shape (N1,d), set of N1 points in d dimensions.

  • pts2 (array) – Array of shape (N2,d), set of N2 points in d dimensions. Each row of pts1 and pts2 should be the coordinates of a single d-dimensional point.

  • normalized (bool) – If True, each sum will be normalized by the number of elements in it, resulting in an intensive distance measure which doesn’t scale like the number of points.

Returns

The sum of minimal distance between point sets X and Y, defined as: d(X,Y) = 1/2 * (sum over x in X of min over y in Y of d(x,y) + sum over y in Y of min over x in X of d(x,y)) = 1/2 * (sum over x in X of d(x,Y) + sum over y in Y of d(X,y)), where d(x,y) is the Euclidean distance. Note that this isn't a metric in the mathematical sense (it doesn't satisfy the triangle inequality).

Return type

float
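
Example (an illustrative usage sketch; the point values are arbitrary):

import numpy as np
from deepcell_spots.point_metrics import sum_of_min_distance

pts1 = np.array([[0.0, 0.0], [5.0, 5.0]])
pts2 = np.array([[0.5, 0.0], [4.5, 5.5], [20.0, 20.0]])

# normalized=True divides each sum by its number of points, so the
# measure does not grow with the number of spots
d = sum_of_min_distance(pts1, pts2, normalized=True)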

deepcell_spots.postprocessing_utils

Functions that convert deep learning model output to a list of detected spots.

deepcell_spots.postprocessing_utils.y_annotations_to_point_list(y_pred, threshold=0.95)[source]

Convert raw prediction to a predicted point list: pixels whose classification score exceeds threshold are considered to contain a spot center, and their corresponding regression values are used to create the final spot position prediction, which is added to the output list of spot center coordinates.

Parameters
  • y_pred (dict) – a dictionary of predictions with keys 'classification' and 'offset_regression' corresponding to the named outputs of the dot_net_2D model.

  • threshold (float) – a number in [0, 1]. Pixels with classification score > threshold are considered containing a spot center.

Returns

spot center coordinates of the format [[y0, x0], [y1, x1],...]

Return type

array

deepcell_spots.postprocessing_utils.y_annotations_to_point_list_cc(y_pred, threshold=0.95)[source]

Convert raw prediction to a predicted point list: connected components whose classification score exceeds threshold are considered to contain a spot center, and their corresponding regression values are used to create the final spot position prediction, which is added to the output list of spot center coordinates.

Parameters
  • y_pred (dict) – a dictionary of predictions with keys 'classification' and 'offset_regression' corresponding to the named outputs of the dot_net_2D model.

  • threshold (float) – a number in [0, 1]. Pixels with classification score > threshold are considered containing a spot center.

Returns

spot center coordinates of the format [[y0, x0], [y1, x1],...]

Return type

array

deepcell_spots.postprocessing_utils.y_annotations_to_point_list_max(y_pred, threshold=0.95, min_distance=2)[source]

Convert raw prediction to a predicted point list using skimage.feature.peak_local_max to determine local maxima in the classification prediction image; the corresponding regression values are used to create the final spot position predictions, which are added to the output list of spot center coordinates.

Parameters
  • y_pred (dict) – a dictionary of predictions with keys 'classification' and 'offset_regression' corresponding to the named outputs of the dot_net_2D model.

  • threshold (float) – a number in [0, 1]. Pixels with classification score > threshold are considered as containing a spot center.

  • min_distance (float) – the minimum distance between detected spots in pixels.

Returns

spot center coordinates of the format [[y0, x0], [y1, x1],…]

Return type

array
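
Example (an illustrative sketch with randomly generated predictions; the (batch, y, x, 2) shapes of the two model outputs are an assumption here):

import numpy as np
from deepcell_spots.postprocessing_utils import y_annotations_to_point_list_max

# Dummy prediction dict standing in for dot_net_2D output
y_pred = {
    'classification': np.random.uniform(size=(1, 64, 64, 2)),
    'offset_regression': np.random.uniform(-0.5, 0.5, size=(1, 64, 64, 2)),
}
coords = y_annotations_to_point_list_max(y_pred, threshold=0.95, min_distance=2)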

deepcell_spots.postprocessing_utils.y_annotations_to_point_list_restrictive(y_pred, threshold=0.95)[source]

Convert raw prediction to a predicted point list: a pixel is considered to contain a spot center if its classification score exceeds threshold AND the regressed center is contained in that pixel. The corresponding regression values are used to create the final spot position prediction, which is added to the output list of spot center coordinates.

Parameters
  • y_pred (dict) – a dictionary of predictions with keys 'classification' and 'offset_regression' corresponding to the named outputs of the dot_net_2D model.

  • threshold (float) – a number in [0, 1]. Pixels with classification score > threshold are considered containing a spot center.

Returns

spot center coordinates of the format [[y0, x0], [y1, x1],...].

Return type

array

deepcell_spots.preprocessing_utils

Image normalization methods

deepcell_spots.preprocessing_utils.mean_std_normalize(image, epsilon=1e-07)[source]

Normalize image data by subtracting the mean pixel value and dividing by the standard deviation of the pixel values.

Parameters
  • image (numpy.array) – 4D numpy array of image data.

  • epsilon (float) – fuzz factor used in numeric expressions.

Returns

normalized image data.

Return type

numpy.array

deepcell_spots.preprocessing_utils.min_max_normalize(image, clip=False)[source]

Normalize image data by subtracting the minimum pixel value and dividing by the maximum pixel value.

Parameters
  • image (numpy.array) – 4D numpy array of image data.

  • clip (boolean) – Defaults to False. Determines if pixel values are clipped by percentile.

Returns

normalized image data.

Return type

numpy.array
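
Example (an illustrative usage sketch):

import numpy as np
from deepcell_spots.preprocessing_utils import mean_std_normalize, min_max_normalize

# 4D batch of image data with dimensions (batch, y, x, channel)
image = np.random.randint(0, 2**16, size=(1, 128, 128, 1)).astype('float32')

standardized = mean_std_normalize(image)
rescaled = min_max_normalize(image, clip=False)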

deepcell_spots.simulate_data

Data simulators for spot images (for benchmarking the deep learning model) and for annotator detections (for benchmarking the EM algorithm).

deepcell_spots.simulate_data.add_gaussian_noise(image, m, s)[source]

Adds Gaussian random noise with mean m and standard deviation s to the input image.

Parameters
  • image (numpy.array) – 2D image to which noise is added.

  • m – mean of the Gaussian random noise added to each pixel of the image.

  • s – standard deviation of the Gaussian random noise added to each pixel of the image.

Returns

The noisy image.

Return type

numpy.array
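
Example (an illustrative usage sketch):

import numpy as np
from deepcell_spots.simulate_data import add_gaussian_noise

clean = np.zeros((64, 64))
noisy = add_gaussian_noise(clean, m=0.1, s=0.02)  # mean 0.1, std 0.02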

deepcell_spots.simulate_data.gaussian_spot_image_generator(L, N_min, N_max, sigma_mean, sigma_std, A_mean=1, A_std=0, noise_mean=0, noise_std=0, segmask=False, yield_pos=False)[source]

Generates random images of Gaussian spots with center positions distributed uniformly over the image area, i.e. in [0,L-1]*[0,L-1]. The number of spots in an image is uniformly distributed in [N_min, N_max]. Each spot is a Gaussian whose standard deviation is normally distributed with mean sigma_mean and standard deviation sigma_std, with a cutoff value of 0.5 (sigma is redrawn if a smaller value is drawn). The intensity of each spot is normally distributed.

Parameters
  • L – generated image side length; the generated images have shape (L,L).

  • N_min, N_max – the number of spots plotted in each image is uniformly distributed in [N_min, N_max].

  • sigma_mean, sigma_std – the mean and standard deviation of the normally distributed spot width sigma (i.e. each spot is a Gaussian with standard deviation sigma).

  • A_mean, A_std – the intensity of each spot is normally distributed with mean A_mean and standard deviation A_std.

  • noise_mean, noise_std – mean and standard deviation of the white noise added to every pixel of the image.

  • segmask (bool) – if True, the label image is a segmentation mask rather than an image of spot centers (see Returns).

  • yield_pos – if True, will yield lists of x and y positions and bounding boxes in addition to the image and label image.

Returns

(1) img: (L, L) numpy array, the simulated image. (2) label: (L, L) numpy array; if segmask is False, pixel values are 0 for background and 1 for the pixel of each (rounded) spot center; if segmask is True, a segmentation mask (pixel values are 0 in the background and 1, ..., N for pixels belonging to the N spots in the image).

Return type

(numpy.array, numpy.array)
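
Example (a usage sketch, assuming the generator yields (image, label) pairs as described in the Returns entry):

from deepcell_spots.simulate_data import gaussian_spot_image_generator

gen = gaussian_spot_image_generator(
    L=64, N_min=5, N_max=10, sigma_mean=1.5, sigma_std=0.2,
    A_mean=1.0, A_std=0.1, noise_mean=0.0, noise_std=0.01)
img, label = next(gen)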

deepcell_spots.simulate_data.is_in_image(x, y, a, L)[source]

Determines if a square with defined vertices is contained in an image with larger dimensions.

Parameters
  • x (int) – Value for the x coordinate of the top left corner of the square of interest

  • y (int) – Value for the y coordinate of the top left corner of the square of interest

  • a (int) – Value for the side length of the square of interest

  • L (int) – Value for the dimensions of the larger image

Returns

Whether the square is contained in image dimensions

Return type

bool

deepcell_spots.simulate_data.is_overlapping(x_list, y_list, a_list, x, y, a)[source]

Determines if a square overlaps with a list of other squares.

Returns True if the square overlaps with any of the squares in the list, and False if it does not overlap with any of them.

Parameters
  • x_list (list) – List of x coordinates for top left corners of squares to be compared with square of interest.

  • y_list (list) – List of y coordinates for top left corners of squares to be compared with square of interest.

  • a_list (list) – List of side lengths of squares to be compared with square of interest.

  • x (int) – Value for the x coordinate of the top left corner of the square of interest

  • y (int) – Value for the y coordinate of the top left corner of the square of interest

  • a (int) – Value for the side length of the square of interest

Returns

Whether the square overlaps with any of squares in list.

Return type

bool

deepcell_spots.simulate_data.percent_correct(gt, data_array)[source]

Calculates the percent of detections correctly labeled.

Returns a value from 0 to 1 indicating the fraction of detections correctly labeled.

Parameters
  • gt (array) – Array of ground truth cluster labels. 1 indicates a true detection and 0 indicates a false detection.

  • data_array (array) – Array of simulated detections with length equal to the number of detections. A value of 1 indicates a detected cluster and a value of 0 indicates an undetected cluster.

Returns

Value for fraction of detections correctly labeled compared to ground truth.

Return type

float
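
Example (an illustrative usage sketch):

import numpy as np
from deepcell_spots.simulate_data import percent_correct

gt = np.array([1, 0, 1, 1])
labels = np.array([1, 0, 0, 1])  # one of four labels disagrees
frac = percent_correct(gt, labels)  # expected: 0.75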

deepcell_spots.simulate_data.sim_annotators(gt, tpr_list, fpr_list)[source]

Simulate the detections of multiple annotators with different TPRs and FPRs on the same ground truth data.

Returns a matrix of simulated detection data with dimensions clusters x annotators.

Parameters
  • gt (array) – Array of ground truth cluster labels. 1 indicates a true detection and 0 indicates a false detection.

  • tpr_list (array) – Array of TPR values for each annotator. For a ground truth value of 1, the TPR is the probability that the function will output 1, indicating that the simulated annotator detected the true cluster.

  • fpr_list (array) – Array of FPR values for each annotator. For a ground truth value of 0, the FPR is the probability that the function will output 1, indicating that the simulated annotator falsely detected the cluster.

Returns

Matrix of simulated detection data with dimensions clusters x annotators. A value of 1 indicates a detected cluster and a value of 0 indicates an undetected cluster.

Return type

numpy.array

deepcell_spots.simulate_data.sim_detections(gt, tpr, fpr)[source]

Simulates detection data for a set of ground truth cluster labels and an annotator with a specified TPR and FPR.

Returns an array with the same length as the input gt, where 1 indicates that the simulated annotator detected a cluster and 0 indicates an undetected cluster.

Parameters
  • gt (array) – Array of ground truth cluster labels. 1 indicates a true detection and 0 indicates a false detection.

  • tpr (float) – The true positive rate of the annotator. For a ground truth value of 1, it is the probability that the function will output 1, indicating that the simulated annotator detected the true cluster.

  • fpr (float) – The false positive rate of the annotator. For a ground truth value of 0, it is the probability that the function will output 1, indicating that the simulated annotator falsely detected the cluster.

Returns

Array of detected cluster labels. A value of 1 indicates that a cluster was detected by the annotator, and 0 indicates that the cluster was not detected by the annotator.

Return type

array
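
Example (an illustrative usage sketch for sim_detections and sim_annotators):

import numpy as np
from deepcell_spots.simulate_data import sim_annotators, sim_detections

gt = np.random.binomial(1, 0.9, size=100)  # mostly true detections
single = sim_detections(gt, tpr=0.95, fpr=0.05)
matrix = sim_annotators(gt,
                        tpr_list=np.array([0.9, 0.95, 0.99]),
                        fpr_list=np.array([0.1, 0.05, 0.01]))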

deepcell_spots.simulate_data.sim_gt_clusters(num_clusters, tp_ratio)[source]

Generate random simulated labels (true detection or false detection) for clusters, with a specified average rate of true detections, tp_ratio.

Returns a list of length num_clusters of cluster labels with value 1 for a true detection and 0 for a false detection.

Parameters
  • num_clusters (int) – The number of cluster labels to be generated.

  • tp_ratio (float) – The average fraction of the detections that are true detections.

Returns

List of random simulated cluster labels 1 or 0.

Return type

list

deepcell_spots.singleplex

Tools for analysis of singleplex FISH images

deepcell_spots.singleplex.match_spots_to_cells(labeled_im, coords)[source]

Assigns detected spots to regions of a labeled image.

Returns a dictionary where keys are labeled regions of input image and values are spot coordinates corresponding with that labeled region.

Parameters
  • labeled_im (array) – Image output from segmentation algorithm with dimensions (1,x,y,1) where pixel values label regions of the image corresponding with objects of interest (nuclei, cytoplasm, etc.).

  • coords (array) – Array of coordinates for spot location with dimensions (number of spots,2).

Returns

Dictionary where keys are labeled regions of input image and values are spot coordinates corresponding with that labeled region.

Return type

dict

deepcell_spots.singleplex.process_spot_dict(spot_dict)[source]

Processes spot dictionary into an array of coordinates and list of region labels for spots.

Parameters

spot_dict (dict) – Dictionary where keys are labeled regions of input image and values are spot coordinates corresponding with that labeled region.

Returns

(1) Array of coordinates for spot location with dimensions (number of spots,2). Re-ordered to correspond with list of region labels. (2) List of region labels corresponding with coordinates. Intended to be used to color a cmap when visualizing spots.

Return type

(array, list)
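
Example (an illustrative usage sketch for match_spots_to_cells and process_spot_dict):

import numpy as np
from deepcell_spots.singleplex import match_spots_to_cells, process_spot_dict

# A (1, x, y, 1) labeled image with a single cell region labeled 1
labeled_im = np.zeros((1, 128, 128, 1), dtype=int)
labeled_im[0, 10:40, 10:40, 0] = 1
coords = np.array([[20, 20], [100, 100]])

spot_dict = match_spots_to_cells(labeled_im, coords)
ordered_coords, labels = process_spot_dict(spot_dict)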

deepcell_spots.singleplex.remove_nuc_spots_from_cyto(labeled_im_nuc, labeled_im_cyto, coords)[source]

Removes spots in nuclear regions from spots assigned to cytoplasmic regions.

Returns a dictionary where keys are labeled cytoplasmic regions of input image and values are spot coordinates corresponding with that labeled cytoplasm region.

Parameters
  • labeled_im_nuc (array) – Image output from segmentation algorithm with dimensions (1,x,y,1) where pixel values label nuclear regions.

  • labeled_im_cyto (array) – Image output from segmentation algorithm with dimensions (1,x,y,1) where pixel values label cytoplasmic regions.

  • coords (array) – Array of coordinates for spot location with dimensions (number of spots,2).

Returns

Dictionary where keys are labeled regions of the input image and values are spot coordinates corresponding with that labeled region (cytoplasm excluding nucleus).

Return type

dict

deepcell_spots.spot_em

Expectation maximization functions for spot detection

deepcell_spots.spot_em.calc_tpr_fpr(gt, data)[source]

Calculate the true positive rate and false positive rate for a pair of ground truth labels and detection data.

Parameters

  • gt (array) – Array of ground truth cluster labels. A value of 1 indicates a true detection and a value of 0 indicates a false detection.

  • data (array) – Array of detection data with the same length. A value of 1 indicates a detected cluster and a value of 0 indicates an undetected cluster.

Returns

(1) Value for the true positive rate of an annotator. This is the probability that an annotator will detect a spot that is labeled as a ground truth true detection. (2) Value for the false positive rate of an annotator. This is the probability that an annotator will detect a spot that is labeled as a ground truth false detection.

Return type

(float, float)

deepcell_spots.spot_em.cluster_coords(coords_df, threshold=1.5)[source]

Clusters coordinates in each image by proximity. If clusters contain more than one detection from a single algorithm, the detection closest to the centroid of the cluster is retained and all others are separated into new clusters.

Parameters
  • coords_df (DataFrame) – Dataframe containing algorithm, image, and location information about each cluster.

  • threshold (float) – Distance in pixels below which detections will be grouped into clusters.

Returns

Dataframe containing algorithm, image, location, and cluster information about each cluster.

Return type

DataFrame

deepcell_spots.spot_em.define_edges(coords_df, threshold)[source]

Defines the adjacency matrix for the multiple annotators, connecting points that are sufficiently close to one another. It is assumed that these spots are derived from the same ground truth spot in the original image.

Parameters
  • coords_df (DataFrame) – Dataframe with columns 'x' and 'y', which encode the spot locations, and 'Algorithm', which encodes the algorithm that produced each spot.

  • threshold (float) – The distance in pixels. Detections closer than the threshold distance will be grouped into a cluster of detections, assumed to be derived from the same ground truth detection.

Returns

Matrix of dimensions (number of detections) x (number of detections) defining the edges of a graph that groups detections from different annotators derived from the same ground truth detection. A value of 1 denotes two connected nodes in the eventual graph and a value of 0 denotes disconnected nodes.

Return type

numpy.array

deepcell_spots.spot_em.det_likelihood(cluster_data, pr_list)[source]

Calculate the likelihood that a cluster is a true positive or false positive. To calculate the likelihood of a true positive, pr_list should be a list of TPRs for all annotators. To calculate the likelihood of a cluster being a false positive, pr_list should be a list of FPRs for all annotators.

Returns a value for the likelihood that a cluster is either a true positive or a false positive.

Parameters
  • cluster_data (array) – Array of detection labels for each annotator. Entry has value 1 if annotator detected the cluster, and entry has value 0 if annotator did not detect the cluster.

  • pr_list (array) – Array of true positive rates for each annotator if one wants to calculate the likelihood that the cluster is a true positive, or array of false positive rates for each annotator if one wants to calculate the likelihood that the cluster is a false positive.

Returns

Value for the likelihood that a cluster is either a true positive or a false positive detection.

Return type

float

deepcell_spots.spot_em.em_spot(cluster_matrix, tp_list, fp_list, prior=0.9, max_iter=10)[source]

Estimate the TPR/FPR and probability of true detection for various spot annotators using expectation maximization.

Returns the true positive rate and false positive rate for each annotator, and returns the probability that each spot is a true detection or false detection.

Parameters
  • cluster_matrix (matrix) – Matrix of detection labels for each spot for each annotator. Dimensions spots x annotators. A value of 1 indicates that the spot was detected by that annotator and a value of 0 indicates that the spot was not detected by that annotator.

  • tp_list (array) – Array of initial guesses for the true positive rates for each annotator.

  • fp_list (array) – Array of initial guesses for the false positive rates for each annotator.

  • prior (float) – Value for the prior probability that a spot is a true positive.

  • max_iter (int) – Value for the number of times the expectation maximization algorithm will iteratively calculate the MLE for the TPR and FPR of the annotators.

Returns

(1) Array of final estimates for the true positive rates for each annotator. (2) Array of final estimates for the false positive rates for each annotator. (3) Matrix of probabilities that each cluster is a true detection (column 0) or false detection (column 1). Dimensions (spots x 2).

Return type

(array, array, matrix)
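
Example (an illustrative usage sketch; the detection matrix and rate guesses are arbitrary):

import numpy as np
from deepcell_spots.spot_em import em_spot

# Detection matrix with dimensions (spots x annotators)
cluster_matrix = np.array([[1, 1, 1],
                           [1, 1, 0],
                           [0, 0, 1],
                           [1, 1, 1],
                           [0, 1, 0]])
tp_list = np.array([0.9, 0.9, 0.9])  # initial TPR guesses
fp_list = np.array([0.1, 0.1, 0.1])  # initial FPR guesses

tp_est, fp_est, probs = em_spot(cluster_matrix, tp_list, fp_list,
                                prior=0.9, max_iter=10)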

deepcell_spots.spot_em.load_coords(coords_dict)[source]

Loads a dictionary of coordinate spot locations into a DataFrame.

Parameters

coords_dict (dictionary) – Dictionary in which keys are names of spot detection algorithms and values are coordinate locations of spots detected with each algorithm. Coordinates are a nested list (length equal to the number of images) of lists of shape (number of spots, 2).

Returns

Dataframe containing algorithm, image, and location information about each cluster.

Return type

DataFrame

deepcell_spots.spot_em.norm_marg_likelihood(cluster_data, tp_list, fp_list, prior)[source]

Calculates the normalized marginal likelihood that each cluster is a true positive or a false positive.

Parameters
  • cluster_data (array) – Array of detection labels for each annotator. Entry has value 1 if annotator detected the cluster, and entry has value 0 if annotator did not detect the cluster.

  • tp_list (array) – Array of true positive rates for each annotator.

  • fp_list (array) – Array of false positive rates for each annotator.

  • prior (float) – Prior probability that a cluster is a true positive detection.

Returns

(1) Value for the normalized marginal likelihood that a cluster is a true positive detection. (2) Value for the normalized marginal likelihood that a cluster is a false positive detection.

Return type

(float, float)

deepcell_spots.spot_em.predict_cluster_probabilities(coords_df, tpr_dict, fpr_dict, prior=0.9, max_iter=10)[source]

Predicts the probability that each cluster of detections corresponds with a true positive detection.

Parameters
  • coords_df (DataFrame) – Dataframe containing algorithm, image, location, and cluster information about each cluster.

  • tpr_dict (dictionary) – Dictionary in which keys are algorithm names and values are estimates for TPR of each algorithm.

  • fpr_dict (dictionary) – Dictionary in which keys are algorithm names and values are estimates for FPR of each algorithm.

  • prior (float) – Prior probability that a cluster will correspond with a true positive detection. Value must be between 0 and 1.

  • max_iter (int) – Number of iterations performed by EM algorithm.

Returns

Dataframe containing algorithm, image, location, cluster, spot probability, and centroid information about each cluster.

Return type

DataFrame
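
Example (an illustrative sketch of the EM workflow; the detections and rate estimates are hypothetical):

import numpy as np
from deepcell_spots.spot_em import (cluster_coords, load_coords,
                                    predict_cluster_probabilities)

# Hypothetical detections from two algorithms on a single image
coords_dict = {
    'algo_a': [np.array([[10.0, 10.0], [20.0, 20.0]])],
    'algo_b': [np.array([[10.5, 9.5], [40.0, 40.0]])],
}
coords_df = load_coords(coords_dict)
coords_df = cluster_coords(coords_df, threshold=1.5)
coords_df = predict_cluster_probabilities(
    coords_df,
    tpr_dict={'algo_a': 0.9, 'algo_b': 0.8},
    fpr_dict={'algo_a': 0.1, 'algo_b': 0.2})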

deepcell_spots.training

Functions for training convolutional neural networks

deepcell_spots.training.train_model_dot(model, dataset, expt='', test_size=0.2, seed=0, n_epoch=10, batch_size=1, num_gpus=None, frames_per_batch=5, optimizer=tensorflow.keras.optimizers.SGD, log_dir='/data/tensorboard_logs', model_dir='/data/models', model_name=None, focal=False, sigma=3.0, alpha=0.25, gamma=0.5, lr_sched=deepcell.utils.train_utils.rate_scheduler, rotation_range=0, flip=True, shear=0, zoom_range=0, fill_mode='nearest', cval=0.0, **kwargs)[source]

Train a dot center detection model using fully convolutional mode.

Parameters
  • model (tensorflow.keras.Model) – The model to train.

  • dataset (str) – Path to a dataset to train the model with.

  • expt (str) – Experiment, substring to include in model name.

  • test_size (float) – Fraction of data to reserve as test data.

  • seed (int) – Random seed used for train-test split.

  • n_epoch (int) – Number of training epochs.

  • batch_size (int) – Number of batches per training step.

  • num_gpus (int) – The number of GPUs to train on.

  • frames_per_batch (int) – Number of training frames if training 3D data.

  • log_dir (str) – Filepath to save tensorboard logs. If None, disables the tensorboard callback.

  • model_dir (str) – Directory to save the model file.

  • model_name (str) – Name of the model (and name of output file).

  • focal (bool) – If true, uses focal loss.

  • sigma (float) – The point where the loss changes from L2 to L1.

  • alpha (float) – Scale the focal weight with alpha.

  • gamma (float) – Parameter for focal loss (Take the power of the focal weight with gamma.).

  • optimizer (object) – Pre-initialized optimizer object (SGD, Adam, etc.).

  • lr_sched (function) – Learning rate scheduler function.

  • rotation_range (int) – Maximum rotation range for image augmentation.

  • flip (bool) – Enables horizontal and vertical flipping for augmentation.

  • shear (int) – Maximum shear range for image augmentation.

  • zoom_range (tuple) – Minimum and maximum zoom values, e.g. (0.8, 1.2).

  • fill_mode (str) – padding style for data augmentation (input parameter of tf.keras.preprocessing.image.ImageDataGenerator).

  • cval (float or int) – used for pixels outside the boundaries of the input image when fill_mode='constant'.

  • kwargs (dict) – Other parameters to pass to _transform_masks.

Returns

The trained model.

Return type

tensorflow.keras.Model
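
Example (a minimal sketch; the dot_net_2D constructor arguments and the dataset path are hypothetical and should be adapted to your data):

from tensorflow.keras.optimizers import SGD
from deepcell_spots.dotnet import dot_net_2D
from deepcell_spots.training import train_model_dot

# Constructor arguments are assumptions for illustration; check
# deepcell_spots.dotnet for the actual signature
model = dot_net_2D(receptive_field=13, input_shape=(128, 128, 1))

trained_model = train_model_dot(
    model,
    dataset='/data/spot_training_data.npz',  # hypothetical dataset path
    n_epoch=10,
    batch_size=1,
    optimizer=SGD(learning_rate=0.01))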

deepcell_spots.utils

Functions for image augmentation

deepcell_spots.utils.affine_transform_points(points, transform_parameters, image_shape, img_row_axis=0, img_col_axis=1, fill_mode='nearest')[source]

Performs an affine transform mapping input coordinates, which refer to the input image of the apply_transform function of the ImageDataGenerator class, to the output space of that function. The returned points are the (original and padding) points contained in the output image.

Parameters
  • transform_parameters – dictionary of affine transformation parameters such as the output of ImageDataGenerator method get_random_transform.

  • points – (N, 2) numpy array which contains points in the format [y, x] (NOTE: as in image/matrix notation, not Cartesian notation). The points are labels for the input image; they should satisfy -0.5 <= x <= Lx-0.5 and -0.5 <= y <= Ly-0.5, where Lx = image_shape[img_col_axis] and Ly = image_shape[img_row_axis].

  • image_shape (tuple) – the shape of the image which contains the points (for 2D image, has length 2).

  • img_row_axis – the index of the axis (0 or 1) to be flipped when flip_vertical is True.

  • img_col_axis – the index of the axis (0 or 1) to be flipped when flip_horizontal is True.

  • fill_mode

    One of ("constant", "nearest", "reflect" or "wrap"). Default is 'nearest'. Points outside the boundaries of the input are filled according to the given mode:

    • 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)

    • 'nearest': aaaaaaaa|abcd|dddddddd

    • 'reflect': abcddcba|abcd|dcbaabcd

    • 'wrap': abcdabcd|abcd|abcdabcd

Returns

A list of points or a numpy array of shape (N',2) which contains points in the format [y, x]. Note that N' may differ from N because points in the original image may fall outside the transformed output image; also, if fill_mode is 'reflect' or 'wrap', point images in the padding of the input image can be inside the output image.

Return type

list or numpy.array

deepcell_spots.utils.generate_transformation_matrix(transform_parameters, image_shape, img_row_axis, img_col_axis)[source]

Given a dictionary of affine transformation parameters (such as the one generated by the ImageDataGenerator method get_random_transform), generate the transformation matrix and offset which apply_affine_transform generates and passes to scipy.ndimage.interpolation.affine_transform:

ndimage.interpolation.affine_transform(
        x_channel,
        final_affine_matrix,
        final_offset,
        order=order,
        mode=fill_mode,
        cval=cval)

This function performs the calculations performed by tf.keras.preprocessing.image.apply_affine_transform to obtain final_affine_matrix and final_offset, and returns them.

A point p in the output image of affine_transform corresponds to the point pT + s in the input image.

Parameters

transform_parameters – dictionary of affine transformation parameters, such as the output of the ImageDataGenerator method get_random_transform (as used in the input to apply_transform called on an image). From the keras-preprocessing apply_transform documentation: a dictionary with string - parameter pairs describing the transformation. Currently, the following parameters from the dictionary are used:

  • 'theta': Float. Rotation angle in degrees.

  • 'tx': Float. Shift in the x direction.

  • 'ty': Float. Shift in the y direction.

  • 'shear': Float. Shear angle in degrees.

  • 'zx': Float. Zoom in the x direction.

  • 'zy': Float. Zoom in the y direction.

  • 'flip_horizontal': Boolean. Horizontal flip. - NOT USED HERE

  • 'flip_vertical': Boolean. Vertical flip. - NOT USED HERE

  • 'channel_shift_intensity': Float. Channel shift intensity. - NOT USED HERE

  • 'brightness': Float. Brightness shift intensity. - NOT USED HERE

Returns

final_affine_matrix (2x2 matrix, denoted T above), final_offset (length-2 vector, denoted s above)

Return type

(array, array)
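
Example (an illustrative usage sketch):

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from deepcell_spots.utils import generate_transformation_matrix

datagen = ImageDataGenerator(rotation_range=30, width_shift_range=0.1)
transform_parameters = datagen.get_random_transform((64, 64, 1))

T, s = generate_transformation_matrix(
    transform_parameters, image_shape=(64, 64),
    img_row_axis=0, img_col_axis=1)
# A point p in the output image corresponds to p @ T + s in the input image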

deepcell_spots.utils.subpixel_distance_transform(point_list, image_shape, dy=1, dx=1)[source]

For each pixel in an image, return the vectorial distance to a point in point_list that lies in the point-containing pixel nearest to that pixel.

Parameters
  • point_list – Array of size (N,2) of point coordinates [y, x] (y before x, as in image/matrix indexing).

  • image_shape – (Ly,Lx), the shape of an image that contains the coordinates. The coordinates should be in dy*[-0.5, Ly-0.5] x dx*[-0.5, Lx-0.5].

  • dy – pixel width in the y axis.

  • dx – pixel width in the x axis.

Returns

nearest_point: (Ly, Lx) numpy array; nearest_point[i,j] is the index in point_list of a point in a point-containing pixel which is closest to pixel [i,j]. Note that neither the point nor the pixel is unique, since there could be several point-containing pixels with minimal distance to pixel [i,j], and several points contained in pixel [i,j], but only one is chosen. delta_y[i,j] and delta_x[i,j] are the elements of the vectorial distance between the chosen point which nearest_point[i,j] refers to and the center of pixel [i,j], which is at x = j*dx, y = i*dy.

numpy.array: (Ly, Lx) numpy array of the signed y distance between a point from point_list that is near pixel [i,j] and the center of the pixel.

numpy.array: (Ly, Lx) numpy array of the signed x distance between a point from point_list that is near pixel [i,j] and the center of the pixel.

Return type

(numpy.array, numpy.array, numpy.array)
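
Example (an illustrative usage sketch; the unpacking order follows the Returns entry above and should be checked against the source):

import numpy as np
from deepcell_spots.utils import subpixel_distance_transform

point_list = np.array([[3.2, 4.7], [10.0, 1.5]])  # [y, x] coordinates
nearest_point, delta_y, delta_x = subpixel_distance_transform(
    point_list, image_shape=(16, 16))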

DeepCell Spots


deepcell-spots is a deep learning library for fluorescent spot detection image analysis. It allows you to apply pre-existing models and train new deep learning models for spot detection. It is written in Python and built using TensorFlow, Keras and DeepCell. More detailed documentation is available here.

Spot Detection Example

DeepCell Spots Application

deepcell-spots contains applications that greatly simplify the implementation of deep learning models for spot detection. deepcell_spots.applications.SpotDetection contains a pre-trained model for fluorescent spot detection on images derived from assays such as RNA FISH and in-situ sequencing. This model returns a list of coordinate locations for fluorescent spots detected in the input image. deepcell_spots.applications.Polaris pairs this spot detection model with DeepCell models for nuclear and cytoplasmic segmentation.

How to Use

from deepcell_spots.applications import SpotDetection

app = SpotDetection()
# image is an np array with dimensions (batch,x,y,channel)
# threshold is the probability threshold that a pixel must exceed to be considered a spot
coords = app.predict(image, threshold=0.9)

DeepCell-Spots for Developers

Build and run a local docker container, similarly to the instructions for deepcell-tf. The relevant parts are copied here with modifications to work for deepcell-spots. For more elaborate instructions, see the deepcell-tf README.

Build a local docker container, specifying the deepcell version with DEEPCELL_VERSION

git clone https://github.com/vanvalenlab/deepcell-spots.git
cd deepcell-spots
docker build --build-arg DEEPCELL_VERSION=0.12.0-gpu -t $USER/deepcell-spots .

Run the new docker image

# '"device=0"' refers to the specific GPU(s) to run DeepCell-Spots on, and is not required
docker run --gpus '"device=0"' -it \
-p 8888:8888 \
$USER/deepcell-spots

It can also be helpful to mount the local copy of the repository and the notebooks to speed up local development.

# you can now start the docker image with the code mounted for easy editing
docker run --gpus '"device=0"' -it \
    -p 8888:8888 \
    -v $PWD/deepcell-spots/deepcell_spots:/usr/local/lib/python3.6/dist-packages/deepcell_spots \
    -v $PWD/notebooks:/notebooks \
    -v /$PWD:/data \
    $USER/deepcell-spots

License

This software is licensed under a modified APACHE2. See LICENSE for full details.

Trademarks

All other trademarks referenced herein are the property of their respective owners.

Credits

Van Valen Lab, Caltech

For more information on deploying DeepCell in the cloud:

Refer to the DeepCell Kiosk documentation