
Experiment 1.3

Color dot dataset.

This notebook isn’t really an experiment as such. It’s just testing out a conversion of the data from experiment 1.1.1 into a labelled dataset. A significant alteration of the data is that the main circle is no longer placed only in the center of the image: it is placed in 1 of 16 positions, spaced out in a 4x4 grid. This is not too much of a liberty, as I was never aiming to rely on some 1-1 match between retina and network activations. The benefit of this extension is that it forces a network to learn a position-invariant function, which I feel is a fair expectation. Without the dataset translations, the model would only need to look at 2 adjacent pixels to obtain the two color codes. We could go much further and modulate the shape of the dot. Small shape changes would be fine, I think, but significant changes (such as using a paint-splatter type shape) would produce images that I’m confident would also change the human classifications. For now, the translations are a simple augmentation that I don’t think raises any issues.
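
To make that last point concrete, here’s a minimal sketch (hypothetical, not part of the dataset code) of the shortcut available when the dot is always centered:

import numpy as np

def two_pixel_readout(img: np.ndarray):
    # With the dot fixed at the center of a 224x224 image, the center pixel is
    # always the circle color and a corner pixel is always the background, so
    # two lookups recover both color codes with no spatial reasoning at all.
    return img[112, 112], img[0, 0]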

import cv2
import numpy as np
import moviepy.editor as mpe
from typing import Tuple
import random
import pandas as pd
import json
import torch
from icecream import ic
import IPython
def imshow(img):
    # Notebook display helper. Expects an RGB image; cv2 encodes BGR,
    # hence the conversion before encoding.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    _, ret = cv2.imencode('.jpg', img)
    i = IPython.display.Image(data=ret)
    IPython.display.display(i)
exp_1_1_data = pd.read_csv('./resources/experiment_1_1_1.csv')
NUM_POSITIONS = 16
IMG_SIZE = 224
ROW_LEN = int(NUM_POSITIONS**0.5)      # 4 circles per row/column
RADIUS = int(IMG_SIZE / (2 * ROW_LEN)) # 28 px
assert ROW_LEN == 4
assert RADIUS == 28
COLOR_ID_TO_LABEL = {0: 'orange', 1: 'brown', 2: 'both', 3: 'neither'}

class ColorOption:
    def __init__(self, circle_rgb, bg_rgb):
        self.circle_rgb = circle_rgb
        self.bg_rgb = bg_rgb
    
    
def generate_sample():
    pos = random.randrange(0, NUM_POSITIONS)
    color, label = random_color()
    # Don't assign to the name circle_img; that would shadow the function.
    img = circle_img(pos, color.circle_rgb, color.bg_rgb)
    return img, label

def random_orange() -> ColorOption:
    # TODO replace with function.
    orange_rgbs = exp_1_1_data.loc[lambda r:r['ans'] == 0, :]
    idx = random.randrange(0, orange_rgbs.shape[0])
    circle_rgb  = json.loads(orange_rgbs.iloc[idx]['circle_rgb'])
    bg_rgb = json.loads(orange_rgbs.iloc[idx]['bg_rgb'])
    circle_rgb = np.array(circle_rgb)*255
    bg_rgb = np.array(bg_rgb)*255
    return ColorOption(circle_rgb, bg_rgb)

    
def random_brown() -> ColorOption:
    # TODO replace with function.
    brown_rgbs = exp_1_1_data.loc[lambda r:r['ans'] == 1, :]
    idx = random.randrange(0, brown_rgbs.shape[0])
    circle_rgb  = json.loads(brown_rgbs.iloc[idx]['circle_rgb'])
    bg_rgb = json.loads(brown_rgbs.iloc[idx]['bg_rgb'])
    circle_rgb = np.array(circle_rgb)*255
    bg_rgb = np.array(bg_rgb)*255
    return ColorOption(circle_rgb, bg_rgb)


def random_color() -> Tuple[ColorOption, str]:
    fctns = [random_orange, random_brown]
    idx = random.randrange(0, 2)
    label = "orange" if idx == 0 else "brown"
    return fctns[idx](), label


def coords(position_idx : int) -> Tuple[int, int]:
    """Calculates the (y, x) coordinates of a position index.
    
    Origin is top left.
    """
    if not 0 <= position_idx < NUM_POSITIONS:
        raise ValueError(f'Position index must be in [0, {NUM_POSITIONS}). '
                         f'Got: {position_idx}')
    row = position_idx // ROW_LEN
    col = position_idx % ROW_LEN
    # Centers fall at odd multiples of RADIUS: 28, 84, 140, 196.
    y = RADIUS * (row * 2 + 1)
    x = RADIUS * (col * 2 + 1)
    return (y, x)
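
A couple of worked examples of the index-to-center mapping, as a quick self-check (the values follow from RADIUS = 28):

assert coords(0) == (28, 28)     # row 0, col 0
assert coords(5) == (84, 84)     # row 1, col 1
assert coords(15) == (196, 196)  # row 3, col 3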
     
    
def to_cv2_coords(numpy_coords):
    # numpy indexes (row, col) = (y, x); cv2 drawing functions want (x, y).
    return (numpy_coords[1], numpy_coords[0])
    
    
def circle_img(position_idx, circle_color, bg_color):
    img = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
    img[:] = bg_color
    center = to_cv2_coords(coords(position_idx))
    # thickness=-1 fills the circle; LINE_AA anti-aliases its edge.
    img = cv2.circle(img, center, RADIUS, circle_color, thickness=-1,
                     lineType=cv2.LINE_AA)
    return img
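
For a quick still-frame check of circle_img (the two colors here are arbitrary picks for illustration):

imshow(circle_img(5, (230, 130, 40), (90, 60, 30)))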


def create_samples(num_samples):
    frames = []
    labels = []
    for n in range(num_samples):
        color, label = random_color()
        pos = random.randrange(0, NUM_POSITIONS)
        frame = circle_img(pos, color.circle_rgb, color.bg_rgb)
        frames.append(frame)
        labels.append(label)
    return frames, labels

def demo_data():
    FPS = 2
    frames, labels = create_samples(30)
    x_clip = mpe.ImageSequenceClip(frames, fps=FPS)

    class FrameText(mpe.VideoClip):
        """A clip that shows the label of whichever sample is on screen."""
        def __init__(self, text, fps):
            def make_frame(t):
                # make_frame receives a time in seconds; convert it to a frame
                # index so the label tracks the sample being shown.
                idx = min(int(t * fps), len(text) - 1)
                return mpe.TextClip(text[idx], font='DejaVu-Sans',
                                    color='white').get_frame(t)
            self.duration = 1.0 * len(text) / fps
            mpe.VideoClip.__init__(self, make_frame=make_frame,
                                   duration=self.duration)

    y_clip = FrameText(labels, FPS)
    comp_clip = mpe.clips_array([[y_clip], [x_clip]])
    return comp_clip
clip = demo_data() 
clip.ipython_display(rd_kwargs={'logger':None})
class ColorDotDataset(torch.utils.data.Dataset):
    """A dataset of colored circles on colored backgrounds."""

    def __init__(self, labelled_colors):
        """Generate dataset from an array of (label, circle_rgb, bg_rgb) tuples."""
        self._labelled_colors = labelled_colors
        
    def __len__(self):
        # Every labelled color pair is rendered at each of the 16 positions.
        return len(self._labelled_colors) * NUM_POSITIONS

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        color_idx = idx // NUM_POSITIONS
        pos = idx % NUM_POSITIONS
        label, circle_rgb, bg_rgb = self._labelled_colors[color_idx]
        circle_rgb = np.array(circle_rgb) * 255
        bg_rgb = np.array(bg_rgb) * 255
        ic((label, circle_rgb, bg_rgb))  # icecream debug print; output shown below.
        return {'image': circle_img(pos, circle_rgb, bg_rgb), 'label': label}
    
    
def train_test_val_split(labelled_colors, split_ratio=(6, 1, 1)):
    divisions = sum(split_ratio)
    num_per_division = len(labelled_colors) // divisions
    # Colors that don't divide evenly are given to the training set.
    remainder = len(labelled_colors) % divisions
    num_train = num_per_division * split_ratio[0] + remainder
    num_test = num_per_division * split_ratio[1]

    train_ds = ColorDotDataset(labelled_colors[0: num_train])
    test_ds = ColorDotDataset(labelled_colors[num_train: num_train + num_test])
    val_ds = ColorDotDataset(labelled_colors[num_train + num_test:])
    return (train_ds, test_ds, val_ds)
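
A quick check of the split arithmetic on a hypothetical toy input: with 82 colors and a 6:1:1 ratio, there are 10 colors per division and a remainder of 2 that goes to train.

toy = [('orange', [1, 0, 0], [0, 0, 0])] * 82
train_ds, test_ds, val_ds = train_test_val_split(toy)
# 62, 10 and 10 colors respectively, each multiplied by the 16 positions.
assert (len(train_ds), len(test_ds), len(val_ds)) == (62 * 16, 10 * 16, 10 * 16)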


def load_labelled_colors(csv_path):
    # TODO: how to properly filter and map pandas data?
    table = pd.read_csv(csv_path)
    include_labels = {'orange', 'brown', 'neither'}
    ans = []
    for idx, row in table.iterrows():
        label = COLOR_ID_TO_LABEL[row['ans']]
        if label not in include_labels:
            continue
        circle_rgb = json.loads(row['circle_rgb'])
        bg_rgb = json.loads(row['bg_rgb'])
        ans.append((label, circle_rgb, bg_rgb))
    return ans
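
On the TODO above: one more pandas-idiomatic version might look like the sketch below (same CSV columns assumed; I haven't run this against the real file):

def load_labelled_colors_v2(csv_path):
    table = pd.read_csv(csv_path)
    # Keep orange (0), brown (1) and neither (3); drop 'both' (2).
    table = table[table['ans'].isin([0, 1, 3])]
    return list(zip(table['ans'].map(COLOR_ID_TO_LABEL),
                    table['circle_rgb'].map(json.loads),
                    table['bg_rgb'].map(json.loads)))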


def load_datasets(csv_path):
    labelled_colors = load_labelled_colors(csv_path)
    datasets = train_test_val_split(labelled_colors)
    return datasets
    
    
def test_dataset():
    train, test, val = load_datasets('./resources/experiment_1_1_1.csv')
    imshow(test[130]['image'])
   
test_dataset()
ic| (label, circle_rgb, bg_rgb): ('orange',
                                  array([184.11741041, 154.17305441, 134.55422661]),
                                  array([154.58544903, 154.58544903, 154.58544903]))

(Output image: the sample at index 130, an orange dot on a gray background.)
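
The datasets should drop straight into a DataLoader for later experiments. A minimal sketch (the batch size and shuffling are arbitrary choices here):

from torch.utils.data import DataLoader

train_ds, test_ds, val_ds = load_datasets('./resources/experiment_1_1_1.csv')
loader = DataLoader(train_ds, batch_size=16, shuffle=True)
batch = next(iter(loader))
# batch['image'] is a (16, 224, 224, 3) uint8 tensor and batch['label'] is a
# list of 16 strings; any channel reordering/normalization is left to a model.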

Notes

I was reminded to be careful when dealing with image dimension orders, color channel orders and color value ranges. They are all easily mixed up, given that different APIs take different formats. I definitely need visual tests to make sure things are working correctly.
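
As a first step in that direction, a minimal programmatic sanity check (it reuses the functions above; the colors are arbitrary):

def sanity_check_colors():
    img = circle_img(0, (200, 100, 50), (10, 20, 30))
    # The circle's center pixel should be exactly the circle color (RGB order),
    # and the far corner should be the untouched background color.
    assert tuple(img[coords(0)]) == (200, 100, 50)
    assert tuple(img[-1, -1]) == (10, 20, 30)

sanity_check_colors()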