Module biobench.beluga
Individual re-identification of beluga whales (Delphinapterus leucas) using the LILA BC dataset.
We use a very simple method:
- Embed all images using a vision backbone.
- For each image, treat it as a test image and find its nearest neighbor (k=1).
- Give a score of 1.0 if the nearest neighbor is the same individual, otherwise 0.0.
You could improve this with nearest centroid classification, k>1, or any number of fine-tuning techniques. But we are simply interested in seeing if models embed images of the same individual closer together in representation space.
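To make the scoring rule concrete, here is a minimal standalone sketch. The function name knn_reid_scores and the embeddings/names arrays are hypothetical, not part of this module:

import numpy as np
import sklearn.neighbors

def knn_reid_scores(embeddings: np.ndarray, names: np.ndarray) -> np.ndarray:
    # Ask for two neighbors so we can drop each image's trivial match: itself.
    nn = sklearn.neighbors.NearestNeighbors(n_neighbors=2)
    nn.fit(embeddings)
    _, idx = nn.kneighbors(embeddings)
    nearest = idx[:, 1]  # column 0 is the query image itself
    # 1.0 when the nearest other image shows the same individual, else 0.0.
    return (names[nearest] == names).astype(np.float64)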
If you use this task, please cite the original dataset paper and the paper that proposed this evaluation method:
@article{algasov2024understanding,
  title={Understanding the Impact of Training Set Size on Animal Re-identification},
  author={Algasov, Aleksandr and Nepovinnykh, Ekaterina and Eerola, Tuomas and K{\"a}lvi{\"a}inen, Heikki and Stewart, Charles V and Otarashvili, Lasha and Holmberg, Jason A},
  journal={arXiv preprint arXiv:2405.15976},
  year={2024}
}
@inproceedings{vcermak2024wildlifedatasets,
  title={WildlifeDatasets: An open-source toolkit for animal re-identification},
  author={{\v{C}}erm{\'a}k, Vojt{\v{e}}ch and Picek, Lukas and Adam, Luk{\'a}{\v{s}} and Papafitsoros, Kostas},
  booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
  pages={5953--5963},
  year={2024}
}
Sub-modules
biobench.beluga.download
-
Downloads the Beluga whale dataset from lila.science.
Functions
def benchmark(cfg: Experiment) ‑> Report
-
Expand source code
@beartype.beartype
def benchmark(cfg: config.Experiment) -> reporting.Report:
    """Run the BelugaID benchmark."""
    backbone = registry.load_vision_backbone(cfg.model)

    # Embed all images.
    features = get_features(cfg, backbone)

    # Convert string names into integer labels.
    encoder = sklearn.preprocessing.OrdinalEncoder(dtype=int)
    y = encoder.fit_transform(features.labels.reshape(-1, 1)).reshape(-1)

    clf = sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, weights="uniform")
    clf.fit(features.x, y)
    # Predict on the same embeddings the classifier was fit on; each image's
    # own embedding is therefore among its k neighbors.
    y_hat = clf.predict(features.x)

    preds = [
        reporting.Prediction(
            str(img_id),
            float(pred == true),
            {"y_pred": pred.item(), "y_true": true.item()},
        )
        for img_id, pred, true in zip(features.ids, y_hat, y)
    ]
    return reporting.Report("beluga", preds, cfg)
Run the BelugaID benchmark.
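The "convert string names into integer labels" step in miniature, standalone with toy names:

import numpy as np
import sklearn.preprocessing

names = np.array(["whale_a", "whale_b", "whale_a"])
encoder = sklearn.preprocessing.OrdinalEncoder(dtype=int)
y = encoder.fit_transform(names.reshape(-1, 1)).reshape(-1)  # array([0, 1, 0])
# inverse_transform recovers the original string names from integer labels.
original = encoder.inverse_transform(y.reshape(-1, 1)).reshape(-1)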
def bootstrap_scores(df: polars.dataframe.frame.DataFrame,
*,
b: int = 0,
rng: numpy.random._generator.Generator | None = None) ‑> dict[str, jaxtyping.Float[ndarray, 'b']]
-
Expand source code
@jaxtyped(typechecker=beartype.beartype)
def bootstrap_scores(
    df: pl.DataFrame, *, b: int = 0, rng: np.random.Generator | None = None
) -> dict[str, Float[np.ndarray, " b"]]:
    assert df.get_column("task_name").unique().to_list() == ["beluga"]
    return reporting.bootstrap_scores_macro_f1(df, b=b, rng=rng)
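The function delegates the real work to reporting.bootstrap_scores_macro_f1. The underlying resampling idea, shown standalone with hypothetical per-image scores and the mean in place of macro F1 for brevity:

import numpy as np

def bootstrap_mean(scores: np.ndarray, b: int, rng: np.random.Generator) -> np.ndarray:
    # Draw b resamples (with replacement) of the n scores; average each draw.
    n = len(scores)
    idx = rng.integers(0, n, size=(b, n))
    return scores[idx].mean(axis=1)

rng = np.random.default_rng(seed=0)
samples = bootstrap_mean(np.array([1.0, 0.0, 1.0, 1.0]), b=1000, rng=rng)
lo, hi = np.percentile(samples, [2.5, 97.5])  # a 95% bootstrap interval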
def collate_fn(batch)
-
Expand source code
@beartype.beartype
def collate_fn(batch):
    imgs = torch.stack([img for img, _ in batch])
    metadata = [meta for _, meta in batch]
    return imgs, metadata
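Why a custom collate: torchvision.datasets.CocoDetection yields (image, list-of-annotation-dicts) pairs, and the default collate cannot stack variable-length lists of dicts. A toy batch with hypothetical values:

import torch

batch = [
    (torch.zeros(3, 224, 224), [{"image_id": 1, "name": "whale_a"}]),
    (torch.zeros(3, 224, 224), [{"image_id": 2, "name": "whale_b"}]),
]
imgs, metadata = collate_fn(batch)
assert imgs.shape == (2, 3, 224, 224) and len(metadata) == 2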
def get_features(cfg: Experiment,
backbone: VisionBackbone) ‑> Features
-
Expand source code
@beartype.beartype
@torch.no_grad()
def get_features(cfg: config.Experiment, backbone: registry.VisionBackbone) -> Features:
    """
    Get a block of features from a vision backbone.

    Args:
        cfg: the BelugaID experiment configuration.
        backbone: visual backbone.
    """
    img_transform = backbone.make_img_transform()
    backbone = torch.compile(backbone.to(cfg.device))

    if not os.path.isdir(cfg.data.beluga):
        msg = f"Path '{cfg.data.beluga}' doesn't exist. Did you download the Beluga dataset?"
        raise ValueError(msg)

    dataset = torchvision.datasets.CocoDetection(
        os.path.join(cfg.data.beluga, "beluga.coco", "images", "train2022"),
        os.path.join(
            cfg.data.beluga, "beluga.coco", "annotations", "instances_train2022.json"
        ),
        img_transform,
    )
    dataloader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=cfg.batch_size,
        num_workers=cfg.n_workers,
        drop_last=False,
        shuffle=False,
        collate_fn=collate_fn,
    )

    all_features, all_labels, all_ids = [], [], []

    def probe(batch):
        # Forward pass used only to check that a batch fits in memory;
        # the computed features are discarded.
        imgs, _ = batch
        imgs = imgs.to(cfg.device, non_blocking=True)
        with torch.amp.autocast("cuda"):
            backbone.img_encode(imgs).img_features

    with helpers.auto_batch_size(dataloader, probe=probe):
        total = len(dataloader) if not cfg.debug else 2
        it = iter(dataloader)
        for b in helpers.progress(range(total), desc="beluga"):
            imgs, metadata = next(it)
            imgs = imgs.to(cfg.device, non_blocking=True)

            with torch.amp.autocast("cuda"):
                features = backbone.img_encode(imgs).img_features

            assert all(len(meta) == 1 for meta in metadata)
            labels = [meta[0]["name"] for meta in metadata]
            ids = [str(meta[0]["image_id"]) for meta in metadata]

            all_features.append(features.cpu())
            all_labels.extend(labels)
            all_ids.extend(ids)

    all_features = torch.cat(all_features, dim=0).cpu()
    all_ids = np.array(all_ids)
    all_labels = np.array(all_labels)

    return Features(all_features, all_labels, all_ids)
Get a block of features from a vision backbone.
Args
cfg
- The BelugaID experiment configuration.
backbone
- visual backbone.
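Note that get_features assumes each image carries exactly one COCO annotation, whose name and image_id fields give the individual and the image id. A toy metadata batch in that shape, with hypothetical values:

metadata = [
    [{"image_id": 101, "name": "whale_a"}],
    [{"image_id": 102, "name": "whale_b"}],
]
assert all(len(meta) == 1 for meta in metadata)
labels = [meta[0]["name"] for meta in metadata]        # ["whale_a", "whale_b"]
ids = [str(meta[0]["image_id"]) for meta in metadata]  # ["101", "102"]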
Classes
class Features (x: jaxtyping.Float[Tensor, 'n dim'],
labels: jaxtyping.Shaped[ndarray, 'n'],
ids: jaxtyping.Shaped[ndarray, 'n'])
-
Expand source code
@jaxtyped(typechecker=beartype.beartype)
@dataclasses.dataclass(frozen=True)
class Features:
    """A block of features."""

    x: Float[Tensor, "n dim"]
    """Input features; from a `biobench.registry.VisionBackbone`."""
    labels: Shaped[np.ndarray, " n"]
    """Individual name."""
    ids: Shaped[np.ndarray, " n"]
    """Array of image ids."""

    def y(self, encoder):
        return encoder.transform(self.labels.reshape(-1, 1)).reshape(-1)

    @property
    def n(self) -> int:
        return len(self.ids)
A block of features.
Instance variables
var ids : jaxtyping.Shaped[ndarray, 'n']
-
Array of image ids.
var labels : jaxtyping.Shaped[ndarray, 'n']
-
Individual name.
prop n : int
-
Expand source code
@property
def n(self) -> int:
    return len(self.ids)
var x : jaxtyping.Float[Tensor, 'n dim']
-
Input features; from a VisionBackbone.
Methods
def y(self, encoder)
-
Expand source code
def y(self, encoder):
    return encoder.transform(self.labels.reshape(-1, 1)).reshape(-1)
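Putting the pieces together, a toy Features block and the encoder round trip that benchmark relies on; all values are hypothetical:

import numpy as np
import sklearn.preprocessing
import torch

feats = Features(
    x=torch.randn(3, 4),
    labels=np.array(["whale_a", "whale_b", "whale_a"]),
    ids=np.array(["1", "2", "3"]),
)
encoder = sklearn.preprocessing.OrdinalEncoder(dtype=int)
encoder.fit(feats.labels.reshape(-1, 1))
feats.y(encoder)  # array([0, 1, 0])
feats.n           # 3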