Source code for mirar.processors.utils.image_selector

"""
Module containing processors and functions to select a subset of images from a batch
"""
import logging

from mirar.data import Dataset, ImageBatch
from mirar.errors import ProcessorError
from mirar.paths import TARGET_KEY
from mirar.processors.base_processor import BaseImageProcessor, CleanupProcessor

logger = logging.getLogger(__name__)


class ParsingError(KeyError, ProcessorError):
    """
    Error raised when something goes wrong while parsing Image headers,
    e.g. a header keyword needed for selecting images is missing
    """
def select_from_images(
    batch: ImageBatch,
    key: str = TARGET_KEY,
    target_values: str | list[str] | tuple[str, ...] | set[str] = "science",
) -> ImageBatch:
    """
    Returns the subset of images in a batch which have a value of <key>
    equal to one of the values in <target_values>. Both the header value and
    the target value(s) are converted to strings before being compared.

    :param batch: image batch to filter
    :param key: header key to filter on
    :param target_values: accepted value(s) for key. A list, tuple, set or
        frozenset is treated as a collection of accepted values; anything else
        is treated as one single accepted value
    :raises ParsingError: if an image does not contain <key>
    :return: image batch containing the subset of images which pass
    """
    # Enforce strings for later matching. Tuples and sets are unpacked just
    # like lists: stringifying them as a whole (e.g. "('a', 'b')") would
    # silently select nothing.
    if isinstance(target_values, (list, tuple, set, frozenset)):
        accepted = {str(x) for x in target_values}
    else:
        accepted = {str(target_values)}

    new_batch = ImageBatch()

    for image in batch:
        # Only the header lookup is guarded, so that an unrelated KeyError
        # is never reported as a header-parsing problem
        try:
            value = image[key]
        except KeyError as exc:
            logger.error(exc)
            raise ParsingError(exc) from exc

        if str(value) in accepted:
            new_batch.append(image)

    return new_batch
class ImageSelector(BaseImageProcessor, CleanupProcessor):
    """
    Processor to only select a subset of images from a batch. Images can
    be selected using header keywords. For example, using:
        ImageSelector(("OBSCLASS", "SCIENCE"))
    selects Images with header["OBSCLASS"]=="SCIENCE"

    Several (key, value(s)) pairs can be passed. They are applied one after
    the other, so an image is only kept if it passes every one of them.
    """

    base_key = "select"

    def __init__(self, *args: tuple[str, str | list[str]]):
        """
        :param args: one or more (header key, accepted value(s)) pairs
        """
        super().__init__()
        self.targets = args

    def __str__(self):
        reqs = []
        for target in self.targets:
            header_key, accepted = target[0], target[1]
            if isinstance(accepted, list):
                # Values are stringified before joining: the selection itself
                # compares str(value), so non-string entries (e.g. ints) are
                # valid targets and must not break the description
                accepted = " or ".join(str(x) for x in accepted)
            reqs.append(f"{header_key} = {accepted}")
        return f"Processor to select images where {', and '.join(reqs)}"

    def _apply_to_images(
        self,
        batch: ImageBatch,
    ) -> ImageBatch:
        """
        Filters the batch successively with each (key, value(s)) pair

        :param batch: image batch to filter
        :return: image batch with only the images which pass every selection
        """
        for header_key, target_values in self.targets:
            batch = select_from_images(
                batch, key=header_key, target_values=target_values
            )

        return batch
def split_images_into_batches(
    images: ImageBatch, split_key: str | list[str]
) -> Dataset:
    """
    Function to split a single :class:`~mirar.data.image_data.ImageBatch` object
    into multiple :class:`~mirar.data.base_data.DataBatch` objects.
    Each new batch will have the same value of <split_key>.
    Returns a dataset containing the new batches

    :param images: Image batch to split
    :param split_key: Key, or list of keys, to split batch on. With several
        keys, images are grouped by the combination of all their values
    :return: Dataset containing new image batches, in order of first appearance
        of each group
    """

    if isinstance(split_key, str):
        split_key = [split_key]

    groups = {}

    for image in images:
        # Group on a tuple of the stringified values rather than on a joined
        # string: with "_".join, ("a_b", "c") and ("a", "b_c") would both give
        # "a_b_c" and two different groups would be merged into one batch
        uid = tuple(str(image[key]) for key in split_key)
        groups.setdefault(uid, []).append(image)

    logger.debug(groups)

    return Dataset([ImageBatch(x) for x in groups.values()])
class ImageBatcher(BaseImageProcessor):
    """
    Processor which regroups the images of each incoming
    :class:`~mirar.data.image_data.ImageBatch` into several
    :class:`~mirar.data.base_data.DataBatch` objects, using the `split_key`
    argument to decide which images belong together.

    Batching by filter, for instance, is done with:
        ImageBatcher(split_key="filter")
    which will return N batches for the N different filters present
    in the directory you are reducing.

    If no batching is needed at some stage of a reduction, split by
    BASE_NAME_KEY instead:
        ImageBatcher(split_key=BASE_NAME_KEY)
    which returns ImageBatches of length 1, one for each file in the
    directory you're working with.
    """

    base_key = "batch"

    def __init__(self, split_key: str | list[str]):
        """
        :param split_key: header key, or list of header keys, to batch on
        """
        super().__init__()
        self.split_key = split_key

    def __str__(self) -> str:
        keys = self.split_key if isinstance(self.split_key, list) else [self.split_key]
        joined_keys = " and ".join(keys)
        return (
            "Groups images into batches, with each batch having "
            f"the same value of {joined_keys}"
        )

    def _apply_to_images(
        self,
        batch: ImageBatch,
    ) -> ImageBatch:
        # Individual batches pass through untouched; the regrouping itself
        # is done at the dataset level, in update_dataset
        return batch

    def update_dataset(self, dataset: Dataset) -> Dataset:
        """
        Splits every batch of the dataset on the split key, and gathers
        all the resulting batches into one new dataset

        :param dataset: dataset to regroup
        :return: dataset containing the new batches
        """
        regrouped = Dataset()

        for batch in dataset:
            regrouped += split_images_into_batches(batch, split_key=self.split_key)

        return regrouped
class ImageDebatcher(BaseImageProcessor):
    """
    Processor which merges all incoming
    :class:`~mirar.data.image_data.ImageBatch` objects into one single batch.
    Useful when images were batched earlier in a workflow, and you want to
    start afresh and batch them again with a different split key.
    """

    base_key = "debatch"

    def __str__(self) -> str:
        return "Processor to combine all images into a single ImageBatch"

    def _apply_to_images(
        self,
        batch: ImageBatch,
    ) -> ImageBatch:
        # Individual batches pass through untouched; the merging itself
        # is done at the dataset level, in update_dataset
        return batch

    def update_dataset(self, dataset: Dataset) -> Dataset:
        """
        Concatenates every batch of the dataset into one batch

        :param dataset: dataset to merge
        :return: dataset containing a single batch with all the images
        """
        merged = ImageBatch()

        for batch in dataset:
            merged += batch

        return Dataset([merged])