| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- from __future__ import annotations
- from typing import Any
- import torch
- from tqdm.auto import tqdm
- from ultralytics.engine.results import Results
- from ultralytics.models.yolo.detect import DetectionPredictor
- from ultralytics.utils import ops
- import wandb
- def scale_bounding_box_to_original_image_shape(
- box: torch.Tensor,
- resized_image_shape: tuple,
- original_image_shape: tuple,
- ratio_pad: bool,
- ) -> list[int]:
- """YOLOv8 resizes images during training and the label values are normalized based on this resized shape.
- This function rescales the bounding box labels to the original
- image shape.
- Reference: https://github.com/ultralytics/ultralytics/blob/main/ultralytics/yolo/utils/callbacks/comet.py#L105
- """
- resized_image_height, resized_image_width = resized_image_shape
- # Convert normalized xywh format predictions to xyxy in resized scale format
- box = ops.xywhn2xyxy(box, h=resized_image_height, w=resized_image_width)
- # Scale box predictions from resized image scale back to original image scale
- box = ops.scale_boxes(resized_image_shape, box, original_image_shape, ratio_pad)
- # # Convert bounding box format from xyxy to xywh for Comet logging
- box = ops.xyxy2xywh(box)
- return box.tolist()
- def get_ground_truth_bbox_annotations(
- img_idx: int, image_path: str, batch: dict, class_name_map: dict = None
- ) -> list[dict[str, Any]]:
- """Get ground truth bounding box annotation data in the form required for `wandb.Image` overlay system."""
- indices = batch["batch_idx"] == img_idx
- bboxes = batch["bboxes"][indices]
- if len(batch["cls"][indices]):
- cls_labels = batch["cls"][indices].squeeze(1).tolist()
- else:
- cls_labels = []
- class_name_map_reverse = {v: k for k, v in class_name_map.items()}
- if len(bboxes) == 0:
- wandb.termwarn(
- f"Image: {image_path} has no bounding boxes labels", repeat=False
- )
- return None
- if len(batch["cls"][indices]):
- cls_labels = batch["cls"][indices].squeeze(1).tolist()
- else:
- cls_labels = []
- if class_name_map:
- cls_labels = [str(class_name_map[label]) for label in cls_labels]
- original_image_shape = batch["ori_shape"][img_idx]
- resized_image_shape = batch["resized_shape"][img_idx]
- ratio_pad = batch["ratio_pad"][img_idx]
- data = []
- for box, label in zip(bboxes, cls_labels):
- box = scale_bounding_box_to_original_image_shape(
- box, resized_image_shape, original_image_shape, ratio_pad
- )
- data.append(
- {
- "position": {
- "middle": [int(box[0]), int(box[1])],
- "width": int(box[2]),
- "height": int(box[3]),
- },
- "domain": "pixel",
- "class_id": class_name_map_reverse[label],
- "box_caption": label,
- }
- )
- return data
- def get_mean_confidence_map(
- classes: list, confidence: list, class_id_to_label: dict
- ) -> dict[str, float]:
- """Get Mean-confidence map from the predictions to be logged into a `wandb.Table`."""
- confidence_map = {v: [] for _, v in class_id_to_label.items()}
- for class_idx, confidence_value in zip(classes, confidence):
- confidence_map[class_id_to_label[class_idx]].append(confidence_value)
- updated_confidence_map = {}
- for label, confidence_list in confidence_map.items():
- if len(confidence_list) > 0:
- updated_confidence_map[label] = sum(confidence_list) / len(confidence_list)
- else:
- updated_confidence_map[label] = 0
- return updated_confidence_map
- def get_boxes(result: Results) -> tuple[dict, dict]:
- """Convert an ultralytics prediction result into metadata for the `wandb.Image` overlay system."""
- boxes = result.boxes.xywh.long().numpy()
- classes = result.boxes.cls.long().numpy()
- confidence = result.boxes.conf.numpy()
- class_id_to_label = {int(k): str(v) for k, v in result.names.items()}
- mean_confidence_map = get_mean_confidence_map(
- classes, confidence, class_id_to_label
- )
- box_data = []
- for idx in range(len(boxes)):
- box_data.append(
- {
- "position": {
- "middle": [int(boxes[idx][0]), int(boxes[idx][1])],
- "width": int(boxes[idx][2]),
- "height": int(boxes[idx][3]),
- },
- "domain": "pixel",
- "class_id": int(classes[idx]),
- "box_caption": class_id_to_label[int(classes[idx])],
- "scores": {"confidence": float(confidence[idx])},
- }
- )
- boxes = {
- "predictions": {
- "box_data": box_data,
- "class_labels": class_id_to_label,
- },
- }
- return boxes, mean_confidence_map
- def plot_bbox_predictions(
- result: Results, model_name: str, table: wandb.Table | None = None
- ) -> wandb.Table | tuple[wandb.Image, dict, dict]:
- """Plot the images with the W&B overlay system.
- The `wandb.Image` is either added to a `wandb.Table` or returned.
- """
- result = result.to("cpu")
- boxes, mean_confidence_map = get_boxes(result)
- image = wandb.Image(result.orig_img[:, :, ::-1], boxes=boxes)
- if table is not None:
- table.add_data(
- model_name,
- image,
- len(boxes["predictions"]["box_data"]),
- mean_confidence_map,
- result.speed,
- )
- return table
- return image, boxes["predictions"], mean_confidence_map
- def plot_detection_validation_results(
- dataloader: Any,
- class_label_map: dict,
- model_name: str,
- predictor: DetectionPredictor,
- table: wandb.Table,
- max_validation_batches: int,
- epoch: int | None = None,
- ) -> wandb.Table:
- """Plot validation results in a table."""
- data_idx = 0
- num_dataloader_batches = len(dataloader.dataset) // dataloader.batch_size
- max_validation_batches = min(max_validation_batches, num_dataloader_batches)
- for batch_idx, batch in enumerate(dataloader):
- prediction_results = predictor(batch["im_file"])
- progress_bar_result_iterable = tqdm(
- enumerate(prediction_results),
- total=len(prediction_results),
- desc=f"Generating Visualizations for batch-{batch_idx + 1}/{max_validation_batches}",
- )
- for img_idx, prediction_result in progress_bar_result_iterable:
- prediction_result = prediction_result.to("cpu")
- _, prediction_box_data, mean_confidence_map = plot_bbox_predictions(
- prediction_result, model_name
- )
- try:
- ground_truth_data = get_ground_truth_bbox_annotations(
- img_idx, batch["im_file"][img_idx], batch, class_label_map
- )
- wandb_image = wandb.Image(
- batch["im_file"][img_idx],
- boxes={
- "ground-truth": {
- "box_data": ground_truth_data,
- "class_labels": class_label_map,
- },
- "predictions": {
- "box_data": prediction_box_data["box_data"],
- "class_labels": class_label_map,
- },
- },
- )
- table_rows = [
- data_idx,
- batch_idx,
- wandb_image,
- mean_confidence_map,
- prediction_result.speed,
- ]
- table_rows = [epoch] + table_rows if epoch is not None else table_rows
- table_rows = [model_name] + table_rows
- table.add_data(*table_rows)
- data_idx += 1
- except TypeError:
- pass
- if batch_idx + 1 == max_validation_batches:
- break
- return table
|