"""Domain adaptation transforms for image augmentation.

This module provides transformations designed to bridge the domain gap between
datasets by adapting the style of an input image to match that of reference images
from a target domain. Adaptations are based on matching statistical properties
like histograms, frequency spectra, or overall pixel distributions.
"""
from __future__ import annotations

import warnings
from collections.abc import Sequence
from typing import Annotated, Any, Callable, Literal, cast

import cv2
import numpy as np
from pydantic import AfterValidator, field_validator, model_validator
from typing_extensions import Self

from albumentations.augmentations.mixing.domain_adaptation_functional import (
    adapt_pixel_distribution,
    apply_histogram,
    fourier_domain_adaptation,
)
from albumentations.augmentations.utils import read_rgb_image
from albumentations.core.pydantic import ZeroOneRangeType, check_range_bounds, nondecreasing
from albumentations.core.transforms_interface import BaseTransformInitSchema, ImageOnlyTransform
  23. __all__ = [
  24. "FDA",
  25. "HistogramMatching",
  26. "PixelDistributionAdaptation",
  27. ]
  28. MAX_BETA_LIMIT = 0.5
  29. # Base class for Domain Adaptation Init Schema
  30. class BaseDomainAdaptationInitSchema(BaseTransformInitSchema):
  31. reference_images: Sequence[Any] | None
  32. read_fn: Callable[[Any], np.ndarray] | None
  33. metadata_key: str
  34. @model_validator(mode="after")
  35. def _check_deprecated_args(self) -> Self:
  36. if self.reference_images is not None:
  37. warnings.warn(
  38. "'reference_images' and 'read_fn' arguments are deprecated. "
  39. "Please pass pre-loaded reference images "
  40. f"using the '{self.metadata_key}' key in the input data dictionary.",
  41. DeprecationWarning,
  42. stacklevel=3, # Adjust stacklevel as needed
  43. )
  44. if self.read_fn is None:
  45. msg = "read_fn cannot be None when using the deprecated 'reference_images' argument."
  46. raise ValueError(msg)
  47. return self
  48. class BaseDomainAdaptation(ImageOnlyTransform):
  49. """Base class for domain adaptation transforms.
  50. Domain adaptation transforms modify source images to match the characteristics of a target domain.
  51. These transforms typically require an additional reference image or dataset from the target domain
  52. to extract style information or domain-specific features.
  53. This base class provides the framework for implementing various domain adaptation techniques such as
  54. color transfer, style transfer, frequency domain adaptation, or histogram matching.
  55. Args:
  56. reference_images (Sequence[Any] | None): Deprecated. Sequence of references to images from the target
  57. domain. Should be used with read_fn to load actual images. Prefer passing pre-loaded images via
  58. metadata_key.
  59. read_fn (Callable[[Any], np.ndarray] | None): Deprecated. Function to read an image from a reference.
  60. Should be used with reference_images.
  61. metadata_key (str): Key in the input data dictionary that contains pre-loaded target domain images.
  62. p (float): Probability of applying the transform. Default: 0.5.
  63. Targets:
  64. image
  65. Image types:
  66. uint8, float32
  67. Notes:
  68. - Subclasses should implement the `apply` method to perform the actual adaptation.
  69. - Use `targets_as_params` property to define what additional data your transform needs.
  70. - Override `get_params_dependent_on_data` to extract the target domain data.
  71. - Domain adaptation often requires per-sample auxiliary data, which should be passed
  72. through the main data dictionary rather than at initialization time.
  73. Examples:
  74. >>> import numpy as np
  75. >>> import albumentations as A
  76. >>> import cv2
  77. >>>
  78. >>> # Implement a simple color transfer domain adaptation transform
  79. >>> class SimpleColorTransfer(A.BaseDomainAdaptation):
  80. ... class InitSchema(A.BaseTransformInitSchema):
  81. ... intensity: float = Field(gt=0, le=1)
  82. ... reference_key: str
  83. ...
  84. ... def __init__(
  85. ... self,
  86. ... intensity: float = 0.5,
  87. ... reference_key: str = "target_image",
  88. ... p: float = 1.0
  89. ... ):
  90. ... super().__init__(p=p)
  91. ... self.intensity = intensity
  92. ... self.reference_key = reference_key
  93. ...
  94. ... @property
  95. ... def targets_as_params(self) -> list[str]:
  96. ... return [self.reference_key] # We need target domain image
  97. ...
  98. ... def get_params_dependent_on_data(
  99. ... self,
  100. ... params: dict[str, Any],
  101. ... data: dict[str, Any]
  102. ... ) -> dict[str, Any]:
  103. ... target_image = data.get(self.reference_key)
  104. ... if target_image is None:
  105. ... # Fallback if target image is not provided
  106. ... return {"target_image": None}
  107. ... return {"target_image": target_image}
  108. ...
  109. ... def apply(
  110. ... self,
  111. ... img: np.ndarray,
  112. ... target_image: np.ndarray = None,
  113. ... **params
  114. ... ) -> np.ndarray:
  115. ... if target_image is None:
  116. ... return img
  117. ...
  118. ... # Simple color transfer implementation
  119. ... # Calculate mean and std of source and target images
  120. ... src_mean = np.mean(img, axis=(0, 1))
  121. ... src_std = np.std(img, axis=(0, 1))
  122. ... tgt_mean = np.mean(target_image, axis=(0, 1))
  123. ... tgt_std = np.std(target_image, axis=(0, 1))
  124. ...
  125. ... # Normalize source image
  126. ... normalized = (img - src_mean) / (src_std + 1e-7)
  127. ...
  128. ... # Scale by target statistics and blend with original
  129. ... transformed = normalized * tgt_std + tgt_mean
  130. ... transformed = np.clip(transformed, 0, 255).astype(np.uint8)
  131. ...
  132. ... # Blend the result based on intensity
  133. ... result = cv2.addWeighted(img, 1 - self.intensity, transformed, self.intensity, 0)
  134. ... return result
  135. >>>
  136. >>> # Usage example with a target image from a different domain
  137. >>> source_image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
  138. >>> target_image = np.random.randint(100, 200, (200, 200, 3), dtype=np.uint8) # Different domain image
  139. >>>
  140. >>> # Create the transform with the pipeline
  141. >>> transform = A.Compose([
  142. ... SimpleColorTransfer(intensity=0.7, reference_key="target_img", p=1.0),
  143. ... ])
  144. >>>
  145. >>> # Apply the transform with the target image passed in the data dictionary
  146. >>> result = transform(image=source_image, target_img=target_image)
  147. >>> adapted_image = result["image"] # Image with characteristics transferred from target domain
  148. """
  149. InitSchema: type[BaseDomainAdaptationInitSchema]
  150. def __init__(
  151. self,
  152. reference_images: Sequence[Any] | None,
  153. read_fn: Callable[[Any], np.ndarray] | None,
  154. metadata_key: str,
  155. p: float = 0.5,
  156. ):
  157. super().__init__(p=p)
  158. self.reference_images = reference_images
  159. self.read_fn = read_fn
  160. self.metadata_key = metadata_key
  161. @property
  162. def targets_as_params(self) -> list[str]:
  163. return [self.metadata_key]
  164. def _get_reference_image(self, data: dict[str, Any]) -> np.ndarray:
  165. """Retrieves the reference image from metadata or deprecated arguments."""
  166. reference_image = None
  167. if metadata_images := data.get(self.metadata_key):
  168. if not isinstance(metadata_images, Sequence) or not metadata_images:
  169. raise ValueError(
  170. f"Metadata key '{self.metadata_key}' should contain a non-empty sequence of numpy arrays.",
  171. )
  172. if not isinstance(metadata_images[0], np.ndarray):
  173. raise ValueError(
  174. f"Images in metadata key '{self.metadata_key}' should be numpy arrays.",
  175. )
  176. reference_image = self.py_random.choice(metadata_images)
  177. if self.reference_images is not None:
  178. warnings.warn(
  179. f"Both 'reference_images' (deprecated constructor argument) and metadata via "
  180. f"'{self.metadata_key}' were provided. Prioritizing metadata.",
  181. UserWarning,
  182. stacklevel=3, # Adjust stacklevel as needed
  183. )
  184. elif self.reference_images is not None:
  185. # Deprecation warning is handled by the InitSchema validator
  186. if self.read_fn is None:
  187. # This case should ideally be caught by InitSchema, but safety check
  188. msg = "read_fn cannot be None when using the deprecated 'reference_images' argument."
  189. raise ValueError(msg)
  190. ref_source = self.py_random.choice(self.reference_images)
  191. reference_image = self.read_fn(ref_source)
  192. else:
  193. raise ValueError(
  194. f"{self.__class__.__name__} requires reference images. Provide them via the `metadata_key` "
  195. f"'{self.metadata_key}' in the input data, or use the deprecated 'reference_images' argument.",
  196. )
  197. if reference_image is None:
  198. # Should not happen if logic above is correct, but safety check
  199. msg = "Could not obtain a reference image."
  200. raise RuntimeError(msg)
  201. return reference_image
  202. def to_dict_private(self) -> dict[str, Any]:
  203. """Convert the transform to a dictionary for serialization.
  204. Raises:
  205. NotImplementedError: Domain adaptation transforms cannot be reliably serialized
  206. when using metadata key or deprecated arguments.
  207. """
  208. if self.reference_images is not None:
  209. msg = (
  210. f"{self.__class__.__name__} cannot be reliably serialized when using the deprecated 'reference_images'."
  211. )
  212. raise NotImplementedError(msg)
  213. msg = (
  214. f"{self.__class__.__name__} cannot be reliably serialized due to its dependency "
  215. "on external data via metadata."
  216. )
  217. raise NotImplementedError(msg)
  218. class HistogramMatching(BaseDomainAdaptation):
  219. """Adjust the pixel value distribution of an input image to match a reference image.
  220. This transform modifies the pixel intensities of the input image so that its histogram
  221. matches the histogram of a provided reference image. This process is applied independently
  222. to each channel of the image if it is multi-channel.
  223. Why use Histogram Matching?
  224. **Domain Adaptation:** Helps bridge the gap between images from different sources
  225. (e.g., different cameras, lighting conditions, synthetic vs. real data) by aligning
  226. their overall intensity and contrast characteristics.
  227. *Use Case Example:* Imagine you have labeled training images from one source (e.g., daytime photos,
  228. medical scans from Hospital A) but expect your model to work on images from a different
  229. source at test time (e.g., nighttime photos, scans from Hospital B). You might only have
  230. unlabeled images from the target (test) domain. HistogramMatching can be used to make your
  231. labeled training images resemble the *style* (intensity and contrast distribution) of the
  232. unlabeled target images. By training on these adapted images, your model may generalize
  233. better to the target domain without needing labels for it.
  234. How it works:
  235. The core idea is to map the pixel values of the input image such that its cumulative
  236. distribution function (CDF) matches the CDF of the reference image. This effectively
  237. reshapes the input image's histogram to resemble the reference's histogram.
  238. Args:
  239. metadata_key (str): Key in the input `data` dictionary to retrieve the reference image(s).
  240. The value should be a sequence (e.g., list) of numpy arrays (pre-loaded images).
  241. Default: "hm_metadata".
  242. blend_ratio (tuple[float, float]): Range for the blending factor between the original
  243. and the histogram-matched image. A value of 0 means the original image is returned,
  244. 1 means the fully matched image is returned. A random value within this range [min, max]
  245. is sampled for each application. This allows for varying degrees of adaptation.
  246. Default: (0.5, 1.0).
  247. p (float): Probability of applying the transform. Default: 0.5.
  248. Targets:
  249. image
  250. Image types:
  251. uint8, float32
  252. Note:
  253. - Requires at least one reference image to be provided via the `metadata_key` argument.
  254. - The `reference_images` and `read_fn` constructor arguments are deprecated.
  255. Examples:
  256. >>> import numpy as np
  257. >>> import albumentations as A
  258. >>> import cv2
  259. >>>
  260. >>> # Create sample images for demonstration
  261. >>> # Source image: dark image with low contrast
  262. >>> source_image = np.ones((100, 100, 3), dtype=np.uint8) * 50 # Dark gray image
  263. >>> source_image[30:70, 30:70] = 100 # Add slightly brighter square in center
  264. >>>
  265. >>> # Target image: higher brightness and contrast
  266. >>> target_image = np.ones((100, 100, 3), dtype=np.uint8) * 150 # Bright image
  267. >>> target_image[20:80, 20:80] = 200 # Add even brighter square
  268. >>>
  269. >>> # Initialize the histogram matching transform with custom settings
  270. >>> transform = A.Compose([
  271. ... A.HistogramMatching(
  272. ... blend_ratio=(0.7, 0.9), # Control the strength of histogram matching
  273. ... metadata_key="reference_imgs", # Custom metadata key
  274. ... p=1.0
  275. ... )
  276. ... ])
  277. >>>
  278. >>> # Apply the transform
  279. >>> result = transform(
  280. ... image=source_image,
  281. ... reference_imgs=[target_image] # Pass reference image via metadata key
  282. ... )
  283. >>>
  284. >>> # Get the histogram-matched image
  285. >>> matched_image = result["image"]
  286. >>>
  287. >>> # The matched_image will have brightness and contrast similar to target_image
  288. >>> # while preserving the content of source_image
  289. >>>
  290. >>> # Multiple reference images can be provided:
  291. >>> ref_imgs = [
  292. ... target_image,
  293. ... np.random.randint(100, 200, (100, 100, 3), dtype=np.uint8) # Another reference image
  294. ... ]
  295. >>> multiple_refs_result = transform(image=source_image, reference_imgs=ref_imgs)
  296. >>> # A random reference image from the list will be chosen for each transform application
  297. References:
  298. Histogram Matching in scikit-image:
  299. https://scikit-image.org/docs/dev/auto_examples/color_exposure/plot_histogram_matching.html
  300. """
  301. class InitSchema(BaseDomainAdaptationInitSchema):
  302. blend_ratio: Annotated[
  303. tuple[float, float],
  304. AfterValidator(nondecreasing),
  305. AfterValidator(check_range_bounds(0, 1)),
  306. ]
  307. def __init__(
  308. self,
  309. reference_images: Sequence[Any] | None = None,
  310. blend_ratio: tuple[float, float] = (0.5, 1.0),
  311. read_fn: Callable[[Any], np.ndarray] | None = read_rgb_image,
  312. metadata_key: str = "hm_metadata",
  313. p: float = 0.5,
  314. ):
  315. super().__init__(reference_images=reference_images, read_fn=read_fn, metadata_key=metadata_key, p=p)
  316. self.blend_ratio = blend_ratio
  317. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  318. """Generate parameters for the transform based on input data.
  319. Args:
  320. params (dict[str, Any]): Parameters from the previous transform in the pipeline
  321. data (dict[str, Any]): Input data dictionary containing the image and metadata
  322. Returns:
  323. dict[str, Any]: Dictionary containing the reference image and blend ratio
  324. """
  325. reference_image = self._get_reference_image(data)
  326. return {
  327. "reference_image": reference_image,
  328. "blend_ratio": self.py_random.uniform(*self.blend_ratio),
  329. }
  330. def apply(
  331. self,
  332. img: np.ndarray,
  333. reference_image: np.ndarray,
  334. blend_ratio: float,
  335. **params: Any,
  336. ) -> np.ndarray:
  337. """Apply histogram matching to the input image.
  338. Args:
  339. img (np.ndarray): Input image to be transformed
  340. reference_image (np.ndarray): Reference image for histogram matching
  341. blend_ratio (float): Blending factor between the original and matched image
  342. **params (Any): Additional parameters
  343. Returns:
  344. np.ndarray: Transformed image with histogram matched to the reference image
  345. """
  346. return apply_histogram(img, reference_image, blend_ratio)
  347. class FDA(BaseDomainAdaptation):
  348. """Fourier Domain Adaptation (FDA).
  349. Adapts the style of the input image to match the style of a reference image
  350. by manipulating their frequency components in the Fourier domain. This is
  351. particularly useful for unsupervised domain adaptation (UDA).
  352. Why use FDA?
  353. **Domain Adaptation:** FDA helps bridge the domain gap between source and target
  354. datasets (e.g., synthetic vs. real, day vs. night) by aligning their low-frequency
  355. Fourier spectrum components. This can improve model performance on the target domain
  356. without requiring target labels.
  357. *Use Case Example:* Imagine you have labeled training data acquired under certain conditions
  358. (e.g., images from Hospital A using a specific scanner) but need your model to perform well
  359. on data from a different distribution (e.g., unlabeled images from Hospital B with a different scanner).
  360. FDA can adapt the labeled source images to match the *style* (frequency characteristics)
  361. of the unlabeled target images, potentially improving the model's generalization to the
  362. target domain at test time.
  363. How it works:
  364. FDA operates in the frequency domain. It replaces the low-frequency components
  365. of the source image's Fourier transform with the low-frequency components from the
  366. reference (target domain) image's Fourier transform. The `beta_limit` parameter
  367. controls the size of the frequency window being swapped.
  368. Args:
  369. metadata_key (str): Key in the input `data` dictionary to retrieve the reference image(s).
  370. The value should be a sequence (e.g., list) of numpy arrays (pre-loaded images).
  371. Default: "fda_metadata".
  372. beta_limit (tuple[float, float] | float): Controls the extent of the low-frequency
  373. spectrum swap. A larger beta means more components are swapped. Corresponds to the L
  374. parameter in the original paper. Should be in the range [0, 0.5]. Sampling is uniform
  375. within the provided range [min, max]. Default: (0, 0.1).
  376. p (float): Probability of applying the transform. Default: 0.5.
  377. Targets:
  378. image
  379. Image types:
  380. uint8, float32
  381. Note:
  382. - Requires at least one reference image to be provided via the `metadata_key` argument.
  383. - The `reference_images` and `read_fn` constructor arguments are deprecated.
  384. Examples:
  385. >>> import numpy as np
  386. >>> import albumentations as A
  387. >>> import cv2
  388. >>>
  389. >>> # Create sample images for demonstration
  390. >>> # Source image: synthetic or simulated image (e.g., from a rendered game environment)
  391. >>> source_img = np.zeros((100, 100, 3), dtype=np.uint8)
  392. >>> # Create a pattern in the source image
  393. >>> source_img[20:80, 20:80, 0] = 200 # Red square
  394. >>> source_img[40:60, 40:60, 1] = 200 # Green inner square
  395. >>>
  396. >>> # Target domain image: real-world image with different texture/frequency characteristics
  397. >>> # For this example, we'll create an image with different frequency patterns
  398. >>> target_img = np.zeros((100, 100, 3), dtype=np.uint8)
  399. >>> for i in range(100):
  400. ... for j in range(100):
  401. ... # Create a high-frequency pattern
  402. ... target_img[i, j, 0] = ((i + j) % 8) * 30
  403. ... target_img[i, j, 1] = ((i - j) % 8) * 30
  404. ... target_img[i, j, 2] = ((i * j) % 8) * 30
  405. >>>
  406. >>> # Example 1: FDA with minimal adaptation (small beta value)
  407. >>> # This will subtly adjust the frequency characteristics
  408. >>> minimal_fda = A.Compose([
  409. ... A.FDA(
  410. ... beta_limit=(0.01, 0.05), # Small beta range for subtle adaptation
  411. ... metadata_key="target_domain", # Custom metadata key
  412. ... p=1.0
  413. ... )
  414. ... ])
  415. >>>
  416. >>> # Apply the transform with minimal adaptation
  417. >>> minimal_result = minimal_fda(
  418. ... image=source_img,
  419. ... target_domain=[target_img] # Pass reference image via custom metadata key
  420. ... )
  421. >>> minimal_adapted_img = minimal_result["image"]
  422. >>>
  423. >>> # Example 2: FDA with moderate adaptation (medium beta value)
  424. >>> moderate_fda = A.Compose([
  425. ... A.FDA(
  426. ... beta_limit=(0.1, 0.2), # Medium beta range
  427. ... metadata_key="target_domain",
  428. ... p=1.0
  429. ... )
  430. ... ])
  431. >>>
  432. >>> moderate_result = moderate_fda(image=source_img, target_domain=[target_img])
  433. >>> moderate_adapted_img = moderate_result["image"]
  434. >>>
  435. >>> # Example 3: FDA with strong adaptation (larger beta value)
  436. >>> strong_fda = A.Compose([
  437. ... A.FDA(
  438. ... beta_limit=(0.3, 0.5), # Larger beta range (upper limit is MAX_BETA_LIMIT)
  439. ... metadata_key="target_domain",
  440. ... p=1.0
  441. ... )
  442. ... ])
  443. >>>
  444. >>> strong_result = strong_fda(image=source_img, target_domain=[target_img])
  445. >>> strong_adapted_img = strong_result["image"]
  446. >>>
  447. >>> # Example 4: Using multiple target domain images
  448. >>> # Creating a list of target domain images with different characteristics
  449. >>> target_imgs = [target_img]
  450. >>>
  451. >>> # Add another target image with different pattern
  452. >>> another_target = np.zeros((100, 100, 3), dtype=np.uint8)
  453. >>> for i in range(100):
  454. ... for j in range(100):
  455. ... another_target[i, j, 0] = (i // 10) * 25
  456. ... another_target[i, j, 1] = (j // 10) * 25
  457. ... another_target[i, j, 2] = ((i + j) // 10) * 25
  458. >>> target_imgs.append(another_target)
  459. >>>
  460. >>> # Using default FDA settings with multiple target images
  461. >>> multi_target_fda = A.Compose([
  462. ... A.FDA(p=1.0) # Using default settings with default metadata_key="fda_metadata"
  463. ... ])
  464. >>>
  465. >>> # A random target image will be selected from the list for each application
  466. >>> multi_target_result = multi_target_fda(image=source_img, fda_metadata=target_imgs)
  467. >>> adapted_image = multi_target_result["image"]
  468. References:
  469. - FDA: https://github.com/YanchaoYang/FDA
  470. - FDA: https://openaccess.thecvf.com/content_CVPR_2020/papers/Yang_FDA_Fourier_Domain_Adaptation_for_Semantic_Segmentation_CVPR_2020_paper.pdf
  471. """
  472. class InitSchema(BaseDomainAdaptationInitSchema):
  473. beta_limit: ZeroOneRangeType
  474. @field_validator("beta_limit")
  475. @classmethod
  476. def _check_ranges(cls, value: tuple[float, float]) -> tuple[float, float]:
  477. bounds = 0, MAX_BETA_LIMIT
  478. if not bounds[0] <= value[0] <= value[1] <= bounds[1]:
  479. raise ValueError(f"Values should be in the range {bounds} got {value} ")
  480. return value
  481. def __init__(
  482. self,
  483. reference_images: Sequence[Any] | None = None,
  484. beta_limit: tuple[float, float] | float = (0, 0.1),
  485. read_fn: Callable[[Any], np.ndarray] | None = read_rgb_image,
  486. metadata_key: str = "fda_metadata",
  487. p: float = 0.5,
  488. ):
  489. super().__init__(reference_images=reference_images, read_fn=read_fn, metadata_key=metadata_key, p=p)
  490. self.beta_limit = cast("tuple[float, float]", beta_limit)
  491. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  492. """Generate parameters for the transform based on input data."""
  493. target_image = self._get_reference_image(data)
  494. height, width = params["shape"][:2]
  495. # Resize the target image to match the input image dimensions
  496. target_image_resized = cv2.resize(target_image, dsize=(width, height))
  497. return {"target_image": target_image_resized, "beta": self.py_random.uniform(*self.beta_limit)}
  498. def apply(
  499. self,
  500. img: np.ndarray,
  501. target_image: np.ndarray,
  502. beta: float,
  503. **params: Any,
  504. ) -> np.ndarray:
  505. """Apply Fourier Domain Adaptation to the input image.
  506. Args:
  507. img (np.ndarray): Input image to be transformed
  508. target_image (np.ndarray): Target domain image for adaptation
  509. beta (float): Coefficient controlling the extent of frequency component swapping
  510. **params (Any): Additional parameters
  511. Returns:
  512. np.ndarray: Transformed image with adapted frequency components
  513. """
  514. return fourier_domain_adaptation(img, target_image, beta)
  515. class PixelDistributionAdaptation(BaseDomainAdaptation):
  516. """Adapts the pixel value distribution of an input image to match a reference image
  517. using statistical transformations (PCA, StandardScaler, or MinMaxScaler).
  518. This transform aims to harmonize images from different domains by aligning their pixel-level
  519. statistical properties.
  520. Why use Pixel Distribution Adaptation?
  521. **Domain Adaptation:** Useful for aligning images across domains with differing pixel statistics
  522. (e.g., caused by different sensors, lighting, or post-processing).
  523. *Use Case Example:* Consider having labeled data from Scanner A and needing the model to perform
  524. well on unlabeled data from Scanner B, where images might have different overall brightness,
  525. contrast, or color biases. This transform can adapt the labeled images from Scanner A to
  526. mimic the pixel distribution *style* of the images from Scanner B, potentially improving
  527. generalization without needing labels for Scanner B data.
  528. How it works:
  529. 1. A chosen statistical transform (`transform_type`) is fitted to both the input (source) image
  530. and the reference (target) image separately.
  531. 2. The input image is transformed using the transform fitted on it (moving it to a standardized space).
  532. 3. The inverse transform *fitted on the reference image* is applied to the result from step 2
  533. (moving the standardized input into the reference image's statistical space).
  534. 4. The result is optionally blended with the original input image using `blend_ratio`.
  535. Args:
  536. metadata_key (str): Key in the input `data` dictionary to retrieve the reference image(s).
  537. The value should be a sequence (e.g., list) of numpy arrays (pre-loaded images).
  538. Default: "pda_metadata".
  539. blend_ratio (tuple[float, float]): Specifies the minimum and maximum blend ratio for mixing
  540. the adapted image with the original. A value of 0 means the original image is returned,
  541. 1 means the fully adapted image is returned. A random value within this range [min, max]
  542. is sampled for each application. Default: (0.25, 1.0).
  543. transform_type (Literal["pca", "standard", "minmax"]): Specifies the type of statistical
  544. transformation to apply:
  545. - "pca": Principal Component Analysis.
  546. - "standard": StandardScaler (zero mean, unit variance).
  547. - "minmax": MinMaxScaler (scales to [0, 1] range).
  548. Default: "pca".
  549. p (float): The probability of applying the transform. Default: 0.5.
  550. Targets:
  551. image
  552. Image types:
  553. uint8, float32
  554. Note:
  555. - Requires at least one reference image to be provided via the `metadata_key` argument.
  556. - The `reference_images` and `read_fn` constructor arguments are deprecated.
  557. Examples:
  558. >>> import numpy as np
  559. >>> import albumentations as A
  560. >>> import cv2
  561. >>>
  562. >>> # Create sample images for demonstration
  563. >>> # Source image: simulated image from domain A (e.g., medical scan from one scanner)
  564. >>> source_image = np.random.normal(100, 20, (100, 100, 3)).clip(0, 255).astype(np.uint8)
  565. >>>
  566. >>> # Reference image: image from domain B with different statistical properties
  567. >>> # (e.g., scan from a different scanner with different intensity distribution)
  568. >>> reference_image = np.random.normal(150, 30, (100, 100, 3)).clip(0, 255).astype(np.uint8)
  569. >>>
  570. >>> # Example 1: Using PCA transformation (default)
  571. >>> pca_transform = A.Compose([
  572. ... A.PixelDistributionAdaptation(
  573. ... transform_type="pca",
  574. ... blend_ratio=(0.8, 1.0), # Strong adaptation
  575. ... metadata_key="reference_images",
  576. ... p=1.0
  577. ... )
  578. ... ])
  579. >>>
  580. >>> # Apply the transform with the reference image
  581. >>> pca_result = pca_transform(
  582. ... image=source_image,
  583. ... reference_images=[reference_image]
  584. ... )
  585. >>>
  586. >>> # Get the adapted image
  587. >>> pca_adapted_image = pca_result["image"]
  588. >>>
  589. >>> # Example 2: Using StandardScaler transformation
  590. >>> standard_transform = A.Compose([
  591. ... A.PixelDistributionAdaptation(
  592. ... transform_type="standard",
  593. ... blend_ratio=(0.5, 0.7), # Moderate adaptation
  594. ... metadata_key="reference_images",
  595. ... p=1.0
  596. ... )
  597. ... ])
  598. >>>
  599. >>> standard_result = standard_transform(
  600. ... image=source_image,
  601. ... reference_images=[reference_image]
  602. ... )
  603. >>> standard_adapted_image = standard_result["image"]
  604. >>>
  605. >>> # Example 3: Using MinMaxScaler transformation
  606. >>> minmax_transform = A.Compose([
  607. ... A.PixelDistributionAdaptation(
  608. ... transform_type="minmax",
  609. ... blend_ratio=(0.3, 0.5), # Subtle adaptation
  610. ... metadata_key="reference_images",
  611. ... p=1.0
  612. ... )
  613. ... ])
  614. >>>
  615. >>> minmax_result = minmax_transform(
  616. ... image=source_image,
  617. ... reference_images=[reference_image]
  618. ... )
  619. >>> minmax_adapted_image = minmax_result["image"]
  620. >>>
  621. >>> # Example 4: Using multiple reference images
  622. >>> # When multiple reference images are provided, one is randomly selected for each transformation
  623. >>> multiple_references = [
  624. ... reference_image,
  625. ... np.random.normal(180, 25, (100, 100, 3)).clip(0, 255).astype(np.uint8),
  626. ... np.random.normal(120, 40, (100, 100, 3)).clip(0, 255).astype(np.uint8)
  627. ... ]
  628. >>>
  629. >>> multi_ref_transform = A.Compose([
  630. ... A.PixelDistributionAdaptation(p=1.0) # Using default settings
  631. ... ])
  632. >>>
  633. >>> # Each time the transform is applied, it randomly selects one of the reference images
  634. >>> multi_ref_result = multi_ref_transform(
  635. ... image=source_image,
  636. ... pda_metadata=multiple_references # Using the default metadata key
  637. ... )
  638. >>> adapted_image = multi_ref_result["image"]
  639. References:
  640. Qudida: https://github.com/arsenyinfo/qudida
  641. """
  642. class InitSchema(BaseDomainAdaptationInitSchema):
  643. blend_ratio: Annotated[
  644. tuple[float, float],
  645. AfterValidator(nondecreasing),
  646. AfterValidator(check_range_bounds(0, 1)),
  647. ]
  648. transform_type: Literal["pca", "standard", "minmax"]
  649. def __init__(
  650. self,
  651. reference_images: Sequence[Any] | None = None,
  652. blend_ratio: tuple[float, float] = (0.25, 1.0),
  653. read_fn: Callable[[Any], np.ndarray] | None = read_rgb_image,
  654. transform_type: Literal["pca", "standard", "minmax"] = "pca",
  655. metadata_key: str = "pda_metadata",
  656. p: float = 0.5,
  657. ):
  658. super().__init__(reference_images=reference_images, read_fn=read_fn, metadata_key=metadata_key, p=p)
  659. self.blend_ratio = blend_ratio
  660. self.transform_type = transform_type
  661. def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
  662. """Get parameters for the transform."""
  663. reference_image = self._get_reference_image(data)
  664. return {
  665. "reference_image": reference_image,
  666. "blend_ratio": self.py_random.uniform(*self.blend_ratio),
  667. }
  668. def apply(self, img: np.ndarray, reference_image: np.ndarray, blend_ratio: float, **params: Any) -> np.ndarray:
  669. """Apply pixel distribution adaptation to the input image.
  670. Args:
  671. img (np.ndarray): Input image to be transformed
  672. reference_image (np.ndarray): Reference image for distribution adaptation
  673. blend_ratio (float): Blending factor between the original and adapted image
  674. **params (Any): Additional parameters
  675. Returns:
  676. np.ndarray: Transformed image with pixel distribution adapted to the reference image
  677. """
  678. return adapt_pixel_distribution(
  679. img,
  680. ref=reference_image,
  681. weight=blend_ratio,
  682. transform_type=self.transform_type,
  683. )