_qap.py 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760
  1. import numpy as np
  2. import operator
  3. import warnings
  4. import numbers
  5. from . import (linear_sum_assignment, OptimizeResult)
  6. from ._optimize import _check_unknown_options
  7. from scipy._lib._util import check_random_state
  8. import itertools
  9. QUADRATIC_ASSIGNMENT_METHODS = ['faq', '2opt']
  10. def quadratic_assignment(A, B, method="faq", options=None):
  11. r"""
  12. Approximates solution to the quadratic assignment problem and
  13. the graph matching problem.
  14. Quadratic assignment solves problems of the following form:
  15. .. math::
  16. \min_P & \ {\ \text{trace}(A^T P B P^T)}\\
  17. \mbox{s.t. } & {P \ \epsilon \ \mathcal{P}}\\
  18. where :math:`\mathcal{P}` is the set of all permutation matrices,
  19. and :math:`A` and :math:`B` are square matrices.
  20. Graph matching tries to *maximize* the same objective function.
  21. This algorithm can be thought of as finding the alignment of the
  22. nodes of two graphs that minimizes the number of induced edge
  23. disagreements, or, in the case of weighted graphs, the sum of squared
  24. edge weight differences.
  25. Note that the quadratic assignment problem is NP-hard. The results given
  26. here are approximations and are not guaranteed to be optimal.
  27. Parameters
  28. ----------
  29. A : 2-D array, square
  30. The square matrix :math:`A` in the objective function above.
  31. B : 2-D array, square
  32. The square matrix :math:`B` in the objective function above.
  33. method : str in {'faq', '2opt'} (default: 'faq')
  34. The algorithm used to solve the problem.
  35. :ref:`'faq' <optimize.qap-faq>` (default) and
  36. :ref:`'2opt' <optimize.qap-2opt>` are available.
  37. options : dict, optional
  38. A dictionary of solver options. All solvers support the following:
  39. maximize : bool (default: False)
  40. Maximizes the objective function if ``True``.
  41. partial_match : 2-D array of integers, optional (default: None)
  42. Fixes part of the matching. Also known as a "seed" [2]_.
  43. Each row of `partial_match` specifies a pair of matched nodes:
  44. node ``partial_match[i, 0]`` of `A` is matched to node
  45. ``partial_match[i, 1]`` of `B`. The array has shape ``(m, 2)``,
  46. where ``m`` is not greater than the number of nodes, :math:`n`.
  47. rng : `numpy.random.Generator`, optional
  48. Pseudorandom number generator state. When `rng` is None, a new
  49. `numpy.random.Generator` is created using entropy from the
  50. operating system. Types other than `numpy.random.Generator` are
  51. passed to `numpy.random.default_rng` to instantiate a ``Generator``.
  52. .. versionchanged:: 1.15.0
  53. As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
  54. transition from use of `numpy.random.RandomState` to
  55. `numpy.random.Generator` is occurring. Supplying
  56. `np.random.RandomState` to this function will now emit a
  57. `DeprecationWarning`. In SciPy 1.17 its use will raise an exception.
  58. In addition relying on global state using `np.random.seed`
  59. will emit a `FutureWarning`. In SciPy 1.17 the global random number
  60. generator will no longer be used.
  61. Use of an int-like seed will raise a `FutureWarning`, in SciPy 1.17 it
  62. will be normalized via `np.random.default_rng` rather than
  63. `np.random.RandomState`.
  64. For method-specific options, see
  65. :func:`show_options('quadratic_assignment') <show_options>`.
  66. Returns
  67. -------
  68. res : OptimizeResult
  69. `OptimizeResult` containing the following fields.
  70. col_ind : 1-D array
  71. Column indices corresponding to the best permutation found of the
  72. nodes of `B`.
  73. fun : float
  74. The objective value of the solution.
  75. nit : int
  76. The number of iterations performed during optimization.
  77. Notes
  78. -----
  79. The default method :ref:`'faq' <optimize.qap-faq>` uses the Fast
  80. Approximate QAP algorithm [1]_; it typically offers the best combination of
  81. speed and accuracy.
  82. Method :ref:`'2opt' <optimize.qap-2opt>` can be computationally expensive,
  83. but may be a useful alternative, or it can be used to refine the solution
  84. returned by another method.
  85. References
  86. ----------
  87. .. [1] J.T. Vogelstein, J.M. Conroy, V. Lyzinski, L.J. Podrazik,
  88. S.G. Kratzer, E.T. Harley, D.E. Fishkind, R.J. Vogelstein, and
  89. C.E. Priebe, "Fast approximate quadratic programming for graph
  90. matching," PLOS one, vol. 10, no. 4, p. e0121002, 2015,
  91. :doi:`10.1371/journal.pone.0121002`
  92. .. [2] D. Fishkind, S. Adali, H. Patsolic, L. Meng, D. Singh, V. Lyzinski,
  93. C. Priebe, "Seeded graph matching", Pattern Recognit. 87 (2019):
  94. 203-215, :doi:`10.1016/j.patcog.2018.09.014`
  95. .. [3] "2-opt," Wikipedia.
  96. https://en.wikipedia.org/wiki/2-opt
  97. Examples
  98. --------
  99. >>> import numpy as np
  100. >>> from scipy.optimize import quadratic_assignment
  101. >>> rng = np.random.default_rng()
  102. >>> A = np.array([[0, 80, 150, 170], [80, 0, 130, 100],
  103. ... [150, 130, 0, 120], [170, 100, 120, 0]])
  104. >>> B = np.array([[0, 5, 2, 7], [0, 0, 3, 8],
  105. ... [0, 0, 0, 3], [0, 0, 0, 0]])
  106. >>> res = quadratic_assignment(A, B, options={'rng': rng})
  107. >>> print(res)
  108. fun: 3260
  109. col_ind: [0 3 2 1]
  110. nit: 9
  111. The see the relationship between the returned ``col_ind`` and ``fun``,
  112. use ``col_ind`` to form the best permutation matrix found, then evaluate
  113. the objective function :math:`f(P) = trace(A^T P B P^T )`.
  114. >>> perm = res['col_ind']
  115. >>> P = np.eye(len(A), dtype=int)[perm]
  116. >>> fun = np.trace(A.T @ P @ B @ P.T)
  117. >>> print(fun)
  118. 3260
  119. Alternatively, to avoid constructing the permutation matrix explicitly,
  120. directly permute the rows and columns of the distance matrix.
  121. >>> fun = np.trace(A.T @ B[perm][:, perm])
  122. >>> print(fun)
  123. 3260
  124. Although not guaranteed in general, ``quadratic_assignment`` happens to
  125. have found the globally optimal solution.
  126. >>> from itertools import permutations
  127. >>> perm_opt, fun_opt = None, np.inf
  128. >>> for perm in permutations([0, 1, 2, 3]):
  129. ... perm = np.array(perm)
  130. ... fun = np.trace(A.T @ B[perm][:, perm])
  131. ... if fun < fun_opt:
  132. ... fun_opt, perm_opt = fun, perm
  133. >>> print(np.array_equal(perm_opt, res['col_ind']))
  134. True
  135. Here is an example for which the default method,
  136. :ref:`'faq' <optimize.qap-faq>`, does not find the global optimum.
  137. >>> A = np.array([[0, 5, 8, 6], [5, 0, 5, 1],
  138. ... [8, 5, 0, 2], [6, 1, 2, 0]])
  139. >>> B = np.array([[0, 1, 8, 4], [1, 0, 5, 2],
  140. ... [8, 5, 0, 5], [4, 2, 5, 0]])
  141. >>> res = quadratic_assignment(A, B, options={'rng': rng})
  142. >>> print(res)
  143. fun: 178
  144. col_ind: [1 0 3 2]
  145. nit: 13
  146. If accuracy is important, consider using :ref:`'2opt' <optimize.qap-2opt>`
  147. to refine the solution.
  148. >>> guess = np.array([np.arange(len(A)), res.col_ind]).T
  149. >>> res = quadratic_assignment(A, B, method="2opt",
  150. ... options = {'rng': rng, 'partial_guess': guess})
  151. >>> print(res)
  152. fun: 176
  153. col_ind: [1 2 3 0]
  154. nit: 17
  155. """
  156. if options is None:
  157. options = {}
  158. method = method.lower()
  159. methods = {"faq": _quadratic_assignment_faq,
  160. "2opt": _quadratic_assignment_2opt}
  161. if method not in methods:
  162. raise ValueError(f"method {method} must be in {methods}.")
  163. _spec007_transition(options.get("rng", None))
  164. res = methods[method](A, B, **options)
  165. return res
  166. def _spec007_transition(rng):
  167. if isinstance(rng, np.random.RandomState):
  168. warnings.warn(
  169. "Use of `RandomState` with `quadratic_assignment` is deprecated"
  170. " and will result in an exception in SciPy 1.17",
  171. DeprecationWarning,
  172. stacklevel=2
  173. )
  174. if ((rng is None or rng is np.random) and
  175. np.random.mtrand._rand._bit_generator._seed_seq is None):
  176. warnings.warn(
  177. "The NumPy global RNG was seeded by calling `np.random.seed`."
  178. " From SciPy 1.17, this function will no longer use the global RNG.",
  179. FutureWarning,
  180. stacklevel=2
  181. )
  182. if isinstance(rng, numbers.Integral | np.integer):
  183. warnings.warn(
  184. "The behavior when the rng option is an integer is changing: the value"
  185. " will be normalized using np.random.default_rng beginning in SciPy 1.17,"
  186. " and the resulting Generator will be used to generate random numbers.",
  187. FutureWarning,
  188. stacklevel=2
  189. )
  190. def _calc_score(A, B, perm):
  191. # equivalent to objective function but avoids matmul
  192. return np.sum(A * B[perm][:, perm])
  193. def _common_input_validation(A, B, partial_match):
  194. A = np.atleast_2d(A)
  195. B = np.atleast_2d(B)
  196. if partial_match is None:
  197. partial_match = np.array([[], []]).T
  198. partial_match = np.atleast_2d(partial_match).astype(int)
  199. msg = None
  200. if A.shape[0] != A.shape[1]:
  201. msg = "`A` must be square"
  202. elif B.shape[0] != B.shape[1]:
  203. msg = "`B` must be square"
  204. elif A.ndim != 2 or B.ndim != 2:
  205. msg = "`A` and `B` must have exactly two dimensions"
  206. elif A.shape != B.shape:
  207. msg = "`A` and `B` matrices must be of equal size"
  208. elif partial_match.shape[0] > A.shape[0]:
  209. msg = "`partial_match` can have only as many seeds as there are nodes"
  210. elif partial_match.shape[1] != 2:
  211. msg = "`partial_match` must have two columns"
  212. elif partial_match.ndim != 2:
  213. msg = "`partial_match` must have exactly two dimensions"
  214. elif (partial_match < 0).any():
  215. msg = "`partial_match` must contain only positive indices"
  216. elif (partial_match >= len(A)).any():
  217. msg = "`partial_match` entries must be less than number of nodes"
  218. elif (not len(set(partial_match[:, 0])) == len(partial_match[:, 0]) or
  219. not len(set(partial_match[:, 1])) == len(partial_match[:, 1])):
  220. msg = "`partial_match` column entries must be unique"
  221. if msg is not None:
  222. raise ValueError(msg)
  223. return A, B, partial_match
  224. def _quadratic_assignment_faq(A, B,
  225. maximize=False, partial_match=None, rng=None,
  226. P0="barycenter", shuffle_input=False, maxiter=30,
  227. tol=0.03, **unknown_options):
  228. r"""Solve the quadratic assignment problem (approximately).
  229. This function solves the Quadratic Assignment Problem (QAP) and the
  230. Graph Matching Problem (GMP) using the Fast Approximate QAP Algorithm
  231. (FAQ) [1]_.
  232. Quadratic assignment solves problems of the following form:
  233. .. math::
  234. \min_P & \ {\ \text{trace}(A^T P B P^T)}\\
  235. \mbox{s.t. } & {P \ \epsilon \ \mathcal{P}}\\
  236. where :math:`\mathcal{P}` is the set of all permutation matrices,
  237. and :math:`A` and :math:`B` are square matrices.
  238. Graph matching tries to *maximize* the same objective function.
  239. This algorithm can be thought of as finding the alignment of the
  240. nodes of two graphs that minimizes the number of induced edge
  241. disagreements, or, in the case of weighted graphs, the sum of squared
  242. edge weight differences.
  243. Note that the quadratic assignment problem is NP-hard. The results given
  244. here are approximations and are not guaranteed to be optimal.
  245. Parameters
  246. ----------
  247. A : 2-D array, square
  248. The square matrix :math:`A` in the objective function above.
  249. B : 2-D array, square
  250. The square matrix :math:`B` in the objective function above.
  251. method : str in {'faq', '2opt'} (default: 'faq')
  252. The algorithm used to solve the problem. This is the method-specific
  253. documentation for 'faq'.
  254. :ref:`'2opt' <optimize.qap-2opt>` is also available.
  255. Options
  256. -------
  257. maximize : bool (default: False)
  258. Maximizes the objective function if ``True``.
  259. partial_match : 2-D array of integers, optional (default: None)
  260. Fixes part of the matching. Also known as a "seed" [2]_.
  261. Each row of `partial_match` specifies a pair of matched nodes:
  262. node ``partial_match[i, 0]`` of `A` is matched to node
  263. ``partial_match[i, 1]`` of `B`. The array has shape ``(m, 2)``, where
  264. ``m`` is not greater than the number of nodes, :math:`n`.
  265. rng : {None, int, `numpy.random.Generator`}, optional
  266. Pseudorandom number generator state. See `quadratic_assignment` for details.
  267. P0 : 2-D array, "barycenter", or "randomized" (default: "barycenter")
  268. Initial position. Must be a doubly-stochastic matrix [3]_.
  269. If the initial position is an array, it must be a doubly stochastic
  270. matrix of size :math:`m' \times m'` where :math:`m' = n - m`.
  271. If ``"barycenter"`` (default), the initial position is the barycenter
  272. of the Birkhoff polytope (the space of doubly stochastic matrices).
  273. This is a :math:`m' \times m'` matrix with all entries equal to
  274. :math:`1 / m'`.
  275. If ``"randomized"`` the initial search position is
  276. :math:`P_0 = (J + K) / 2`, where :math:`J` is the barycenter and
  277. :math:`K` is a random doubly stochastic matrix.
  278. shuffle_input : bool (default: False)
  279. Set to `True` to resolve degenerate gradients randomly. For
  280. non-degenerate gradients this option has no effect.
  281. maxiter : int, positive (default: 30)
  282. Integer specifying the max number of Frank-Wolfe iterations performed.
  283. tol : float (default: 0.03)
  284. Tolerance for termination. Frank-Wolfe iteration terminates when
  285. :math:`\frac{||P_{i}-P_{i+1}||_F}{\sqrt{m'}} \leq tol`,
  286. where :math:`i` is the iteration number.
  287. Returns
  288. -------
  289. res : OptimizeResult
  290. `OptimizeResult` containing the following fields.
  291. col_ind : 1-D array
  292. Column indices corresponding to the best permutation found of the
  293. nodes of `B`.
  294. fun : float
  295. The objective value of the solution.
  296. nit : int
  297. The number of Frank-Wolfe iterations performed.
  298. Notes
  299. -----
  300. The algorithm may be sensitive to the initial permutation matrix (or
  301. search "position") due to the possibility of several local minima
  302. within the feasible region. A barycenter initialization is more likely to
  303. result in a better solution than a single random initialization. However,
  304. calling ``quadratic_assignment`` several times with different random
  305. initializations may result in a better optimum at the cost of longer
  306. total execution time.
  307. Examples
  308. --------
  309. As mentioned above, a barycenter initialization often results in a better
  310. solution than a single random initialization.
  311. >>> from scipy.optimize import quadratic_assignment
  312. >>> import numpy as np
  313. >>> rng = np.random.default_rng()
  314. >>> n = 15
  315. >>> A = rng.random((n, n))
  316. >>> B = rng.random((n, n))
  317. >>> options = {"rng": rng}
  318. >>> res = quadratic_assignment(A, B, options=options) # FAQ is default method
  319. >>> print(res.fun)
  320. 47.797048706380636 # may vary
  321. >>> options = {"rng": rng, "P0": "randomized"} # use randomized initialization
  322. >>> res = quadratic_assignment(A, B, options=options)
  323. >>> print(res.fun)
  324. 47.37287069769966 # may vary
  325. However, consider running from several randomized initializations and
  326. keeping the best result.
  327. >>> res = min([quadratic_assignment(A, B, options=options)
  328. ... for i in range(30)], key=lambda x: x.fun)
  329. >>> print(res.fun)
  330. 46.55974835248574 # may vary
  331. The '2-opt' method can be used to attempt to refine the results.
  332. >>> options = {"partial_guess": np.array([np.arange(n), res.col_ind]).T, "rng": rng}
  333. >>> res = quadratic_assignment(A, B, method="2opt", options=options)
  334. >>> print(res.fun)
  335. 46.55974835248574 # may vary
  336. References
  337. ----------
  338. .. [1] J.T. Vogelstein, J.M. Conroy, V. Lyzinski, L.J. Podrazik,
  339. S.G. Kratzer, E.T. Harley, D.E. Fishkind, R.J. Vogelstein, and
  340. C.E. Priebe, "Fast approximate quadratic programming for graph
  341. matching," PLOS one, vol. 10, no. 4, p. e0121002, 2015,
  342. :doi:`10.1371/journal.pone.0121002`
  343. .. [2] D. Fishkind, S. Adali, H. Patsolic, L. Meng, D. Singh, V. Lyzinski,
  344. C. Priebe, "Seeded graph matching", Pattern Recognit. 87 (2019):
  345. 203-215, :doi:`10.1016/j.patcog.2018.09.014`
  346. .. [3] "Doubly stochastic Matrix," Wikipedia.
  347. https://en.wikipedia.org/wiki/Doubly_stochastic_matrix
  348. """
  349. _check_unknown_options(unknown_options)
  350. maxiter = operator.index(maxiter)
  351. # ValueError check
  352. A, B, partial_match = _common_input_validation(A, B, partial_match)
  353. msg = None
  354. if isinstance(P0, str) and P0 not in {'barycenter', 'randomized'}:
  355. msg = "Invalid 'P0' parameter string"
  356. elif maxiter <= 0:
  357. msg = "'maxiter' must be a positive integer"
  358. elif tol <= 0:
  359. msg = "'tol' must be a positive float"
  360. if msg is not None:
  361. raise ValueError(msg)
  362. rng = check_random_state(rng)
  363. n = len(A) # number of vertices in graphs
  364. n_seeds = len(partial_match) # number of seeds
  365. n_unseed = n - n_seeds
  366. # [1] Algorithm 1 Line 1 - choose initialization
  367. if not isinstance(P0, str):
  368. P0 = np.atleast_2d(P0)
  369. if P0.shape != (n_unseed, n_unseed):
  370. msg = "`P0` matrix must have shape m' x m', where m'=n-m"
  371. elif ((P0 < 0).any() or not np.allclose(np.sum(P0, axis=0), 1)
  372. or not np.allclose(np.sum(P0, axis=1), 1)):
  373. msg = "`P0` matrix must be doubly stochastic"
  374. if msg is not None:
  375. raise ValueError(msg)
  376. elif P0 == 'barycenter':
  377. P0 = np.ones((n_unseed, n_unseed)) / n_unseed
  378. elif P0 == 'randomized':
  379. J = np.ones((n_unseed, n_unseed)) / n_unseed
  380. # generate a nxn matrix where each entry is a random number [0, 1]
  381. # would use rand, but Generators don't have it
  382. # would use random, but old mtrand.RandomStates don't have it
  383. K = _doubly_stochastic(rng.uniform(size=(n_unseed, n_unseed)))
  384. P0 = (J + K) / 2
  385. # check trivial cases
  386. if n == 0 or n_seeds == n:
  387. score = _calc_score(A, B, partial_match[:, 1])
  388. res = {"col_ind": partial_match[:, 1], "fun": score, "nit": 0}
  389. return OptimizeResult(res)
  390. obj_func_scalar = 1
  391. if maximize:
  392. obj_func_scalar = -1
  393. nonseed_B = np.setdiff1d(range(n), partial_match[:, 1])
  394. if shuffle_input:
  395. nonseed_B = rng.permutation(nonseed_B)
  396. nonseed_A = np.setdiff1d(range(n), partial_match[:, 0])
  397. perm_A = np.concatenate([partial_match[:, 0], nonseed_A])
  398. perm_B = np.concatenate([partial_match[:, 1], nonseed_B])
  399. # definitions according to Seeded Graph Matching [2].
  400. A11, A12, A21, A22 = _split_matrix(A[perm_A][:, perm_A], n_seeds)
  401. B11, B12, B21, B22 = _split_matrix(B[perm_B][:, perm_B], n_seeds)
  402. const_sum = A21 @ B21.T + A12.T @ B12
  403. P = P0
  404. # [1] Algorithm 1 Line 2 - loop while stopping criteria not met
  405. for n_iter in range(1, maxiter+1):
  406. # [1] Algorithm 1 Line 3 - compute the gradient of f(P) = -tr(APB^tP^t)
  407. grad_fp = (const_sum + A22 @ P @ B22.T + A22.T @ P @ B22)
  408. # [1] Algorithm 1 Line 4 - get direction Q by solving Eq. 8
  409. _, cols = linear_sum_assignment(grad_fp, maximize=maximize)
  410. Q = np.eye(n_unseed)[cols]
  411. # [1] Algorithm 1 Line 5 - compute the step size
  412. # Noting that e.g. trace(Ax) = trace(A)*x, expand and re-collect
  413. # terms as ax**2 + bx + c. c does not affect location of minimum
  414. # and can be ignored. Also, note that trace(A@B) = (A.T*B).sum();
  415. # apply where possible for efficiency.
  416. R = P - Q
  417. b21 = ((R.T @ A21) * B21).sum()
  418. b12 = ((R.T @ A12.T) * B12.T).sum()
  419. AR22 = A22.T @ R
  420. BR22 = B22 @ R.T
  421. b22a = (AR22 * B22.T[cols]).sum()
  422. b22b = (A22 * BR22[cols]).sum()
  423. a = (AR22.T * BR22).sum()
  424. b = b21 + b12 + b22a + b22b
  425. # critical point of ax^2 + bx + c is at x = -d/(2*e)
  426. # if a * obj_func_scalar > 0, it is a minimum
  427. # if minimum is not in [0, 1], only endpoints need to be considered
  428. if a*obj_func_scalar > 0 and 0 <= -b/(2*a) <= 1:
  429. alpha = -b/(2*a)
  430. else:
  431. alpha = np.argmin([0, (b + a)*obj_func_scalar])
  432. # [1] Algorithm 1 Line 6 - Update P
  433. P_i1 = alpha * P + (1 - alpha) * Q
  434. if np.linalg.norm(P - P_i1) / np.sqrt(n_unseed) < tol:
  435. P = P_i1
  436. break
  437. P = P_i1
  438. # [1] Algorithm 1 Line 7 - end main loop
  439. # [1] Algorithm 1 Line 8 - project onto the set of permutation matrices
  440. _, col = linear_sum_assignment(P, maximize=True)
  441. perm = np.concatenate((np.arange(n_seeds), col + n_seeds))
  442. unshuffled_perm = np.zeros(n, dtype=int)
  443. unshuffled_perm[perm_A] = perm_B[perm]
  444. score = _calc_score(A, B, unshuffled_perm)
  445. res = {"col_ind": unshuffled_perm, "fun": score, "nit": n_iter}
  446. return OptimizeResult(res)
  447. def _split_matrix(X, n):
  448. # definitions according to Seeded Graph Matching [2].
  449. upper, lower = X[:n], X[n:]
  450. return upper[:, :n], upper[:, n:], lower[:, :n], lower[:, n:]
  451. def _doubly_stochastic(P, tol=1e-3):
  452. # Adapted from @btaba implementation
  453. # https://github.com/btaba/sinkhorn_knopp
  454. # of Sinkhorn-Knopp algorithm
  455. # https://projecteuclid.org/euclid.pjm/1102992505
  456. max_iter = 1000
  457. c = 1 / P.sum(axis=0)
  458. r = 1 / (P @ c)
  459. P_eps = P
  460. for it in range(max_iter):
  461. if ((np.abs(P_eps.sum(axis=1) - 1) < tol).all() and
  462. (np.abs(P_eps.sum(axis=0) - 1) < tol).all()):
  463. # All column/row sums ~= 1 within threshold
  464. break
  465. c = 1 / (r @ P)
  466. r = 1 / (P @ c)
  467. P_eps = r[:, None] * P * c
  468. return P_eps
  469. def _quadratic_assignment_2opt(A, B, maximize=False, rng=None,
  470. partial_match=None,
  471. partial_guess=None,
  472. **unknown_options):
  473. r"""Solve the quadratic assignment problem (approximately).
  474. This function solves the Quadratic Assignment Problem (QAP) and the
  475. Graph Matching Problem (GMP) using the 2-opt algorithm [1]_.
  476. Quadratic assignment solves problems of the following form:
  477. .. math::
  478. \min_P & \ {\ \text{trace}(A^T P B P^T)}\\
  479. \mbox{s.t. } & {P \ \epsilon \ \mathcal{P}}\\
  480. where :math:`\mathcal{P}` is the set of all permutation matrices,
  481. and :math:`A` and :math:`B` are square matrices.
  482. Graph matching tries to *maximize* the same objective function.
  483. This algorithm can be thought of as finding the alignment of the
  484. nodes of two graphs that minimizes the number of induced edge
  485. disagreements, or, in the case of weighted graphs, the sum of squared
  486. edge weight differences.
  487. Note that the quadratic assignment problem is NP-hard. The results given
  488. here are approximations and are not guaranteed to be optimal.
  489. Parameters
  490. ----------
  491. A : 2-D array, square
  492. The square matrix :math:`A` in the objective function above.
  493. B : 2-D array, square
  494. The square matrix :math:`B` in the objective function above.
  495. method : str in {'faq', '2opt'} (default: 'faq')
  496. The algorithm used to solve the problem. This is the method-specific
  497. documentation for '2opt'.
  498. :ref:`'faq' <optimize.qap-faq>` is also available.
  499. Options
  500. -------
  501. maximize : bool (default: False)
  502. Maximizes the objective function if ``True``.
  503. rng : {None, int, `numpy.random.Generator`}, optional
  504. Pseudorandom number generator state. See `quadratic_assignment` for details.
  505. partial_match : 2-D array of integers, optional (default: None)
  506. Fixes part of the matching. Also known as a "seed" [2]_.
  507. Each row of `partial_match` specifies a pair of matched nodes: node
  508. ``partial_match[i, 0]`` of `A` is matched to node
  509. ``partial_match[i, 1]`` of `B`. The array has shape ``(m, 2)``,
  510. where ``m`` is not greater than the number of nodes, :math:`n`.
  511. .. note::
  512. `partial_match` must be sorted by the first column.
  513. partial_guess : 2-D array of integers, optional (default: None)
  514. A guess for the matching between the two matrices. Unlike
  515. `partial_match`, `partial_guess` does not fix the indices; they are
  516. still free to be optimized.
  517. Each row of `partial_guess` specifies a pair of matched nodes: node
  518. ``partial_guess[i, 0]`` of `A` is matched to node
  519. ``partial_guess[i, 1]`` of `B`. The array has shape ``(m, 2)``,
  520. where ``m`` is not greater than the number of nodes, :math:`n`.
  521. .. note::
  522. `partial_guess` must be sorted by the first column.
  523. Returns
  524. -------
  525. res : OptimizeResult
  526. `OptimizeResult` containing the following fields.
  527. col_ind : 1-D array
  528. Column indices corresponding to the best permutation found of the
  529. nodes of `B`.
  530. fun : float
  531. The objective value of the solution.
  532. nit : int
  533. The number of iterations performed during optimization.
  534. Notes
  535. -----
  536. This is a greedy algorithm that works similarly to bubble sort: beginning
  537. with an initial permutation, it iteratively swaps pairs of indices to
  538. improve the objective function until no such improvements are possible.
  539. References
  540. ----------
  541. .. [1] "2-opt," Wikipedia.
  542. https://en.wikipedia.org/wiki/2-opt
  543. .. [2] D. Fishkind, S. Adali, H. Patsolic, L. Meng, D. Singh, V. Lyzinski,
  544. C. Priebe, "Seeded graph matching", Pattern Recognit. 87 (2019):
  545. 203-215, https://doi.org/10.1016/j.patcog.2018.09.014
  546. """
  547. _check_unknown_options(unknown_options)
  548. rng = check_random_state(rng)
  549. A, B, partial_match = _common_input_validation(A, B, partial_match)
  550. N = len(A)
  551. # check trivial cases
  552. if N == 0 or partial_match.shape[0] == N:
  553. score = _calc_score(A, B, partial_match[:, 1])
  554. res = {"col_ind": partial_match[:, 1], "fun": score, "nit": 0}
  555. return OptimizeResult(res)
  556. if partial_guess is None:
  557. partial_guess = np.array([[], []]).T
  558. partial_guess = np.atleast_2d(partial_guess).astype(int)
  559. msg = None
  560. if partial_guess.shape[0] > A.shape[0]:
  561. msg = ("`partial_guess` can have only as "
  562. "many entries as there are nodes")
  563. elif partial_guess.shape[1] != 2:
  564. msg = "`partial_guess` must have two columns"
  565. elif partial_guess.ndim != 2:
  566. msg = "`partial_guess` must have exactly two dimensions"
  567. elif (partial_guess < 0).any():
  568. msg = "`partial_guess` must contain only positive indices"
  569. elif (partial_guess >= len(A)).any():
  570. msg = "`partial_guess` entries must be less than number of nodes"
  571. elif (not len(set(partial_guess[:, 0])) == len(partial_guess[:, 0]) or
  572. not len(set(partial_guess[:, 1])) == len(partial_guess[:, 1])):
  573. msg = "`partial_guess` column entries must be unique"
  574. if msg is not None:
  575. raise ValueError(msg)
  576. fixed_rows = None
  577. if partial_match.size or partial_guess.size:
  578. # use partial_match and partial_guess for initial permutation,
  579. # but randomly permute the rest.
  580. guess_rows = np.zeros(N, dtype=bool)
  581. guess_cols = np.zeros(N, dtype=bool)
  582. fixed_rows = np.zeros(N, dtype=bool)
  583. fixed_cols = np.zeros(N, dtype=bool)
  584. perm = np.zeros(N, dtype=int)
  585. rg, cg = partial_guess.T
  586. guess_rows[rg] = True
  587. guess_cols[cg] = True
  588. perm[guess_rows] = cg
  589. # match overrides guess
  590. rf, cf = partial_match.T
  591. fixed_rows[rf] = True
  592. fixed_cols[cf] = True
  593. perm[fixed_rows] = cf
  594. random_rows = ~fixed_rows & ~guess_rows
  595. random_cols = ~fixed_cols & ~guess_cols
  596. perm[random_rows] = rng.permutation(np.arange(N)[random_cols])
  597. else:
  598. perm = rng.permutation(np.arange(N))
  599. best_score = _calc_score(A, B, perm)
  600. i_free = np.arange(N)
  601. if fixed_rows is not None:
  602. i_free = i_free[~fixed_rows]
  603. better = operator.gt if maximize else operator.lt
  604. n_iter = 0
  605. done = False
  606. while not done:
  607. # equivalent to nested for loops i in range(N), j in range(i, N)
  608. for i, j in itertools.combinations_with_replacement(i_free, 2):
  609. n_iter += 1
  610. perm[i], perm[j] = perm[j], perm[i]
  611. score = _calc_score(A, B, perm)
  612. if better(score, best_score):
  613. best_score = score
  614. break
  615. # faster to swap back than to create a new list every time
  616. perm[i], perm[j] = perm[j], perm[i]
  617. else: # no swaps made
  618. done = True
  619. res = {"col_ind": perm, "fun": best_score, "nit": n_iter}
  620. return OptimizeResult(res)