qs.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. from math import exp, log
  2. from sympy.core.random import _randint
  3. from sympy.external.gmpy import bit_scan1, gcd, invert, sqrt as isqrt
  4. from sympy.ntheory.factor_ import _perfect_power
  5. from sympy.ntheory.primetest import isprime
  6. from sympy.ntheory.residue_ntheory import _sqrt_mod_prime_power
  7. class SievePolynomial:
  8. def __init__(self, a, b, N):
  9. """This class denotes the sieve polynomial.
  10. Provide methods to compute `(a*x + b)**2 - N` and
  11. `a*x + b` when given `x`.
  12. Parameters
  13. ==========
  14. a : parameter of the sieve polynomial
  15. b : parameter of the sieve polynomial
  16. N : number to be factored
  17. """
  18. self.a = a
  19. self.b = b
  20. self.a2 = a**2
  21. self.ab = 2*a*b
  22. self.b2 = b**2 - N
  23. def eval_u(self, x):
  24. return self.a*x + self.b
  25. def eval_v(self, x):
  26. return (self.a2*x + self.ab)*x + self.b2
  27. class FactorBaseElem:
  28. """This class stores an element of the `factor_base`.
  29. """
  30. def __init__(self, prime, tmem_p, log_p):
  31. """
  32. Initialization of factor_base_elem.
  33. Parameters
  34. ==========
  35. prime : prime number of the factor_base
  36. tmem_p : Integer square root of x**2 = n mod prime
  37. log_p : Compute Natural Logarithm of the prime
  38. """
  39. self.prime = prime
  40. self.tmem_p = tmem_p
  41. self.log_p = log_p
  42. # `soln1` and `soln2` are solutions to
  43. # the equation `(a*x + b)**2 - N = 0 (mod p)`.
  44. self.soln1 = None
  45. self.soln2 = None
  46. self.b_ainv = None
  47. def _generate_factor_base(prime_bound, n):
  48. """Generate `factor_base` for Quadratic Sieve. The `factor_base`
  49. consists of all the points whose ``legendre_symbol(n, p) == 1``
  50. and ``p < num_primes``. Along with the prime `factor_base` also stores
  51. natural logarithm of prime and the residue n modulo p.
  52. It also returns the of primes numbers in the `factor_base` which are
  53. close to 1000 and 5000.
  54. Parameters
  55. ==========
  56. prime_bound : upper prime bound of the factor_base
  57. n : integer to be factored
  58. """
  59. from sympy.ntheory.generate import sieve
  60. factor_base = []
  61. idx_1000, idx_5000 = None, None
  62. for prime in sieve.primerange(1, prime_bound):
  63. if pow(n, (prime - 1) // 2, prime) == 1:
  64. if prime > 1000 and idx_1000 is None:
  65. idx_1000 = len(factor_base) - 1
  66. if prime > 5000 and idx_5000 is None:
  67. idx_5000 = len(factor_base) - 1
  68. residue = _sqrt_mod_prime_power(n, prime, 1)[0]
  69. log_p = round(log(prime)*2**10)
  70. factor_base.append(FactorBaseElem(prime, residue, log_p))
  71. return idx_1000, idx_5000, factor_base
  72. def _generate_polynomial(N, M, factor_base, idx_1000, idx_5000, randint):
  73. """ Generate sieve polynomials indefinitely.
  74. Information such as `soln1` in the `factor_base` associated with
  75. the polynomial is modified in place.
  76. Parameters
  77. ==========
  78. N : Number to be factored
  79. M : sieve interval
  80. factor_base : factor_base primes
  81. idx_1000 : index of prime number in the factor_base near 1000
  82. idx_5000 : index of prime number in the factor_base near to 5000
  83. randint : A callable that takes two integers (a, b) and returns a random integer
  84. n such that a <= n <= b, similar to `random.randint`.
  85. """
  86. approx_val = log(2*N)/2 - log(M)
  87. start = idx_1000 or 0
  88. end = idx_5000 or (len(factor_base) - 1)
  89. while True:
  90. # Choose `a` that is close to `sqrt(2*N) / M`
  91. best_a, best_q, best_ratio = None, None, None
  92. for _ in range(50):
  93. a = 1
  94. q = []
  95. while log(a) < approx_val:
  96. rand_p = 0
  97. while(rand_p == 0 or rand_p in q):
  98. rand_p = randint(start, end)
  99. p = factor_base[rand_p].prime
  100. a *= p
  101. q.append(rand_p)
  102. ratio = exp(log(a) - approx_val)
  103. if best_ratio is None or abs(ratio - 1) < abs(best_ratio - 1):
  104. best_q = q
  105. best_a = a
  106. best_ratio = ratio
  107. # Set `b` using the Chinese remainder theorem
  108. a = best_a
  109. q = best_q
  110. B = []
  111. for val in q:
  112. q_l = factor_base[val].prime
  113. gamma = factor_base[val].tmem_p * invert(a // q_l, q_l) % q_l
  114. if 2*gamma > q_l:
  115. gamma = q_l - gamma
  116. B.append(a//q_l*gamma)
  117. b = sum(B)
  118. g = SievePolynomial(a, b, N)
  119. for fb in factor_base:
  120. if a % fb.prime == 0:
  121. fb.soln1 = None
  122. continue
  123. a_inv = invert(a, fb.prime)
  124. fb.b_ainv = [2*b_elem*a_inv % fb.prime for b_elem in B]
  125. fb.soln1 = (a_inv*(fb.tmem_p - b)) % fb.prime
  126. fb.soln2 = (a_inv*(-fb.tmem_p - b)) % fb.prime
  127. yield g
  128. # Update `b` with Gray code
  129. for i in range(1, 2**(len(B)-1)):
  130. v = bit_scan1(i)
  131. neg_pow = 2*((i >> (v + 1)) % 2) - 1
  132. b = g.b + 2*neg_pow*B[v]
  133. a = g.a
  134. g = SievePolynomial(a, b, N)
  135. for fb in factor_base:
  136. if fb.soln1 is None:
  137. continue
  138. fb.soln1 = (fb.soln1 - neg_pow*fb.b_ainv[v]) % fb.prime
  139. fb.soln2 = (fb.soln2 - neg_pow*fb.b_ainv[v]) % fb.prime
  140. yield g
  141. def _gen_sieve_array(M, factor_base):
  142. """Sieve Stage of the Quadratic Sieve. For every prime in the factor_base
  143. that does not divide the coefficient `a` we add log_p over the sieve_array
  144. such that ``-M <= soln1 + i*p <= M`` and ``-M <= soln2 + i*p <= M`` where `i`
  145. is an integer. When p = 2 then log_p is only added using
  146. ``-M <= soln1 + i*p <= M``.
  147. Parameters
  148. ==========
  149. M : sieve interval
  150. factor_base : factor_base primes
  151. """
  152. sieve_array = [0]*(2*M + 1)
  153. for factor in factor_base:
  154. if factor.soln1 is None: #The prime does not divides a
  155. continue
  156. for idx in range((M + factor.soln1) % factor.prime, 2*M, factor.prime):
  157. sieve_array[idx] += factor.log_p
  158. if factor.prime == 2:
  159. continue
  160. #if prime is 2 then sieve only with soln_1_p
  161. for idx in range((M + factor.soln2) % factor.prime, 2*M, factor.prime):
  162. sieve_array[idx] += factor.log_p
  163. return sieve_array
  164. def _check_smoothness(num, factor_base):
  165. r""" Check if `num` is smooth with respect to the given `factor_base`
  166. and compute its factorization vector.
  167. Parameters
  168. ==========
  169. num : integer whose smootheness is to be checked
  170. factor_base : factor_base primes
  171. """
  172. if num < 0:
  173. num *= -1
  174. vec = 1
  175. else:
  176. vec = 0
  177. for i, fb in enumerate(factor_base, 1):
  178. if num % fb.prime:
  179. continue
  180. e = 1
  181. num //= fb.prime
  182. while num % fb.prime == 0:
  183. e += 1
  184. num //= fb.prime
  185. if e % 2:
  186. vec += 1 << i
  187. return vec, num
  188. def _trial_division_stage(N, M, factor_base, sieve_array, sieve_poly, partial_relations, ERROR_TERM):
  189. """Trial division stage. Here we trial divide the values generetated
  190. by sieve_poly in the sieve interval and if it is a smooth number then
  191. it is stored in `smooth_relations`. Moreover, if we find two partial relations
  192. with same large prime then they are combined to form a smooth relation.
  193. First we iterate over sieve array and look for values which are greater
  194. than accumulated_val, as these values have a high chance of being smooth
  195. number. Then using these values we find smooth relations.
  196. In general, let ``t**2 = u*p modN`` and ``r**2 = v*p modN`` be two partial relations
  197. with the same large prime p. Then they can be combined ``(t*r/p)**2 = u*v modN``
  198. to form a smooth relation.
  199. Parameters
  200. ==========
  201. N : Number to be factored
  202. M : sieve interval
  203. factor_base : factor_base primes
  204. sieve_array : stores log_p values
  205. sieve_poly : polynomial from which we find smooth relations
  206. partial_relations : stores partial relations with one large prime
  207. ERROR_TERM : error term for accumulated_val
  208. """
  209. accumulated_val = (log(M) + log(N)/2 - ERROR_TERM) * 2**10
  210. smooth_relations = []
  211. proper_factor = set()
  212. partial_relation_upper_bound = 128*factor_base[-1].prime
  213. for x, val in enumerate(sieve_array, -M):
  214. if val < accumulated_val:
  215. continue
  216. v = sieve_poly.eval_v(x)
  217. vec, num = _check_smoothness(v, factor_base)
  218. if num == 1:
  219. smooth_relations.append((sieve_poly.eval_u(x), v, vec))
  220. elif num < partial_relation_upper_bound and isprime(num):
  221. if N % num == 0:
  222. proper_factor.add(num)
  223. continue
  224. u = sieve_poly.eval_u(x)
  225. if num in partial_relations:
  226. u_prev, v_prev, vec_prev = partial_relations.pop(num)
  227. u = u*u_prev*invert(num, N) % N
  228. v = v*v_prev // num**2
  229. vec ^= vec_prev
  230. smooth_relations.append((u, v, vec))
  231. else:
  232. partial_relations[num] = (u, v, vec)
  233. return smooth_relations, proper_factor
  234. def _find_factor(N, smooth_relations, col):
  235. """ Finds proper factor of N using fast gaussian reduction for modulo 2 matrix.
  236. Parameters
  237. ==========
  238. N : Number to be factored
  239. smooth_relations : Smooth relations vectors matrix
  240. col : Number of columns in the matrix
  241. Reference
  242. ==========
  243. .. [1] A fast algorithm for gaussian elimination over GF(2) and
  244. its implementation on the GAPP. Cetin K.Koc, Sarath N.Arachchige
  245. """
  246. matrix = [s_relation[2] for s_relation in smooth_relations]
  247. row = len(matrix)
  248. mark = [False] * row
  249. for pos in range(col):
  250. m = 1 << pos
  251. for i in range(row):
  252. if p := matrix[i] & m:
  253. add_col = p ^ matrix[i]
  254. matrix[i] = m
  255. mark[i] = True
  256. for j in range(i + 1, row):
  257. if matrix[j] & m:
  258. matrix[j] ^= add_col
  259. break
  260. for m, mat, rel in zip(mark, matrix, smooth_relations):
  261. if m:
  262. continue
  263. u, v = rel[0], rel[1]
  264. for m1, mat1, rel1 in zip(mark, matrix, smooth_relations):
  265. if m1 and mat & mat1:
  266. u *= rel1[0]
  267. v *= rel1[1]
  268. # assert is_square(v)
  269. v = isqrt(v)
  270. if 1 < (g := gcd(u - v, N)) < N:
  271. yield g
  272. def qs(N, prime_bound, M, ERROR_TERM=25, seed=1234):
  273. """Performs factorization using Self-Initializing Quadratic Sieve.
  274. In SIQS, let N be a number to be factored, and this N should not be a
  275. perfect power. If we find two integers such that ``X**2 = Y**2 modN`` and
  276. ``X != +-Y modN``, then `gcd(X + Y, N)` will reveal a proper factor of N.
  277. In order to find these integers X and Y we try to find relations of form
  278. t**2 = u modN where u is a product of small primes. If we have enough of
  279. these relations then we can form ``(t1*t2...ti)**2 = u1*u2...ui modN`` such that
  280. the right hand side is a square, thus we found a relation of ``X**2 = Y**2 modN``.
  281. Here, several optimizations are done like using multiple polynomials for
  282. sieving, fast changing between polynomials and using partial relations.
  283. The use of partial relations can speeds up the factoring by 2 times.
  284. Parameters
  285. ==========
  286. N : Number to be Factored
  287. prime_bound : upper bound for primes in the factor base
  288. M : Sieve Interval
  289. ERROR_TERM : Error term for checking smoothness
  290. seed : seed of random number generator
  291. Returns
  292. =======
  293. set(int) : A set of factors of N without considering multiplicity.
  294. Returns ``{N}`` if factorization fails.
  295. Examples
  296. ========
  297. >>> from sympy.ntheory import qs
  298. >>> qs(25645121643901801, 2000, 10000)
  299. {5394769, 4753701529}
  300. >>> qs(9804659461513846513, 2000, 10000)
  301. {4641991, 2112166839943}
  302. See Also
  303. ========
  304. qs_factor
  305. References
  306. ==========
  307. .. [1] https://pdfs.semanticscholar.org/5c52/8a975c1405bd35c65993abf5a4edb667c1db.pdf
  308. .. [2] https://www.rieselprime.de/ziki/Self-initializing_quadratic_sieve
  309. """
  310. return set(qs_factor(N, prime_bound, M, ERROR_TERM, seed))
  311. def qs_factor(N, prime_bound, M, ERROR_TERM=25, seed=1234):
  312. """ Performs factorization using Self-Initializing Quadratic Sieve.
  313. Parameters
  314. ==========
  315. N : Number to be Factored
  316. prime_bound : upper bound for primes in the factor base
  317. M : Sieve Interval
  318. ERROR_TERM : Error term for checking smoothness
  319. seed : seed of random number generator
  320. Returns
  321. =======
  322. dict[int, int] : Factors of N.
  323. Returns ``{N: 1}`` if factorization fails.
  324. Note that the key is not always a prime number.
  325. Examples
  326. ========
  327. >>> from sympy.ntheory import qs_factor
  328. >>> qs_factor(1009 * 100003, 2000, 10000)
  329. {1009: 1, 100003: 1}
  330. See Also
  331. ========
  332. qs
  333. """
  334. if N < 2:
  335. raise ValueError("N should be greater than 1")
  336. factors = {}
  337. smooth_relations = []
  338. partial_relations = {}
  339. # Eliminate the possibility of even numbers,
  340. # prime numbers, and perfect powers.
  341. if N % 2 == 0:
  342. e = 1
  343. N //= 2
  344. while N % 2 == 0:
  345. N //= 2
  346. e += 1
  347. factors[2] = e
  348. if isprime(N):
  349. factors[N] = 1
  350. return factors
  351. if result := _perfect_power(N, 3):
  352. n, e = result
  353. factors[n] = e
  354. return factors
  355. N_copy = N
  356. randint = _randint(seed)
  357. idx_1000, idx_5000, factor_base = _generate_factor_base(prime_bound, N)
  358. threshold = len(factor_base) * 105//100
  359. for g in _generate_polynomial(N, M, factor_base, idx_1000, idx_5000, randint):
  360. sieve_array = _gen_sieve_array(M, factor_base)
  361. s_rel, p_f = _trial_division_stage(N, M, factor_base, sieve_array, g, partial_relations, ERROR_TERM)
  362. smooth_relations += s_rel
  363. for p in p_f:
  364. if N_copy % p:
  365. continue
  366. e = 1
  367. N_copy //= p
  368. while N_copy % p == 0:
  369. N_copy //= p
  370. e += 1
  371. factors[p] = e
  372. if threshold <= len(smooth_relations):
  373. break
  374. for factor in _find_factor(N, smooth_relations, len(factor_base) + 1):
  375. if N_copy % factor == 0:
  376. e = 1
  377. N_copy //= factor
  378. while N_copy % factor == 0:
  379. N_copy //= factor
  380. e += 1
  381. factors[factor] = e
  382. if N_copy == 1 or isprime(N_copy):
  383. break
  384. if N_copy != 1:
  385. factors[N_copy] = 1
  386. return factors