Skip to content

Add Pollard's Rho algorithm for integer factorization #5598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 28, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 148 additions & 0 deletions maths/pollard_rho.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
from math import gcd
from typing import Union


def pollard_rho(
num: int,
seed: int = 2,
step: int = 1,
attempts: int = 3,
) -> Union[int, None]:
"""
Use Pollard's Rho algorithm to return a nontrivial factor of ``num``.
The returned factor may be composite and require further factorization.
If the algorithm will return None if it fails to find a factor within
the specified number of attempts or within the specified number of steps.
If ``num`` is prime, this algorithm is guaranteed to return None.
https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm

>>> pollard_rho(18446744073709551617)
274177
>>> pollard_rho(97546105601219326301)
9876543191
>>> pollard_rho(100)
2
>>> pollard_rho(17)
>>> pollard_rho(17**3)
17
>>> pollard_rho(17**3, attempts=1)
>>> pollard_rho(3*5*7)
21
>>> pollard_rho(1)
Traceback (most recent call last):
...
ValueError: The input value cannot be less than 2
"""
# A value less than 2 can cause an infinite loop in the algorithm.
if num < 2:
raise ValueError("The input value cannot be less than 2")

# Because of the relationship between ``f(f(x))`` and ``f(x)``, this
# algorithm struggles to find factors that are divisible by two.
# As a workaround, we specifically check for two and even inputs.
# See: https://math.stackexchange.com/a/2856214/165820
if num > 2 and num % 2 == 0:
return 2

# Pollard's Rho algorithm requires a function that returns pseudorandom
# values between 0 <= X < ``num``. It doesn't need to be random in the
# sense that the output value is cryptographically secure or difficult
# to calculate, it only needs to be random in the sense that all output
# values should be equally likely to appear.
# For this reason, Pollard suggested using ``f(x) = (x**2 - 1) % num``
# However, the success of Pollard's algorithm isn't guaranteed and is
# determined in part by the initial seed and the chosen random function.
# To make retries easier, we will instead use ``f(x) = (x**2 + C) % num``
# where ``C`` is a value that we can modify between each attempt.
def rand_fn(value: int, step: int, modulus: int) -> int:
"""
Returns a pseudorandom value modulo ``modulus`` based on the
input ``value`` and attempt-specific ``step`` size.

>>> rand_fn(0, 0, 0)
Traceback (most recent call last):
...
ZeroDivisionError: integer division or modulo by zero
>>> rand_fn(1, 2, 3)
0
>>> rand_fn(0, 10, 7)
3
>>> rand_fn(1234, 1, 17)
16
"""
return (pow(value, 2) + step) % modulus

for attempt in range(attempts):
# These track the position within the cycle detection logic.
tortoise = seed
hare = seed

while True:
# At each iteration, the tortoise moves one step and the hare moves two.
tortoise = rand_fn(tortoise, step, num)
hare = rand_fn(hare, step, num)
hare = rand_fn(hare, step, num)

# At some point both the tortoise and the hare will enter a cycle whose
# length ``p`` is a divisor of ``num``. Once in that cycle, at some point
# the tortoise and hare will end up on the same value modulo ``p``.
# We can detect when this happens because the position difference between
# the tortoise and the hare will share a common divisor with ``num``.
divisor = gcd(hare - tortoise, num)

if divisor == 1:
# No common divisor yet, just keep searching.
continue
else:
# We found a common divisor!
if divisor == num:
# Unfortunately, the divisor is ``num`` itself and is useless.
break
else:
# The divisor is a nontrivial factor of ``num``!
return divisor

# If we made it here, then this attempt failed.
# We need to pick a new starting seed for the tortoise and hare
# in addition to a new step value for the random function.
# To keep this example implementation deterministic, the
# new values will be generated based on currently available
# values instead of using something like ``random.randint``.

# We can use the hare's position as the new seed.
# This is actually what Richard Brent's the "optimized" variant does.
seed = hare

# The new step value for the random function can just be incremented.
# At first the results will be similar to what the old function would
# have produced, but the value will quickly diverge after a bit.
step += 1

# We haven't found a divisor within the requested number of attempts.
# We were unlucky or ``num`` itself is actually prime.
return None


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
"num",
type=int,
help="The value to find a divisor of",
)
parser.add_argument(
"--attempts",
type=int,
default=3,
help="The number of attempts before giving up",
)
args = parser.parse_args()

divisor = pollard_rho(args.num, attempts=args.attempts)
if divisor is None:
print(f"{args.num} is probably prime")
else:
quotient = args.num // divisor
print(f"{args.num} = {divisor} * {quotient}")