按照定义,伪随机数生成(Pseudo Random Number Generation)是通过算法生成数字序列的过程,使所生成序列的性质近似于从某个分布中真正随机采样得到的序列的性质。这里所说的“随机”,是指预测该序列的成功概率并不比随机猜测更高。尽管我们关注的是随机性,但伪随机数生成并不是真正的随机过程。为什么?因为整个序列完全由提供给算法的初始值(即种子)或初始状态决定。用于生成这类随机数序列的算法称为伪随机数生成器(Pseudo Random Number Generator,PRNG)。
# JAX itself, its JIT compiler entry point, and the NumPy-compatible array API.
import jax
from jax import jit
import jax.numpy as jnp
# If I set the seed, would I get the same sequence of random numbers every time?
for _ in range(10):
    # Set initial value by providing a seed value.
    # The seed is re-applied on every iteration on purpose: each draw below
    # is then the first draw after seeding, so it repeats (see output).
    seed = 0
    np.random.seed(seed)

    # Generate a random integer from a range of [0, 5)
    random_number = np.random.randint(0, 5)
    print(f"Seed: {seed} -> Random number generated: {random_number}")

# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Seed: 0 -> Random number generated: 4
# Repeat the same seeded split several times to show it is reproducible.
for i in range(5):
    # Set initial value by providing a seed value
    seed = 1234
    np.random.seed(seed)

    # Choose array1 and array2 indices.
    # NOTE(review): the values drawn from `array` are reused as indices into
    # `array` below — this presumes `array` holds valid indices of itself
    # (e.g. an arange); confirm against where `array` is defined.
    array_1_idx = np.random.choice(array, size=8)
    array_2_idx = np.random.choice(array, size=2)

    # Split the array into two sets
    array_1 = array[array_1_idx]
    array_2 = array[array_2_idx]

    print(f"Iteration: {i+1} Seed value: {seed}\n")
    print(f"First array: {array_1} Second array: {array_2}")
    print("="*50)
    print("")
# Same example but with a different kind of random number generator
for i in range(5):
    # Set initial value by providing a seed value.
    # A fresh Generator is built from the same seed on every iteration, so
    # each iteration starts from the same generator state.
    seed = 0
    rng = np.random.default_rng(seed)

    # Choose array1 and array2 indices.
    # NOTE(review): the values drawn from `array` are reused as indices into
    # `array` below — this presumes `array` holds valid indices of itself;
    # confirm against where `array` is defined.
    array_1_idx = rng.choice(array, size=8)
    array_2_idx = rng.choice(array, size=2)

    # Split the array into two sets
    array_1 = array[array_1_idx]
    array_2 = array[array_2_idx]

    print(f"Iteration: {i+1} Seed value: {seed}\n")
    print(f"First array: {array_1} Second array: {array_2}")
    print("="*50)
    print("")
# Run the function a few times in parallel to check if we get
# same RNG sequence
for i in range(5):
    # Build one delayed task per child seed, then execute them on two workers.
    res = []
    for child_seed in child_seeds:
        res.append(delayed(get_sequence)(child_seed))
    res = Parallel(n_jobs=2)(res)

    print(f"Iteration: {i+1} Sequences: {res}")
    print("="*70)
# Passing the original key to a random function
random_integers = random.randint(key=key, minval=0, maxval=10, shape=[5])
print(random_integers)

# [2 4 9 9 4]
# What if we want to call another function?
# Don't use the same key. Split the original key, and then pass it
print("Original key: ", key)

# Split the key. By default the number of splits is set to 2
# You can specify explicitly how many splits you want to do
key, subkey = random.split(key, num=2)

print("New key: ", key)
print("Subkey: ", subkey)

# Original key: [ 0 1234]
# New key: [2113592192 1902136347]
# Subkey: [603280156 445306386]
# Call another random function with the new key
random_floats = random.normal(key=key, shape=(5,), dtype=jnp.float32)
print(random_floats)
# Show the two results side by side; the recorded output below differs.
print("Generated all at once: ", random_integers_1)
print("Generated sequentially: ", random_integers_2)

# Generated all at once: [2 4 9 9 4]
# Generated sequentially: [1 5 8 7]
# Possible highly correlated outputs.
# Not a very good example but serves the demonstration purpose
def sampler1(key):
    """Draw two uniform samples using the given PRNG key.

    Args:
        key: PRNG key forwarded unchanged to ``random.uniform``.

    Returns:
        The result of ``random.uniform`` with ``minval=0``, ``maxval=1``
        and ``shape=(2,)``.
    """
    return random.uniform(key=key, minval=0, maxval=1, shape=(2,))