""" This module provides functions to draw weighted samples from a population """ import random import numpy as np def find_interval(x, partition, endpoints=True): """ find_interval -> i If endpoints is True, "i" will be the index for which applies partition[i] < x < partition[i+1], if such an index exists. -1 otherwise If endpoints is False, "i" will be the smallest index for which applies x < partition[i]. If no such index exists "i" will be set to len(partition) """ for i in range(0, len(partition)): if x < partition[i]: return i-1 if endpoints else i return -1 if endpoints else len(partition) def weighted_choice(sequence, weights): """ weighted_choice selects a random element of the sequence according to the list of weights """ x = np.random.random() cum_weights = [0] + list(np.cumsum(weights)) index = find_interval(x, cum_weights) return sequence[index] def cartesian_choice(*iterables): """ A list with random choices from each iterable of iterables is being created in respective order. The result list can be seen as an element of the Cartesian product of the iterables """ res = [] for population in iterables: res.append(random.choice(population)) return res def weighted_cartesian_choice(*iterables): """ A list with weighted random choices from each iterable of iterables is being created in respective order """ res = [] for population, weights in iterables: lst = weighted_choice(population, weights) res.append(lst) return res def weighted_sample(population, weights, k): """ This function draws a random sample of length k from the sequence 'population' according to the list of weights """ sample = set() population = list(population) weights = list(weights) while len(sample) < k: choice = weighted_sample(population, weights) sample.add(choice) index = population.index(choice) weights.pop(index) population.remove(choice) weights = [ x / sum(weights) for x in weights] return list(sample) def weighted_sample_alternative(population, weights, k): """ Alternative way to previous implementation. This function draws a random sample of length k from the sequence 'population' according to the list of weights """ sample = set() population = list(population) weights = list(weights) while len(sample) < k: choice = weighted_sample(population, weights) if choice not in sample: sample.add(choice) return list(sample)