# based on markov.py by Allison Parrish
# https://github.com/aparrish/rwet-examples/blob/master/ngrams/markov.py
import random


def build_model(tokens, n):
    "Builds a Markov model from the list of tokens, using n-grams of length n."
    model = dict()
    if len(tokens) < n:
        return model
    for i in range(len(tokens) - n):
        gram = tuple(tokens[i:i+n])
        next_token = tokens[i+n]
        if gram in model:
            model[gram].append(next_token)
        else:
            model[gram] = [next_token]
    # Optionally, mark the end of the token list with a None sentinel, so
    # that generate() can stop at a statistically likely ending:
    # final_gram = tuple(tokens[len(tokens)-n:])
    # if final_gram in model:
    #     model[final_gram].append(None)
    # else:
    #     model[final_gram] = [None]
    return model
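
# A quick illustration of the structure build_model() returns (hypothetical
# tokens): each n-gram maps to the list of tokens observed to follow it,
# with duplicates kept so random.choice() samples in proportion to frequency.
#     >>> build_model(['a', 'b', 'a', 'b', 'c'], 2)
#     {('a', 'b'): ['a', 'c'], ('b', 'a'): ['b']}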


def generate(model, n, seed=None, max_iterations=100):
    """Generates a list of tokens from information in model, using n as the
    length of n-grams in the model. Starts the generation with the n-gram
    given as seed. If more than max_iterations iterations are reached, the
    process is stopped. (This is to prevent infinite loops.)"""
    if seed is None:
        seed = random.choice(list(model.keys()))
    else:
        # Coerce any sequence of n tokens (e.g. a list) into a hashable
        # tuple so it can be looked up as a model key.
        seed = tuple(seed)
    output = list(seed)
    current = tuple(seed)
    for i in range(max_iterations):
        if current in model:
            possible_next_tokens = model[current]
            next_token = random.choice(possible_next_tokens)
            if next_token is None:
                # None is the optional end-of-sequence sentinel
                # (see build_model above).
                break
            output.append(next_token)
            current = tuple(output[-n:])
        else:
            break
    return output
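
# A minimal usage sketch (character-level, so the tokens are single
# characters; output varies between runs because random.choice() drives
# the walk):
#     >>> model = build_model(list('condescendences'), 2)
#     >>> ''.join(generate(model, 2, seed=('s', 'c'), max_iterations=10))
#     'scendencesce'    # one possible result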


def merge_models(models):
    "Merges one or more Markov models into a single model."
    merged_model = dict()
    for model in models:
        for key, val in model.items():
            if key in merged_model:
                merged_model[key].extend(val)
            else:
                # Copy the list so later extend() calls don't mutate the
                # lists inside the original models.
                merged_model[key] = list(val)
    return merged_model
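
# Merging concatenates the continuation lists of shared n-grams, which
# preserves relative frequencies across the inputs, e.g.
#     >>> merge_models([{('a',): ['b']}, {('a',): ['c'], ('c',): ['d']}])
#     {('a',): ['b', 'c'], ('c',): ['d']}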


def generate_from_token_lists(token_lines, n, count=14, max_iterations=100):
    """Generates text from a list of lists of tokens. This function is intended
    for input text where each line forms a distinct unit (e.g., poetry), and
    where the desired output is to recreate lines in that form. It does this
    by keeping track of the n-gram that comes at the beginning of each line,
    and then only generating lines that begin with one of these "beginnings."
    It also builds a separate Markov model for each line, and then merges
    those models together, to ensure that lines end with n-grams statistically
    likely to end lines in the original text."""
    beginnings = list()
    models = list()
    for token_line in token_lines:
        beginning = token_line[:n]
        beginnings.append(beginning)
        line_model = build_model(token_line, n)
        models.append(line_model)
    combined_model = merge_models(models)
    generated_list = list()
    for i in range(count):
        generated_tokens = generate(combined_model, n,
                                    random.choice(beginnings), max_iterations)
        generated_list.append(generated_tokens)
    return generated_list
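
# Hypothetical word-level call: every generated line opens with the first
# two words of one of the input lines.
#     >>> lines = ['the quick brown fox', 'the lazy dog sleeps']
#     >>> generate_from_token_lists([l.split() for l in lines], 2, count=1)
#     [['the', 'lazy', 'dog', 'sleeps']]    # one possible result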


def char_level_generate(lines, n, count=14, max_iterations=100):
    """Generates Markov chain text from the given lines, using character-level
    n-grams of length n. Returns a list of count items."""
    token_lines = [list(line) for line in lines]
    generated = generate_from_token_lists(token_lines, n, count, max_iterations)
    return [''.join(item) for item in generated]
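
# With n=3, for example, each generated string begins with the first three
# characters of some input line (hypothetical input):
#     char_level_generate(['double bubble', 'trouble rubble'], 3, count=4)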


def word_level_generate(lines, n, count=14, max_iterations=100):
    """Generates Markov chain text from the given lines, using word-level
    n-grams of length n. Returns a list of count items."""
    token_lines = [line.split() for line in lines]
    generated = generate_from_token_lists(token_lines, n, count, max_iterations)
    return [' '.join(item) for item in generated]
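
# The word-level counterpart; note that split() plus ' '.join() normalizes
# whitespace, so the original spacing is not preserved exactly:
#     word_level_generate(['the quick brown fox jumps', 'the slow red fox naps'], 2)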


def generate_model_from_token_lists(token_lines, n, count=14, max_iterations=100):
    """Builds and returns a combined Markov model from a list of lists of
    tokens, building one model per line and merging them, exactly as
    generate_from_token_lists() does. Unlike that function, it generates no
    text; it returns the merged model for the caller to generate from later.
    (The count and max_iterations parameters are accepted for interface
    symmetry but are unused here.)"""
    models = list()
    for token_line in token_lines:
        line_model = build_model(token_line, n)
        models.append(line_model)
    combined_model = merge_models(models)
    return combined_model
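
# Sketch of reusing a prebuilt model, so the corpus is only processed once:
#     >>> model = generate_model_from_token_lists([l.split() for l in lines], 2)
#     >>> generate(model, 2)    # sample from it as many times as needed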


if __name__ == '__main__':
    import sys
    n = int(sys.argv[1])
    lines = list()
    for line in sys.stdin:
        line = line.strip()
        lines.append(line)
    for generated in char_level_generate(lines, n):
        print(generated)