summaryrefslogtreecommitdiffstats
path: root/markov.go
blob: 556e3650a0d9d5eaf255636efab3e8a542cc2d92 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
// This program implements a Markov chain algorithm. It generates pseudo-random
// prose based on the input text. It works by breaking each phrase into two parts:
// a multi-word prefix, and a single word suffix. It generates output by randomly
// choosing a suffix that follows the prefix.
//
// The program is based on Chapter 3 of "The Practice of Programming" by Brian Kernighan
// and Rob Pike.
package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log"
	"math/rand"
	"os"
)

const (
	nPrefDefault  = 2     // number of prefix words
	maxGenDefault = 10000 // maximum number of words to generate
)

func main() {
	nPref := flag.Uint("p", nPrefDefault, "number of prefix words")
	maxGen := flag.Uint("g", maxGenDefault, "maximum number of words to generate")
	flag.Parse()

	states := build(os.Stdin, *nPref)
	generate(states, *maxGen)
}

// build populates a StateMap with words read from `in'. States are keyed by
// `nPref'-word prefixes.
func build(in io.Reader, nPref uint) StateMap {
	states := StateMap{make(map[string][]string), nPref}
	prefix := make([]string, nPref)

	scanner := bufio.NewScanner(in)
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		suffix := scanner.Text()
		states.addSuffix(prefix, suffix)
		prefix = advance(prefix, suffix)
	}

	states.addSuffix(prefix, "") // ending state
	return states
}

// generate produces at most `maxWords' of output, one word per line.
func generate(states StateMap, maxWords uint) {
	prefix := make([]string, states.nPref)
	var i uint
	for i = 0; i < maxWords; i++ {
		suffixes, ok := states.get(prefix)
		if !ok {
			log.Fatal("state map missing prefix:", prefix)
		}
		suffix := suffixes[rand.Intn(len(suffixes))]
		if suffix == "" { // ending state
			return
		}
		fmt.Println(suffix)
		prefix = advance(prefix, suffix)
	}
}

// StateMap associates each multi-word prefix with a list of suffix words.  The
// start-state prefix is a slice of empty strings, with the suffix list containing the
// first word of the input text. The end-state has the last few words of the input as
// prefix, and the suffix list contains an empty string.
type StateMap struct {
	m     map[string][]string // prefix -> []suffix
	nPref uint                // number of prefix words
}

// addSuffix appends `suffix' to the list of suffixes associated with `prefix'.
func (sm StateMap) addSuffix(prefix []string, suffix string) {
	key := key(prefix)
	sm.m[key] = append(sm.m[key], suffix)
}

// get retrieves the list of suffixes associated with `prefix', or returns false if
// prefix is not a valid key to the state map.
func (sm StateMap) get(prefix []string) (suffixes []string, ok bool) {
	suffixes, ok = sm.m[key(prefix)]
	return
}

// key converts a multi-word prefix into a hashable string that can be used as a map key.
func key(prefix []string) string {
	return fmt.Sprintf("%q", prefix)
}

// advance shifts the prefix forward to contain `word', returning the modified prefix.
func advance(prefix []string, word string) []string {
	if len(prefix) == 1 {
		prefix[0] = word
	} else if len(prefix) > 1 {
		prefix = append(prefix[1:], word)
	}
	return prefix
}