summaryrefslogtreecommitdiffstats
path: root/markov.go
blob: 9d073a04a38d63e06d157684d937e3b0b196c2ba (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// This program implements a Markov chain algorithm. It generates pseudo-random
// prose based on the input text. It works by breaking each phrase into two parts:
// a multi-word prefix, and a single word suffix. It generates output by randomly
// choosing a suffix that follows the prefix.
//
// The program is based on Chapter 3 of "The Practice of Programming" by Brian Kernighan
// and Rob Pike.
package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"math/rand"
	"os"
)

const (
	nPref         = 2     // number of prefix words
	maxGenDefault = 10000 // maximum number of words to generate
)

type Prefix struct {
	words [nPref]string
}

// push advances the prefix to contain `word'.
func (p *Prefix) push(word string) {
	if nPref < 1 {
		return
	} else if nPref == 1 {
		p.words[0] = word
	} else {
		p.words = [nPref]string(append(p.words[1:], word))
	}
}

// StateMap associates each multi-word Prefix with a list of suffix words.  The starting
// state is an empty Prefix{} with the suffix list containing the first word of the
// input text. The ending state has the last few words of the input as prefix, and
// the suffix list contains an empty string.
type StateMap map[Prefix][]string

func main() {
	maxGen := flag.Int("w", maxGenDefault, "maximum number of words to generate")
	flag.Parse()

	states := build(os.Stdin)
	generate(states, *maxGen)
}

// build populates a StateMap with words read from `in'.
func build(in io.Reader) StateMap {
	states := make(StateMap)
	prefix := Prefix{}

	scanner := bufio.NewScanner(in)
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		suffix := scanner.Text()
		states[prefix] = append(states[prefix], suffix)
		prefix.push(suffix)
	}

	states[prefix] = append(states[prefix], "") // ending state

	return states
}

// generate produces at most `maxWords' of output, one word per line.
func generate(states StateMap, maxWords int) {
	prefix := Prefix{}
	for i := 0; i < maxWords; i++ {
		suffixes := states[prefix]
		suffix := suffixes[rand.Intn(len(suffixes))]
		if suffix == "" { // ending state
			return
		}
		fmt.Println(suffix)
		prefix.push(suffix)
	}
}