summaryrefslogtreecommitdiffstats
path: root/markov.go
blob: c75e0a2fce3f6b288abea4783e7aed0e0c131150 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
// This program implements a Markov chain algorithm. It generates pseudo-random
// prose based on the given input text. It works by breaking each phrase into two
// parts: a multi-word prefix, and a single suffix word. It generates output by randomly
// choosing a suffix that follows the prefix.
//
// The program is based on Chapter 3 of "The Practice of Programming" by
// Brian Kernighan and Rob Pike.
package main

import (
	"bufio"
	"fmt"
	"io"
	"math/rand"
	"os"
)

const (
	nPref  = 2     // number of prefix words
	maxGen = 10000 // maximum number of words to generate
)

type Prefix struct {
	words [nPref]string
}

// push advances the prefix to contain `word'.
func (p *Prefix) push(word string) {
	if nPref < 1 {
		return
	} else if nPref == 1 {
		p.words[0] = word
	} else {
		p.words = [nPref]string(append(p.words[1:], word))
	}
}

// StateMap associates each multi-word Prefix with a list of suffix words.
// The starting state is an empty Prefix{} with the suffix list containing the first word of
// the input text. The ending state is the last few words of the input text with an empty
// suffix list.
type StateMap map[Prefix][]string

func main() {
	states := build(os.Stdin)
	generate(states, maxGen)
}

// build populates a StateMap with words read from `in'.
func build(in io.Reader) StateMap {
	states := make(StateMap)
	prefix := Prefix{}

	scanner := bufio.NewScanner(in)
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		suffix := scanner.Text()
		states[prefix] = append(states[prefix], suffix)
		prefix.push(suffix)
	}

	states[prefix] = []string{} // ending state

	return states
}

// generate produces at most `nWords' of output, one word per line.
func generate(states StateMap, nWords int) {
	prefix := Prefix{}
	for i := 0; i < maxGen; i++ {
		suffixes := states[prefix]
		if len(suffixes) <= 0 { // ending state
			return
		}
		suffix := suffixes[rand.Intn(len(suffixes))]
		fmt.Println(suffix)
		prefix.push(suffix)
	}
}