summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--markov.go96
1 files changed, 59 insertions, 37 deletions
diff --git a/markov.go b/markov.go
index 9d073a0..556e365 100644
--- a/markov.go
+++ b/markov.go
@@ -12,72 +12,94 @@ import (
"flag"
"fmt"
"io"
+ "log"
"math/rand"
"os"
)
const (
- nPref = 2 // number of prefix words
+ nPrefDefault = 2 // number of prefix words
maxGenDefault = 10000 // maximum number of words to generate
)
-type Prefix struct {
- words [nPref]string
-}
-
-// push advances the prefix to contain `word'.
-func (p *Prefix) push(word string) {
- if nPref < 1 {
- return
- } else if nPref == 1 {
- p.words[0] = word
- } else {
- p.words = [nPref]string(append(p.words[1:], word))
- }
-}
-
-// StateMap associates each multi-word Prefix with a list of suffix words. The starting
-// state is an empty Prefix{} with the suffix list containing the first word of the
-// input text. The ending state has the last few words of the input as prefix, and
-// the suffix list contains an empty string.
-type StateMap map[Prefix][]string
-
func main() {
- maxGen := flag.Int("w", maxGenDefault, "maximum number of words to generate")
+ nPref := flag.Uint("p", nPrefDefault, "number of prefix words")
+ maxGen := flag.Uint("g", maxGenDefault, "maximum number of words to generate")
flag.Parse()
- states := build(os.Stdin)
+ states := build(os.Stdin, *nPref)
generate(states, *maxGen)
}
-// build populates a StateMap with words read from `in'.
-func build(in io.Reader) StateMap {
- states := make(StateMap)
- prefix := Prefix{}
+// build populates a StateMap with words read from `in'. States are keyed by
+// `nPref'-word prefixes.
+func build(in io.Reader, nPref uint) StateMap {
+ states := StateMap{make(map[string][]string), nPref}
+ prefix := make([]string, nPref)
scanner := bufio.NewScanner(in)
scanner.Split(bufio.ScanWords)
for scanner.Scan() {
suffix := scanner.Text()
- states[prefix] = append(states[prefix], suffix)
- prefix.push(suffix)
+ states.addSuffix(prefix, suffix)
+ prefix = advance(prefix, suffix)
}
- states[prefix] = append(states[prefix], "") // ending state
-
+ states.addSuffix(prefix, "") // ending state
return states
}
// generate produces at most `maxWords' of output, one word per line.
-func generate(states StateMap, maxWords int) {
- prefix := Prefix{}
- for i := 0; i < maxWords; i++ {
- suffixes := states[prefix]
+func generate(states StateMap, maxWords uint) {
+ prefix := make([]string, states.nPref)
+ var i uint
+ for i = 0; i < maxWords; i++ {
+ suffixes, ok := states.get(prefix)
+ if !ok {
+ log.Fatal("state map missing prefix:", prefix)
+ }
suffix := suffixes[rand.Intn(len(suffixes))]
if suffix == "" { // ending state
return
}
fmt.Println(suffix)
- prefix.push(suffix)
+ prefix = advance(prefix, suffix)
+ }
+}
+
+// StateMap associates each multi-word prefix with a list of suffix words. The
+// start-state prefix is a slice of empty strings, with the suffix list containing the
+// first word of the input text. The end-state has the last few words of the input as
+// prefix, and the suffix list contains an empty string.
+type StateMap struct {
+ m map[string][]string // prefix -> []suffix
+ nPref uint // number of prefix words
+}
+
+// addSuffix appends `suffix' to the list of suffixes associated with `prefix'.
+func (sm StateMap) addSuffix(prefix []string, suffix string) {
+ key := key(prefix)
+ sm.m[key] = append(sm.m[key], suffix)
+}
+
+// get retrieves the list of suffixes associated with `prefix', or returns false if
+// prefix is not a valid key to the state map.
+func (sm StateMap) get(prefix []string) (suffixes []string, ok bool) {
+ suffixes, ok = sm.m[key(prefix)]
+ return
+}
+
+// key converts a multi-word prefix into a hashable string that can be used as a map key.
+func key(prefix []string) string {
+ return fmt.Sprintf("%q", prefix)
+}
+
+// advance shifts the prefix forward to contain `word', returning the modified prefix.
+func advance(prefix []string, word string) []string {
+ if len(prefix) == 1 {
+ prefix[0] = word
+ } else if len(prefix) > 1 {
+ prefix = append(prefix[1:], word)
}
+ return prefix
}