diff options
| author | Sam Anthony <sam@samanthony.xyz> | 2024-09-01 20:22:11 -0400 |
|---|---|---|
| committer | Sam Anthony <sam@samanthony.xyz> | 2024-09-01 20:22:11 -0400 |
| commit | 1343ef5b4c922ed512ae64a86d22970ebff7155d (patch) | |
| tree | 58cffab945089abb645bddc633130219d631c640 | |
| parent | 3a18f19dffeff93ddd2d3fb0ae8c8cd8b877241a (diff) | |
| download | markov-master.zip | |
| -rw-r--r-- | markov.go | 96 |
1 files changed, 59 insertions, 37 deletions
@@ -12,72 +12,94 @@ import ( "flag" "fmt" "io" + "log" "math/rand" "os" ) const ( - nPref = 2 // number of prefix words + nPrefDefault = 2 // number of prefix words maxGenDefault = 10000 // maximum number of words to generate ) -type Prefix struct { - words [nPref]string -} - -// push advances the prefix to contain `word'. -func (p *Prefix) push(word string) { - if nPref < 1 { - return - } else if nPref == 1 { - p.words[0] = word - } else { - p.words = [nPref]string(append(p.words[1:], word)) - } -} - -// StateMap associates each multi-word Prefix with a list of suffix words. The starting -// state is an empty Prefix{} with the suffix list containing the first word of the -// input text. The ending state has the last few words of the input as prefix, and -// the suffix list contains an empty string. -type StateMap map[Prefix][]string - func main() { - maxGen := flag.Int("w", maxGenDefault, "maximum number of words to generate") + nPref := flag.Uint("p", nPrefDefault, "number of prefix words") + maxGen := flag.Uint("g", maxGenDefault, "maximum number of words to generate") flag.Parse() - states := build(os.Stdin) + states := build(os.Stdin, *nPref) generate(states, *maxGen) } -// build populates a StateMap with words read from `in'. -func build(in io.Reader) StateMap { - states := make(StateMap) - prefix := Prefix{} +// build populates a StateMap with words read from `in'. States are keyed by +// `nPref'-word prefixes. +func build(in io.Reader, nPref uint) StateMap { + states := StateMap{make(map[string][]string), nPref} + prefix := make([]string, nPref) scanner := bufio.NewScanner(in) scanner.Split(bufio.ScanWords) for scanner.Scan() { suffix := scanner.Text() - states[prefix] = append(states[prefix], suffix) - prefix.push(suffix) + states.addSuffix(prefix, suffix) + prefix = advance(prefix, suffix) } - states[prefix] = append(states[prefix], "") // ending state - + states.addSuffix(prefix, "") // ending state return states } // generate produces at most `maxWords' of output, one word per line. -func generate(states StateMap, maxWords int) { - prefix := Prefix{} - for i := 0; i < maxWords; i++ { - suffixes := states[prefix] +func generate(states StateMap, maxWords uint) { + prefix := make([]string, states.nPref) + var i uint + for i = 0; i < maxWords; i++ { + suffixes, ok := states.get(prefix) + if !ok { + log.Fatal("state map missing prefix:", prefix) + } suffix := suffixes[rand.Intn(len(suffixes))] if suffix == "" { // ending state return } fmt.Println(suffix) - prefix.push(suffix) + prefix = advance(prefix, suffix) + } +} + +// StateMap associates each multi-word prefix with a list of suffix words. The +// start-state prefix is a slice of empty strings, with the suffix list containing the +// first word of the input text. The end-state has the last few words of the input as +// prefix, and the suffix list contains an empty string. +type StateMap struct { + m map[string][]string // prefix -> []suffix + nPref uint // number of prefix words +} + +// addSuffix appends `suffix' to the list of suffixes associated with `prefix'. +func (sm StateMap) addSuffix(prefix []string, suffix string) { + key := key(prefix) + sm.m[key] = append(sm.m[key], suffix) +} + +// get retrieves the list of suffixes associated with `prefix', or returns false if +// prefix is not a valid key to the state map. +func (sm StateMap) get(prefix []string) (suffixes []string, ok bool) { + suffixes, ok = sm.m[key(prefix)] + return +} + +// key converts a multi-word prefix into a hashable string that can be used as a map key. +func key(prefix []string) string { + return fmt.Sprintf("%q", prefix) +} + +// advance shifts the prefix forward to contain `word', returning the modified prefix. +func advance(prefix []string, word string) []string { + if len(prefix) == 1 { + prefix[0] = word + } else if len(prefix) > 1 { + prefix = append(prefix[1:], word) } + return prefix } |