summaryrefslogtreecommitdiffstats
path: root/tests/match.awk
diff options
context:
space:
mode:
author: Sam Anthony <sam@samanthony.xyz> 2024-08-31 15:20:11 -0400
committer: Sam Anthony <sam@samanthony.xyz> 2024-08-31 15:28:31 -0400
commit: 29c1c728b20c838bc94c7342f4664e5ecbdac113 (patch)
tree: ca1c7bc76f420c547757c8b66743d9ec5452763d /tests/match.awk
parent: 7b9206498ee8d5f6327da84e72c00bbc6ff1cf48 (diff)
download: markov-29c1c728b20c838bc94c7342f4664e5ecbdac113.zip
test that all generated words, pairs, and triples also occur in the input
Diffstat (limited to 'tests/match.awk')
-rw-r--r-- tests/match.awk 25
1 files changed, 25 insertions, 0 deletions
diff --git a/tests/match.awk b/tests/match.awk
new file mode 100644
index 0000000..8f35452
--- /dev/null
+++ b/tests/match.awk
@@ -0,0 +1,25 @@
+# Check that all words, pairs, and triples in output ARGV[2] are in original ARGV[1]; print each one that is not
+BEGIN {  # everything runs in BEGIN: both files are read explicitly with getline
+ while (getline <ARGV[1] > 0) {  # read the original input record by record; loop ends on EOF (0) or error (-1)
+ for (i = 1; i <= NF; i++) {  # every field of the record counts as one input word
+ wd[++nw] = $i # input words, in order of appearance (1-based)
+ single[$i]++  # membership table of individual input words
+ }
+ }
+ for (i = 1; i < nw; i++)  # every adjacent pair wd[i], wd[i+1] of the flat word list
+ pair[wd[i],wd[i+1]]++  # comma subscript: the two words are joined with SUBSEP into one key
+ for (i = 1; i < nw-1; i++)  # every run of three consecutive input words
+ triple[wd[i],wd[i+1],wd[i+2]]++
+
+ while (getline <ARGV[2] > 0) {  # read the generated output; the whole record ($0) is treated as one word
+ outwd[++ow] = $0 # output words, in order of appearance (1-based)
+ if (!($0 in single))  # report a word that never occurs in the input
+ print "unexpected word:", $0
+ }
+ for (i = 1; i < ow; i++)  # each adjacent output pair must also be an input pair
+ if (!((outwd[i],outwd[i+1]) in pair))
+ print "unexpected pair:", outwd[i], outwd[i+1]
+ for (i = 1; i < ow-1; i++)  # each consecutive output triple must also be an input triple
+ if (!((outwd[i],outwd[i+1],outwd[i+2]) in triple))
+ print "unexpected triple:", outwd[i], outwd[i+1], outwd[i+2]
+} \ No newline at end of file