Initial commit
This commit is contained in:
commit
f3bd7b0207
3 changed files with 106 additions and 0 deletions
89
src/nim_word_prediction.nim
Normal file
89
src/nim_word_prediction.nim
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
import parseopt, tables, strutils
|
||||
|
||||
import neo
|
||||
|
||||
proc predict(word:string, matrix:Matrix[uint16],
             word2int:Table[string, int],
             int2word:Table[int, string]):string =
  ## Returns the word with the highest count in the matrix row belonging to
  ## `word`, or "" when no prediction can be made.
  ##
  ## * `word`     - the word to look up.
  ## * `matrix`   - count matrix; row index = word, column index = candidate.
  ## * `word2int` - maps a word to its row/column index.
  ## * `int2word` - maps an index back to its word.
  ##
  ## "" is returned when `word` is unknown, when its index lies outside the
  ## matrix, or when every count in its row is zero. Ties are resolved in
  ## favour of the lowest column index (strict `>` comparison).
  if not word2int.hasKey(word):
    return ""
  let w = word2int[word]
  # The vocabulary table can contain indices that have no row in the matrix
  # (words are registered even when they fall outside the matrix), so guard
  # the row access instead of reading out of bounds.
  if w < 0 or w >= matrix.M:
    return ""
  var
    maxI = 0
    maxW = 0u16
  for i, x in matrix.row(w):
    if x > maxW:
      maxI = i
      maxW = x
  # An all-zero row means no candidate was ever counted; report "no
  # prediction" explicitly rather than whatever word happens to own index 0.
  if maxW == 0u16 or not int2word.hasKey(maxI):
    return ""
  return int2word[maxI]
|
||||
|
||||
proc updateMatrix(matrix:var Matrix[uint16], word1:string, word2:string,
                  word2int:var Table[string, int],
                  int2word:var Table[int, string]) =
  ## Records one observation of the pair (`word1`, `word2`).
  ##
  ## Any of the two words not yet present in `word2int` is assigned the next
  ## free index (the current table length) and stored in both lookup tables;
  ## `word1` is registered before `word2`. Afterwards the counter at
  ## `matrix[index(word1), index(word2)]` is incremented, provided both
  ## indices fit inside the matrix.
  ##
  ## The counter saturates at `high(uint16)`: unsigned arithmetic wraps
  ## silently, so an unchecked increment would reset a frequent pair to 0.
  for w in [word1, word2]:
    if not word2int.hasKey(w):
      let newIndex = len(word2int)
      word2int[w] = newIndex
      int2word[newIndex] = w
  let
    w1 = word2int[word1]
    w2 = word2int[word2]
  # Bound by the matrix's own dimensions rather than a repeated literal, so
  # the check stays correct if the matrix is created with a different size.
  if w1 < matrix.M and w2 < matrix.N:
    if matrix[w1, w2] < high(uint16):
      matrix[w1, w2] += 1
|
||||
|
||||
proc interactive(matrix:Matrix[uint16],
                 word2int:Table[string, int],
                 int2word:Table[int, string]) =
  ## Prints a banner, then reads standard input line by line and echoes the
  ## result of `predict` for each line until input is exhausted.
  echo "-= Interactive prediction =-"
  var query = ""
  while stdin.readLine(query):
    echo query.predict(matrix, word2int, int2word)
|
||||
|
||||
proc main() =
  ## Command-line entry point.
  ##
  ## Options:
  ## * `-i` / `--interactive` - answer lines from standard input.
  ## * `-s` / `--statistics`  - print the vocabulary size.
  ## * `-t` / `--text`        - read the file given as the option value and
  ##   use it as training text (may be repeated).
  ##
  ## A positional argument sets the word to predict from (default "I"); when
  ## several are given the last one wins.
  var
    parser = initOptParser()
    interactiveMode = false
    showStatistics = false
    query = "I"
    corpora: seq[string]
    matrix = zeros(20000, 20000, uint16)
    word2int = initTable[string, int]()
    int2word = initTable[int, string]()

  # Parse arguments
  for kind, key, val in parser.getopt():
    case kind
    of cmdShortOption, cmdLongOption:
      case key
      of "i", "interactive":
        interactiveMode = true
      of "s", "statistics":
        showStatistics = true
      of "t", "text":
        corpora.add(readFile(val))
      else:
        discard
    of cmdArgument:
      query = key
    of cmdEnd:
      discard

  # Generate matrix: feed every consecutive pair of tokens to updateMatrix.
  # The first token of each text is paired with the empty string.
  for corpus in corpora:
    var previous = ""
    for token in corpus.split():
      updateMatrix(matrix, previous, token, word2int, int2word)
      previous = token

  if showStatistics:
    echo "-= Statistics =-"
    echo "Size of vocabulary: ", word2int.len

  if not interactiveMode:
    echo predict(query, matrix, word2int, int2word)
  else:
    interactive(matrix, word2int, int2word)
|
||||
|
||||
# Run the command-line tool only when this file is compiled as the main
# module.
when isMainModule: main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue