Initial commit

This commit is contained in:
joachimschmidt557 2019-07-13 23:08:45 +02:00
commit f3bd7b0207
3 changed files with 106 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
nim_word_prediction
data/

View file

@ -0,0 +1,15 @@
# Package
# Metadata describing this package: a command-line tool for next-word
# prediction, built from the sources under `src` into a single binary.
version = "0.1.0"
author = "joachimschmidt557"
description = "Next word prediction"
license = "MIT"
srcDir = "src"
bin = @["nim_word_prediction"]
# Dependencies
# `neo` supplies the Matrix type the program uses for its word-pair counts.
requires "nim >= 0.20.0"
requires "neo >= 0.2.5"

View file

@ -0,0 +1,89 @@
import parseopt, tables, strutils
import neo
proc predict(word:string, matrix:Matrix[uint16],
             word2int:Table[string, int],
             int2word:Table[int, string]):string =
  ## Returns the word that was counted most often directly after `word`.
  ##
  ## * `matrix` - co-occurrence counts; row `i`, column `j` holds how often
  ##   the word with index `j` followed the word with index `i`.
  ## * `word2int` / `int2word` - mappings between words and matrix indices.
  ##
  ## Returns `""` when no prediction can be made: `word` is unknown, its
  ## index lies outside the matrix, or no successor was ever counted for it.
  ## Ties are resolved in favour of the lowest index.
  if not word2int.hasKey(word):
    return ""
  let w = word2int[word]
  # The vocabulary tables may hold more words than the matrix has rows;
  # such words have no counts, so avoid an out-of-bounds row access.
  if w < 0 or w >= matrix.M:
    return ""
  let rowW = matrix.row(w)
  var
    maxI = -1
    maxW = 0u16
  for i, x in rowW:
    # Strict comparison: all-zero rows leave maxI at -1.
    if x > maxW:
      maxI = i
      maxW = x
  if maxI < 0:
    return ""
  return int2word.getOrDefault(maxI, "")
proc updateMatrix(matrix:var Matrix[uint16], word1:string, word2:string,
                  word2int:var Table[string, int],
                  int2word:var Table[int, string]) =
  ## Records one observation of `word2` directly following `word1`.
  ##
  ## Words not seen before are appended to the vocabulary (`word2int` and
  ## `int2word`) with the next free index. The count at
  ## `matrix[index(word1), index(word2)]` is then incremented, provided both
  ## indices fit inside the matrix; pairs involving words beyond the matrix
  ## capacity are still added to the vocabulary but are not counted.
  for w in [word1, word2]:
    if not word2int.hasKey(w):
      let newIndex = len(word2int)
      word2int[w] = newIndex
      int2word[newIndex] = w
  let
    w1 = word2int[word1]
    w2 = word2int[word2]
  # Use the real matrix dimensions instead of a hard-coded capacity.
  if w1 < matrix.M and w2 < matrix.N:
    # Saturate instead of wrapping: unsigned arithmetic does not trap, so a
    # very frequent pair would otherwise roll over from 65535 back to 0.
    if matrix[w1, w2] < high(uint16):
      matrix[w1, w2] += 1
proc interactive(matrix:Matrix[uint16],
                 word2int:Table[string, int],
                 int2word:Table[int, string]) =
  ## Prompt-less read/predict loop: prints a banner, then for every line
  ## read from standard input prints the prediction for that line. Ends
  ## when standard input is exhausted.
  echo "-= Interactive prediction =-"
  var query = ""
  while stdin.readLine(query):
    let answer = predict(query, matrix, word2int, int2word)
    echo answer
proc main() =
  ## Command-line entry point.
  ##
  ## Options:
  ## * `-i` / `--interactive` - read words from stdin and predict for each.
  ## * `-s` / `--statistics`  - print the vocabulary size after training.
  ## * `-t:FILE` / `--text:FILE` - add FILE to the training texts (may be
  ##   given several times). The value must be attached with `:` or `=`.
  ##
  ## A positional argument selects the word to predict for in
  ## non-interactive mode (default `"I"`).
  var
    p = initOptParser()
    interactiveMode = false
    showStatistics = false
    word = "I"
    texts:seq[string]
    # 20000 x 20000 counters of type uint16.
    matrix = zeros(20000, 20000, uint16)
    word2int = initTable[string, int]()
    int2word = initTable[int, string]()

  # Parse arguments
  while true:
    p.next()
    case p.kind
    of cmdEnd: break
    of cmdShortOption, cmdLongOption:
      case p.key
      of "i", "interactive":
        interactiveMode = true
      of "s", "statistics":
        showStatistics = true
      of "t", "text":
        # Without an attached value there is no file to read; fail with a
        # clear message instead of trying to open the empty path.
        if p.val.len == 0:
          quit("Option '" & p.key &
               "' requires a file, e.g. --text:corpus.txt", 1)
        try:
          texts.add(readFile(p.val))
        except IOError:
          quit("Cannot read text file: " & p.val, 1)
      else:
        # Unknown options are ignored.
        discard
    of cmdArgument:
      word = p.key

  # Generate matrix
  for text in texts:
    # The empty string stands for "start of text" before the first token.
    var prevWord = ""
    # splitWhitespace skips runs of blanks/newlines, so no empty tokens
    # end up in the vocabulary or the counts.
    for token in text.splitWhitespace():
      updateMatrix(matrix, prevWord, token, word2int, int2word)
      prevWord = token

  if showStatistics:
    echo "-= Statistics =-"
    echo "Size of vocabulary: " & $word2int.len

  if interactiveMode:
    interactive(matrix, word2int, int2word)
  else:
    echo predict(word, matrix, word2int, int2word)

when isMainModule:
  main()