Initial commit
This commit is contained in:
commit
f3bd7b0207
3 changed files with 106 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
nim_word_prediction
|
||||||
|
data/
|
||||||
15
nim_word_prediction.nimble
Normal file
15
nim_word_prediction.nimble
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
# Package
|
||||||
|
|
||||||
|
version = "0.1.0"
|
||||||
|
author = "joachimschmidt557"
|
||||||
|
description = "Next word prediction"
|
||||||
|
license = "MIT"
|
||||||
|
srcDir = "src"
|
||||||
|
bin = @["nim_word_prediction"]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Dependencies
|
||||||
|
|
||||||
|
requires "nim >= 0.20.0"
|
||||||
|
requires "neo >= 0.2.5"
|
||||||
89
src/nim_word_prediction.nim
Normal file
89
src/nim_word_prediction.nim
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
import parseopt, tables, strutils
|
||||||
|
|
||||||
|
import neo
|
||||||
|
|
||||||
|
proc predict(word: string, matrix: Matrix[uint16],
             word2int: Table[string, int],
             int2word: Table[int, string]): string =
  ## Returns the word with the highest count in the matrix row of `word`.
  ##
  ## `word2int` maps a word to its row/column index and `int2word` maps an
  ## index back to its word. Returns "" when `word` is unknown, when its
  ## index has no row in `matrix`, or when every count in its row is zero.
  if word notin word2int:
    return ""
  let w = word2int[word]
  # The lookup tables may hold more words than the matrix has rows; such
  # words have no statistics and must not be used as a row index.
  if w < 0 or w >= matrix.M:
    return ""
  var
    bestIndex = -1
    bestCount = 0u16
  for i, count in matrix.row(w):
    # Strict comparison keeps the first index among equal counts.
    if count > bestCount:
      bestIndex = i
      bestCount = count
  # bestIndex is still -1 when no successor was ever counted; answering
  # with the word at index 0 would be a made-up prediction.
  if bestIndex < 0:
    return ""
  return int2word[bestIndex]
|
||||||
|
|
||||||
|
proc updateMatrix(matrix: var Matrix[uint16], word1: string, word2: string,
                  word2int: var Table[string, int],
                  int2word: var Table[int, string]) =
  ## Records one observation of the pair (`word1`, `word2`).
  ##
  ## A word missing from `word2int` is assigned the next free index (the
  ## current size of `word2int`) and registered in both lookup tables.
  ## The counter at `matrix[index(word1), index(word2)]` is then
  ## incremented, provided both indices lie inside the matrix.
  for w in [word1, word2]:
    if w notin word2int:
      let newIndex = word2int.len
      word2int[w] = newIndex
      int2word[newIndex] = w
  let
    w1 = word2int[word1]
    w2 = word2int[word2]
  # Words past the matrix capacity stay in the tables but are not counted.
  if w1 < matrix.M and w2 < matrix.N:
    # uint16 arithmetic wraps around silently; saturate at the maximum so
    # a very frequent pair does not drop back to a count of 0.
    if matrix[w1, w2] < high(uint16):
      matrix[w1, w2] += 1
|
||||||
|
|
||||||
|
proc interactive(matrix: Matrix[uint16],
                 word2int: Table[string, int],
                 int2word: Table[int, string]) =
  ## Prints a banner, then reads standard input line by line until it is
  ## exhausted, echoing the result of `predict` for each complete line.
  echo "-= Interactive prediction =-"
  var query = ""
  while stdin.readLine(query):
    let answer = predict(query, matrix, word2int, int2word)
    echo answer
|
||||||
|
|
||||||
|
proc main() =
  ## Command-line entry point.
  ##
  ## Options:
  ## * `-i` / `--interactive`: answer queries read from standard input.
  ## * `-s` / `--statistics`: print the vocabulary size after training.
  ## * `-t:FILE` / `--text:FILE`: add FILE to the training texts
  ##   (may be given several times).
  ##
  ## A positional argument is the word to predict a successor for when not
  ## running interactively; it defaults to "I".
  const vocabularyCapacity = 20000
  var
    p = initOptParser()
    interactiveMode = false
    showStatistics = false
    word = "I"
    texts: seq[string]
    matrix = zeros(vocabularyCapacity, vocabularyCapacity, uint16)
    word2int = initTable[string, int]()
    int2word = initTable[int, string]()

  # Parse arguments
  while true:
    p.next()
    case p.kind
    of cmdEnd: break
    of cmdShortOption, cmdLongOption:
      case p.key
      of "i", "interactive":
        interactiveMode = true
      of "s", "statistics":
        showStatistics = true
      of "t", "text":
        # parseopt only attaches a value written as -t:FILE or -t=FILE;
        # "-t FILE" leaves the value empty, and readFile("") would fail
        # with an unhelpful IOError.
        if p.val.len == 0:
          quit("option -t/--text needs a file name, e.g. -t:corpus.txt",
               QuitFailure)
        texts.add(readFile(p.val))
      else:
        discard
    of cmdArgument:
      word = p.key

  # Generate matrix
  for text in texts:
    # Each text starts from the empty word, so the first real word is
    # counted as a successor of "".
    var prevWord = ""
    # splitWhitespace skips runs of blanks and newlines; split() would
    # yield empty strings there and teach the model "" as a regular word.
    for current in text.splitWhitespace():
      updateMatrix(matrix, prevWord, current, word2int, int2word)
      prevWord = current

  if showStatistics:
    echo "-= Statistics =-"
    echo "Size of vocabulary: " & $word2int.len

  if interactiveMode:
    interactive(matrix, word2int, int2word)
  else:
    echo predict(word, matrix, word2int, int2word)
|
||||||
|
|
||||||
|
# Run the command-line tool only when this file is compiled as the main
# module, not when it is imported by another module.
when isMainModule: main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue