Alexis Palmer committed on
Commit
397ad86
1 Parent(s): bb475f1

new mapudungun word list

Browse files
Files changed (2) hide show
  1. app.py +11 -6
  2. mapudungun.easy.filtered +71 -0
app.py CHANGED
@@ -4,14 +4,19 @@ import re
4
  import random
5
 
6
  ### load and prepare corpus
7
- corpus = util.load_raw_text(corpus_directory="map_avenue")
8
 
9
- corpus = corpus.lower()
10
- word_regex = r"[a-z]+"
11
- def tokenize(text: str):
12
- return re.findall(word_regex, text)
 
 
 
 
 
 
13
 
14
- words = tokenize(corpus)
15
 
16
  lexicon = set()
17
  for word in words:
 
4
  import random
5
 
6
  ### load and prepare corpus
7
+ #corpus = util.load_raw_text(corpus_directory="map_avenue")
8
 
9
+ corpus = util.load_single_raw_text_file("mapudungun.easy.filtered")
10
+
11
+ #corpus = corpus.lower()
12
+ #word_regex = r"[a-z]+"
13
+ #def tokenize(text: str):
14
+ # return re.findall(word_regex, text)
15
+
16
+ #words = tokenize(corpus)
17
+ words = corpus.split()
18
+ print(words)
19
 
 
20
 
21
  lexicon = set()
22
  for word in words:
mapudungun.easy.filtered ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diwlliñ
2
+ chod
3
+ kompañ
4
+ ñukekantu
5
+ kallfü
6
+ chaw
7
+ epu
8
+ mellfü
9
+ küla
10
+ kawellu
11
+ wün'
12
+ üñüm
13
+ kinchika
14
+ namün'
15
+ kiñe
16
+ kelüchod
17
+ mari
18
+ kal'
19
+ pütra
20
+ yu
21
+ lig
22
+ ufisa
23
+ pangi
24
+ ñuke
25
+ challwa
26
+ chawkantu
27
+ karü
28
+ kadü
29
+ milla
30
+ kurü
31
+ küwü
32
+ kolü
33
+ kelü
34
+ waka
35
+ dewü
36
+ ñarki
37
+ kapüra
38
+ llampüdken
39
+ llalliñ
40
+ fallke
41
+ kechu
42
+ pilun
43
+ ligkelü
44
+ aylla
45
+ pura
46
+ paine
47
+ nawel
48
+ awawe
49
+ pel'
50
+ mañke
51
+ trulitruli
52
+ longko
53
+ wala
54
+ trewa
55
+ achawall
56
+ meli
57
+ chang
58
+ regle
59
+ kayu
60
+ foro
61
+ lien
62
+ filu
63
+ kewün'
64
+ peyu
65
+ rukü
66
+ luku
67
+ furi
68
+ lipang
69
+ koneku
70
+ nge
71
+ sañwe