pierreguillou
committed on
Commit
•
e2bf74f
1
Parent(s):
5947c6a
Upload 6 files
Browse files- files/blank.pdf +0 -0
- files/blank.png +0 -0
- files/example.pdf +0 -0
- files/languages_iso.csv +184 -0
- files/languages_tesseract.csv +127 -0
- files/wo_content.png +0 -0
files/blank.pdf
ADDED
Binary file (1.15 kB). View file
|
|
files/blank.png
ADDED
files/example.pdf
ADDED
Binary file (343 kB). View file
|
|
files/languages_iso.csv
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Language,LangCode
|
2 |
+
Abkhazian,ab
|
3 |
+
Afar,aa
|
4 |
+
Afrikaans,af
|
5 |
+
Akan,ak
|
6 |
+
Albanian,sq
|
7 |
+
Amharic,am
|
8 |
+
Arabic,ar
|
9 |
+
Aragonese,an
|
10 |
+
Armenian,hy
|
11 |
+
Assamese,as
|
12 |
+
Avaric,av
|
13 |
+
Avestan,ae
|
14 |
+
Aymara,ay
|
15 |
+
Azerbaijani,az
|
16 |
+
Bambara,bm
|
17 |
+
Bashkir,ba
|
18 |
+
Basque,eu
|
19 |
+
Belarusian,be
|
20 |
+
Bengali,bn
|
21 |
+
Bislama,bi
|
22 |
+
Bosnian,bs
|
23 |
+
Breton,br
|
24 |
+
Bulgarian,bg
|
25 |
+
Burmese,my
|
26 |
+
"Catalan, Valencian",ca
|
27 |
+
Chamorro,ch
|
28 |
+
Chechen,ce
|
29 |
+
"Chichewa, Chewa, Nyanja",ny
|
30 |
+
Chinese,zh
|
31 |
+
"Church Slavonic, Old Slavonic, Old Church Slavonic",cu
|
32 |
+
Chuvash,cv
|
33 |
+
Cornish,kw
|
34 |
+
Corsican,co
|
35 |
+
Cree,cr
|
36 |
+
Croatian,hr
|
37 |
+
Czech,cs
|
38 |
+
Danish,da
|
39 |
+
"Divehi, Dhivehi, Maldivian",dv
|
40 |
+
"Dutch, Flemish",nl
|
41 |
+
Dzongkha,dz
|
42 |
+
English,en
|
43 |
+
Esperanto,eo
|
44 |
+
Estonian,et
|
45 |
+
Ewe,ee
|
46 |
+
Faroese,fo
|
47 |
+
Fijian,fj
|
48 |
+
Finnish,fi
|
49 |
+
French,fr
|
50 |
+
Western Frisian,fy
|
51 |
+
Fulah,ff
|
52 |
+
"Gaelic, Scottish Gaelic",gd
|
53 |
+
Galician,gl
|
54 |
+
Ganda,lg
|
55 |
+
Georgian,ka
|
56 |
+
German,de
|
57 |
+
"Greek, Modern (1453–)",el
|
58 |
+
"Kalaallisut, Greenlandic",kl
|
59 |
+
Guarani,gn
|
60 |
+
Gujarati,gu
|
61 |
+
"Haitian, Haitian Creole",ht
|
62 |
+
Hausa,ha
|
63 |
+
Hebrew,he
|
64 |
+
Herero,hz
|
65 |
+
Hindi,hi
|
66 |
+
Hiri Motu,ho
|
67 |
+
Hungarian,hu
|
68 |
+
Icelandic,is
|
69 |
+
Ido,io
|
70 |
+
Igbo,ig
|
71 |
+
Indonesian,id
|
72 |
+
Interlingua (International Auxiliary Language Association),ia
|
73 |
+
"Interlingue, Occidental",ie
|
74 |
+
Inuktitut,iu
|
75 |
+
Inupiaq,ik
|
76 |
+
Irish,ga
|
77 |
+
Italian,it
|
78 |
+
Japanese,ja
|
79 |
+
Javanese,jv
|
80 |
+
Kannada,kn
|
81 |
+
Kanuri,kr
|
82 |
+
Kashmiri,ks
|
83 |
+
Kazakh,kk
|
84 |
+
Central Khmer,km
|
85 |
+
"Kikuyu, Gikuyu",ki
|
86 |
+
Kinyarwanda,rw
|
87 |
+
"Kirghiz, Kyrgyz",ky
|
88 |
+
Komi,kv
|
89 |
+
Kongo,kg
|
90 |
+
Korean,ko
|
91 |
+
"Kuanyama, Kwanyama",kj
|
92 |
+
Kurdish,ku
|
93 |
+
Lao,lo
|
94 |
+
Latin,la
|
95 |
+
Latvian,lv
|
96 |
+
"Limburgan, Limburger, Limburgish",li
|
97 |
+
Lingala,ln
|
98 |
+
Lithuanian,lt
|
99 |
+
Luba-Katanga,lu
|
100 |
+
"Luxembourgish, Letzeburgesch",lb
|
101 |
+
Macedonian,mk
|
102 |
+
Malagasy,mg
|
103 |
+
Malay,ms
|
104 |
+
Malayalam,ml
|
105 |
+
Maltese,mt
|
106 |
+
Manx,gv
|
107 |
+
Maori,mi
|
108 |
+
Marathi,mr
|
109 |
+
Marshallese,mh
|
110 |
+
Mongolian,mn
|
111 |
+
Nauru,na
|
112 |
+
"Navajo, Navaho",nv
|
113 |
+
North Ndebele,nd
|
114 |
+
South Ndebele,nr
|
115 |
+
Ndonga,ng
|
116 |
+
Nepali,ne
|
117 |
+
Norwegian,no
|
118 |
+
Norwegian Bokmål,nb
|
119 |
+
Norwegian Nynorsk,nn
|
120 |
+
"Sichuan Yi, Nuosu",ii
|
121 |
+
Occitan,oc
|
122 |
+
Ojibwa,oj
|
123 |
+
Oriya,or
|
124 |
+
Oromo,om
|
125 |
+
"Ossetian, Ossetic",os
|
126 |
+
Pali,pi
|
127 |
+
"Pashto, Pushto",ps
|
128 |
+
Persian,fa
|
129 |
+
Polish,pl
|
130 |
+
Portuguese,pt
|
131 |
+
"Punjabi, Panjabi",pa
|
132 |
+
Quechua,qu
|
133 |
+
"Romanian, Moldavian, Moldovan",ro
|
134 |
+
Romansh,rm
|
135 |
+
Rundi,rn
|
136 |
+
Russian,ru
|
137 |
+
Northern Sami,se
|
138 |
+
Samoan,sm
|
139 |
+
Sango,sg
|
140 |
+
Sanskrit,sa
|
141 |
+
Sardinian,sc
|
142 |
+
Serbian,sr
|
143 |
+
Shona,sn
|
144 |
+
Sindhi,sd
|
145 |
+
"Sinhala, Sinhalese",si
|
146 |
+
Slovak,sk
|
147 |
+
Slovenian,sl
|
148 |
+
Somali,so
|
149 |
+
Southern Sotho,st
|
150 |
+
"Spanish, Castilian",es
|
151 |
+
Sundanese,su
|
152 |
+
Swahili,sw
|
153 |
+
Swati,ss
|
154 |
+
Swedish,sv
|
155 |
+
Tagalog,tl
|
156 |
+
Tahitian,ty
|
157 |
+
Tajik,tg
|
158 |
+
Tamil,ta
|
159 |
+
Tatar,tt
|
160 |
+
Telugu,te
|
161 |
+
Thai,th
|
162 |
+
Tibetan,bo
|
163 |
+
Tigrinya,ti
|
164 |
+
Tonga (Tonga Islands),to
|
165 |
+
Tsonga,ts
|
166 |
+
Tswana,tn
|
167 |
+
Turkish,tr
|
168 |
+
Turkmen,tk
|
169 |
+
Twi,tw
|
170 |
+
"Uighur, Uyghur",ug
|
171 |
+
Ukrainian,uk
|
172 |
+
Urdu,ur
|
173 |
+
Uzbek,uz
|
174 |
+
Venda,ve
|
175 |
+
Vietnamese,vi
|
176 |
+
Volapük,vo
|
177 |
+
Walloon,wa
|
178 |
+
Welsh,cy
|
179 |
+
Wolof,wo
|
180 |
+
Xhosa,xh
|
181 |
+
Yiddish,yi
|
182 |
+
Yoruba,yo
|
183 |
+
"Zhuang, Chuang",za
|
184 |
+
Zulu,zu
|
files/languages_tesseract.csv
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Language,LangCode
|
2 |
+
Afrikaans,afr
|
3 |
+
Amharic,amh
|
4 |
+
Arabic,ara
|
5 |
+
Assamese,asm
|
6 |
+
Azerbaijani,aze
|
7 |
+
Azerbaijani - Cyrillic,aze_cyrl
|
8 |
+
Belarusian,bel
|
9 |
+
Bengali,ben
|
10 |
+
Tibetan,bod
|
11 |
+
Bosnian,bos
|
12 |
+
Breton,bre
|
13 |
+
Bulgarian,bul
|
14 |
+
Catalan; Valencian,cat
|
15 |
+
Cebuano,ceb
|
16 |
+
Czech,ces
|
17 |
+
Chinese - Simplified,chi_sim
|
18 |
+
Chinese - Traditional,chi_tra
|
19 |
+
Cherokee,chr
|
20 |
+
Corsican,cos
|
21 |
+
Welsh,cym
|
22 |
+
Danish,dan
|
23 |
+
Danish - Fraktur (contrib),dan_frak
|
24 |
+
German,deu
|
25 |
+
German - Fraktur (contrib),deu_frak
|
26 |
+
Dzongkha,dzo
|
27 |
+
"Greek, Modern (1453-)",ell
|
28 |
+
English,eng
|
29 |
+
"English, Middle (1100-1500)",enm
|
30 |
+
Esperanto,epo
|
31 |
+
Math / equation detection module,equ
|
32 |
+
Estonian,est
|
33 |
+
Basque,eus
|
34 |
+
Faroese,fao
|
35 |
+
Persian,fas
|
36 |
+
Filipino (old - Tagalog),fil
|
37 |
+
Finnish,fin
|
38 |
+
French,fra
|
39 |
+
German - Fraktur,frk
|
40 |
+
"French, Middle (ca.1400-1600)",frm
|
41 |
+
Western Frisian,fry
|
42 |
+
Scottish Gaelic,gla
|
43 |
+
Irish,gle
|
44 |
+
Galician,glg
|
45 |
+
"Greek, Ancient (to 1453) (contrib)",grc
|
46 |
+
Gujarati,guj
|
47 |
+
Haitian; Haitian Creole,hat
|
48 |
+
Hebrew,heb
|
49 |
+
Hindi,hin
|
50 |
+
Croatian,hrv
|
51 |
+
Hungarian,hun
|
52 |
+
Armenian,hye
|
53 |
+
Inuktitut,iku
|
54 |
+
Indonesian,ind
|
55 |
+
Icelandic,isl
|
56 |
+
Italian,ita
|
57 |
+
Italian - Old,ita_old
|
58 |
+
Javanese,jav
|
59 |
+
Japanese,jpn
|
60 |
+
Kannada,kan
|
61 |
+
Georgian,kat
|
62 |
+
Georgian - Old,kat_old
|
63 |
+
Kazakh,kaz
|
64 |
+
Central Khmer,khm
|
65 |
+
Kirghiz; Kyrgyz,kir
|
66 |
+
Kurmanji (Kurdish - Latin Script),kmr
|
67 |
+
Korean,kor
|
68 |
+
Korean (vertical),kor_vert
|
69 |
+
Kurdish (Arabic Script),kur
|
70 |
+
Lao,lao
|
71 |
+
Latin,lat
|
72 |
+
Latvian,lav
|
73 |
+
Lithuanian,lit
|
74 |
+
Luxembourgish,ltz
|
75 |
+
Malayalam,mal
|
76 |
+
Marathi,mar
|
77 |
+
Macedonian,mkd
|
78 |
+
Maltese,mlt
|
79 |
+
Mongolian,mon
|
80 |
+
Maori,mri
|
81 |
+
Malay,msa
|
82 |
+
Burmese,mya
|
83 |
+
Nepali,nep
|
84 |
+
Dutch; Flemish,nld
|
85 |
+
Norwegian,nor
|
86 |
+
Occitan (post 1500),oci
|
87 |
+
Oriya,ori
|
88 |
+
Orientation and script detection module,osd
|
89 |
+
Panjabi; Punjabi,pan
|
90 |
+
Polish,pol
|
91 |
+
Portuguese,por
|
92 |
+
Pushto; Pashto,pus
|
93 |
+
Quechua,que
|
94 |
+
Romanian; Moldavian; Moldovan,ron
|
95 |
+
Russian,rus
|
96 |
+
Sanskrit,san
|
97 |
+
Sinhala; Sinhalese,sin
|
98 |
+
Slovak,slk
|
99 |
+
Slovak - Fraktur (contrib),slk_frak
|
100 |
+
Slovenian,slv
|
101 |
+
Sindhi,snd
|
102 |
+
Spanish; Castilian,spa
|
103 |
+
Spanish; Castilian - Old,spa_old
|
104 |
+
Albanian,sqi
|
105 |
+
Serbian,srp
|
106 |
+
Serbian - Latin,srp_latn
|
107 |
+
Sundanese,sun
|
108 |
+
Swahili,swa
|
109 |
+
Swedish,swe
|
110 |
+
Syriac,syr
|
111 |
+
Tamil,tam
|
112 |
+
Tatar,tat
|
113 |
+
Telugu,tel
|
114 |
+
Tajik,tgk
|
115 |
+
Tagalog (new - Filipino),tgl
|
116 |
+
Thai,tha
|
117 |
+
Tigrinya,tir
|
118 |
+
Tonga,ton
|
119 |
+
Turkish,tur
|
120 |
+
Uighur; Uyghur,uig
|
121 |
+
Ukrainian,ukr
|
122 |
+
Urdu,urd
|
123 |
+
Uzbek,uzb
|
124 |
+
Uzbek - Cyrillic,uzb_cyrl
|
125 |
+
Vietnamese,vie
|
126 |
+
Yiddish,yid
|
127 |
+
Yoruba,yor
|
files/wo_content.png
ADDED