Spaces:
Runtime error
Runtime error
grosenthal
committed on
Commit
•
6873a01
1
Parent(s):
fdd800c
start work on explainer page
Browse files
- app.py +1 -1
- src/aineid/src/{App.tsx → App.jsx} +11 -2
- src/aineid/src/ProcessExplainer.jsx +205 -0
- src/aineid/src/{TopBar.tsx → TopBar.jsx} +16 -2
app.py
CHANGED
@@ -250,7 +250,7 @@ def process_handler(text):
|
|
250 |
'base': text,
|
251 |
'morph': morph_text,
|
252 |
'morph_simplified': morph_simplified_text,
|
253 |
-
'
|
254 |
},
|
255 |
'tokenized':{
|
256 |
'base': tokenize_base(text),
|
|
|
250 |
'base': text,
|
251 |
'morph': morph_text,
|
252 |
'morph_simplified': morph_simplified_text,
|
253 |
+
'case': case_text
|
254 |
},
|
255 |
'tokenized':{
|
256 |
'base': tokenize_base(text),
|
src/aineid/src/{App.tsx → App.jsx}
RENAMED
@@ -1,4 +1,5 @@
|
|
1 |
import * as React from "react"
|
|
|
2 |
import {
|
3 |
ChakraProvider,
|
4 |
Box,
|
@@ -14,17 +15,25 @@ import { ColorModeSwitcher } from "./ColorModeSwitcher"
|
|
14 |
import { Logo } from "./Logo"
|
15 |
import TopBar from "./TopBar"
|
16 |
import MainPage from "./MainPage"
|
|
|
17 |
export const App = () => {
|
18 |
const bg = useColorModeValue("brand.800", "brand.200");
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
return (
|
21 |
<ChakraProvider theme={theme}>
|
22 |
<Flex direction="column" overflow='hidden'>
|
23 |
<Box>
|
24 |
-
<TopBar />
|
25 |
</Box>
|
26 |
<Box pt={73} mb={-73} >
|
27 |
-
|
|
|
|
|
28 |
</Box>
|
29 |
|
30 |
</Flex>
|
|
|
1 |
import * as React from "react"
|
2 |
+
import {useState} from 'react'
|
3 |
import {
|
4 |
ChakraProvider,
|
5 |
Box,
|
|
|
15 |
import { Logo } from "./Logo"
|
16 |
import TopBar from "./TopBar"
|
17 |
import MainPage from "./MainPage"
|
18 |
+
import ProcessExplainer from './ProcessExplainer'
|
19 |
export const App = () => {
|
20 |
const bg = useColorModeValue("brand.800", "brand.200");
|
21 |
+
const [currentPage, setCurrentPage] = useState("main")
|
22 |
+
const [processInputText, setProcessInputText] = useState("Post hoc, ergo propter hoc");
|
23 |
+
const [tokenized, setTokenized] = useState({});
|
24 |
+
const [isProcessLoading, setIsProcessLoading] = useState(false);
|
25 |
+
let processState = {processInputText, setProcessInputText, tokenized, setTokenized, isProcessLoading, setIsProcessLoading};
|
26 |
|
27 |
return (
|
28 |
<ChakraProvider theme={theme}>
|
29 |
<Flex direction="column" overflow='hidden'>
|
30 |
<Box>
|
31 |
+
<TopBar currentPage={currentPage} setCurrentPage={setCurrentPage}/>
|
32 |
</Box>
|
33 |
<Box pt={73} mb={-73} >
|
34 |
+
{
|
35 |
+
currentPage == 'main' ? <MainPage /> : <ProcessExplainer state={processState}/>
|
36 |
+
}
|
37 |
</Box>
|
38 |
|
39 |
</Flex>
|
src/aineid/src/ProcessExplainer.jsx
ADDED
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import React, { useState } from "react";
|
2 |
+
import {
|
3 |
+
Input,
|
4 |
+
useToast,
|
5 |
+
} from "@chakra-ui/react";
|
6 |
+
import {
|
7 |
+
Box,
|
8 |
+
Flex,
|
9 |
+
Heading,
|
10 |
+
useColorMode,
|
11 |
+
Textarea,
|
12 |
+
Text,
|
13 |
+
Button,
|
14 |
+
Spinner,
|
15 |
+
Alert,
|
16 |
+
AlertIcon,
|
17 |
+
Code,
|
18 |
+
Stack
|
19 |
+
} from "@chakra-ui/react";
|
20 |
+
import axios from "axios";
|
21 |
+
import { Link } from '@chakra-ui/react'
|
22 |
+
function ProcessExplainer({state}) {
|
23 |
+
|
24 |
+
let {processInputText, setProcessInputText, tokenized, setTokenized, isProcessLoading, setIsProcessLoading} = state;
|
25 |
+
// const [processInputText, setProcessInputText] = useState("Post hoc, ergo propter hoc");
|
26 |
+
// const [tokenized, setTokenized] = useState({});
|
27 |
+
// const [isProcessLoading, setIsProcessLoading] = useState(false);
|
28 |
+
const { colorMode } = useColorMode();
|
29 |
+
|
30 |
+
const bgColor = { light: "white", dark: "gray.800" };
|
31 |
+
const placeholderColor = { light: "gray.400", dark: "gray.500" };
|
32 |
+
|
33 |
+
const toast = useToast();
|
34 |
+
|
35 |
+
const handleInputChange = (event) => {
|
36 |
+
setProcessInputText(event.target.value);
|
37 |
+
};
|
38 |
+
|
39 |
+
const nameMap = {
|
40 |
+
base: "Raw input",
|
41 |
+
case: "Case-Split",
|
42 |
+
morph: "Morphological Split",
|
43 |
+
morph_simplified: "Simplified Morphological Split"
|
44 |
+
};
|
45 |
+
|
46 |
+
const handleButtonClick = async () => {
|
47 |
+
setIsProcessLoading(true);
|
48 |
+
|
49 |
+
try {
|
50 |
+
axios
|
51 |
+
.get(`https://grosenthal-latin-english-eco.hf.space/process/?text=${processInputText}`)
|
52 |
+
.then((response) => {
|
53 |
+
setIsProcessLoading(false);
|
54 |
+
setTokenized(JSON.parse(response.data));
|
55 |
+
})
|
56 |
+
.catch((error) => {
|
57 |
+
setIsProcessLoading(false);
|
58 |
+
console.error(error);
|
59 |
+
});
|
60 |
+
|
61 |
+
|
62 |
+
} catch (error) {
|
63 |
+
toast({
|
64 |
+
title: "Error",
|
65 |
+
description: error.message,
|
66 |
+
status: "error",
|
67 |
+
duration: 5000,
|
68 |
+
isClosable: true,
|
69 |
+
});
|
70 |
+
}
|
71 |
+
|
72 |
+
setIsProcessLoading(false);
|
73 |
+
};
|
74 |
+
|
75 |
+
return (
|
76 |
+
<Box mt="0rem">
|
77 |
+
<Flex
|
78 |
+
justify="center"
|
79 |
+
minH="100vh"
|
80 |
+
bgGradient={
|
81 |
+
colorMode == "dark"
|
82 |
+
? "linear(to-b, teal.400, teal.800)"
|
83 |
+
: "linear(red.100 0%, orange.100 25%, yellow.100 50%)"
|
84 |
+
}
|
85 |
+
mt={0}
|
86 |
+
>
|
87 |
+
<Flex direction="column" align="center">
|
88 |
+
<Box maxW="6xl" p={8} rounded="md" textAlign="center" mb={4}>
|
89 |
+
<Box>
|
90 |
+
<Heading as="h2" size="4xl" mb={8}>
|
91 |
+
About
|
92 |
+
</Heading>
|
93 |
+
<Text fontSize="xl" mb={8}>
|
94 |
+
AI doesn't have an intrinsic understanding of language. Explore the process that your input undergoes to be translated.
|
95 |
+
</Text>
|
96 |
+
</Box>
|
97 |
+
</Box>
|
98 |
+
<Box
|
99 |
+
maxW="4xl"
|
100 |
+
bg={bgColor[colorMode]}
|
101 |
+
boxShadow="xl"
|
102 |
+
p={8}
|
103 |
+
rounded="md"
|
104 |
+
textAlign="center"
|
105 |
+
>
|
106 |
+
{!tokenized['processed_texts'] || isProcessLoading ? (
|
107 |
+
<Box>
|
108 |
+
<Textarea
|
109 |
+
placeholder="Latin Input"
|
110 |
+
size="lg"
|
111 |
+
_placeholder={{ color: placeholderColor[colorMode] }}
|
112 |
+
value={processInputText}
|
113 |
+
onChange={handleInputChange}
|
114 |
+
isInvalid={/\d/.test(processInputText)}
|
115 |
+
resize="vertical"
|
116 |
+
mb={4}
|
117 |
+
/>
|
118 |
+
{/\d/.test(processInputText) &&
|
119 |
+
<Alert status='warning' mb={6}>
|
120 |
+
<AlertIcon />
|
121 |
+
You have English numbers in your source text - these should not be found in Latin source. Either remove them or be prepared to receive corrupted translations.
|
122 |
+
</Alert>}
|
123 |
+
{
|
124 |
+
isProcessLoading ?
|
125 |
+
<Spinner size="xl" />
|
126 |
+
:
|
127 |
+
<Button colorScheme="teal" size="lg" onClick={handleButtonClick}>
|
128 |
+
Submit
|
129 |
+
</Button>
|
130 |
+
}
|
131 |
+
|
132 |
+
</Box>
|
133 |
+
) : (
|
134 |
+
<Box>
|
135 |
+
<Heading as="h2" size="xl" mb={4}>
|
136 |
+
Pre-processing
|
137 |
+
</Heading>
|
138 |
+
<Text textAlign={"left"}>
|
139 |
+
First, your text needs to be pre-processed. For the base model, this mostly consists of normalizing letters (turning а̄ into a and œ into oe).
|
140 |
+
However, for the other models, this is a far more advanced process. After normalizing, we use the tool {' '}
|
141 |
+
<Link color='teal.500' href='https://latin-words.com/'>
|
142 |
+
Whitaker's Words
|
143 |
+
</Link>
|
144 |
+
{' '} to perform lookups on each word. An example output for the word "amat" is:
|
145 |
+
<Stack direction='column' textAlign={"left"} mt="3" mb="3">
|
146 |
+
<Code>
|
147 |
+
am.at V 1 1 PRES ACTIVE IND 3 S
|
148 |
+
</Code>
|
149 |
+
<Code>
|
150 |
+
amo, amare, amavi, amatus V (1st) [XXXAO]
|
151 |
+
</Code>
|
152 |
+
<Code>
|
153 |
+
love, like; fall in love with; be fond of; have a tendency to;
|
154 |
+
</Code>
|
155 |
+
</Stack>
|
156 |
+
</Text>
|
157 |
+
{"\n"}
|
158 |
+
<Text textAlign={"left"}>
|
159 |
+
|
160 |
+
This provides us with extremely useful <Link color='teal.500' href='https://en.wikipedia.org/wiki/Morphology_(linguistics)'> Morphological Information</Link> on the word.
|
161 |
+
|
162 |
+
First, we get a <Text as="span" fontWeight={"bold"}>stem-case split:</Text> "am" is the stem and "at" is the case - all forms stemming from "am" will have similar root meaning (love), but will be different in their specific inflection implying voice, person, number, and more.
|
163 |
+
We use this information for the "Case-Split" model, by splitting every inflected word into two - the stem and the case marked by "CASE_" at the start. Words that do not have any variant forms stay the same. "amat" would be converted to "am CASE_at".
|
164 |
+
</Text>
|
165 |
+
|
166 |
+
<Text textAlign={"left"} mt="3">
|
167 |
+
|
168 |
+
We also get outright <Text as="span" fontWeight={"bold"}>morphological splits:</Text> everything on the top line of the Whitaker's Words output, with spaces removed - V11PRESACTIVEIND3S.
|
169 |
+
This seemingly nonsensical string of characters tells us that "amat" is a 3rd Person Singular Present Active Indicative form of "amo", which is a 1st conjugation 1st type verb. We want to provide all of this rich
|
170 |
+
information about the usage of the word to the model, so in the same way, we convert the words - this time to the root and morphology: "amat" turns into "am V11PRESACTIVEIND3S". Given that no Latin word
|
171 |
+
has a digit in it, we can automatically distinguish these words in the model without having to add CASE_ to the front of them.
|
172 |
+
We use this information for the "Morphological Split" model.
|
173 |
+
</Text>
|
174 |
+
|
175 |
+
<Text textAlign={"left"} mt="3">
|
176 |
+
|
177 |
+
It seems that the morphological information we acquire might be, indeed, a little too specialized - does the model really care whether the noun is 1st declension or 2nd?
|
178 |
+
To assuage those worries, we have a <Text as="span" fontWeight={"bold"}>simplified morphological splits</Text> model, which takes the information of the morphological split model and throws away all the unnecessary.
|
179 |
+
We make the morphological strings more readable and throw away the seemingly unecessary information, and result in "am Verb_Present_Active_Indicative_Third_Singular" - the morphology is still only one word, though.
|
180 |
+
</Text>
|
181 |
+
|
182 |
+
<Text textAlign={"left"} mt="3">
|
183 |
+
|
184 |
+
Your text, after pre-processing in the various methods, looks like:
|
185 |
+
{Object.entries(tokenized['processed_texts'] || {}).map(([key, value]) => (
|
186 |
+
<Box key={key} mb={4}>
|
187 |
+
<Text fontWeight="bold">{nameMap[key]}:</Text>
|
188 |
+
<Text>{value}</Text>
|
189 |
+
</Box>
|
190 |
+
))}
|
191 |
+
</Text>
|
192 |
+
<Button colorScheme="teal" size="lg" onClick={() => setTokenized({})}>
|
193 |
+
Translate another text
|
194 |
+
</Button>
|
195 |
+
|
196 |
+
</Box>
|
197 |
+
)}
|
198 |
+
</Box>
|
199 |
+
</Flex>
|
200 |
+
</Flex>
|
201 |
+
</Box>
|
202 |
+
);
|
203 |
+
}
|
204 |
+
|
205 |
+
export default ProcessExplainer;
|
src/aineid/src/{TopBar.tsx → TopBar.jsx}
RENAMED
@@ -2,7 +2,7 @@ import { Box, Flex, useColorMode } from "@chakra-ui/react";
|
|
2 |
import { ColorModeSwitcher } from "./ColorModeSwitcher";
|
3 |
import { Heading } from "@chakra-ui/layout";
|
4 |
|
5 |
-
function TopBar() {
|
6 |
const { colorMode } = useColorMode();
|
7 |
const textColor = {
|
8 |
light: "gray.700",
|
@@ -30,13 +30,27 @@ function TopBar() {
|
|
30 |
fontSize="xl"
|
31 |
fontWeight="bold"
|
32 |
color={textColor[colorMode]}
|
33 |
-
textAlign="center"
|
34 |
w="100%"
|
35 |
maxW="400px"
|
36 |
mx="auto"
|
|
|
37 |
>
|
38 |
AIneid
|
39 |
</Heading>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
<ColorModeSwitcher justifySelf="flex-end" />
|
41 |
</Flex>
|
42 |
</Box>
|
|
|
2 |
import { ColorModeSwitcher } from "./ColorModeSwitcher";
|
3 |
import { Heading } from "@chakra-ui/layout";
|
4 |
|
5 |
+
function TopBar({ currentPage, setCurrentPage }) {
|
6 |
const { colorMode } = useColorMode();
|
7 |
const textColor = {
|
8 |
light: "gray.700",
|
|
|
30 |
fontSize="xl"
|
31 |
fontWeight="bold"
|
32 |
color={textColor[colorMode]}
|
33 |
+
// textAlign="center"
|
34 |
w="100%"
|
35 |
maxW="400px"
|
36 |
mx="auto"
|
37 |
+
onClick={() => setCurrentPage("main")}
|
38 |
>
|
39 |
AIneid
|
40 |
</Heading>
|
41 |
+
|
42 |
+
<Heading
|
43 |
+
fontSize="l"
|
44 |
+
fontWeight="bold"
|
45 |
+
color={textColor[colorMode]}
|
46 |
+
textAlign="right"
|
47 |
+
w="100%"
|
48 |
+
maxW="400px"
|
49 |
+
mx="auto"
|
50 |
+
onClick={() => setCurrentPage("about")}
|
51 |
+
>
|
52 |
+
About
|
53 |
+
</Heading>
|
54 |
<ColorModeSwitcher justifySelf="flex-end" />
|
55 |
</Flex>
|
56 |
</Box>
|