grosenthal committed on
Commit
6873a01
1 Parent(s): fdd800c

start work on explainer page

Browse files
app.py CHANGED
@@ -250,7 +250,7 @@ def process_handler(text):
250
  'base': text,
251
  'morph': morph_text,
252
  'morph_simplified': morph_simplified_text,
253
- 'split': case_text
254
  },
255
  'tokenized':{
256
  'base': tokenize_base(text),
 
250
  'base': text,
251
  'morph': morph_text,
252
  'morph_simplified': morph_simplified_text,
253
+ 'case': case_text
254
  },
255
  'tokenized':{
256
  'base': tokenize_base(text),
src/aineid/src/{App.tsx → App.jsx} RENAMED
@@ -1,4 +1,5 @@
1
  import * as React from "react"
 
2
  import {
3
  ChakraProvider,
4
  Box,
@@ -14,17 +15,25 @@ import { ColorModeSwitcher } from "./ColorModeSwitcher"
14
  import { Logo } from "./Logo"
15
  import TopBar from "./TopBar"
16
  import MainPage from "./MainPage"
 
17
  export const App = () => {
18
  const bg = useColorModeValue("brand.800", "brand.200");
 
 
 
 
 
19
 
20
  return (
21
  <ChakraProvider theme={theme}>
22
  <Flex direction="column" overflow='hidden'>
23
  <Box>
24
- <TopBar />
25
  </Box>
26
  <Box pt={73} mb={-73} >
27
- <MainPage />
 
 
28
  </Box>
29
 
30
  </Flex>
 
1
  import * as React from "react"
2
+ import {useState} from 'react'
3
  import {
4
  ChakraProvider,
5
  Box,
 
15
  import { Logo } from "./Logo"
16
  import TopBar from "./TopBar"
17
  import MainPage from "./MainPage"
18
+ import ProcessExplainer from './ProcessExplainer'
19
  export const App = () => {
20
  const bg = useColorModeValue("brand.800", "brand.200");
21
+ const [currentPage, setCurrentPage] = useState("main")
22
+ const [processInputText, setProcessInputText] = useState("Post hoc, ergo propter hoc");
23
+ const [tokenized, setTokenized] = useState({});
24
+ const [isProcessLoading, setIsProcessLoading] = useState(false);
25
+ let processState = {processInputText, setProcessInputText, tokenized, setTokenized, isProcessLoading, setIsProcessLoading};
26
 
27
  return (
28
  <ChakraProvider theme={theme}>
29
  <Flex direction="column" overflow='hidden'>
30
  <Box>
31
+ <TopBar currentPage={currentPage} setCurrentPage={setCurrentPage}/>
32
  </Box>
33
  <Box pt={73} mb={-73} >
34
+ {
35
+ currentPage == 'main' ? <MainPage /> : <ProcessExplainer state={processState}/>
36
+ }
37
  </Box>
38
 
39
  </Flex>
src/aineid/src/ProcessExplainer.jsx ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState } from "react";
2
+ import {
3
+ Input,
4
+ useToast,
5
+ } from "@chakra-ui/react";
6
+ import {
7
+ Box,
8
+ Flex,
9
+ Heading,
10
+ useColorMode,
11
+ Textarea,
12
+ Text,
13
+ Button,
14
+ Spinner,
15
+ Alert,
16
+ AlertIcon,
17
+ Code,
18
+ Stack
19
+ } from "@chakra-ui/react";
20
+ import axios from "axios";
21
+ import { Link } from '@chakra-ui/react'
22
+ function ProcessExplainer({state}) {
23
+
24
+ let {processInputText, setProcessInputText, tokenized, setTokenized, isProcessLoading, setIsProcessLoading} = state;
25
+ // const [processInputText, setProcessInputText] = useState("Post hoc, ergo propter hoc");
26
+ // const [tokenized, setTokenized] = useState({});
27
+ // const [isProcessLoading, setIsProcessLoading] = useState(false);
28
+ const { colorMode } = useColorMode();
29
+
30
+ const bgColor = { light: "white", dark: "gray.800" };
31
+ const placeholderColor = { light: "gray.400", dark: "gray.500" };
32
+
33
+ const toast = useToast();
34
+
35
+ const handleInputChange = (event) => {
36
+ setProcessInputText(event.target.value);
37
+ };
38
+
39
+ const nameMap = {
40
+ base: "Raw input",
41
+ case: "Case-Split",
42
+ morph: "Morphological Split",
43
+ morph_simplified: "Simplified Morphological Split"
44
+ };
45
+
46
+ const handleButtonClick = async () => {
47
+ setIsProcessLoading(true);
48
+
49
+ try {
50
+ axios
51
+ .get(`https://grosenthal-latin-english-eco.hf.space/process/?text=${processInputText}`)
52
+ .then((response) => {
53
+ setIsProcessLoading(false);
54
+ setTokenized(JSON.parse(response.data));
55
+ })
56
+ .catch((error) => {
57
+ setIsProcessLoading(false);
58
+ console.error(error);
59
+ });
60
+
61
+
62
+ } catch (error) {
63
+ toast({
64
+ title: "Error",
65
+ description: error.message,
66
+ status: "error",
67
+ duration: 5000,
68
+ isClosable: true,
69
+ });
70
+ }
71
+
72
+ setIsProcessLoading(false);
73
+ };
74
+
75
+ return (
76
+ <Box mt="0rem">
77
+ <Flex
78
+ justify="center"
79
+ minH="100vh"
80
+ bgGradient={
81
+ colorMode == "dark"
82
+ ? "linear(to-b, teal.400, teal.800)"
83
+ : "linear(red.100 0%, orange.100 25%, yellow.100 50%)"
84
+ }
85
+ mt={0}
86
+ >
87
+ <Flex direction="column" align="center">
88
+ <Box maxW="6xl" p={8} rounded="md" textAlign="center" mb={4}>
89
+ <Box>
90
+ <Heading as="h2" size="4xl" mb={8}>
91
+ About
92
+ </Heading>
93
+ <Text fontSize="xl" mb={8}>
94
+ AI doesn't have an intrinsic understanding of language. Explore the process that your input undergoes to be translated.
95
+ </Text>
96
+ </Box>
97
+ </Box>
98
+ <Box
99
+ maxW="4xl"
100
+ bg={bgColor[colorMode]}
101
+ boxShadow="xl"
102
+ p={8}
103
+ rounded="md"
104
+ textAlign="center"
105
+ >
106
+ {!tokenized['processed_texts'] || isProcessLoading ? (
107
+ <Box>
108
+ <Textarea
109
+ placeholder="Latin Input"
110
+ size="lg"
111
+ _placeholder={{ color: placeholderColor[colorMode] }}
112
+ value={processInputText}
113
+ onChange={handleInputChange}
114
+ isInvalid={/\d/.test(processInputText)}
115
+ resize="vertical"
116
+ mb={4}
117
+ />
118
+ {/\d/.test(processInputText) &&
119
+ <Alert status='warning' mb={6}>
120
+ <AlertIcon />
121
+ You have English numbers in your source text - these should not be found in Latin source. Either remove them or be prepared to receive corrupted translations.
122
+ </Alert>}
123
+ {
124
+ isProcessLoading ?
125
+ <Spinner size="xl" />
126
+ :
127
+ <Button colorScheme="teal" size="lg" onClick={handleButtonClick}>
128
+ Submit
129
+ </Button>
130
+ }
131
+
132
+ </Box>
133
+ ) : (
134
+ <Box>
135
+ <Heading as="h2" size="xl" mb={4}>
136
+ Pre-processing
137
+ </Heading>
138
+ <Text textAlign={"left"}>
139
+ First, your text needs to be pre-processed. For the base model, this mostly consists of normalizing letters (turning а̄ into a and œ into oe).
140
+ However, for the other models, this is a far more advanced process. After normalizing, we use the tool {' '}
141
+ <Link color='teal.500' href='https://latin-words.com/'>
142
+ Whitaker's Words
143
+ </Link>
144
+ {' '} to perform lookups on each word. An example output for the word "amat" is:
145
+ <Stack direction='column' textAlign={"left"} mt="3" mb="3">
146
+ <Code>
147
+ am.at V 1 1 PRES ACTIVE IND 3 S
148
+ </Code>
149
+ <Code>
150
+ amo, amare, amavi, amatus V (1st) [XXXAO]
151
+ </Code>
152
+ <Code>
153
+ love, like; fall in love with; be fond of; have a tendency to;
154
+ </Code>
155
+ </Stack>
156
+ </Text>
157
+ {"\n"}
158
+ <Text textAlign={"left"}>
159
+
160
+ This provides us with extremely useful <Link color='teal.500' href='https://en.wikipedia.org/wiki/Morphology_(linguistics)'> Morphological Information</Link> on the word.
161
+
162
+ First, we get a <Text as="span" fontWeight={"bold"}>stem-case split:</Text> "am" is the stem and "at" is the case - all forms stemming from "am" will have similar root meaning (love), but will be different in their specific inflection implying voice, person, number, and more.
163
+ We use this information for the "Case-Split" model, by splitting every inflected word into two - the stem and the case marked by "CASE_" at the start. Words that do not have any variant forms stay the same. "amat" would be converted to "am CASE_at".
164
+ </Text>
165
+
166
+ <Text textAlign={"left"} mt="3">
167
+
168
+ We also get outright <Text as="span" fontWeight={"bold"}>morphological splits:</Text> everything on the top line of the Whitaker's Words output, with spaces removed - V11PRESACTIVEIND3S.
169
+ This seemingly nonsensical string of characters tells us that "amat" is a 3rd Person Singular Present Active Indicative form of "amo", which is a 1st conjugation 1st type verb. We want to provide all of this rich
170
+ information about the usage of the word to the model, so in the same way, we convert the words - this time to the root and morphology: "amat" turns into "am V11PRESACTIVEIND3S". Given that no Latin word
171
+ has a digit in it, we can automatically distinguish these words in the model without having to add CASE_ to the front of them.
172
+ We use this information for the "Morphological Split" model.
173
+ </Text>
174
+
175
+ <Text textAlign={"left"} mt="3">
176
+
177
+ It seems that the morphological information we acquire might be, indeed, a little too specialized - does the model really care whether the noun is 1st declension or 2nd?
178
+ To assuage those worries, we have a <Text as="span" fontWeight={"bold"}>simplified morphological splits</Text> model, which takes the information of the morphological split model and throws away all the unnecessary.
179
+ We make the morphological strings more readable and throw away the seemingly unnecessary information, resulting in "am Verb_Present_Active_Indicative_Third_Singular" - the morphology is still only one word, though.
180
+ </Text>
181
+
182
+ <Text textAlign={"left"} mt="3">
183
+
184
+ Your text, after pre-processing in the various methods, looks like:
185
+ {Object.entries(tokenized['processed_texts'] || {}).map(([key, value]) => (
186
+ <Box key={key} mb={4}>
187
+ <Text fontWeight="bold">{nameMap[key]}:</Text>
188
+ <Text>{value}</Text>
189
+ </Box>
190
+ ))}
191
+ </Text>
192
+ <Button colorScheme="teal" size="lg" onClick={() => setTokenized({})}>
193
+ Translate another text
194
+ </Button>
195
+
196
+ </Box>
197
+ )}
198
+ </Box>
199
+ </Flex>
200
+ </Flex>
201
+ </Box>
202
+ );
203
+ }
204
+
205
+ export default ProcessExplainer;
src/aineid/src/{TopBar.tsx → TopBar.jsx} RENAMED
@@ -2,7 +2,7 @@ import { Box, Flex, useColorMode } from "@chakra-ui/react";
2
  import { ColorModeSwitcher } from "./ColorModeSwitcher";
3
  import { Heading } from "@chakra-ui/layout";
4
 
5
- function TopBar() {
6
  const { colorMode } = useColorMode();
7
  const textColor = {
8
  light: "gray.700",
@@ -30,13 +30,27 @@ function TopBar() {
30
  fontSize="xl"
31
  fontWeight="bold"
32
  color={textColor[colorMode]}
33
- textAlign="center"
34
  w="100%"
35
  maxW="400px"
36
  mx="auto"
 
37
  >
38
  AIneid
39
  </Heading>
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  <ColorModeSwitcher justifySelf="flex-end" />
41
  </Flex>
42
  </Box>
 
2
  import { ColorModeSwitcher } from "./ColorModeSwitcher";
3
  import { Heading } from "@chakra-ui/layout";
4
 
5
+ function TopBar({ currentPage, setCurrentPage }) {
6
  const { colorMode } = useColorMode();
7
  const textColor = {
8
  light: "gray.700",
 
30
  fontSize="xl"
31
  fontWeight="bold"
32
  color={textColor[colorMode]}
33
+ // textAlign="center"
34
  w="100%"
35
  maxW="400px"
36
  mx="auto"
37
+ onClick={() => setCurrentPage("main")}
38
  >
39
  AIneid
40
  </Heading>
41
+
42
+ <Heading
43
+ fontSize="l"
44
+ fontWeight="bold"
45
+ color={textColor[colorMode]}
46
+ textAlign="right"
47
+ w="100%"
48
+ maxW="400px"
49
+ mx="auto"
50
+ onClick={() => setCurrentPage("about")}
51
+ >
52
+ About
53
+ </Heading>
54
  <ColorModeSwitcher justifySelf="flex-end" />
55
  </Flex>
56
  </Box>