'
+ image_html = "
"
+
+ for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
+ if path.exists():
+ image_html = f'
})
'
+ break
+
+ container_html += f'{image_html}
{character}'
+ container_html += "
"
+ cards.append([container_html, character])
+
+ return cards
+
+
+def select_character(evt: gr.SelectData):
+ return (evt.value[1])
+
+
+def ui():
+ with gr.Accordion("Character gallery", open=False):
+ update = gr.Button("Refresh")
+ gr.HTML(value="")
+ gallery = gr.Dataset(components=[gr.HTML(visible=False)],
+ label="",
+ samples=generate_html(),
+ elem_classes=["character-gallery"],
+ samples_per_page=50
+ )
+ update.click(generate_html, [], gallery)
+ gallery.select(select_character, None, gradio['character_menu'])
diff --git a/extensions/google_translate/requirements.txt b/extensions/google_translate/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..554a00df62818f96ba7d396ae39d8e58efbe9bfe
--- /dev/null
+++ b/extensions/google_translate/requirements.txt
@@ -0,0 +1 @@
+deep-translator==1.9.2
diff --git a/extensions/google_translate/script.py b/extensions/google_translate/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dfdbcd0c0a9b889497dc2a147007e997c3cda80
--- /dev/null
+++ b/extensions/google_translate/script.py
@@ -0,0 +1,56 @@
+import gradio as gr
+from deep_translator import GoogleTranslator
+
+params = {
+ "activate": True,
+ "language string": "ja",
+}
+
+language_codes = {'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chinese (Simplified)': 'zh-CN', 'Chinese (Traditional)': 'zh-TW', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw', 'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km', 'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt', 'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi', 'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Nyanja (Chichewa)': 'ny', 'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese (Portugal, Brazil)': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru', 'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala (Sinhalese)': 'si', 'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv', 'Tagalog (Filipino)': 'tl', 'Tajik': 'tg', 'Tamil': 'ta', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo', 'Zulu': 'zu'}
+
+
+def input_modifier(string):
+ """
+ This function is applied to your text inputs before
+ they are fed into the model.
+ """
+ if not params['activate']:
+ return string
+
+ return GoogleTranslator(source=params['language string'], target='en').translate(string)
+
+
+def output_modifier(string):
+ """
+ This function is applied to the model outputs.
+ """
+ if not params['activate']:
+ return string
+
+ return GoogleTranslator(source='en', target=params['language string']).translate(string)
+
+
+def bot_prefix_modifier(string):
+ """
+ This function is only applied in chat mode. It modifies
+ the prefix text for the Bot and can be used to bias its
+ behavior.
+ """
+
+ return string
+
+
+def ui():
+ # Finding the language name from the language code to use as the default value
+ language_name = list(language_codes.keys())[list(language_codes.values()).index(params['language string'])]
+
+ # Gradio elements
+ with gr.Row():
+ activate = gr.Checkbox(value=params['activate'], label='Activate translation')
+
+ with gr.Row():
+ language = gr.Dropdown(value=language_name, choices=[k for k in language_codes], label='Language')
+
+ # Event functions to update the parameters in the backend
+ activate.change(lambda x: params.update({"activate": x}), activate, None)
+ language.change(lambda x: params.update({"language string": language_codes[x]}), language, None)
diff --git a/extensions/llava/script.py b/extensions/llava/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..781d584b78ebf8e7c0c87e4203665286b92cf81c
--- /dev/null
+++ b/extensions/llava/script.py
@@ -0,0 +1,8 @@
+import gradio as gr
+
+from modules.logging_colors import logger
+
+
+def ui():
+ gr.Markdown("### This extension is deprecated, use \"multimodal\" extension instead")
+ logger.error("LLaVA extension is deprecated, use \"multimodal\" extension instead")
diff --git a/extensions/multimodal/DOCS.md b/extensions/multimodal/DOCS.md
new file mode 100644
index 0000000000000000000000000000000000000000..eaa4365e9a304a14ebbdb1d4d435f3a2a1f7a7d2
--- /dev/null
+++ b/extensions/multimodal/DOCS.md
@@ -0,0 +1,85 @@
+# Technical description of multimodal extension
+
+## Working principle
+The multimodal extension handles most of what is required for any image input:
+
+- adds the UI
+- saves the images as base64 JPEGs to history
+- provides the hooks to the UI
+- if there are images in the prompt, it:
+ - splits the prompt to text and image parts
+ - adds image start/end markers to text parts, then encodes and embeds the text parts
+ - calls the vision pipeline to embed the images
+ - stitches the embeddings together, and returns them to text generation
+- loads the appropriate vision pipeline, selected either from model name, or by specifying --multimodal-pipeline parameter
+
+Now, for the pipelines, they:
+
+- load the required vision models
+- return some consts, for example the number of tokens taken up by an image
+- and most importantly: return the embeddings for LLM, given a list of images
+
+## Prompts/history
+
+To save images in the prompt/history, this extension uses a base64 JPEG wrapped in an HTML tag, like so:
+```
+