☰', elem_id='gr-hover')
+
+ with gr.Column(scale=10, elem_id='chat-input-container'):
+ shared.gradio['textbox'] = gr.Textbox(label='', placeholder='Send a message', elem_id='chat-input', elem_classes=['add_scrollbar'])
+ shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
+                shared.gradio['typing-dots'] = gr.HTML(value='<div class="typing"><span></span><span class="dot1"></span><span class="dot2"></span></div>', label='typing', elem_id='typing-container')
+
+ with gr.Column(scale=1, elem_id='generate-stop-container'):
+ with gr.Row():
+ shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop', visible=False)
+ shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate', variant='primary')
+
+ # Hover menu buttons
+ with gr.Column(elem_id='chat-buttons'):
+ with gr.Row():
+ shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
+ shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
+ shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
+
+ with gr.Row():
+ shared.gradio['Replace last reply'] = gr.Button('Replace last reply (Ctrl + Shift + L)', elem_id='Replace-last')
+ shared.gradio['Copy last reply'] = gr.Button('Copy last reply (Ctrl + Shift + K)', elem_id='Copy-last')
+ shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
+
+ with gr.Row():
+ shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
+ shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
+
+ with gr.Row():
+ shared.gradio['send-chat-to-default'] = gr.Button('Send to default')
+ shared.gradio['send-chat-to-notebook'] = gr.Button('Send to notebook')
+
+ with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
+ with gr.Column():
+ with gr.Row():
+ shared.gradio['unique_id'] = gr.Dropdown(label='Past chats', elem_classes=['slim-dropdown'], interactive=not mu)
+
+ with gr.Row():
+ shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', visible=False, elem_classes='refresh-button')
+ shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', visible=False, elem_classes='refresh-button')
+ shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes='refresh-button')
+
+ with gr.Row(elem_id='rename-row'):
+ shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', visible=False, elem_classes=['no-background'])
+ shared.gradio['rename_to-confirm'] = gr.Button('Confirm', visible=False, elem_classes='refresh-button')
+ shared.gradio['rename_to-cancel'] = gr.Button('Cancel', visible=False, elem_classes='refresh-button')
+
+ with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
+ with gr.Column():
+ with gr.Row():
+ shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
+
+ with gr.Row():
+ shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value='chat', label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template selected under Parameters > Instruction template must match the current model.', elem_id='chat-mode')
+
+ with gr.Row():
+ shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
+
+
+def create_chat_settings_ui():
+ mu = shared.args.multi_user
+ with gr.Tab('Character'):
+ with gr.Row():
+ with gr.Column(scale=8):
+ with gr.Row():
+ shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
+ shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+
+ shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name')
+ shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
+ shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
+ shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
+
+ with gr.Column(scale=1):
+ shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil', interactive=not mu)
+ shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None, interactive=not mu)
+
+ with gr.Tab('Instruction template'):
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', info="After selecting the template, click on \"Load\" to load and apply it.", value='None', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
+ shared.gradio['load_template'] = gr.Button("Load", elem_classes='refresh-button')
+ shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['delete_template'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
+
+ with gr.Column():
+ pass
+
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
+ shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='Change this according to the model/LoRA that you are using. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
+ with gr.Row():
+ shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button'])
+ shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
+ shared.gradio['send_instruction_to_negative_prompt'] = gr.Button('Send to negative prompt', elem_classes=['small-button'])
+
+ with gr.Column():
+ shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label='Chat template', lines=22, elem_classes=['add_scrollbar', 'monospace'])
+ shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=4, label='Command for chat-instruct mode', info='<|character|> gets replaced by the bot name, and <|prompt|> gets replaced by the regular chat prompt.', elem_classes=['add_scrollbar'])
+
+ with gr.Tab('Chat history'):
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['save_chat_history'] = gr.Button(value='Save history')
+
+ with gr.Column():
+ shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')
+
+ with gr.Tab('Upload character'):
+ with gr.Tab('YAML or JSON'):
+ with gr.Row():
+ shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
+ shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)', interactive=not mu)
+
+ shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)
+
+ with gr.Tab('TavernAI PNG'):
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
+ shared.gradio['tavern_json'] = gr.State()
+ with gr.Column():
+ shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
+ shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False)
+
+ shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
+
+
+def create_event_handlers():
+
+ # Obsolete variables, kept for compatibility with old extensions
+ shared.input_params = gradio(inputs)
+ shared.reload_inputs = gradio(reload_arr)
+
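+    # Generate button: snapshot the UI state, move the textbox contents into the
+    # 'Chat input' state (clearing the textbox), stream the reply, then save the
+    # history and fire the audio notification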
+ shared.gradio['Generate'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
+ chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['textbox'].submit(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
+ chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['Regenerate'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['Continue'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['Impersonate'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then(
+ chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['Replace last reply'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.replace_last_reply, gradio('textbox', 'interface_state'), gradio('history')).then(
+ lambda: '', None, gradio('textbox'), show_progress=False).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
+
+ shared.gradio['Send dummy message'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.send_dummy_message, gradio('textbox', 'interface_state'), gradio('history')).then(
+ lambda: '', None, gradio('textbox'), show_progress=False).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
+
+ shared.gradio['Send dummy reply'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.send_dummy_reply, gradio('textbox', 'interface_state'), gradio('history')).then(
+ lambda: '', None, gradio('textbox'), show_progress=False).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
+
+ shared.gradio['Remove last'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.remove_last_message, gradio('history'), gradio('textbox', 'history'), show_progress=False).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
+
+ shared.gradio['Stop'].click(
+ stop_everything_event, None, None, queue=False).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display'))
+
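+    # Selecting a past chat from the dropdown reloads that history (only wired up outside --multi-user mode)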
+ if not shared.args.multi_user:
+ shared.gradio['unique_id'].select(
+ chat.load_history, gradio('unique_id', 'character_menu', 'mode'), gradio('history')).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display'))
+
+ shared.gradio['Start new chat'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.start_new_chat, gradio('interface_state'), gradio('history')).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id'))
+
+ shared.gradio['delete_chat'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, gradio(clear_arr))
+ shared.gradio['delete_chat-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr))
+ shared.gradio['delete_chat-confirm'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda x, y: str(chat.find_all_histories(x).index(y)), gradio('interface_state', 'unique_id'), gradio('temporary_text')).then(
+ chat.delete_history, gradio('unique_id', 'character_menu', 'mode'), None).then(
+ chat.load_history_after_deletion, gradio('interface_state', 'temporary_text'), gradio('history', 'unique_id')).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr))
+
+ shared.gradio['rename_chat'].click(
+ lambda x: x, gradio('unique_id'), gradio('rename_to')).then(
+ lambda: [gr.update(visible=True)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
+
+ shared.gradio['rename_to-cancel'].click(
+ lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
+
+ shared.gradio['rename_to-confirm'].click(
+ chat.rename_history, gradio('unique_id', 'rename_to', 'character_menu', 'mode'), None).then(
+ lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False).then(
+ lambda x, y: gr.update(choices=chat.find_all_histories(x), value=y), gradio('interface_state', 'rename_to'), gradio('unique_id'))
+
+ shared.gradio['rename_to'].submit(
+ chat.rename_history, gradio('unique_id', 'rename_to', 'character_menu', 'mode'), None).then(
+ lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False).then(
+ lambda x, y: gr.update(choices=chat.find_all_histories(x), value=y), gradio('interface_state', 'rename_to'), gradio('unique_id'))
+
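+    # Uploading a history file starts a fresh chat, loads the uploaded messages into it,
+    # refreshes the past-chats dropdown, saves the result, and switches to the Chat tab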
+ shared.gradio['load_chat_history'].upload(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.start_new_chat, gradio('interface_state'), gradio('history')).then(
+ chat.load_history_json, gradio('load_chat_history', 'history'), gradio('history')).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then(
+ chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}')
+
+ shared.gradio['character_menu'].change(
+ chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
+
+ shared.gradio['mode'].change(
+ lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
+ chat.redraw_html, gradio(reload_arr), gradio('display')).then(
+ lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id'))
+
+ shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'))
+ shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False)
+
+ # Save/delete a character
+ shared.gradio['save_character'].click(
+ lambda x: x, gradio('name2'), gradio('save_character_filename')).then(
+ lambda: gr.update(visible=True), None, gradio('character_saver'))
+
+ shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'))
+
+ shared.gradio['load_template'].click(
+ chat.load_instruction_template, gradio('instruction_template'), gradio('instruction_template_str')).then(
+ lambda: "Select template to load...", None, gradio('instruction_template'))
+
+ shared.gradio['save_template'].click(
+ lambda: 'My Template.yaml', None, gradio('save_filename')).then(
+ lambda: 'instruction-templates/', None, gradio('save_root')).then(
+ chat.generate_instruction_template_yaml, gradio('instruction_template_str'), gradio('save_contents')).then(
+ lambda: gr.update(visible=True), None, gradio('file_saver'))
+
+ shared.gradio['delete_template'].click(
+ lambda x: f'{x}.yaml', gradio('instruction_template'), gradio('delete_filename')).then(
+ lambda: 'instruction-templates/', None, gradio('delete_root')).then(
+ lambda: gr.update(visible=True), None, gradio('file_deleter'))
+
+ shared.gradio['save_chat_history'].click(
+ lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then(
+ None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}')
+
+ shared.gradio['Submit character'].click(
+ chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}')
+
+ shared.gradio['Submit tavern character'].click(
+ chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}')
+
+ shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character'))
+ shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character'))
+ shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False)
+ shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False)
+ shared.gradio['your_picture'].change(
+ chat.upload_your_profile_picture, gradio('your_picture'), None).then(
+ partial(chat.redraw_html, reset_cache=True), gradio(reload_arr), gradio('display'))
+
+ shared.gradio['send_instruction_to_default'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then(
+ partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}')
+
+ shared.gradio['send_instruction_to_notebook'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then(
+ partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')
+
+ shared.gradio['send_instruction_to_negative_prompt'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then(
+ partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('negative_prompt')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}')
+
+ shared.gradio['send-chat-to-default'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}')
+
+ shared.gradio['send-chat-to-notebook'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')
+
+ shared.gradio['show_controls'].change(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')
diff --git a/modules/ui_default.py b/modules/ui_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..7db6f0d93abcc36354b0d687d83c865b8f5dd406
--- /dev/null
+++ b/modules/ui_default.py
@@ -0,0 +1,104 @@
+import gradio as gr
+
+from modules import logits, shared, ui, utils
+from modules.prompts import count_tokens, load_prompt
+from modules.text_generation import (
+ generate_reply_wrapper,
+ get_token_ids,
+ stop_everything_event
+)
+from modules.utils import gradio
+
+inputs = ('textbox-default', 'interface_state')
+outputs = ('output_textbox', 'html-default')
+
+
+def create_ui():
+ mu = shared.args.multi_user
+ with gr.Tab('Default', elem_id='default-tab'):
+ shared.gradio['last_input-default'] = gr.State('')
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ shared.gradio['textbox-default'] = gr.Textbox(value='', lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
+                    shared.gradio['token-counter-default'] = gr.HTML(value="<span>0</span>", elem_classes=["token-counter", "default-token-counter"])
+
+ with gr.Row():
+ shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary')
+ shared.gradio['Stop-default'] = gr.Button('Stop', elem_id='stop')
+ shared.gradio['Continue-default'] = gr.Button('Continue')
+
+ with gr.Row():
+ shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['prompt_menu-default'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button', interactive=not mu)
+ shared.gradio['save_prompt-default'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['delete_prompt-default'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+
+ with gr.Column():
+ with gr.Tab('Raw'):
+ shared.gradio['output_textbox'] = gr.Textbox(lines=27, label='Output', elem_id='textbox-default', elem_classes=['textbox_default_output', 'add_scrollbar'])
+
+ with gr.Tab('Markdown'):
+ shared.gradio['markdown_render-default'] = gr.Button('Render')
+ shared.gradio['markdown-default'] = gr.Markdown()
+
+ with gr.Tab('HTML'):
+ shared.gradio['html-default'] = gr.HTML()
+
+ with gr.Tab('Logits'):
+ with gr.Row():
+ with gr.Column(scale=10):
+ shared.gradio['get_logits-default'] = gr.Button('Get next token probabilities')
+ with gr.Column(scale=1):
+ shared.gradio['use_samplers-default'] = gr.Checkbox(label='Use samplers', value=True, elem_classes=['no-background'])
+
+ with gr.Row():
+ shared.gradio['logits-default'] = gr.Textbox(lines=23, label='Output', elem_classes=['textbox_logits', 'add_scrollbar'])
+ shared.gradio['logits-default-previous'] = gr.Textbox(lines=23, label='Previous output', elem_classes=['textbox_logits', 'add_scrollbar'])
+
+ with gr.Tab('Tokens'):
+ shared.gradio['get_tokens-default'] = gr.Button('Get token IDs for the input')
+ shared.gradio['tokens-default'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits', 'add_scrollbar', 'monospace'])
+
+
+def create_event_handlers():
+ shared.gradio['Generate-default'].click(
+ lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['textbox-default'].submit(
+ lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False)
+ shared.gradio['Continue-default'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False)
+ shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False)
+ shared.gradio['save_prompt-default'].click(
+ lambda x: x, gradio('textbox-default'), gradio('save_contents')).then(
+ lambda: 'prompts/', None, gradio('save_root')).then(
+ lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then(
+ lambda: gr.update(visible=True), None, gradio('file_saver'))
+
+ shared.gradio['delete_prompt-default'].click(
+ lambda: 'prompts/', None, gradio('delete_root')).then(
+ lambda x: x + '.txt', gradio('prompt_menu-default'), gradio('delete_filename')).then(
+ lambda: gr.update(visible=True), None, gradio('file_deleter'))
+
+    shared.gradio['textbox-default'].change(lambda x: f"<span>{count_tokens(x)}</span>", gradio('textbox-default'), gradio('token-counter-default'), show_progress=False)
+ shared.gradio['get_logits-default'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ logits.get_next_logits, gradio('textbox-default', 'interface_state', 'use_samplers-default', 'logits-default'), gradio('logits-default', 'logits-default-previous'), show_progress=False)
+
+ shared.gradio['get_tokens-default'].click(get_token_ids, gradio('textbox-default'), gradio('tokens-default'), show_progress=False)
diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py
new file mode 100644
index 0000000000000000000000000000000000000000..7147121773eb091a3c0bf6ebe14905783cf67405
--- /dev/null
+++ b/modules/ui_file_saving.py
@@ -0,0 +1,103 @@
+import gradio as gr
+
+from modules import chat, presets, shared, ui, utils
+from modules.utils import gradio
+
+
+def create_ui():
+ mu = shared.args.multi_user
+
+ # Text file saver
+ with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['file_saver']:
+ shared.gradio['save_filename'] = gr.Textbox(lines=1, label='File name')
+ shared.gradio['save_root'] = gr.Textbox(lines=1, label='File folder', info='For reference. Unchangeable.', interactive=False)
+ shared.gradio['save_contents'] = gr.Textbox(lines=10, label='File contents')
+ with gr.Row():
+ shared.gradio['save_cancel'] = gr.Button('Cancel', elem_classes="small-button")
+ shared.gradio['save_confirm'] = gr.Button('Save', elem_classes="small-button", variant='primary', interactive=not mu)
+
+ # Text file deleter
+ with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['file_deleter']:
+ shared.gradio['delete_filename'] = gr.Textbox(lines=1, label='File name')
+ shared.gradio['delete_root'] = gr.Textbox(lines=1, label='File folder', info='For reference. Unchangeable.', interactive=False)
+ with gr.Row():
+ shared.gradio['delete_cancel'] = gr.Button('Cancel', elem_classes="small-button")
+ shared.gradio['delete_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop', interactive=not mu)
+
+ # Character saver/deleter
+ with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['character_saver']:
+ shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your characters/ folder with this base filename.')
+ with gr.Row():
+ shared.gradio['save_character_cancel'] = gr.Button('Cancel', elem_classes="small-button")
+ shared.gradio['save_character_confirm'] = gr.Button('Save', elem_classes="small-button", variant='primary', interactive=not mu)
+
+ with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['character_deleter']:
+ gr.Markdown('Confirm the character deletion?')
+ with gr.Row():
+ shared.gradio['delete_character_cancel'] = gr.Button('Cancel', elem_classes="small-button")
+ shared.gradio['delete_character_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop', interactive=not mu)
+
+ # Preset saver
+ with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['preset_saver']:
+ shared.gradio['save_preset_filename'] = gr.Textbox(lines=1, label='File name', info='The preset will be saved to your presets/ folder with this base filename.')
+ shared.gradio['save_preset_contents'] = gr.Textbox(lines=10, label='File contents')
+ with gr.Row():
+ shared.gradio['save_preset_cancel'] = gr.Button('Cancel', elem_classes="small-button")
+ shared.gradio['save_preset_confirm'] = gr.Button('Save', elem_classes="small-button", variant='primary', interactive=not mu)
+
+
+def create_event_handlers():
+ shared.gradio['save_confirm'].click(
+ lambda x, y, z: utils.save_file(x + y, z), gradio('save_root', 'save_filename', 'save_contents'), None).then(
+ lambda: gr.update(visible=False), None, gradio('file_saver'))
+
+ shared.gradio['delete_confirm'].click(
+ lambda x, y: utils.delete_file(x + y), gradio('delete_root', 'delete_filename'), None).then(
+ lambda: gr.update(visible=False), None, gradio('file_deleter'))
+
+ shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter'))
+ shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver'))
+
+ shared.gradio['save_character_confirm'].click(
+ chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then(
+ lambda: gr.update(visible=False), None, gradio('character_saver')).then(
+ lambda x: gr.update(choices=utils.get_available_characters(), value=x), gradio('save_character_filename'), gradio('character_menu'))
+
+ shared.gradio['delete_character_confirm'].click(
+ lambda x: str(utils.get_available_characters().index(x)), gradio('character_menu'), gradio('temporary_text')).then(
+ chat.delete_character, gradio('character_menu'), None).then(
+ chat.update_character_menu_after_deletion, gradio('temporary_text'), gradio('character_menu')).then(
+ lambda: gr.update(visible=False), None, gradio('character_deleter'))
+
+ shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'))
+ shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'))
+
+ shared.gradio['save_preset'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ presets.generate_preset_yaml, gradio('interface_state'), gradio('save_preset_contents')).then(
+ lambda: 'My Preset', None, gradio('save_preset_filename')).then(
+ lambda: gr.update(visible=True), None, gradio('preset_saver'))
+
+ shared.gradio['save_preset_confirm'].click(
+ lambda x, y: utils.save_file(f'presets/{x}.yaml', y), gradio('save_preset_filename', 'save_preset_contents'), None).then(
+ lambda: gr.update(visible=False), None, gradio('preset_saver')).then(
+ lambda x: gr.update(choices=utils.get_available_presets(), value=x), gradio('save_preset_filename'), gradio('preset_menu'))
+
+ shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'))
+
+ shared.gradio['delete_preset'].click(
+ lambda x: f'{x}.yaml', gradio('preset_menu'), gradio('delete_filename')).then(
+ lambda: 'presets/', None, gradio('delete_root')).then(
+ lambda: gr.update(visible=True), None, gradio('file_deleter'))
+
+ shared.gradio['save_grammar'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda x: x, gradio('grammar_string'), gradio('save_contents')).then(
+ lambda: 'grammars/', None, gradio('save_root')).then(
+ lambda: 'My Fancy Grammar.gbnf', None, gradio('save_filename')).then(
+ lambda: gr.update(visible=True), None, gradio('file_saver'))
+
+ shared.gradio['delete_grammar'].click(
+ lambda x: x, gradio('grammar_file'), gradio('delete_filename')).then(
+ lambda: 'grammars/', None, gradio('delete_root')).then(
+ lambda: gr.update(visible=True), None, gradio('file_deleter'))
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac6a8a8f5bf144d73b1ffa515443889ced8ee22c
--- /dev/null
+++ b/modules/ui_model_menu.py
@@ -0,0 +1,333 @@
+import importlib
+import math
+import re
+import traceback
+from functools import partial
+from pathlib import Path
+
+import gradio as gr
+import psutil
+import torch
+from transformers import is_torch_xpu_available
+
+from modules import loaders, shared, ui, utils
+from modules.logging_colors import logger
+from modules.LoRA import add_lora_to_model
+from modules.models import load_model, unload_model
+from modules.models_settings import (
+ apply_model_settings_to_state,
+ get_model_metadata,
+ save_instruction_template,
+ save_model_settings,
+ update_model_parameters
+)
+from modules.utils import gradio
+
+
+def create_ui():
+ mu = shared.args.multi_user
+
+ # Finding the default values for the GPU and CPU memories
+ total_mem = []
+ if is_torch_xpu_available():
+ for i in range(torch.xpu.device_count()):
+ total_mem.append(math.floor(torch.xpu.get_device_properties(i).total_memory / (1024 * 1024)))
+ else:
+ for i in range(torch.cuda.device_count()):
+ total_mem.append(math.floor(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024)))
+
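+    # Convert the --gpu-memory values to MiB: entries containing "MiB" are used as-is,
+    # anything else (e.g. "22" or "22GiB") is treated as GiB and multiplied by 1000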
+ default_gpu_mem = []
+ if shared.args.gpu_memory is not None and len(shared.args.gpu_memory) > 0:
+ for i in shared.args.gpu_memory:
+ if 'mib' in i.lower():
+ default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i)))
+ else:
+ default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i)) * 1000)
+
+ while len(default_gpu_mem) < len(total_mem):
+ default_gpu_mem.append(0)
+
+ total_cpu_mem = math.floor(psutil.virtual_memory().total / (1024 * 1024))
+ if shared.args.cpu_memory is not None:
+ default_cpu_mem = re.sub('[a-zA-Z ]', '', shared.args.cpu_memory)
+ else:
+ default_cpu_mem = 0
+
+ with gr.Tab("Model", elem_id="model-tab"):
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
+ ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
+ shared.gradio['load_model'] = gr.Button("Load", visible=not shared.settings['autoload_model'], elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['reload_model'] = gr.Button("Reload", elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
+
+ with gr.Column():
+ with gr.Row():
+ shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown', interactive=not mu)
+ ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button', interactive=not mu)
+ shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button', interactive=not mu)
+
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys(), value=None)
+ with gr.Box():
+ with gr.Row():
+ with gr.Column():
+ with gr.Blocks():
+ for i in range(len(total_mem)):
+ shared.gradio[f'gpu_memory_{i}'] = gr.Slider(label=f"gpu-memory in MiB for device :{i}", maximum=total_mem[i], value=default_gpu_mem[i])
+
+ shared.gradio['cpu_memory'] = gr.Slider(label="cpu-memory in MiB", maximum=total_cpu_mem, value=default_cpu_mem)
+
+ with gr.Blocks():
+ shared.gradio['transformers_info'] = gr.Markdown('load-in-4bit params:')
+ shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype)
+ shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type)
+
+ shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
+ shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers)
+ shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
+ shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17')
+ shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
+ shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads)
+ shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=32, value=shared.args.threads_batch)
+ shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=shared.args.wbits if shared.args.wbits > 0 else "None")
+ shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None")
+ shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None"], value=shared.args.model_type or "None")
+ shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0)
+ shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
+ shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len)
+ with gr.Blocks():
+ shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.05, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value)
+ shared.gradio['rope_freq_base'] = gr.Slider(label='rope_freq_base', minimum=0, maximum=1000000, step=1000, info='If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63)', value=shared.args.rope_freq_base)
+ shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=1, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.', value=shared.args.compress_pos_emb)
+
+ shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.')
+ shared.gradio['quipsharp_info'] = gr.Markdown('QuIP# has to be installed manually at the moment.')
+
+ with gr.Column():
+ shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
+ shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
+ shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
+ shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
+ shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
+ shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
+ shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
+ shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
+ shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
+ shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.')
+ shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton)
+ shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Fuses layers for AutoAWQ. Disable if running low on VRAM.')
+ shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. Disable if running low on VRAM.')
+ shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.')
+ shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.')
+ shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
+ shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
+ shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
+ shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
+ shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
+ shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.')
+ shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
+ shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn, info='Force flash-attention to not be used.')
+ shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
+ shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
+ with gr.Blocks():
+ shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Set trust_remote_code=True while loading the tokenizer/model. To enable this option, start the web UI with the --trust-remote-code flag.', interactive=shared.args.trust_remote_code)
+ shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
+ shared.gradio['logits_all'] = gr.Checkbox(label="logits_all", value=shared.args.logits_all, info='Needs to be set for perplexity evaluation to work with this loader. Otherwise, ignore it, as it makes prompt processing slower.')
+
+ shared.gradio['disable_exllama'] = gr.Checkbox(label="disable_exllama", value=shared.args.disable_exllama, info='Disable ExLlama kernel for GPTQ models.')
+ shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
+ shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
+ shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
+ shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")
+
+ with gr.Column():
+ with gr.Row():
+ shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu)
+
+ with gr.Tab("Download"):
+ shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
+ shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
+ with gr.Row():
+ shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
+ shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
+
+ with gr.Tab("llamacpp_HF creator"):
+ with gr.Row():
+ shared.gradio['gguf_menu'] = gr.Dropdown(choices=utils.get_available_ggufs(), value=lambda: shared.model_name, label='Choose your GGUF', elem_classes='slim-dropdown', interactive=not mu)
+ ui.create_refresh_button(shared.gradio['gguf_menu'], lambda: None, lambda: {'choices': utils.get_available_ggufs()}, 'refresh-button', interactive=not mu)
+
+ shared.gradio['unquantized_url'] = gr.Textbox(label="Enter the URL for the original (unquantized) model", info="Example: https://huggingface.co/lmsys/vicuna-13b-v1.5", max_lines=1)
+ shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu)
+ gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.")
+
+ with gr.Tab("Customize instruction template"):
+ with gr.Row():
+ shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
+
+ shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
+                    gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's metadata, which is sometimes wrong.")
+
+ with gr.Row():
+ shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
+
+
+def create_event_handlers():
+ shared.gradio['loader'].change(
+ loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())).then(
+ lambda value: gr.update(choices=loaders.get_model_types(value)), gradio('loader'), gradio('model_type'))
+
+ # In this event handler, the interface state is read and updated
+ # with the model defaults (if any), and then the model is loaded
+ # unless "autoload_model" is unchecked
+ shared.gradio['model_menu'].change(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ apply_model_settings_to_state, gradio('model_menu', 'interface_state'), gradio('interface_state')).then(
+ ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then(
+ update_model_parameters, gradio('interface_state'), None).then(
+ load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False).success(
+ update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then(
+ lambda x: x, gradio('loader'), gradio('filter_by_loader'))
+
+ shared.gradio['load_model'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ update_model_parameters, gradio('interface_state'), None).then(
+ partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success(
+ update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then(
+ lambda x: x, gradio('loader'), gradio('filter_by_loader'))
+
+ shared.gradio['reload_model'].click(
+ unload_model, None, None).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ update_model_parameters, gradio('interface_state'), None).then(
+ partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success(
+ update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then(
+ lambda x: x, gradio('loader'), gradio('filter_by_loader'))
+
+ shared.gradio['unload_model'].click(
+ unload_model, None, None).then(
+ lambda: "Model unloaded", None, gradio('model_status'))
+
+ shared.gradio['save_model_settings'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ save_model_settings, gradio('model_menu', 'interface_state'), gradio('model_status'), show_progress=False)
+
+ shared.gradio['lora_menu_apply'].click(load_lora_wrapper, gradio('lora_menu'), gradio('model_status'), show_progress=False)
+ shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
+ shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
+ shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model'))
+ shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True)
+ shared.gradio['customized_template_submit'].click(save_instruction_template, gradio('model_menu', 'customized_template'), gradio('model_status'), show_progress=True)
+
+
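+# Generator that streams Markdown status messages to the 'model_status' element
+# while (optionally) loading the selected model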
+def load_model_wrapper(selected_model, loader, autoload=False):
+ if not autoload:
+ yield f"The settings for `{selected_model}` have been updated.\n\nClick on \"Load\" to load it."
+ return
+
+ if selected_model == 'None':
+ yield "No model selected"
+ else:
+ try:
+ yield f"Loading `{selected_model}`..."
+ unload_model()
+ if selected_model != '':
+ shared.model, shared.tokenizer = load_model(selected_model, loader)
+
+ if shared.model is not None:
+ output = f"Successfully loaded `{selected_model}`."
+
+ settings = get_model_metadata(selected_model)
+ if 'instruction_template' in settings:
+ output += '\n\nIt seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.'.format(settings['instruction_template'])
+
+ yield output
+ else:
+ yield f"Failed to load `{selected_model}`."
+ except:
+ exc = traceback.format_exc()
+ logger.error('Failed to load the model.')
+ print(exc)
+ yield exc.replace('\n', '\n\n')
+
+
+def load_lora_wrapper(selected_loras):
+ yield ("Applying the following LoRAs to {}:\n\n{}".format(shared.model_name, '\n'.join(selected_loras)))
+ add_lora_to_model(selected_loras)
+    yield ("Successfully applied the LoRAs")
+
+
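+# Downloads a model from Hugging Face using the ModelDownloader from download-model.py,
+# or just lists the available files when return_links=True. Yields progress messages.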
+def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
+ try:
+ downloader = importlib.import_module("download-model").ModelDownloader()
+
+ progress(0.0)
+ model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
+
+ yield ("Getting the download links from Hugging Face")
+ links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
+ if return_links:
+ output = "```\n"
+ for link in links:
+ output += f"{Path(link).name}" + "\n"
+
+ output += "```"
+ yield output
+ return
+
+ yield ("Getting the output folder")
+ output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
+ if check:
+ progress(0.5)
+
+ yield ("Checking previously downloaded files")
+ downloader.check_model_files(model, branch, links, sha256, output_folder)
+ progress(1.0)
+ else:
+ yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
+ downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
+
+ yield (f"Model successfully saved to `{output_folder}/`.")
+ except:
+ progress(1.0)
+ yield traceback.format_exc().replace('\n', '\n\n')
+
+
+def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()):
+ try:
+ downloader = importlib.import_module("download-model").ModelDownloader()
+
+ progress(0.0)
+ model, branch = downloader.sanitize_model_and_branch_names(unquantized_url, None)
+
+ yield ("Getting the tokenizer file links from Hugging Face")
+ links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True)
+ output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")
+
+ yield (f"Downloading tokenizer to `{output_folder}`")
+ downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False)
+
+ # Move the GGUF
+ (Path(shared.args.model_dir) / gguf_name).rename(output_folder / gguf_name)
+
+ yield (f"Model saved to `{output_folder}/`.\n\nYou can now load it using llamacpp_HF.")
+ except:
+ progress(1.0)
+ yield traceback.format_exc().replace('\n', '\n\n')
+
+
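+# Keeps the truncation length slider in sync with the loader: ExLlama-family loaders use
+# max_seq_len, llama.cpp/llamacpp_HF/ctransformers use n_ctx, and any other loader keeps
+# the current value.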
+def update_truncation_length(current_length, state):
+ if 'loader' in state:
+ if state['loader'].lower().startswith('exllama'):
+ return state['max_seq_len']
+ elif state['loader'] in ['llama.cpp', 'llamacpp_HF', 'ctransformers']:
+ return state['n_ctx']
+
+ return current_length
diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bd5c919f797a30003f291ed40ca82a924f760e7
--- /dev/null
+++ b/modules/ui_notebook.py
@@ -0,0 +1,106 @@
+import gradio as gr
+
+from modules import logits, shared, ui, utils
+from modules.prompts import count_tokens, load_prompt
+from modules.text_generation import (
+ generate_reply_wrapper,
+ get_token_ids,
+ stop_everything_event
+)
+from modules.utils import gradio
+
+inputs = ('textbox-notebook', 'interface_state')
+outputs = ('textbox-notebook', 'html-notebook')
+
+
+def create_ui():
+ mu = shared.args.multi_user
+ with gr.Tab('Notebook', elem_id='notebook-tab'):
+ shared.gradio['last_input-notebook'] = gr.State('')
+ with gr.Row():
+ with gr.Column(scale=4):
+ with gr.Tab('Raw'):
+ with gr.Row():
+ shared.gradio['textbox-notebook'] = gr.Textbox(value='', lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
+ shared.gradio['token-counter-notebook'] = gr.HTML(value="<span>0</span>", elem_classes=["token-counter"])
+
+ with gr.Tab('Markdown'):
+ shared.gradio['markdown_render-notebook'] = gr.Button('Render')
+ shared.gradio['markdown-notebook'] = gr.Markdown()
+
+ with gr.Tab('HTML'):
+ shared.gradio['html-notebook'] = gr.HTML()
+
+ with gr.Tab('Logits'):
+ with gr.Row():
+ with gr.Column(scale=10):
+ shared.gradio['get_logits-notebook'] = gr.Button('Get next token probabilities')
+ with gr.Column(scale=1):
+ shared.gradio['use_samplers-notebook'] = gr.Checkbox(label='Use samplers', value=True, elem_classes=['no-background'])
+
+ with gr.Row():
+ shared.gradio['logits-notebook'] = gr.Textbox(lines=23, label='Output', elem_classes=['textbox_logits_notebook', 'add_scrollbar'])
+ shared.gradio['logits-notebook-previous'] = gr.Textbox(lines=23, label='Previous output', elem_classes=['textbox_logits_notebook', 'add_scrollbar'])
+
+ with gr.Tab('Tokens'):
+ shared.gradio['get_tokens-notebook'] = gr.Button('Get token IDs for the input')
+ shared.gradio['tokens-notebook'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits_notebook', 'add_scrollbar', 'monospace'])
+
+ with gr.Row():
+ shared.gradio['Generate-notebook'] = gr.Button('Generate', variant='primary', elem_classes='small-button')
+ shared.gradio['Stop-notebook'] = gr.Button('Stop', elem_classes='small-button', elem_id='stop')
+ shared.gradio['Undo'] = gr.Button('Undo', elem_classes='small-button')
+ shared.gradio['Regenerate-notebook'] = gr.Button('Regenerate', elem_classes='small-button')
+
+ with gr.Column(scale=1):
+ gr.HTML('<div style="padding-bottom: 13px"></div>')
+ with gr.Row():
+ shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['prompt_menu-notebook'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small'], interactive=not mu)
+ shared.gradio['save_prompt-notebook'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)
+ shared.gradio['delete_prompt-notebook'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)
+
+
+def create_event_handlers():
+ shared.gradio['Generate-notebook'].click(
+ lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['textbox-notebook'].submit(
+ lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False)
+ shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False)
+ shared.gradio['Regenerate-notebook'].click(
+ lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}')
+
+ shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False)
+ shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False)
+ shared.gradio['save_prompt-notebook'].click(
+ lambda x: x, gradio('textbox-notebook'), gradio('save_contents')).then(
+ lambda: 'prompts/', None, gradio('save_root')).then(
+ lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then(
+ lambda: gr.update(visible=True), None, gradio('file_saver'))
+
+ shared.gradio['delete_prompt-notebook'].click(
+ lambda: 'prompts/', None, gradio('delete_root')).then(
+ lambda x: x + '.txt', gradio('prompt_menu-notebook'), gradio('delete_filename')).then(
+ lambda: gr.update(visible=True), None, gradio('file_deleter'))
+
+ shared.gradio['textbox-notebook'].input(lambda x: f"<span>{count_tokens(x)}</span>", gradio('textbox-notebook'), gradio('token-counter-notebook'), show_progress=False)
+ shared.gradio['get_logits-notebook'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ logits.get_next_logits, gradio('textbox-notebook', 'interface_state', 'use_samplers-notebook', 'logits-notebook'), gradio('logits-notebook', 'logits-notebook-previous'), show_progress=False)
+
+ shared.gradio['get_tokens-notebook'].click(get_token_ids, gradio('textbox-notebook'), gradio('tokens-notebook'), show_progress=False)
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..078590dc119340064b081108da58d031d405a88e
--- /dev/null
+++ b/modules/ui_parameters.py
@@ -0,0 +1,124 @@
+from pathlib import Path
+
+import gradio as gr
+
+from modules import loaders, presets, shared, ui, ui_chat, utils
+from modules.utils import gradio
+
+
+def create_ui(default_preset):
+ mu = shared.args.multi_user
+ generate_params = presets.load_preset(default_preset)
+ with gr.Tab("Parameters", elem_id="parameters"):
+ with gr.Tab("Generation"):
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Preset', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
+ shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['random_preset'] = gr.Button('🎲', elem_classes='refresh-button')
+
+ with gr.Column():
+ shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()), value="All", elem_classes='slim-dropdown')
+
+ with gr.Row():
+ with gr.Column():
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens'])
+ shared.gradio['temperature'] = gr.Slider(0.01, 5, value=generate_params['temperature'], step=0.01, label='temperature')
+ shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
+ shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
+ shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
+ shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty')
+ shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty')
+ shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty')
+ shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range')
+ shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p')
+ shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
+ shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
+ shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
+ shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff')
+
+ with gr.Column():
+ shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
+ shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', lines=3, elem_classes=['add_scrollbar'])
+ shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
+ shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
+ shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
+ shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
+ shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
+ shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
+ shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
+ shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])
+ shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature'])
+ shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
+ shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
+ shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
+ with gr.Accordion('Other parameters', open=False):
+ shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
+ shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size')
+ shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'], label='min_length')
+ shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams', info='For Beam Search, along with length_penalty and early_stopping.')
+ shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty')
+ shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping')
+
+ gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)")
+
+ with gr.Column():
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
+ shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
+ shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
+ shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
+
+ shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
+ shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Custom token bans', info='Specific token IDs to ban from generating, comma-separated. The IDs can be found in the Default or Notebook tab.')
+
+ with gr.Column():
+ shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
+ shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
+ shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
+ shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')
+ shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')
+
+ with gr.Blocks():
+ shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.')
+
+ with gr.Row() as shared.gradio['grammar_file_row']:
+ shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
+ ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
+ shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
+
+ shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
+
+ ui_chat.create_chat_settings_ui()
+
+
+def create_event_handlers():
+ shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader', 'dynamic_temperature'), gradio(loaders.list_all_samplers()), show_progress=False)
+ shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
+ shared.gradio['random_preset'].click(presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
+ shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'))
+ shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'))
+
+
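+# Precedence for the initial truncation length: an explicitly provided --max_seq_len wins,
+# then an explicitly provided --n_ctx, otherwise the value from settings.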
+def get_truncation_length():
+ if 'max_seq_len' in shared.provided_arguments or shared.args.max_seq_len != shared.args_defaults.max_seq_len:
+ return shared.args.max_seq_len
+ elif 'n_ctx' in shared.provided_arguments or shared.args.n_ctx != shared.args_defaults.n_ctx:
+ return shared.args.n_ctx
+ else:
+ return shared.settings['truncation_length']
+
+
+def load_grammar(name):
+ p = Path(f'grammars/{name}')
+ if p.exists():
+ with open(p, 'r', encoding='utf-8') as f:
+ return f.read()
+ else:
+ return ''
diff --git a/modules/ui_session.py b/modules/ui_session.py
new file mode 100644
index 0000000000000000000000000000000000000000..989046eae8d8192610508ac731f9320996d03eda
--- /dev/null
+++ b/modules/ui_session.py
@@ -0,0 +1,74 @@
+import gradio as gr
+
+from modules import shared, ui, utils
+from modules.github import clone_or_pull_repository
+from modules.utils import gradio
+
+
+def create_ui():
+ mu = shared.args.multi_user
+ with gr.Tab("Session", elem_id="session-tab"):
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
+ with gr.Row():
+ shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡')
+ shared.gradio['save_settings'] = gr.Button('Save UI defaults to settings.yaml', interactive=not mu)
+
+ with gr.Row():
+ with gr.Column():
+ shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table')
+
+ with gr.Column():
+ shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label="Boolean command-line flags", elem_classes='checkboxgroup-table')
+
+ with gr.Column():
+ extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.', interactive=not mu)
+ extension_status = gr.Markdown()
+
+ shared.gradio['theme_state'] = gr.Textbox(visible=False, value='dark' if shared.settings['dark_theme'] else 'light')
+ extension_name.submit(clone_or_pull_repository, extension_name, extension_status, show_progress=False)
+
+ # Reset interface event
+ shared.gradio['reset_interface'].click(
+ set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then(
+ lambda: None, None, None, _js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}')
+
+ shared.gradio['toggle_dark_mode'].click(
+ lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then(
+ lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state'))
+
+ shared.gradio['save_settings'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ ui.save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents')).then(
+ lambda: './', None, gradio('save_root')).then(
+ lambda: 'settings.yaml', None, gradio('save_filename')).then(
+ lambda: gr.update(visible=True), None, gradio('file_saver'))
+
+
+def set_interface_arguments(extensions, bool_active):
+ shared.args.extensions = extensions
+
+ bool_list = get_boolean_arguments()
+
+ for k in bool_list:
+ setattr(shared.args, k, False)
+ for k in bool_active:
+ setattr(shared.args, k, True)
+ if k == 'api':
+ shared.add_extension('openai', last=True)
+
+ shared.need_restart = True
+
+
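+# Lists the boolean command-line flags (excluding deprecated args and model UI elements);
+# with active=True, only the flags that are currently enabled are returned, e.g. 'cpu'
+# when started with --cpu (illustrative example).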
+def get_boolean_arguments(active=False):
+ exclude = shared.deprecated_args
+
+ cmd_list = vars(shared.args)
+ bool_list = sorted([k for k in cmd_list if type(cmd_list[k]) is bool and k not in exclude + ui.list_model_elements()])
+ bool_active = [k for k in bool_list if vars(shared.args)[k]]
+
+ if active:
+ return bool_active
+ else:
+ return bool_list
diff --git a/modules/utils.py b/modules/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b65736b6b838fff4fbaf27fdf2045d81ed351b6
--- /dev/null
+++ b/modules/utils.py
@@ -0,0 +1,143 @@
+import os
+import re
+from datetime import datetime
+from pathlib import Path
+
+from modules import github, shared
+from modules.logging_colors import logger
+
+
+# Helper function to get multiple values from shared.gradio
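+# e.g. gradio('loader', 'interface_state') -> [shared.gradio['loader'], shared.gradio['interface_state']];
+# a single list or tuple argument is unpacked the same way.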
+def gradio(*keys):
+ if len(keys) == 1 and type(keys[0]) in [list, tuple]:
+ keys = keys[0]
+
+ return [shared.gradio[k] for k in keys]
+
+
+def save_file(fname, contents):
+ if fname == '':
+ logger.error('File name is empty!')
+ return
+
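+ # Resolve the target path relative to the repository root and refuse to write outside
+ # of it (path traversal guard).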
+ root_folder = Path(__file__).resolve().parent.parent
+ abs_path_str = os.path.abspath(fname)
+ rel_path_str = os.path.relpath(abs_path_str, root_folder)
+ rel_path = Path(rel_path_str)
+ if rel_path.parts[0] == '..':
+ logger.error(f'Invalid file path: \"{fname}\"')
+ return
+
+ with open(abs_path_str, 'w', encoding='utf-8') as f:
+ f.write(contents)
+
+ logger.info(f'Saved \"{abs_path_str}\".')
+
+
+def delete_file(fname):
+ if fname == '':
+ logger.error('File name is empty!')
+ return
+
+ root_folder = Path(__file__).resolve().parent.parent
+ abs_path_str = os.path.abspath(fname)
+ rel_path_str = os.path.relpath(abs_path_str, root_folder)
+ rel_path = Path(rel_path_str)
+ if rel_path.parts[0] == '..':
+ logger.error(f'Invalid file path: \"{fname}\"')
+ return
+
+ if rel_path.exists():
+ rel_path.unlink()
+ logger.info(f'Deleted \"{fname}\".')
+
+
+def current_time():
+ return f"{datetime.now().strftime('%Y-%m-%d-%H%M%S')}"
+
+
+def atoi(text):
+ return int(text) if text.isdigit() else text.lower()
+
+
+# Replace multiple string pairs in a string
+def replace_all(text, dic):
+ for i, j in dic.items():
+ text = text.replace(i, j)
+
+ return text
+
+
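+# Natural sort key, e.g. sorted(['model-10', 'model-2'], key=natural_keys) yields
+# ['model-2', 'model-10'].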
+def natural_keys(text):
+ return [atoi(c) for c in re.split(r'(\d+)', text)]
+
+
+def get_available_models():
+ model_list = []
+ for item in list(Path(f'{shared.args.model_dir}/').glob('*')):
+ if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name:
+ model_list.append(item.name)
+
+ return ['None'] + sorted(model_list, key=natural_keys)
+
+
+def get_available_ggufs():
+ model_list = []
+ for item in Path(f'{shared.args.model_dir}/').glob('*'):
+ if item.is_file() and item.name.lower().endswith(".gguf"):
+ model_list.append(item.name)
+
+ return ['None'] + sorted(model_list, key=natural_keys)
+
+
+def get_available_presets():
+ return sorted(set((k.stem for k in Path('presets').glob('*.yaml'))), key=natural_keys)
+
+
+def get_available_prompts():
+ prompts = []
+ files = set((k.stem for k in Path('prompts').glob('*.txt')))
+ prompts += sorted([k for k in files if re.match('^[0-9]', k)], key=natural_keys, reverse=True)
+ prompts += sorted([k for k in files if re.match('^[^0-9]', k)], key=natural_keys)
+ prompts += ['None']
+ return prompts
+
+
+def get_available_characters():
+ paths = (x for x in Path('characters').iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
+ return sorted(set((k.stem for k in paths)), key=natural_keys)
+
+
+def get_available_instruction_templates():
+ path = "instruction-templates"
+ paths = []
+ if os.path.exists(path):
+ paths = (x for x in Path(path).iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
+
+ return ['None'] + sorted(set((k.stem for k in paths)), key=natural_keys)
+
+
+def get_available_extensions():
+ extensions = sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=natural_keys)
+ extensions = [v for v in extensions if v not in github.new_extensions]
+ return extensions
+
+
+def get_available_loras():
+ return ['None'] + sorted([item.name for item in list(Path(shared.args.lora_dir).glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=natural_keys)
+
+
+def get_datasets(path: str, ext: str):
+ # include subdirectories for raw txt files to allow training from a subdirectory of txt files
+ if ext == "txt":
+ return ['None'] + sorted(set([k.stem for k in list(Path(path).glob('*.txt')) + list(Path(path).glob('*/')) if k.stem != 'put-trainer-datasets-here']), key=natural_keys)
+
+ return ['None'] + sorted(set([k.stem for k in Path(path).glob(f'*.{ext}') if k.stem != 'put-trainer-datasets-here']), key=natural_keys)
+
+
+def get_available_chat_styles():
+ return sorted(set(('-'.join(k.stem.split('-')[1:]) for k in Path('css').glob('chat_style*.css'))), key=natural_keys)
+
+
+def get_available_grammars():
+ return ['None'] + sorted([item.name for item in list(Path('grammars').glob('*.gbnf'))], key=natural_keys)
diff --git a/one_click.py b/one_click.py
new file mode 100644
index 0000000000000000000000000000000000000000..105d651901bbd90762bb9713c18aee35c9bd539d
--- /dev/null
+++ b/one_click.py
@@ -0,0 +1,399 @@
+import argparse
+import glob
+import hashlib
+import os
+import platform
+import re
+import signal
+import site
+import subprocess
+import sys
+
+script_dir = os.getcwd()
+conda_env_path = os.path.join(script_dir, "installer_files", "env")
+
+# Remove the '# ' from the following lines as needed for your AMD GPU on Linux
+# os.environ["ROCM_PATH"] = '/opt/rocm'
+# os.environ["HSA_OVERRIDE_GFX_VERSION"] = '10.3.0'
+# os.environ["HCC_AMDGPU_TARGET"] = 'gfx1030'
+
+# Command-line flags
+cmd_flags_path = os.path.join(script_dir, "CMD_FLAGS.txt")
+if os.path.exists(cmd_flags_path):
+ with open(cmd_flags_path, 'r') as f:
+ CMD_FLAGS = ' '.join(line.strip().rstrip('\\').strip() for line in f if line.strip().rstrip('\\').strip() and not line.strip().startswith('#'))
+else:
+ CMD_FLAGS = ''
+
+flags = f"{' '.join([flag for flag in sys.argv[1:] if flag != '--update'])} {CMD_FLAGS}"
+
+
+def signal_handler(sig, frame):
+ sys.exit(0)
+
+
+signal.signal(signal.SIGINT, signal_handler)
+
+
+def is_linux():
+ return sys.platform.startswith("linux")
+
+
+def is_windows():
+ return sys.platform.startswith("win")
+
+
+def is_macos():
+ return sys.platform.startswith("darwin")
+
+
+def is_x86_64():
+ return platform.machine() == "x86_64"
+
+
+def cpu_has_avx2():
+ try:
+ import cpuinfo
+
+ info = cpuinfo.get_cpu_info()
+ if 'avx2' in info['flags']:
+ return True
+ else:
+ return False
+ except:
+ return True
+
+
+def cpu_has_amx():
+ try:
+ import cpuinfo
+
+ info = cpuinfo.get_cpu_info()
+ if 'amx' in info['flags']:
+ return True
+ else:
+ return False
+ except:
+ return True
+
+
+def torch_version():
+ site_packages_path = None
+ for sitedir in site.getsitepackages():
+ if "site-packages" in sitedir and conda_env_path in sitedir:
+ site_packages_path = sitedir
+ break
+
+ if site_packages_path:
+ torch_version_file = open(os.path.join(site_packages_path, 'torch', 'version.py')).read().splitlines()
+ torver = [line for line in torch_version_file if '__version__' in line][0].split('__version__ = ')[1].strip("'")
+ else:
+ from torch import __version__ as torver
+
+ return torver
+
+
+def is_installed():
+ site_packages_path = None
+ for sitedir in site.getsitepackages():
+ if "site-packages" in sitedir and conda_env_path in sitedir:
+ site_packages_path = sitedir
+ break
+
+ if site_packages_path:
+ return os.path.isfile(os.path.join(site_packages_path, 'torch', '__init__.py'))
+ else:
+ return os.path.isdir(conda_env_path)
+
+
+def check_env():
+ # If we have access to conda, we are probably in an environment
+ conda_exist = run_cmd("conda", environment=True, capture_output=True).returncode == 0
+ if not conda_exist:
+ print("Conda is not installed. Exiting...")
+ sys.exit(1)
+
+ # Ensure this is a new environment and not the base environment
+ if os.environ["CONDA_DEFAULT_ENV"] == "base":
+ print("Create an environment for this project and activate it. Exiting...")
+ sys.exit(1)
+
+
+def clear_cache():
+ run_cmd("conda clean -a -y", environment=True)
+ run_cmd("python -m pip cache purge", environment=True)
+
+
+def print_big_message(message):
+ message = message.strip()
+ lines = message.split('\n')
+ print("\n\n*******************************************************************")
+ for line in lines:
+ if line.strip() != '':
+ print("*", line)
+
+ print("*******************************************************************\n\n")
+
+
+def calculate_file_hash(file_path):
+ p = os.path.join(script_dir, file_path)
+ if os.path.isfile(p):
+ with open(p, 'rb') as f:
+ return hashlib.sha256(f.read()).hexdigest()
+ else:
+ return ''
+
+
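+# e.g. run_cmd("python -m pip list", environment=True) runs the command inside the conda
+# environment under installer_files/env; with assert_success=True a non-zero exit status
+# aborts the installer.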
+def run_cmd(cmd, assert_success=False, environment=False, capture_output=False, env=None):
+ # Use the conda environment
+ if environment:
+ if is_windows():
+ conda_bat_path = os.path.join(script_dir, "installer_files", "conda", "condabin", "conda.bat")
+ cmd = f'"{conda_bat_path}" activate "{conda_env_path}" >nul && {cmd}'
+ else:
+ conda_sh_path = os.path.join(script_dir, "installer_files", "conda", "etc", "profile.d", "conda.sh")
+ cmd = f'. "{conda_sh_path}" && conda activate "{conda_env_path}" && {cmd}'
+
+ # Run shell commands
+ result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env)
+
+ # Assert the command ran successfully
+ if assert_success and result.returncode != 0:
+ print(f"Command '{cmd}' failed with exit status code '{str(result.returncode)}'.\n\nExiting now.\nTry running the start/update script again.")
+ sys.exit(1)
+
+ return result
+
+
+def install_webui():
+ # Select your GPU, or choose to run in CPU mode
+ if "GPU_CHOICE" in os.environ:
+ choice = os.environ["GPU_CHOICE"].upper()
+ print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
+ else:
+ print()
+ print("What is your GPU?")
+ print()
+ print("A) NVIDIA")
+ print("B) AMD (Linux/MacOS only. Requires ROCm SDK 5.6 on Linux)")
+ print("C) Apple M Series")
+ print("D) Intel Arc (IPEX)")
+ print("N) None (I want to run models in CPU mode)")
+ print()
+
+ choice = input("Input> ").upper()
+ while choice not in 'ABCDN':
+ print("Invalid choice. Please try again.")
+ choice = input("Input> ").upper()
+
+ gpu_choice_to_name = {
+ "A": "NVIDIA",
+ "B": "AMD",
+ "C": "APPLE",
+ "D": "INTEL",
+ "N": "NONE"
+ }
+
+ selected_gpu = gpu_choice_to_name[choice]
+
+ if selected_gpu == "NONE":
+ with open(cmd_flags_path, 'r+') as cmd_flags_file:
+ if "--cpu" not in cmd_flags_file.read():
+ print_big_message("Adding the --cpu flag to CMD_FLAGS.txt.")
+ cmd_flags_file.write("\n--cpu")
+
+ # Find the proper Pytorch installation command
+ install_git = "conda install -y -k ninja git"
+ install_pytorch = "python -m pip install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* "
+
+ use_cuda118 = "N"
+ if any((is_windows(), is_linux())) and selected_gpu == "NVIDIA":
+ if "USE_CUDA118" in os.environ:
+ use_cuda118 = "Y" if os.environ.get("USE_CUDA118", "").lower() in ("yes", "y", "true", "1", "t", "on") else "N"
+ else:
+ # Ask for CUDA version if using NVIDIA
+ print("\nDo you want to use CUDA 11.8 instead of 12.1? Only choose this option if your GPU is very old (Kepler or older).\nFor RTX and GTX series GPUs, say \"N\". If unsure, say \"N\".\n")
+ use_cuda118 = input("Input (Y/N)> ").upper().strip('"\'').strip()
+ while use_cuda118 not in 'YN':
+ print("Invalid choice. Please try again.")
+ use_cuda118 = input("Input> ").upper().strip('"\'').strip()
+
+ if use_cuda118 == 'Y':
+ print("CUDA: 11.8")
+ install_pytorch += "--index-url https://download.pytorch.org/whl/cu118"
+ else:
+ print("CUDA: 12.1")
+ install_pytorch += "--index-url https://download.pytorch.org/whl/cu121"
+ elif not is_macos() and selected_gpu == "AMD":
+ if is_linux():
+ install_pytorch += "--index-url https://download.pytorch.org/whl/rocm5.6"
+ else:
+ print("AMD GPUs are only supported on Linux. Exiting...")
+ sys.exit(1)
+ elif is_linux() and selected_gpu in ["APPLE", "NONE"]:
+ install_pytorch += "--index-url https://download.pytorch.org/whl/cpu"
+ elif selected_gpu == "INTEL":
+ install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+
+ # Install Git and then Pytorch
+ print_big_message("Installing PyTorch.")
+ run_cmd(f"{install_git} && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True)
+
+ # Install CUDA libraries (this wasn't necessary for Pytorch before...)
+ if selected_gpu == "NVIDIA":
+ print_big_message("Installing the CUDA runtime libraries.")
+ run_cmd(f"conda install -y -c \"nvidia/label/{'cuda-12.1.1' if use_cuda118 == 'N' else 'cuda-11.8.0'}\" cuda-runtime", assert_success=True, environment=True)
+
+ if selected_gpu == "INTEL":
+ # Install oneAPI dependencies via conda
+ print_big_message("Installing Intel oneAPI runtime libraries.")
+ run_cmd("conda install -y -c intel dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0")
+ # Install libuv required by Intel-patched torch
+ run_cmd("conda install -y libuv")
+
+ # Install the webui requirements
+ update_requirements(initial_installation=True)
+
+
+def update_requirements(initial_installation=False):
+ # Create .git directory if missing
+ if not os.path.exists(os.path.join(script_dir, ".git")):
+ git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main'
+ run_cmd(git_creation_cmd, environment=True, assert_success=True)
+
+ files_to_check = [
+ 'start_linux.sh', 'start_macos.sh', 'start_windows.bat', 'start_wsl.bat',
+ 'update_linux.sh', 'update_macos.sh', 'update_windows.bat', 'update_wsl.bat',
+ 'one_click.py'
+ ]
+
+ before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
+ run_cmd("git pull --autostash", assert_success=True, environment=True)
+ after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
+
+ # Check for differences in installation file hashes
+ for file_name in files_to_check:
+ if before_pull_hashes[file_name] != after_pull_hashes[file_name]:
+ print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.")
+ exit(1)
+
+ # Extensions requirements are installed only during the initial install by default.
+ # That can be changed with the INSTALL_EXTENSIONS environment variable.
+ install = initial_installation
+ if "INSTALL_EXTENSIONS" in os.environ:
+ install = os.environ["INSTALL_EXTENSIONS"].lower() in ("yes", "y", "true", "1", "t", "on")
+
+ if install:
+ print_big_message("Installing extensions requirements.")
+ skip = ['superbooga', 'superboogav2', 'coqui_tts'] # These fail to install on Windows
+ extensions = [foldername for foldername in os.listdir('extensions') if os.path.isfile(os.path.join('extensions', foldername, 'requirements.txt'))]
+ extensions = [x for x in extensions if x not in skip]
+ for i, extension in enumerate(extensions):
+ print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n")
+ extension_req_path = os.path.join("extensions", extension, "requirements.txt")
+ run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True)
+ elif initial_installation:
+ print_big_message("Will not install extensions due to INSTALL_EXTENSIONS environment variable.")
+
+ # Detect the Python and PyTorch versions
+ torver = torch_version()
+ is_cuda = '+cu' in torver
+ is_cuda118 = '+cu118' in torver # 2.1.0+cu118
+ is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2
+ is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi
+ is_cpu = '+cpu' in torver # 2.0.1+cpu
+
+ if is_rocm:
+ base_requirements = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+ elif is_cpu or is_intel:
+ base_requirements = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+ elif is_macos():
+ base_requirements = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
+ else:
+ base_requirements = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+
+ requirements_file = base_requirements
+
+ print_big_message(f"Installing webui requirements from file: {requirements_file}")
+ print(f"TORCH: {torver}\n")
+
+ # Prepare the requirements file
+ textgen_requirements = open(requirements_file).read().splitlines()
+ if is_cuda118:
+ textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]
+ if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11
+ textgen_requirements = [req for req in textgen_requirements if 'jllllll/flash-attention' not in req]
+
+ with open('temp_requirements.txt', 'w') as file:
+ file.write('\n'.join(textgen_requirements))
+
+ # Workaround for git+ packages not updating properly.
+ git_requirements = [req for req in textgen_requirements if req.startswith("git+")]
+ for req in git_requirements:
+ url = req.replace("git+", "")
+ package_name = url.split("/")[-1].split("@")[0].removesuffix(".git") # strip only a trailing ".git" suffix, unlike rstrip
+ run_cmd(f"python -m pip uninstall -y {package_name}", environment=True)
+ print(f"Uninstalled {package_name}")
+
+ # Make sure that API requirements are installed (temporary)
+ extension_req_path = os.path.join("extensions", "openai", "requirements.txt")
+ if os.path.exists(extension_req_path):
+ run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", environment=True)
+
+ # Install/update the project requirements
+ run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True)
+ os.remove('temp_requirements.txt')
+
+ # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility
+ if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1:
+ clear_cache()
+ return
+
+ if not os.path.exists("repositories/"):
+ os.mkdir("repositories")
+
+ clear_cache()
+
+
+def launch_webui():
+ run_cmd(f"python server.py {flags}", environment=True)
+
+
+if __name__ == "__main__":
+ # Verifies we are in a conda environment
+ check_env()
+
+ parser = argparse.ArgumentParser(add_help=False)
+ parser.add_argument('--update', action='store_true', help='Update the web UI.')
+ args, _ = parser.parse_known_args()
+
+ if args.update:
+ update_requirements()
+ else:
+ # If webui has already been installed, skip and run
+ if not is_installed():
+ install_webui()
+ os.chdir(script_dir)
+
+ if os.environ.get("LAUNCH_AFTER_INSTALL", "").lower() in ("no", "n", "false", "0", "f", "off"):
+ print_big_message("Install finished successfully and will now exit due to LAUNCH_AFTER_INSTALL.")
+ sys.exit()
+
+ # Check if a model has been downloaded yet
+ if '--model-dir' in flags:
+ # Splits on ' ' or '=' while maintaining spaces within quotes
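+ # e.g. --model-dir "my models" keeps '"my models"' as a single token (illustrative)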
+ flags_list = re.split(' +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)|=', flags)
+ model_dir = [flags_list[(flags_list.index(flag) + 1)] for flag in flags_list if flag == '--model-dir'][0].strip('"\'')
+ else:
+ model_dir = 'models'
+
+ if len([item for item in glob.glob(f'{model_dir}/*') if not item.endswith(('.txt', '.yaml'))]) == 0:
+ print_big_message("WARNING: You haven't downloaded any model yet.\nOnce the web UI launches, head over to the \"Model\" tab and download one.")
+
+ # Workaround for llama-cpp-python loading paths in CUDA env vars even if they do not exist
+ conda_path_bin = os.path.join(conda_env_path, "bin")
+ if not os.path.exists(conda_path_bin):
+ os.mkdir(conda_path_bin)
+
+ # Launch the webui
+ launch_webui()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..21115394e0909bc2005083a893b0ad2ac2cd7b26
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,67 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.42.*; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
+
+# llama-cpp-python (CPU only, AVX2)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+
+# llama-cpp-python (CUDA, no tensor cores)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.44+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.44+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.44+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.44+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+
+# llama-cpp-python (CUDA, tensor cores)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.44+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.44+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.44+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.44+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+
+# CUDA wheels
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu121-py3-none-any.whl
+autoawq==0.1.8; platform_system == "Linux" or platform_system == "Windows"
diff --git a/requirements_amd.txt b/requirements_amd.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cbdbd464735cdc8a9db0885a6a3b5de16c8257f8
--- /dev/null
+++ b/requirements_amd.txt
@@ -0,0 +1,45 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.38.1; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
+
+# llama-cpp-python (CPU only, AVX2)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+
+# AMD wheels
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.44+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.44+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..50c77b23bb2f14c34ec15847bb2c5b117d1cb94a
--- /dev/null
+++ b/requirements_amd_noavx2.txt
@@ -0,0 +1,43 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.38.1; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
+
+# llama-cpp-python (CPU only, no AVX2)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+
+# AMD wheels
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ce1c4a5828636e5b1b8a114993ad4bc65964ed16
--- /dev/null
+++ b/requirements_apple_intel.txt
@@ -0,0 +1,37 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.42.*; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
+
+# Mac wheels
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7a5110d6c3783a9e2acd3cacdf75c076ad361430
--- /dev/null
+++ b/requirements_apple_silicon.txt
@@ -0,0 +1,39 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.42.*; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
+
+# Mac wheels
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.44-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl
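
Note: the macOS markers above select wheels by `platform_release`, which is the Darwin kernel version rather than the macOS product version: Darwin 20.x corresponds to macOS 11 (Big Sur), 21.x to 12 (Monterey), 22.x to 13 (Ventura), and 23.x to 14 (Sonoma). A small illustrative sketch (assuming that mapping and a macOS host; not part of the diff) of checking which wheel bucket the current machine falls into:

# Illustrative only: map the Darwin kernel version (what platform_release reports)
# to the macOS series used to pick a Metal llama-cpp-python wheel above.
import platform

darwin_major = int(platform.release().split(".")[0])   # e.g. "23.2.0" -> 23
macos_series = {20: "11 (Big Sur)", 21: "12 (Monterey)", 22: "13 (Ventura)", 23: "14 (Sonoma)"}
print(f"Darwin {darwin_major} -> macOS {macos_series.get(darwin_major, 'not covered by these wheels')}")
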
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d1a52bb4ceaaaf84e6354a21aa475ab1636bbee5
--- /dev/null
+++ b/requirements_cpu_only.txt
@@ -0,0 +1,34 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.42.*; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
+
+# llama-cpp-python (CPU only, AVX2)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1ae86b8cfbc934e81073cc1cc78fffed8703b773
--- /dev/null
+++ b/requirements_cpu_only_noavx2.txt
@@ -0,0 +1,34 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.42.*; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
+
+# llama-cpp-python (CPU only, no AVX2)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
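
Note: whether to use requirements_cpu_only.txt or requirements_cpu_only_noavx2.txt depends on AVX2 support in the host CPU. A rough, illustrative check for Linux (assuming /proc/cpuinfo is readable; this helper is not part of the project):

# Illustrative only: detect AVX2 on Linux to choose between the two CPU-only files.
from pathlib import Path

cpuinfo = Path("/proc/cpuinfo")
has_avx2 = cpuinfo.exists() and any(
    line.startswith("flags") and "avx2" in line.split()
    for line in cpuinfo.read_text().splitlines()
)
print("requirements_cpu_only.txt" if has_avx2 else "requirements_cpu_only_noavx2.txt")
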
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2a035e34876dfbfcef05d8a85a64bca1211c6926
--- /dev/null
+++ b/requirements_noavx2.txt
@@ -0,0 +1,67 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.42.*; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
+
+# llama-cpp-python (CPU only, no AVX2)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.44+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+
+# llama-cpp-python (CUDA, no tensor cores)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.44+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.44+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.44+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.44+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+
+# llama-cpp-python (CUDA, tensor cores)
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.44+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.44+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.44+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.44+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+
+# CUDA wheels
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX/ctransformers-0.2.27+cu121-py3-none-any.whl
+autoawq==0.1.8; platform_system == "Linux" or platform_system == "Windows"
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2b6fa38c5620862920be91a2add0e903706cd52d
--- /dev/null
+++ b/requirements_nowheels.txt
@@ -0,0 +1,28 @@
+accelerate==0.25.*
+colorama
+datasets
+einops
+gradio==3.50.*
+hqq==0.1.3
+jinja2==3.1.2
+lm_eval==0.3.0
+markdown
+numpy==1.26.*
+optimum==1.16.*
+pandas
+peft==0.8.*
+Pillow>=9.5.0
+pyyaml
+requests
+rich
+safetensors==0.4.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.37.*
+tqdm
+wandb
+
+# bitsandbytes
+bitsandbytes==0.42.*; platform_system != "Windows"
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
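
Note: every wheel URL in the requirements files above carries a trailing `; ...` clause, a PEP 508 environment marker that pip evaluates against the running interpreter, so a single file can pin different prebuilt wheels per OS, CPU architecture, and Python version; lines whose marker is false are simply skipped. A minimal sketch of evaluating such a marker with the packaging library (used here only for illustration; it is not one of the dependencies listed above):

# Illustrative only: evaluate a PEP 508 marker like the ones attached to the wheel URLs above.
from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')

# True only on 64-bit Linux under CPython 3.11 -- the condition pip checks
# before downloading the matching cp311 manylinux wheel.
print(marker.evaluate())

# An explicit environment can be supplied to preview another platform;
# keys not given fall back to the current interpreter's values.
print(marker.evaluate({"platform_system": "Windows", "platform_machine": "AMD64", "python_version": "3.10"}))
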
diff --git a/server.py b/server.py
new file mode 100644
index 0000000000000000000000000000000000000000..631aff603b6e7172ea8364a595aae77fc43a94c1
--- /dev/null
+++ b/server.py
@@ -0,0 +1,263 @@
+import os
+import warnings
+
+from modules import shared
+
+import accelerate # This early import makes Intel GPUs happy
+
+import modules.one_click_installer_check
+from modules.block_requests import OpenMonkeyPatch, RequestBlocker
+from modules.logging_colors import logger
+
+os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
+os.environ['BITSANDBYTES_NOWELCOME'] = '1'
+warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
+warnings.filterwarnings('ignore', category=UserWarning, message='Using the update method is deprecated')
+warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_name" has conflict')
+warnings.filterwarnings('ignore', category=UserWarning, message='The value passed into gr.Dropdown()')
+warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_names" has conflict')
+
+with RequestBlocker():
+ import gradio as gr
+
+import matplotlib
+
+matplotlib.use('Agg') # This fixes LaTeX rendering on some systems
+
+import json
+import os
+import signal
+import sys
+import time
+from functools import partial
+from pathlib import Path
+from threading import Lock
+
+import yaml
+
+import modules.extensions as extensions_module
+from modules import (
+ chat,
+ training,
+ ui,
+ ui_chat,
+ ui_default,
+ ui_file_saving,
+ ui_model_menu,
+ ui_notebook,
+ ui_parameters,
+ ui_session,
+ utils
+)
+from modules.extensions import apply_extensions
+from modules.LoRA import add_lora_to_model
+from modules.models import load_model
+from modules.models_settings import (
+ get_fallback_settings,
+ get_model_metadata,
+ update_model_parameters
+)
+from modules.shared import do_cmd_flags_warnings
+from modules.utils import gradio
+
+
+def signal_handler(sig, frame):
+ logger.info("Received Ctrl+C. Shutting down Text generation web UI gracefully.")
+ sys.exit(0)
+
+
+signal.signal(signal.SIGINT, signal_handler)
+
+
+def create_interface():
+
+ title = 'Text generation web UI'
+
+ # Password authentication
+ auth = []
+ if shared.args.gradio_auth:
+ auth.extend(x.strip() for x in shared.args.gradio_auth.strip('"').replace('\n', '').split(',') if x.strip())
+ if shared.args.gradio_auth_path:
+ with open(shared.args.gradio_auth_path, 'r', encoding="utf8") as file:
+ auth.extend(x.strip() for line in file for x in line.split(',') if x.strip())
+ auth = [tuple(cred.split(':')) for cred in auth]
+
+ # Import the extensions and execute their setup() functions
+ if shared.args.extensions is not None and len(shared.args.extensions) > 0:
+ extensions_module.load_extensions()
+
+ # Force some events to be triggered on page load
+ shared.persistent_interface_state.update({
+ 'loader': shared.args.loader or 'Transformers',
+ 'mode': shared.settings['mode'],
+ 'character_menu': shared.args.character or shared.settings['character'],
+ 'instruction_template_str': shared.settings['instruction_template_str'],
+ 'prompt_menu-default': shared.settings['prompt-default'],
+ 'prompt_menu-notebook': shared.settings['prompt-notebook'],
+ 'filter_by_loader': shared.args.loader or 'All'
+ })
+
+ if Path("cache/pfp_character.png").exists():
+ Path("cache/pfp_character.png").unlink()
+
+ # css/js strings
+ css = ui.css
+ js = ui.js
+ css += apply_extensions('css')
+ js += apply_extensions('js')
+
+ # Interface state elements
+ shared.input_elements = ui.list_interface_input_elements()
+
+ with gr.Blocks(css=css, analytics_enabled=False, title=title, theme=ui.theme) as shared.gradio['interface']:
+
+ # Interface state
+ shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements})
+
+ # Audio notification
+ if Path("notification.mp3").exists():
+ shared.gradio['audio_notification'] = gr.Audio(interactive=False, value="notification.mp3", elem_id="audio_notification", visible=False)
+
+ # Floating menus for saving/deleting files
+ ui_file_saving.create_ui()
+
+ # Temporary clipboard for saving files
+ shared.gradio['temporary_text'] = gr.Textbox(visible=False)
+
+ # Text Generation tab
+ ui_chat.create_ui()
+ ui_default.create_ui()
+ ui_notebook.create_ui()
+
+ ui_parameters.create_ui(shared.settings['preset']) # Parameters tab
+ ui_model_menu.create_ui() # Model tab
+ training.create_ui() # Training tab
+ ui_session.create_ui() # Session tab
+
+ # Generation events
+ ui_chat.create_event_handlers()
+ ui_default.create_event_handlers()
+ ui_notebook.create_event_handlers()
+
+ # Other events
+ ui_file_saving.create_event_handlers()
+ ui_parameters.create_event_handlers()
+ ui_model_menu.create_event_handlers()
+
+ # Interface launch events
+ if shared.settings['dark_theme']:
+ shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')")
+
+ shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}")
+ shared.gradio['interface'].load(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')
+ shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False)
+ shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
+
+ extensions_module.create_extensions_tabs() # Extensions tabs
+ extensions_module.create_extensions_block() # Extensions block
+
+ # Launch the interface
+ shared.gradio['interface'].queue(concurrency_count=64)
+ with OpenMonkeyPatch():
+ shared.gradio['interface'].launch(
+ prevent_thread_lock=True,
+ share=shared.args.share,
+ server_name=None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'),
+ server_port=shared.args.listen_port,
+ inbrowser=shared.args.auto_launch,
+ auth=auth or None,
+ ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True,
+ ssl_keyfile=shared.args.ssl_keyfile,
+ ssl_certfile=shared.args.ssl_certfile,
+ debug=True if gr.utils.colab_check() else False
+ )
+
+
+if __name__ == "__main__":
+
+ logger.info("Starting Text generation web UI")
+ do_cmd_flags_warnings()
+
+ # Load custom settings
+ settings_file = None
+ if shared.args.settings is not None and Path(shared.args.settings).exists():
+ settings_file = Path(shared.args.settings)
+ elif Path('settings.yaml').exists():
+ settings_file = Path('settings.yaml')
+ elif Path('settings.json').exists():
+ settings_file = Path('settings.json')
+
+ if settings_file is not None:
+ logger.info(f"Loading settings from \"{settings_file}\"")
+ file_contents = open(settings_file, 'r', encoding='utf-8').read()
+        new_settings = json.loads(file_contents) if settings_file.suffix == ".json" else yaml.safe_load(file_contents)
+ shared.settings.update(new_settings)
+
+ # Fallback settings for models
+ shared.model_config['.*'] = get_fallback_settings()
+ shared.model_config.move_to_end('.*', last=False) # Move to the beginning
+
+ # Activate the extensions listed on settings.yaml
+ extensions_module.available_extensions = utils.get_available_extensions()
+ for extension in shared.settings['default_extensions']:
+ shared.args.extensions = shared.args.extensions or []
+ if extension not in shared.args.extensions:
+ shared.args.extensions.append(extension)
+
+ available_models = utils.get_available_models()
+
+ # Model defined through --model
+ if shared.args.model is not None:
+ shared.model_name = shared.args.model
+
+ # Select the model from a command-line menu
+ elif shared.args.model_menu:
+ if len(available_models) == 0:
+ logger.error('No models are available! Please download at least one.')
+ sys.exit(0)
+ else:
+ print('The following models are available:\n')
+ for i, model in enumerate(available_models):
+ print(f'{i+1}. {model}')
+
+ print(f'\nWhich one do you want to load? 1-{len(available_models)}\n')
+ i = int(input()) - 1
+ print()
+
+ shared.model_name = available_models[i]
+
+ # If any model has been selected, load it
+ if shared.model_name != 'None':
+ p = Path(shared.model_name)
+ if p.exists():
+ model_name = p.parts[-1]
+ shared.model_name = model_name
+ else:
+ model_name = shared.model_name
+
+ model_settings = get_model_metadata(model_name)
+ update_model_parameters(model_settings, initial=True) # hijack the command-line arguments
+
+ # Load the model
+ shared.model, shared.tokenizer = load_model(model_name)
+ if shared.args.lora:
+ add_lora_to_model(shared.args.lora)
+
+ shared.generation_lock = Lock()
+
+ if shared.args.nowebui:
+ # Start the API in standalone mode
+        shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
+ if shared.args.extensions is not None and len(shared.args.extensions) > 0:
+ extensions_module.load_extensions()
+ else:
+ # Launch the web UI
+ create_interface()
+ while True:
+ time.sleep(0.5)
+ if shared.need_restart:
+ shared.need_restart = False
+ time.sleep(0.5)
+ shared.gradio['interface'].close()
+ time.sleep(0.5)
+ create_interface()
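
Note: the `--gradio-auth`/`--gradio-auth-path` handling at the top of `create_interface()` turns comma-separated `user:password` pairs into the list of tuples passed to `launch(auth=...)`. A standalone sketch of that parsing with hypothetical sample credentials (the values below are made up; only the logic mirrors the code above):

# Illustrative only: mirrors the --gradio-auth parsing in create_interface() above.
raw = 'alice:secret1, bob:secret2'   # hypothetical value of --gradio-auth

auth = [x.strip() for x in raw.strip('"').replace('\n', '').split(',') if x.strip()]
auth = [tuple(cred.split(':')) for cred in auth]
print(auth)   # [('alice', 'secret1'), ('bob', 'secret2')]
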
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..68909d6f06802555c6b28fee09ac10e4ca8a3464
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,3 @@
+[pycodestyle]
+max-line-length = 120
+ignore = E402, E501, E722
\ No newline at end of file
diff --git a/start_linux.sh b/start_linux.sh
new file mode 100644
index 0000000000000000000000000000000000000000..948e40b8ad533375d10a426bddd1629bd77c4408
--- /dev/null
+++ b/start_linux.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+
+# deactivate existing conda envs as needed to avoid conflicts
+{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+
+OS_ARCH=$(uname -m)
+case "${OS_ARCH}" in
+ x86_64*) OS_ARCH="x86_64";;
+ arm64*) OS_ARCH="aarch64";;
+ aarch64*) OS_ARCH="aarch64";;
+ *) echo "Unknown system architecture: $OS_ARCH! This script runs only on x86_64 or arm64" && exit
+esac
+
+# config
+INSTALL_DIR="$(pwd)/installer_files"
+CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
+INSTALL_ENV_DIR="$(pwd)/installer_files/env"
+MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Linux-${OS_ARCH}.sh"
+conda_exists="F"
+
+# figure out whether git and conda need to be installed
+if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
+
+# (if necessary) install git and conda into a contained environment
+# download miniconda
+if [ "$conda_exists" == "F" ]; then
+ echo "Downloading Miniconda from $MINICONDA_DOWNLOAD_URL to $INSTALL_DIR/miniconda_installer.sh"
+
+ mkdir -p "$INSTALL_DIR"
+ curl -Lk "$MINICONDA_DOWNLOAD_URL" > "$INSTALL_DIR/miniconda_installer.sh"
+
+ chmod u+x "$INSTALL_DIR/miniconda_installer.sh"
+ bash "$INSTALL_DIR/miniconda_installer.sh" -b -p $CONDA_ROOT_PREFIX
+
+ # test the conda binary
+ echo "Miniconda version:"
+ "$CONDA_ROOT_PREFIX/bin/conda" --version
+fi
+
+# create the installer env
+if [ ! -e "$INSTALL_ENV_DIR" ]; then
+ "$CONDA_ROOT_PREFIX/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python=3.11
+fi
+
+# check if conda environment was actually created
+if [ ! -e "$INSTALL_ENV_DIR/bin/python" ]; then
+ echo "Conda environment is empty."
+ exit
+fi
+
+# environment isolation
+export PYTHONNOUSERSITE=1
+unset PYTHONPATH
+unset PYTHONHOME
+export CUDA_PATH="$INSTALL_ENV_DIR"
+export CUDA_HOME="$CUDA_PATH"
+
+# activate installer env
+source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
+conda activate "$INSTALL_ENV_DIR"
+
+# setup installer env
+python one_click.py $@
diff --git a/start_macos.sh b/start_macos.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5877e1676914f5ba983f161b5dc7dcc14ee53be5
--- /dev/null
+++ b/start_macos.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+
+# deactivate existing conda envs as needed to avoid conflicts
+{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+
+# M Series or Intel
+OS_ARCH=$(uname -m)
+case "${OS_ARCH}" in
+ x86_64*) OS_ARCH="x86_64";;
+ arm64*) OS_ARCH="arm64";;
+ *) echo "Unknown system architecture: $OS_ARCH! This script runs only on x86_64 or arm64" && exit
+esac
+
+# config
+INSTALL_DIR="$(pwd)/installer_files"
+CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
+INSTALL_ENV_DIR="$(pwd)/installer_files/env"
+MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-MacOSX-${OS_ARCH}.sh"
+conda_exists="F"
+
+# figure out whether git and conda need to be installed
+if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
+
+# (if necessary) install git and conda into a contained environment
+# download miniconda
+if [ "$conda_exists" == "F" ]; then
+ echo "Downloading Miniconda from $MINICONDA_DOWNLOAD_URL to $INSTALL_DIR/miniconda_installer.sh"
+
+ mkdir -p "$INSTALL_DIR"
+ curl -Lk "$MINICONDA_DOWNLOAD_URL" > "$INSTALL_DIR/miniconda_installer.sh"
+
+ chmod u+x "$INSTALL_DIR/miniconda_installer.sh"
+ bash "$INSTALL_DIR/miniconda_installer.sh" -b -p $CONDA_ROOT_PREFIX
+
+ # test the conda binary
+ echo "Miniconda version:"
+ "$CONDA_ROOT_PREFIX/bin/conda" --version
+fi
+
+# create the installer env
+if [ ! -e "$INSTALL_ENV_DIR" ]; then
+ "$CONDA_ROOT_PREFIX/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python=3.11
+fi
+
+# check if conda environment was actually created
+if [ ! -e "$INSTALL_ENV_DIR/bin/python" ]; then
+ echo "Conda environment is empty."
+ exit
+fi
+
+# environment isolation
+export PYTHONNOUSERSITE=1
+unset PYTHONPATH
+unset PYTHONHOME
+export CUDA_PATH="$INSTALL_ENV_DIR"
+export CUDA_HOME="$CUDA_PATH"
+
+# activate installer env
+source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
+conda activate "$INSTALL_ENV_DIR"
+
+# setup installer env
+python one_click.py $@
diff --git a/start_windows.bat b/start_windows.bat
new file mode 100644
index 0000000000000000000000000000000000000000..cdc303e46e49f12c046b942c8da19bce5aca7081
--- /dev/null
+++ b/start_windows.bat
@@ -0,0 +1,84 @@
+@echo off
+
+cd /D "%~dp0"
+
+set PATH=%PATH%;%SystemRoot%\system32
+
+echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end
+
+@rem Check for special characters in installation path
+set "SPCHARMESSAGE="WARNING: Special characters were detected in the installation path!" " This can cause the installation to fail!""
+echo "%CD%"| findstr /R /C:"[!#\$%&()\*+,;<=>?@\[\]\^`{|}~]" >nul && (
+ call :PrintBigMessage %SPCHARMESSAGE%
+)
+set SPCHARMESSAGE=
+
+@rem fix failed install when installing to a separate drive
+set TMP=%cd%\installer_files
+set TEMP=%cd%\installer_files
+
+@rem deactivate existing conda envs as needed to avoid conflicts
+(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul
+
+@rem config
+set INSTALL_DIR=%cd%\installer_files
+set CONDA_ROOT_PREFIX=%cd%\installer_files\conda
+set INSTALL_ENV_DIR=%cd%\installer_files\env
+set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Windows-x86_64.exe
+set conda_exists=F
+
+@rem figure out whether git and conda need to be installed
+call "%CONDA_ROOT_PREFIX%\_conda.exe" --version >nul 2>&1
+if "%ERRORLEVEL%" EQU "0" set conda_exists=T
+
+@rem (if necessary) install git and conda into a contained environment
+@rem download conda
+if "%conda_exists%" == "F" (
+ echo Downloading Miniconda from %MINICONDA_DOWNLOAD_URL% to %INSTALL_DIR%\miniconda_installer.exe
+
+ mkdir "%INSTALL_DIR%"
+ call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end )
+
+ echo Installing Miniconda to %CONDA_ROOT_PREFIX%
+ start /wait "" "%INSTALL_DIR%\miniconda_installer.exe" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%CONDA_ROOT_PREFIX%
+
+ @rem test the conda binary
+ echo Miniconda version:
+ call "%CONDA_ROOT_PREFIX%\_conda.exe" --version || ( echo. && echo Miniconda not found. && goto end )
+)
+
+@rem create the installer env
+if not exist "%INSTALL_ENV_DIR%" (
+ echo Packages to install: %PACKAGES_TO_INSTALL%
+ call "%CONDA_ROOT_PREFIX%\_conda.exe" create --no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python=3.11 || ( echo. && echo Conda environment creation failed. && goto end )
+)
+
+@rem check if conda environment was actually created
+if not exist "%INSTALL_ENV_DIR%\python.exe" ( echo. && echo Conda environment is empty. && goto end )
+
+@rem environment isolation
+set PYTHONNOUSERSITE=1
+set PYTHONPATH=
+set PYTHONHOME=
+set "CUDA_PATH=%INSTALL_ENV_DIR%"
+set "CUDA_HOME=%CUDA_PATH%"
+
+@rem activate installer env
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )
+
+@rem setup installer env
+call python one_click.py %*
+
+@rem below are functions for the script; the next line skips these during normal execution
+goto end
+
+:PrintBigMessage
+echo. && echo.
+echo *******************************************************************
+for %%M in (%*) do echo * %%~M
+echo *******************************************************************
+echo. && echo.
+exit /b
+
+:end
+pause
diff --git a/start_wsl.bat b/start_wsl.bat
new file mode 100644
index 0000000000000000000000000000000000000000..d7bacead6b0ea94656ecacd8bccede01d7d53cc8
--- /dev/null
+++ b/start_wsl.bat
@@ -0,0 +1,11 @@
+@echo off
+
+cd /D "%~dp0"
+
+set PATH=%PATH%;%SystemRoot%\system32
+
+@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script
+call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh %*"
+
+:end
+pause
diff --git a/training/formats/alpaca-chatbot-format.json b/training/formats/alpaca-chatbot-format.json
new file mode 100644
index 0000000000000000000000000000000000000000..4b38103f4c23de004666e0316855db62e57d2ad0
--- /dev/null
+++ b/training/formats/alpaca-chatbot-format.json
@@ -0,0 +1,4 @@
+{
+ "instruction,output": "User: %instruction%\nAssistant: %output%",
+ "instruction,input,output": "User: %instruction%: %input%\nAssistant: %output%"
+}
diff --git a/training/formats/alpaca-format.json b/training/formats/alpaca-format.json
new file mode 100644
index 0000000000000000000000000000000000000000..dd6df95640360297257b618715370093b715b21f
--- /dev/null
+++ b/training/formats/alpaca-format.json
@@ -0,0 +1,4 @@
+{
+ "instruction,output": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n%instruction%\n\n### Response:\n%output%",
+ "instruction,input,output": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n%instruction%\n\n### Input:\n%input%\n\n### Response:\n%output%"
+}
diff --git a/training/formats/vicuna-format.json b/training/formats/vicuna-format.json
new file mode 100644
index 0000000000000000000000000000000000000000..c1aa4f15ebcb99a57f696cbe1ec586ed7d5d4a90
--- /dev/null
+++ b/training/formats/vicuna-format.json
@@ -0,0 +1,3 @@
+{
+ "instruction,output": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\nUSER: %instruction%\n\nASSISTANT: %output%"
+}
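
Note: each training format file maps a comma-separated list of dataset columns to a prompt template with `%column%` placeholders. A minimal sketch of filling such a template for one example row (the real substitution lives in `modules/training.py`, which is not shown in this diff, so the selection and replacement below are assumptions for illustration only):

# Illustrative only: fill an alpaca-chatbot-format.json style template for a single row.
templates = {
    "instruction,output": "User: %instruction%\nAssistant: %output%",
    "instruction,input,output": "User: %instruction%: %input%\nAssistant: %output%",
}
row = {"instruction": "Summarize the text", "input": "A long article...", "output": "A short summary."}

# pick the template whose key matches the non-empty columns of this row
key = ",".join(k for k in ("instruction", "input", "output") if row.get(k))
text = templates[key]
for column, value in row.items():
    text = text.replace(f"%{column}%", value)
print(text)
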
diff --git a/update_linux.sh b/update_linux.sh
new file mode 100644
index 0000000000000000000000000000000000000000..371db554a33f53f3bd3c5bf15fedeaf2f6812639
--- /dev/null
+++ b/update_linux.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+
+# deactivate existing conda envs as needed to avoid conflicts
+{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+
+# config
+CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
+INSTALL_ENV_DIR="$(pwd)/installer_files/env"
+
+# environment isolation
+export PYTHONNOUSERSITE=1
+unset PYTHONPATH
+unset PYTHONHOME
+export CUDA_PATH="$INSTALL_ENV_DIR"
+export CUDA_HOME="$CUDA_PATH"
+
+# activate installer env
+source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
+conda activate "$INSTALL_ENV_DIR"
+
+# update installer env
+python one_click.py --update && echo -e "\nDone!"
diff --git a/update_macos.sh b/update_macos.sh
new file mode 100644
index 0000000000000000000000000000000000000000..371db554a33f53f3bd3c5bf15fedeaf2f6812639
--- /dev/null
+++ b/update_macos.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+
+# deactivate existing conda envs as needed to avoid conflicts
+{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+
+# config
+CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
+INSTALL_ENV_DIR="$(pwd)/installer_files/env"
+
+# environment isolation
+export PYTHONNOUSERSITE=1
+unset PYTHONPATH
+unset PYTHONHOME
+export CUDA_PATH="$INSTALL_ENV_DIR"
+export CUDA_HOME="$CUDA_PATH"
+
+# activate installer env
+source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
+conda activate "$INSTALL_ENV_DIR"
+
+# update installer env
+python one_click.py --update && echo -e "\nDone!"
diff --git a/update_windows.bat b/update_windows.bat
new file mode 100644
index 0000000000000000000000000000000000000000..0d8f815272c5eec8714ef1adc1a23d547d6bf62d
--- /dev/null
+++ b/update_windows.bat
@@ -0,0 +1,37 @@
+@echo off
+
+cd /D "%~dp0"
+
+set PATH=%PATH%;%SystemRoot%\system32
+
+echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end
+
+@rem fix failed install when installing to a separate drive
+set TMP=%cd%\installer_files
+set TEMP=%cd%\installer_files
+
+@rem deactivate existing conda envs as needed to avoid conflicts
+(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul
+
+@rem config
+set CONDA_ROOT_PREFIX=%cd%\installer_files\conda
+set INSTALL_ENV_DIR=%cd%\installer_files\env
+
+@rem environment isolation
+set PYTHONNOUSERSITE=1
+set PYTHONPATH=
+set PYTHONHOME=
+set "CUDA_PATH=%INSTALL_ENV_DIR%"
+set "CUDA_HOME=%CUDA_PATH%"
+
+@rem activate installer env
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )
+
+@rem update installer env
+call python one_click.py --update && (
+ echo.
+ echo Done!
+)
+
+:end
+pause
diff --git a/update_wsl.bat b/update_wsl.bat
new file mode 100644
index 0000000000000000000000000000000000000000..36d019a86641bb69392e04822f9697c80b28dcf9
--- /dev/null
+++ b/update_wsl.bat
@@ -0,0 +1,11 @@
+@echo off
+
+cd /D "%~dp0"
+
+set PATH=%PATH%;%SystemRoot%\system32
+
+@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script; calling wsl.sh with 'update' will run the updater
+call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh update"
+
+:end
+pause
diff --git a/wsl.sh b/wsl.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5c7d6212e0ab8f6777aee29ddd022a3b442b015c
--- /dev/null
+++ b/wsl.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+# detect if build-essential is missing or broken
+if ! dpkg-query -W -f'${Status}' "build-essential" 2>/dev/null | grep -q "ok installed"; then
+echo "build-essential not found or broken!
+
+A C++ compiler is required to build needed Python packages!
+To install one, run cmd_wsl.bat and enter these commands:
+
+sudo apt-get update
+sudo apt-get install build-essential
+"
+read -n1 -p "Continue the installer anyway? [y,n]" EXIT_PROMPT
+# only continue if user inputs 'y' else exit
+if ! [[ $EXIT_PROMPT == "Y" || $EXIT_PROMPT == "y" ]]; then exit; fi
+fi
+
+# deactivate existing conda envs as needed to avoid conflicts
+{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
+
+# config: unlike other scripts, this can't use the current directory due to a file IO bug in WSL and needs to be in a virtual drive
+INSTALL_DIR_PREFIX="$HOME/text-gen-install"
+if [[ ! $(realpath "$(pwd)/..") = /mnt/* ]]; then
+ INSTALL_DIR_PREFIX="$(realpath "$(pwd)/..")" && INSTALL_INPLACE=1
+fi
+INSTALL_DIR="$INSTALL_DIR_PREFIX/text-generation-webui"
+CONDA_ROOT_PREFIX="$INSTALL_DIR/installer_files/conda"
+INSTALL_ENV_DIR="$INSTALL_DIR/installer_files/env"
+MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Linux-x86_64.sh"
+conda_exists="F"
+
+# environment isolation
+export PYTHONNOUSERSITE=1
+unset PYTHONPATH
+unset PYTHONHOME
+export CUDA_PATH="$INSTALL_ENV_DIR"
+export CUDA_HOME="$CUDA_PATH"
+
+# /usr/lib/wsl/lib needs to be added to LD_LIBRARY_PATH to fix years-old bug in WSL where GPU drivers aren't linked properly
+export LD_LIBRARY_PATH="$CUDA_HOME/lib:/usr/lib/wsl/lib:$LD_LIBRARY_PATH"
+
+# open a bash CLI if called with 'wsl.sh cmd', with workarounds for an existing conda install
+if [ "$1" == "cmd" ]; then
+ exec bash --init-file <(echo ". ~/.bashrc; conda deactivate 2> /dev/null; cd $INSTALL_DIR || cd $HOME; source $CONDA_ROOT_PREFIX/etc/profile.d/conda.sh; conda activate $INSTALL_ENV_DIR")
+ exit
+fi
+
+if [[ "$INSTALL_DIR" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+
+# create install dir if missing
+if [ ! -d "$INSTALL_DIR" ]; then mkdir -p "$INSTALL_DIR" || exit; fi
+
+# figure out whether git and conda need to be installed
+if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
+
+# (if necessary) install git and conda into a contained environment
+# download miniconda
+if [ "$conda_exists" == "F" ]; then
+ echo "Downloading Miniconda from $MINICONDA_DOWNLOAD_URL to $INSTALL_DIR/miniconda_installer.sh"
+
+ curl -Lk "$MINICONDA_DOWNLOAD_URL" > "$INSTALL_DIR/miniconda_installer.sh"
+
+ chmod u+x "$INSTALL_DIR/miniconda_installer.sh"
+ bash "$INSTALL_DIR/miniconda_installer.sh" -b -p $CONDA_ROOT_PREFIX
+
+ # test the conda binary
+ echo "Miniconda version:"
+ "$CONDA_ROOT_PREFIX/bin/conda" --version
+fi
+
+# create the installer env
+if [ ! -e "$INSTALL_ENV_DIR" ]; then
+ "$CONDA_ROOT_PREFIX/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python=3.11 git
+fi
+
+# check if conda environment was actually created
+if [ ! -e "$INSTALL_ENV_DIR/bin/python" ]; then
+ echo "Conda environment is empty."
+ exit
+fi
+
+# activate installer env
+source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
+conda activate "$INSTALL_ENV_DIR"
+
+pushd $INSTALL_DIR 1> /dev/null || exit
+
+if [ ! -f "./server.py" ]; then
+ git init -b main
+ git remote add origin https://github.com/oobabooga/text-generation-webui
+ git fetch
+ git remote set-head origin -a
+ git reset origin/HEAD --hard
+ git branch --set-upstream-to=origin/HEAD
+ git restore -- . :!./CMD_FLAGS.txt
+fi
+
+# copy CMD_FLAGS.txt to install dir to allow edits within Windows
+if [[ $INSTALL_INPLACE != 1 ]]; then
+ # workaround for old install migration
+ if [ ! -f "./wsl.sh" ]; then
+ git pull || exit
+ [ -f "../webui.py" ] && mv "../webui.py" "../webui-old.py"
+ fi
+ if [ -f "$(dirs +1)/CMD_FLAGS.txt" ] && [ -f "./CMD_FLAGS.txt" ]; then cp -u "$(dirs +1)/CMD_FLAGS.txt" "$INSTALL_DIR"; fi
+fi
+
+# set up the installer env; update the env instead if called with 'wsl.sh update'
+case "$1" in
+("update") python one_click.py --update;;
+(*) python one_click.py $@;;
+esac