Gon04 committed
Commit 248a90a · 1 Parent(s): 68fcd6f

Add application file

Files changed (2)
  1. app.py +297 -0
  2. requirements.txt +199 -0
app.py ADDED
@@ -0,0 +1,297 @@
+ """This script is adapted from the Streamlit dialogue example and the
+ interactive generation code of ChatGLM2 and Transformers.
+
+ We mainly modified part of the code logic to adapt it to the generation of
+ our model. Please refer to the links below for more information:
+     1. Streamlit chat example:
+        https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
+     2. ChatGLM2:
+        https://github.com/THUDM/ChatGLM2-6B
+     3. Transformers:
+        https://github.com/huggingface/transformers
+ Please run the app with the command
+ `streamlit run path/to/web_demo.py --server.address=0.0.0.0 --server.port 7860`.
+ Running it directly with `python path/to/web_demo.py` may cause unknown problems.
+ """
+ # isort: skip_file
+ import copy
+ import warnings
+ from dataclasses import asdict, dataclass
+ from typing import Callable, List, Optional
+
+ import streamlit as st
+ import torch
+ from torch import nn
+ from transformers.generation.utils import (LogitsProcessorList,
+                                            StoppingCriteriaList)
+ from transformers.utils import logging
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM  # isort: skip
+ from modelscope import snapshot_download
+
+ model_name_or_path = snapshot_download('pandora04/assistTuner_demo')
+
+ logger = logging.get_logger(__name__)
+ # model_name_or_path="/root/finetune/models/internlm2-chat-7b"
+ # model_name_or_path = "./models/merged"
+
+
+ @dataclass
+ class GenerationConfig:
+     # this config is used for chat to provide more diversity
+     max_length: int = 32768
+     top_p: float = 0.8
+     temperature: float = 0.8
+     do_sample: bool = True
+     repetition_penalty: float = 1.005
+
+
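+ # Note: generate_interactive() below is a hand-rolled sampling loop rather than
+ # a call to model.generate(); it yields the partially decoded response after
+ # every new token so the Streamlit UI can render the reply as it streams.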
+ @torch.inference_mode()
+ def generate_interactive(
+     model,
+     tokenizer,
+     prompt,
+     generation_config: Optional[GenerationConfig] = None,
+     logits_processor: Optional[LogitsProcessorList] = None,
+     stopping_criteria: Optional[StoppingCriteriaList] = None,
+     prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor],
+                                                 List[int]]] = None,
+     additional_eos_token_id: Optional[int] = None,
+     **kwargs,
+ ):
+     inputs = tokenizer([prompt], padding=True, return_tensors='pt')
+     input_length = len(inputs['input_ids'][0])
+     for k, v in inputs.items():
+         inputs[k] = v.cuda()
+     input_ids = inputs['input_ids']
+     _, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
+     if generation_config is None:
+         generation_config = model.generation_config
+     generation_config = copy.deepcopy(generation_config)
+     model_kwargs = generation_config.update(**kwargs)
+     bos_token_id, eos_token_id = (  # noqa: F841  # pylint: disable=W0612
+         generation_config.bos_token_id,
+         generation_config.eos_token_id,
+     )
+     if isinstance(eos_token_id, int):
+         eos_token_id = [eos_token_id]
+     if additional_eos_token_id is not None:
+         eos_token_id.append(additional_eos_token_id)
+     has_default_max_length = kwargs.get(
+         'max_length') is None and generation_config.max_length is not None
+     if has_default_max_length and generation_config.max_new_tokens is None:
+         warnings.warn(
+             f"Using 'max_length''s default \
+                 ({repr(generation_config.max_length)}) \
+                 to control the generation length. "
+             'This behaviour is deprecated and will be removed from the \
+                 config in v5 of Transformers -- we'
+             ' recommend using `max_new_tokens` to control the maximum \
+                 length of the generation.',
+             UserWarning,
+         )
+     elif generation_config.max_new_tokens is not None:
+         generation_config.max_length = generation_config.max_new_tokens + \
+             input_ids_seq_length
+         if not has_default_max_length:
+             logger.warning(  # pylint: disable=W4902
+                 f"Both 'max_new_tokens' (={generation_config.max_new_tokens}) "
+                 f"and 'max_length'(={generation_config.max_length}) seem to "
+                 "have been set. 'max_new_tokens' will take precedence. "
+                 'Please refer to the documentation for more information. '
+                 '(https://huggingface.co/docs/transformers/main/'
+                 'en/main_classes/text_generation)')
+
+     if input_ids_seq_length >= generation_config.max_length:
+         input_ids_string = 'input_ids'
+         logger.warning(
+             f'Input length of {input_ids_string} is {input_ids_seq_length}, '
+             f"but 'max_length' is set to {generation_config.max_length}. "
+             'This can lead to unexpected behavior. You should consider'
+             " increasing 'max_new_tokens'.")
+
+     # 2. Set generation parameters if not already defined
+     logits_processor = logits_processor if logits_processor is not None \
+         else LogitsProcessorList()
+     stopping_criteria = stopping_criteria if stopping_criteria is not None \
+         else StoppingCriteriaList()
+
+     logits_processor = model._get_logits_processor(
+         generation_config=generation_config,
+         input_ids_seq_length=input_ids_seq_length,
+         encoder_input_ids=input_ids,
+         prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
+         logits_processor=logits_processor,
+     )
+
+     stopping_criteria = model._get_stopping_criteria(
+         generation_config=generation_config,
+         stopping_criteria=stopping_criteria)
+     logits_warper = model._get_logits_warper(generation_config)
+
+     unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
+     scores = None
+     while True:
+         model_inputs = model.prepare_inputs_for_generation(
+             input_ids, **model_kwargs)
+         # forward pass to get next token
+         outputs = model(
+             **model_inputs,
+             return_dict=True,
+             output_attentions=False,
+             output_hidden_states=False,
+         )
+
+         next_token_logits = outputs.logits[:, -1, :]
+
+         # pre-process distribution
+         next_token_scores = logits_processor(input_ids, next_token_logits)
+         next_token_scores = logits_warper(input_ids, next_token_scores)
+
+         # sample
+         probs = nn.functional.softmax(next_token_scores, dim=-1)
+         if generation_config.do_sample:
+             next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
+         else:
+             next_tokens = torch.argmax(probs, dim=-1)
+
+         # update generated ids, model inputs, and length for next step
+         input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
+         model_kwargs = model._update_model_kwargs_for_generation(
+             outputs, model_kwargs, is_encoder_decoder=False)
+         unfinished_sequences = unfinished_sequences.mul(
+             (min(next_tokens != i for i in eos_token_id)).long())
+
+         output_token_ids = input_ids[0].cpu().tolist()
+         output_token_ids = output_token_ids[input_length:]
+         for each_eos_token_id in eos_token_id:
+             if output_token_ids[-1] == each_eos_token_id:
+                 output_token_ids = output_token_ids[:-1]
+         response = tokenizer.decode(output_token_ids)
+
+         yield response
+         # stop when each sentence is finished
+         # or if we exceed the maximum length
+         if unfinished_sequences.max() == 0 or stopping_criteria(
+                 input_ids, scores):
+             break
+
+
+ def on_btn_click():
+     del st.session_state.messages
+
+
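+ # st.cache_resource caches the returned (model, tokenizer) pair across
+ # Streamlit reruns, so the weights are loaded from disk only once per process.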
+ @st.cache_resource
+ def load_model():
+     model = (AutoModelForCausalLM.from_pretrained(
+         model_name_or_path,
+         trust_remote_code=True).to(torch.bfloat16).cuda())
+     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,
+                                               trust_remote_code=True)
+     return model, tokenizer
+
+
+ def prepare_generation_config():
+     with st.sidebar:
+         max_length = st.slider('Max Length',
+                                min_value=8,
+                                max_value=32768,
+                                value=32768)
+         top_p = st.slider('Top P', 0.0, 1.0, 0.8, step=0.01)
+         temperature = st.slider('Temperature', 0.0, 1.0, 0.7, step=0.01)
+         st.button('Clear Chat History', on_click=on_btn_click)
+
+     generation_config = GenerationConfig(max_length=max_length,
+                                          top_p=top_p,
+                                          temperature=temperature)
+
+     return generation_config
+
+
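+ # The prompt templates below follow the InternLM2 chat format: each turn is
+ # wrapped in <|im_start|>{role} ... <|im_end|> markers, and combine_history()
+ # joins the system prompt, past turns and the current query into one string.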
+ user_prompt = '<|im_start|>user\n{user}<|im_end|>\n'
+ robot_prompt = '<|im_start|>assistant\n{robot}<|im_end|>\n'
+ cur_query_prompt = '<|im_start|>user\n{user}<|im_end|>\n\
+     <|im_start|>assistant\n'
+
+
+ def combine_history(prompt):
+     messages = st.session_state.messages
+     meta_instruction = ('You are a helpful, honest, '
+                         'and harmless AI assistant.')
+     total_prompt = f'<s><|im_start|>system\n{meta_instruction}<|im_end|>\n'
+     for message in messages:
+         cur_content = message['content']
+         if message['role'] == 'user':
+             cur_prompt = user_prompt.format(user=cur_content)
+         elif message['role'] == 'robot':
+             cur_prompt = robot_prompt.format(robot=cur_content)
+         else:
+             raise RuntimeError
+         total_prompt += cur_prompt
+     total_prompt = total_prompt + cur_query_prompt.format(user=prompt)
+     return total_prompt
+
+
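+ # main() wires everything together: it loads the model once, replays the chat
+ # history kept in st.session_state, and streams each new assistant reply into
+ # an st.empty() placeholder as generate_interactive() yields partial output.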
+ def main():
+     st.title('internlm2_5-7b-chat-assistant')
+
+     # torch.cuda.empty_cache()
+     print('load model begin.')
+     model, tokenizer = load_model()
+     print('load model end.')
+
+     generation_config = prepare_generation_config()
+
+     # Initialize chat history
+     if 'messages' not in st.session_state:
+         st.session_state.messages = []
+
+     # Display chat messages from history on app rerun
+     for message in st.session_state.messages:
+         with st.chat_message(message['role'], avatar=message.get('avatar')):
+             st.markdown(message['content'])
+
+     # Accept user input
+     if prompt := st.chat_input('What is up?'):
+         # Display user message in chat message container
+         with st.chat_message('user', avatar='user'):
+             st.markdown(prompt)
+         real_prompt = combine_history(prompt)
+         # Add user message to chat history
+         st.session_state.messages.append({
+             'role': 'user',
+             'content': prompt,
+             'avatar': 'user'
+         })
+
+         with st.chat_message('robot', avatar='assistant'):
+             message_placeholder = st.empty()
+             for cur_response in generate_interactive(
+                     model=model,
+                     tokenizer=tokenizer,
+                     prompt=real_prompt,
+                     additional_eos_token_id=92542,
+                     device='cuda:0',
+                     **asdict(generation_config),
+             ):
+                 # Display robot response in chat message container
+                 message_placeholder.markdown(cur_response + '▌')
+             message_placeholder.markdown(cur_response)
+         # Add robot response to chat history
+         st.session_state.messages.append({
+             'role': 'robot',
+             'content': cur_response,  # pylint: disable=undefined-loop-variable
+             'avatar': 'assistant',
+         })
+         torch.cuda.empty_cache()
+
+
+ if __name__ == '__main__':
+     main()
+
requirements.txt ADDED
@@ -0,0 +1,199 @@
+ accelerate==1.1.1
+ addict==2.4.0
+ aiohappyeyeballs==2.4.3
+ aiohttp==3.11.7
+ aiosignal==1.3.1
+ altair==5.4.1
+ annotated-types==0.7.0
+ anyio==4.6.2.post1
+ argon2-cffi==23.1.0
+ argon2-cffi-bindings==21.2.0
+ arrow==1.3.0
+ arxiv==2.1.3
+ asttokens==2.4.1
+ async-lru==2.0.4
+ async-timeout==5.0.1
+ attrs==24.2.0
+ babel==2.16.0
+ beautifulsoup4==4.12.3
+ bitsandbytes==0.44.1
+ bleach==6.2.0
+ blinker==1.9.0
+ Brotli==1.1.0
+ cachetools==5.5.0
+ certifi==2024.8.30
+ cffi==1.17.1
+ charset-normalizer==3.4.0
+ click==8.1.7
+ colorama==0.4.6
+ comm==0.2.2
+ contourpy==1.3.1
+ cycler==0.12.1
+ datasets==3.1.0
+ debugpy==1.8.9
+ decorator==5.1.1
+ deepspeed==0.15.4
+ defusedxml==0.7.1
+ dill==0.3.8
+ distro==1.9.0
+ duckduckgo_search==5.3.1b1
+ einops==0.8.0
+ et_xmlfile==2.0.0
+ exceptiongroup==1.2.2
+ executing==2.1.0
+ fastjsonschema==2.20.0
+ feedparser==6.0.11
+ filelock==3.16.1
+ fonttools==4.55.0
+ fqdn==1.5.1
+ frozenlist==1.5.0
+ fsspec==2024.9.0
+ func_timeout==4.3.5
+ gitdb==4.0.11
+ GitPython==3.1.43
+ griffe==0.49.0
+ h11==0.14.0
+ h2==4.1.0
+ hjson==3.1.0
+ hpack==4.0.0
+ httpcore==1.0.7
+ httpx==0.27.2
+ huggingface-hub==0.26.2
+ hyperframe==6.0.1
+ idna==3.10
+ imageio==2.36.0
+ ipykernel==6.29.5
+ ipython==8.29.0
+ ipywidgets==8.1.5
+ isoduration==20.11.0
+ jedi==0.19.2
+ Jinja2==3.1.4
+ json5==0.9.28
+ jsonpointer==3.0.0
+ jsonschema==4.23.0
+ jsonschema-specifications==2024.10.1
+ jupyter==1.1.1
+ jupyter-console==6.6.3
+ jupyter-events==0.10.0
+ jupyter-lsp==2.2.5
+ jupyter_client==8.6.3
+ jupyter_core==5.7.2
+ jupyter_server==2.14.2
+ jupyter_server_terminals==0.5.3
+ jupyterlab==4.2.6
+ jupyterlab_pygments==0.3.0
+ jupyterlab_server==2.27.3
+ jupyterlab_widgets==3.0.13
+ kiwisolver==1.4.7
+ lagent==0.2.4
+ lazy_loader==0.4
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ matplotlib==3.9.2
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mistune==3.0.2
+ mmengine==0.10.5
+ modelscope==1.20.1
+ mpi4py_mpich==3.1.5
+ mpmath==1.3.0
+ msgpack==1.1.0
+ multidict==6.1.0
+ multiprocess==0.70.16
+ narwhals==1.14.1
+ nbclient==0.10.0
+ nbconvert==7.16.4
+ nbformat==5.10.4
+ nest-asyncio==1.6.0
+ networkx==3.4.2
+ ninja==1.11.1.1
+ notebook==7.2.2
+ notebook_shim==0.2.4
+ numpy==1.26.4
+ opencv-python==4.10.0.84
+ openpyxl==3.1.5
+ overrides==7.7.0
+ packaging==24.2
+ pandas==2.2.3
+ pandocfilters==1.5.1
+ parso==0.8.4
+ peft==0.13.2
+ pexpect==4.9.0
+ phx-class-registry==4.1.0
+ pillow==11.0.0
+ platformdirs==4.3.6
+ prometheus_client==0.21.0
+ prompt_toolkit==3.0.48
+ propcache==0.2.0
+ protobuf==5.28.3
+ psutil==6.1.0
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ py-cpuinfo==9.0.0
+ pyarrow==18.0.0
+ pycparser==2.22
+ pydantic==2.10.1
+ pydantic_core==2.27.1
+ pydeck==0.9.1
+ Pygments==2.18.0
+ pyparsing==3.2.0
+ python-dateutil==2.9.0.post0
+ python-json-logger==2.0.7
+ pytz==2024.2
+ PyYAML==6.0.2
+ pyzmq==26.2.0
+ referencing==0.35.1
+ regex==2024.11.6
+ requests==2.32.3
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rich==13.9.4
+ rpds-py==0.21.0
+ safetensors==0.4.5
+ scikit-image==0.24.0
+ scipy==1.14.1
+ Send2Trash==1.8.3
+ sentencepiece==0.2.0
+ sgmllib3k==1.0.0
+ six==1.16.0
+ smmap==5.0.1
+ sniffio==1.3.1
+ socksio==1.0.0
+ soupsieve==2.6
+ stack-data==0.6.3
+ streamlit==1.40.1
+ sympy==1.13.1
+ tenacity==9.0.0
+ termcolor==2.5.0
+ terminado==0.18.1
+ tifffile==2024.9.20
+ tiktoken==0.8.0
+ timeout-decorator==0.5.0
+ tinycss2==1.4.0
+ tokenizers==0.15.2
+ toml==0.10.2
+ tomli==2.1.0
+ torch==2.4.1
+ torchaudio==2.4.1
+ torchvision==0.19.1
+ tornado==6.4.2
+ tqdm==4.67.0
+ traitlets==5.14.3
+ transformers==4.39.0
+ transformers-stream-generator==0.0.5
+ triton==3.0.0
+ types-python-dateutil==2.9.0.20241003
+ typing_extensions==4.12.2
+ tzdata==2024.2
+ uri-template==1.3.0
+ urllib3==2.2.3
+ watchdog==6.0.0
+ wcwidth==0.2.13
+ webcolors==24.11.1
+ webencodings==0.5.1
+ websocket-client==1.8.0
+ widgetsnbextension==4.0.13
+ -e git+https://github.com/InternLM/xtuner.git@90192ffe42612b0f88409432e7b4860294432bcc#egg=xtuner
+ xxhash==3.5.0
+ yapf==0.43.0
+ yarl==1.18.0