---
license: apache-2.0
language:
  - en
pipeline_tag: text-generation
tags:
  - chat
---
# phi-2-MNN

## Introduction

This model is a 4-bit quantized version of the MNN model exported from phi-2 using llmexport.
## Download

```bash
# install the Hugging Face Hub client
pip install -U huggingface_hub
```
```bash
# shell download
huggingface-cli download taobao-mnn/phi-2-MNN --local-dir path/to/dir
```
```python
# SDK download
from huggingface_hub import snapshot_download

model_dir = snapshot_download('taobao-mnn/phi-2-MNN')
```
```bash
# git clone
git clone https://huggingface.co/taobao-mnn/phi-2-MNN
```
## Usage

### Clone the MNN source

```bash
git clone https://github.com/alibaba/MNN.git
```

### Compile

```bash
cd MNN
mkdir build && cd build
cmake .. -DMNN_LOW_MEMORY=true -DMNN_CPU_WEIGHT_DEQUANT_GEMM=true -DMNN_BUILD_LLM=true -DMNN_SUPPORT_TRANSFORMER_FUSE=true
make -j
```

### Run

```bash
./llm_demo /path/to/phi-2-MNN/config.json prompt.txt
```
## Reference

- MNN-LLM