11James11222 committed on
Commit
1fa21e8
1 Parent(s): dff17ee

Upload 3 files

Files changed (3)
  1. llm_ans.py +63 -0
  2. model.py +119 -0
  3. quiz_gen.py +83 -0
llm_ans.py ADDED
@@ -0,0 +1,63 @@
+ import os
+ import glob
+ import textwrap
+ import time
+ import langchain
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain import PromptTemplate, LLMChain
+ from langchain.vectorstores import FAISS
+ from langchain.llms import HuggingFacePipeline
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
+ from langchain.chains import RetrievalQA
+ import torch
+ import transformers
+ from model import qa_chain
+
+ def wrap_text_preserve_newlines(text, width=700):
+     # Split the input text into lines based on newline characters
+     lines = text.split('\n')
+
+     # Wrap each line individually
+     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
+
+     # Join the wrapped lines back together using newline characters
+     wrapped_text = '\n'.join(wrapped_lines)
+
+     return wrapped_text
+
+
+ def process_llm_response(llm_response):
+     # Wrap the generated answer, then append the source file names and page numbers
+     ans = wrap_text_preserve_newlines(llm_response['result'])
+
+     sources_used = ' \n'.join(
+         [
+             source.metadata['source'].split('/')[-1][:-4]
+             + ' - page: '
+             + str(source.metadata['page'])
+             for source in llm_response['source_documents']
+         ]
+     )
+
+     ans = ans + '\n\nSources: \n' + sources_used
+     return ans
+
+ def llm_ans(query):
+     start = time.time()
+
+     llm_response = qa_chain.invoke(query)
+     ans = process_llm_response(llm_response)
+
+     end = time.time()
+
+     time_elapsed = int(round(end - start, 0))
+     time_elapsed_str = f'\n\nTime elapsed: {time_elapsed} s'
+
+     # Return only the text after the "Answer:" marker; fall back to the full
+     # response if the marker is missing
+     ans_loc = ans.find("Answer:")
+     if ans_loc == -1:
+         return ans
+     ans_loc += len("Answer: ")
+     return ans[ans_loc:]
+
+
+ # query = "what are computer networks?"
+ # result = llm_ans(query)
+ # print(result)
+ # print(type(result))
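For reference, process_llm_response only depends on the shape of the dictionary that RetrievalQA returns: a result string plus source_documents whose metadata carries source and page. A minimal sketch of that contract with a hypothetical stand-in document, useful for exercising the formatting; note that importing llm_ans also imports model.py and loads the LLM as a side effect:

# Hypothetical stand-in for a retrieved document; only the fields that
# process_llm_response reads are reproduced here.
class FakeDoc:
    def __init__(self, source, page):
        self.metadata = {"source": source, "page": page}

sample_response = {
    "result": "Answer: A computer network connects devices so they can exchange data.",
    "source_documents": [FakeDoc("notes/computer_networks.pdf", 12)],
}

from llm_ans import process_llm_response   # also imports model.py / loads the LLM
print(process_llm_response(sample_response))
# Expected: the wrapped answer, then "Sources:" and "computer_networks - page: 12"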
model.py ADDED
@@ -0,0 +1,119 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ import os
+ import glob
+ import textwrap
+ import time
+ import langchain
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain import PromptTemplate, LLMChain
+ from langchain.vectorstores import FAISS
+ from langchain.llms import HuggingFacePipeline
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
+ from langchain.chains import RetrievalQA
+ import torch
+ import transformers
+ from transformers import (
+     AutoTokenizer, AutoModelForCausalLM,
+     BitsAndBytesConfig,
+     pipeline
+ )
+
+
+ class CFG:
+     # LLMs
+     model_name = 'llama2-13b-chat'  # wizardlm, llama2-7b-chat, llama2-13b-chat, mistral-7B
+     temperature = 0
+     top_p = 0.95
+     repetition_penalty = 1.15
+     # splitting
+     split_chunk_size = 800
+     split_overlap = 0
+     # embeddings
+     embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'
+     # similar passages
+     k = 6
+     # paths
+     Embeddings_path = 'C:/Studies/main project/codes/final/model/cse-vectordb/faiss_index_hp'
+     # Output_folder = './cse-vectordb'
+
+ model_repo = 'daryl149/llama-2-7b-chat-hf'
+ tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
+
+ # 4-bit NF4 quantization so the model fits in a single-GPU memory budget
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit = True,
+     bnb_4bit_quant_type = "nf4",
+     bnb_4bit_compute_dtype = torch.float16,
+     bnb_4bit_use_double_quant = True,
+ )
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_repo,
+     quantization_config = bnb_config,
+     device_map = 'auto',
+     low_cpu_mem_usage = True,
+     trust_remote_code = True
+ )
+
+ max_len = 2048
+
+ ### hugging face pipeline
+ pipe = pipeline(
+     task = "text-generation",
+     model = model,
+     tokenizer = tokenizer,
+     pad_token_id = tokenizer.eos_token_id,
+     # do_sample = True,
+     max_length = max_len,
+     temperature = CFG.temperature,
+     top_p = CFG.top_p,
+     repetition_penalty = CFG.repetition_penalty
+ )
+
+ ### langchain pipeline
+ llm = HuggingFacePipeline(pipeline = pipe)
+
+
+ ### download embeddings model
+ embeddings = HuggingFaceInstructEmbeddings(
+     model_name = CFG.embeddings_model_repo,
+     model_kwargs = {"device": "cuda"}
+ )
+
+ ### load vector DB embeddings
+ vectordb = FAISS.load_local(
+     CFG.Embeddings_path,                      # from input folder
+     # CFG.Output_folder + '/faiss_index_hp',  # from output folder
+     embeddings,
+     allow_dangerous_deserialization=True
+ )
+
+ prompt_template = """
+ Don't try to make up an answer, if you don't know just say that you don't know.
+ Answer in the same language the question was asked.
+ Use only the following pieces of context to answer the question at the end.
+
+ {context}
+
+ Question: {question}
+ Answer:"""
+
+
+ PROMPT = PromptTemplate(
+     template = prompt_template,
+     input_variables = ["context", "question"]
+ )
+
+ # similarity search over the top-k chunks (search_type is an as_retriever
+ # argument, not a search_kwargs entry)
+ retriever = vectordb.as_retriever(search_type = "similarity", search_kwargs = {"k": CFG.k})
+
+ qa_chain = RetrievalQA.from_chain_type(
+     llm = llm,
+     chain_type = "stuff",  # map_reduce, map_rerank, stuff, refine
+     retriever = retriever,
+     chain_type_kwargs = {"prompt": PROMPT},
+     return_source_documents = True,
+     verbose = False
+ )
+ print("Hello")
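A quick way to sanity-check the retrieval half of this file without involving the LLM is to query the FAISS store directly. A minimal sketch, assuming the index at CFG.Embeddings_path loads (importing model still instantiates the quantized model, so this is not free):

# Retrieval smoke test: fetch the top-k chunks for a query straight from FAISS.
from model import vectordb, CFG

docs = vectordb.similarity_search("What is a computer network?", k=CFG.k)
for doc in docs:
    src = doc.metadata.get("source", "unknown")
    page = doc.metadata.get("page", "?")
    print(f"{src} (page {page}): {doc.page_content[:120]}")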
quiz_gen.py ADDED
@@ -0,0 +1,83 @@
+ import os
+ from IPython.display import Markdown, display
+ from langchain import PromptTemplate, HuggingFaceHub
+ from langchain.chat_models import ChatOpenAI
+ from langchain.chains import LLMChain
+
+ import re
+ import json
+ from langchain.schema import HumanMessage
+ from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
+ from langchain.output_parsers import StructuredOutputParser, ResponseSchema
+
+ import warnings
+ warnings.filterwarnings("ignore")
+ import textwrap
+ import langchain
+ from langchain.llms import HuggingFacePipeline
+ import torch
+ import transformers
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from transformers import LlamaTokenizer, LlamaForCausalLM, pipeline
+ from langchain.vectorstores import Chroma, FAISS
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains import RetrievalQA, VectorDBQA
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.document_loaders import DirectoryLoader
+ from InstructorEmbedding import INSTRUCTOR
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
+ from langchain.llms import CTransformers
+ import random
+ from langchain.chains.question_answering import load_qa_chain
+ from model import llm, vectordb as index, embeddings
+
+
+ # mistral = CTransformers(
+ #     model = "mistral-7b-instruct-v0.2.Q4_K_S.gguf",
+ #     model_type = "mistral",
+ #     max_new_tokens = 4096,
+ #     temperature = 0,
+ #     repetition_penalty = 1.1,
+ #     device = "cuda" if torch.cuda.is_available() else "cpu")
+
+ # llm = HuggingFaceHub(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+ #     model_kwargs={"temperature": 0.05, "max_length": 1024, "top_p": 0.95,
+ #                   "repetition_penalty": 1.15, "torch_dtype": "torch.float16", "device_map": "auto"})
+
+ # topic = ["Artificial intelligence", "Algorithm analysis", "Computer graphics and image processing",
+ #          "Computer organization and architecture", "Compiler Design", "Computer networks",
+ #          "Data Structure", "Database management system", "Distributed computing", "internet of things",
+ #          "mobile computing", "management of software system", "Java", "Operating system",
+ #          "Python programming", "Soft Computing", "Web programming"]
+
+ # select = [i for i in range(len(topic) - 1)]
+
+ # Schema each generated quiz item should follow
+ response_schemas = [
+     ResponseSchema(name="question", description="Question generated from provided input text data."),
+     ResponseSchema(name="choices", description="Available options for a multiple-choice question, comma separated."),
+     ResponseSchema(name="answer", description="Correct answer for the asked question.")
+ ]
+ output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+ format_instructions = output_parser.get_format_instructions()
+
+ prompt = ChatPromptTemplate(
+     messages=[
+         HumanMessagePromptTemplate.from_template(
+             """Please generate {num_questions} multiple choice questions
+             from {user_prompt}.
+             \n{format_instructions}\n{user_prompt}"""
+         )
+     ],
+     input_variables=["user_prompt", "num_questions"],  # both variables appear in the template
+     partial_variables={"format_instructions": format_instructions}
+ )
+ final_query = prompt.format_prompt(user_prompt="computer networks", num_questions=5)
+
+
+ chain = LLMChain(llm=llm, prompt=prompt)
+
+ # sub = topic[random.choice(select)]
+ # chain = LLMChain(prompt=prompt, llm=llm)
+
+ quiz_response = chain.run(user_prompt="computer networks", num_questions=5)
+ print(quiz_response)
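quiz_gen.py builds output_parser and format_instructions but never parses the raw model output back into the declared schema. A minimal sketch of that last step, assuming the model follows the format instructions; with num_questions=5 it may emit several JSON blocks, which this single-object parser will not handle, hence the fallback:

# Parse the raw quiz text into the {question, choices, answer} schema defined above.
try:
    quiz = output_parser.parse(quiz_response)
    print(quiz["question"])
    print(quiz["choices"])
    print(quiz["answer"])
except Exception:
    # Model ignored the format instructions or returned multiple objects; keep the raw text.
    print("Could not parse structured output:")
    print(quiz_response)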