oceansweep committed
Commit
fde148e
Parent: cfbac61

Delete App_Function_Libraries/RAG/RAG_Libary_2.py

App_Function_Libraries/RAG/RAG_Libary_2.py DELETED
@@ -1,172 +0,0 @@
# RAG_Library_2.py
# Description: This script contains the main RAG pipeline function and its supporting functions.
#
# Import necessary modules and functions
import configparser
from typing import Dict, Any
# Local Imports
from App_Function_Libraries.RAG.ChromaDB_Library import process_and_store_content, vector_search, chroma_client
from App_Function_Libraries.Article_Extractor_Lib import scrape_article
from App_Function_Libraries.DB.DB_Manager import add_media_to_database, search_db, get_unprocessed_media
# 3rd-Party Imports
import openai
#
########################################################################################################################
#
# Functions:

# Initialize OpenAI client (adjust this based on your API key management)
openai.api_key = "your-openai-api-key"

config = configparser.ConfigParser()
config.read('config.txt')

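# The config['API'][...] lookups in generate_answer below imply that
# config.txt carries an [API] section with one key per provider. A minimal
# sketch of that file, inferred from those lookups (values are placeholders):
#
# [API]
# openai_api_key = <your-openai-key>
# anthropic_api_key = <your-anthropic-key>
# cohere_api_key = <your-cohere-key>
# ...one entry per provider named in generate_answer
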
# Main RAG pipeline function
def rag_pipeline(url: str, query: str, api_choice=None) -> Dict[str, Any]:
    # Extract content
    article_data = scrape_article(url)
    content = article_data['content']
    title = article_data['title']

    # Store the article in the database and get the media_id
    media_id = add_media_to_database(url, title, 'article', content)

    # Process and store content
    collection_name = f"article_{media_id}"
    process_and_store_content(content, collection_name, media_id)

    # Perform searches
    vector_results = vector_search(collection_name, query, k=5)
    fts_results = search_db(query, ["content"], "", page=1, results_per_page=5)

    # Combine results
    all_results = vector_results + [result['content'] for result in fts_results]
    context = "\n".join(all_results)

    # Generate answer using the selected API
    answer = generate_answer(api_choice, context, query)

    return {
        "answer": answer,
        "context": context
    }

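# Note on result shapes (inferred from the combining step above, not from the
# callees' source): vector_search() must return a list of strings, while
# search_db() must return dicts carrying a 'content' field, since the two are
# concatenated and then joined into a single context string.
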
def generate_answer(api_choice: str, context: str, query: str) -> str:
    prompt = f"Context: {context}\n\nQuestion: {query}"
    if api_choice == "OpenAI":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_openai
        return summarize_with_openai(config['API']['openai_api_key'], prompt, "")
    elif api_choice == "Anthropic":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_anthropic
        return summarize_with_anthropic(config['API']['anthropic_api_key'], prompt, "")
    elif api_choice == "Cohere":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_cohere
        return summarize_with_cohere(config['API']['cohere_api_key'], prompt, "")
    elif api_choice == "Groq":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_groq
        return summarize_with_groq(config['API']['groq_api_key'], prompt, "")
    elif api_choice == "OpenRouter":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_openrouter
        return summarize_with_openrouter(config['API']['openrouter_api_key'], prompt, "")
    elif api_choice == "HuggingFace":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_huggingface
        return summarize_with_huggingface(config['API']['huggingface_api_key'], prompt, "")
    elif api_choice == "DeepSeek":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_deepseek
        return summarize_with_deepseek(config['API']['deepseek_api_key'], prompt, "")
    elif api_choice == "Mistral":
        from App_Function_Libraries.Summarization_General_Lib import summarize_with_mistral
        return summarize_with_mistral(config['API']['mistral_api_key'], prompt, "")
    elif api_choice == "Local-LLM":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_local_llm
        return summarize_with_local_llm(config['API']['local_llm_path'], prompt, "")
    elif api_choice == "Llama.cpp":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama
        return summarize_with_llama(config['API']['llama_api_key'], prompt, "")
    elif api_choice == "Kobold":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_kobold
        return summarize_with_kobold(config['API']['kobold_api_key'], prompt, "")
    elif api_choice == "Ooba":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_oobabooga
        return summarize_with_oobabooga(config['API']['ooba_api_key'], prompt, "")
    elif api_choice == "TabbyAPI":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_tabbyapi
        return summarize_with_tabbyapi(config['API']['tabby_api_key'], prompt, "")
    elif api_choice == "vLLM":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_vllm
        return summarize_with_vllm(config['API']['vllm_api_key'], prompt, "")
    elif api_choice == "ollama":
        from App_Function_Libraries.Local_Summarization_Lib import summarize_with_ollama
        return summarize_with_ollama(config['API']['ollama_api_key'], prompt, "")
    else:
        raise ValueError(f"Unsupported API choice: {api_choice}")

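# The branches above differ only in module, function name, and config key, so
# the dispatch could be table-driven while preserving the lazy imports. A
# minimal sketch (not wired in; PROVIDERS and generate_answer_v2 are
# hypothetical names, and the table is abbreviated):
#
# import importlib
#
# PROVIDERS = {
#     "OpenAI": ("App_Function_Libraries.Summarization_General_Lib", "summarize_with_openai", "openai_api_key"),
#     "ollama": ("App_Function_Libraries.Local_Summarization_Lib", "summarize_with_ollama", "ollama_api_key"),
#     # ...remaining providers follow the same (module, function, config key) pattern
# }
#
# def generate_answer_v2(api_choice: str, context: str, query: str) -> str:
#     prompt = f"Context: {context}\n\nQuestion: {query}"
#     if api_choice not in PROVIDERS:
#         raise ValueError(f"Unsupported API choice: {api_choice}")
#     module_name, func_name, key_name = PROVIDERS[api_choice]
#     func = getattr(importlib.import_module(module_name), func_name)
#     return func(config['API'][key_name], prompt, "")
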
# Function to preprocess and store all existing content in the database
def preprocess_all_content():
    unprocessed_media = get_unprocessed_media()
    for row in unprocessed_media:
        media_id = row[0]
        content = row[1]
        media_type = row[2]
        collection_name = f"{media_type}_{media_id}"
        process_and_store_content(content, collection_name, media_id)

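# The indexing above implies get_unprocessed_media() yields
# (media_id, content, media_type) rows, so the loop could unpack directly;
# an equivalent sketch:
#
# for media_id, content, media_type in get_unprocessed_media():
#     process_and_store_content(content, f"{media_type}_{media_id}", media_id)
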
# Function to perform RAG search across all stored content
def rag_search(query: str, api_choice: str) -> Dict[str, Any]:
    # Perform vector search across all collections
    all_collections = chroma_client.list_collections()
    vector_results = []
    for collection in all_collections:
        vector_results.extend(vector_search(collection.name, query, k=2))

    # Perform FTS search
    fts_results = search_db(query, ["content"], "", page=1, results_per_page=10)

    # Combine results
    all_results = vector_results + [result['content'] for result in fts_results]
    context = "\n".join(all_results[:10])  # Limit to top 10 results

    # Generate answer using the selected API
    answer = generate_answer(api_choice, context, query)

    return {
        "answer": answer,
        "context": context
    }

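# Note: all_results places every vector hit ahead of the FTS hits, so with
# k=2 per collection the [:10] cap can crowd the FTS results out entirely
# once five or more collections match; interleaving or scoring the two lists
# before truncating would be one way to balance them.
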
# Example usage:
# 1. Initialize the system:
# create_tables(db)  # Ensure FTS tables are set up
#
# 2. Create the ChromaDB client:
# chroma_client = ChromaDBClient()
#
# 3. Create embeddings and store them in ChromaDB:
# preprocess_all_content() or create_embeddings()
#
# 4. Perform RAG search across all content (api_choice is required):
# result = rag_search("What are the key points about climate change?", "OpenAI")
# print(result['answer'])
#
# 5. (Extra) Perform RAG on a specific URL:
# result = rag_pipeline("https://example.com/article", "What is the main topic of this article?", api_choice="OpenAI")
# print(result['answer'])
#
########################################################################################################################


############################################################################################################
#
# Elasticsearch Retriever

# https://github.com/langchain-ai/langchain/tree/44e3e2391c48bfd0a8e6a20adde0b6567f4f43c3/templates/rag-elasticsearch
#
# https://github.com/langchain-ai/langchain/tree/44e3e2391c48bfd0a8e6a20adde0b6567f4f43c3/templates/rag-self-query

#
# End of RAG_Library_2.py
############################################################################################################