"""Gradio app: extract structured product features (Brand, Model, RAM, ...)
from a free-text description, using a proxy-rotated DuckDuckGo search for
context and a Mistral-7B-Instruct endpoint on Hugging Face for extraction."""

import itertools
import time

import gradio as gr
import requests
from duckduckgo_search import DDGS
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_huggingface import HuggingFaceEndpoint
from langdetect import detect


def get_proxies():
    """Download a public SOCKS4 proxy list (one ``host:port`` per line).

    Returns:
        list[str]: proxy addresses.

    Raises:
        requests.RequestException: if the list cannot be downloaded.
    """
    url = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"
    # Timeout so a stalled download cannot hang forever (this runs at import).
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page as proxies.
    response.raise_for_status()
    return response.text.splitlines()


# NOTE(review): fetched at import time — importing this module performs
# network I/O and raises if GitHub is unreachable.
proxy_list = get_proxies()
proxy_cycle = itertools.cycle(proxy_list)


class ProxiedDDGS(DDGS):
    """DDGS subclass that routes its HTTP GETs through a single proxy.

    NOTE(review): this overrides a private ``_get`` hook; newer
    duckduckgo_search releases take a ``proxy=`` constructor argument
    instead — confirm the installed version still calls ``_get``.
    """

    def __init__(self, proxy):
        super().__init__()
        self.proxy = proxy  # e.g. "1.2.3.4:1080" from the SOCKS4 list

    def _get(self, url, headers=None):
        """Perform a GET through ``self.proxy``; raises on HTTP errors."""
        response = requests.get(
            url,
            headers=headers,
            proxies={"http": self.proxy, "https": self.proxy},
            timeout=30,  # dead proxies must fail fast, not hang the retry loop
        )
        response.raise_for_status()
        return response


def search_with_retries(query, max_results=3, max_retries=5, backoff_factor=1):
    """Run a DuckDuckGo text search, rotating to a new proxy on each failure.

    Args:
        query: search string.
        max_results: maximum number of results to request.
        max_retries: attempts before giving up.
        backoff_factor: linear backoff multiplier (sleep = factor * attempt).

    Returns:
        tuple: ``(results, proxy)`` — result dicts and the proxy that worked.

    Raises:
        RuntimeError: if every attempt fails.
    """
    for attempt in range(1, max_retries + 1):
        proxy = next(proxy_cycle)
        try:
            searcher = ProxiedDDGS(proxy)
            return searcher.text(query, max_results=max_results), proxy
        except Exception:
            # Bad proxy, network error, or rate limit: back off linearly,
            # then try the next proxy in the cycle.
            time.sleep(backoff_factor * attempt)
    raise RuntimeError(f"All retries failed for query: {query}")


# Hugging Face Inference endpoint used for feature extraction.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    temperature=0.7,
    do_sample=False,
)

# Prompt template for feature extraction; the model must answer with JSON only.
template_extract_features = '''
You are a product feature extractor bot. Your task is to determine features like Brand, Model, Type, RAM, Storage, etc., from the given product description and web search results.
Given product description: {TEXT}
Relevant web search results: {SEARCH_RESULTS}
Return features in JSON format with keys like Brand, Model, Type, RAM, Storage, and others. Your response MUST only include a valid JSON object and nothing else.
Example: {{ "Brand": "Apple", "Model": "iPhone 14", "Type": "Smartphone", "RAM": "4GB", "Storage": "128GB" }}
'''

json_output_parser = JsonOutputParser()


def extract_features(description):
    """Detect language, gather web context, and extract features via the LLM.

    Args:
        description: free-text product description.

    Returns:
        tuple: ``(language_code, features_dict, elapsed_seconds)``.
    """
    start = time.time()

    try:
        lang = detect(description)
    except Exception:
        # langdetect raises on empty/ambiguous input; default to English
        # rather than failing the whole request.
        lang = "en"

    try:
        search_results, _ = search_with_retries(description, max_results=3)
        # DDGS text results carry the summary under "body"; keep "snippet"
        # as a fallback for older duckduckgo_search versions.
        search_text = "\n".join(
            res.get("body", res.get("snippet", "")) for res in search_results
        )
    except RuntimeError:
        search_text = "No search results available."

    prompt_extract = PromptTemplate(
        template=template_extract_features,
        input_variables=["TEXT", "SEARCH_RESULTS"],
    )
    formatted_prompt = prompt_extract.format(
        TEXT=description, SEARCH_RESULTS=search_text
    )

    response = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(response)

    return lang, parsed_output, time.time() - start


def create_gradio_interface():
    """Build and launch the Gradio UI wired to :func:`extract_features`."""
    with gr.Blocks() as iface:
        text_input = gr.Textbox(label="Item Description")
        lang_output = gr.Textbox(label="Detected Language")
        feature_output = gr.Textbox(label="Extracted Features (JSON)")
        time_taken = gr.Textbox(label="Time Taken (seconds)")
        submit_btn = gr.Button("Extract Features")

        def on_submit(text):
            lang, features, duration = extract_features(text)
            return lang, features, f"{duration:.2f} seconds"

        submit_btn.click(
            fn=on_submit,
            inputs=text_input,
            outputs=[lang_output, feature_output, time_taken],
        )

    iface.launch()


if __name__ == "__main__":
    create_gradio_interface()