ajaynagotha committed on
Commit
7a69d96
·
verified ·
1 Parent(s): 3c4e014

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -79
app.py CHANGED
@@ -1,97 +1,41 @@
1
- import logging
2
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TrainingArguments, Trainer
3
- from datasets import load_dataset
4
  import gradio as gr
5
-
6
- # Configure logging
7
- logging.basicConfig(level=logging.INFO)
8
 
9
  # Load the dataset
10
- dataset = load_dataset("knowrohit07/gita_dataset")
11
- logging.info("Dataset loaded successfully.")
12
-
13
- # Preprocess the dataset
14
- def preprocess_function(examples):
15
- inputs = [f"Question: {q} Answer:" for q in examples["question"]]
16
- targets = examples["answer"]
17
- return tokenizer(inputs, targets, padding="max_length", truncation=True)
18
 
19
  # Load the model and tokenizer
20
- model_name = "t5-base" # Or any other suitable model
21
  tokenizer = AutoTokenizer.from_pretrained(model_name)
22
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
23
- logging.info("Model and tokenizer loaded successfully.")
24
-
25
- # Tokenize the dataset
26
- tokenized_dataset = dataset.map(preprocess_function, batched=True)
27
- logging.info("Dataset tokenized successfully.")
28
-
29
- # Fine-tune the model on the dataset
30
- training_args = TrainingArguments(
31
- output_dir="./results",
32
- evaluation_strategy="epoch",
33
- save_strategy="epoch",
34
- learning_rate=2e-5,
35
- per_device_train_batch_size=16,
36
- per_device_eval_batch_size=16,
37
- num_train_epochs=3,
38
- weight_decay=0.01,
39
- logging_dir="./logs", # Specify the logging directory
40
- )
41
 
42
- trainer = Trainer(
43
- model=model,
44
- args=training_args,
45
- data_collator=default_data_collator,
46
- train_dataset=tokenized_dataset["train"],
47
- eval_dataset=tokenized_dataset["validation"],
48
- )
49
-
50
- logging.info("Starting training...")
51
- trainer.train()
52
- logging.info("Training completed.")
53
-
54
- # Save the fine-tuned model
55
- model.save_pretrained("gita_model")
56
- tokenizer.save_pretrained("gita_tokenizer")
57
-
58
- # Define the question-answering function
59
  def answer_question(question):
60
- """
61
- Answers a question about the Bhagavad Gita using a fine-tuned model.
62
-
63
- Args:
64
- question: The question to be answered.
65
-
66
- Returns:
67
- The answer generated by the model.
68
- """
69
-
70
- try:
71
- # Load the fine-tuned model and tokenizer
72
- model = AutoModelForSeq2SeqLM.from_pretrained("gita_model")
73
- tokenizer = AutoTokenizer.from_pretrained("gita_tokenizer")
74
 
75
- # Preprocess the input
76
- input_ids = tokenizer(question, return_tensors="pt").input_ids
77
 
78
- # Generate the answer
79
- output = model.generate(input_ids, max_length=500, no_repeat_ngram_size=2)
80
- answer = tokenizer.decode(output[0], skip_special_tokens=True)
81
 
82
- return answer.strip()
 
 
 
83
 
84
- except Exception as e:
85
- logging.error(f"An error occurred: {e}")
86
- return "I couldn't find an answer to your question. Please try rephrasing it or asking something different."
87
 
88
- # Create the Gradio interface
89
- interface = gr.Interface(
90
  fn=answer_question,
91
- inputs="text",
92
  outputs="text",
93
  title="Bhagavad Gita Q&A",
94
- description="Ask your questions about the Bhagavad Gita and receive insights from the model."
95
  )
96
 
97
- interface.launch()
 
 
 
 
 
1
  import gradio as gr
2
+ from datasets import load_dataset
3
+ from transformers import AutoTokenizer, AutoModelForQuestionAnswering
4
+ import torch
5
 
6
  # Load the dataset
7
+ ds = load_dataset("knowrohit07/gita_dataset")
 
 
 
 
 
 
 
8
 
9
  # Load the model and tokenizer
10
+ model_name = "deepset/roberta-base-squad2"
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
+ model = AutoModelForQuestionAnswering.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def answer_question(question):
15
+ # Combine all text from the dataset
16
+ context = " ".join([item['Text'] for item in ds['train']])
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ # Tokenize input
19
+ inputs = tokenizer.encode_plus(question, context, return_tensors="pt", max_length=512, truncation=True)
20
 
21
+ # Get model output
22
+ outputs = model(**inputs)
 
23
 
24
+ # Process the output to get the answer
25
+ answer_start = torch.argmax(outputs.start_logits)
26
+ answer_end = torch.argmax(outputs.end_logits) + 1
27
+ answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))
28
 
29
+ return answer
 
 
30
 
31
+ # Define the Gradio interface
32
+ iface = gr.Interface(
33
  fn=answer_question,
34
+ inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
35
  outputs="text",
36
  title="Bhagavad Gita Q&A",
37
+ description="Ask a question about the Bhagavad Gita, and get an answer based on the dataset."
38
  )
39
 
40
+ # Launch the app
41
+ iface.launch()