pas2_fork/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
pas2_fork/.gitignore ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual Environment
24
+ venv/
25
+ ENV/
26
+
27
+ # IDE
28
+ .idea/
29
+ .vscode/
30
+ *.swp
31
+ *.swo
32
+
33
+ # Logs
34
+ *.log
35
+
36
+ # Local configuration
37
+ .env
38
+
39
+ # Results and data files
40
+ results/
41
+ *.csv
42
+ *.xlsx
43
+
44
+ # Gradio
45
+ flagged/
46
+
47
+ # Code outputs
48
+ *.png
49
+ # (*.xlsx is already ignored under "Results and data files" above)
50
+
51
+ .gradio/
52
+ .ipynb_checkpoints/
53
+
54
+ # Environment Variables
55
+ .env
56
+
57
+ # IDEs and Editors
58
+ .vscode/
59
+ .idea/
60
+ *.swp
61
+ *.swo
62
+
63
+ # Operating System Files
64
+ .DS_Store
65
+ Thumbs.db
66
+
67
+ # Build files
68
+ build/
69
+ dist/
70
+ *.egg-info/
71
+
72
+ # Database files
73
+ *.db
74
+ *.sqlite3
75
+
76
+ # Backup files
77
+ *.bak
78
+
79
+ # Other sensitive or project-specific files
80
+ config.ini
81
+ secrets.json
82
+ credentials.yml
pas2_fork/CLAUDE.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM Hallucination Detector Guidelines
2
+
3
+ ## Commands
4
+ - Setup: `pip install -r requirements.txt`
5
+ - Configure: Set environment variables `HF_MISTRAL_API_KEY` and `HF_OPENAI_API_KEY`
6
+ - Run: `python app.py`
7
+ - Lint: `ruff check app.py`
8
+ - Format: `black app.py`
9
+ - Type check: `mypy app.py`
10
+
11
+ ## Code Style
12
+ - Follow PEP 8 conventions with 4-space indentation
13
+ - Use type hints with Pydantic for data validation
14
+ - Write descriptive docstrings using triple quotes
15
+ - Name variables/functions in snake_case, classes in PascalCase
16
+ - Organize imports: stdlib first, then third-party, then local
17
+ - Exception handling: use try/except blocks with specific exceptions
18
+ - Constants should be UPPERCASE and defined at class/module level
19
+ - Prefer f-strings over other string formatting methods
20
+
21
+ ## Architecture
22
+ - App uses Gradio for UI, MongoDB Atlas for persistence
23
+ - LLM integration with Mistral Large and OpenAI o3-mini
24
+ - Paraphrase-based approach for hallucination detection
25
+ - Maintain clean separation between UI and backend logic
pas2_fork/LICENSE ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License with Attribution Requirement
2
+
3
+ Copyright (c) 2024 Serhan Yilmaz, Sabanci University
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ 1. The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ 2. Any use of the Software must include appropriate credit to the original authors
16
+ by citing the project as follows:
17
+
18
+ Serhan Yilmaz. (2024). PAS2 - Paraphrase-based AI System for Semantic
19
+ Similarity. https://github.com/serhanylmz/pas2
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
pas2_fork/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Pas2 Llm Hallucination Detector
3
+ emoji: 🐠
4
+ colorFrom: purple
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 5.20.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: pas2 is an llm-as-a-judge system used to verify outputs
12
+ ---
13
+
14
+ # PAS2 - Hallucination Detection System
15
+
16
+ A sophisticated system for detecting hallucinations in AI responses using a paraphrase-based approach with model-as-judge verification.
17
+
18
+ ## Features
19
+
20
+ - **Paraphrase Generation**: Automatically generates semantically equivalent variations of user queries
21
+ - **Multi-Model Architecture**: Uses Mistral Large for responses and OpenAI's o3-mini as a judge
22
+ - **Real-time Progress Tracking**: Visual feedback during the analysis process
23
+ - **Permanent Cloud Storage**: User feedback and results are stored in MongoDB Atlas for persistent storage across restarts
24
+ - **Interactive Web Interface**: Clean, responsive Gradio interface with example queries
25
+ - **Detailed Analysis**: Provides confidence scores, reasoning, and specific conflicting facts
26
+ - **Statistics Dashboard**: Real-time tracking of hallucination detection statistics
27
+
28
+ ## Setup
29
+
30
+ 1. Clone this repository
31
+ 2. Install dependencies:
32
+ ```bash
33
+ pip install -r requirements.txt
34
+ ```
35
+ 3. Set up your API keys as environment variables:
36
+ - `HF_MISTRAL_API_KEY`: Your Mistral AI API key
37
+ - `HF_OPENAI_API_KEY`: Your OpenAI API key
38
+
39
+ ## Deployment on Hugging Face Spaces
40
+
41
+ 1. Create a new Space on Hugging Face
42
+ 2. Select "Gradio" as the SDK
43
+ 3. Add your repository
44
+ 4. Set up a MongoDB Atlas database (see below)
45
+ 5. Set the following secrets in your Space's settings:
46
+ - `HF_MISTRAL_API_KEY`
47
+ - `HF_OPENAI_API_KEY`
48
+ - `MONGODB_URI`
49
+
50
+ ### MongoDB Atlas Setup
51
+
52
+ For permanent data storage that persists across HuggingFace Space restarts:
53
+
54
+ 1. Create a free [MongoDB Atlas account](https://www.mongodb.com/cloud/atlas/register)
55
+ 2. Create a new cluster (the free tier is sufficient)
56
+ 3. In the "Database Access" menu, create a database user with read/write permissions
57
+ 4. In the "Network Access" menu, add IP `0.0.0.0/0` to allow access from anywhere (required for HuggingFace Spaces)
58
+ 5. In the "Databases" section, click "Connect" and choose "Connect your application"
59
+ 6. Copy the connection string and replace `<password>` with your database user's password
60
+ 7. Set this as your `MONGODB_URI` secret in HuggingFace Spaces settings
61
+
62
+ ## Usage
63
+
64
+ 1. Enter a factual question or select from example queries
65
+ 2. Click "Detect Hallucinations" to start the analysis
66
+ 3. Review the detailed results:
67
+ - Hallucination detection status
68
+ - Confidence score
69
+ - Original and paraphrased responses
70
+ - Detailed reasoning and analysis
71
+ 4. Provide feedback to help improve the system
72
+
73
+ ## How It Works
74
+
75
+ 1. **Query Processing**:
76
+ - Your question is paraphrased multiple ways
77
+ - Each version is sent to Mistral Large
78
+ - Responses are collected and compared
79
+
80
+ 2. **Hallucination Detection**:
81
+ - OpenAI's o3-mini analyzes responses
82
+ - Identifies factual inconsistencies
83
+ - Provides confidence scores and reasoning
84
+
85
+ 3. **Feedback Collection**:
86
+ - User feedback is stored in MongoDB Atlas
87
+ - Cloud-based persistent storage ensures data survival
88
+ - Statistics are updated in real-time
89
+ - Data can be exported for further analysis
90
+
91
+ ## Data Persistence
92
+
93
+ The application uses MongoDB Atlas for data storage, providing several benefits:
94
+ - **Permanent Storage**: Data persists even when Hugging Face Spaces restart
95
+ - **Scalability**: MongoDB scales as your data grows
96
+ - **Cloud-based**: No reliance on Space-specific storage that can be lost
97
+ - **Query Capabilities**: Powerful query functionality for data analysis
98
+ - **Export Options**: Built-in methods to export data to CSV
99
+
100
+ ## Contributing
101
+
102
+ Contributions are welcome! Please feel free to submit pull requests.
103
+
104
+ ## License
105
+
106
+ This project is licensed under the MIT License with an attribution requirement - see the LICENSE file for details.
107
+
108
+ ## About
109
+
110
+ This application uses a combination of paraphrasing techniques and model-as-judge approaches to identify potential hallucinations in LLM responses. It provides confidence scores, identifies conflicting facts, and offers detailed reasoning for its judgments.
111
+
112
+ ## Features
113
+
114
+ - Generates paraphrased versions of input queries
115
+ - Evaluates responses using semantic similarity analysis
116
+ - Provides match percentage and similarity metrics
117
+ - Includes visualization tools for similarity matrices
118
+ - Web interface for interactive testing
119
+ - Benchmarking capabilities for bulk evaluation
120
+
121
+ ## Installation
122
+
123
+ ```bash
124
+ git clone https://github.com/serhanylmz/pas2
125
+ cd pas2
126
+ pip install -r requirements.txt
127
+ ```
128
+
129
+ Set up your OpenAI API key in a `.env` file:
130
+ ```
131
+ OPENAI_API_KEY=your_api_key_here
132
+ ```
133
+
134
+ ## Usage
135
+
136
+ ### Web Interface
137
+
138
+ Run the Gradio interface:
139
+ ```bash
140
+ python pas2-gradio.py
141
+ ```
142
+
143
+ ### Benchmark Tool
144
+
145
+ Run the benchmark tool:
146
+ ```bash
147
+ python pas2-benchmark.py --json_file your_data.json --num_samples 10
148
+ ```
149
+
150
+ ### Library Usage
151
+
152
+ ```python
153
+ from pas2 import PAS2
154
+
155
+ detector = PAS2()
156
+ hallucinated, response, questions, answers = detector.detect_hallucination(
157
+ "your question",
158
+ n_paraphrases=5,
159
+ similarity_threshold=0.9,
160
+ match_percentage_threshold=0.7
161
+ )
162
+ ```
163
+
164
+ ## Configuration
165
+
166
+ - Default model: gpt-4o-2024-08-06
167
+ - Default embedding model: text-embedding-3-small
168
+ - Adjustable similarity and match percentage thresholds
169
+
170
+ ## Output Files
171
+
172
+ - Similarity matrix plots (PNG)
173
+ - Match matrix plots (PNG)
174
+ - Benchmark results (CSV, TXT)
175
+ - User feedback logs (XLSX)
176
+
177
+ ## License
178
+
179
+ This project is licensed under the MIT License with an attribution requirement - see the [LICENSE](LICENSE) file for details.
180
+
181
+ ### Citation
182
+
183
+ If you use PAS2 in your research or project, please cite it as:
184
+
185
+ ```bibtex
186
+ @software{pas2_2024,
187
+ author = {Serhan Yilmaz},
188
+ title = {PAS2 - Paraphrase-based AI System for Semantic Similarity},
189
+ year = {2024},
190
+ publisher = {GitHub},
191
+ url = {https://github.com/serhanylmz/pas2}
192
+ }
193
+ ```
194
+
195
+ ### Attribution Requirements
196
+
197
+ When using PAS2, you must provide appropriate attribution by:
198
+
199
+ 1. Including the copyright notice and license in any copy or substantial portion of the software
200
+ 2. Citing the project in any publications, presentations, or documentation that uses or builds upon this work
201
+ 3. Maintaining a link to the original repository in any forks or derivative works
202
+
203
+ ## Contact
204
+
205
+ Serhan Yilmaz
206
+ serhan.yilmaz@sabanciuniv.edu
207
+ Sabanci University
pas2_fork/app.py ADDED
@@ -0,0 +1,1557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import pandas as pd
4
+ from datetime import datetime
5
+ from pydantic import BaseModel, Field
6
+ from typing import List, Dict, Any, Optional
7
+ import numpy as np
8
+ from mistralai import Mistral
9
+ from openai import OpenAI
10
+ import re
11
+ import json
12
+ import logging
13
+ import time
14
+ import concurrent.futures
15
+ from concurrent.futures import ThreadPoolExecutor
16
+ import threading
17
+ import pymongo
18
+ from pymongo import MongoClient
19
+ from bson.objectid import ObjectId
20
+ from dotenv import load_dotenv
21
+
22
+ # Load environment variables
23
+ load_dotenv()
24
+
# Set up application-wide logging: INFO level, timestamped messages, sent to stderr.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[logging.StreamHandler()],
)

# Module-level logger shared by all classes in this file.
logger = logging.getLogger(__name__)
35
+
class HallucinationJudgment(BaseModel):
    """Structured verdict produced by the judge model.

    Instances are built from the judge model's JSON output inside
    ``PAS2.judge_hallucination``; each field mirrors a key of that JSON.
    """
    hallucination_detected: bool = Field(description="Whether a hallucination is detected across the responses")
    confidence_score: float = Field(description="Confidence score between 0-1 for the hallucination judgment")
    conflicting_facts: List[Dict[str, Any]] = Field(description="List of conflicting facts found in the responses")
    reasoning: str = Field(description="Detailed reasoning for the judgment")
    summary: str = Field(description="A summary of the analysis")
42
+
43
+ class PAS2:
44
+ """Paraphrase-based Approach for LLM Systems - Using llm-as-judge methods"""
45
+
46
+ def __init__(self, mistral_api_key=None, openai_api_key=None, progress_callback=None):
47
+ """Initialize the PAS2 with API keys"""
48
+ # For Hugging Face Spaces, we prioritize getting API keys from HF_* environment variables
49
+ # which are set from the Secrets tab in the Space settings
50
+ self.mistral_api_key = mistral_api_key or os.environ.get("HF_MISTRAL_API_KEY") or os.environ.get("MISTRAL_API_KEY")
51
+ self.openai_api_key = openai_api_key or os.environ.get("HF_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
52
+ self.progress_callback = progress_callback
53
+
54
+ if not self.mistral_api_key:
55
+ raise ValueError("Mistral API key is required. Set it via HF_MISTRAL_API_KEY in Hugging Face Spaces secrets or pass it as a parameter.")
56
+
57
+ if not self.openai_api_key:
58
+ raise ValueError("OpenAI API key is required. Set it via HF_OPENAI_API_KEY in Hugging Face Spaces secrets or pass it as a parameter.")
59
+
60
+ self.mistral_client = Mistral(api_key=self.mistral_api_key)
61
+ self.openai_client = OpenAI(api_key=self.openai_api_key)
62
+
63
+ self.mistral_model = "mistral-large-latest"
64
+ self.openai_model = "o3-mini"
65
+
66
+ logger.info("PAS2 initialized with Mistral model: %s and OpenAI model: %s",
67
+ self.mistral_model, self.openai_model)
68
+
69
+ def generate_paraphrases(self, query: str, n_paraphrases: int = 3) -> List[str]:
70
+ """Generate paraphrases of the input query using Mistral API"""
71
+ logger.info("Generating %d paraphrases for query: %s", n_paraphrases, query)
72
+ start_time = time.time()
73
+
74
+ messages = [
75
+ {
76
+ "role": "system",
77
+ "content": f"You are an expert at creating semantically equivalent paraphrases. Generate {n_paraphrases} different paraphrases of the given query that preserve the original meaning but vary in wording and structure. Return a JSON array of strings, each containing one paraphrase."
78
+ },
79
+ {
80
+ "role": "user",
81
+ "content": query
82
+ }
83
+ ]
84
+
85
+ try:
86
+ logger.info("Sending paraphrase generation request to Mistral API...")
87
+ response = self.mistral_client.chat.complete(
88
+ model=self.mistral_model,
89
+ messages=messages,
90
+ response_format={"type": "json_object"}
91
+ )
92
+
93
+ content = response.choices[0].message.content
94
+ logger.debug("Received raw paraphrase response: %s", content)
95
+
96
+ paraphrases_data = json.loads(content)
97
+
98
+ # Handle different possible JSON structures
99
+ if isinstance(paraphrases_data, dict) and "paraphrases" in paraphrases_data:
100
+ paraphrases = paraphrases_data["paraphrases"]
101
+ elif isinstance(paraphrases_data, dict) and "results" in paraphrases_data:
102
+ paraphrases = paraphrases_data["results"]
103
+ elif isinstance(paraphrases_data, list):
104
+ paraphrases = paraphrases_data
105
+ else:
106
+ # Try to extract a list from any field
107
+ for key, value in paraphrases_data.items():
108
+ if isinstance(value, list) and len(value) > 0:
109
+ paraphrases = value
110
+ break
111
+ else:
112
+ logger.warning("Could not extract paraphrases from response: %s", content)
113
+ raise ValueError(f"Could not extract paraphrases from response: {content}")
114
+
115
+ # Ensure we have the right number of paraphrases
116
+ paraphrases = paraphrases[:n_paraphrases]
117
+
118
+ # Add the original query as the first item
119
+ all_queries = [query] + paraphrases
120
+
121
+ elapsed_time = time.time() - start_time
122
+ logger.info("Generated %d paraphrases in %.2f seconds", len(paraphrases), elapsed_time)
123
+ for i, p in enumerate(paraphrases, 1):
124
+ logger.info("Paraphrase %d: %s", i, p)
125
+
126
+ return all_queries
127
+
128
+ except Exception as e:
129
+ logger.error("Error generating paraphrases: %s", str(e), exc_info=True)
130
+ # Return original plus simple paraphrases as fallback
131
+ fallback_paraphrases = [
132
+ query,
133
+ f"Could you tell me about {query.strip('?')}?",
134
+ f"I'd like to know: {query}",
135
+ f"Please provide information on {query.strip('?')}."
136
+ ][:n_paraphrases+1]
137
+
138
+ logger.info("Using fallback paraphrases due to error")
139
+ for i, p in enumerate(fallback_paraphrases[1:], 1):
140
+ logger.info("Fallback paraphrase %d: %s", i, p)
141
+
142
+ return fallback_paraphrases
143
+
144
+ def _get_single_response(self, query: str, index: int = None) -> str:
145
+ """Get a single response from Mistral API for a query"""
146
+ try:
147
+ query_description = f"Query {index}: {query}" if index is not None else f"Query: {query}"
148
+ logger.info("Getting response for %s", query_description)
149
+ start_time = time.time()
150
+
151
+ messages = [
152
+ {
153
+ "role": "system",
154
+ "content": "You are a helpful AI assistant. Provide accurate, factual information in response to questions."
155
+ },
156
+ {
157
+ "role": "user",
158
+ "content": query
159
+ }
160
+ ]
161
+
162
+ response = self.mistral_client.chat.complete(
163
+ model=self.mistral_model,
164
+ messages=messages
165
+ )
166
+
167
+ result = response.choices[0].message.content
168
+ elapsed_time = time.time() - start_time
169
+
170
+ logger.info("Received response for %s (%.2f seconds)", query_description, elapsed_time)
171
+ logger.debug("Response content for %s: %s", query_description, result[:100] + "..." if len(result) > 100 else result)
172
+
173
+ return result
174
+
175
+ except Exception as e:
176
+ error_msg = f"Error getting response for query '{query}': {e}"
177
+ logger.error(error_msg, exc_info=True)
178
+ return f"Error: Failed to get response for this query."
179
+
180
+ def get_responses(self, queries: List[str]) -> List[str]:
181
+ """Get responses from Mistral API for each query in parallel"""
182
+ logger.info("Getting responses for %d queries in parallel", len(queries))
183
+ start_time = time.time()
184
+
185
+ # Use ThreadPoolExecutor for parallel API calls
186
+ with ThreadPoolExecutor(max_workers=min(len(queries), 5)) as executor:
187
+ # Submit tasks and map them to their original indices
188
+ future_to_index = {
189
+ executor.submit(self._get_single_response, query, i): i
190
+ for i, query in enumerate(queries)
191
+ }
192
+
193
+ # Prepare a list with the correct length
194
+ responses = [""] * len(queries)
195
+
196
+ # Counter for completed responses
197
+ completed_count = 0
198
+
199
+ # Collect results as they complete
200
+ for future in concurrent.futures.as_completed(future_to_index):
201
+ index = future_to_index[future]
202
+ try:
203
+ responses[index] = future.result()
204
+
205
+ # Update completion count and report progress
206
+ completed_count += 1
207
+ if self.progress_callback:
208
+ self.progress_callback("responses_progress",
209
+ completed_responses=completed_count,
210
+ total_responses=len(queries))
211
+
212
+ except Exception as e:
213
+ logger.error("Error processing response for index %d: %s", index, str(e))
214
+ responses[index] = f"Error: Failed to get response for query {index}."
215
+
216
+ # Still update completion count even for errors
217
+ completed_count += 1
218
+ if self.progress_callback:
219
+ self.progress_callback("responses_progress",
220
+ completed_responses=completed_count,
221
+ total_responses=len(queries))
222
+
223
+ elapsed_time = time.time() - start_time
224
+ logger.info("Received all %d responses in %.2f seconds total", len(responses), elapsed_time)
225
+
226
+ return responses
227
+
228
+ def detect_hallucination(self, query: str, n_paraphrases: int = 3) -> Dict:
229
+ """
230
+ Detect hallucinations by comparing responses to paraphrased queries using a judge model
231
+
232
+ Returns:
233
+ Dict containing hallucination judgment and all responses
234
+ """
235
+ logger.info("Starting hallucination detection for query: %s", query)
236
+ start_time = time.time()
237
+
238
+ # Report progress
239
+ if self.progress_callback:
240
+ self.progress_callback("starting", query=query)
241
+
242
+ # Generate paraphrases
243
+ logger.info("Step 1: Generating paraphrases")
244
+ if self.progress_callback:
245
+ self.progress_callback("generating_paraphrases", query=query)
246
+
247
+ all_queries = self.generate_paraphrases(query, n_paraphrases)
248
+
249
+ if self.progress_callback:
250
+ self.progress_callback("paraphrases_complete", query=query, count=len(all_queries))
251
+
252
+ # Get responses to all queries
253
+ logger.info("Step 2: Getting responses to all %d queries", len(all_queries))
254
+ if self.progress_callback:
255
+ self.progress_callback("getting_responses", query=query, total=len(all_queries))
256
+
257
+ all_responses = []
258
+ for i, q in enumerate(all_queries):
259
+ logger.info("Getting response %d/%d for query: %s", i+1, len(all_queries), q)
260
+ if self.progress_callback:
261
+ self.progress_callback("responses_progress", query=query, completed=i, total=len(all_queries))
262
+
263
+ response = self._get_single_response(q, index=i)
264
+ all_responses.append(response)
265
+
266
+ if self.progress_callback:
267
+ self.progress_callback("responses_complete", query=query)
268
+
269
+ # Judge the responses for hallucinations
270
+ logger.info("Step 3: Judging for hallucinations")
271
+ if self.progress_callback:
272
+ self.progress_callback("judging", query=query)
273
+
274
+ # The first query is the original, rest are paraphrases
275
+ original_query = all_queries[0]
276
+ original_response = all_responses[0]
277
+ paraphrased_queries = all_queries[1:] if len(all_queries) > 1 else []
278
+ paraphrased_responses = all_responses[1:] if len(all_responses) > 1 else []
279
+
280
+ # Judge the responses
281
+ judgment = self.judge_hallucination(
282
+ original_query=original_query,
283
+ original_response=original_response,
284
+ paraphrased_queries=paraphrased_queries,
285
+ paraphrased_responses=paraphrased_responses
286
+ )
287
+
288
+ # Assemble the results
289
+ results = {
290
+ "original_query": original_query,
291
+ "original_response": original_response,
292
+ "paraphrased_queries": paraphrased_queries,
293
+ "paraphrased_responses": paraphrased_responses,
294
+ "hallucination_detected": judgment.hallucination_detected,
295
+ "confidence_score": judgment.confidence_score,
296
+ "conflicting_facts": judgment.conflicting_facts,
297
+ "reasoning": judgment.reasoning,
298
+ "summary": judgment.summary
299
+ }
300
+
301
+ # Report completion
302
+ if self.progress_callback:
303
+ self.progress_callback("complete", query=query)
304
+
305
+ logger.info("Hallucination detection completed in %.2f seconds", time.time() - start_time)
306
+ return results
307
+
308
+ def judge_hallucination(self,
309
+ original_query: str,
310
+ original_response: str,
311
+ paraphrased_queries: List[str],
312
+ paraphrased_responses: List[str]) -> HallucinationJudgment:
313
+ """
314
+ Use OpenAI's o3-mini as a judge to detect hallucinations in the responses
315
+ """
316
+ logger.info("Judging hallucinations with OpenAI's %s model", self.openai_model)
317
+ start_time = time.time()
318
+
319
+ # Prepare the context for the judge
320
+ context = f"""
321
+ Original Question: {original_query}
322
+
323
+ Original Response:
324
+ {original_response}
325
+
326
+ Paraphrased Questions and their Responses:
327
+ """
328
+
329
+ for i, (query, response) in enumerate(zip(paraphrased_queries, paraphrased_responses), 1):
330
+ context += f"\nParaphrased Question {i}: {query}\n\nResponse {i}:\n{response}\n"
331
+
332
+ system_prompt = """
333
+ You are a judge evaluating whether an AI is hallucinating across different responses to semantically equivalent questions.
334
+ Analyze all responses carefully to identify any factual inconsistencies or contradictions.
335
+ Focus on factual discrepancies, not stylistic differences.
336
+ A hallucination is when the AI states different facts in response to questions that are asking for the same information.
337
+
338
+ Your response should be a JSON with the following fields:
339
+ - hallucination_detected: boolean indicating whether hallucinations were found
340
+ - confidence_score: number between 0 and 1 representing your confidence in the judgment
341
+ - conflicting_facts: an array of objects describing any conflicting information found
342
+ - reasoning: detailed explanation for your judgment
343
+ - summary: a concise summary of your analysis
344
+ """
345
+
346
+ try:
347
+ logger.info("Sending judgment request to OpenAI API...")
348
+ response = self.openai_client.chat.completions.create(
349
+ model=self.openai_model,
350
+ messages=[
351
+ {"role": "system", "content": system_prompt},
352
+ {"role": "user", "content": f"Evaluate these responses for hallucinations:\n\n{context}"}
353
+ ],
354
+ response_format={"type": "json_object"}
355
+ )
356
+
357
+ result_json = json.loads(response.choices[0].message.content)
358
+ logger.debug("Received judgment response: %s", result_json)
359
+
360
+ # Create the HallucinationJudgment object from the JSON response
361
+ judgment = HallucinationJudgment(
362
+ hallucination_detected=result_json.get("hallucination_detected", False),
363
+ confidence_score=result_json.get("confidence_score", 0.0),
364
+ conflicting_facts=result_json.get("conflicting_facts", []),
365
+ reasoning=result_json.get("reasoning", "No reasoning provided."),
366
+ summary=result_json.get("summary", "No summary provided.")
367
+ )
368
+
369
+ elapsed_time = time.time() - start_time
370
+ logger.info("Judgment completed in %.2f seconds", elapsed_time)
371
+
372
+ return judgment
373
+
374
+ except Exception as e:
375
+ logger.error("Error in hallucination judgment: %s", str(e), exc_info=True)
376
+ # Return a fallback judgment
377
+ return HallucinationJudgment(
378
+ hallucination_detected=False,
379
+ confidence_score=0.0,
380
+ conflicting_facts=[],
381
+ reasoning="Failed to obtain judgment from the model.",
382
+ summary="Analysis failed due to API error."
383
+ )
384
+
385
+
386
class HallucinationDetectorApp:
    """Coordinates the PAS2 detection pipeline and persistent feedback storage.

    Responsibilities:
    - Lazily holds the PAS2 pipeline (``self.pas2``), created by ``initialize_api``.
    - Manages a MongoDB connection (database "hallucination_detector",
      collection "feedback") used to persist results and user feedback.
    - Storage methods are best-effort: when the DB connection is unavailable
      they log an error and return a neutral value (message string, ``None``
      or ``[]``) instead of raising, so the UI keeps working without
      persistence.
    """

    def __init__(self):
        # The PAS2 pipeline is created later via initialize_api(), once keys exist.
        self.pas2 = None
        logger.info("Initializing HallucinationDetectorApp")
        self._initialize_database()
        # Optional callable used to surface progress updates in the UI.
        self.progress_callback = None

    def _initialize_database(self):
        """Initialize MongoDB connection for persistent feedback storage.

        On any failure, ``mongo_client``/``db``/``feedback_collection`` are all
        set to ``None`` so the rest of the app can detect "no persistence"
        with an ``is None`` check.
        """
        try:
            # Get MongoDB connection string from environment variable
            mongo_uri = os.environ.get("MONGODB_URI")

            if not mongo_uri:
                logger.warning("MONGODB_URI not found in environment variables. Please set it in HuggingFace Spaces secrets.")
                logger.warning("Using a placeholder URI for now - connection will fail until proper URI is provided.")
                # Use a placeholder - this will fail but allows the app to initialize
                mongo_uri = "mongodb+srv://username:password@cluster.mongodb.net/?retryWrites=true&w=majority"

            # Connect to MongoDB (client construction is lazy; no I/O yet)
            self.mongo_client = MongoClient(mongo_uri)

            # Access or create database
            self.db = self.mongo_client["hallucination_detector"]

            # Access or create collection
            self.feedback_collection = self.db["feedback"]

            # Create index on timestamp for faster querying
            self.feedback_collection.create_index("timestamp")

            # Test connection (first command that actually talks to the server)
            self.mongo_client.admin.command('ping')
            logger.info("MongoDB connection successful")

        except Exception as e:
            logger.error(f"Error initializing MongoDB: {str(e)}", exc_info=True)
            logger.warning("Proceeding without database connection. Data will not be saved persistently.")
            # Null out all handles so later `is None` checks short-circuit.
            self.mongo_client = None
            self.db = None
            self.feedback_collection = None

    def set_progress_callback(self, callback):
        """Set the progress callback function forwarded to PAS2."""
        self.progress_callback = callback

    def initialize_api(self, mistral_api_key, openai_api_key):
        """Initialize the PAS2 pipeline with API keys.

        Returns a human-readable status string rather than raising; callers
        check for the word "successfully" to detect success.
        """
        try:
            logger.info("Initializing PAS2 with API keys")
            self.pas2 = PAS2(
                mistral_api_key=mistral_api_key,
                openai_api_key=openai_api_key,
                progress_callback=self.progress_callback
            )
            logger.info("API initialization successful")
            return "API keys set successfully! You can now use the application."
        except Exception as e:
            logger.error("Error initializing API: %s", str(e), exc_info=True)
            return f"Error initializing API: {str(e)}"

    def process_query(self, query: str):
        """Process the query using PAS2.

        Returns the PAS2 results dict, or a dict with a single "error" key
        when the pipeline is uninitialized, the query is blank, or the
        pipeline raises.
        """
        if not self.pas2:
            logger.error("PAS2 not initialized")
            return {
                "error": "Please set API keys first before processing queries."
            }

        if not query.strip():
            logger.warning("Empty query provided")
            return {
                "error": "Please enter a query."
            }

        try:
            # Set the progress callback if needed (it may have been registered
            # after the PAS2 instance was created).
            if self.progress_callback and self.pas2.progress_callback != self.progress_callback:
                self.pas2.progress_callback = self.progress_callback

            # Process the query
            logger.info("Processing query with PAS2: %s", query)
            results = self.pas2.detect_hallucination(query)
            logger.info("Query processing completed successfully")
            return results
        except Exception as e:
            logger.error("Error processing query: %s", str(e), exc_info=True)
            return {
                "error": f"Error processing query: {str(e)}"
            }

    def save_feedback(self, results, feedback):
        """Save a detection result and the user's feedback to MongoDB.

        Returns a status message string in all cases (success, no DB, error).
        """
        try:
            logger.info("Saving user feedback: %s", feedback)

            # NOTE: PyMongo collections don't support truth-testing, so the
            # comparison must be `is None`, not `not self.feedback_collection`.
            if self.feedback_collection is None:
                logger.error("MongoDB connection not available. Cannot save feedback.")
                return "Database connection not available. Feedback not saved."

            # Prepare document for MongoDB; timestamp is a naive local
            # datetime — presumably server-local time (TODO: confirm/UTC).
            document = {
                "timestamp": datetime.now(),
                "original_query": results.get('original_query', ''),
                "original_response": results.get('original_response', ''),
                "paraphrased_queries": results.get('paraphrased_queries', []),
                "paraphrased_responses": results.get('paraphrased_responses', []),
                "hallucination_detected": results.get('hallucination_detected', False),
                "confidence_score": results.get('confidence_score', 0.0),
                "conflicting_facts": results.get('conflicting_facts', []),
                "reasoning": results.get('reasoning', ''),
                "summary": results.get('summary', ''),
                "user_feedback": feedback
            }

            # Insert document into collection
            self.feedback_collection.insert_one(document)

            logger.info("Feedback saved successfully to MongoDB")
            return "Feedback saved successfully!"
        except Exception as e:
            logger.error("Error saving feedback: %s", str(e), exc_info=True)
            return f"Error saving feedback: {str(e)}"

    def get_feedback_stats(self):
        """Get aggregate statistics about collected feedback from MongoDB.

        Returns a dict with total count, per-verdict counts and the mean
        confidence score, or ``None`` when the DB is unavailable or errors.
        """
        try:
            if self.feedback_collection is None:
                logger.error("MongoDB connection not available. Cannot get feedback stats.")
                return None

            # Get total feedback count
            total_count = self.feedback_collection.count_documents({})

            # Get hallucination detection stats using aggregation
            # (groups documents by the boolean verdict and counts each group).
            hallucination_pipeline = [
                {"$group": {
                    "_id": "$hallucination_detected",
                    "count": {"$sum": 1}
                }}
            ]
            detection_stats = {doc["_id"]: doc["count"]
                              for doc in self.feedback_collection.aggregate(hallucination_pipeline)}

            # Get average confidence score across all documents (_id: None
            # collapses everything into a single group).
            avg_pipeline = [
                {"$group": {
                    "_id": None,
                    "average": {"$avg": "$confidence_score"}
                }}
            ]
            avg_result = list(self.feedback_collection.aggregate(avg_pipeline))
            avg_confidence = avg_result[0]["average"] if avg_result else 0

            return {
                "total_feedback": total_count,
                "hallucinations_detected": detection_stats.get(True, 0),
                "no_hallucinations": detection_stats.get(False, 0),
                "average_confidence": round(avg_confidence, 2)
            }
        except Exception as e:
            logger.error("Error getting feedback stats: %s", str(e), exc_info=True)
            return None

    def export_data_to_csv(self, filepath=None):
        """Export all feedback data to a CSV file for analysis.

        List-valued fields are JSON-encoded into single CSV cells. Returns the
        path written, or an error message string on failure.
        """
        try:
            if self.feedback_collection is None:
                logger.error("MongoDB connection not available. Cannot export data.")
                return "Database connection not available. Cannot export data."

            # Query all feedback data
            cursor = self.feedback_collection.find({})

            # Convert cursor to list of dictionaries
            records = list(cursor)

            # Convert MongoDB documents to pandas DataFrame
            # Handle nested arrays and complex objects
            for record in records:
                # Convert ObjectId to string
                record['_id'] = str(record['_id'])

                # Convert datetime objects to string
                if 'timestamp' in record:
                    record['timestamp'] = record['timestamp'].strftime("%Y-%m-%d %H:%M:%S")

                # Convert lists to strings for CSV storage
                if 'paraphrased_queries' in record:
                    record['paraphrased_queries'] = json.dumps(record['paraphrased_queries'])
                if 'paraphrased_responses' in record:
                    record['paraphrased_responses'] = json.dumps(record['paraphrased_responses'])
                if 'conflicting_facts' in record:
                    record['conflicting_facts'] = json.dumps(record['conflicting_facts'])

            # Create DataFrame
            df = pd.DataFrame(records)

            # Define default filepath if not provided (timestamped file next
            # to this module).
            if not filepath:
                filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                       f"hallucination_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")

            # Export to CSV
            df.to_csv(filepath, index=False)
            logger.info(f"Data successfully exported to {filepath}")

            return filepath
        except Exception as e:
            logger.error(f"Error exporting data: {str(e)}", exc_info=True)
            return f"Error exporting data: {str(e)}"

    def get_recent_queries(self, limit=10):
        """Get the most recent queries for display in the UI.

        Returns up to *limit* dicts with id/query/verdict/timestamp, newest
        first; an empty list when the DB is unavailable or on error.
        """
        try:
            if self.feedback_collection is None:
                logger.error("MongoDB connection not available. Cannot get recent queries.")
                return []

            # Get most recent queries (projection limits fields; _id is
            # always included by MongoDB unless explicitly excluded).
            cursor = self.feedback_collection.find(
                {},
                {"original_query": 1, "hallucination_detected": 1, "timestamp": 1}
            ).sort("timestamp", pymongo.DESCENDING).limit(limit)

            # Convert to list of dictionaries
            recent_queries = []
            for doc in cursor:
                recent_queries.append({
                    "id": str(doc["_id"]),
                    "query": doc["original_query"],
                    "hallucination_detected": doc.get("hallucination_detected", False),
                    "timestamp": doc["timestamp"].strftime("%Y-%m-%d %H:%M:%S") if isinstance(doc["timestamp"], datetime) else doc["timestamp"]
                })

            return recent_queries
        except Exception as e:
            logger.error(f"Error getting recent queries: {str(e)}", exc_info=True)
            return []

    def get_query_details(self, query_id):
        """Get full details for a specific query by its string ObjectId.

        Returns the document (with _id and timestamp stringified for JSON
        serialization), or ``None`` when missing/unavailable/on error.
        """
        try:
            if self.feedback_collection is None:
                logger.error("MongoDB connection not available. Cannot get query details.")
                return None

            # Convert string ID to ObjectId (raises for malformed ids; caught below)
            obj_id = ObjectId(query_id)

            # Find the query by ID
            doc = self.feedback_collection.find_one({"_id": obj_id})

            if doc is None:
                logger.warning(f"No query found with ID {query_id}")
                return None

            # Convert ObjectId to string for JSON serialization
            doc["_id"] = str(doc["_id"])

            # Convert timestamp to string
            if "timestamp" in doc and isinstance(doc["timestamp"], datetime):
                doc["timestamp"] = doc["timestamp"].strftime("%Y-%m-%d %H:%M:%S")

            return doc
        except Exception as e:
            logger.error(f"Error getting query details: {str(e)}", exc_info=True)
            return None
654
+
655
+
656
+ # Progress tracking for UI updates
657
class ProgressTracker:
    """Tracks progress of hallucination detection for UI updates.

    A small, lock-guarded state machine over the named stages in ``STAGES``.
    Every stage change re-renders an HTML progress bar and pushes it to the
    registered UI callback. ``start_pulsing``/``stop_pulsing`` run a daemon
    thread that animates the status text during long operations.
    """

    # stage name -> display text, progress percentage (0-100) and bar colour.
    # "{completed}/{total}" and "{error_message}" placeholders are filled in
    # by update_stage().
    STAGES = {
        "idle": {"status": "Ready", "progress": 0, "color": "#757575"},
        "starting": {"status": "Starting process...", "progress": 5, "color": "#2196F3"},
        "generating_paraphrases": {"status": "Generating paraphrases...", "progress": 15, "color": "#2196F3"},
        "paraphrases_complete": {"status": "Paraphrases generated", "progress": 30, "color": "#2196F3"},
        "getting_responses": {"status": "Getting responses (0/0)...", "progress": 35, "color": "#2196F3"},
        "responses_progress": {"status": "Getting responses ({completed}/{total})...", "progress": 40, "color": "#2196F3"},
        "responses_complete": {"status": "All responses received", "progress": 65, "color": "#2196F3"},
        "judging": {"status": "Analyzing responses for hallucinations...", "progress": 70, "color": "#2196F3"},
        "complete": {"status": "Analysis complete!", "progress": 100, "color": "#4CAF50"},
        "error": {"status": "Error: {error_message}", "progress": 100, "color": "#F44336"}
    }

    def __init__(self):
        self.stage = "idle"
        self.stage_data = self.STAGES[self.stage].copy()
        self.query = ""
        self.completed_responses = 0
        self.total_responses = 0
        self.error_message = ""
        self._lock = threading.Lock()          # guards all mutable state above
        self._status_callback = None           # UI callback, set via register_callback
        self._stop_event = threading.Event()   # signals the pulse thread to exit
        self._update_thread = None             # pulse animation thread

    def register_callback(self, callback_fn):
        """Register callback function to update UI (called with HTML text)."""
        self._status_callback = callback_fn

    def update_stage(self, stage, **kwargs):
        """Update the current stage and trigger the UI callback.

        Recognised keyword arguments: ``query``, ``completed_responses`` /
        ``completed``, ``total_responses`` / ``total``, and ``error_message``.
        Unknown stage names are ignored.
        """
        with self._lock:
            if stage in self.STAGES:
                self.stage = stage
                # Copy so placeholder formatting never mutates the template.
                self.stage_data = self.STAGES[stage].copy()

                # Update with any additional parameters.
                # BUGFIX: also accept the short spellings "completed"/"total"
                # that the UI layer actually sends — previously only the long
                # names were honoured, so the responses_progress status always
                # rendered as "(0/0)".
                for key, value in kwargs.items():
                    if key == 'query':
                        self.query = value
                    elif key in ('completed_responses', 'completed'):
                        self.completed_responses = value
                    elif key in ('total_responses', 'total'):
                        self.total_responses = value
                    elif key == 'error_message':
                        self.error_message = value

                # Fill stage-specific placeholders in the status message.
                if stage == 'responses_progress':
                    self.stage_data['status'] = self.stage_data['status'].format(
                        completed=self.completed_responses,
                        total=self.total_responses
                    )
                elif stage == 'error':
                    self.stage_data['status'] = self.stage_data['status'].format(
                        error_message=self.error_message
                    )

                if self._status_callback:
                    self._status_callback(self.get_html_status())

    def get_html_status(self):
        """Get HTML representation of current status."""
        progress_width = f"{self.stage_data['progress']}%"
        status_text = self.stage_data['status']
        color = self.stage_data['color']

        query_info = f'<div class="query-display">{self.query}</div>' if self.query else ''

        # Only show status text if not in idle state
        status_display = f'<div class="progress-status" style="color: {color};">{status_text}</div>' if self.stage != "idle" else ''

        html = f"""
        <div class="progress-container">
            {query_info}
            {status_display}
            <div class="progress-bar-container">
                <div class="progress-bar" style="width: {progress_width}; background-color: {color};"></div>
            </div>
        </div>
        """
        return html

    def start_pulsing(self):
        """Start a pulsing animation for the progress bar during long operations."""
        if self._update_thread and self._update_thread.is_alive():
            return

        self._stop_event.clear()
        self._update_thread = threading.Thread(target=self._pulse_progress)
        self._update_thread.daemon = True  # never block interpreter shutdown
        self._update_thread.start()

    def stop_pulsing(self):
        """Stop the pulsing animation."""
        self._stop_event.set()
        if self._update_thread:
            self._update_thread.join(0.5)  # bounded wait; thread is a daemon

    def _pulse_progress(self):
        """Animate the progress bar to show activity (runs on pulse thread)."""
        pulse_stages = ["⋯", "⋯⋯", "⋯⋯⋯", "⋯⋯", "⋯"]
        i = 0
        while not self._stop_event.is_set():
            with self._lock:
                # Only animate intermediate stages; terminal states stay fixed.
                if self.stage not in ["idle", "complete", "error"]:
                    # Strip any previous pulse suffix before appending a new one.
                    status_base = self.stage_data['status'].split("...")[0] if "..." in self.stage_data['status'] else self.stage_data['status']
                    self.stage_data['status'] = f"{status_base}... {pulse_stages[i]}"

                    if self._status_callback:
                        self._status_callback(self.get_html_status())

            i = (i + 1) % len(pulse_stages)
            time.sleep(0.3)
774
+
775
+
776
+ def create_interface():
777
+ """Create Gradio interface"""
778
+ detector = HallucinationDetectorApp()
779
+
780
+ # Initialize Progress Tracker
781
+ progress_tracker = ProgressTracker()
782
+
783
+ # Initialize APIs from environment variables automatically
784
+ try:
785
+ detector.initialize_api(
786
+ mistral_api_key=os.environ.get("HF_MISTRAL_API_KEY"),
787
+ openai_api_key=os.environ.get("HF_OPENAI_API_KEY")
788
+ )
789
+ except Exception as e:
790
+ print(f"Warning: Failed to initialize APIs from environment variables: {e}")
791
+ print("Please make sure HF_MISTRAL_API_KEY and HF_OPENAI_API_KEY are set in your environment")
792
+
793
+ # CSS for styling
794
+ css = """
795
+ .container {
796
+ max-width: 1000px;
797
+ margin: 0 auto;
798
+ }
799
+ .title {
800
+ text-align: center;
801
+ margin-bottom: 0.5em;
802
+ color: #1a237e;
803
+ font-weight: 600;
804
+ }
805
+ .subtitle {
806
+ text-align: center;
807
+ margin-bottom: 1.5em;
808
+ color: #455a64;
809
+ font-size: 1.2em;
810
+ }
811
+ .section-title {
812
+ margin-top: 1em;
813
+ margin-bottom: 0.5em;
814
+ font-weight: bold;
815
+ color: #283593;
816
+ }
817
+ .info-box {
818
+ padding: 1.2em;
819
+ border-radius: 8px;
820
+ background-color: #f5f5f5;
821
+ margin-bottom: 1em;
822
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
823
+ }
824
+ .hallucination-positive {
825
+ padding: 1.2em;
826
+ border-radius: 8px;
827
+ background-color: #ffebee;
828
+ border-left: 5px solid #f44336;
829
+ margin-bottom: 1em;
830
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
831
+ }
832
+ .hallucination-negative {
833
+ padding: 1.2em;
834
+ border-radius: 8px;
835
+ background-color: #e8f5e9;
836
+ border-left: 5px solid #4caf50;
837
+ margin-bottom: 1em;
838
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
839
+ }
840
+ .response-box {
841
+ padding: 1.2em;
842
+ border-radius: 8px;
843
+ background-color: #f5f5f5;
844
+ margin-bottom: 0.8em;
845
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
846
+ }
847
+ .example-queries {
848
+ display: flex;
849
+ flex-wrap: wrap;
850
+ gap: 8px;
851
+ margin-bottom: 15px;
852
+ }
853
+ .example-query {
854
+ background-color: #e3f2fd;
855
+ padding: 8px 15px;
856
+ border-radius: 18px;
857
+ font-size: 0.9em;
858
+ cursor: pointer;
859
+ transition: all 0.2s;
860
+ border: 1px solid #bbdefb;
861
+ }
862
+ .example-query:hover {
863
+ background-color: #bbdefb;
864
+ box-shadow: 0 2px 5px rgba(0,0,0,0.1);
865
+ }
866
+ .stats-section {
867
+ display: flex;
868
+ justify-content: space-between;
869
+ background-color: #e8eaf6;
870
+ padding: 15px;
871
+ border-radius: 8px;
872
+ margin-bottom: 20px;
873
+ }
874
+ .stat-item {
875
+ text-align: center;
876
+ padding: 10px;
877
+ }
878
+ .stat-value {
879
+ font-size: 1.5em;
880
+ font-weight: bold;
881
+ color: #303f9f;
882
+ }
883
+ .stat-label {
884
+ font-size: 0.9em;
885
+ color: #5c6bc0;
886
+ }
887
+ .feedback-section {
888
+ border-top: 1px solid #e0e0e0;
889
+ padding-top: 15px;
890
+ margin-top: 20px;
891
+ }
892
+ footer {
893
+ text-align: center;
894
+ padding: 20px;
895
+ margin-top: 30px;
896
+ color: #9e9e9e;
897
+ font-size: 0.9em;
898
+ }
899
+ .processing-status {
900
+ padding: 12px;
901
+ background-color: #fff3e0;
902
+ border-left: 4px solid #ff9800;
903
+ margin-bottom: 15px;
904
+ font-weight: 500;
905
+ color: #e65100;
906
+ }
907
+ .debug-panel {
908
+ background-color: #f5f5f5;
909
+ border: 1px solid #e0e0e0;
910
+ border-radius: 4px;
911
+ padding: 10px;
912
+ margin-top: 15px;
913
+ font-family: monospace;
914
+ font-size: 0.9em;
915
+ white-space: pre-wrap;
916
+ max-height: 200px;
917
+ overflow-y: auto;
918
+ }
919
+ .progress-container {
920
+ padding: 15px;
921
+ background-color: #fff;
922
+ border-radius: 8px;
923
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
924
+ margin-bottom: 15px;
925
+ }
926
+ .progress-status {
927
+ font-weight: 500;
928
+ margin-bottom: 8px;
929
+ padding: 4px 0;
930
+ font-size: 0.95em;
931
+ }
932
+ .progress-bar-container {
933
+ background-color: #e0e0e0;
934
+ height: 10px;
935
+ border-radius: 5px;
936
+ overflow: hidden;
937
+ margin-bottom: 10px;
938
+ box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
939
+ }
940
+ .progress-bar {
941
+ height: 100%;
942
+ transition: width 0.5s ease;
943
+ background-image: linear-gradient(to right, #2196F3, #3f51b5);
944
+ }
945
+ .query-display {
946
+ font-style: italic;
947
+ color: #666;
948
+ margin-bottom: 10px;
949
+ background-color: #f5f5f5;
950
+ padding: 8px;
951
+ border-radius: 4px;
952
+ border-left: 3px solid #2196F3;
953
+ }
954
+ """
955
+
956
+ # Example queries
957
+ example_queries = [
958
+ "Who was the first person to land on the moon?",
959
+ "What is the capital of France?",
960
+ "How many planets are in our solar system?",
961
+ "Who wrote the novel 1984?",
962
+ "What is the speed of light?",
963
+ "What was the first computer?"
964
+ ]
965
+
966
+ # Function to update the progress display
967
+ def update_progress_display(html):
968
+ """Update the progress display with the provided HTML"""
969
+ return gr.update(visible=True, value=html)
970
+
971
+ # Register the callback with the tracker
972
+ progress_tracker.register_callback(update_progress_display)
973
+
974
+ # Register the tracker with the detector
975
+ detector.set_progress_callback(progress_tracker.update_stage)
976
+
977
+ # Helper function to set example query
978
+ def set_example_query(example):
979
+ return example
980
+
981
+ # Function to show processing is starting
982
+ def start_processing(query):
983
+ logger.info("Processing query: %s", query)
984
+ # Stop any existing pulsing to prepare for incremental progress updates
985
+ progress_tracker.stop_pulsing()
986
+
987
+ # Reset to a processing state without the "Ready" text
988
+ # Use "starting" stage but with minimal UI display
989
+ progress_tracker.stage = "starting"
990
+ progress_tracker.query = query
991
+
992
+ # Force UI update with clean display
993
+ if progress_tracker._status_callback:
994
+ progress_tracker._status_callback(progress_tracker.get_html_status())
995
+
996
+ return [
997
+ gr.update(visible=True), # Show the progress display
998
+ gr.update(visible=False), # Hide the results accordion
999
+ gr.update(visible=False), # Hide the feedback accordion
1000
+ None # Reset hidden results
1001
+ ]
1002
+
1003
+ # Main processing function
1004
    def process_query_and_display_results(query: str, progress=gr.Progress()):
        """Run the full PAS2 hallucination-detection pipeline for *query*.

        Drives both progress surfaces in lockstep — the custom HTML tracker
        (`progress_tracker`) and Gradio's built-in `progress` bar — then
        renders the results as an HTML report.

        Returns a 4-element list of Gradio updates:
        [progress panel, results panel html, feedback panel, raw results dict].
        On any failure the error is shown in the progress panel and the
        results/feedback panels stay hidden.
        """
        if not query.strip():
            logger.warning("Empty query submitted")
            progress_tracker.stop_pulsing()
            progress_tracker.update_stage("error", error_message="Please enter a query.")
            return [
                gr.update(visible=True), # Show the progress with error
                gr.update(visible=False),
                gr.update(visible=False),
                None
            ]

        # Check if API is initialized (lazy init from env vars on first use)
        if not detector.pas2:
            try:
                # Try to initialize from environment variables
                logger.info("Initializing APIs from environment variables")
                progress(0.05, desc="Initializing API...")
                init_message = detector.initialize_api(
                    mistral_api_key=os.environ.get("HF_MISTRAL_API_KEY"),
                    openai_api_key=os.environ.get("HF_OPENAI_API_KEY")
                )
                # initialize_api reports failure via its message string, not
                # an exception, so detect success by substring.
                if "successfully" not in init_message:
                    logger.error("Failed to initialize APIs: %s", init_message)
                    progress_tracker.stop_pulsing()
                    progress_tracker.update_stage("error", error_message="API keys not found in environment variables.")
                    return [
                        gr.update(visible=True),
                        gr.update(visible=False),
                        gr.update(visible=False),
                        None
                    ]
            except Exception as e:
                logger.error("Error initializing API: %s", str(e), exc_info=True)
                progress_tracker.stop_pulsing()
                progress_tracker.update_stage("error", error_message=f"Error initializing API: {str(e)}")
                return [
                    gr.update(visible=True),
                    gr.update(visible=False),
                    gr.update(visible=False),
                    None
                ]

        try:
            # Process the query
            logger.info("Starting hallucination detection process")
            start_time = time.time()

            # Set up a custom progress callback that uses both the progress_tracker and the gr.Progress
            def combined_progress_callback(stage, **kwargs):
                # Skip the idle stage, which shows "Ready"
                if stage == "idle":
                    return

                progress_tracker.update_stage(stage, **kwargs)

                # Map the stages to progress values for the gr.Progress bar
                stage_to_progress = {
                    "starting": 0.05,
                    "generating_paraphrases": 0.15,
                    "paraphrases_complete": 0.3,
                    "getting_responses": 0.35,
                    # responses_progress scales 0.35-0.65 by completion ratio;
                    # max(total, 1) guards against division by zero.
                    "responses_progress": lambda kwargs: 0.35 + (0.3 * (kwargs.get("completed", 0) / max(kwargs.get("total", 1), 1))),
                    "responses_complete": 0.65,
                    "judging": 0.7,
                    "complete": 1.0,
                    "error": 1.0
                }

                # Update the gr.Progress bar
                if stage in stage_to_progress:
                    prog_value = stage_to_progress[stage]
                    if callable(prog_value):
                        prog_value = prog_value(kwargs)

                    desc = progress_tracker.STAGES[stage]["status"]
                    if "{" in desc and "}" in desc:
                        # Format the description with any kwargs
                        desc = desc.format(**kwargs)

                    # Ensure UI updates by adding a small delay
                    # This forces the progress updates to be rendered
                    progress(prog_value, desc=desc)

                    # For certain key stages, add a small sleep to ensure progress is visible
                    if stage in ["starting", "generating_paraphrases", "paraphrases_complete",
                                "getting_responses", "responses_complete", "judging", "complete"]:
                        time.sleep(0.2) # Small delay to ensure UI update is visible

            # Use these steps for processing
            detector.set_progress_callback(combined_progress_callback)

            # Create a wrapper function for detect_hallucination that gives more control over progress updates
            def run_detection_with_visible_progress():
                """Re-implements detect_hallucination step-by-step so each
                stage emits a visible progress update."""
                # Step 1: Start
                combined_progress_callback("starting", query=query)
                time.sleep(0.3) # Ensure starting status is visible

                # Step 2: Generate paraphrases (15-30%)
                combined_progress_callback("generating_paraphrases", query=query)
                all_queries = detector.pas2.generate_paraphrases(query)
                combined_progress_callback("paraphrases_complete", query=query, count=len(all_queries))

                # Step 3: Get responses (35-65%)
                combined_progress_callback("getting_responses", query=query, total=len(all_queries))
                all_responses = []
                for i, q in enumerate(all_queries):
                    # Show incremental progress for each response
                    combined_progress_callback("responses_progress", query=query, completed=i, total=len(all_queries))
                    response = detector.pas2._get_single_response(q, index=i)
                    all_responses.append(response)
                combined_progress_callback("responses_complete", query=query)

                # Step 4: Judge hallucinations (70-100%)
                combined_progress_callback("judging", query=query)

                # The first query is the original, rest are paraphrases
                original_query = all_queries[0]
                original_response = all_responses[0]
                paraphrased_queries = all_queries[1:] if len(all_queries) > 1 else []
                paraphrased_responses = all_responses[1:] if len(all_responses) > 1 else []

                # Judge the responses
                judgment = detector.pas2.judge_hallucination(
                    original_query=original_query,
                    original_response=original_response,
                    paraphrased_queries=paraphrased_queries,
                    paraphrased_responses=paraphrased_responses
                )

                # Assemble the results
                results = {
                    "original_query": original_query,
                    "original_response": original_response,
                    "paraphrased_queries": paraphrased_queries,
                    "paraphrased_responses": paraphrased_responses,
                    "hallucination_detected": judgment.hallucination_detected,
                    "confidence_score": judgment.confidence_score,
                    "conflicting_facts": judgment.conflicting_facts,
                    "reasoning": judgment.reasoning,
                    "summary": judgment.summary
                }

                # Show completion
                combined_progress_callback("complete", query=query)
                time.sleep(0.3) # Ensure complete status is visible

                return results

            # Run the detection process with visible progress
            results = run_detection_with_visible_progress()

            # Calculate elapsed time
            elapsed_time = time.time() - start_time
            logger.info("Hallucination detection completed in %.2f seconds", elapsed_time)

            # Check for errors
            if "error" in results:
                logger.error("Error in results: %s", results["error"])
                progress_tracker.stop_pulsing()
                progress_tracker.update_stage("error", error_message=results["error"])
                return [
                    gr.update(visible=True),
                    gr.update(visible=False),
                    gr.update(visible=False),
                    None
                ]

            # Prepare responses for display
            original_query = results["original_query"]
            original_response = results["original_response"]

            paraphrased_queries = results["paraphrased_queries"]
            paraphrased_responses = results["paraphrased_responses"]

            hallucination_detected = results["hallucination_detected"]
            confidence = results["confidence_score"]
            reasoning = results["reasoning"]
            summary = results["summary"]

            # Format conflicting facts as a numbered plain-text list;
            # dict entries are flattened to "key: value" pairs.
            conflicting_facts = results["conflicting_facts"]
            conflicting_facts_text = ""
            if conflicting_facts:
                for i, fact in enumerate(conflicting_facts, 1):
                    conflicting_facts_text += f"{i}. "
                    if isinstance(fact, dict):
                        for key, value in fact.items():
                            conflicting_facts_text += f"{key}: {value}, "
                        conflicting_facts_text = conflicting_facts_text.rstrip(", ")
                    else:
                        conflicting_facts_text += str(fact)
                    conflicting_facts_text += "\n"

            # Format responses to escape any backslashes (and turn newlines
            # into <br> so they render inside the HTML report)
            original_response_safe = original_response.replace('\\', '\\\\').replace('\n', '<br>')
            paraphrased_responses_safe = [r.replace('\\', '\\\\').replace('\n', '<br>') for r in paraphrased_responses]
            reasoning_safe = reasoning.replace('\\', '\\\\').replace('\n', '<br>')
            conflicting_facts_text_safe = conflicting_facts_text.replace('\\', '\\\\').replace('\n', '<br>') if conflicting_facts_text else "None identified"

            html_output = f"""
            <div class="container">
                <h2 class="title">Hallucination Detection Results</h2>

                <div class="stats-section">
                    <div class="stat-item">
                        <div class="stat-value">{'Yes' if hallucination_detected else 'No'}</div>
                        <div class="stat-label">Hallucination Detected</div>
                    </div>
                    <div class="stat-item">
                        <div class="stat-value">{confidence:.2f}</div>
                        <div class="stat-label">Confidence Score</div>
                    </div>
                    <div class="stat-item">
                        <div class="stat-value">{len(paraphrased_queries)}</div>
                        <div class="stat-label">Paraphrases Analyzed</div>
                    </div>
                    <div class="stat-item">
                        <div class="stat-value">{elapsed_time:.1f}s</div>
                        <div class="stat-label">Processing Time</div>
                    </div>
                </div>

                <div class="{'hallucination-positive' if hallucination_detected else 'hallucination-negative'}">
                    <h3>Analysis Summary</h3>
                    <p>{summary}</p>
                </div>

                <div class="section-title">Original Query</div>
                <div class="response-box">
                    {original_query}
                </div>

                <div class="section-title">Original Response</div>
                <div class="response-box">
                    {original_response_safe}
                </div>

                <div class="section-title">Paraphrased Queries and Responses</div>
            """

            for i, (q, r) in enumerate(zip(paraphrased_queries, paraphrased_responses_safe), 1):
                html_output += f"""
                <div class="section-title">Paraphrased Query {i}</div>
                <div class="response-box">
                    {q}
                </div>

                <div class="section-title">Response {i}</div>
                <div class="response-box">
                    {r}
                </div>
                """

            html_output += f"""
                <div class="section-title">Detailed Analysis</div>
                <div class="info-box">
                    <p><strong>Reasoning:</strong></p>
                    <p>{reasoning_safe}</p>

                    <p><strong>Conflicting Facts:</strong></p>
                    <p>{conflicting_facts_text_safe}</p>
                </div>
            </div>
            """

            logger.info("Updating UI with results")
            progress_tracker.stop_pulsing()

            return [
                gr.update(visible=False), # Hide progress display when showing results
                gr.update(visible=True, value=html_output),
                gr.update(visible=True),
                results
            ]

        except Exception as e:
            logger.error("Error processing query: %s", str(e), exc_info=True)
            progress_tracker.stop_pulsing()
            progress_tracker.update_stage("error", error_message=f"Error processing query: {str(e)}")
            return [
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
                None
            ]
1290
+
1291
# Helper: persist user feedback for the current results and refresh the stats panel.
def combine_feedback(fb_input, fb_text, results):
    """Save feedback on the displayed results.

    Returns a (status message, stats HTML) pair for the feedback status box
    and the feedback-stats display.
    """
    if not results:
        return "No results to attach feedback to.", ""

    # Fold the optional free-text comment into the radio selection.
    note = f"{fb_input}: {fb_text}" if fb_text else fb_input
    status = detector.save_feedback(results, note)

    # Refresh the aggregate feedback statistics shown under the results.
    stats = detector.get_feedback_stats()
    if not stats:
        return status, ""

    stats_html = f"""
    <div class="stats-section" style="margin-top: 15px;">
        <div class="stat-item">
            <div class="stat-value">{stats['total_feedback']}</div>
            <div class="stat-label">Total Feedback</div>
        </div>
        <div class="stat-item">
            <div class="stat-value">{stats['hallucinations_detected']}</div>
            <div class="stat-label">Hallucinations Found</div>
        </div>
        <div class="stat-item">
            <div class="stat-value">{stats['no_hallucinations']}</div>
            <div class="stat-label">No Hallucinations</div>
        </div>
        <div class="stat-item">
            <div class="stat-value">{stats['average_confidence']}</div>
            <div class="stat-label">Avg. Confidence</div>
        </div>
    </div>
    """
    return status, stats_html
1326
+
1327
+ # Create the interface
1328
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as interface:
1329
+ gr.HTML(
1330
+ """
1331
+ <div style="text-align: center; margin-bottom: 1.5rem">
1332
+ <h1 style="font-size: 2.2em; font-weight: 600; color: #1a237e; margin-bottom: 0.2em;">PAS2 - Hallucination Detector</h1>
1333
+ <h3 style="font-size: 1.3em; color: #455a64; margin-bottom: 0.8em;">Advanced AI Response Verification Using Model-as-Judge</h3>
1334
+ <p style="font-size: 1.1em; color: #546e7a; max-width: 800px; margin: 0 auto;">
1335
+ This tool detects hallucinations in AI responses by comparing answers to semantically equivalent questions and using a specialized judge model.
1336
+ </p>
1337
+ </div>
1338
+ """
1339
+ )
1340
+
1341
+ with gr.Accordion("About this Tool", open=False):
1342
+ gr.Markdown(
1343
+ """
1344
+ ### How It Works
1345
+
1346
+ This tool implements the Paraphrase-based Approach for Scrutinizing Systems (PAS2) with a model-as-judge enhancement:
1347
+
1348
+ 1. **Paraphrase Generation**: Your question is paraphrased multiple ways while preserving its core meaning
1349
+ 2. **Multiple Responses**: All questions (original + paraphrases) are sent to Mistral Large model
1350
+ 3. **Expert Judgment**: OpenAI's o3-mini analyzes all responses to detect factual inconsistencies
1351
+
1352
+ ### Why This Approach?
1353
+
1354
+ When an AI hallucinates, it often provides different answers to the same question when phrased differently.
1355
+ By using a separate judge model, we can identify these inconsistencies more effectively than with
1356
+ metric-based approaches.
1357
+
1358
+ ### Understanding the Results
1359
+
1360
+ - **Confidence Score**: Indicates the judge's confidence in the hallucination detection
1361
+ - **Conflicting Facts**: Specific inconsistencies found across responses
1362
+ - **Reasoning**: The judge's detailed analysis explaining its decision
1363
+
1364
+ ### Privacy Notice
1365
+
1366
+ Your queries and the system's responses are saved to help improve hallucination detection.
1367
+ No personally identifiable information is collected.
1368
+ """
1369
+ )
1370
+
1371
+ with gr.Row():
1372
+ with gr.Column():
1373
+ # First define the query input
1374
+ gr.Markdown("### Enter Your Question")
1375
+ with gr.Row():
1376
+ query_input = gr.Textbox(
1377
+ label="",
1378
+ placeholder="Ask a factual question (e.g., Who was the first person to land on the moon?)",
1379
+ lines=3
1380
+ )
1381
+
1382
+ # Now define the example queries
1383
+ gr.Markdown("### Or Try an Example")
1384
+ example_row = gr.Row()
1385
+ with example_row:
1386
+ for example in example_queries:
1387
+ example_btn = gr.Button(
1388
+ example,
1389
+ elem_classes=["example-query"],
1390
+ scale=0
1391
+ )
1392
+ example_btn.click(
1393
+ fn=set_example_query,
1394
+ inputs=[gr.Textbox(value=example, visible=False)],
1395
+ outputs=[query_input]
1396
+ )
1397
+
1398
+ with gr.Row():
1399
+ submit_button = gr.Button("Detect Hallucinations", variant="primary", scale=1)
1400
+
1401
+ # Error message
1402
+ error_message = gr.HTML(
1403
+ label="Status",
1404
+ visible=False
1405
+ )
1406
+
1407
+ # Progress display
1408
+ progress_display = gr.HTML(
1409
+ value=progress_tracker.get_html_status(),
1410
+ visible=True
1411
+ )
1412
+
1413
+ # Results display
1414
+ results_accordion = gr.HTML(visible=False)
1415
+
1416
+ # Add feedback stats display
1417
+ feedback_stats = gr.HTML(visible=True)
1418
+
1419
+ # Feedback section
1420
+ with gr.Accordion("Provide Feedback", open=False, visible=False) as feedback_accordion:
1421
+ gr.Markdown("### Help Improve the System")
1422
+ gr.Markdown("Your feedback helps us refine the hallucination detection system.")
1423
+
1424
+ feedback_input = gr.Radio(
1425
+ label="Is the hallucination detection accurate?",
1426
+ choices=["Yes, correct detection", "No, incorrectly flagged hallucination", "No, missed hallucination", "Unsure/Other"],
1427
+ value="Yes, correct detection"
1428
+ )
1429
+
1430
+ feedback_text = gr.Textbox(
1431
+ label="Additional comments (optional)",
1432
+ placeholder="Please provide any additional observations or details...",
1433
+ lines=2
1434
+ )
1435
+
1436
+ feedback_button = gr.Button("Submit Feedback", variant="secondary")
1437
+ feedback_status = gr.Textbox(label="Feedback Status", interactive=False, visible=False)
1438
+
1439
+ # Initialize feedback stats
1440
+ initial_stats = detector.get_feedback_stats()
1441
+ if initial_stats:
1442
+ feedback_stats.value = f"""
1443
+ <div class="stats-section">
1444
+ <div class="stat-item">
1445
+ <div class="stat-value">{initial_stats['total_feedback']}</div>
1446
+ <div class="stat-label">Total Feedback</div>
1447
+ </div>
1448
+ <div class="stat-item">
1449
+ <div class="stat-value">{initial_stats['hallucinations_detected']}</div>
1450
+ <div class="stat-label">Hallucinations Found</div>
1451
+ </div>
1452
+ <div class="stat-item">
1453
+ <div class="stat-value">{initial_stats['no_hallucinations']}</div>
1454
+ <div class="stat-label">No Hallucinations</div>
1455
+ </div>
1456
+ <div class="stat-item">
1457
+ <div class="stat-value">{initial_stats['average_confidence']}</div>
1458
+ <div class="stat-label">Avg. Confidence</div>
1459
+ </div>
1460
+ </div>
1461
+ """
1462
+
1463
+ # Hidden state to store results for feedback
1464
+ hidden_results = gr.State()
1465
+
1466
+ # Set up event handlers
1467
+ submit_button.click(
1468
+ fn=start_processing,
1469
+ inputs=[query_input],
1470
+ outputs=[progress_display, results_accordion, feedback_accordion, hidden_results],
1471
+ queue=False
1472
+ ).then(
1473
+ fn=process_query_and_display_results,
1474
+ inputs=[query_input],
1475
+ outputs=[progress_display, results_accordion, feedback_accordion, hidden_results]
1476
+ )
1477
+
1478
+ feedback_button.click(
1479
+ fn=combine_feedback,
1480
+ inputs=[feedback_input, feedback_text, hidden_results],
1481
+ outputs=[feedback_status, feedback_stats]
1482
+ )
1483
+
1484
+ # Footer
1485
+ gr.HTML(
1486
+ """
1487
+ <footer>
1488
+ <p>Paraphrase-based Approach for Scrutinizing Systems (PAS2) - Advanced Hallucination Detection</p>
1489
+ <p>Using Mistral Large for generation and OpenAI o3-mini as judge</p>
1490
+ </footer>
1491
+ """
1492
+ )
1493
+
1494
+ return interface
1495
+
1496
# Standalone demo of the progress bar, independent of the main interface.
def test_progress():
    """Simple test function to demonstrate progress bar"""
    import gradio as gr
    import time

    def slow_process(progress=gr.Progress()):
        # Kick-off
        progress(0, desc="Starting process...")
        time.sleep(0.5)

        # Phase 1: paraphrase generation
        progress(0.15, desc="Generating paraphrases...")
        time.sleep(1)
        progress(0.3, desc="Paraphrases generated")
        time.sleep(0.5)

        # Phase 2: collecting responses, with incremental updates
        progress(0.35, desc="Getting responses...")
        for idx in range(3):
            time.sleep(0.8)
            fraction = 0.35 + (0.3 * ((idx+1) / 3))
            progress(fraction, desc=f"Getting responses ({idx+1}/3)...")

        progress(0.65, desc="All responses received")
        time.sleep(0.5)

        # Phase 3: judge analysis
        progress(0.7, desc="Analyzing responses for hallucinations...")
        time.sleep(2)

        # Done
        progress(1.0, desc="Analysis complete!")
        return "Process completed successfully!"

    with gr.Blocks() as demo:
        with gr.Row():
            btn = gr.Button("Start Process")
            output = gr.Textbox(label="Result")

        btn.click(fn=slow_process, outputs=output)

    demo.launch()
1539
+
1540
# Main application entry point: build the Gradio UI and serve it.
if __name__ == "__main__":
    logger.info("Starting PAS2 Hallucination Detector")
    interface = create_interface()
    logger.info("Launching Gradio interface...")
    interface.launch(
        server_name="0.0.0.0",  # Bind to all interfaces so the container is reachable
        server_port=7860,  # Default Hugging Face Spaces port
        show_api=False,  # No public API surface for this app
        quiet=True,  # Suppress startup chatter for Hugging Face deployment
        share=False,  # No tunnel link; the Space provides the public URL
        max_threads=10,  # Cap concurrent request handlers
        debug=False  # Production deployment: no debug output/reload
    )

# Uncomment this block (and comment out the one above) to run the
# isolated progress-bar demo instead of the main interface.
# if __name__ == "__main__":
#     test_progress()
pas2_fork/migrate_db.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ """
3
+ Migration script to move data from SQLite to MongoDB.
4
+ Run this once to migrate existing data to your new MongoDB database.
5
+ """
6
+
7
+ import os
8
+ import sqlite3
9
+ import json
10
+ from datetime import datetime
11
+ from pymongo import MongoClient
12
+ from dotenv import load_dotenv
13
+ import logging
14
+
15
+ # Configure logging
16
+ logging.basicConfig(
17
+ level=logging.INFO,
18
+ format='%(asctime)s [%(levelname)s] %(message)s',
19
+ handlers=[
20
+ logging.StreamHandler()
21
+ ]
22
+ )
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
def _parse_json_list(value):
    """Best-effort decode of a JSON-encoded list column; returns [] on bad/missing data."""
    try:
        return json.loads(value)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a missing (None) column; JSONDecodeError covers corrupt text.
        return []

def migrate_sqlite_to_mongodb():
    """Migrate feedback records from the local SQLite database into MongoDB.

    Reads MONGODB_URI from the environment (via .env), copies every row of the
    SQLite `feedback` table into the `hallucination_detector.feedback`
    collection, converting JSON-text columns to lists, the integer
    hallucination flag to bool, and the timestamp string to datetime.

    Returns:
        bool: True on success (including "nothing to migrate"), False on error.
    """
    # Load environment variables from a .env file, if present.
    load_dotenv()

    mongo_uri = os.environ.get("MONGODB_URI")
    if not mongo_uri:
        logger.error("MONGODB_URI not found in environment variables. Please set it before running this script.")
        return False

    try:
        # Connect to MongoDB and select the target collection.
        logger.info("Connecting to MongoDB...")
        mongo_client = MongoClient(mongo_uri)
        db = mongo_client["hallucination_detector"]
        feedback_collection = db["feedback"]

        # Report any pre-existing data (this script appends; it does not dedupe).
        existing_count = feedback_collection.count_documents({})
        logger.info(f"MongoDB already contains {existing_count} documents")

        # SQLite database lives next to this script under ./data.
        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
        db_path = os.path.join(data_dir, "feedback.db")

        if not os.path.exists(db_path):
            logger.warning(f"SQLite database not found at {db_path}. No data to migrate.")
            return True

        logger.info(f"Connecting to SQLite database at {db_path}...")
        conn = sqlite3.connect(db_path)
        try:
            conn.row_factory = sqlite3.Row  # enables column access by name
            cursor = conn.cursor()

            cursor.execute("SELECT * FROM feedback")
            rows = cursor.fetchall()

            if not rows:
                logger.info("No data found in SQLite database.")
                return True

            logger.info(f"Found {len(rows)} records in SQLite database")

            # Convert each SQLite row into a MongoDB document.
            mongo_docs = []
            for row in rows:
                doc = dict(row)

                # JSON-encoded list columns -> Python lists ([] on bad data).
                for field in ("paraphrased_queries", "paraphrased_responses", "conflicting_facts"):
                    doc[field] = _parse_json_list(doc.get(field))

                # SQLite stores booleans as integers.
                doc["hallucination_detected"] = bool(doc["hallucination_detected"])

                # Parse the timestamp; fall back to "now" for missing/garbled values.
                try:
                    doc["timestamp"] = datetime.strptime(doc.get("timestamp"), "%Y-%m-%d %H:%M:%S")
                except (TypeError, ValueError):
                    doc["timestamp"] = datetime.now()

                # Drop the SQLite primary key; MongoDB assigns its own _id.
                doc.pop("id", None)

                mongo_docs.append(doc)

            if mongo_docs:
                logger.info(f"Inserting {len(mongo_docs)} documents into MongoDB...")
                result = feedback_collection.insert_many(mongo_docs)
                logger.info(f"Successfully migrated {len(result.inserted_ids)} records to MongoDB")
        finally:
            # Close even on failure; the original only closed on the success path.
            conn.close()

        # Verify the post-migration document count.
        new_count = feedback_collection.count_documents({})
        logger.info(f"MongoDB now contains {new_count} documents")

        return True

    except Exception as e:
        logger.error(f"Error during migration: {str(e)}", exc_info=True)
        return False
133
if __name__ == "__main__":
    # Script entry point: run the migration once and report the outcome.
    logger.info("Starting migration from SQLite to MongoDB")
    if migrate_sqlite_to_mongodb():
        logger.info("Migration completed successfully")
    else:
        logger.error("Migration failed")
pas2_fork/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ mistralai
5
+ openai
6
+ pydantic
7
+ python-dotenv
8
+ pymongo
9
+ dnspython