AshwinP committed on
Commit
5707fbc
·
1 Parent(s): acf7ab5

Pricing Helper

Browse files
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ ANTHROPIC_API_KEY=<REDACTED: a live credential was committed here. Revoke and rotate this key, supply it through the deployment's secret settings, and keep .env out of version control>
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: PricingHelper
3
  emoji: 🐢
4
  colorFrom: indigo
5
  colorTo: blue
 
1
  ---
2
+ title: Pricing Assistant
3
  emoji: 🐢
4
  colorFrom: indigo
5
  colorTo: blue
app.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import json
4
+ from typing import List, Dict
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import plotly.express as px
8
+ import plotly.graph_objects as go
9
+ from anthropic import Anthropic
10
+ import time
11
+
12
+ # Import our modules
13
+ from src.invoice_generator import InvoiceGenerator
14
+ from src.vector_store import ContractVectorStore
15
+
16
+ # Load environment variables
17
+ load_dotenv()
18
+
19
+ # Page configuration
20
+ st.set_page_config(
21
+ page_title="Enterprise Pricing Audit Assistant",
22
+ page_icon="💰",
23
+ layout="wide"
24
+ )
25
+
26
+ # Load custom CSS
27
def load_css():
    """Inject the contents of ``styles.css`` into the page inside a ``<style>`` tag.

    Raises:
        OSError: if ``styles.css`` cannot be opened or read.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open("styles.css", encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
30
+
31
+ # Initialize LLM client
32
@st.cache_resource
def init_llm():
    """Create the Anthropic client used for invoice explanations.

    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable
    (``os.getenv`` yields ``None`` when it is not set). The function is wrapped
    in ``st.cache_resource``.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY")
    client = Anthropic(api_key=api_key)
    return client
35
+
36
+ # Initialize the sentence transformer model
37
@st.cache_resource
def load_embedding_model():
    """Return a ``SentenceTransformer`` for the 'all-MiniLM-L6-v2' model.

    The function is wrapped in ``st.cache_resource``.
    """
    # The import stays inside the function body, so sentence_transformers is
    # only imported when this function actually runs.
    from sentence_transformers import SentenceTransformer

    model_name = 'all-MiniLM-L6-v2'
    return SentenceTransformer(model_name)
41
+
42
def analyze_invoice_with_rag(invoice: Dict, contract: Dict, vector_store: ContractVectorStore) -> Dict:
    """Compare an invoice's charged amount with its expected amount and explain any gap.

    Args:
        invoice: mapping providing "invoice_id", "quantity", "amount_charged",
            "correct_amount" and "date".
        contract: the contract the invoice belongs to. It is not read here;
            the parameter is kept so existing callers keep working.
        vector_store: store whose ``search_relevant_terms(query)`` returns a
            list of dicts that each carry a "text" entry.

    Returns:
        A dict with "invoice_details", "discrepancy" (charged minus correct,
        rounded to 2 places), "discrepancy_percentage", "explanation" and
        "relevant_terms" (the raw search hits, not just their text).
    """
    quantity = invoice["quantity"]
    charged_amount = invoice["amount_charged"]
    correct_amount = invoice["correct_amount"]
    difference = charged_amount - correct_amount

    # Search for relevant contract terms
    relevant_terms = vector_store.search_relevant_terms(
        f"pricing rules for quantity {quantity} and amount {charged_amount}"
    )

    # A relative difference is undefined when the expected amount is zero;
    # report 0.0 instead of raising ZeroDivisionError.
    if correct_amount:
        discrepancy_percentage = round(difference / correct_amount * 100, 2)
    else:
        discrepancy_percentage = 0.0

    # Prepare context for LLM
    context = {
        "invoice_details": {
            "invoice_id": invoice["invoice_id"],
            "quantity": quantity,
            "charged_amount": charged_amount,
            "correct_amount": correct_amount,
            "date": invoice["date"]
        },
        "relevant_terms": [term["text"] for term in relevant_terms],
        "discrepancy": round(difference, 2),
        "discrepancy_percentage": discrepancy_percentage
    }

    # Only call the LLM when the amounts differ by more than one cent.
    if abs(context["discrepancy"]) > 0.01:
        prompt = f"""
        Analyze this invoice for pricing accuracy:

        Invoice Details:
        - Invoice ID: {context['invoice_details']['invoice_id']}
        - Quantity: {context['invoice_details']['quantity']}
        - Charged Amount: ${context['invoice_details']['charged_amount']:.2f}
        - Correct Amount: ${context['invoice_details']['correct_amount']:.2f}
        - Date: {context['invoice_details']['date']}

        Relevant Contract Terms:
        {chr(10).join('- ' + term for term in context['relevant_terms'])}

        Discrepancy found:
        - Amount Difference: ${context['discrepancy']:.2f}
        - Percentage Difference: {context['discrepancy_percentage']:.2f}%

        Please provide a detailed explanation of:
        1. Why there is a pricing discrepancy
        2. Which contract terms were violated
        3. How the correct price should have been calculated

        Keep the explanation clear and concise, focusing on the specific pricing rules that were not properly applied.
        """

        anthropic = init_llm()
        response = anthropic.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": prompt}]
        )

        explanation = response.content[0].text
    else:
        explanation = "Invoice pricing is correct according to contract terms."

    return {
        **context,
        "explanation": explanation,
        # Overrides the text-only list in `context` with the full search hits.
        "relevant_terms": relevant_terms
    }
110
+
111
def display_metrics(invoices_df):
    """Show four headline figures for the given invoices in a single row.

    Reads the 'has_error', 'amount_charged' and 'correct_amount' columns of
    ``invoices_df``.
    """
    with st.container():
        st.markdown('<div class="metrics-container">', unsafe_allow_html=True)
        columns = st.columns(4)

        flagged = invoices_df[invoices_df['has_error']]
        charged_sum = invoices_df['amount_charged'].sum()
        gap_sum = (invoices_df['amount_charged'] - invoices_df['correct_amount']).sum()

        # (label, value) pairs, in left-to-right display order.
        figures = [
            ("Total Invoices", len(invoices_df)),
            ("Incorrect Invoices", len(flagged)),
            ("Total Invoice Value", f"${charged_sum:,.2f}"),
            ("Total Pricing Discrepancy", f"${gap_sum:,.2f}"),
        ]
        for column, (label, value) in zip(columns, figures):
            with column:
                st.metric(label, value)
        st.markdown('</div>', unsafe_allow_html=True)
130
+
131
def display_invoice_tables(invoices_df):
    """Show correctly priced and mispriced invoices in two separate tabs.

    ``invoices_df`` is split on its 'has_error' column. The amount columns are
    turned into "$1,234.56"-style strings on copies, so the caller's frame is
    left untouched.
    """
    st.markdown('<div class="invoice-table">', unsafe_allow_html=True)

    # Split into the two groups, working on copies.
    error_mask = invoices_df['has_error']
    correct_invoices = invoices_df[~error_mask].copy()
    incorrect_invoices = invoices_df[error_mask].copy()

    def as_currency(value):
        return f"${value:,.2f}"

    for frame in (correct_invoices, incorrect_invoices):
        for column in ('amount_charged', 'correct_amount'):
            frame[column] = frame[column].apply(as_currency)

    ok_tab, error_tab = st.tabs(["🟢 Correct Invoices", "🔴 Incorrect Invoices"])

    with ok_tab:
        if correct_invoices.empty:
            st.info("No correctly priced invoices found.")
        else:
            st.dataframe(
                correct_invoices,
                column_config={
                    "invoice_id": "Invoice ID",
                    "date": "Date",
                    "quantity": "Quantity",
                    "amount_charged": "Amount",
                },
                hide_index=True
            )

    with error_tab:
        if incorrect_invoices.empty:
            st.info("No pricing discrepancies found.")
        else:
            st.dataframe(
                incorrect_invoices,
                column_config={
                    "invoice_id": "Invoice ID",
                    "date": "Date",
                    "quantity": "Quantity",
                    "amount_charged": "Charged Amount",
                    "correct_amount": "Correct Amount"
                },
                hide_index=True
            )

    st.markdown('</div>', unsafe_allow_html=True)
179
+
180
def display_contract_details(contract):
    """Render the header fields, pricing rules and special conditions of a contract.

    Args:
        contract: mapping with "contract_id", "client" and a "terms" mapping.
            "terms" must contain "base_rate"; "volume_discounts",
            "tiered_pricing" and "special_conditions" are each optional.
    """
    st.markdown('<div class="contract-details">', unsafe_allow_html=True)
    st.subheader("📄 Contract Details")

    terms = contract['terms']

    # Basic contract information
    col1, col2, col3 = st.columns(3)
    with col1:
        st.write("**Contract ID:**", contract['contract_id'])
    with col2:
        st.write("**Client:**", contract['client'])
    with col3:
        st.write("**Base Rate:**", f"${terms['base_rate']}")

    # Pricing rules
    with st.expander("🏷️ Pricing Rules"):
        if "volume_discounts" in terms:
            st.write("**Volume Discounts:**")
            for discount in terms["volume_discounts"]:
                # ':g' hides binary floating-point noise in the product
                # (0.07 * 100 would otherwise print as 7.000000000000001).
                st.write(
                    f"• {discount['discount'] * 100:g}% off for quantities ≥ {discount['threshold']:,}"
                )

        if "tiered_pricing" in terms:
            st.write("**Tiered Pricing:**")
            for tier in terms["tiered_pricing"]:
                st.write(f"• {tier['tier']}: {tier['rate']}x base rate")

    # Special conditions (a contract without any shows an empty expander
    # instead of raising KeyError)
    with st.expander("📋 Special Conditions"):
        for condition in terms.get("special_conditions", []):
            st.write(f"• {condition}")

    st.markdown('</div>', unsafe_allow_html=True)
211
+
212
def initialize_data():
    """Load contracts and invoices and index every contract's terms.

    Returns:
        A ``(contracts, invoices, vector_store)`` tuple.

    Any exception raised along the way is reported with ``st.error`` and
    followed by ``st.stop()``.
    """
    try:
        model = load_embedding_model()
        invoice_source = InvoiceGenerator(data_dir="data")

        # Generate the data set unless both JSON files are already on disk.
        if not (os.path.exists("data/contracts.json") and os.path.exists("data/invoices.json")):
            invoice_source.generate_and_save()

        contracts = invoice_source.load_contracts()
        invoices = invoice_source.load_or_generate_invoices()

        # Either list came back empty: regenerate once and reload.
        if not (contracts and invoices):
            st.error("No data found. Generating new data...")
            invoice_source.generate_and_save()
            contracts = invoice_source.load_contracts()
            invoices = invoice_source.load_or_generate_invoices()

        # Index the terms of every contract in one shared store.
        vector_store = ContractVectorStore(model)
        for entry in contracts:
            vector_store.add_contract_terms(entry)

        return contracts, invoices, vector_store

    except Exception as e:
        st.error(f"Error initializing data: {str(e)}")
        st.stop()
245
+
246
def main():
    """Render the pricing-audit page.

    Flow: load the CSS (a failure only produces a warning), initialise the
    data, show global metrics, let the user pick a contract, then show three
    tabs for that contract: overview metrics and chart, invoice tables, and a
    per-invoice analysis. Any exception in the main flow is shown with
    ``st.error`` and followed by ``st.stop()``.
    """
    # Load custom CSS
    try:
        load_css()
    except Exception as e:
        st.warning(f"Could not load custom CSS: {str(e)}")

    st.title("🔍 Enterprise Pricing Audit Assistant")

    try:
        # Initialize data and models
        with st.spinner('Loading data and initializing models...'):
            contracts, invoices, vector_store = initialize_data()

        # Convert invoices to DataFrame
        invoices_df = pd.DataFrame(invoices)

        # Display metrics
        display_metrics(invoices_df)

        # Display contract selection
        selected_contract_id = st.selectbox(
            "Select Contract",
            options=[c["contract_id"] for c in contracts],
            format_func=lambda x: f"{x} - {next(c['client'] for c in contracts if c['contract_id'] == x)}"
        )

        # Get selected contract
        selected_contract = next(c for c in contracts if c["contract_id"] == selected_contract_id)

        # Display contract details
        display_contract_details(selected_contract)

        # Filter invoices for the selected contract. Take a copy: a column is
        # added below, and writing into a filtered slice of invoices_df
        # triggers pandas' SettingWithCopy warning.
        contract_invoices_df = invoices_df[invoices_df['contract_id'] == selected_contract_id].copy()

        # Display invoice analysis
        st.subheader("📊 Invoice Analysis")

        # Create tabs for different views
        tab1, tab2, tab3 = st.tabs(["📈 Overview", "📑 Invoice Details", "🔍 Detailed Analysis"])

        with tab1:
            # Summary metrics for the selected contract
            total_contract_value = contract_invoices_df['amount_charged'].sum()
            total_contract_discrepancy = (
                contract_invoices_df['amount_charged'] - contract_invoices_df['correct_amount']
            ).sum()

            invoice_count = len(contract_invoices_df)
            error_count = len(contract_invoices_df[contract_invoices_df['has_error']])
            # A contract with no invoices has a 0% error rate rather than
            # raising ZeroDivisionError.
            error_rate = error_count / invoice_count * 100 if invoice_count else 0.0

            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Contract Value", f"${total_contract_value:,.2f}")
            with col2:
                st.metric("Total Discrepancy", f"${total_contract_discrepancy:,.2f}")
            with col3:
                st.metric("Error Rate", f"{error_rate:.1f}%")

            # Create visualization
            if not contract_invoices_df.empty:
                # This column stays on the frame, so it also appears in the
                # invoice tables rendered in the second tab.
                contract_invoices_df['error_amount'] = (
                    contract_invoices_df['amount_charged'] -
                    contract_invoices_df['correct_amount']
                )

                # Scatter plot with one trace per invoice group
                fig = go.Figure()
                error_mask = contract_invoices_df['has_error']
                trace_specs = [
                    (contract_invoices_df[~error_mask], 'Correct Invoices', 'green'),
                    (contract_invoices_df[error_mask], 'Incorrect Invoices', 'red'),
                ]
                for frame, trace_name, color in trace_specs:
                    if not frame.empty:
                        fig.add_trace(go.Scatter(
                            x=frame['date'],
                            y=frame['amount_charged'],
                            mode='markers',
                            name=trace_name,
                            marker=dict(color=color, size=10),
                        ))

                fig.update_layout(
                    title='Invoice Amounts Over Time',
                    xaxis_title='Date',
                    yaxis_title='Amount ($)',
                    hovermode='closest'
                )

                st.plotly_chart(fig, use_container_width=True)

        with tab2:
            # Display invoice tables
            display_invoice_tables(contract_invoices_df)

        with tab3:
            # Detailed analysis of incorrect invoices.
            # NOTE(review): analyze_invoice_with_rag is called for every
            # flagged invoice each time this function runs, so each run can
            # issue one LLM request per flagged invoice.
            incorrect_invoices = contract_invoices_df[contract_invoices_df['has_error']]
            if not incorrect_invoices.empty:
                for _, invoice in incorrect_invoices.iterrows():
                    with st.expander(f"Invoice {invoice['invoice_id']} Analysis"):
                        analysis = analyze_invoice_with_rag(
                            invoice.to_dict(),
                            selected_contract,
                            vector_store
                        )

                        # Display analysis results
                        st.write("**Discrepancy Amount:**",
                                 f"${analysis['discrepancy']:.2f} "
                                 f"({analysis['discrepancy_percentage']}%)")

                        st.write("**Relevant Contract Terms:**")
                        for term in analysis['relevant_terms']:
                            st.write(f"• {term['text']}")

                        st.write("**Analysis:**")
                        st.write(analysis['explanation'])
            else:
                st.info("No pricing discrepancies found for this contract.")

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        st.stop()
382
+
383
+ if __name__ == "__main__":
384
+ main()
data/contracts.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "contracts": [
3
+ {
4
+ "contract_id": "CNT001",
5
+ "client": "TechCorp Solutions",
6
+ "start_date": "2024-01-01",
7
+ "end_date": "2024-12-31",
8
+ "terms": {
9
+ "base_rate": 100,
10
+ "volume_discounts": [
11
+ {"threshold": 1000, "discount": 0.10},
12
+ {"threshold": 5000, "discount": 0.15},
13
+ {"threshold": 10000, "discount": 0.20}
14
+ ],
15
+ "special_conditions": [
16
+ "Holiday surcharge: 15% on federal holidays",
17
+ "Rush order fee: Additional 25% for same-day delivery",
18
+ "Bulk order minimum: 500 units per order for volume pricing",
19
+ "Early payment discount: 2% if paid within 10 days",
20
+ "Multi-year commitment: 5% additional discount for 3+ year contract"
21
+ ]
22
+ }
23
+ },
24
+ {
25
+ "contract_id": "CNT002",
26
+ "client": "Global Manufacturing Inc",
27
+ "start_date": "2024-01-01",
28
+ "end_date": "2024-12-31",
29
+ "terms": {
30
+ "base_rate": 85,
31
+ "tiered_pricing": [
32
+ {"tier": "Standard", "rate": 1.0},
33
+ {"tier": "Premium", "rate": 1.25},
34
+ {"tier": "Enterprise", "rate": 1.5}
35
+ ],
36
+ "special_conditions": [
37
+ "Annual commitment discount: 5% off base rate",
38
+ "Multi-location discount: 3% per additional location",
39
+ "Payment terms: 2% discount for payment within 10 days",
40
+ "Volume guarantee: Minimum 1000 units per quarter",
41
+ "Service level agreement: 99.9% delivery accuracy required"
42
+ ]
43
+ }
44
+ }
45
+ ]
46
+ }
data/invoices.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "invoices": [
3
+ {
4
+ "invoice_id": "INV6886",
5
+ "contract_id": "CNT001",
6
+ "date": "2024-01-19",
7
+ "quantity": 1315,
8
+ "amount_charged": 118350.0,
9
+ "correct_amount": 118350.0,
10
+ "has_error": false
11
+ },
12
+ {
13
+ "invoice_id": "INV5025",
14
+ "contract_id": "CNT002",
15
+ "date": "2024-01-25",
16
+ "quantity": 5058,
17
+ "amount_charged": 429930.0,
18
+ "correct_amount": 429930.0,
19
+ "has_error": false
20
+ },
21
+ {
22
+ "invoice_id": "INV8930",
23
+ "contract_id": "CNT001",
24
+ "date": "2024-03-15",
25
+ "quantity": 1927,
26
+ "amount_charged": 173430.0,
27
+ "correct_amount": 173430.0,
28
+ "has_error": false
29
+ },
30
+ {
31
+ "invoice_id": "INV2619",
32
+ "contract_id": "CNT002",
33
+ "date": "2024-03-15",
34
+ "quantity": 7024,
35
+ "amount_charged": 826096.41,
36
+ "correct_amount": 746300.0,
37
+ "has_error": true
38
+ },
39
+ {
40
+ "invoice_id": "INV7771",
41
+ "contract_id": "CNT002",
42
+ "date": "2024-05-07",
43
+ "quantity": 3872,
44
+ "amount_charged": 329120.0,
45
+ "correct_amount": 329120.0,
46
+ "has_error": false
47
+ },
48
+ {
49
+ "invoice_id": "INV8599",
50
+ "contract_id": "CNT002",
51
+ "date": "2024-05-08",
52
+ "quantity": 3108,
53
+ "amount_charged": 264180.0,
54
+ "correct_amount": 264180.0,
55
+ "has_error": false
56
+ },
57
+ {
58
+ "invoice_id": "INV2289",
59
+ "contract_id": "CNT002",
60
+ "date": "2024-05-09",
61
+ "quantity": 10046,
62
+ "amount_charged": 1280865.0,
63
+ "correct_amount": 1280865.0,
64
+ "has_error": false
65
+ },
66
+ {
67
+ "invoice_id": "INV1743",
68
+ "contract_id": "CNT001",
69
+ "date": "2024-07-25",
70
+ "quantity": 8237,
71
+ "amount_charged": 700145.0,
72
+ "correct_amount": 700145.0,
73
+ "has_error": false
74
+ },
75
+ {
76
+ "invoice_id": "INV3707",
77
+ "contract_id": "CNT002",
78
+ "date": "2024-08-06",
79
+ "quantity": 14421,
80
+ "amount_charged": 1532231.25,
81
+ "correct_amount": 1532231.25,
82
+ "has_error": false
83
+ },
84
+ {
85
+ "invoice_id": "INV7751",
86
+ "contract_id": "CNT001",
87
+ "date": "2024-09-25",
88
+ "quantity": 6043,
89
+ "amount_charged": 513655.0,
90
+ "correct_amount": 513655.0,
91
+ "has_error": false
92
+ },
93
+ {
94
+ "invoice_id": "INV8916",
95
+ "contract_id": "CNT001",
96
+ "date": "2024-10-29",
97
+ "quantity": 12740,
98
+ "amount_charged": 1019200.0,
99
+ "correct_amount": 1019200.0,
100
+ "has_error": false
101
+ },
102
+ {
103
+ "invoice_id": "INV5521",
104
+ "contract_id": "CNT002",
105
+ "date": "2024-11-11",
106
+ "quantity": 3530,
107
+ "amount_charged": 450075.0,
108
+ "correct_amount": 450075.0,
109
+ "has_error": false
110
+ },
111
+ {
112
+ "invoice_id": "INV9580",
113
+ "contract_id": "CNT001",
114
+ "date": "2024-11-14",
115
+ "quantity": 11679,
116
+ "amount_charged": 934320.0,
117
+ "correct_amount": 934320.0,
118
+ "has_error": false
119
+ },
120
+ {
121
+ "invoice_id": "INV8276",
122
+ "contract_id": "CNT001",
123
+ "date": "2024-11-17",
124
+ "quantity": 5203,
125
+ "amount_charged": 442255.0,
126
+ "correct_amount": 442255.0,
127
+ "has_error": false
128
+ },
129
+ {
130
+ "invoice_id": "INV3439",
131
+ "contract_id": "CNT001",
132
+ "date": "2024-11-18",
133
+ "quantity": 5043,
134
+ "amount_charged": 489773.94,
135
+ "correct_amount": 428655.0,
136
+ "has_error": true
137
+ },
138
+ {
139
+ "invoice_id": "INV3344",
140
+ "contract_id": "CNT002",
141
+ "date": "2024-11-20",
142
+ "quantity": 5737,
143
+ "amount_charged": 609556.25,
144
+ "correct_amount": 609556.25,
145
+ "has_error": false
146
+ },
147
+ {
148
+ "invoice_id": "INV5645",
149
+ "contract_id": "CNT001",
150
+ "date": "2024-11-24",
151
+ "quantity": 7168,
152
+ "amount_charged": 609280.0,
153
+ "correct_amount": 609280.0,
154
+ "has_error": false
155
+ },
156
+ {
157
+ "invoice_id": "INV6786",
158
+ "contract_id": "CNT002",
159
+ "date": "2024-12-09",
160
+ "quantity": 8757,
161
+ "amount_charged": 744345.0,
162
+ "correct_amount": 744345.0,
163
+ "has_error": false
164
+ },
165
+ {
166
+ "invoice_id": "INV3445",
167
+ "contract_id": "CNT001",
168
+ "date": "2024-12-15",
169
+ "quantity": 4979,
170
+ "amount_charged": 448110.0,
171
+ "correct_amount": 448110.0,
172
+ "has_error": false
173
+ }
174
+ ]
175
+ }
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ sentence-transformers
5
+ faiss-cpu
6
+ torch
7
+ anthropic
8
+ python-dotenv
9
+ plotly
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (188 Bytes). View file
 
src/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (176 Bytes). View file
 
src/__pycache__/invoice_generator.cpython-311.pyc ADDED
Binary file (10.8 kB). View file
 
src/__pycache__/vector_store.cpython-311.pyc ADDED
Binary file (4.36 kB). View file
 
src/invoice_generator.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from datetime import datetime
3
+ import random
4
+ import pandas as pd
5
+ from typing import List, Dict
6
+ import os
7
+
8
class InvoiceGenerator:
    """Create, store and load synthetic contract and invoice data as JSON files.

    All files live in ``data_dir``: ``contracts.json`` holds
    ``{"contracts": [...]}`` and ``invoices.json`` holds ``{"invoices": [...]}``.
    """

    def __init__(self, data_dir: str = "data"):
        """Initialize the invoice generator with a data directory.

        Creates ``data_dir`` if needed and seeds ``contracts.json`` when it
        does not exist yet.
        """
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)
        self.ensure_contract_file()

    def ensure_contract_file(self) -> None:
        """Ensure contracts.json exists, writing two sample contracts if it is missing."""
        contracts_file = os.path.join(self.data_dir, "contracts.json")
        if os.path.exists(contracts_file):
            return

        initial_contracts = {
            "contracts": [
                {
                    "contract_id": "CNT001",
                    "client": "TechCorp Solutions",
                    "start_date": "2024-01-01",
                    "end_date": "2024-12-31",
                    "terms": {
                        "base_rate": 100,
                        "volume_discounts": [
                            {"threshold": 1000, "discount": 0.10},
                            {"threshold": 5000, "discount": 0.15},
                            {"threshold": 10000, "discount": 0.20}
                        ],
                        "special_conditions": [
                            "Holiday surcharge: 15% on federal holidays",
                            "Rush order fee: Additional 25% for same-day delivery",
                            "Bulk order minimum: 500 units per order for volume pricing",
                            "Early payment discount: 2% if paid within 10 days",
                            "Multi-year commitment: 5% additional discount for 3+ year contract"
                        ]
                    }
                },
                {
                    "contract_id": "CNT002",
                    "client": "Global Manufacturing Inc",
                    "start_date": "2024-01-01",
                    "end_date": "2024-12-31",
                    "terms": {
                        "base_rate": 85,
                        "tiered_pricing": [
                            {"tier": "Standard", "rate": 1.0},
                            {"tier": "Premium", "rate": 1.25},
                            {"tier": "Enterprise", "rate": 1.5}
                        ],
                        "special_conditions": [
                            "Annual commitment discount: 5% off base rate",
                            "Multi-location discount: 3% per additional location",
                            "Payment terms: 2% discount for payment within 10 days",
                            "Volume guarantee: Minimum 1000 units per quarter",
                            "Service level agreement: 99.9% delivery accuracy required"
                        ]
                    }
                }
            ]
        }
        with open(contracts_file, 'w', encoding="utf-8") as f:
            json.dump(initial_contracts, f, indent=4)

    def load_contracts(self) -> List[Dict]:
        """Load contracts from JSON file.

        Raises:
            Exception: if the file cannot be read or parsed, or has no
                "contracts" entry. The original error is chained as the cause.
        """
        contracts_file = os.path.join(self.data_dir, "contracts.json")
        try:
            with open(contracts_file, 'r', encoding="utf-8") as f:
                contracts_data = json.load(f)
            return contracts_data['contracts']
        except Exception as e:
            # Keep the generic exception type callers may already catch, but
            # preserve the underlying cause for debugging.
            raise Exception(f"Error loading contracts: {str(e)}") from e

    def calculate_correct_price(self, contract: Dict, quantity: int) -> float:
        """Calculate the correct price based on contract terms and quantity.

        Starts from ``base_rate * quantity``. If the contract has
        "volume_discounts", the discount of the highest threshold not
        exceeding ``quantity`` is applied. If it has "tiered_pricing", the
        price is multiplied by the rate of a randomly chosen tier, so the
        result is not deterministic for such contracts. The result is rounded
        to 2 decimal places.
        """
        terms = contract["terms"]
        price = terms["base_rate"] * quantity

        # Apply volume discounts if applicable
        if "volume_discounts" in terms:
            reached = [d for d in terms["volume_discounts"] if quantity >= d["threshold"]]
            if reached:
                best = max(reached, key=lambda d: d["threshold"])
                if best["discount"] > 0:
                    price *= (1 - best["discount"])

        # Apply tiered pricing if applicable
        if "tiered_pricing" in terms:
            # Randomly select a tier for this invoice
            tier = random.choice(terms["tiered_pricing"])
            price *= tier["rate"]

        return round(price, 2)

    def _next_invoice_id(self, used_ids: set) -> str:
        """Return a random "INV<number>" id that is not in ``used_ids`` and record it."""
        upper = 9999
        while True:
            candidate = f"INV{random.randint(1000, upper)}"
            if candidate not in used_ids:
                used_ids.add(candidate)
                return candidate
            # Every id in 1000..upper is taken: widen the range by one digit
            # so the loop always terminates.
            if len(used_ids) >= upper - 999:
                upper = upper * 10 + 9

    def generate_invoices(self, contracts: List[Dict]) -> List[Dict]:
        """Generate synthetic invoices based on contract data.

        Each contract gets 5-10 invoices. About 30% of them are overcharged by
        5-15% and flagged with ``has_error``. Invoice ids are unique within
        the returned list, which is sorted by date.
        """
        invoices = []
        used_ids = set()
        for contract in contracts:
            # Contract period, parsed once per contract
            start_date = datetime.strptime(contract["start_date"], "%Y-%m-%d")
            end_date = datetime.strptime(contract["end_date"], "%Y-%m-%d")
            period_days = (end_date - start_date).days

            # Generate multiple invoices per contract
            for _ in range(random.randint(5, 10)):
                # Randomly decide if this invoice will have an error
                has_error = random.random() < 0.3  # 30% chance of error

                # Generate random quantity between contract minimums and maximums
                min_quantity = 500  # Minimum from special conditions
                max_quantity = 15000  # Arbitrary maximum
                quantity = random.randint(min_quantity, max_quantity)

                # Calculate correct price
                correct_price = self.calculate_correct_price(contract, quantity)

                # If we want an error, modify the price slightly
                charged_amount = correct_price
                if has_error:
                    charged_amount *= random.uniform(1.05, 1.15)  # 5-15% overcharge

                # Generate random date within contract period
                invoice_date = start_date + pd.Timedelta(days=random.randint(0, period_days))

                invoices.append({
                    "invoice_id": self._next_invoice_id(used_ids),
                    "contract_id": contract["contract_id"],
                    "date": invoice_date.strftime("%Y-%m-%d"),
                    "quantity": quantity,
                    "amount_charged": round(charged_amount, 2),
                    "correct_amount": round(correct_price, 2),
                    "has_error": has_error
                })

        return sorted(invoices, key=lambda x: x["date"])

    def save_invoices(self, invoices: List[Dict]) -> None:
        """Save generated invoices to JSON file."""
        invoices_file = os.path.join(self.data_dir, "invoices.json")
        with open(invoices_file, 'w', encoding="utf-8") as f:
            json.dump({"invoices": invoices}, f, indent=2)

    def generate_and_save(self) -> None:
        """Generate and save invoices in one step."""
        contracts = self.load_contracts()
        invoices = self.generate_invoices(contracts)
        self.save_invoices(invoices)

    def _read_invoices(self, invoices_file: str) -> List[Dict]:
        """Return the "invoices" list stored in ``invoices_file`` ([] if the key is absent)."""
        with open(invoices_file, 'r', encoding="utf-8") as f:
            data = json.load(f)
        return data.get('invoices', [])

    def load_or_generate_invoices(self) -> List[Dict]:
        """Load existing invoices or generate new ones if they don't exist.

        If loading fails for any reason, the invoices are regenerated once and
        read again; an error during that second attempt propagates.
        """
        invoices_file = os.path.join(self.data_dir, "invoices.json")
        try:
            if not os.path.exists(invoices_file):
                self.generate_and_save()
            return self._read_invoices(invoices_file)
        except Exception as e:
            # Deliberate best-effort recovery: an unreadable or corrupt file is
            # replaced with freshly generated data.
            print(f"Error loading invoices: {str(e)}")
            print("Generating new invoices...")
            self.generate_and_save()
            return self._read_invoices(invoices_file)
179
+
180
+ if __name__ == "__main__":
181
+ # This allows running the generator directly to create/update the files
182
+ generator = InvoiceGenerator()
183
+ generator.generate_and_save()
184
+ print("Successfully generated invoice and contract data!")
src/vector_store.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sentence_transformers import SentenceTransformer
3
+ from typing import List, Dict
4
+ import faiss
5
+
6
class ContractVectorStore:
    """In-memory store of contract-term sentences searchable by embedding distance.

    Texts are embedded with the supplied model and kept in a
    ``faiss.IndexFlatL2`` index; ``self.texts[i]`` is the text behind index
    row ``i``. Terms from every added contract share one index.
    """

    def __init__(self, model: SentenceTransformer):
        self.model = model
        self.index = None  # created lazily on the first _add_texts call
        self.texts = []
        # Default width for 'all-MiniLM-L6-v2'; replaced by the model's actual
        # embedding width when the index is created.
        self.dimension = 384

    def add_contract_terms(self, contract: Dict) -> None:
        """Add contract terms to the vector store.

        Turns the contract's "terms" (optional "volume_discounts",
        "tiered_pricing" and "special_conditions", plus the required
        "base_rate") into one sentence each and indexes them.
        """
        contract_terms = contract["terms"]
        terms = []

        # Add volume discounts. ':g' keeps binary floating-point noise out of
        # the indexed text (0.07 * 100 would otherwise read 7.000000000000001).
        for discount in contract_terms.get("volume_discounts", []):
            terms.append(
                f"Volume discount: {discount['discount'] * 100:g}% off for quantities >= {discount['threshold']}"
            )

        # Add tiered pricing
        for tier in contract_terms.get("tiered_pricing", []):
            terms.append(
                f"Tier {tier['tier']}: Rate multiplier of {tier['rate']}x base rate"
            )

        # Add special conditions (optional)
        terms.extend(contract_terms.get("special_conditions", []))

        # Add base rate
        terms.append(f"Base rate is ${contract_terms['base_rate']} per unit")

        # Create embeddings and update index
        self._add_texts(terms)

    def _add_texts(self, texts: List[str]) -> None:
        """Add texts to the vector store (no-op for an empty list)."""
        if not texts:
            return

        # Generate embeddings as float32, the dtype the index is fed with.
        embeddings = np.asarray(self.model.encode(texts), dtype='float32')

        # Initialize index if needed, sized from the embeddings themselves so
        # models with a different output width also work.
        if self.index is None:
            self.dimension = embeddings.shape[1]
            self.index = faiss.IndexFlatL2(self.dimension)

        # Add to index
        self.index.add(embeddings)
        self.texts.extend(texts)

    def search_relevant_terms(self, query: str, k: int = 3) -> List[Dict]:
        """Search for relevant terms using the query.

        Args:
            query: free-text query to embed.
            k: maximum number of hits to return.

        Returns:
            Up to ``k`` dicts with "text" and "score", where score is
            ``1 / (1 + distance)`` for the distance reported by the index.
            Empty when nothing has been indexed.
        """
        if self.index is None or not self.texts:
            return []

        # Never request more neighbours than there are stored texts.
        k = min(k, len(self.texts))
        if k <= 0:
            return []

        # Generate query embedding with shape (1, dimension)
        query_embedding = np.asarray(self.model.encode([query]), dtype='float32')

        # Search
        distances, indices = self.index.search(query_embedding, k)

        # Return results
        results = []
        for dist, idx in zip(distances[0], indices[0]):
            # FAISS reports missing neighbours as label -1; without the lower
            # bound that would index self.texts[-1] and return a wrong term.
            if 0 <= idx < len(self.texts):
                results.append({
                    "text": self.texts[idx],
                    "score": float(1 / (1 + dist))  # Convert distance to similarity score
                })

        return results
styles.css ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .main {
2
+ padding: 0rem 1rem;
3
+ }
4
+
5
+ .stAlert {
6
+ padding: 1rem;
7
+ margin: 1rem 0;
8
+ border-radius: 0.5rem;
9
+ }
10
+
11
+ .pricing-card {
12
+ padding: 1.5rem;
13
+ border-radius: 0.5rem;
14
+ background-color: #f8f9fa;
15
+ margin: 1rem 0;
16
+ }
17
+
18
+ .metric-card {
19
+ background-color: white;
20
+ padding: 1rem;
21
+ border-radius: 0.5rem;
22
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
23
+ }
24
+
25
+ /* Dashboard specific styles */
26
+ .st-emotion-cache-16idsys p {
27
+ font-size: 14px;
28
+ margin-bottom: 0.5rem;
29
+ }
30
+
31
+ /* Contract details styles */
32
+ .contract-details {
33
+ background-color: #f8f9fa;
34
+ padding: 1rem;
35
+ border-radius: 0.5rem;
36
+ margin: 1rem 0;
37
+ }
38
+
39
+ /* Invoice table styles */
40
+ .invoice-table {
41
+ margin-top: 1rem;
42
+ }
43
+
44
+ /* Metrics section */
45
+ .metrics-container {
46
+ margin: 1.5rem 0;
47
+ }
48
+
49
+ /* Alert styles */
50
+ .alert-error {
51
+ background-color: #ffe6e6;
52
+ border-left: 4px solid #ff0000;
53
+ }
54
+
55
+ .alert-success {
56
+ background-color: #e6ffe6;
57
+ border-left: 4px solid #00ff00;
58
+ }