"""Synthetic contract and invoice data generator.

Creates a ``contracts.json`` seed file (if missing) and a ``invoices.json``
file of randomly generated invoices, a fraction of which are deliberately
overcharged so that downstream tooling has pricing errors to detect.
"""

import json
import os
import random
from datetime import datetime, timedelta
from typing import Dict, List, Set

import pandas as pd  # noqa: F401  (retained from the original import block)

# Date format used for contract periods and invoice dates (ISO 8601, so the
# string form sorts chronologically).
_DATE_FORMAT = "%Y-%m-%d"

# Seed data written to contracts.json the first time the generator runs.
_INITIAL_CONTRACTS = {
    "contracts": [
        {
            "contract_id": "CNT001",
            "client": "TechCorp Solutions",
            "start_date": "2024-01-01",
            "end_date": "2024-12-31",
            "terms": {
                "base_rate": 100,
                "volume_discounts": [
                    {"threshold": 1000, "discount": 0.10},
                    {"threshold": 5000, "discount": 0.15},
                    {"threshold": 10000, "discount": 0.20},
                ],
                "special_conditions": [
                    "Holiday surcharge: 15% on federal holidays",
                    "Rush order fee: Additional 25% for same-day delivery",
                    "Bulk order minimum: 500 units per order for volume pricing",
                    "Early payment discount: 2% if paid within 10 days",
                    "Multi-year commitment: 5% additional discount for 3+ year contract",
                ],
            },
        },
        {
            "contract_id": "CNT002",
            "client": "Global Manufacturing Inc",
            "start_date": "2024-01-01",
            "end_date": "2024-12-31",
            "terms": {
                "base_rate": 85,
                "tiered_pricing": [
                    {"tier": "Standard", "rate": 1.0},
                    {"tier": "Premium", "rate": 1.25},
                    {"tier": "Enterprise", "rate": 1.5},
                ],
                "special_conditions": [
                    "Annual commitment discount: 5% off base rate",
                    "Multi-location discount: 3% per additional location",
                    "Payment terms: 2% discount for payment within 10 days",
                    "Volume guarantee: Minimum 1000 units per quarter",
                    "Service level agreement: 99.9% delivery accuracy required",
                ],
            },
        },
    ]
}


class InvoiceGenerator:
    """Generate, persist and load synthetic invoices for a set of contracts.

    All files live under ``data_dir``: ``contracts.json`` (input, seeded with
    two sample contracts when absent) and ``invoices.json`` (output).
    """

    # Order quantity range for a generated invoice (inclusive).
    MIN_QUANTITY = 500
    MAX_QUANTITY = 15000
    # Probability that a generated invoice is overcharged.
    ERROR_RATE = 0.3
    # Number of invoices generated per contract (inclusive range).
    MIN_INVOICES_PER_CONTRACT = 5
    MAX_INVOICES_PER_CONTRACT = 10
    # Multiplier range applied to the correct price of an erroneous invoice
    # (a 5-15% overcharge).
    MIN_OVERCHARGE_FACTOR = 1.05
    MAX_OVERCHARGE_FACTOR = 1.15

    def __init__(self, data_dir: str = "data"):
        """Initialize the invoice generator with a data directory.

        Creates ``data_dir`` if needed and seeds ``contracts.json`` inside it
        when that file does not exist yet.
        """
        self.data_dir = data_dir
        os.makedirs(data_dir, exist_ok=True)
        self.ensure_contract_file()

    def _data_path(self, filename: str) -> str:
        """Return the path of *filename* inside the data directory."""
        return os.path.join(self.data_dir, filename)

    def ensure_contract_file(self) -> None:
        """Ensure contracts.json exists with initial data.

        An existing file is never overwritten.
        """
        contracts_file = self._data_path("contracts.json")
        try:
            # Exclusive-create mode makes "create only if missing" a single
            # atomic step instead of a separate exists-check followed by a
            # write.
            with open(contracts_file, "x", encoding="utf-8") as f:
                json.dump(_INITIAL_CONTRACTS, f, indent=4)
        except FileExistsError:
            pass

    def load_contracts(self) -> List[Dict]:
        """Load contracts from JSON file.

        Returns:
            The list stored under the top-level ``"contracts"`` key.

        Raises:
            Exception: if the file cannot be read or parsed, or lacks the
                ``"contracts"`` key. The underlying error is chained as
                ``__cause__``.
        """
        contracts_file = self._data_path("contracts.json")
        try:
            with open(contracts_file, "r", encoding="utf-8") as f:
                contracts_data = json.load(f)
            return contracts_data["contracts"]
        except Exception as e:
            # The exception type and message are kept for existing callers;
            # chaining preserves the original traceback for debugging.
            raise Exception(f"Error loading contracts: {str(e)}") from e

    def calculate_correct_price(self, contract: Dict, quantity: int) -> float:
        """Calculate the correct price based on contract terms and quantity.

        The price starts at ``base_rate * quantity``. If the contract defines
        ``volume_discounts``, the discount with the highest threshold that
        ``quantity`` reaches is applied. If it defines ``tiered_pricing``, one
        tier is picked at random and its ``rate`` multiplier is applied, so
        the result is not deterministic for tiered contracts.

        Args:
            contract: Contract dict with a ``"terms"`` mapping containing at
                least ``"base_rate"``.
            quantity: Number of units invoiced.

        Returns:
            The price rounded to two decimal places.
        """
        terms = contract["terms"]
        price = terms["base_rate"] * quantity

        # Apply the volume discount with the highest threshold reached.
        reached = [
            entry
            for entry in terms.get("volume_discounts") or ()
            if quantity >= entry["threshold"]
        ]
        if reached:
            discount = max(reached, key=lambda entry: entry["threshold"])["discount"]
            if discount > 0:
                price *= 1 - discount

        # Apply a randomly selected pricing tier; an empty tier list is
        # treated the same as no tiered pricing instead of crashing.
        tiers = terms.get("tiered_pricing")
        if tiers:
            price *= random.choice(tiers)["rate"]

        return round(price, 2)

    @staticmethod
    def _next_invoice_id(used_ids: Set[str]) -> str:
        """Return a random ``INV<number>`` id not in *used_ids* and record it.

        Numbers are four digits long until more than half of that space is in
        use; after that the range moves up by one digit so that the retry
        loop always terminates quickly.
        """
        low, high = 1000, 9999
        while len(used_ids) * 2 > high - low + 1:
            low, high = high + 1, high * 10 + 9
        while True:
            candidate = f"INV{random.randint(low, high)}"
            if candidate not in used_ids:
                used_ids.add(candidate)
                return candidate

    def generate_invoices(self, contracts: List[Dict]) -> List[Dict]:
        """Generate synthetic invoices based on contract data.

        For every contract a random number of invoices is produced, each with
        a random quantity and a random date inside the contract period. With
        probability ``ERROR_RATE`` an invoice is overcharged by 5-15%.

        Args:
            contracts: Contract dicts as returned by ``load_contracts``.

        Returns:
            Invoice dicts with unique ``invoice_id`` values, sorted by date.

        Raises:
            ValueError: if a contract's ``end_date`` precedes its
                ``start_date``.
        """
        invoices = []
        used_ids: Set[str] = set()

        for contract in contracts:
            # The contract period is the same for all of its invoices, so
            # parse it once per contract.
            start_date = datetime.strptime(contract["start_date"], _DATE_FORMAT)
            end_date = datetime.strptime(contract["end_date"], _DATE_FORMAT)
            period_days = (end_date - start_date).days
            if period_days < 0:
                raise ValueError(
                    f"Contract {contract.get('contract_id')!r}: "
                    "end_date precedes start_date"
                )

            invoice_count = random.randint(
                self.MIN_INVOICES_PER_CONTRACT, self.MAX_INVOICES_PER_CONTRACT
            )
            for _ in range(invoice_count):
                # Decide up front whether this invoice carries a pricing error.
                has_error = random.random() < self.ERROR_RATE

                quantity = random.randint(self.MIN_QUANTITY, self.MAX_QUANTITY)
                correct_price = self.calculate_correct_price(contract, quantity)

                charged_amount = correct_price
                if has_error:
                    charged_amount *= random.uniform(
                        self.MIN_OVERCHARGE_FACTOR, self.MAX_OVERCHARGE_FACTOR
                    )

                invoice_date = start_date + timedelta(
                    days=random.randint(0, period_days)
                )

                invoices.append(
                    {
                        "invoice_id": self._next_invoice_id(used_ids),
                        "contract_id": contract["contract_id"],
                        "date": invoice_date.strftime(_DATE_FORMAT),
                        "quantity": quantity,
                        "amount_charged": round(charged_amount, 2),
                        "correct_amount": round(correct_price, 2),
                        "has_error": has_error,
                    }
                )

        return sorted(invoices, key=lambda x: x["date"])

    def save_invoices(self, invoices: List[Dict]) -> None:
        """Save generated invoices to JSON file.

        The list is written under a top-level ``"invoices"`` key, replacing
        any existing ``invoices.json``.
        """
        invoices_file = self._data_path("invoices.json")
        with open(invoices_file, "w", encoding="utf-8") as f:
            json.dump({"invoices": invoices}, f, indent=2)

    def generate_and_save(self) -> None:
        """Generate and save invoices in one step."""
        contracts = self.load_contracts()
        invoices = self.generate_invoices(contracts)
        self.save_invoices(invoices)

    @staticmethod
    def _read_invoices(invoices_file: str) -> List[Dict]:
        """Read *invoices_file* and return its ``"invoices"`` list (or ``[]``)."""
        with open(invoices_file, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data.get("invoices", [])

    def load_or_generate_invoices(self) -> List[Dict]:
        """Load existing invoices or generate new ones if they don't exist.

        If reading (or the first generation attempt) fails for any reason,
        the error is reported on stdout and the invoices are regenerated
        once; a failure of that second attempt propagates to the caller.
        """
        invoices_file = self._data_path("invoices.json")
        try:
            if not os.path.exists(invoices_file):
                self.generate_and_save()
            return self._read_invoices(invoices_file)
        except Exception as e:
            # Deliberate best-effort: an unreadable or corrupt invoices file
            # is replaced with freshly generated data.
            print(f"Error loading invoices: {str(e)}")
            print("Generating new invoices...")
            self.generate_and_save()
            return self._read_invoices(invoices_file)


def main() -> None:
    """Create/update the contract and invoice files in the default directory."""
    generator = InvoiceGenerator()
    generator.generate_and_save()
    print("Successfully generated invoice and contract data!")


if __name__ == "__main__":
    # This allows running the generator directly to create/update the files
    main()