import logging
import os
import re
from typing import Any, Dict, List

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from .file_scanner import quick_scan_project


class AIReadmeGenerator:
    def __init__(self, api_key: str):
        # Groq-hosted Llama 3 70B model via LangChain's ChatGroq wrapper
        self.llm = ChatGroq(
            api_key=api_key,
            model_name="llama3-70b-8192"
        )

    def read_file_content(self, file_path: str) -> str:
        """
        Read the content of a file, handling decoding errors and large files.

        Args:
            file_path (str): Path to the file to read

        Returns:
            str: File content (truncated) or an error message
        """
        try:
            # Cap the read at 10,000 characters so large files cannot overwhelm
            # the prompt; replace undecodable bytes instead of raising.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
                return file.read(10000)
        except Exception as e:
            return f"Error reading file {file_path}: {str(e)}"

    def analyze_project_structure(self, project_path: str) -> Dict[str, Any]:
        """
        Perform a comprehensive analysis of the project.

        Args:
            project_path (str): Path to the project directory

        Returns:
            Dict: Detailed project analysis
        """
        # Use the existing quick_scan_project for the initial structural scan
        project_info = quick_scan_project(project_path)

        # Collect (truncated) contents of every scanned file
        file_contents = {}
        for file in project_info.get('files', []):
            full_path = os.path.join(project_path, file)
            if os.path.isfile(full_path):
                file_contents[file] = self.read_file_content(full_path)

        # Extract docstrings and key definitions from the main entry-point files
        code_insights = {}
        main_files = ['app.py', 'main.py', 'src/ui.py', 'src/generator.py']
        for main_file in main_files:
            full_path = os.path.join(project_path, main_file)
            if os.path.exists(full_path):
                code_insights[main_file] = self.extract_code_insights(full_path)

        # Combine all information
        comprehensive_analysis = {
            "project_structure": project_info,
            "file_contents": file_contents,
            "code_insights": code_insights,
            "requirements": self.read_requirements(project_path)
        }
        return comprehensive_analysis
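
    # Shape of the returned analysis (illustrative values, not from the source):
    # {
    #     "project_structure": {...},           # whatever quick_scan_project returns
    #     "file_contents": {"app.py": "..."},   # truncated text keyed by file path
    #     "code_insights": {"app.py": {...}},   # see extract_code_insights below
    #     "requirements": ["langchain-groq", ...]
    # }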

    def extract_code_insights(self, file_path: str) -> Dict[str, Any]:
        """
        Extract insights from a Python file.

        Args:
            file_path (str): Path to the Python file

        Returns:
            Dict: Extracted insights including the module docstring, key
            functions, and key classes
        """
        insights = {
            "module_docstring": "",
            "key_functions": [],
            "key_classes": []
        }
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Extract the module-level docstring (first triple-quoted string)
            module_docstring_match = re.search(r'"""(.*?)"""', content, re.DOTALL)
            if module_docstring_match:
                insights['module_docstring'] = module_docstring_match.group(1).strip()

            # Find functions that carry a docstring
            function_matches = re.findall(r'def\s+(\w+)\(.*?\):\s*"""(.*?)"""', content, re.DOTALL)
            insights['key_functions'] = [
                f"{name}: {desc.strip()}"
                for name, desc in function_matches
            ]

            # Find classes that carry a docstring
            class_matches = re.findall(r'class\s+(\w+).*?:\s*"""(.*?)"""', content, re.DOTALL)
            insights['key_classes'] = [
                f"{name}: {desc.strip()}"
                for name, desc in class_matches
            ]
        except Exception as e:
            insights['error'] = str(e)
        return insights
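
    # Illustrative note (an addition, not from the original source): given input
    #
    #     def add(a, b):
    #         """Add two integers."""
    #
    # the function regex above yields key_functions == ['add: Add two integers.'].
    # The patterns are heuristics: definitions without a triple-double-quoted
    # docstring are skipped entirely.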

    def read_requirements(self, project_path: str) -> List[str]:
        """
        Read the project's requirements file.

        Args:
            project_path (str): Path to the project directory

        Returns:
            List[str]: List of requirements
        """
        try:
            req_path = os.path.join(project_path, 'requirements.txt')
            if os.path.exists(req_path):
                with open(req_path, 'r', encoding='utf-8') as f:
                    # Skip blank lines and comment lines (strip first so that
                    # indented comments are also excluded)
                    lines = (line.strip() for line in f)
                    return [line for line in lines if line and not line.startswith('#')]
            return []
        except Exception:
            return []

    def generate_concise_readme(self, project_path: str) -> str:
        """
        Generate a README based on a comprehensive project analysis.

        Args:
            project_path (str): Path to the project directory

        Returns:
            str: Generated README content
        """
        # Analyze the project comprehensively
        project_analysis = self.analyze_project_structure(project_path)

        # Detailed prompt for the LLM
        template = """
Generate a comprehensive README.md based on the following project analysis:

PROJECT STRUCTURE:
{project_structure}

FILE CONTENTS:
{file_contents}

CODE INSIGHTS:
{code_insights}

REQUIREMENTS:
{requirements}

Based on this information, create a professional, detailed README.md that:
- Explains the project's purpose and functionality
- Describes key features and components
- Provides clear setup and usage instructions
- Highlights technical details
- Includes any relevant dependencies or prerequisites

Ensure the README is informative, well-structured, and tailored to the specific project.
"""

        # Flatten the analysis into plain strings for the prompt variables
        context = {
            "project_structure": str(project_analysis.get('project_structure', 'No structure information')),
            "file_contents": '\n'.join(f"{k}:\n{v}" for k, v in project_analysis.get('file_contents', {}).items()),
            "code_insights": '\n'.join(f"{k}:\n{v}" for k, v in project_analysis.get('code_insights', {}).items()),
            "requirements": '\n'.join(project_analysis.get('requirements', []))
        }

        try:
            # Build the chain: prompt -> LLM -> plain-string output
            prompt = ChatPromptTemplate.from_template(template)
            chain = prompt | self.llm | StrOutputParser()
            readme_content = chain.invoke(context)
            return readme_content.strip()
        except Exception as e:
            logging.error(f"README generation error: {str(e)}")
            return f"# Project README\n\nUnable to generate README automatically.\n\nError: {str(e)}"