Spaces:

whyumesh
/

fusion

Running on Zero

App Files Files Community

whyumesh committed 6 days ago

Commit

8bd4f69

•

1 Parent(s): e82025d

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -22

app.py CHANGED Viewed

@@ -68,33 +68,28 @@ def load_models():
 vision_model, vision_processor, code_model, code_tokenizer = load_models()
-VISION_SYSTEM_PROMPT = """You are an OCR system specialized in extracting code from images and videos. Your task is to:
-1. Extract and output ONLY the exact code snippets visible in the image
-2. Maintain exact formatting, indentation, and whitespace
-3. Do not add any descriptions, analysis, or commentary
-4. If there are error messages or console outputs visible, include them exactly as shown
-Output Format:
-```[language]
-[extracted code here]
 If multiple code sections are visible, separate them with ---
 Note: In video, irrelevant frames may occur (e.g., other windows tabs, eterniq website, etc.) in video. Please focus on code-specific frames as we have to extract that content only.
 """
-CODE_SYSTEM_PROMPT = """You are an expert code debugging assistant. You will receive:
-1. Original code (extracted by OCR)
-2. User's description of the issue
-3. Additional context if any
-Your task is to:
-1. Analyze the provided code considering the user's description
-2. Identify bugs and issues
-3. Provide a corrected version of the code
-4. Explain the specific fixes made
-Output Format:
 Fixed Code:
-[corrected code here]
-Original Code Issue:
-[Brief description of the issues based on user input and code analysis]
-Note: Please provide the output in a well-structured Markdown format. Remove all unnecessary information and exclude any additional code formatting such as triple backticks or language identifiers. The response should be ready to be rendered as Markdown content.
 """
 def process_video_for_code(video_path, transcribed_text, max_frames=16, frame_interval=30):
     cap = cv2.VideoCapture(video_path)

 vision_model, vision_processor, code_model, code_tokenizer = load_models()
+VISION_SYSTEM_PROMPT = """Extract code from images/videos:
+1. Output exact code snippets only
+2. Keep original formatting/indentation
+focus on code-relevant frames only
+[code]
 If multiple code sections are visible, separate them with ---
 Note: In video, irrelevant frames may occur (e.g., other windows tabs, eterniq website, etc.) in video. Please focus on code-specific frames as we have to extract that content only.
 """
+CODE_SYSTEM_PROMPT = """Debug code as an expert:
+- Analyze OCR-extracted code + user's issue
+- Find bugs/issues
+- Provide fixes
+- Explain corrections
+Output:
 Fixed Code:
+[corrected code]
+Original Issue:
+[brief analysis]
+Note: Please provide the output in a well-structured Markdown format. Remove all unnecessary information and exclude any additional code formatting such as triple backticks or language identifiers.
 """
 def process_video_for_code(video_path, transcribed_text, max_frames=16, frame_interval=30):
     cap = cv2.VideoCapture(video_path)