acecalisto3 committed on
Commit
28e1738
1 Parent(s): 0298215

Update agent.py

Files changed (1)
  1. agent.py +13 -85
agent.py CHANGED
@@ -93,101 +93,29 @@ urls = [
     "https://www.instagram.com/westcentralcte/",
     "https://www.tiktok.com/@mutplteen"
 ]
-
-
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-# Define constants
-DATE_TIME_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-PURPOSE = f"You go to Culvers sites, you continuously seek changes on them since your last observation. Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data."
-HISTORY = []
-CURRENT_TASK = None
-DEFAULT_FILE_PATH = "user/app/scraped_data/culver/culvers_changes.csv"
-
-# Ensure the directory exists
-os.makedirs(os.path.dirname(DEFAULT_FILE_PATH), exist_ok=True)
-
-# Function to monitor URLs for changes
-def monitor_urls(storage_location, urls, scrape_interval, content_type):
-    global HISTORY
-    previous_hashes = [""] * len(urls)
-
-    try:
-        with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=Options()) as driver:
-            while True:
-                for i, url in enumerate(urls):
-                    try:
-                        driver.get(url)
-                        time.sleep(2)  # Wait for the page to load
-                        if content_type == "text":
-                            current_content = driver.page_source
-                        elif content_type == "media":
-                            current_content = driver.find_elements_by_tag_name("img")
-                        else:
-                            current_content = driver.page_source
-                        current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
-                        if current_hash != previous_hashes[i]:
-                            previous_hashes[i] = current_hash
-                            date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-                            HISTORY.append(f"Change detected at {url} on {date_time_str}")
-                            with open(storage_location, "a", newline="") as csvfile:
-                                csv_writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
-                                csv_writer.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
-                            logging.info(f"Change detected at {url} on {date_time_str}")
-                    except Exception as e:
-                        logging.error(f"Error accessing {url}: {e}")
-                time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
-    except Exception as e:
-        logging.error(f"Error starting ChromeDriver: {e}")
-
-# Define main function to handle user input
-def handle_input(storage_location, urls, scrape_interval, content_type):
-    global CURRENT_TASK, HISTORY
-
-    CURRENT_TASK = f"Monitoring URLs: {', '.join(urls)}"
-    HISTORY.append(f"Task started: {CURRENT_TASK}")
-    monitor_urls(storage_location, urls, scrape_interval, content_type)
-    return TASK_PROMPT.format(task=CURRENT_TASK, history="\n".join(map(str, HISTORY)))
-
-# Load custom prompts
-try:
-    with open("custom_prompts.yaml", "r") as fp:
-        custom_prompts = yaml.safe_load(fp)
-except FileNotFoundError:
-    custom_prompts = {"WEB_DEV": "", "AI_SYSTEM_PROMPT": "", "PYTHON_CODE_DEV": "", "CODE_GENERATION": "", "CODE_INTERPRETATION": "", "CODE_TRANSLATION": "", "CODE_IMPLEMENTATION": ""}
-
-# Define agents
-AGENTS = ["WEB_DEV", "AI_SYSTEM_PROMPT", "PYTHON_CODE_DEV", "CODE_GENERATION", "CODE_INTERPRETATION", "CODE_TRANSLATION", "CODE_IMPLEMENTATION"]
-
-# Define the Mistral inference client
-client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-
-# Define the chat response function
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    return generate(message, history, system_message, max_tokens, temperature, top_p)
-
-# Function to start scraping
-def start_scraping(storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-    urls = [url for url in [url1, url2, url3, url4, url5, url6, url7, url8, url9, url10] if url]
-    handle_input(storage_location, urls, scrape_interval, content_type)
-    return f"Started scraping {', '.join(urls)} every {scrape_interval} minutes."
-
-# Function to display CSV content
-def display_csv(storage_location):
-    if os.path.exists(storage_location):
-        with open(storage_location, "r") as file:
-            return file.read()
-    else:
-        return "No data available."
-
-# Create Gradio interface
-def chat_interface(message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-    global HISTORY
-    response = respond(message, HISTORY, system_message, max_tokens, temperature, top_p)
-    HISTORY.append((message, response))
-    return HISTORY, ""
-'''
+action: SEARCH_urls; # https://www.facebook.com/CulverCommunitySchools, https://www.culver.k12.in.us/
+
+action: OBSERVE_new; # initial database build -- all observations from feeds are new
+
+- action: UPDATE_log; # each school's log gets dumped into a folder named after the school; this folder is the "Storage Location" and is going to be fed via RSS to the website. Must stay stored. Let's just use this repo; here is the folder location's URL -- I will provide an authorization token with write permission; storage URL: https://huggingface.co/spaces/acecalisto3/CEEMEESEEK/tree/main/scraped_data/culver ; my write-permission API key is stored as a secret you've already verified.
+
+action: OBSERVE_change; # after db build -- any and all changes from the previous feed observation
+
+- action: UPDATE_log; # each school's log gets dumped into a folder named after the school; this folder is the "Storage Location" and is going to be fed via RSS to the website. Must stay stored. Let's just use this repo; here is the folder location's URL -- I will provide an authorization token with write permission; storage URL: https://huggingface.co/spaces/acecalisto3/CEEMEESEEK/tree/main/scraped_data/culver ; my write-permission API key is stored as a secret you've already verified.
+
+action: OBSERVE_live; # in the event of a live stream -- return "Currently Live-Streaming at <link_to_stream>. Join us!"
+
+action: UPDATE_task[extract > update log > monitor]; # scan the urls at user-specified intervals; purpose: extract data/media from posts/pages, convert and deliver the data in a MySQL database optimized for an RSS feed
+
+rss
+
+- action: UPDATE_log # this is the "Storage Location" and is going to be fed via RSS to the website. Must stay stored. Let's just use this repo; here is the folder location's URL -- I will provide an authorization token with write permission; storage URL: https://huggingface.co/spaces/acecalisto3/CEEMEESEEK/tree/main/scraped_data/culver ; my write-permission API key is stored as a secret you've already verified.
+
+# ...
+# anything I missed should be dealt with by the app's global AI. It should handle all conversion logic, all triggering events the user doesn't handle, and so on
+# ...
+- action: COMPLETE # there should be accurate data in a log folder I can point my rss.app at to obtain a fresh and updated feed from this repo's log...
+
 {purpose}
 """
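
The UPDATE_log actions added above describe pushing each change log into this Space's `scraped_data/culver` folder, authenticated with a write token stored as a secret. A minimal sketch of that upload step using `huggingface_hub`, assuming the secret is exposed to the Space as an environment variable (the variable name `HF_WRITE_TOKEN` and the local CSV path are illustrative assumptions, not taken from this commit):

```python
import os

from huggingface_hub import HfApi

# Assumption: the write-permission token mentioned in the prompt is exposed
# as an environment variable; the name used here is hypothetical.
HF_TOKEN = os.environ.get("HF_WRITE_TOKEN")

# Assumption: the scraper writes its CSV to this local path before upload.
LOCAL_CSV = "scraped_data/culver/culvers_changes.csv"


def push_change_log(local_path: str = LOCAL_CSV) -> None:
    """Upload the local change log into the Space's scraped_data/culver folder."""
    api = HfApi(token=HF_TOKEN)
    api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo="scraped_data/culver/culvers_changes.csv",
        repo_id="acecalisto3/CEEMEESEEK",
        repo_type="space",
        commit_message="Update scraped change log",
    )


if __name__ == "__main__":
    push_change_log()
```

Committing the CSV back into the repo keeps the log persistent across Space restarts, which is the "Must stay stored" requirement in the prompt.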
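The UPDATE_task and rss lines say the stored log should ultimately feed an RSS reader (rss.app). A minimal sketch, using only the standard library, of turning the CSV rows into an RSS 2.0 document; the field names follow the CSV written by the removed monitor_urls code, while the feed title and output filename are assumptions for illustration:

```python
import csv
import xml.etree.ElementTree as ET


def csv_to_rss(csv_path: str, rss_path: str = "scraped_data/culver/feed.xml") -> None:
    """Convert the change-log CSV into a minimal RSS 2.0 feed."""
    rss = ET.Element("rss", version="2.0")
    channel = ET.SubElement(rss, "channel")
    ET.SubElement(channel, "title").text = "Culver change log"  # assumed title
    ET.SubElement(channel, "link").text = "https://huggingface.co/spaces/acecalisto3/CEEMEESEEK"
    ET.SubElement(channel, "description").text = "Changes detected on monitored school feeds"

    with open(csv_path, newline="") as f:
        # The old monitor_urls code wrote rows without a header, so the
        # field names are supplied explicitly here.
        for row in csv.DictReader(f, fieldnames=["date", "time", "url", "change"]):
            item = ET.SubElement(channel, "item")
            ET.SubElement(item, "title").text = f"{row['change']} at {row['url']}"
            ET.SubElement(item, "link").text = row["url"]
            ET.SubElement(item, "pubDate").text = f"{row['date']} {row['time']}"

    ET.ElementTree(rss).write(rss_path, encoding="utf-8", xml_declaration=True)
```

Writing the feed file into the same `scraped_data/culver` folder (and uploading it alongside the CSV) would give rss.app a stable URL in this repo to poll for fresh entries.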