Spaces: Running
acecalisto3 committed
Commit 28e1738
1 Parent(s): 0298215
Update agent.py

agent.py CHANGED
@@ -93,101 +93,29 @@ urls = [
     "https://www.instagram.com/westcentralcte/",
     "https://www.tiktok.com/@mutplteen"
 ]
 
 
-#
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-#
-DATE_TIME_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-PURPOSE = f"You go to Culvers sites, you continuously seek changes on them since your last observation. Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data."
-HISTORY = []
-CURRENT_TASK = None
-DEFAULT_FILE_PATH = "user/app/scraped_data/culver/culvers_changes.csv"
-
-#
-os.makedirs(os.path.dirname(DEFAULT_FILE_PATH), exist_ok=True)
-
-#
-def monitor_urls(storage_location, urls, scrape_interval, content_type):
-    global HISTORY
-    previous_hashes = [""] * len(urls)
-
-    with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=Options()) as driver:
-        while True:
-            for i, url in enumerate(urls):
-                try:
-                    driver.get(url)
-                    time.sleep(2)  # Wait for the page to load
-                    if content_type == "text":
-                        current_content = driver.page_source
-                    elif content_type == "media":
-                        current_content = driver.find_elements_by_tag_name("img")
-                    else:
-                        current_content = driver.page_source
-                    current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
-                    if current_hash != previous_hashes[i]:
-                        previous_hashes[i] = current_hash
-                        date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-                        HISTORY.append(f"Change detected at {url} on {date_time_str}")
-                        with open(storage_location, "a", newline="") as csvfile:
-                            csv_writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
-                            csv_writer.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
-                        logging.info(f"Change detected at {url} on {date_time_str}")
-                except Exception as e:
-                    logging.error(f"Error accessing {url}: {e}")
-            time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
-    except Exception as e:
-        logging.error(f"Error starting ChromeDriver: {e}")
-
-def handle_input(storage_location, urls, scrape_interval, content_type):
-    global CURRENT_TASK, HISTORY
-
-    HISTORY.append(f"Task started: {CURRENT_TASK}")
-    monitor_urls(storage_location, urls, scrape_interval, content_type)
-    return TASK_PROMPT.format(task=CURRENT_TASK, history="\n".join(map(str, HISTORY)))
-
-
-
-except FileNotFoundError:
-    custom_prompts = {"WEB_DEV": "", "AI_SYSTEM_PROMPT": "", "PYTHON_CODE_DEV": "", "CODE_GENERATION": "", "CODE_INTERPRETATION": "", "CODE_TRANSLATION": "", "CODE_IMPLEMENTATION": ""}
-
-# Define agents
-AGENTS = ["WEB_DEV", "AI_SYSTEM_PROMPT", "PYTHON_CODE_DEV", "CODE_GENERATION", "CODE_INTERPRETATION", "CODE_TRANSLATION", "CODE_IMPLEMENTATION"]
-
-# Define the Mistral inference client
-client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-
-# Define the chat response function
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    return generate(message, history, system_message, max_tokens, temperature, top_p)
-
-# Function to start scraping
-def start_scraping(storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-    urls = [url for url in [url1, url2, url3, url4, url5, url6, url7, url8, url9, url10] if url]
-    handle_input(storage_location, urls, scrape_interval, content_type)
-    return f"Started scraping {', '.join(urls)} every {scrape_interval} minutes."
-
-# Function to display CSV content
-def display_csv(storage_location):
-    if os.path.exists(storage_location):
-        with open(storage_location, "r") as file:
-            return file.read()
-    else:
-        return "No data available."
-
-# Create Gradio interface
-def chat_interface(message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-    global HISTORY
-    response = respond(message, HISTORY, system_message, max_tokens, temperature, top_p)
-    HISTORY.append((message, response))
-    return HISTORY, ""
-'''
+action: SEARCH_urls; # https://www.facebook.com/CulverCommunitySchools, https://www.culver.k12.in.us/
+
+action: OBSERVE_new; # initial database build -- all observations from the feeds are new
+
+- action: UPDATE_log; # each school's log gets dumped into a folder named after the school; this folder is the "Storage Location" and will be fed to the website via RSS. It must stay stored. Let's just use this repo; here is the folder's URL -- I will provide an authorization token with write permission; storage url: https://huggingface.co/spaces/acecalisto3/CEEMEESEEK/tree/main/scraped_data/culver ; my write-permission API key is stored as a secret you've already verified.
+
+action: OBSERVE_change; # after the db build -- any and all changes from the previous feed observation
+
+- action: UPDATE_log; # each school's log gets dumped into a folder named after the school; this folder is the "Storage Location" and will be fed to the website via RSS. It must stay stored. Let's just use this repo; here is the folder's URL -- I will provide an authorization token with write permission; storage url: https://huggingface.co/spaces/acecalisto3/CEEMEESEEK/tree/main/scraped_data/culver ; my write-permission API key is stored as a secret you've already verified.
+
+action: OBSERVE_live; # in the event of a live stream -- return "Currently Live-Streaming at <link_to_stream> -- Join us!"
+
+action: UPDATE_task[extract > update log > monitor]; # scan the urls at user-specified intervals; purpose: extract data/media from posts/pages, convert and deliver the data in a MySQL database optimized for an RSS feed
+
+rss
+
+- action: UPDATE_log; # this is the "Storage Location" and will be fed to the website via RSS. It must stay stored. Let's just use this repo; here is the folder's URL -- I will provide an authorization token with write permission; storage url: https://huggingface.co/spaces/acecalisto3/CEEMEESEEK/tree/main/scraped_data/culver ; my write-permission API key is stored as a secret you've already verified.
+
+# ...
+# anything I missed should be handled by the app's global AI: it should handle all conversion logic, all triggering events the user doesn't handle, and so on
+# ...
+- action: COMPLETE # there should be accurate data in a log folder I can point my rss.app at to obtain a fresh, updated feed from this repo's log...
+
 {purpose}
 """
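For reference, the monitoring loop removed by this commit relies on `driver.find_elements_by_tag_name`, which no longer exists in Selenium 4, and its hash comparison is what the new OBSERVE_new / OBSERVE_change actions describe in prose. Below is a minimal sketch of the same hash-based change detection against the current Selenium API; the example URL, the headless flag, and the single-pass structure are assumptions for illustration, not code from the commit.

```python
# Sketch of hash-based change detection (OBSERVE_new / OBSERVE_change), using the
# Selenium 4 API (find_elements_by_tag_name was removed in Selenium 4).
import hashlib
import logging
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")


def check_urls_once(driver, urls, previous_hashes, content_type="text"):
    """Visit each URL once and report which ones changed since the last pass."""
    changed = []
    for i, url in enumerate(urls):
        try:
            driver.get(url)
            time.sleep(2)  # crude wait for the page to render
            if content_type == "media":
                # Hash the image sources instead of the raw page source.
                content = [img.get_attribute("src") for img in driver.find_elements(By.TAG_NAME, "img")]
            else:
                content = driver.page_source
            current_hash = hashlib.md5(str(content).encode("utf-8")).hexdigest()
            if current_hash != previous_hashes[i]:
                previous_hashes[i] = current_hash
                changed.append(url)
                logging.info("Change detected at %s", url)
        except Exception as exc:
            logging.error("Error accessing %s: %s", url, exc)
    return changed


if __name__ == "__main__":
    urls = ["https://www.culver.k12.in.us/"]  # example URL taken from the plan above
    hashes = [""] * len(urls)
    options = Options()
    options.add_argument("--headless=new")  # assumption: headless Chrome is acceptable on the Space
    with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
        print(check_urls_once(driver, urls, hashes))
```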
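The UPDATE_log action asks for each school's CSV log to live under scraped_data/culver in this Space, written with a token held as a secret. One plausible way to do that, sketched below with `huggingface_hub`: the secret name `HF_WRITE_TOKEN` and the CSV filename are assumptions; only the repo id comes from the storage URL in the plan.

```python
# Sketch of UPDATE_log: append a change row to a CSV and push it to
# scraped_data/culver in this Space. Secret name and filename are assumptions.
import csv
import datetime
import os

from huggingface_hub import HfApi

REPO_ID = "acecalisto3/CEEMEESEEK"                      # from the storage url in the plan
LOCAL_CSV = "scraped_data/culver/culvers_changes.csv"   # assumed local path


def append_and_push(url: str, change: str = "Content changed") -> None:
    os.makedirs(os.path.dirname(LOCAL_CSV), exist_ok=True)
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    date_str, time_str = now.split()

    new_file = not os.path.exists(LOCAL_CSV)
    with open(LOCAL_CSV, "a", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
        if new_file:
            writer.writeheader()
        writer.writerow({"date": date_str, "time": time_str, "url": url, "change": change})

    api = HfApi(token=os.environ["HF_WRITE_TOKEN"])  # assumed name of the write-permission secret
    api.upload_file(
        path_or_fileobj=LOCAL_CSV,
        path_in_repo=LOCAL_CSV,          # lands under scraped_data/culver/ in the repo
        repo_id=REPO_ID,
        repo_type="space",
        commit_message=f"Log change for {url}",
    )
```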
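The OBSERVE_live action only specifies the announcement string to return when a stream is found. A minimal sketch follows; the detection heuristic (any anchor whose href contains "/live") is an assumption, since the plan does not say how a live stream is recognized.

```python
# Sketch of OBSERVE_live: if the current page exposes a live-stream link,
# format the announcement string the plan asks for.
from typing import Optional

from selenium.webdriver.common.by import By


def observe_live(driver) -> Optional[str]:
    for anchor in driver.find_elements(By.TAG_NAME, "a"):
        href = anchor.get_attribute("href") or ""
        if "/live" in href:  # assumed heuristic, not from the plan
            return f"Currently Live-Streaming at {href} -- Join us!"
    return None  # no live stream detected on this page
```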
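The final UPDATE_log / COMPLETE items point rss.app at the log folder, so the stored CSV eventually has to look like a feed. A minimal sketch of turning the CSV log into an RSS 2.0 file with the standard library is below; it assumes the CSV has the date/time/url/change header written in the upload sketch above, and the input and output filenames are assumptions.

```python
# Sketch of converting the change log CSV into a minimal RSS 2.0 feed
# that rss.app (or any reader) could consume.
import csv
import xml.etree.ElementTree as ET


def csv_to_rss(csv_path: str = "scraped_data/culver/culvers_changes.csv",
               rss_path: str = "scraped_data/culver/feed.xml") -> None:
    rss = ET.Element("rss", version="2.0")
    channel = ET.SubElement(rss, "channel")
    ET.SubElement(channel, "title").text = "Culver school feed changes"
    ET.SubElement(channel, "link").text = "https://huggingface.co/spaces/acecalisto3/CEEMEESEEK"
    ET.SubElement(channel, "description").text = "Changes detected on monitored school pages"

    with open(csv_path, newline="") as csvfile:
        for row in csv.DictReader(csvfile):
            item = ET.SubElement(channel, "item")
            ET.SubElement(item, "title").text = f'{row["change"]} at {row["url"]}'
            ET.SubElement(item, "link").text = row["url"]
            ET.SubElement(item, "pubDate").text = f'{row["date"]} {row["time"]}'

    ET.ElementTree(rss).write(rss_path, encoding="utf-8", xml_declaration=True)
```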