sergioska committed on
Commit
5465fb7
1 Parent(s): 0fc76d5

add scrape graph script

Browse files
Files changed (2) hide show
  1. app.py +35 -0
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,5 +1,40 @@
1
  import streamlit as st
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  x = st.slider('Select a value')
4
  st.write(x, 'squared is', x * x)
5
 
 
1
  import streamlit as st
2
 
3
+ from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
4
+ from scrapegraphai.graphs import SmartScraperGraph
5
+ from scrapegraphai.utils import prettify_exec_info
6
+
7
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
8
+ model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
9
+ modelNomic = AutoModel.from_pretrained("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
10
+
11
+ graph_config = {
12
+ "llm": {
13
+ "model-instance": model,
14
+ "temperature": 1,
15
+ "format": "json", # Ollama needs the format to be specified explicitly
16
+ "model_tokens": 4096, # depending on the model set context length
17
+ },
18
+ "embeddings": {
19
+ "model-instance": modelNomic,
20
+ "temperature": 0,
21
+ }
22
+ }
23
+
24
+ # ************************************************
25
+ # Create the SmartScraperGraph instance and run it
26
+ # ************************************************
27
+
28
+ smart_scraper_graph = SmartScraperGraph(
29
+ prompt="List me shoes in first page with names, prices and image urls",
30
+ # also accepts a string with the already downloaded HTML code
31
+ source="https://www.footlocker.co.uk/en/category/sale/men.html",
32
+ config=graph_config
33
+ )
34
+
35
+ result = smart_scraper_graph.run()
36
+ print(result)
37
+
38
  x = st.slider('Select a value')
39
  st.write(x, 'squared is', x * x)
40
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ scrapegraphai