hayuh committed on
Commit
941770c
·
verified ·
1 Parent(s): 35f3adf

Upload 16 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Ehlers-Danlos-1/2024_EDS_2.pdf filter=lfs diff=lfs merge=lfs -text
37
+ Ehlers-Danlos-1/2024_EDS_3.pdf filter=lfs diff=lfs merge=lfs -text
38
+ Ehlers-Danlos-1/2024_EDS_4.pdf filter=lfs diff=lfs merge=lfs -text
39
+ Ehlers-Danlos-1/2024_EDS_5.pdf filter=lfs diff=lfs merge=lfs -text
40
+ Ehlers-Danlos-1/Unknown_EDS_1.pdf filter=lfs diff=lfs merge=lfs -text
41
+ Ehlers-Danlos-1/Unknown_EDS_5.pdf filter=lfs diff=lfs merge=lfs -text
Ehlers-Danlos-1/2024_EDS_1.pdf ADDED
The diff for this file is too large to render. See raw diff
 
Ehlers-Danlos-1/2024_EDS_2.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46fc736ff4174473e0a846b7ca8430c140d89cd2c9f663e105bc48b33f8d9c99
3
+ size 2616000
Ehlers-Danlos-1/2024_EDS_3.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fef5c8c375297158ad7ad63166405ca7ce4ac511371a8454fe9df972755b0fe
3
+ size 10344738
Ehlers-Danlos-1/2024_EDS_4.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25db35c77fd6aeba6b15278671a462b30ffbb6f97eb5f221e0459f6d11c0f8ed
3
+ size 1071576
Ehlers-Danlos-1/2024_EDS_5.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ef98bcb445da6abda66de35204634bd81d8c6dcdf53bfc3be54447ec9ad0ad
3
+ size 2772421
Ehlers-Danlos-1/2024_EDS_6.pdf ADDED
Binary file (146 kB). View file
 
Ehlers-Danlos-1/2024_EDS_7.pdf ADDED
The diff for this file is too large to render. See raw diff
 
Ehlers-Danlos-1/Unknown_EDS_1.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbeaf13d3298a00bc1c7acfba3177a0c639f677e0f0941452709fe60542052d4
3
+ size 21553835
Ehlers-Danlos-1/Unknown_EDS_2.pdf ADDED
Binary file (428 kB). View file
 
Ehlers-Danlos-1/Unknown_EDS_3.pdf ADDED
Binary file (817 kB). View file
 
Ehlers-Danlos-1/Unknown_EDS_4.pdf ADDED
Binary file (392 kB). View file
 
Ehlers-Danlos-1/Unknown_EDS_5.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c5a77524b6bb4dca40798af5ff3e3c622216a13ac21a60d9befce255977b47a
3
+ size 1847313
helper.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Add your utilities or helper functions to this file.
2
+
3
+ import os
4
+ from dotenv import load_dotenv, find_dotenv
5
+
6
+ # These helpers expect to find a .env file in the directory above the lesson. Each line of that file has the format (without the leading comment marker): API_KEYNAME=AStringThatIsTheLongAPIKeyFromSomeService
7
def load_env():
    """Load environment variables from a discovered ``.env`` file.

    The path returned by ``find_dotenv()`` is handed to ``load_dotenv()``;
    the loader's return value is intentionally ignored.
    """
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)
9
+
10
def get_openai_api_key():
    """Return the ``OPENAI_API_KEY`` environment variable.

    ``load_env()`` is called first so that a key stored in a ``.env`` file
    is available in the process environment before the lookup.

    Returns:
        The key as a string, or ``None`` when the variable is not set.
    """
    load_env()
    return os.environ.get("OPENAI_API_KEY")
ragas_eval.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Generate a synthetic ragas test set from the documents in ``Ehlers-Danlos-1``."""

import os
import sys
from helper import get_openai_api_key

# Make packages from a project-local virtual environment importable.
# NOTE(review): this path mixes the Windows layout (venv/Lib/site-packages)
# with the POSIX one (venv/lib/python3.12/site-packages). Confirm it matches
# the real venv; if the directory does not exist this entry has no effect.
venv_path = os.path.join(os.path.dirname(__file__), 'venv', 'Lib', 'python3.12', 'site-packages')
sys.path.append(venv_path)

# os.environ only accepts string values, so assigning a missing key (None)
# would fail with an opaque TypeError. Fail early with a clear message.
openai_api_key = get_openai_api_key()
if openai_api_key is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in your .env file or environment."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

# Third-party imports stay below the sys.path tweak, presumably so packages
# installed only in the local venv can be resolved.
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader("Ehlers-Danlos-1")
documents = loader.load()

# Mirror each document's source path under a 'filename' metadata key
# (presumably the key the ragas generator groups documents by; confirm
# against the installed ragas version).
for document in documents:
    document.metadata['filename'] = document.metadata['source']

from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# generator with openai models
generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
critic_llm = ChatOpenAI(model="gpt-4")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings,
)

# generate testset: 10 samples split across the three evolution types
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)
print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
print(testset)

# Keep the DataFrame instead of discarding it so it can be inspected or saved.
testset_df = testset.to_pandas()
test.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Diagnostic script: check that ragas can be imported from the local venv."""

import sys
import os

# Add the virtual environment's site-packages to sys.path.
# The path below is the Windows venv layout. On POSIX the directory is
# 'venv/lib/pythonX.Y/site-packages'; insert your Python version, e.g.
# 'python3.8', between 'lib' and 'site-packages'.
venv_path = os.path.join(os.path.dirname(__file__), 'venv', 'lib', 'site-packages')
sys.path.append(venv_path)

# Show the search path and what the venv directory contains. A missing
# directory is reported instead of aborting the rest of the diagnostics.
print("sys.path:", sys.path)
if os.path.isdir(venv_path):
    print("Contents of venv_path:", os.listdir(venv_path))
else:
    print("venv_path does not exist:", venv_path)

# Import the TestsetGenerator; only attempt to instantiate it when the
# import succeeded, so an import failure is not reported a second time as a
# misleading NameError.
try:
    from ragas.testset.generator import TestsetGenerator
except ImportError as e:
    print("ImportError:", e)
else:
    print("Successfully imported TestsetGenerator.")
    # Use the imported class. Any failure is reported, not raised, because
    # this script only exists to print diagnostics.
    try:
        generator = TestsetGenerator()
        print("Successfully created a TestsetGenerator instance.")
    except Exception as e:
        print("Error creating TestsetGenerator instance:", e)
utils.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_index.core import SimpleDirectoryReader
2
+ from llama_index.core.node_parser import SentenceSplitter
3
+ from llama_index.core import Settings
4
+ from llama_index.llms.openai import OpenAI
5
+ from llama_index.embeddings.openai import OpenAIEmbedding
6
+ from llama_index.core import SummaryIndex, VectorStoreIndex
7
+ from llama_index.core.tools import QueryEngineTool
8
+ from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
9
+ from llama_index.core.selectors import LLMSingleSelector
10
+
11
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, SummaryIndex
12
+ from llama_index.core.node_parser import SentenceSplitter
13
+ from llama_index.core.tools import FunctionTool, QueryEngineTool
14
+ from llama_index.core.vector_stores import MetadataFilters, FilterCondition
15
+ from typing import List, Optional
16
+
17
+
18
+
19
def get_doc_tools(
    file_path: str,
    name: str,
) -> "tuple[FunctionTool, QueryEngineTool]":
    """Get vector query and summary query tools from a document.

    The file is loaded, split into sentence-based chunks (chunk_size=1024),
    and the resulting nodes back both a vector index and a summary index.

    Args:
        file_path: Path of the single document to load.
        name: Suffix used in the tool names (``vector_tool_<name>`` and
            ``summary_tool_<name>``) and in the summary tool description.

    Returns:
        A ``(vector_query_tool, summary_tool)`` pair.
    """

    # load documents
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_index = VectorStoreIndex(nodes)

    # NOTE: no explicit description is passed to FunctionTool.from_defaults
    # below, so the tool description is expected to come from this function's
    # signature and docstring. Both are therefore kept exactly as written.
    def vector_query(
        query: str,
        page_numbers: Optional[List[str]] = None
    ) -> str:
        """Use to answer questions over a given paper.

        Useful if you have specific questions over the paper.
        Always leave page_numbers as None UNLESS there is a specific page you want to search for.

        Args:
            query (str): the string query to be embedded.
            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE
                if we want to perform a vector search
                over all pages. Otherwise, filter by the set of specified pages.

        """

        # None (or an empty list) means "no page filter".
        page_numbers = page_numbers or []
        metadata_dicts = [
            {"key": "page_label", "value": p} for p in page_numbers
        ]

        # OR condition: a node matches if it is on any of the requested pages.
        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            filters=MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.OR
            )
        )
        response = query_engine.query(query)
        return response

    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}",
        fn=vector_query
    )

    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )
    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(
            f"Useful for summarization questions related to {name}"
        ),
    )

    return vector_query_tool, summary_tool