initial commit
Browse files
- few-shot.txt +24 -0
- tagging.py +7 -31
few-shot.txt
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Example #96
|
2 |
+
Tokens: ['Public']
|
3 |
+
Skill Labels: ['O']
|
4 |
+
Knowledge Labels: ['O']
|
5 |
+
|
6 |
+
Example #97
|
7 |
+
Tokens: ['Technologies']
|
8 |
+
Skill Labels: ['O']
|
9 |
+
Knowledge Labels: ['O']
|
10 |
+
|
11 |
+
Example #98
|
12 |
+
Tokens: ['cloud', 'java', 'amazon-web-services']
|
13 |
+
Skill Labels: ['O', 'O', 'O']
|
14 |
+
Knowledge Labels: ['B', 'B', 'B']
|
15 |
+
|
16 |
+
Example #99
|
17 |
+
Tokens: ['Job', 'description']
|
18 |
+
Skill Labels: ['O', 'O']
|
19 |
+
Knowledge Labels: ['O', 'O']
|
20 |
+
|
21 |
+
Example #100
|
22 |
+
Tokens: ['As', 'a', 'member', 'of', 'our', 'Software', 'Engineering', 'Group', 'we', 'look', 'first', 'and', 'foremost', 'for', 'people', 'who', 'are', 'passionate', 'about', 'solving', 'business', 'problems', 'through', 'innovation', 'and', 'engineering', 'practices', '.']
|
23 |
+
Skill Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'I', 'I', 'O']
|
24 |
+
Knowledge Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
|
tagging.py
CHANGED
@@ -1,31 +1,3 @@
|
|
1 |
-
few_shot_examples = """
|
2 |
-
Example #96
|
3 |
-
Tokens: ['Public']
|
4 |
-
Skill Labels: ['O']
|
5 |
-
Knowledge Labels: ['O']
|
6 |
-
|
7 |
-
Example #97
|
8 |
-
Tokens: ['Technologies']
|
9 |
-
Skill Labels: ['O']
|
10 |
-
Knowledge Labels: ['O']
|
11 |
-
|
12 |
-
Example #98
|
13 |
-
Tokens: ['cloud', 'java', 'amazon-web-services']
|
14 |
-
Skill Labels: ['O', 'O', 'O']
|
15 |
-
Knowledge Labels: ['B', 'B', 'B']
|
16 |
-
|
17 |
-
Example #99
|
18 |
-
Tokens: ['Job', 'description']
|
19 |
-
Skill Labels: ['O', 'O']
|
20 |
-
Knowledge Labels: ['O', 'O']
|
21 |
-
|
22 |
-
Example #100
|
23 |
-
Tokens: ['As', 'a', 'member', 'of', 'our', 'Software', 'Engineering', 'Group', 'we', 'look', 'first', 'and', 'foremost', 'for', 'people', 'who', 'are', 'passionate', 'about', 'solving', 'business', 'problems', 'through', 'innovation', 'and', 'engineering', 'practices', '.']
|
24 |
-
Skill Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'I', 'I', 'O']
|
25 |
-
Knowledge Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
|
26 |
-
"""
|
27 |
-
|
28 |
-
|
29 |
import os
|
30 |
from langchain_openai import ChatOpenAI
|
31 |
from pydantic import BaseModel
|
@@ -41,9 +13,7 @@ from transformers import AutoTokenizer, AutoModelForTokenClassification
|
|
41 |
import torch
|
42 |
import sys
|
43 |
from tabulate import tabulate
|
44 |
-
|
45 |
load_dotenv(".env")
|
46 |
-
# ChatOpenAI.api_key = OPENAI_API_KEY
|
47 |
|
48 |
|
49 |
### LLM-based tag extraction with few-shot learning
|
@@ -60,6 +30,8 @@ model = ChatOpenAI(model_name="gpt-4o", temperature=0.0, api_key=os.getenv('OPEN
|
|
60 |
tokenizer = AutoTokenizer.from_pretrained("jjzha/jobbert_skill_extraction")
|
61 |
parser = JsonOutputParser(pydantic_object=TokenTaggingResult)
|
62 |
|
|
|
|
|
63 |
skill_definition = """
|
64 |
Skill means the ability to apply knowledge and use know-how to complete tasks and solve problems.
|
65 |
"""
|
@@ -68,6 +40,10 @@ knowledge_definition = """
|
|
68 |
Knowledge means the outcome of the assimilation of information through learning. Knowledge is the body of facts, principles, theories and practices that is related to a field of work or study.
|
69 |
"""
|
70 |
|
|
|
|
|
|
|
|
|
71 |
prompt = PromptTemplate(
|
72 |
template="""You are an expert in tagging tokens with skill and knowledge labels. Use the following definitions to tag the input tokens:
|
73 |
Skill definition:{skill_definition}
|
@@ -92,6 +68,7 @@ def extract_tags(text: str, tokenize = True) -> TokenTaggingResult:
|
|
92 |
output = parser.invoke(output)
|
93 |
return tokens, output
|
94 |
|
|
|
95 |
### Pre-trained model from Hugging Face
|
96 |
|
97 |
mapping = {0: 'B', 1: 'I', 2: 'O'}
|
@@ -114,7 +91,6 @@ def convert(text):
|
|
114 |
return skill_cls, knowledge_cls
|
115 |
|
116 |
|
117 |
-
|
118 |
if __name__ == "__main__":
|
119 |
text = input('Enter text: ')
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
from langchain_openai import ChatOpenAI
|
3 |
from pydantic import BaseModel
|
|
|
13 |
import torch
|
14 |
import sys
|
15 |
from tabulate import tabulate
|
|
|
16 |
load_dotenv(".env")
|
|
|
17 |
|
18 |
|
19 |
### LLM-based tag extraction with few-shot learning
|
|
|
30 |
tokenizer = AutoTokenizer.from_pretrained("jjzha/jobbert_skill_extraction")
|
31 |
parser = JsonOutputParser(pydantic_object=TokenTaggingResult)
|
32 |
|
33 |
+
# Definitions
|
34 |
+
|
35 |
skill_definition = """
|
36 |
Skill means the ability to apply knowledge and use know-how to complete tasks and solve problems.
|
37 |
"""
|
|
|
40 |
Knowledge means the outcome of the assimilation of information through learning. Knowledge is the body of facts, principles, theories and practices that is related to a field of work or study.
|
41 |
"""
|
42 |
|
43 |
+
# Few-shot examples
|
44 |
+
# Read the few-shot examples from the external text file into a single string.
# NOTE: the path is relative, so it is resolved against the current working
# directory of the process.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
with open('few-shot.txt', 'r', encoding='utf-8') as file:
    few_shot_examples = file.read()
|
46 |
+
|
47 |
prompt = PromptTemplate(
|
48 |
template="""You are an expert in tagging tokens with skill and knowledge labels. Use the following definitions to tag the input tokens:
|
49 |
Skill definition:{skill_definition}
|
|
|
68 |
output = parser.invoke(output)
|
69 |
return tokens, output
|
70 |
|
71 |
+
|
72 |
### Pre-trained model from Hugging Face
|
73 |
|
74 |
mapping = {0: 'B', 1: 'I', 2: 'O'}
|
|
|
91 |
return skill_cls, knowledge_cls
|
92 |
|
93 |
|
|
|
94 |
if __name__ == "__main__":
|
95 |
text = input('Enter text: ')
|
96 |
|