Robzy committed on
Commit
983a2d4
·
1 Parent(s): 8fe7f88

initial commit

Browse files
Files changed (2) hide show
  1. few-shot.txt +24 -0
  2. tagging.py +7 -31
few-shot.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Example #96
2
+ Tokens: ['Public']
3
+ Skill Labels: ['O']
4
+ Knowledge Labels: ['O']
5
+
6
+ Example #97
7
+ Tokens: ['Technologies']
8
+ Skill Labels: ['O']
9
+ Knowledge Labels: ['O']
10
+
11
+ Example #98
12
+ Tokens: ['cloud', 'java', 'amazon-web-services']
13
+ Skill Labels: ['O', 'O', 'O']
14
+ Knowledge Labels: ['B', 'B', 'B']
15
+
16
+ Example #99
17
+ Tokens: ['Job', 'description']
18
+ Skill Labels: ['O', 'O']
19
+ Knowledge Labels: ['O', 'O']
20
+
21
+ Example #100
22
+ Tokens: ['As', 'a', 'member', 'of', 'our', 'Software', 'Engineering', 'Group', 'we', 'look', 'first', 'and', 'foremost', 'for', 'people', 'who', 'are', 'passionate', 'about', 'solving', 'business', 'problems', 'through', 'innovation', 'and', 'engineering', 'practices', '.']
23
+ Skill Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'I', 'I', 'O']
24
+ Knowledge Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
tagging.py CHANGED
@@ -1,31 +1,3 @@
1
- few_shot_examples = """
2
- Example #96
3
- Tokens: ['Public']
4
- Skill Labels: ['O']
5
- Knowledge Labels: ['O']
6
-
7
- Example #97
8
- Tokens: ['Technologies']
9
- Skill Labels: ['O']
10
- Knowledge Labels: ['O']
11
-
12
- Example #98
13
- Tokens: ['cloud', 'java', 'amazon-web-services']
14
- Skill Labels: ['O', 'O', 'O']
15
- Knowledge Labels: ['B', 'B', 'B']
16
-
17
- Example #99
18
- Tokens: ['Job', 'description']
19
- Skill Labels: ['O', 'O']
20
- Knowledge Labels: ['O', 'O']
21
-
22
- Example #100
23
- Tokens: ['As', 'a', 'member', 'of', 'our', 'Software', 'Engineering', 'Group', 'we', 'look', 'first', 'and', 'foremost', 'for', 'people', 'who', 'are', 'passionate', 'about', 'solving', 'business', 'problems', 'through', 'innovation', 'and', 'engineering', 'practices', '.']
24
- Skill Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B', 'I', 'I', 'I', 'I', 'I', 'I', 'I', 'O']
25
- Knowledge Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
26
- """
27
-
28
-
29
  import os
30
  from langchain_openai import ChatOpenAI
31
  from pydantic import BaseModel
@@ -41,9 +13,7 @@ from transformers import AutoTokenizer, AutoModelForTokenClassification
41
  import torch
42
  import sys
43
  from tabulate import tabulate
44
-
45
  load_dotenv(".env")
46
- # ChatOpenAI.api_key = OPENAI_API_KEY
47
 
48
 
49
  ### LLM-based tag extraction with few-shot learning
@@ -60,6 +30,8 @@ model = ChatOpenAI(model_name="gpt-4o", temperature=0.0, api_key=os.getenv('OPEN
60
  tokenizer = AutoTokenizer.from_pretrained("jjzha/jobbert_skill_extraction")
61
  parser = JsonOutputParser(pydantic_object=TokenTaggingResult)
62
 
 
 
63
  skill_definition = """
64
  Skill means the ability to apply knowledge and use know-how to complete tasks and solve problems.
65
  """
@@ -68,6 +40,10 @@ knowledge_definition = """
68
  Knowledge means the outcome of the assimilation of information through learning. Knowledge is the body of facts, principles, theories and practices that is related to a field of work or study.
69
  """
70
 
 
 
 
 
71
  prompt = PromptTemplate(
72
  template="""You are an expert in tagging tokens with skill and knowledge labels. Use the following definitions to tag the input tokens:
73
  Skill definition:{skill_definition}
@@ -92,6 +68,7 @@ def extract_tags(text: str, tokenize = True) -> TokenTaggingResult:
92
  output = parser.invoke(output)
93
  return tokens, output
94
 
 
95
  ### Pre-trained model from Hugging Face
96
 
97
  mapping = {0: 'B', 1: 'I', 2: 'O'}
@@ -114,7 +91,6 @@ def convert(text):
114
  return skill_cls, knowledge_cls
115
 
116
 
117
-
118
  if __name__ == "__main__":
119
  text = input('Enter text: ')
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  from langchain_openai import ChatOpenAI
3
  from pydantic import BaseModel
 
13
  import torch
14
  import sys
15
  from tabulate import tabulate
 
16
  load_dotenv(".env")
 
17
 
18
 
19
  ### LLM-based tag extraction with few-shot learning
 
30
  tokenizer = AutoTokenizer.from_pretrained("jjzha/jobbert_skill_extraction")
31
  parser = JsonOutputParser(pydantic_object=TokenTaggingResult)
32
 
33
+ # Definitions
34
+
35
  skill_definition = """
36
  Skill means the ability to apply knowledge and use know-how to complete tasks and solve problems.
37
  """
 
40
  Knowledge means the outcome of the assimilation of information through learning. Knowledge is the body of facts, principles, theories and practices that is related to a field of work or study.
41
  """
42
 
43
+ # Few-shot examples
44
+ with open('few-shot.txt', 'r') as file:
45
+ few_shot_examples = file.read()
46
+
47
  prompt = PromptTemplate(
48
  template="""You are an expert in tagging tokens with skill and knowledge labels. Use the following definitions to tag the input tokens:
49
  Skill definition:{skill_definition}
 
68
  output = parser.invoke(output)
69
  return tokens, output
70
 
71
+
72
  ### Pre-trained model from Hugging Face
73
 
74
  mapping = {0: 'B', 1: 'I', 2: 'O'}
 
91
  return skill_cls, knowledge_cls
92
 
93
 
 
94
  if __name__ == "__main__":
95
  text = input('Enter text: ')
96