awinml commited on
Commit
5482130
·
1 Parent(s): b241a5c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +4 -2
  2. utils.py +52 -5
app.py CHANGED
@@ -13,6 +13,7 @@ from utils import (
13
  extract_entities,
14
  format_query,
15
  get_flan_alpaca_xl_model,
 
16
  generate_entities_flan_alpaca,
17
  format_entities_flan_alpaca,
18
  generate_flant5_prompt_instruct_chunk_context,
@@ -56,7 +57,7 @@ with st.sidebar:
56
  ner_choice = st.selectbox("Select NER Model", ["Alpaca", "Spacy"])
57
 
58
  if ner_choice == "Alpaca":
59
- ner_model = get_flan_alpaca_xl_model()
60
  else:
61
  ner_model = get_spacy_model()
62
 
@@ -68,7 +69,8 @@ with col1:
68
  )
69
 
70
  if ner_choice == "Alpaca":
71
- entity_text = generate_entities_flan_alpaca(ner_model)
 
72
  company_ent, quarter_ent, year_ent = format_entities_flan_alpaca(entity_text)
73
  else:
74
  company_ent, quarter_ent, year_ent = extract_entities(query_text, ner_model)
 
13
  extract_entities,
14
  format_query,
15
  get_flan_alpaca_xl_model,
16
+ generate_alpaca_ner_prompt,
17
  generate_entities_flan_alpaca,
18
  format_entities_flan_alpaca,
19
  generate_flant5_prompt_instruct_chunk_context,
 
57
  ner_choice = st.selectbox("Select NER Model", ["Alpaca", "Spacy"])
58
 
59
  if ner_choice == "Alpaca":
60
+ ner_model, ner_tokenizer = get_flan_alpaca_xl_model()
61
  else:
62
  ner_model = get_spacy_model()
63
 
 
69
  )
70
 
71
  if ner_choice == "Alpaca":
72
+ ner_prompt = generate_alpaca_ner_prompt(query_text)
73
+ entity_text = generate_entities_flan_alpaca(ner_model, ner_tokenizer, ner_prompt)
74
  company_ent, quarter_ent, year_ent = format_entities_flan_alpaca(entity_text)
75
  else:
76
  company_ent, quarter_ent, year_ent = extract_entities(query_text, ner_model)
utils.py CHANGED
@@ -36,7 +36,9 @@ def get_spacy_model():
36
 
37
  @st.experimental_singleton
38
  def get_flan_alpaca_xl_model():
39
- return pipeline(model="./models/flan-alpaca-xl")
 
 
40
 
41
 
42
  # Initialize models from HuggingFace
@@ -474,10 +476,55 @@ Answer:?"""
474
 
475
  # Entity Extraction
476
 
477
- def generate_entities_flan_alpaca(model):
478
- output = model(prompt, max_length=512, temperature=0.1)
479
- generated_text = output[0]["generated_text"]
480
- return generated_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
 
482
 
483
  def format_entities_flan_alpaca(model_output):
 
36
 
37
  @st.experimental_singleton
38
  def get_flan_alpaca_xl_model():
39
+ model = AutoModelForSeq2SeqLM("./models/flan-alpaca-xl")
40
+ tokenizer = AutoTokenizer("./models/flan-alpaca-xl")
41
+ return model, tokenizer
42
 
43
 
44
  # Initialize models from HuggingFace
 
476
 
477
  # Entity Extraction
478
 
479
+ def generate_alpaca_ner_prompt(query):
480
+ prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Use the following guidelines to extract the entities representing the Company, Quarter, and Year in the sentence.
481
+
482
+ ### Instruction:
483
+ - The output should be in the form "Company - Value, Quarter - Value, Year - Value".
484
+ - The output should be in the form "Company - None, Quarter - None, Year - None", if no entities are found.
485
+ - Only use entities that exist in the final sentence.
486
+ - If Company cannot be found in the sentence, return "none" for that entity.
487
+ - If Quarter cannot be found in the sentence, return "none" for that entity.
488
+ - If Year cannot be found in the sentence, return "none" for that entity.
489
+ - If there is ambiguity finding the entity, return "none" for that entity.
490
+
491
+ ### Input:
492
+
493
+ What was discussed regarding Wearables revenue performance in Apple's Q3 2023 earnings call?
494
+ Company - Apple, Quarter - Q3, Year - 2023
495
+
496
+ How has the growth in Q1 been for the PC market as seen by AMD?
497
+ Company - AMD, Quarter - Q1, Year - none
498
+
499
+ What was the long term view on GOOGL's cloud business growth as discussed in their earnings call?
500
+ Company - Google, Quarter - none, Year - none
501
+
502
+ What is Nvidia's visibility in the data center business in 2020?
503
+ Company - Nvidia, Quarter - none, Year - 2020
504
+
505
+ What are the opportunities and challenges in the Indian market that Amazon is facing?
506
+ Company - Amazon, Quarter - none, Year - none
507
+
508
+ What did the Analysts ask about CSCO's cybersecurity business in the earnings call?
509
+ Company - Cisco, Quarter - none, Year - none
510
+
511
+
512
+ {query}
513
+ ### Response:"""
514
+ return prompt
515
+
516
+
517
+ def generate_entities_flan_alpaca(model, tokenizer, prompt):
518
+ model_inputs = tokenizer(prompt, return_tensors="pt")
519
+ input_ids = inputs["input_ids"]
520
+ generation_output = model.generate(
521
+ input_ids=input_ids,
522
+ temperature=0.1,
523
+ top_p=0.5,
524
+ max_new_tokens=1024,
525
+ )
526
+ output = tokenizer.decode(generation_output[0], skip_special_tokens=True)
527
+ return output
528
 
529
 
530
  def format_entities_flan_alpaca(model_output):