Spaces:
Sleeping
Sleeping
update loader
Browse files- app.py +20 -33
- He Yingxu_2806.pdf → docs/He Yingxu_2806.pdf +0 -0
- docs/resume.md +90 -0
app.py
CHANGED
@@ -2,11 +2,10 @@ import gradio as gr
|
|
2 |
import os
|
3 |
import time
|
4 |
|
5 |
-
from langchain.document_loaders import
|
6 |
|
7 |
from langchain.text_splitter import CharacterTextSplitter
|
8 |
|
9 |
-
|
10 |
from langchain.llms import OpenAI
|
11 |
|
12 |
from langchain.embeddings import OpenAIEmbeddings
|
@@ -27,30 +26,25 @@ Follow Up Input: {question}
|
|
27 |
|
28 |
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(_template)
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
qa = ConversationalRetrievalChain.from_llm(
|
44 |
-
llm=OpenAI(temperature=0.5),
|
45 |
-
retriever=retriever,
|
46 |
-
condense_question_prompt=CUSTOM_QUESTION_PROMPT,
|
47 |
-
return_source_documents=False)
|
48 |
-
return "Ready"
|
49 |
|
50 |
def add_text(history, text):
|
51 |
history = history + [(text, None)]
|
52 |
return history, ""
|
53 |
|
|
|
54 |
def bot(history):
|
55 |
print(history)
|
56 |
response = infer(history[-1][0], history)
|
@@ -76,7 +70,8 @@ def infer(question, history):
|
|
76 |
#print(result)
|
77 |
return result["answer"]
|
78 |
|
79 |
-
|
|
|
80 |
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
81 |
"""
|
82 |
|
@@ -93,23 +88,15 @@ title = """
|
|
93 |
with gr.Blocks(css=css) as demo:
|
94 |
with gr.Column(elem_id="col-container"):
|
95 |
gr.HTML(title)
|
96 |
-
|
97 |
-
with gr.Column():
|
98 |
-
# openai_key = gr.Textbox(label="You OpenAI API key", type="password")
|
99 |
-
# pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
|
100 |
-
with gr.Row():
|
101 |
-
langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
|
102 |
-
load_pdf = gr.Button("Load pdf to langchain")
|
103 |
-
|
104 |
chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
|
105 |
question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
|
106 |
submit_btn = gr.Button("Send Message")
|
107 |
-
|
108 |
-
load_pdf.click(pdf_changes, inputs=[], outputs=[langchain_status], queue=False)
|
109 |
question.submit(add_text, [chatbot, question], [chatbot, question]).then(
|
110 |
bot, chatbot, chatbot
|
111 |
)
|
112 |
submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
|
113 |
bot, chatbot, chatbot)
|
114 |
|
115 |
-
demo.launch()
|
|
|
2 |
import os
|
3 |
import time
|
4 |
|
5 |
+
from langchain.document_loaders import UnstructuredMarkdownLoader
|
6 |
|
7 |
from langchain.text_splitter import CharacterTextSplitter
|
8 |
|
|
|
9 |
from langchain.llms import OpenAI
|
10 |
|
11 |
from langchain.embeddings import OpenAIEmbeddings
|
|
|
26 |
|
27 |
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(_template)
|
28 |
|
29 |
+
loader = UnstructuredMarkdownLoader('docs/resume.md')
|
30 |
+
documents = loader.load()
|
31 |
+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
32 |
+
texts = text_splitter.split_documents(documents)
|
33 |
+
embeddings = OpenAIEmbeddings()
|
34 |
+
db = Chroma.from_documents(texts, embeddings)
|
35 |
+
retriever = db.as_retriever()
|
36 |
+
qa = ConversationalRetrievalChain.from_llm(
|
37 |
+
llm=OpenAI(temperature=0.3),
|
38 |
+
retriever=retriever,
|
39 |
+
condense_question_prompt=CUSTOM_QUESTION_PROMPT,
|
40 |
+
return_source_documents=False)
|
41 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
def add_text(history, text):
|
44 |
history = history + [(text, None)]
|
45 |
return history, ""
|
46 |
|
47 |
+
|
48 |
def bot(history):
|
49 |
print(history)
|
50 |
response = infer(history[-1][0], history)
|
|
|
70 |
#print(result)
|
71 |
return result["answer"]
|
72 |
|
73 |
+
|
74 |
+
css = """
|
75 |
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
76 |
"""
|
77 |
|
|
|
88 |
with gr.Blocks(css=css) as demo:
|
89 |
with gr.Column(elem_id="col-container"):
|
90 |
gr.HTML(title)
|
91 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
|
93 |
question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
|
94 |
submit_btn = gr.Button("Send Message")
|
95 |
+
|
|
|
96 |
question.submit(add_text, [chatbot, question], [chatbot, question]).then(
|
97 |
bot, chatbot, chatbot
|
98 |
)
|
99 |
submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
|
100 |
bot, chatbot, chatbot)
|
101 |
|
102 |
+
demo.launch()
|
He Yingxu_2806.pdf → docs/He Yingxu_2806.pdf
RENAMED
File without changes
|
docs/resume.md
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# personal information
|
2 |
+
## identification
|
3 |
+
Singapore Permanent Resident|Chinese citizen
|
4 |
+
|
5 |
+
## address
|
6 |
+
17 Jalan Masjid, Singapore
|
7 |
+
|
8 |
+
## contact
|
9 |
+
yingxu.he1998@gmail.com|+65 91752741|+86 15063250971
|
10 |
+
|
11 |
+
# Working Experience
|
12 |
+
## Machine Learning Engineer at Huawei Ltd.
|
13 |
+
• from Dec 2022 to present
|
14 |
+
|
15 |
+
• Built a pipeline to automatically visualize data tables using LSTM network trained on ChatGPT-generated
|
16 |
+
data with pairwise loss method, achieving 80% recall@5 on 100+ internal test cases.
|
17 |
+
|
18 |
+
• Designed and implemented a novel SISR method that enhanced WIFI-signal simulations for office buildings
|
19 |
+
by achieving 10x speedup compared to physics-based simulation with negligible loss in accuracy (1% MAE)
|
20 |
+
on over 80 large-scale office layouts.
|
21 |
+
|
22 |
+
## Machine Learning Research Engineer at Dyson Ltd.
|
23 |
+
• from Sept 2021 to Dec 2022
|
24 |
+
|
25 |
+
• Implemented an object localization model in a few-shot context by semi-supervised training. The model
|
26 |
+
achieved comparable results to professional software with improved adaptability and robustness.
|
27 |
+
|
28 |
+
• Designed and implemented an air quality estimation model, using LGBM, Bayesian Regression, etc., with
|
29 |
+
geographical and meteorological features. Demonstrated its advantages over spatial interpolated methods
|
30 |
+
and deployed the pipeline with Metaflow framework on AWS services.
|
31 |
+
|
32 |
+
## ML Research Assistant at NUS-Singtel Cyber Security Lab
|
33 |
+
• from Sept 2020 to July 2021
|
34 |
+
|
35 |
+
• Identified anomalies from system logs leveraging DBSCAN and hierarchical clustering for model training.
|
36 |
+
|
37 |
+
• Developed an information retrieval method for web-attack strategy identification from system and firewall
|
38 |
+
logs. The recall@3 rate achieved 80% on 100+ hand-labelled samples.
|
39 |
+
|
40 |
+
## Data Analyst Intern at GIC Pte. Ltd.
|
41 |
+
• from Dec 2018 to July 2019
|
42 |
+
|
43 |
+
• Deployed an R application that forecasts the mid-term returns of portfolio with visualization using R shiny.
|
44 |
+
|
45 |
+
• Optimized the coefficients of a mean reversion forecasting model using the Genetic Algorithm.
|
46 |
+
|
47 |
+
## Data Analyst Intern at PropertyGuru
|
48 |
+
• from May 2018 to Aug 2018
|
49 |
+
|
50 |
+
• Developed dashboards in Tableau to analyze the user behaviors and listings’ performance to better match
|
51 |
+
user demand to agents’ recommendations.
|
52 |
+
|
53 |
+
• Implemented a POC to calculate and geographically visualize the liveability score for properties.
|
54 |
+
|
55 |
+
# Education
|
56 |
+
## Master of Computing in Artificial Intelligence at National University of Singapore
|
57 |
+
• from Aug 2020 to Sept 2021
|
58 |
+
• School of Computing: CAP 4.42/5.0
|
59 |
+
• Teaching Assistant: Advanced Analytics and Machine Learning (from Jan 2021 to May 2021)
|
60 |
+
|
61 |
+
## Bachelor of Science (Hons) in Business Analytics at National University of Singapore
|
62 |
+
• from Aug 2016 to June 2020
|
63 |
+
• School of Computing: CAP 4.15/5.0, Dean’s List in Semester 3 AY 2018/2019
|
64 |
+
• Distinction: Analytics Techniques Knowledge Area (awarded in Dec 2020)
|
65 |
+
• Teaching Assistant: Programming Methodology in Python (from Aug 2017 to June 2018)
|
66 |
+
|
67 |
+
# Relevant Projects
|
68 |
+
## Distilling ChatGPT for finetuning image captioning models
|
69 |
+
• from Jan 2023 to Present
|
70 |
+
• Employed Chain-of-Thought with verification prompting technique on ChatGPT to create 10k+ accurate
|
71 |
+
captions from the xView annotations. Fine-tuned a GIT image captioning model and significantly improved
|
72 |
+
the CIDEr score from 11.59 to 85.93 over 2k RSICD samples.
|
73 |
+
## Dialogue Response Generation (Master Thesis) at NUS NExT++ Lab
|
74 |
+
• from Nov 2020 to Aug 2021
|
75 |
+
• Built an enriched task-oriented response generation by implementing copy-mechanism on GPT-2 using
|
76 |
+
Pytorch. The proposed model is capable of naturally incorporating external tips/user reviews about venues
|
77 |
+
into responses. The generated response outperforms many state-of-the-art models on user satisfaction.
|
78 |
+
## Property Resale Price Prediction
|
79 |
+
• from Jan 2021 to May 2021
|
80 |
+
• Fitted CatBoost, LGBM, XGBoost on 43k pieces of property sales data. Selected features by correlation and
|
81 |
+
information gain. Engineered new features describing properties’ livability. Reduced data dimensionality
|
82 |
+
with WOE encoding. The final ensemble methods’ accuracy achieved 5th/64 place.
|
83 |
+
|
84 |
+
# Skills
|
85 |
+
• Python (Pytorch, Tensorflow), R: Machine
|
86 |
+
Learning, Deep Learning, Data processing
|
87 |
+
• SQL, Spark: Data query and big data
|
88 |
+
• Tableau, PowerBI: Visualization development
|
89 |
+
• Java, Git, Scala, JavaScript, HTML, CSS: Software
|
90 |
+
Development
|