talk-to-me

Sleeping

App Files Files Community

YingxuHe committed on Aug 14, 2023

Commit

a619426

1 Parent(s): ae714e8

update loader

Browse files

Files changed (3) hide show

app.py +20 -33
He Yingxu_2806.pdf → docs/He Yingxu_2806.pdf +0 -0
docs/resume.md +90 -0

app.py CHANGED Viewed

@@ -2,11 +2,10 @@ import gradio as gr
 import os
 import time
-from langchain.document_loaders import OnlinePDFLoader, PyPDFLoader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.llms import OpenAI
 from langchain.embeddings import OpenAIEmbeddings
@@ -27,30 +26,25 @@ Follow Up Input: {question}
 CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-def loading_pdf():
-    return "Loading..."
-def pdf_changes():
-    loader = PyPDFLoader("He Yingxu_2806.pdf")
-    documents = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-    texts = text_splitter.split_documents(documents)
-    embeddings = OpenAIEmbeddings()
-    db = Chroma.from_documents(texts, embeddings)
-    retriever = db.as_retriever()
-    global qa
-    qa = ConversationalRetrievalChain.from_llm(
-        llm=OpenAI(temperature=0.5),
-        retriever=retriever,
-        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
-        return_source_documents=False)
-    return "Ready"
 def add_text(history, text):
     history = history + [(text, None)]
     return history, ""
 def bot(history):
     print(history)
     response = infer(history[-1][0], history)
@@ -76,7 +70,8 @@ def infer(question, history):
     #print(result)
     return result["answer"]
-css="""
 #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
 """
@@ -93,23 +88,15 @@ title = """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.HTML(title)
-        with gr.Column():
-            # openai_key = gr.Textbox(label="You OpenAI API key", type="password")
-            # pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
-            with gr.Row():
-                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
-                load_pdf = gr.Button("Load pdf to langchain")
         chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
         question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
         submit_btn = gr.Button("Send Message")
-    load_pdf.click(loading_pdf, None, langchain_status, queue=False)
-    load_pdf.click(pdf_changes, inputs=[], outputs=[langchain_status], queue=False)
     question.submit(add_text, [chatbot, question], [chatbot, question]).then(
         bot, chatbot, chatbot
     )
     submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
         bot, chatbot, chatbot)
-demo.launch()

 import os
 import time
+from langchain.document_loaders import UnstructuredMarkdownLoader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.llms import OpenAI
 from langchain.embeddings import OpenAIEmbeddings
 CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+loader = UnstructuredMarkdownLoader('docs/resume.md')
+documents = loader.load()
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+texts = text_splitter.split_documents(documents)
+embeddings = OpenAIEmbeddings()
+db = Chroma.from_documents(texts, embeddings)
+retriever = db.as_retriever()
+qa = ConversationalRetrievalChain.from_llm(
+    llm=OpenAI(temperature=0.3),
+    retriever=retriever,
+    condense_question_prompt=CUSTOM_QUESTION_PROMPT,
+    return_source_documents=False)
 def add_text(history, text):
     history = history + [(text, None)]
     return history, ""
 def bot(history):
     print(history)
     response = infer(history[-1][0], history)
     #print(result)
     return result["answer"]
+css = """
 #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.HTML(title)
         chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
         question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")
         submit_btn = gr.Button("Send Message")
     question.submit(add_text, [chatbot, question], [chatbot, question]).then(
         bot, chatbot, chatbot
     )
     submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
         bot, chatbot, chatbot)
+demo.launch()

He Yingxu_2806.pdf → docs/He Yingxu_2806.pdf RENAMED Viewed

File without changes

docs/resume.md ADDED Viewed

	@@ -0,0 +1,90 @@

+# personal information
+## identification
+Singapore Permanent Resident|Chinese citizen
+## address
+17 Jalan Masjid, Singapore
+## contact
+yingxu.he1998@gmail.com|+65 91752741|+86 15063250971
+# Working Experience
+## Machine Learning Engineer at Huawei Ltd.
+• from Dec 2022 to present
+• Built a pipeline to automatically visualize data tables using LSTM network trained on ChatGPT-generated
+data with pairwise loss method, achieving 80% recall@5 on 100+ internal test cases.
+• Designed and implemented a novel SISR method that enhanced WIFI-signal simulations for office buildings
+by achieving 10x speedup compared to physics-based simulation with negligible loss in accuracy (1% MAE)
+on over 80 large-scale office layouts.
+## Machine Learning Research Engineer at Dyson Ltd.
+• from Sept 2021 to Dec 2022
+• Implemented an object localization model in a few-shot context by semi-supervised training. The model
+achieved comparable results to professional software with improved adaptability and robustness.
+• Designed and implemented an air quality estimation model, using LGBM, Bayesian Regression, etc., with
+geographical and meteorological features. Demonstrated its advantages over spatial interpolated methods
+and deployed the pipeline with Metaflow framework on AWS services.
+## ML Research Assistant at NUS-Singtel Cyber Security Lab
+• from Sept 2020 to July 2021
+• Identified anomalies from system logs leveraging DBSCAN and hierarchical clustering for model training.
+• Developed an information retrieval method for web-attack strategy identification from system and firewall
+logs. The recall@3 rate achieved 80% on 100+ hand-labelled samples.
+## Data Analyst Intern at GIC Pte. Ltd.
+• from Dec 2018 to July 2019
+• Deployed an R application that forecasts the mid-term returns of portfolio with visualization using R shiny.
+• Optimized the coefficients of a mean reversion forecasting model using the Genetic Algorithm.
+## Data Analyst Intern at PropertyGuru
+• from May 2018 to Aug 2018
+• Developed dashboards in Tableau to analyze the user behaviors and listings’ performance to better match
+user demand to agents’ recommendations.
+• Implemented a POC to calculate and geographically visualize the liveability score for properties.
+# Education
+## Master of Computing in Artificial Intelligence at National University of Singapore
+• from Aug 2020 to Sept 2021
+• School of Computing : CAP 4.42/5.0
+• Teaching Assistant : Advanced Analytics and Machine Learning (from Jan 2021 to May 2021)
+## Bachelor of Science (Hons) in Business Analytics at National University of Singapore
+• from Aug 2016 to June 2020
+• School of Computing : CAP 4.15/5.0, Dean’s List in Semester 3 AY 2018/2019
+• Distinction : Analytics Techniques Knowledge Area (awarded in Dec 2020)
+• Teaching Assistant : Programming Methodology in python (from Aug 2017 to June 2018)
+# Relevant Projects
+## Distilling ChatGPT for finetuning image captioning models
+• from Jan 2023 to Present
+• Employed Chain-of-Thought with verification prompting technique on ChatGPT to create 10k+ accurate
+captions from the xView annotations. Fine-tuned a GIT image captioning model and significantly improved
+the CIDEr score from 11.59 to 85.93 over 2k RSICD samples.
+## Dialogue Response Generation (Master Thesis) at NUS NExT++ Lab
+• from Nov 2020 to Aug 2021
+• Built an enriched task-oriented response generation by implementing copy-mechanism on GPT-2 using
+Pytorch. The proposed model is capable of naturally incorporating external tips/user reviews about venues
+into responses. The generated response outperforms many state-of-the-art models on user satisfaction.
+## Property Resale Price Prediction
+• from Jan 2021 to May 2021
+• Fitted CatBoost, LGBM, XGBoost on 43k pieces of property sales data. Selected features by correlation and
+information gain. Engineered new features describing properties’ livability. Reduced data dimensionality
+with WOE encoding. The final ensemble methods’ accuracy achieved 5th/64 place.
+# Skills
+• Python (Pytorch, Tensorflow), R : Machine
+Learning, Deep Learning, Data processing
+• SQL, Spark: Data query and big data
+• Tableau, PowerBI : Visualization development
+• Java, Git, Scala, JavaScript, HTML, CSS : Software
+Development