# Streamlit dashboard for identifying references to vulnerable groups in an
# uploaded document. The page text, the uploaders and the model load are
# active; the paragraph-extraction / prediction pipeline at the bottom of the
# file is still commented out (work in progress).
import tempfile

import streamlit as st
from setfit import SetFitModel

from file_processing import get_paragraphs

####################################### Dashboard ######################################################

# App
st.title("Identify references to vulnerable groups.")
st.write("""Vulnerable groups encompass various communities and individuals who are disproportionately affected by the impacts of climate change due to their socioeconomic status, geographical location, or inherent characteristics. By incorporating the needs and perspectives of these groups into national climate policies, governments can ensure equitable outcomes, promote social justice, and strive to build resilience within the most marginalized populations, fostering a more sustainable and inclusive society as we navigate the challenges posed by climate change.This app allows you to identify whether a text contains any references to vulnerable groups, for example when talking about policy documents.""")

# Document upload
uploaded_file = st.file_uploader("Upload your file here")

# Create text input box
#input_text = st.text_area(label='Please enter your text here', value="This policy has been implemented to support women.")
#st.write('Prediction:', model(input_text))

######################################### Model #########################################################

# Load the model
# NOTE(review): this runs at module level, i.e. on every execution of the
# script; consider caching the loaded model if reload time becomes a problem.
model = SetFitModel.from_pretrained("leavoigt/vulnerable_groups")

# Define the classes (class index -> human-readable label)
id2label = {
    0: 'Agricultural communities',
    1: 'Children and Youth',
    2: 'Coastal communities',
    3: 'Drought-prone regions',
    4: 'Economically disadvantaged communities',
    5: 'Elderly population',
    6: 'Ethnic minorities and indigenous people',
    7: 'Informal sector workers',
    8: 'Migrants and Refugees',
    9: 'Other',
    10: 'People with Disabilities',
    11: 'Rural populations',
    12: 'Sexual minorities (LGBTQI+)',
    13: 'Urban populations',
    14: 'Women',
}

### Process document to paragraphs

# Source: https://blog.jcharistech.com/2021/01/21/how-to-save-uploaded-files-to-directory-in-streamlit-apps/
# Store uploaded file temporarily in directory to get file path (necessary for processing)
# def save_uploadedfile(upl_file):
#     with open(os.path.join("tempDir",upl_file.name),"wb") as f:
#         f.write(upl_file.getbuffer())
#     return st.success("Saved File:{} to tempDir".format(upl_file.name))

# if uploaded_file is not None:
#     # Save the file
#     file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type}
#     save_uploadedfile(uploaded_file)

# #Get the file path

# NOTE(review): this is a second, PDF-only uploader widget. Its value is not
# read anywhere below; the branch that follows works on `uploaded_file` from
# the first uploader. Confirm which of the two widgets is meant to stay.
file = st.file_uploader("File upload", type=["pdf"])

if uploaded_file is not None:
    # Retrieve the file name: copy the upload into a named temporary file so
    # that a real filesystem path is available for downstream processing.
    with tempfile.NamedTemporaryFile(mode="wb") as temp:
        # Read from the object this branch has just checked for None
        # (the original referenced an undefined name `files` here).
        bytes_data = uploaded_file.getvalue()
        temp.write(bytes_data)
        # Push buffered bytes to disk so the path printed below refers to the
        # complete file contents while the context manager is still open.
        temp.flush()
        print(temp.name)
    # The temporary file is removed when the `with` block exits, so anything
    # that needs the path must run inside the block above.

# # Process file
# par_list = get_paragraphs(uploaded_file)

# ### Make predictions
# preds = vg_model(par_list)

# # Get label names
# preds_list = preds.tolist()
# predictions_names=[]

# # loop through each prediction
# for ele in preds_list:
#     try:
#         index_of_one = ele.index(1)
#     except ValueError:
#         index_of_one = "NA"
#     if index_of_one != "NA":
#         name = id2label[index_of_one]
#     else:
#         name = "NA"
#     predictions_names.append(name)

# # Combine the paragraphs and labels to a dataframe
# df_predictions = pd.DataFrame({'Paragraph': par_list, 'Prediction': predictions_names})

# # Drop all "Other" and "NA" predictions
# filtered_df = df[df['Prediction'].isin(['Other', 'NA'])]

# #####################################
# st.write(df)