verified_categories

#32
Files changed (3) hide show
  1. app.py +4 -0
  2. requirements.txt +2 -1
  3. src/verified_categorie.py +127 -0
app.py CHANGED
@@ -17,6 +17,7 @@ from src.utils import init_map
17
  from src.map_utils import get_legend_macro
18
  from src.dataframes import load_data
19
  import gettext
 
20
 
21
  gettext.install("myapplication")
22
 
@@ -85,6 +86,9 @@ selected_options, options, show_unverified, show_interventions = show_requests_f
85
  len_solved_verified_requests,
86
  ) = load_data(show_unverified, selected_options, options)
87
 
 
 
 
88
  # Selection of interventions
89
  selected_statuses = show_interventions_filters()
90
 
 
17
  from src.map_utils import get_legend_macro
18
  from src.dataframes import load_data
19
  import gettext
20
+ from src.verified_categorie import add_category, string_category
21
 
22
  gettext.install("myapplication")
23
 
 
86
  len_solved_verified_requests,
87
  ) = load_data(show_unverified, selected_options, options)
88
 
89
+ verified_df = add_category(verified_df)
90
+ verified_df = string_category(verified_df)
91
+
92
  # Selection of interventions
93
  selected_statuses = show_interventions_filters()
94
 
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  folium
2
- streamlit_folium
 
 
1
  folium
2
+ streamlit_folium
3
+ nltk
src/verified_categorie.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from enum import Enum
3
+ import pandas as pd
4
+ import nltk
5
+ from nltk.stem import WordNetLemmatizer
6
+
7
+
8
# Exact phrases for which to_category() reports no category at all.
PHRASE_NO_PROBLEMS = [
    "got food",
    "got food and clothes",
    "got food and covers",
]

# Keywords that map a word to HelpCategory.HOUSE.
KEYS_HOUSE = [
    "shelters",
    "mattresses",
    "pillows",
    "blankets",
    "shelter",
    "tentes",
    "housing",
    "couvertures",
    "tents",
    "covers",
    "sdader",
    # Token produced by clean() from the literal 'Housing/Shelter'.
    "housing_shelter",
]

# Keywords that map a word to HelpCategory.FOOD.
KEYS_FOOD = [
    "groceries",
    "nouriture",
    "food",
    "water",
    "gaz",
    "dishes",
    "oil",
    "sugar",
    "tea",
    "hungry",
]

# Keywords that map a word to HelpCategory.CLOTHES.
KEYS_CLOTHES = [
    "clothes",
    "clothing",
    "hygiene",
]

# Keywords that map a word to HelpCategory.MEDICAL.
KEYS_MEDICAL = [
    "betadine",
    "medical",
    "diabetics",
    "medicaments",
    "diabetes",
    "doliprane",
    "vitamines",
    "drugs",
]
53
+
54
class HelpCategory(Enum):
    """Kinds of help a request can be classified into.

    Each member's value is the short lowercase label that is written to the
    ``help_category`` column by string_category().
    """

    HOUSE = 'house'
    FOOD = 'food'
    CLOTHES = 'clothes'
    MEDICAL = 'medical'
    # Fallback used by to_category() when no keyword matched.
    UNKNOW = 'unknow'
60
+
61
+
62
# Download the 'wordnet' and 'omw-1.4' NLTK resources when this module is
# imported, then create the lemmatizer shared by the rest of the module.
nltk.download('wordnet')
nltk.download('omw-1.4')
lemmatizer = WordNetLemmatizer()

# Lemmatized form of every keyword list, computed once at import time so
# to_category() can compare lemmatized words against them.
lemmatize_house = list(map(lemmatizer.lemmatize, KEYS_HOUSE))
lemmatize_food = list(map(lemmatizer.lemmatize, KEYS_FOOD))
lemmatize_clothes = list(map(lemmatizer.lemmatize, KEYS_CLOTHES))
lemmatize_medical = list(map(lemmatizer.lemmatize, KEYS_MEDICAL))
70
+
71
def to_category(text: str) -> List[HelpCategory]:
    """Return the help categories mentioned by the words of one help phrase.

    The phrase is split on whitespace. A word counts for a category when
    either the word itself is in that category's keyword list or its lemma
    is in the corresponding lemmatized keyword list.

    Args:
        text: A single help phrase (already cleaned/lower-cased by the caller).

    Returns:
        An empty list when ``text`` is exactly one of PHRASE_NO_PROBLEMS;
        otherwise the matched categories without duplicates, in order of
        first match; ``[HelpCategory.UNKNOW]`` when nothing matched.
    """
    if text in PHRASE_NO_PROBLEMS:
        return []

    # (category, raw keywords, lemmatized keywords) checked for every word.
    keyword_groups = (
        (HelpCategory.HOUSE, KEYS_HOUSE, lemmatize_house),
        (HelpCategory.FOOD, KEYS_FOOD, lemmatize_food),
        (HelpCategory.CLOTHES, KEYS_CLOTHES, lemmatize_clothes),
        (HelpCategory.MEDICAL, KEYS_MEDICAL, lemmatize_medical),
    )

    categories: List[HelpCategory] = []
    for word in text.split():
        # Lemmatize once per word instead of once per category check.
        lemma = lemmatizer.lemmatize(word)
        for category, raw_keys, lemma_keys in keyword_groups:
            matched = word in raw_keys or lemma in lemma_keys
            # A keyword usually matches both raw and lemmatized; record the
            # category only once.
            if matched and category not in categories:
                categories.append(category)

    if not categories:
        categories = [HelpCategory.UNKNOW]
    return categories
97
+
98
+
99
def clean(text: str) -> str:
    """Normalize a raw help description before it is split into phrases.

    The literal 'Housing/Shelter' becomes the single token 'housing_shelter',
    every remaining '/' becomes ',', and the result is lower-cased and
    stripped of surrounding whitespace.
    """
    # The 'Housing/Shelter' replacement must run before '/' is turned into
    # ',' and before lower-casing, because it matches that exact spelling.
    with_separators = text.replace('Housing/Shelter', 'housing_shelter').replace('/', ',')
    return with_separators.lower().strip()
105
+
106
+
107
def to_list(text: str) -> List[str]:
    """Split a comma-separated help description into individual phrases.

    Within each phrase every '.' is replaced by a space, then surrounding
    whitespace is removed. Empty phrases are kept as empty strings.
    """
    return [piece.replace('.', ' ').strip() for piece in text.split(',')]
111
+
112
+
113
def help_text_to_help_category(helps: List[str]) -> "List[HelpCategory]":
    """Collect the distinct help categories found across several phrases.

    Args:
        helps: Help phrases, each classified with to_category().

    Returns:
        Every category returned for any phrase, without duplicates, in the
        order in which each category was first encountered. An empty list
        when ``helps`` is empty or no phrase yields a category.
    """
    # A dict keeps insertion order, so the result is stable between runs;
    # iterating a set of enum members depends on their hash values.
    seen = {}
    for help_string in helps:
        for category in to_category(help_string):
            seen.setdefault(category, None)
    return list(seen)
119
+
120
+
121
def add_category(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``help_category`` column derived from the ``Help Details`` column.

    Each ``Help Details`` value is passed through clean(), to_list() and
    help_text_to_help_category() in turn. The frame is modified in place
    and also returned.
    """
    cleaned_details = df['Help Details'].apply(clean)
    help_phrases = cleaned_details.apply(to_list)
    df['help_category'] = help_phrases.apply(help_text_to_help_category)
    return df
124
+
125
def string_category(df: pd.DataFrame) -> pd.DataFrame:
    """Replace each list in ``help_category`` with a comma-joined string.

    Every element's ``.value`` is joined with ',' (no spaces); an empty list
    becomes an empty string. The frame is modified in place and also
    returned.
    """
    def _join_values(categories):
        return ','.join(category.value for category in categories)

    df['help_category'] = df['help_category'].apply(_join_values)
    return df