This view is limited to 50 files because it contains too many changes.  See the raw diff here.
.dockerignore DELETED
@@ -1,13 +0,0 @@
1
- **/.git
2
- **/.gitignore
3
- **/.vscode
4
- **/coverage
5
- **/.aws
6
- **/.ssh
7
- **/.terraform
8
- Dockerfile
9
- README.md
10
- docker-compose.yml
11
- **/.DS_Store
12
- **/venv
13
- **/env
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.env.example DELETED
@@ -1,6 +0,0 @@
1
- OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxx
2
- HF_TOKEN_PATRICK=hf_xxxxxxxxxxxxxxxxx
3
- ZOTERO_LIBRARY_ID=1120xxxx
4
- ZOTERO_API_ACCESS_KEY=Ky5RGxxxxxxxxxxxxxxxxxx
5
- GRADIO_URL_=http://gradio:7860/
6
-
 
 
 
 
 
 
 
.flake8 DELETED
@@ -1,5 +0,0 @@
1
- [flake8]
2
- ignore = D203, E402, F403, F405, W503, W605
3
- exclude = .git,env,__pycache__,docs/source/conf.py,old,build,dist, *migrations*,env,venv,alembic
4
- max-complexity = 10
5
- max-line-length = 119
 
 
 
 
 
 
.gitattributes CHANGED
@@ -36,4 +36,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  *db* filter=lfs diff=lfs merge=lfs -text
37
  vaccine_coverage_study.db filter=lfs diff=lfs merge=lfs -text
38
  *.db filter=lfs diff=lfs merge=lfs -text
39
- *.pdf filter=lfs diff=lfs merge=lfs -text
 
36
  *db* filter=lfs diff=lfs merge=lfs -text
37
  vaccine_coverage_study.db filter=lfs diff=lfs merge=lfs -text
38
  *.db filter=lfs diff=lfs merge=lfs -text
 
.gitignore DELETED
@@ -1,187 +0,0 @@
1
- ### Python ###
2
- # Byte-compiled / optimized / DLL files
3
- __pycache__/
4
- *.py[cod]
5
- *$py.class
6
-
7
- # C extensions
8
- *.so
9
-
10
- # Distribution / packaging
11
- .Python
12
- build/
13
- develop-eggs/
14
- dist/
15
- downloads/
16
- eggs/
17
- .eggs/
18
- lib/
19
- lib64/
20
- parts/
21
- sdist/
22
- var/
23
- wheels/
24
- share/python-wheels/
25
- *.egg-info/
26
- .installed.cfg
27
- *.egg
28
- MANIFEST
29
-
30
- # PyInstaller
31
- # Usually these files are written by a python script from a template
32
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
- *.manifest
34
- *.spec
35
-
36
- # Installer logs
37
- pip-log.txt
38
- pip-delete-this-directory.txt
39
-
40
- # Unit test / coverage reports
41
- htmlcov/
42
- .tox/
43
- .nox/
44
- .coverage
45
- .coverage.*
46
- .cache
47
- nosetests.xml
48
- coverage.xml
49
- *.cover
50
- *.py,cover
51
- .hypothesis/
52
- .pytest_cache/
53
- cover/
54
-
55
- # Translations
56
- *.mo
57
- *.pot
58
-
59
- # Django stuff:
60
- *.log
61
- local_settings.py
62
- db.sqlite3
63
- db.sqlite3-journal
64
-
65
- # Flask stuff:
66
- instance/
67
- .webassets-cache
68
-
69
- # Scrapy stuff:
70
- .scrapy
71
-
72
- # Sphinx documentation
73
- docs/_build/
74
-
75
- # PyBuilder
76
- .pybuilder/
77
- target/
78
-
79
- # Jupyter Notebook
80
- .ipynb_checkpoints
81
-
82
- # IPython
83
- profile_default/
84
- ipython_config.py
85
-
86
- # pyenv
87
- # For a library or package, you might want to ignore these files since the code is
88
- # intended to run in multiple environments; otherwise, check them in:
89
- # .python-version
90
-
91
- # pipenv
92
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
- # install all needed dependencies.
96
- #Pipfile.lock
97
-
98
- # poetry
99
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100
- # This is especially recommended for binary packages to ensure reproducibility, and is more
101
- # commonly ignored for libraries.
102
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103
- #poetry.lock
104
-
105
- # pdm
106
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107
- #pdm.lock
108
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109
- # in version control.
110
- # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
111
- .pdm.toml
112
- .pdm-python
113
- .pdm-build/
114
-
115
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
- __pypackages__/
117
-
118
- # Celery stuff
119
- celerybeat-schedule
120
- celerybeat.pid
121
-
122
- # SageMath parsed files
123
- *.sage.py
124
-
125
- # Environments
126
- .env
127
- .venv
128
- env/
129
- venv/
130
- ENV/
131
- env.bak/
132
- venv.bak/
133
-
134
-
135
- yes
136
- *.pub
137
-
138
-
139
- # Spyder project settings
140
- .spyderproject
141
- .spyproject
142
-
143
- # Rope project settings
144
- .ropeproject
145
-
146
- # mkdocs documentation
147
- /site
148
-
149
- # mypy
150
- .mypy_cache/
151
- .dmypy.json
152
- dmypy.json
153
-
154
- # Pyre type checker
155
- .pyre/
156
-
157
- # pytype static type analyzer
158
- .pytype/
159
-
160
- # Cython debug symbols
161
- cython_debug/
162
-
163
- # PyCharm
164
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
- # and can be added to the global gitignore or merged into this file. For a more nuclear
167
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
- #.idea/
169
-
170
- ### Python Patch ###
171
- # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
172
- poetry.toml
173
-
174
- # ruff
175
- .ruff_cache/
176
-
177
- # LSP config files
178
- pyrightconfig.json
179
-
180
- # data
181
- data/
182
- study_export_*
183
- study_files.db
184
- # study_files.json
185
-
186
- infra/ecs_config.toml
187
- aws-cli.pdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gradio/certificate.pem DELETED
@@ -1,31 +0,0 @@
1
- -----BEGIN CERTIFICATE-----
2
- MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
- TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
- cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
- WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
- ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
- MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
- h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
- 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
- A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
- T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
- B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
- B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
- KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
- OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
- jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
- qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
- rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
- HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
- hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
- ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
- 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
- NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
- ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
- TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
- jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
- oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
- 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
- mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
- emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
- -----END CERTIFICATE-----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.isort.cfg DELETED
@@ -1,7 +0,0 @@
1
- [settings]
2
- multi_line_output=3
3
- include_trailing_comma=True
4
- force_grid_wrap=0
5
- use_parentheses=True
6
- line_length=88
7
- skip=env,migrations,alembic,venv
 
 
 
 
 
 
 
 
Dockerfile.api DELETED
@@ -1,19 +0,0 @@
1
- # FastAPI Dockerfile
2
- FROM python:3.11.10-slim
3
-
4
- ENV PYTHONUNBUFFERED=1
5
- ENV OMP_NUM_THREADS=1
6
-
7
- # Set working directory
8
- WORKDIR /app
9
-
10
- # Copy app files
11
- COPY requirements.txt ./
12
- RUN pip install --no-cache-dir -r requirements.txt
13
- COPY . .
14
-
15
- # Expose port
16
- EXPOSE 8000
17
-
18
- # Command to run the FastAPI app
19
- CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile.api.prod DELETED
@@ -1,72 +0,0 @@
1
- ###########
2
- # BUILDER #
3
- ###########
4
-
5
- ARG AWS_ACCOUNT_ID
6
-
7
- # pull official base image
8
- FROM ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/gradio-python:3.11.10-slim as builder
9
-
10
- # set work directory
11
- WORKDIR /app
12
-
13
- # set environment variables
14
- ENV PYTHONDONTWRITEBYTECODE 1
15
- ENV PYTHONUNBUFFERED 1
16
- ENV OMP_NUM_THREADS=1
17
-
18
-
19
- # install dependencies
20
- RUN apt-get update \
21
- && apt-get -y install libpq-dev gcc \
22
- && pip install psycopg
23
-
24
- RUN pip install --upgrade pip
25
- COPY ./requirements.txt /app/requirements.txt
26
- RUN pip wheel --no-cache-dir --no-deps --wheel-dir /app/wheels -r requirements.txt
27
-
28
- #########
29
- # FINAL #
30
- #########
31
-
32
- ARG AWS_ACCOUNT_ID
33
-
34
- # pull official base image
35
- FROM ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/gradio-python:3.11.10-slim
36
-
37
- # create directory for the app user
38
- RUN mkdir -p /home/backend-app
39
-
40
- # create the app user
41
- RUN addgroup --system app && adduser --system --group app
42
-
43
- # create the appropriate directories
44
- ENV HOME=/home/app
45
- ENV BACKEND_APP_HOME=/home/app
46
- # RUN mkdir $BACKEND_APP_HOME
47
- WORKDIR $BACKEND_APP_HOME
48
-
49
- # install dependencies
50
- RUN apt-get update \
51
- && apt-get -y install libpq-dev gcc \
52
- && pip install psycopg
53
-
54
- COPY --from=builder /app/wheels /wheels
55
- COPY --from=builder /app/requirements.txt .
56
- RUN pip install --upgrade pip
57
- RUN pip install --no-cache /wheels/*
58
-
59
- # copy project
60
- COPY . $BACKEND_APP_HOME
61
-
62
- # chown all the files to the app user
63
- RUN chown -R app:app $BACKEND_APP_HOME
64
-
65
- # change to the app user
66
- USER app
67
-
68
- ## Expose port
69
- EXPOSE 8000
70
-
71
- # Command to run the FastAPI app
72
- CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile.gradio DELETED
@@ -1,21 +0,0 @@
1
- # Gradio Dockerfile
2
- FROM python:3.11.10-slim
3
-
4
- ENV PYTHONUNBUFFERED=1
5
- ENV OMP_NUM_THREADS=1
6
-
7
- # Set working directory
8
- WORKDIR /app
9
-
10
- # Copy app files
11
- COPY requirements.txt ./
12
- RUN pip install --no-cache-dir -r requirements.txt
13
- COPY . .
14
-
15
- # Expose port
16
- EXPOSE 7860
17
- ENV GRADIO_SERVER_NAME="0.0.0.0"
18
-
19
- # Command to run the Gradio app
20
- CMD ["gradio", "app.py"]
21
- # CMD ["python", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile.gradio.prod DELETED
@@ -1,72 +0,0 @@
1
- ###########
2
- # BUILDER #
3
- ###########
4
-
5
- ARG AWS_ACCOUNT_ID
6
-
7
- # pull official base image
8
- FROM ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/gradio-python:3.11.10-slim as builder
9
-
10
- # set work directory
11
- WORKDIR /app
12
-
13
- # set environment variables
14
- ENV PYTHONDONTWRITEBYTECODE 1
15
- ENV PYTHONUNBUFFERED 1
16
- ENV OMP_NUM_THREADS=1
17
-
18
-
19
- # install dependencies
20
- RUN apt-get update \
21
- && apt-get -y install libpq-dev gcc \
22
- && pip install psycopg
23
-
24
- RUN pip install --upgrade pip
25
- COPY ./requirements.txt /app/requirements.txt
26
- RUN pip wheel --no-cache-dir --no-deps --wheel-dir /app/wheels -r requirements.txt
27
-
28
- #########
29
- # FINAL #
30
- #########
31
-
32
- ARG AWS_ACCOUNT_ID
33
-
34
- # pull official base image
35
- FROM ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/gradio-python:3.11.10-slim
36
-
37
- # create directory for the app user
38
- RUN mkdir -p /home/backend-app
39
-
40
- # create the app user
41
- RUN addgroup --system app && adduser --system --group app
42
-
43
- # create the appropriate directories
44
- ENV HOME=/home/app
45
- ENV BACKEND_APP_HOME=/home/app
46
- # RUN mkdir $BACKEND_APP_HOME
47
- WORKDIR $BACKEND_APP_HOME
48
-
49
- # install dependencies
50
- RUN apt-get update \
51
- && apt-get -y install libpq-dev gcc \
52
- && pip install psycopg
53
-
54
- COPY --from=builder /app/wheels /wheels
55
- COPY --from=builder /app/requirements.txt .
56
- RUN pip install --upgrade pip
57
- RUN pip install --no-cache /wheels/*
58
-
59
- # copy project
60
- COPY . $BACKEND_APP_HOME
61
-
62
- # chown all the files to the app user
63
- RUN chown -R app:app $BACKEND_APP_HOME
64
-
65
- # change to the app user
66
- USER app
67
-
68
- # Expose port
69
- EXPOSE 7860
70
- ENV GRADIO_SERVER_NAME="0.0.0.0"
71
-
72
- CMD ["gradio", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Makefile DELETED
@@ -1,12 +0,0 @@
1
- .PHONY: lint-apply lint-check
2
-
3
- lint-check:
4
- @echo "Checking for lint errors..."
5
- flake8 .
6
- black --check .
7
- isort --check-only .
8
-
9
- lint-apply:
10
- @echo "apply linting ..."
11
- black .
12
- isort .
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,241 +1,13 @@
1
  ---
2
- title: 'ACRES: Center For Rapid Evidence Synthesis'
3
  emoji: 👁
4
  colorFrom: gray
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.6.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
-
15
-
16
- # ACRES RAG Project
17
-
18
- ## Project Setup
19
-
20
- To test and run the project locally, clone the project from GitHub and change directory to `acres`.
21
-
22
- ```sh
23
- git clone https://github.com/SunbirdAI/acres.git
24
- cd acres
25
- ```
26
-
27
- Create python virtual environment and activate it.
28
-
29
- ```sh
30
- python -m venv env
31
- source env/bin/activate
32
- ```
33
-
34
- Install project dependencies
35
-
36
- ```sh
37
- pip install -r requirements.txt
38
- ```
39
-
40
- ## Run project locally
41
- To test the project locally follow the steps below.
42
-
43
- Copy `.env.example` to `.env` and provide the correct environment variable values.
44
-
45
- ```sh
46
- cp .env.example .env
47
- ```
48
-
49
- Run the application
50
-
51
- ```sh
52
- python app.py
53
- ```
54
-
55
- OR
56
-
57
- ```sh
58
- gradio app.py
59
- ```
60
-
61
- Browse the application with the link `http://localhost:7860/`
62
-
63
- ### Run the api
64
- Make sure the gradio app is running on port `7860` and then run the command below in another terminal tab in the same directory.
65
-
66
- ```sh
67
- uvicorn api:app --reload
68
- ```
69
-
70
- Browse the api at `http://localhost:8000/docs`
71
-
72
-
73
- ## Run with docker
74
- To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
75
-
76
- Build the project docker image
77
-
78
- ```sh
79
- docker build -f Dockerfile.gradio -t gradio-app .
80
- ```
81
-
82
- Create docker network
83
-
84
- ```sh
85
- docker network create gradio-fastapi-network
86
- ```
87
-
88
- Run the docker container
89
-
90
- ```sh
91
- docker run -it -p 7860:7860 --rm --name gradio --network=gradio-fastapi-network gradio-app
92
- ```
93
-
94
- Browse the application with the link `http://localhost:7860/`
95
-
96
- To run the api with docker run the commands below. The gradio container should be run first before running the api.
97
-
98
- ```sh
99
- docker build -f Dockerfile.api -t fastapi-app .
100
- docker run -it -p 8000:8000 --rm --name fastapi --network=gradio-fastapi-network fastapi-app
101
- ```
102
-
103
- Browse the api at `http://localhost:8000/docs`
104
-
105
-
106
- ## Deploy to AWS ECS (Elastic Container Service) with Fargate
107
-
108
- Install and configure the AWS CLI and aws credentials. See [link](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html)
109
-
110
- OR: See the pdf document [here](https://docs.aws.amazon.com/pdfs/cli/latest/userguide/aws-cli.pdf#getting-started-quickstart)
111
-
112
- Now follow the steps below to deploy to AWS ECS
113
-
114
- Setup the default region and your aws account id
115
-
116
- ```sh
117
- export AWS_DEFAULT_REGION=region # i.e us-east-1, eu-west-1
118
- export AWS_ACCOUNT_ID=aws_account_id # ie. 2243838xxxxxx
119
- ```
120
-
121
- Log in to AWS ECR (Elastic Container Registry) via the command line
122
-
123
- ```sh
124
- aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
125
- ```
126
-
127
- Create a python image and push to ECR. This image will be used as the base image for the application image deployed on AWS ECS.
128
-
129
- - Create python repository
130
-
131
- ```sh
132
- aws ecr create-repository \
133
- --repository-name gradio-python \
134
- --image-tag-mutability MUTABLE
135
- ```
136
-
137
- ```sh
138
- export ECR_PYTHON_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/gradio-python"
139
- echo $ECR_PYTHON_URL
140
- ```
141
-
142
- - Pull python image and tag it to the ECR url
143
-
144
- ```sh
145
- docker pull python:3.11.10-slim
146
- docker tag python:3.11.10-slim $ECR_PYTHON_URL:3.11.10-slim
147
-
148
- docker push $ECR_PYTHON_URL:3.11.10-slim
149
- ```
150
-
151
- - Now create the application repository
152
-
153
- ```sh
154
- aws ecr create-repository \
155
- --repository-name gradio-app-prod \
156
- --image-tag-mutability MUTABLE
157
-
158
- export ECR_BACKEND_GRADIO_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/gradio-app-prod"
159
- echo $ECR_BACKEND_GRADIO_URL
160
- ```
161
-
162
- - Build the docker image for the production and push to ECR
163
-
164
- ```sh
165
- docker build --build-arg AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID -f Dockerfile.gradio.prod -t gradio-app-prod .
166
- docker tag gradio-app-prod:latest "${ECR_BACKEND_GRADIO_URL}:latest"
167
- docker push "${ECR_BACKEND_GRADIO_URL}:latest"
168
- ```
169
-
170
- - Now create the FastAPI repository
171
-
172
- ```sh
173
- aws ecr create-repository \
174
- --repository-name fastapi-api-prod \
175
- --image-tag-mutability MUTABLE
176
-
177
- export ECR_BACKEND_FASTAPI_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/fastapi-api-prod"
178
- echo $ECR_BACKEND_FASTAPI_URL
179
- ```
180
-
181
- - Build the docker image for the production and push to ECR
182
-
183
- ```sh
184
- docker build --build-arg AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID -f Dockerfile.api.prod -t fastapi-api-prod .
185
- docker tag fastapi-api-prod:latest "${ECR_BACKEND_FASTAPI_URL}:latest"
186
- docker push "${ECR_BACKEND_FASTAPI_URL}:latest"
187
- ```
188
-
189
- ### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
190
-
191
- #### Install
192
- To install the CFN-CLI run the command below
193
-
194
- ```sh
195
- pip install cloudformation-cli cloudformation-cli-java-plugin cloudformation-cli-go-plugin cloudformation-cli-python-plugin cloudformation-cli-typescript-plugin
196
- ```
197
-
198
- #### CFN-Toml
199
-
200
- ```sh
201
- gem install cfn-toml
202
- ```
203
-
204
-
205
- Copy `infra/ecs_config.template` to `infra/ecs_config.toml` and provide the correct `AWS Account ID` for the `ContainerImageGradio`
206
-
207
- ```sh
208
- cp infra/ecs_config.template infra/ecs_config.toml
209
- ```
210
-
211
-
212
- #### Deploy
213
-
214
- To deploy the ECS infra run the commands below. It provisions the cloudformation stack changeset for review.
215
-
216
- Log into your aws console and search for `cloudformation`. See and review the changeset. If everything is good execute the changeset to finish with the infra deployment.
217
-
218
- Then look for the outputs to the link for the deployed application.
219
-
220
- ```sh
221
- chmod u+x bin/cfn/*
222
- ./bin/cfn/ecs-deploy
223
- ```
224
-
225
- #### Update Task Definition Deployments
226
- After making changes, build the docker images and then push to ECR.
227
-
228
- To update the task definition deployments, force the new deployment by running the commands below
229
-
230
- For the gradio task definition
231
-
232
- ```sh
233
- ./bin/cfn/ecs-deploy-update-gradio
234
- ```
235
-
236
- For the api task definition
237
-
238
- ```sh
239
- ./bin/cfn/ecs-deploy-update-api
240
- ```
241
-
 
1
  ---
2
+ title: Acres
3
  emoji: 👁
4
  colorFrom: gray
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.42.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
__pycache__/config.cpython-311.pyc ADDED
Binary file (455 Bytes). View file
 
api.py DELETED
@@ -1,134 +0,0 @@
1
- import logging
2
- import os
3
- from enum import Enum
4
- from typing import List, Optional, Union
5
-
6
- from dotenv import load_dotenv
7
- from fastapi import FastAPI, HTTPException
8
- from fastapi.middleware.cors import CORSMiddleware
9
- from fastapi.responses import FileResponse
10
- from gradio_client import Client
11
- from pydantic import BaseModel, ConfigDict, Field, constr
12
-
13
- from docs import description, tags_metadata
14
-
15
- load_dotenv()
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logging.getLogger(__name__)
18
-
19
- GRADIO_URL = os.getenv("GRADIO_URL", "http://localhost:7860/")
20
- logger.info(f"GRADIO_URL: {GRADIO_URL}")
21
- client = Client(GRADIO_URL)
22
-
23
- app = FastAPI(
24
- title="ACRES RAG API",
25
- description=description,
26
- openapi_tags=tags_metadata,
27
- )
28
-
29
- origins = ["*"]
30
-
31
- app.add_middleware(
32
- CORSMiddleware,
33
- allow_origins=origins,
34
- allow_credentials=True,
35
- allow_methods=["*"],
36
- allow_headers=["*"],
37
- )
38
-
39
-
40
- class StudyVariables(str, Enum):
41
- ebola_virus = "Ebola Virus"
42
- vaccine_coverage = "Vaccine coverage"
43
- genexpert = "GeneXpert"
44
-
45
-
46
- class PromptType(str, Enum):
47
- default = "Default"
48
- highlight = "Highlight"
49
- evidence_based = "Evidence-based"
50
-
51
-
52
- class StudyVariableRequest(BaseModel):
53
- study_variable: Union[StudyVariables, str]
54
- prompt_type: PromptType
55
- text: constr(min_length=1, strip_whitespace=True) # type: ignore
56
-
57
- model_config = ConfigDict(from_attributes=True)
58
-
59
-
60
- class DownloadCSV(BaseModel):
61
- text: constr(min_length=1, strip_whitespace=True) # type: ignore
62
-
63
- model_config = ConfigDict(from_attributes=True)
64
-
65
-
66
- class Study(BaseModel):
67
- study_name: constr(min_length=1, strip_whitespace=True) # type: ignore
68
-
69
- model_config = ConfigDict(from_attributes=True)
70
-
71
-
72
- class ZoteroCredentials(BaseModel):
73
- library_id: constr(min_length=1, strip_whitespace=True) # type: ignore
74
- api_access_key: constr(min_length=1, strip_whitespace=True) # type: ignore
75
-
76
- model_config = ConfigDict(from_attributes=True)
77
-
78
-
79
- @app.post("/process_zotero_library_items", tags=["zotero"])
80
- def process_zotero_library_items(zotero_credentials: ZoteroCredentials):
81
- result = client.predict(
82
- zotero_library_id_param=zotero_credentials.library_id,
83
- zotero_api_access_key=zotero_credentials.api_access_key,
84
- api_name="/process_zotero_library_items",
85
- )
86
- return {"result": result}
87
-
88
-
89
- @app.post("/get_study_info", tags=["zotero"])
90
- def get_study_info(study: Study):
91
- result = client.predict(study_name=study.study_name, api_name="/get_study_info")
92
- # print(result)
93
- return {"result": result}
94
-
95
-
96
- @app.post("/study_variables", tags=["zotero"])
97
- def process_study_variables(
98
- study_request: StudyVariableRequest,
99
- ):
100
- result = client.predict(
101
- text=study_request.text, # "study id, study title, study design, study summary",
102
- study_name=study_request.study_variable, # "Ebola Virus",
103
- prompt_type=study_request.prompt_type, # "Default",
104
- api_name="/process_multi_input",
105
- )
106
- print(type(result))
107
- return {"result": result[0]}
108
-
109
-
110
- @app.post("/new_study_choices", tags=["zotero"])
111
- def new_study_choices():
112
- result = client.predict(api_name="/new_study_choices")
113
- return {"result": result}
114
-
115
-
116
- @app.post("/download_csv", tags=["zotero"])
117
- def download_csv(download_request: DownloadCSV):
118
- result = client.predict(
119
- markdown_content=download_request.text, api_name="/download_as_csv"
120
- )
121
- print(result)
122
-
123
- file_path = result
124
- if not file_path or not os.path.exists(file_path):
125
- raise HTTPException(status_code=404, detail="File not found")
126
-
127
- # Use FileResponse to send the file to the client
128
- return FileResponse(
129
- file_path,
130
- media_type="text/csv", # Specify the correct MIME type for CSV
131
- filename=os.path.basename(
132
- file_path
133
- ), # Provide a default filename for the download
134
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,687 +1,102 @@
1
- # app.py
2
-
3
- import csv
4
- import datetime
5
- # from datetime import datetime
6
- import io
7
- import json
8
- import logging
9
- import os
10
- from typing import Any, List, Tuple, Union
11
-
12
  import gradio as gr
13
- import openai
14
- from cachetools import LRUCache
15
- from dotenv import load_dotenv
16
- from slugify import slugify
17
-
18
- from config import OPENAI_API_KEY, STUDY_FILES
19
- from interface import create_chat_interface
20
  from rag.rag_pipeline import RAGPipeline
21
- from utils.db import (
22
- add_study_files_to_db,
23
- create_db_and_tables,
24
- get_all_study_files,
25
- get_study_file_by_name,
26
- get_study_files_by_library_id,
27
- )
28
- from utils.helpers import (
29
- add_study_files_to_chromadb,
30
- append_to_study_files,
31
- chromadb_client,
32
- create_directory,
33
  )
34
- from utils.pdf_processor import PDFProcessor
35
- from utils.prompts import evidence_based_prompt, highlight_prompt
36
- from utils.zotero_manager import ZoteroManager
37
-
38
- data_directory = "data"
39
- create_directory(data_directory)
40
- # Configure logging
41
- logging.basicConfig(level=logging.INFO)
42
- logger = logging.getLogger(__name__)
43
- load_dotenv()
44
-
45
- openai.api_key = OPENAI_API_KEY
46
-
47
- # Initialize ChromaDB with study files
48
- add_study_files_to_chromadb("study_files.json", "study_files_collection")
49
-
50
- # Create sqlite study file data table
51
- create_db_and_tables()
52
 
 
53
 
54
  # Cache for RAG pipelines
55
  rag_cache = {}
56
 
57
- cache = LRUCache(maxsize=100)
58
-
59
-
60
- def get_cache_value(key):
61
- return cache.get(key)
62
-
63
-
64
- zotero_library_id = get_cache_value("zotero_library_id")
65
- logger.info(f"zotero_library_id cache: {zotero_library_id}")
66
 
67
-
68
- def get_rag_pipeline(study_name: str) -> RAGPipeline:
69
- """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
70
  if study_name not in rag_cache:
71
- study = get_study_file_by_name(study_name)
72
-
73
- if not study:
 
74
  raise ValueError(f"Invalid study name: {study_name}")
75
-
76
- study_file = study.file_path
77
- logger.info(f"study_file: {study_file}")
78
- if not study_file:
79
- raise ValueError(f"File path not found for study name: {study_name}")
80
-
81
- rag_cache[study_name] = RAGPipeline(study_file)
82
-
83
  return rag_cache[study_name]
84
 
85
 
86
- def get_study_info(study_name: Union[str, list]) -> str:
87
- """Retrieve information about the specified study."""
88
- if isinstance(study_name, list):
89
- study_name = study_name[0] if study_name else None
90
-
91
- if not study_name:
92
- return "No study selected"
93
-
94
- study = get_study_file_by_name(study_name)
95
- logger.info(f"Study: {study}")
96
-
97
- if not study:
98
- raise ValueError(f"Invalid study name: {study_name}")
99
-
100
- study_file = study.file_path
101
- logger.info(f"study_file: {study_file}")
102
- if not study_file:
103
- raise ValueError(f"File path not found for study name: {study_name}")
104
-
105
- with open(study_file, "r") as f:
106
- data = json.load(f)
107
- return f"### Number of documents: {len(data)}"
108
-
109
-
110
- def markdown_table_to_csv(markdown_text: str) -> str:
111
- """Convert a markdown table to CSV format."""
112
- lines = [line.strip() for line in markdown_text.split("\n") if line.strip()]
113
- table_lines = [line for line in lines if line.startswith("|")]
114
-
115
- if not table_lines:
116
- return ""
117
-
118
- csv_data = []
119
- for line in table_lines:
120
- if "---" in line:
121
- continue
122
- # Split by |, remove empty strings, and strip whitespace
123
- cells = [cell.strip() for cell in line.split("|") if cell.strip()]
124
- csv_data.append(cells)
125
-
126
- output = io.StringIO()
127
- writer = csv.writer(output)
128
- writer.writerows(csv_data)
129
- return output.getvalue()
130
-
131
-
132
- def cleanup_temp_files():
133
- """Clean up old temporary files."""
134
- try:
135
- current_time = datetime.datetime.now()
136
- for file in os.listdir():
137
- if file.startswith("study_export_") and file.endswith(".csv"):
138
- file_time = datetime.datetime.fromtimestamp(os.path.getmtime(file))
139
- # Calculate the time difference in seconds
140
- time_difference = (current_time - file_time).total_seconds()
141
- if time_difference > 20: # 5 minutes in seconds
142
- try:
143
- os.remove(file)
144
- except Exception as e:
145
- logger.warning(f"Failed to remove temp file {file}: {e}")
146
- except Exception as e:
147
- logger.warning(f"Error during cleanup: {e}")
148
-
149
-
150
- def chat_function(message: str, study_name: str, prompt_type: str) -> str:
151
- """Process a chat message and generate a response using the RAG pipeline."""
152
-
153
- if not message.strip():
154
- return "Please enter a valid query."
155
-
156
- rag = get_rag_pipeline(study_name)
157
- logger.info(f"rag: {rag}")
158
- prompt = {
159
- "Highlight": highlight_prompt,
160
- "Evidence-based": evidence_based_prompt,
161
- }.get(prompt_type)
162
-
163
- response, _ = rag.query(message, prompt_template=prompt) # Unpack the tuple
164
- return response
165
-
166
-
167
- def process_zotero_library_items(
168
- zotero_library_id_param: str, zotero_api_access_key: str
169
- ) -> str:
170
- global zotero_library_id
171
- if not zotero_library_id_param or not zotero_api_access_key:
172
- return "Please enter your zotero library Id and API Access Key"
173
-
174
- zotero_library_id = zotero_library_id_param
175
- cache["zotero_library_id"] = zotero_library_id
176
- zotero_library_type = "user" # or "group"
177
- zotero_api_access_key = zotero_api_access_key
178
-
179
- message = ""
180
-
181
- try:
182
- zotero_manager = ZoteroManager(
183
- zotero_library_id, zotero_library_type, zotero_api_access_key
184
- )
185
-
186
- zotero_collections = zotero_manager.get_collections()
187
- zotero_collection_lists = zotero_manager.list_zotero_collections(
188
- zotero_collections
189
- )
190
- filtered_zotero_collection_lists = (
191
- zotero_manager.filter_and_return_collections_with_items(
192
- zotero_collection_lists
193
- )
194
- )
195
-
196
- study_files_data = {} # Dictionary to collect items for ChromaDB
197
-
198
- for collection in filtered_zotero_collection_lists:
199
- collection_name = collection.get("name")
200
- if collection_name not in STUDY_FILES:
201
- collection_key = collection.get("key")
202
- collection_items = zotero_manager.get_collection_items(collection_key)
203
- zotero_collection_items = (
204
- zotero_manager.get_collection_zotero_items_by_key(collection_key)
205
- )
206
- # Export zotero collection items to json
207
- zotero_items_json = zotero_manager.zotero_items_to_json(
208
- zotero_collection_items
209
- )
210
- export_file = f"{slugify(collection_name)}_zotero_items.json"
211
- zotero_manager.write_zotero_items_to_json_file(
212
- zotero_items_json, f"data/{export_file}"
213
- )
214
- append_to_study_files(
215
- "study_files.json", collection_name, f"data/{export_file}"
216
- )
217
-
218
- # Collect for ChromaDB
219
- study_files_data[collection_name] = f"data/{export_file}"
220
-
221
- # Update in-memory STUDY_FILES for reference in current session
222
- STUDY_FILES.update({collection_name: f"data/{export_file}"})
223
- logger.info(f"STUDY_FILES: {STUDY_FILES}")
224
-
225
- # After loop, add all collected data to ChromaDB
226
- add_study_files_to_chromadb("study_files.json", "study_files_collection")
227
- # Add collected data to sqlite
228
- add_study_files_to_db("study_files.json", zotero_library_id)
229
-
230
- # Dynamically update study choices
231
- global study_choices
232
- study_choices = [
233
- file.name for file in get_study_files_by_library_id([zotero_library_id])
234
- ]
235
- message = "Successfully processed items in your zotero library"
236
- except Exception as e:
237
- message = f"Error process your zotero library: {str(e)}"
238
-
239
- return message
240
-
241
-
242
- process_zotero_library_items(
243
- os.getenv("ZOTERO_LIBRARY_ID"), os.getenv("ZOTERO_API_ACCESS_KEY")
244
- )
245
-
246
-
247
- def refresh_study_choices():
248
- """
249
- Refresh study choices for a specific dropdown instance.
250
-
251
- :return: Updated Dropdown with current study choices
252
- """
253
- global study_choices, zotero_library_id
254
- zotero_library_id = get_cache_value("zotero_library_id")
255
- logger.info(f"zotero_library_id refreshed: {zotero_library_id}")
256
- study_choices = [
257
- file.name for file in get_study_files_by_library_id([zotero_library_id])
258
- ]
259
- logger.info(f"Study choices refreshed: {study_choices}")
260
- return study_choices
261
-
262
-
263
- def new_study_choices():
264
- """
265
- Refresh study choices for a specific dropdown instance.
266
- """
267
- study_choices = refresh_study_choices()
268
- study_choices = ", ".join(study_choices)
269
- return f"**Your studies are: {study_choices}**"
270
-
271
-
272
- def process_multi_input(text, study_name, prompt_type):
273
- # Split input based on commas and strip any extra spaces
274
- variable_list = [word.strip().upper() for word in text.split(",")]
275
- user_message = f"Extract and present in a tabular format the following variables for each {study_name} study: {', '.join(variable_list)}"
276
- logger.info(f"User message: {user_message}")
277
- response = chat_function(user_message, study_name, prompt_type)
278
- return [response, gr.update(visible=True)]
279
-
280
-
281
- def download_as_csv(markdown_content):
282
- """Convert markdown table to CSV and provide for download."""
283
- if not markdown_content:
284
- return None
285
-
286
- csv_content = markdown_table_to_csv(markdown_content)
287
- if not csv_content:
288
- return None
289
-
290
- # Create temporary file with actual content
291
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
292
- temp_path = f"study_export_{timestamp}.csv"
293
-
294
- with open(temp_path, "w", newline="", encoding="utf-8") as f:
295
- f.write(csv_content)
296
-
297
- return temp_path
298
-
299
-
300
- # PDF Support
301
- def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
302
- """Process uploaded PDF files and add them to the system."""
303
- if not files or not collection_name:
304
- return "Please upload PDF files and provide a collection name"
305
-
306
- try:
307
- processor = PDFProcessor()
308
-
309
- # Save uploaded files temporarily
310
- file_paths = []
311
- for file in files:
312
- # Get the actual file path from the Gradio File object
313
- if hasattr(file, "name"): # If it's already a path
314
- temp_path = file.name
315
- else: # If it needs to be saved
316
- temp_path = os.path.join(processor.upload_dir, file.orig_name)
317
- file.save(temp_path)
318
- file_paths.append(temp_path)
319
-
320
- # Process PDFs
321
- output_path = processor.process_pdfs(file_paths, collection_name)
322
-
323
- # Add to study files and ChromaDB
324
- collection_id = f"pdf_{slugify(collection_name)}"
325
- append_to_study_files("study_files.json", collection_id, output_path)
326
- add_study_files_to_chromadb("study_files.json", "study_files_collection")
327
-
328
- # Cleanup temporary files if they were created by us
329
- for path in file_paths:
330
- if path.startswith(processor.upload_dir):
331
- try:
332
- os.remove(path)
333
- except Exception as e:
334
- logger.warning(f"Failed to remove temporary file {path}: {e}")
335
-
336
- return f"Successfully processed PDFs into collection: {collection_id}"
337
-
338
- except Exception as e:
339
- logger.error(f"Error in process_pdf_uploads: {str(e)}")
340
- return f"Error processing PDF files: {str(e)}"
341
-
342
-
343
- def chat_response(
344
- message: str,
345
- history: List[Tuple[str, str]],
346
- study_name: str,
347
- pdf_processor: PDFProcessor,
348
- ) -> Tuple[List[Tuple[str, str]], str, Any]:
349
- """Generate chat response and update history."""
350
- if not message.strip():
351
- return history, None, None
352
-
353
  rag = get_rag_pipeline(study_name)
354
- response, source_info = rag.query(message)
355
- history.append((message, response))
356
 
357
- # Generate PDF preview if source information is available
358
- # preview_image = None
359
- if (
360
- source_info
361
- and source_info.get("source_file")
362
- and source_info.get("page_numbers")
363
- ):
364
- try:
365
- # Get the first page number from the source
366
- page_num = source_info["page_numbers"][0]
367
- except Exception as e:
368
- logger.error(f"Error generating PDF preview: {str(e)}")
369
 
370
- return history
 
371
 
 
372
 
373
- def create_gr_interface() -> gr.Blocks:
374
- """Create and configure the Gradio interface for the RAG platform."""
375
- global zotero_library_id
376
- with gr.Blocks(theme=gr.themes.Base()) as demo:
377
- gr.Markdown("# ACRES RAG Platform")
378
 
379
- with gr.Tabs() as tabs:
380
- # Tab 1: Original Study Analysis Interface
381
- with gr.Tab("Study Analysis"):
382
- with gr.Row():
383
- with gr.Column(scale=1):
384
- gr.Markdown("### Zotero Credentials")
385
- zotero_library_id_param = gr.Textbox(
386
- label="Zotero Library ID",
387
- type="password",
388
- placeholder="Enter Your Zotero Library ID here...",
389
- )
390
- zotero_api_access_key = gr.Textbox(
391
- label="Zotero API Access Key",
392
- type="password",
393
- placeholder="Enter Your Zotero API Access Key...",
394
- )
395
- process_zotero_btn = gr.Button("Process your Zotero Library")
396
- zotero_output = gr.Markdown(label="Zotero")
397
 
398
- local_storage_state = gr.BrowserState(
399
- {"zotero_library_id": "", "study_choices": []}
400
- )
401
 
402
- gr.Markdown("### Study Information")
 
403
 
404
- zotero_library_id = zotero_library_id_param.value
405
- if zotero_library_id is None:
406
- zotero_library_id = get_cache_value("zotero_library_id")
407
- logger.info(f"zotero_library_id: =====> {zotero_library_id}")
408
- study_choices = refresh_study_choices()
409
- logger.info(f"study_choices_db: =====> {study_choices}")
410
 
411
- study_dropdown = gr.Dropdown(
412
- choices=study_choices,
413
- label="Select Study",
414
- value=(study_choices[0] if study_choices else None),
415
- allow_custom_value=True,
416
- )
417
- # In Gradio interface setup
418
- refresh_button = gr.Button("Refresh Studies")
419
 
420
- study_info = gr.Markdown(label="Study Details")
421
- new_studies = gr.Markdown(label="Your Studies")
422
- prompt_type = gr.Radio(
423
- ["Default", "Highlight", "Evidence-based"],
424
- label="Prompt Type",
425
- value="Default",
426
- )
427
-
428
- @demo.load(
429
- inputs=[local_storage_state],
430
- outputs=[zotero_library_id_param],
431
- )
432
- def load_from_local_storage(saved_values):
433
- print("loading from local storage", saved_values)
434
- return saved_values.get("zotero_library_id")
435
-
436
- @gr.on(
437
- [
438
- zotero_library_id_param.change,
439
- process_zotero_btn.click,
440
- refresh_button.click,
441
- ],
442
- inputs=[zotero_library_id_param],
443
- outputs=[local_storage_state],
444
- )
445
- def save_to_local_storage(zotero_library_id_param):
446
- study_choices = refresh_study_choices()
447
- return {
448
- "zotero_library_id": zotero_library_id_param,
449
- "study_choices": study_choices,
450
- }
451
-
452
- with gr.Column(scale=3):
453
- gr.Markdown("### Study Variables")
454
- with gr.Row():
455
- study_variables = gr.Textbox(
456
- show_label=False,
457
- placeholder="Type your variables separated by commas e.g (Study ID, Study Title, Authors etc)",
458
- scale=4,
459
- lines=1,
460
- autofocus=True,
461
- )
462
- submit_btn = gr.Button("Submit", scale=1)
463
- answer_output = gr.Markdown(label="Answer")
464
- download_btn = gr.DownloadButton(
465
- "Download as CSV",
466
- variant="primary",
467
- size="sm",
468
- scale=1,
469
- visible=False,
470
- )
471
-
472
- # Tab 2: PDF Chat Interface
473
- with gr.Tab("PDF Chat"):
474
- pdf_processor = PDFProcessor()
475
-
476
- with gr.Row():
477
- # Left column: Chat and Input
478
- with gr.Column(scale=7):
479
- chat_history = gr.Chatbot(
480
- value=[], height=600, show_label=False
481
- )
482
- with gr.Row():
483
- query_input = gr.Textbox(
484
- show_label=False,
485
- placeholder="Ask a question about your PDFs...",
486
- scale=8,
487
- )
488
- chat_submit_btn = gr.Button(
489
- "Send", scale=2, variant="primary"
490
- )
491
-
492
- # Right column: PDF Preview and Upload
493
- with gr.Column(scale=3):
494
- # pdf_preview = gr.Image(label="Source Page", height=600)
495
- source_info = gr.Markdown(
496
- label="Sources",
497
- value="No sources available yet."
498
- )
499
- with gr.Row():
500
- pdf_files = gr.File(
501
- file_count="multiple",
502
- file_types=[".pdf"],
503
- label="Upload PDFs",
504
- )
505
- with gr.Row():
506
- collection_name = gr.Textbox(
507
- label="Collection Name",
508
- placeholder="Name this PDF collection...",
509
- )
510
- with gr.Row():
511
- upload_btn = gr.Button("Process PDFs", variant="primary")
512
- pdf_status = gr.Markdown()
513
- current_collection = gr.State(value=None)
514
-
515
- # Event handlers for Study Analysis tab
516
- process_zotero_btn.click(
517
- process_zotero_library_items,
518
- inputs=[zotero_library_id_param, zotero_api_access_key],
519
- outputs=[zotero_output],
520
  )
 
521
 
522
- study_dropdown.change(
523
- get_study_info, inputs=[study_dropdown], outputs=[study_info]
524
- )
525
-
526
- submit_btn.click(
527
- process_multi_input,
528
- inputs=[study_variables, study_dropdown, prompt_type],
529
- outputs=[answer_output, download_btn],
530
- )
531
-
532
- download_btn.click(
533
- fn=download_as_csv, inputs=[answer_output], outputs=[download_btn]
534
- ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
535
 
536
- refresh_button.click(
537
- fn=new_study_choices,
538
- outputs=[new_studies], # Update the same dropdown
 
539
  )
540
 
541
- # Event handlers for PDF Chat tab
542
-
543
- def handle_pdf_upload(files, name):
544
- if not name:
545
- return "Please provide a collection name", None
546
- if not files:
547
- return "Please select PDF files", None
548
-
549
- try:
550
- processor = PDFProcessor()
551
- # Process PDFs
552
- output_path = processor.process_pdfs(files, name)
553
- collection_id = f"pdf_{slugify(name)}"
554
-
555
- # Add to study files JSON
556
- append_to_study_files("study_files.json", collection_id, output_path)
557
-
558
- # Add to ChromaDB
559
- add_study_files_to_chromadb("study_files.json", "study_files_collection")
560
-
561
- # Add to SQLite database - this is the crucial missing step
562
- add_study_files_to_db("study_files.json", "local") # Add library_id parameter
563
-
564
- return f"Successfully processed PDFs into collection: {collection_id}", collection_id
565
- except Exception as e:
566
- logger.error(f"Error in handle_pdf_upload: {str(e)}")
567
- return f"Error: {str(e)}", None
568
-
569
- def add_message(history, message):
570
- """Add user message to chat history."""
571
- if not message.strip():
572
- raise gr.Error("Please enter a message")
573
- history = history + [(message, None)]
574
- return history, "", None
575
-
576
- def format_source_info(source_nodes) -> str:
577
- """Format source information into a markdown string."""
578
- if not source_nodes:
579
- return "No source information available"
580
-
581
- sources_md = "### Sources\n\n"
582
- seen_sources = set() # To track unique sources
583
-
584
- for idx, node in enumerate(source_nodes, 1):
585
- metadata = node.metadata
586
- if not metadata:
587
- continue
588
-
589
- source_key = (metadata.get('source_file', ''), metadata.get('page_number', 0))
590
- if source_key in seen_sources:
591
- continue
592
-
593
- seen_sources.add(source_key)
594
- title = metadata.get('title', os.path.basename(metadata.get('source_file', 'Unknown')))
595
- page = metadata.get('page_number', 'N/A')
596
-
597
- sources_md += f"{idx}. **{title}** - Page {page}\n"
598
-
599
- return sources_md
600
-
601
- def generate_chat_response(history, collection_id, pdf_processor):
602
- """Generate response for the last message in history."""
603
- if not collection_id:
604
- raise gr.Error("Please upload PDFs first")
605
- if len(history) == 0:
606
- return history, None
607
-
608
- last_message = history[-1][0]
609
- try:
610
- # Get response and source info
611
- rag = get_rag_pipeline(collection_id)
612
- response_text, source_nodes = rag.query(last_message)
613
-
614
- # Format sources info
615
- sources_md = "### Top Sources\n\n"
616
- if source_nodes and len(source_nodes) > 0:
617
- seen_sources = set()
618
- source_count = 0
619
-
620
- # Only process up to 3 sources
621
- for node in source_nodes:
622
- if source_count >= 3: # Stop after 3 sources
623
- break
624
-
625
- if not hasattr(node, 'metadata'):
626
- continue
627
-
628
- metadata = node.metadata
629
- source_key = (
630
- metadata.get('source_file', ''),
631
- metadata.get('page_number', 0)
632
- )
633
-
634
- if source_key in seen_sources:
635
- continue
636
-
637
- seen_sources.add(source_key)
638
- source_count += 1
639
-
640
- title = metadata.get('title', 'Unknown')
641
- if not title or title == 'Unknown':
642
- title = os.path.basename(metadata.get('source_file', 'Unknown Document'))
643
-
644
- page = metadata.get('page_number', 'N/A')
645
- sources_md += f"{source_count}. **{title}** - Page {page}\n"
646
-
647
- if source_count == 0:
648
- sources_md = "No source information available"
649
- else:
650
- sources_md = "No source information available"
651
-
652
- # Update history with response
653
- history[-1] = (last_message, response_text)
654
- return history, sources_md
655
-
656
- except Exception as e:
657
- logger.error(f"Error in generate_chat_response: {str(e)}")
658
- history[-1] = (last_message, f"Error: {str(e)}")
659
- return history, "Error retrieving sources"
660
-
661
-
662
- # Update PDF event handlers
663
- upload_btn.click( # Change from pdf_files.upload to upload_btn.click
664
- handle_pdf_upload,
665
- inputs=[pdf_files, collection_name],
666
- outputs=[pdf_status, current_collection],
667
- )
668
-
669
- # Fixed chat event handling
670
- chat_submit_btn.click(
671
- add_message,
672
- inputs=[chat_history, query_input],
673
- outputs=[chat_history, query_input],
674
- ).success(
675
- generate_chat_response,
676
- inputs=[chat_history, current_collection],
677
- outputs=[chat_history, source_info],
678
- )
679
-
680
- return demo
681
-
682
-
683
- demo = create_gr_interface()
684
 
685
  if __name__ == "__main__":
686
- # demo = create_gr_interface()
687
  demo.launch(share=True, debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import json
 
 
 
 
 
 
3
  from rag.rag_pipeline import RAGPipeline
4
+ from utils.prompts import highlight_prompt, evidence_based_prompt
5
+ from utils.prompts import (
6
+ sample_questions,
 
 
 
 
 
 
 
 
 
7
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ from config import STUDY_FILES
10
 
11
  # Cache for RAG pipelines
12
  rag_cache = {}
13
 
 
 
 
 
 
 
 
 
 
14
 
15
+ def get_rag_pipeline(study_name):
 
 
16
  if study_name not in rag_cache:
17
+ study_file = STUDY_FILES.get(study_name)
18
+ if study_file:
19
+ rag_cache[study_name] = RAGPipeline(study_file)
20
+ else:
21
  raise ValueError(f"Invalid study name: {study_name}")
 
 
 
 
 
 
 
 
22
  return rag_cache[study_name]
23
 
24
 
25
+ def query_rag(study_name: str, question: str, prompt_type: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  rag = get_rag_pipeline(study_name)
 
 
27
 
28
+ if prompt_type == "Highlight":
29
+ prompt = highlight_prompt
30
+ elif prompt_type == "Evidence-based":
31
+ prompt = evidence_based_prompt
32
+ else:
33
+ prompt = None
 
 
 
 
 
 
34
 
35
+ # Use the prepared context in the query
36
+ response = rag.query(question, prompt_template=prompt)
37
 
38
+ return response.response
39
 
 
 
 
 
 
40
 
41
+ def get_study_info(study_name):
42
+ study_file = STUDY_FILES.get(study_name)
43
+ if study_file:
44
+ with open(study_file, "r") as f:
45
+ data = json.load(f)
46
+ return f"**Number of documents:** {len(data)}\n\n**First document title:** {data[0]['title']}"
47
+ else:
48
+ return "Invalid study name"
 
 
 
 
 
 
 
 
 
 
49
 
 
 
 
50
 
51
+ def update_sample_questions(study_name):
52
+ return gr.Dropdown(choices=sample_questions.get(study_name, []), interactive=True)
53
 
 
 
 
 
 
 
54
 
55
+ with gr.Blocks() as demo:
56
+ gr.Markdown("# RAG Pipeline Demo")
 
 
 
 
 
 
57
 
58
+ with gr.Row():
59
+ study_dropdown = gr.Dropdown(
60
+ choices=list(STUDY_FILES.keys()), label="Select Study"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  )
62
+ study_info = gr.Markdown(label="Study Information")
63
 
64
+ study_dropdown.change(get_study_info, inputs=[study_dropdown], outputs=[study_info])
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
+ with gr.Row():
67
+ question_input = gr.Textbox(label="Enter your question")
68
+ sample_question_dropdown = gr.Dropdown(
69
+ choices=[], label="Sample Questions", interactive=True
70
  )
71
 
72
+ study_dropdown.change(
73
+ update_sample_questions,
74
+ inputs=[study_dropdown],
75
+ outputs=[sample_question_dropdown],
76
+ )
77
+ sample_question_dropdown.change(
78
+ lambda x: x, inputs=[sample_question_dropdown], outputs=[question_input]
79
+ )
80
+
81
+ prompt_type = gr.Radio(
82
+ [
83
+ "Default",
84
+ "Highlight",
85
+ "Evidence-based",
86
+ ],
87
+ label="Prompt Type",
88
+ value="Default",
89
+ )
90
+
91
+ submit_button = gr.Button("Submit")
92
+
93
+ answer_output = gr.Markdown(label="Answer")
94
+
95
+ submit_button.click(
96
+ query_rag,
97
+ inputs=[study_dropdown, question_input, prompt_type],
98
+ outputs=[answer_output],
99
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  if __name__ == "__main__":
 
102
  demo.launch(share=True, debug=True)
bin/cfn/ecs-delete DELETED
@@ -1,14 +0,0 @@
1
- #! /usr/bin/env bash
2
- set -e # stop the execution of the script if it fails
3
-
4
- CONFIG_PATH="/Users/patrickcmd/Projects/sunbirdai/Acres/infra/ecs_config.toml"
5
-
6
-
7
- REGION=$(cfn-toml key deploy.region -t $CONFIG_PATH)
8
- STACK_NAME=$(cfn-toml key deploy.stack_name -t $CONFIG_PATH)
9
-
10
-
11
- aws cloudformation delete-stack \
12
- --stack-name $STACK_NAME \
13
- --region $REGION \
14
- --profile sunbirdai
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bin/cfn/ecs-deploy DELETED
@@ -1,25 +0,0 @@
1
- #! /usr/bin/env bash
2
- set -e # stop the execution of the script if it fails
3
-
4
- CFN_PATH="/Users/patrickcmd/Projects/sunbirdai/Acres/infra/ecs_fargate.yml"
5
- CONFIG_PATH="/Users/patrickcmd/Projects/sunbirdai/Acres/infra/ecs_config.toml"
6
- echo $CFN_PATH
7
-
8
- cfn-lint $CFN_PATH
9
-
10
- BUCKET=$(cfn-toml key deploy.bucket -t $CONFIG_PATH)
11
- REGION=$(cfn-toml key deploy.region -t $CONFIG_PATH)
12
- STACK_NAME=$(cfn-toml key deploy.stack_name -t $CONFIG_PATH)
13
- PARAMETERS=$(cfn-toml params v2 -t $CONFIG_PATH)
14
-
15
- aws cloudformation deploy \
16
- --stack-name $STACK_NAME \
17
- --s3-bucket $BUCKET \
18
- --s3-prefix acres-rag \
19
- --region $REGION \
20
- --template-file "$CFN_PATH" \
21
- --no-execute-changeset \
22
- --tags group=acres-rag \
23
- --parameter-overrides $PARAMETERS \
24
- --capabilities CAPABILITY_NAMED_IAM \
25
- --profile acres
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bin/cfn/ecs-deploy-update-api DELETED
@@ -1,22 +0,0 @@
1
- #! /usr/bin/env bash
2
-
3
- set -e
4
-
5
- CLUSTER_NAME="rag-ecs-cluster"
6
- FASTAPI_SERVICE_NAME="dev-acres-fastapi"
7
- TASK_FASTAPI_DEFINTION_FAMILY="dev-acres-fastapi"
8
-
9
-
10
- LATEST_FASTAPI_TASK_DEFINITION_ARN=$(aws ecs describe-task-definition \
11
- --task-definition $TASK_FASTAPI_DEFINTION_FAMILY \
12
- --query 'taskDefinition.taskDefinitionArn' \
13
- --output text)
14
-
15
- echo "TASK DEF ARN:"
16
- echo $LATEST_FASTAPI_TASK_DEFINITION_ARN
17
-
18
- aws ecs update-service \
19
- --cluster $CLUSTER_NAME \
20
- --service $FASTAPI_SERVICE_NAME \
21
- --task-definition $LATEST_FASTAPI_TASK_DEFINITION_ARN \
22
- --force-new-deployment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bin/cfn/ecs-deploy-update-gradio DELETED
@@ -1,22 +0,0 @@
1
- #! /usr/bin/env bash
2
-
3
- set -e
4
-
5
- CLUSTER_NAME="rag-ecs-cluster"
6
- SERVICE_NAME="dev-acres-gradio"
7
- TASK_GRADIO_DEFINTION_FAMILY="dev-acres-gradio"
8
-
9
-
10
- LATEST_GRADIO_TASK_DEFINITION_ARN=$(aws ecs describe-task-definition \
11
- --task-definition $TASK_GRADIO_DEFINTION_FAMILY \
12
- --query 'taskDefinition.taskDefinitionArn' \
13
- --output text)
14
-
15
- echo "TASK DEF ARN:"
16
- echo $LATEST_GRADIO_TASK_DEFINITION_ARN
17
-
18
- aws ecs update-service \
19
- --cluster $CLUSTER_NAME \
20
- --service $SERVICE_NAME \
21
- --task-definition $LATEST_GRADIO_TASK_DEFINITION_ARN \
22
- --force-new-deployment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
commands.md DELETED
@@ -1,53 +0,0 @@
1
- docker network create gradio-fastapi-network
2
-
3
- docker run -it -p 7860:7860 --rm --name gradio --network=gradio-fastapi-network gradio-app
4
-
5
- docker run -it -p 7860:7860 --rm --name gradio --network=gradio-fastapi-network gradio-app-prod
6
-
7
-
8
- export AWS_DEFAULT_REGION=us-east-1
9
- export AWS_ACCOUNT_ID=2244276xxxxx
10
- aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
11
-
12
- aws ecr create-repository \
13
- --repository-name gradio-python \
14
- --image-tag-mutability MUTABLE
15
-
16
- export ECR_PYTHON_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/gradio-python"
17
- echo $ECR_PYTHON_URL
18
-
19
- docker pull python:3.11.10-slim
20
- docker tag python:3.11.10-slim $ECR_PYTHON_URL:3.11.10-slim
21
-
22
- docker push $ECR_PYTHON_URL:3.11.10-slim
23
-
24
-
25
- aws ecr create-repository \
26
- --repository-name gradio-app-prod \
27
- --image-tag-mutability MUTABLE
28
-
29
- export ECR_BACKEND_GRADIO_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/gradio-app-prod"
30
- echo $ECR_BACKEND_GRADIO_URL
31
-
32
- docker build --build-arg AWS_ACCOUNT_ID=2244276xxxxx -t your-image-name .
33
- docker build -f Dockerfile.gradio.prod -t gradio-app-prod .
34
-
35
- docker build --build-arg AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID -f Dockerfile.gradio.prod -t gradio-app-prod .
36
- docker tag gradio-app-prod:latest "${ECR_BACKEND_GRADIO_URL}:latest"
37
- docker push "${ECR_BACKEND_GRADIO_URL}:latest"
38
-
39
-
40
- docker build -f Dockerfile.api -t fastapi-app .
41
- docker run -it -p 8000:8000 --rm --name fastapi --network=gradio-fastapi-network fastapi-app
42
-
43
- aws ecr create-repository \
44
- --repository-name fastapi-api-prod \
45
- --image-tag-mutability MUTABLE
46
-
47
- export ECR_BACKEND_FASTAPI_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/fastapi-api-prod"
48
- echo $ECR_BACKEND_FASTAPI_URL
49
-
50
- docker build --build-arg AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID -f Dockerfile.api.prod -t fastapi-api-prod .
51
- docker tag fastapi-api-prod:latest "${ECR_BACKEND_FASTAPI_URL}:latest"
52
- docker push "${ECR_BACKEND_FASTAPI_URL}:latest"
53
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.py CHANGED
@@ -1,14 +1,9 @@
1
- # config.py
2
-
3
  import os
4
 
5
- from dotenv import load_dotenv
6
-
7
- from utils.helpers import read_study_files
8
-
9
- load_dotenv()
10
-
11
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
12
 
13
-
14
- STUDY_FILES = read_study_files(("study_files.json"))
 
 
 
 
 
 
1
  import os
2
 
 
 
 
 
 
 
3
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
4
 
5
+ STUDY_FILES = {
6
+ "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
7
+ "Ebola Virus": "data/ebola_virus_zotero_items.json",
8
+ "Gene Xpert": "data/gene_xpert_zotero_items.json",
9
+ }
config/pdf_config.yaml DELETED
File without changes
data/.keep DELETED
File without changes
data/ebola_virus_zotero_items.json CHANGED
@@ -11,7 +11,7 @@
11
  "Marco Tuccori"
12
  ],
13
  "doi": "",
14
- "date": "2020",
15
  "item_type": "journalArticle",
16
  "url": ""
17
  },
@@ -28,7 +28,7 @@
28
  "Sina Bavari"
29
  ],
30
  "doi": "10.1146/annurev-pharmtox-010716-105055",
31
- "date": "2017-01-06",
32
  "item_type": "journalArticle",
33
  "url": "https://www.annualreviews.org/doi/10.1146/annurev-pharmtox-010716-105055"
34
  },
@@ -48,7 +48,7 @@
48
  "Xiangguo Qiu"
49
  ],
50
  "doi": "10.1016/j.molmed.2017.07.002",
51
- "date": "09/2017",
52
  "item_type": "journalArticle",
53
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S1471491417301090"
54
  },
@@ -66,7 +66,7 @@
66
  "Robert L. Gottlieb"
67
  ],
68
  "doi": "10.1038/s41577-021-00542-x",
69
- "date": "06/2021",
70
  "item_type": "journalArticle",
71
  "url": "https://www.nature.com/articles/s41577-021-00542-x"
72
  },
@@ -85,7 +85,7 @@
85
  "M. P. Grobusch"
86
  ],
87
  "doi": "10.1080/13543784.2016.1240785",
88
- "date": "2016-11-01",
89
  "item_type": "journalArticle",
90
  "url": "https://www.tandfonline.com/doi/full/10.1080/13543784.2016.1240785"
91
  },
@@ -99,7 +99,7 @@
99
  "Luciana Borio"
100
  ],
101
  "doi": "10.1128/microbiolspec.EI10-0014-2016",
102
- "date": "2016-05-06",
103
  "item_type": "journalArticle",
104
  "url": "https://journals.asm.org/doi/10.1128/microbiolspec.EI10-0014-2016"
105
  },
@@ -117,7 +117,7 @@
117
  "Larry Zeitlin"
118
  ],
119
  "doi": "10.3233/HAB-150284",
120
- "date": "2015-12-23",
121
  "item_type": "journalArticle",
122
  "url": "https://www.medra.org/servlet/aliasResolver?alias=iospress&doi=10.3233/HAB-150284"
123
  },
@@ -130,7 +130,7 @@
130
  "Olivier Garraud"
131
  ],
132
  "doi": "10.1016/j.transci.2016.12.014",
133
- "date": "02/2017",
134
  "item_type": "journalArticle",
135
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S1473050216302002"
136
  },
@@ -144,7 +144,7 @@
144
  "K Karunamoorthi"
145
  ],
146
  "doi": "",
147
- "date": "",
148
  "item_type": "journalArticle",
149
  "url": ""
150
  },
@@ -162,7 +162,7 @@
162
  "Larry Zeitlin"
163
  ],
164
  "doi": "10.3233/HAB-150284",
165
- "date": "2015-12-23",
166
  "item_type": "journalArticle",
167
  "url": ""
168
  },
@@ -176,7 +176,7 @@
176
  "Aaron A. R. Tobian"
177
  ],
178
  "doi": "10.1111/trf.12913",
179
- "date": "2014-12",
180
  "item_type": "journalArticle",
181
  "url": ""
182
  },
@@ -194,7 +194,7 @@
194
  "Robert L. Gottlieb"
195
  ],
196
  "doi": "10.1038/s41577-021-00542-x",
197
- "date": "2021-06",
198
  "item_type": "journalArticle",
199
  "url": ""
200
  },
@@ -210,7 +210,7 @@
210
  "Marco Tuccori"
211
  ],
212
  "doi": "10.1128/CMR.00072-20",
213
- "date": "2020-09-16",
214
  "item_type": "journalArticle",
215
  "url": ""
216
  },
@@ -223,7 +223,7 @@
223
  "Olivier Garraud"
224
  ],
225
  "doi": "10.1016/j.transci.2016.12.014",
226
- "date": "2017-02",
227
  "item_type": "journalArticle",
228
  "url": ""
229
  },
@@ -237,7 +237,7 @@
237
  "Luciana Borio"
238
  ],
239
  "doi": "10.1128/microbiolspec.EI10-0014-2016",
240
- "date": "2016-06",
241
  "item_type": "journalArticle",
242
  "url": ""
243
  },
@@ -257,7 +257,7 @@
257
  "Xiangguo Qiu"
258
  ],
259
  "doi": "10.1016/j.molmed.2017.07.002",
260
- "date": "2017-09",
261
  "item_type": "journalArticle",
262
  "url": ""
263
  },
@@ -274,7 +274,7 @@
274
  "Sina Bavari"
275
  ],
276
  "doi": "10.1146/annurev-pharmtox-010716-105055",
277
- "date": "2017-01-06",
278
  "item_type": "journalArticle",
279
  "url": ""
280
  }
 
11
  "Marco Tuccori"
12
  ],
13
  "doi": "",
14
+ "year": null,
15
  "item_type": "journalArticle",
16
  "url": ""
17
  },
 
28
  "Sina Bavari"
29
  ],
30
  "doi": "10.1146/annurev-pharmtox-010716-105055",
31
+ "year": null,
32
  "item_type": "journalArticle",
33
  "url": "https://www.annualreviews.org/doi/10.1146/annurev-pharmtox-010716-105055"
34
  },
 
48
  "Xiangguo Qiu"
49
  ],
50
  "doi": "10.1016/j.molmed.2017.07.002",
51
+ "year": null,
52
  "item_type": "journalArticle",
53
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S1471491417301090"
54
  },
 
66
  "Robert L. Gottlieb"
67
  ],
68
  "doi": "10.1038/s41577-021-00542-x",
69
+ "year": null,
70
  "item_type": "journalArticle",
71
  "url": "https://www.nature.com/articles/s41577-021-00542-x"
72
  },
 
85
  "M. P. Grobusch"
86
  ],
87
  "doi": "10.1080/13543784.2016.1240785",
88
+ "year": null,
89
  "item_type": "journalArticle",
90
  "url": "https://www.tandfonline.com/doi/full/10.1080/13543784.2016.1240785"
91
  },
 
99
  "Luciana Borio"
100
  ],
101
  "doi": "10.1128/microbiolspec.EI10-0014-2016",
102
+ "year": null,
103
  "item_type": "journalArticle",
104
  "url": "https://journals.asm.org/doi/10.1128/microbiolspec.EI10-0014-2016"
105
  },
 
117
  "Larry Zeitlin"
118
  ],
119
  "doi": "10.3233/HAB-150284",
120
+ "year": null,
121
  "item_type": "journalArticle",
122
  "url": "https://www.medra.org/servlet/aliasResolver?alias=iospress&doi=10.3233/HAB-150284"
123
  },
 
130
  "Olivier Garraud"
131
  ],
132
  "doi": "10.1016/j.transci.2016.12.014",
133
+ "year": null,
134
  "item_type": "journalArticle",
135
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S1473050216302002"
136
  },
 
144
  "K Karunamoorthi"
145
  ],
146
  "doi": "",
147
+ "year": null,
148
  "item_type": "journalArticle",
149
  "url": ""
150
  },
 
162
  "Larry Zeitlin"
163
  ],
164
  "doi": "10.3233/HAB-150284",
165
+ "year": null,
166
  "item_type": "journalArticle",
167
  "url": ""
168
  },
 
176
  "Aaron A. R. Tobian"
177
  ],
178
  "doi": "10.1111/trf.12913",
179
+ "year": null,
180
  "item_type": "journalArticle",
181
  "url": ""
182
  },
 
194
  "Robert L. Gottlieb"
195
  ],
196
  "doi": "10.1038/s41577-021-00542-x",
197
+ "year": null,
198
  "item_type": "journalArticle",
199
  "url": ""
200
  },
 
210
  "Marco Tuccori"
211
  ],
212
  "doi": "10.1128/CMR.00072-20",
213
+ "year": null,
214
  "item_type": "journalArticle",
215
  "url": ""
216
  },
 
223
  "Olivier Garraud"
224
  ],
225
  "doi": "10.1016/j.transci.2016.12.014",
226
+ "year": null,
227
  "item_type": "journalArticle",
228
  "url": ""
229
  },
 
237
  "Luciana Borio"
238
  ],
239
  "doi": "10.1128/microbiolspec.EI10-0014-2016",
240
+ "year": null,
241
  "item_type": "journalArticle",
242
  "url": ""
243
  },
 
257
  "Xiangguo Qiu"
258
  ],
259
  "doi": "10.1016/j.molmed.2017.07.002",
260
+ "year": null,
261
  "item_type": "journalArticle",
262
  "url": ""
263
  },
 
274
  "Sina Bavari"
275
  ],
276
  "doi": "10.1146/annurev-pharmtox-010716-105055",
277
+ "year": null,
278
  "item_type": "journalArticle",
279
  "url": ""
280
  }
data/ebscohost_zotero_items.json DELETED
The diff for this file is too large to render. See raw diff
 
data/exportedris-file-1-of-1-1_zotero_items.json DELETED
@@ -1,813 +0,0 @@
1
- [
2
- {
3
- "key": "B4DGMAWK",
4
- "title": "Afghanistan Safety Nets Evaluation",
5
- "abstract": "The primary objective of this evaluation is to estimate the causal impact of an unconditional cash transfer on the ability of the poorest families in select districts in Afghanistan to smooth consumption during seasonal shocks (e.g., winter-related, agricultural, and water shocks). Secondarily, the study will assess intervention effects on other economic outcomes, ownership and preservation of assets, child wellbeing (including nutrition, education, and refrainment from labor), migration, psychological well-being, and confidence in institutions. The target population consists of the poorest quintile of families with children under the age of five. Unconditional cash transfers will be provided to poor families in three installments: before, at the beginning, and at the end of the lean season. The annual benefit will amount to approximately 8,500 AFN (125 USD) for families with one more child under five; each payment installment is of equal amount.",
6
- "full_text": "",
7
- "authors": [
8
- "Matthew Morton",
9
- "Lucian Bucur Pop"
10
- ],
11
- "doi": "",
12
- "year": null,
13
- "item_type": "journalArticle",
14
- "url": "https://ridie.3ieimpact.org/index.php?r=search/detailView&id=404"
15
- },
16
- {
17
- "key": "P2VD3QWB",
18
- "title": "Financial Education and Financial Access for Transnational Households: Field Experimental Evidence from the Philippines",
19
- "abstract": "We implemented a randomized controlled trial among transnational households in the Philippines estimating impacts of a financial education treatment, a financial access treatment, and the combination of the two on financial behaviors. We test whether there are complementarities between both interventions and provide insight into the nature of constraints operating in financial services markets. We find no evidence of complementarities between the financial education and access treatments. In addition, while we find no evidence of constraints in access to formal credit and savings products, our results suggest that access constraints exist in the formal insurance market. Impacts on other financial behaviors are suggestive of the importance of information constraints in financial decision-making. These results provide guidance to designers of financial interventions in similar populations.",
20
- "full_text": "",
21
- "authors": [
22
- "Abarcar P",
23
- "Barua R",
24
- "Yang D"
25
- ],
26
- "doi": "10.1086/703045",
27
- "year": null,
28
- "item_type": "journalArticle",
29
- "url": ""
30
- },
31
- {
32
- "key": "QD6ZPX6I",
33
- "title": "Labor Supply Responses to Large Social Transfers: Longitudinal Evidence from South Africa",
34
- "abstract": "We quantify the labor supply responses of prime-aged adults to the presence of pensioners in their households, using longitudinal data collected in South Africa. We compare households and individuals before and after pension receipt and pension loss, which allows us to control for a host of unobservable household and individual characteristics that may determine labor market behavior. We,find large cash transfers to the elderly lead to increased employment among prime-aged adults, which occurs primarily through labor migration. The pension's impact is attributable to the increase in household resources it represents, which can be used to stake migrants until they become self-sufficient, and to the presence of pensioners who can care for small children, which allows prime-aged adults to look for work elsewhere. (JEL H23, H55, I38, J22, O15)",
35
- "full_text": "",
36
- "authors": [
37
- "Ardington C",
38
- "Case A",
39
- "Hosegood V"
40
- ],
41
- "doi": "10.1257/app.1.1.22",
42
- "year": null,
43
- "item_type": "journalArticle",
44
- "url": ""
45
- },
46
- {
47
- "key": "VUJDLBR5",
48
- "title": "Aid programs' unintended effects: The case of Progresa and migration",
49
- "abstract": "This paper analyzes the effect of aid on international and domestic migration and explores the causal effect of income on migration. The theoretical model predicts that the effect of aid \u2026",
50
- "full_text": "",
51
- "authors": [
52
- "Angelucci M"
53
- ],
54
- "doi": "",
55
- "year": null,
56
- "item_type": "journalArticle",
57
- "url": "https://papers.ssrn.com/sol3/papers.cfm?abstract_id=868646"
58
- },
59
- {
60
- "key": "29J6T354",
61
- "title": "Information, Intermediaries, and International Migration",
62
- "abstract": "Job seekers face substantial information frictions, especially in international labor markets where intermediaries match prospective migrants with overseas employers. We conducted a randomized trial in Indonesia to explore how information about intermediary quality shapes migration outcomes. Holding access to information about the return to choosing a high-quality intermediary constant, intermediary-specific quality disclosure reduces the migration rate, cutting use of low-quality providers. Workers who do migrate receive better pre-departure preparation and have improved experiences abroad, despite no change in occupation or destination. These results are not driven by changes in beliefs about average provider quality or the return to migration. Nor does selection explain improved outcomes for those who migrate with quality disclosure. Together, our findings are consistent with an increase in the option value of search: with better ability to differentiate offer quality, workers search longer, select higher-quality intermediaries, and ultimately have better migration experiences.",
63
- "full_text": "",
64
- "authors": [
65
- "Bazzi Samuel",
66
- "Cameron Lisa",
67
- "Schaner Simone",
68
- "Witoelar Firman"
69
- ],
70
- "doi": "",
71
- "year": null,
72
- "item_type": "journalArticle",
73
- "url": ""
74
- },
75
- {
76
- "key": "YIL3BQE5",
77
- "title": "An Adaptive Targeted Field Experiment: Job Search Assistance for Refugees in Jordan",
78
- "abstract": "We introduce an adaptive targeted treatment assignment methodology for field experiments. Our Tempered Thompson Algorithm balances the goals of maximizing the precision of treatment effect estimates and maximizing the welfare of experimental participants. A hierarchical Bayesian model allows us to adaptively target treatments. We implement our methodology in Jordan, testing policies to help Syrian refugees and local jobseekers to find work. The immediate employment impacts of a small cash grant, information and psychological support are small, but targeting raises employment by 1 percentage-point (20%). After four months, cash has a sizable effect on employment and earnings of Syrians.",
79
- "full_text": "",
80
- "authors": [
81
- "Caria Stefano",
82
- "Gordon Grant",
83
- "Kasy Maximilian",
84
- "Quinn Simon",
85
- "Shami Soha",
86
- "Teytelboym Alexander"
87
- ],
88
- "doi": "",
89
- "year": null,
90
- "item_type": "journalArticle",
91
- "url": ""
92
- },
93
- {
94
- "key": "JINCDGIG",
95
- "title": "Social Protection and Labor Market Outcomes of Youth in South Africa",
96
- "abstract": "An Apartheid-driven spatial mismatch between workers and jobs leads to high job search costs for people living in rural areas of South Africa--costs that many young people cannot pay. In this article, the authors examine whether the arrival of a social grant--specifically a generous state-funded old-age pension given to men and women above prime age--enhances the ability of young men in rural areas to seek better work opportunities elsewhere. Based on eight waves of socioeconomic data on household living arrangements and household members' characteristics and employment status, collected between 2001 and 2011 at a demographic surveillance site in KwaZulu-Natal, the authors find that young men are significantly more likely to become labor migrants when someone in their household becomes age-eligible for the old-age pension. But this effect applies only to those who have completed high school (matric), who are on average 8 percentage points more likely to migrate for work when their households become pension eligible, compared with other potential labor migrants. The authors also find that, upon pension loss, it is the youngest migrants who are the most likely to return to their sending households, perhaps because they are the least likely to be self-sufficient at the time the pension is lost. The evidence is consistent with binding credit constraints limiting young men from poorer households from seeking more lucrative work elsewhere.",
97
- "full_text": "",
98
- "authors": [
99
- "Ardington Cally",
100
- "Barnighausen Till",
101
- "Case Anne",
102
- "Menendez Alicia"
103
- ],
104
- "doi": "",
105
- "year": null,
106
- "item_type": "journalArticle",
107
- "url": ""
108
- },
109
- {
110
- "key": "F3ZV6MYR",
111
- "title": "Unilateral Facilitation Does Not Raise International Labor Migration From The Philippines",
112
- "abstract": "Significant income gains from migrating from poorer to richer countries have motivated unilateral (source-country) policies facilitating labor emigration. However, their effectiveness is unknown. We conducted a large-scale randomized experiment in the Philippines testing the impact of unilaterally facilitating international labor migration. Our most intensive treatment doubled the rate of job offers but had no identifiable effect on international labor migration. Even the highest overseas job-search rate we induced (22%), falls far short of the share initially expressing interest in migrating (34%). We conclude that unilateral migration facilitation will at most induce a trickle, not a flood, of additional emigration.",
113
- "full_text": "",
114
- "authors": [
115
- "Beam Emily A",
116
- "Mckenzie David",
117
- "Yang Dean"
118
- ],
119
- "doi": "10.1086/683999",
120
- "year": null,
121
- "item_type": "journalArticle",
122
- "url": "https://www.journals.uchicago.edu/doi/10.1086/683999"
123
- },
124
- {
125
- "key": "7CUVDKRR",
126
- "title": "Long-term impacts of the oportunidades conditional cash transfer program on rural youth in Mexico",
127
- "abstract": "This paper studies the long-term effects of partici pation in the Mexican Oportunidades program on a\nvariety of outcomes and behaviors of rural youth in\nMexico. It analyzes data from a social experiment,\nwhich randomly phased-in the program in rural Mex ican villages. In 1997, 320 villages (the treatment\ngroup) were randomly selected for early incorpora tion into the program and 186 villages (the control\ngroup) were designated as a control group to be in corporated eighteen months later. This paper ex amines whether differential exposure to the program\nsignificantly impacted educational attainment, labor\nmarket outcomes, marriage, migration and cognitive\nachievement of youth. The results show positive im pacts of longer exposure on grades of schooling at tained, but no effects on achievement tests. With\nrespect to work, we find an overall reduction in work\nfor male youth.",
128
- "full_text": "",
129
- "authors": [
130
- "Behrman J R",
131
- "Parker S W",
132
- "Todd P E"
133
- ],
134
- "doi": "",
135
- "year": null,
136
- "item_type": "journalArticle",
137
- "url": ""
138
- },
139
- {
140
- "key": "PKYZCRJU",
141
- "title": "Climate and Resilience Impact Evaluation Window: Experimental Evidence from Several Countries",
142
- "abstract": "The concept of resilience has gained attention because it recognises the importance of addressing shorter-term humanitarian needs while simultaneously supporting communities to face future crises induced by climate change, conflict, and other factors. Many institutions, including the World Food Programme (WFP), have increasingly used the concept as a basis for their programming. WFP's integrated packages of interventions aim to improve food security and nutrition by smoothing food consumption in the short-term, while supporting livelihoods and addressing barriers to development (e.g., better climate information, access to markets, education, WASH, etc.) in the long-term. While all programme activities are potentially important for building resilience, livelihood activities are clearly connected to both immediate and future wellbeing. These activities include cash or in-kind transfers to the household and support for creating assets that could benefit the household or the community in the future. Therefore, livelihood activities have the potential to support households in improving and maintaining their wellbeing when facing future shocks and stressors. This pre-analysis plan describes policy experiments to estimate the impacts of experimentally varying WFP's activities on resilience as measured by community and household wellbeing. This approach follows others in conceptualizing resilience through changes in wellbeing (Knippenberg et al, 2019, Phadera et al. 2019, Jones and Tanner, 2017; Barrett et al, 2020). We design and run these experiments in the context of livelihoods programs implemented by the World Food Programme (WFP) across 6 countries. Beyond testing the overall impact of livelihood activities on wellbeing, a key ambition of this paper is to investigate whether activities themselves can be timed to accommodate households? vulnerability to seasonal fluctuations and shocks that are often connected to weather patterns and agricultural cycle. 
We identify two such mechanisms: adjusting the timing of cash transfers and labor requirements; and/or allowing for re-targeting participants over time to account for changes in vulnerability status.",
143
- "full_text": "",
144
- "authors": [],
145
- "doi": "10.1257/rct.6851-3.1",
146
- "year": null,
147
- "item_type": "journalArticle",
148
- "url": "https://www.socialscienceregistry.org/trials/6851"
149
- },
150
- {
151
- "key": "IDE53VC5",
152
- "title": "Migration policy: did an emergency provision displace standard rules? Evidence from Italy",
153
- "abstract": "In 2011, to manage the exceptional flow of people escaping North Africa, the Italian government released the North Africa Emergency (Emergenza Nord Africa, ENA) provision, temporarily relaxing migration policies for some categories of asylum seekers. Using data from an important charity, we perform baseline difference-in-differences regressions to investigate the impact of this emergency rule on the probability of migrants regularizing their legal status. We exploit the timing of the enactment of the ENA accessibility criteriasuch that potential beneficiaries learned of its existence only after the realization of the state of entitlementto identify the effects of the emergency policy provision on treated and control groups of immigrants. The results show an increased number of successful applications in favor of eligible individuals, although a dramatic boost in the denial rate for other migrants is also observed. This suggests either that some migrants suffered displacement due to the emergency rule, and/or that improper submissions of ENA-oriented applications have occurred. We extend our analysis to the use of multilevel models to shed light on these possible (non-mutually excludable) explanations. Results seem to support the presence of some rule-displacement effects, although the existence of a set of wrongful submissions cannot be excluded. We discuss these possibilities from a policy perspective.",
154
- "full_text": "",
155
- "authors": [
156
- "dalla Pellegrina L",
157
- "Saraceno M",
158
- "Suardi M"
159
- ],
160
- "doi": "10.1007/s40888-018-0128-0",
161
- "year": null,
162
- "item_type": "journalArticle",
163
- "url": ""
164
- },
165
- {
166
- "key": "44ICCPGX",
167
- "title": "Does the Role Model Encourage Female Labor Force Participation? Field Experiment in Bangladesh",
168
- "abstract": "Enhancing female labor force participation is considered key to economic growth, poverty alleviation, and women?s empowerment. Despite its importance both at the national and household/individual levels, the female labor force participation rate remains low in South Asian countries. It has been a great interest for researchers and policy makers how to enhance female labor force participation in these countries. We investigate whether providing unmarried young women and their parents with information about working conditions through the role model working women encourages unmarried young women?s labor force participation in rural Bangladesh, and if so, how. For this objective, we conduct the randomized controlled trial.",
169
- "full_text": "",
170
- "authors": [],
171
- "doi": "10.1257/rct.4940-2.0",
172
- "year": null,
173
- "item_type": "journalArticle",
174
- "url": "https://www.socialscienceregistry.org/trials/4940"
175
- },
176
- {
177
- "key": "AEMAYDTW",
178
- "title": "Managing the impact of climate on migration: Evidence from Mexico",
179
- "abstract": "Although there is a growing literature on the impact of climate and weather-related events on migration, little is known about the mitigating effect of policies directed toward the agricultural sector, or aimed at insuring against environmental disasters. This paper uses state-level data on migration flows between Mexico and the USA from 1999 to 2012 to investigate the mitigating impact of an agricultural cash transfer program (PROCAMPO) and a disaster fund (Fonden) on the migration response to weather shocks. We find that Fonden decreases migration in response to heavy rainfall, hurricanes and droughts. Increases in PROCAMPO amounts paid to small producers play a more ambiguous role in the migration response to shocks. Changes in the distribution of PROCAMPO payments favoring more vulnerable producers in the non-irrigated ejido sector, however, seem to mitigate the impact of droughts on migration.",
180
- "full_text": "",
181
- "authors": [
182
- "Chort I",
183
- "Rupelle M de La"
184
- ],
185
- "doi": "10.1007/s00148-022-00894-1",
186
- "year": null,
187
- "item_type": "journalArticle",
188
- "url": "https://link.springer.com/article/10.1007/s00148-022-00894-1"
189
- },
190
- {
191
- "key": "74WHPTAN",
192
- "title": "Impact assessment of the Migrant Resource Centres in the Silk Routes Region",
193
- "abstract": "Executive summary Several Migrant Resource Centres (MRCs) have been set up in recent years jointly by national administrations of origin and destination countries and the \u2026",
194
- "full_text": "",
195
- "authors": [
196
- "Dennison J"
197
- ],
198
- "doi": "",
199
- "year": null,
200
- "item_type": "journalArticle",
201
- "url": "https://www.budapestprocess.org/wp-content/uploads/2022/12/ICMPD_MRC_impact_assessment_2022.pdf"
202
- },
203
- {
204
- "key": "EYAF27EQ",
205
- "title": "Employment and Irregular Migration: Evidence from Two Randomized Controlled Trials in Egypt",
206
- "abstract": "Addressing the root causes of irregular migration has become a key policy priority in Europe. The EU Emergency Trust Fund for Africa (EUTF) was launched in 2015 with a budget of 5 billion euros to support projects aiming at deterring irregular migration flows from 26 origin countries. One of these projects is implemented by the Egyptian Micro, Small and Medium Enterprises Development Agency (MSMEDA) and targets unemployed youth in areas with the highest outflows of irregular migrants. We implement two randomized evaluations to assess whether (i) cash-for-work opportunities, and (ii) training and employment support have the intended effects on the direct beneficiaries and their relatives (household decision-maker, other household members, children, and friends). We assess impacts on people?s preferences and attitudes towards migration, irregular migration, as well as changes in their situation, aspirations, and expectations.",
207
- "full_text": "",
208
- "authors": [],
209
- "doi": "10.1257/rct.10604-1.0",
210
- "year": null,
211
- "item_type": "journalArticle",
212
- "url": "https://www.socialscienceregistry.org/trials/10604"
213
- },
214
- {
215
- "key": "BBGCWFN6",
216
- "title": "Enhancing Female Entrepreneurship through Cash Grants: Experimental Evidence from Rural Tunisia",
217
- "abstract": "This research is a product of the World Bank's Middle East and North Africa Gender Innovation Lab (MNAGIL), which conducts rigorous impact evaluations and inferential \u2026",
218
- "full_text": "",
219
- "authors": [
220
- "Zineb SB"
221
- ],
222
- "doi": "",
223
- "year": null,
224
- "item_type": "journalArticle",
225
- "url": "https://ericmvukiyehe.com/wp-content/uploads/2021/08/3-Enhancing-Female-Entrepreneurship-through-Cash-Grants-Experimental-Evidence-from-Rural-Tunisia-1.pdf"
226
- },
227
- {
228
- "key": "LU7SCHNY",
229
- "title": "Estimating the Impacts of Volunteer vs. Paraprofessional Community Worker Interventions among Conditional Cash Transfer Recipients in the Dominican Republic",
230
- "abstract": "This project seeks to estimate the impacts of providing social intermediation services to poor recipients of conditional cash transfers (CCT) in the Dominican Republic (DR). Employing a randomized control trial design, the project will randomly assign either i) referred volunteer, ii) publicly-recruited volunteer, or iii) university-recruited paid paraprofessional \"community workers\" to provide monthly visits to newly enrolled and existing CCT beneficiaries. Households in one arm of the study will only receive cash transfers and will not receive home visits by community workers and an additional arm will include households that are pure controls. Key outcomes of interest include beneficiaries' compliance with program conditionalities, household consumption, primary and secondary school attendance, use of health and other local public services, participation in vocational training, labor market participation, and knowledge of the wellness curriculum administered during the monthly household visits. Data on these key outcomes will be collected both administratively by the DR government and through conducting midline and endline household surveys. In addition to estimating impacts on beneficiary households, the project is designed to estimate the impacts of the community worker employment experience on community worker candidates themselves in terms of their own income, political and community engagement, leadership activities, and subjective wellbeing. Specifically, volunteer and paid employment offers will be randomly offered among those community worker candidates screened into the selection process.",
231
- "full_text": "",
232
- "authors": [],
233
- "doi": "10.1257/rct.1778-1.0",
234
- "year": null,
235
- "item_type": "journalArticle",
236
- "url": "https://www.socialscienceregistry.org/trials/1778"
237
- },
238
- {
239
- "key": "CDAYBE26",
240
- "title": "Evaluation of a Program for the Professionalization of Artisans (ProfArts) in Ghana",
241
- "abstract": "Training and professionalization interventions are an important vehicle for economic support within the development assistance landscape. Yet rigorous quantitative impact evaluations of such programs remain scarce, especially in developing countries. In order to help fill this research gap, we will conduct a rigorous impact evaluation of a program for the Professionalization of Artisans (ProfArts) in Ghana. The program will deliver top-up training, licensing, certification, and related benefits to up to 10,000 artisans drawn from the Ghanaian construction sector, with beneficiaries to be randomly selected from up to 20,000 baseline respondents. In a first step, we examine the effects of randomly assigned recruitment content on application rates, the composition of the applicant pool, and downstream program outcomes. In a second step, we use a randomized controlled trial to estimate effects of the program on four groups of outcomes: (i) employment, measured e.g. in terms of job retention, acquisition and lengths of employment spells, (ii) job quality and quality of life, including e.g. earnings and workplace conditions, (iii) mobility, e.g. migration intentions and behaviors, and (iv) firm-level outcomes, e.g. firm performance and employment.",
242
- "full_text": "",
243
- "authors": [],
244
- "doi": "10.1257/rct.6842-1.0",
245
- "year": null,
246
- "item_type": "journalArticle",
247
- "url": "https://www.socialscienceregistry.org/trials/6842"
248
- },
249
- {
250
- "key": "QMSMF872",
251
- "title": "Evaluation of the Network for Enterprise Development Learning through Sewing for Girls (N4G) training program for young women in Ghana",
252
- "abstract": "Skills trainings are often a central component of contemporary development assistance strategies, but much of the evidence base for their efficacy comes from high-income countries. RCTs in developing countries are still far less numerous. We contribute to this literature with a rigorous impact evaluation of a program for the empowerment and education of young women in Ghana, called Network for Enterprise Development Learning through Sewing for Girls (N4G). The program will deliver fashion-industry related training and empowerment programs to up to 1,000 underprivileged Ghanaian young women from both urban and rural areas. Beneficiaries are randomly selected from approximately 2,000 baseline respondents. We use a randomized controlled trial to estimate effects of the N4G program on four groups of outcomes: (i) employment, measured for example in terms of employment status, job acquisition, and lengths of employment spells, (ii) job quality, including earnings, having a contract or an oral agreement, job satisfaction, and workplace conditions among others, (iii) quality of life, covering indicators of the current living situation, household assets, and financial dependency, and (iv) mobility, e.g. migration intentions and behaviors.",
253
- "full_text": "",
254
- "authors": [],
255
- "doi": "10.1257/rct.7967-1.0",
256
- "year": null,
257
- "item_type": "journalArticle",
258
- "url": "https://www.socialscienceregistry.org/trials/7967"
259
- },
260
- {
261
- "key": "2WUKVSZA",
262
- "title": "The Impact of Tourist Visas on Intercontinental South-South Migration: Ecuador\u2019s Policy of \u201cOpen Doors\u201d as a Quasi-Experiment",
263
- "abstract": "Through the implementation of universal visa freedom from 2008 to 2010, Ecuador became one of the most accessible countries in the world. This article employs mixed methods to study the impact of the de facto opening of Ecuador\u2019s borders on intercontinental south-south migration. First, we use a difference-in-difference design to show that Ecuador\u2019s policy of universal visa freedom led to a significant increase of immigration from previously restricted nationalities in Africa, Asia, and the Caribbean. Complementary descriptive statistics and qualitative findings confirm the decisive impact visa freedom had on intercontinental south-south migration and suggest three main motives: taking advantage of Ecuador\u2019s open doors as an exit option from origin countries, settlement in Ecuador based on relatively improved opportunities, and transmigration to third countries. Our findings imply that travel visa policies of southern countries significantly impact which new south-south flows emerge. \u00a9 The Author(s) 2018.",
264
- "full_text": "",
265
- "authors": [
266
- "Freier L F",
267
- "Holloway K"
268
- ],
269
- "doi": "10.1177/0197918318801068",
270
- "year": null,
271
- "item_type": "journalArticle",
272
- "url": "https://www.scopus.com/inward/record.uri?eid=2-s2.0-85059900710&doi=10.1177%2f0197918318801068&partnerID=40&md5=60533a0c145229926d2e61b312680c19"
273
- },
274
- {
275
- "key": "YK492G5P",
276
- "title": "The REFANI Pakistan study\u2014a cluster randomised controlled trial of the effectiveness and cost-effectiveness of cash-based transfer programmes on child nutrition status: study protocol",
277
- "abstract": "Cash-based transfer programmes are an emerging strategy in the prevention of wasting in children, especially targeted at vulnerable households during periods of food insecurity or during emergencies. However, the evidence surrounding the use of either cash or voucher transfer programmes in the humanitarian context and on nutritional outcomes is elusive. More evidence is needed not only to inform the global community of practice on best practices in humanitarian settings, but also to help strengthen national mitigation responses. Methods/Design The Research for Food Assistance on Nutrition Impact Pakistan study (REFANI-P) sets out to evaluate the impact of three cash-based interventions on nutritional outcomes in children aged less than five years from poor and very poor households in Dadu District. This four-arm parallel cluster randomised controlled trial is set among Action Against Hunger (ACF) programme villages in Dadu District, Sindh Province. Mothers are the target recipients of either seasonal unconditional cash transfers or fresh food vouchers. A comparison group receives \u2018standard care\u2019 provided by the ACF programme to which all groups have the same access. The primary outcomes are prevalence of wasting and mean weight-for-height Z-score (WHZ) in children. Impact will be assessed at 6 months and at 1 year from baseline. Using a theory-based approach we will determine \u2018how\u2019 the different interventions work by looking at the processes involved and the impact pathways following the theory of change developed for this context. Quantitative and qualitative data are collected on morbidity, health seeking, hygiene and nutrition behaviours, dietary diversity, haemoglobin concentration, women\u2019s empowerment, household food security and expenditures and social capital. 
The direct and indirect costs of each intervention borne by the implementing organisation and their partners as well as by beneficiaries and their communities are also assessed. Discussion The results of this trial will provide robust evidence to help increase knowledge about the predictability of how different modalities of cash-based transfer work best to reduce the risk of child wasting during a season where food insecurity is at its highest. Evidence on costing and cost-effectiveness will further aid decisions on choice of modality in terms of effectiveness and sustainability.",
278
- "full_text": "",
279
- "authors": [
280
- "Fenn et al"
281
- ],
282
- "doi": "10.1186/s12889-015-2380-3",
283
- "year": null,
284
- "item_type": "journalArticle",
285
- "url": "https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-015-2380-3"
286
- },
287
- {
288
- "key": "4WXF37WF",
289
- "title": "How Overseas Opportunities Shape Political Preferences: A Field Experiment on International Migration",
290
- "abstract": "This paper demonstrates that access to overseas employment reduces support for taxation and redistribution by bolstering individuals\u2019 economic prospects. We present results from the \ufb01rst randomized controlled trial to result in international migration. Individuals who received the opportunity to migrate from India to the Middle East for work reported signi\ufb01cantly higher wages, greater economic con\ufb01dence, and more \ufb01scally conservative attitudes. Moreover, the program had lasting effects even for those who decided not to migrate, which we link to improved exit options. Our results speak to longstanding debates about the impact of globalization on economic development and welfare state politics.",
291
- "full_text": "",
292
- "authors": [
293
- "Gaikwad N",
294
- "Hanson K",
295
- "Toth A"
296
- ],
297
- "doi": "",
298
- "year": null,
299
- "item_type": "journalArticle",
300
- "url": "https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3816464"
301
- },
302
- {
303
- "key": "68ATB2KG",
304
- "title": "How Migrant Resource Centres affect migration decisions: Quasi-experimental evidence from Afghanistan, Bangladesh, Iraq and Pakistan",
305
- "abstract": "Several Migrant Resource Centres (MRCs) have been set up in South Asia jointly by national governments of origin and destination countries. Their objectives include encouragement of potential migrants to seek regular rather than irregular routes and to ensure the safety of those migrating, regardless of status. Of theoretical note, their activities utilise innovative, highly personalised counselling. This article provides quasi-experimental evidence of the effect of four activities - telephone and online counselling, and college outreach and pre-departure sessions - on 2215 randomly allocated users of the MRCs' services across six MRCs in four countries: Afghanistan, Bangladesh, Iraq and Pakistan. The effects are a large reduction in self-reported likelihood of migrating irregularly, and a strong increase in awareness of safe options and who to contact for assistance whilst migrating. The effects are consistent across activity type and MRC location, although magnitudes vary. These findings have implications for our understanding of how the decision to migrate is made, what interventions are effective and why.",
306
- "full_text": "",
307
- "authors": [
308
- "Dennison J"
309
- ],
310
- "doi": "10.1111/imig.13082",
311
- "year": null,
312
- "item_type": "journalArticle",
313
- "url": ""
314
- },
315
- {
316
- "key": "9YRUKMA5",
317
- "title": "The Apprenticeship-to-Work Transition : Experimental Evidence from Ghana",
318
- "abstract": "This paper examines the effects of a government-sponsored apprenticeship training program designed to address high levels of youth unemployment in Ghana. The study exploits randomized access to the program to examine the short-run effects of apprenticeship training on labor market outcomes. The results show that apprenticeships shift youth out of wage work and into self-employment. However, the loss of wage income is not offset by increases in self-employment profits in the short run. In addition, the study uses the randomized match between apprentices and training providers to examine the causal effect of characteristics of trainers on outcomes for apprentices. Participants who trained with the most experienced trainers or the most profitable ones had higher earnings. These increases more than offset the program's negative treatment effect on earnings. This suggests that training programs can be made more effective through better recruitment of trainers.",
319
- "full_text": "",
320
- "authors": [
321
- "Hardy Morgan L",
322
- "Mbiti Isaac Mulangu",
323
- "Mccasland Jamie Lee",
324
- "Salcher Isabelle"
325
- ],
326
- "doi": "",
327
- "year": null,
328
- "item_type": "journalArticle",
329
- "url": "https://www.proquest.com/working-papers/apprenticeship-work-transition-experimental/docview/2223061072/se-2 http://UnivofPretoria.on.worldcat.org/atoztitles/link?sid=ProQ:&issn=&volume=&issue=&title=IDEAS+Working+Paper+Series+from+RePEc&spage=&date=2019-01-01&atitle=The+Apprenticeship-to-Work+Transition+%3A+Experimental+Evidence+from+Ghana&au=Hardy%2C+Morgan+L%3BMbiti%2C+Isaac+Mulangu%3BMccasland%2C+Jamie+Lee%3BSalcher%2C+Isabelle&id=doi: https://ideas.repec.org/p/wbk/wbrwps/8851.html"
330
- },
331
- {
332
- "key": "2YVH2MHU",
333
- "title": "Get Rich or Die Tryin\u2019: Perceived Earnings, Perceived Mortality Rates, and Migration Decisions of Potential Work Migrants from Nepal",
334
- "abstract": "This article reports on a randomized field experiment in which potential work migrants from Nepal to Malaysia and the Persian Gulf countries are provided with information on wages and mortality incidences at their intended destinations. It is found that, particularly for the group of potential migrants without prior foreign migration experience, the information changes their expectations of earnings and mortality risks abroad, which further changes their actual migration decisions. Using the exogenous variation in expectations, it is estimated that the elasticity of migration with respect to mortality rate expectation is 0.8, and the elasticity of migration with respect to earnings expectation is 1.1.",
335
- "full_text": "",
336
- "authors": [],
337
- "doi": "",
338
- "year": null,
339
- "item_type": "journalArticle",
340
- "url": ""
341
- },
342
- {
343
- "key": "736V6NV8",
344
- "title": "Harnessing the Development Benefits of International Migration: A Randomized Evaluation of Enhanced Pre-Departure Orientation Seminars for Migrants from the Philippines",
345
- "abstract": "Pre-departure orientation seminars (PDOS) for migrants have the potential to become a key policy tool for increasing the benefits of migration. PDOS build on the fact that many migrants face important knowledge gaps with respect to various aspects of their destination country upon arrival. These knowledge gaps are particularly large for individuals who move from a developing to a developed country and have to navigate a completely different system. Many migrants may hence not be able to make optimal decisions, or only after costly learning. The principal idea of PDOS is to reduce these knowledge gaps early on and provide migrants with the right information to succeed abroad. There is currently no rigorous evidence on the effects of PDOS and on what kind of training modules matter. Using a randomized control trial, this project evaluates the effects of PDOS on migration outcomes of permanent migrants from the Philippines to the US. Together with the Commission on Filipinos Overseas (CFO), the key government agency tasked to manage permanent migration from the Philippines, we have developed new PDOS modules. The new PDOS aims to foster settlement and labor market integration and increase migrants? wellbeing more generally. It also aims to strengthen migrants? engagement in diaspora activities that contribute to development in the Philippines. We identify the effects of the new PDOS by randomly assigning migrants to different types of PDOS and tracking the impact on 1,273 migrants and their family members remaining in the Philippines over a period of two years after departure.",
346
- "full_text": "",
347
- "authors": [],
348
- "doi": "10.1257/rct.1389-2.1",
349
- "year": null,
350
- "item_type": "journalArticle",
351
- "url": "https://www.socialscienceregistry.org/trials/1389"
352
- },
353
- {
354
- "key": "CNXHNZ6K",
355
- "title": "Gender differences in the effects of vocational training: Constraints on women and drop-out behavior",
356
- "abstract": "We provide experimental evidence on the effects of vocational and entrepreneurial training for Malawian youth, in an environment where access to schooling and formal sector employment is extremely low. We track a large fraction of program drop-outs \u2013 a common phenomenon in the training evaluation literature \u2013 and this allows us to examine the determinants and consequences of drop-out and how it mediates the effects of such programs. We find that women make decisions in a more constrained environment, and their participation affected by family obligations. Participation is more expensive for them, resulting in worse training experience. The training results in skills development, continued investment in human capital, and improved well-being, with more positive effects for men, but no improvements in labor market outcomes in the short run.",
357
- "full_text": "",
358
- "authors": [],
359
- "doi": "10.1596/1813-9450-6545",
360
- "year": null,
361
- "item_type": "journalArticle",
362
- "url": ""
363
- },
364
- {
365
- "key": "P4QQT3NG",
366
- "title": "Impacts of Targeted Covid-19 Cash Transfers in Togo",
367
- "abstract": "In response to COVID-19, a third of social protection measures have taken the form of cash transfers reaching more than 1.1 billion people --- a 240% increase in coverage from pre-COVID levels. In the aftermath of the COVID-19 pandemic, direct cash transfers are an effective tool to protect vulnerable households. We conduct a randomized controlled trial (RCT) of a targeted cash transfer program implemented in rural Togo between November 2020 and May 2021. In collaboration with GiveDirectly, the government of Togo secured sufficient funding to provide benefits to roughly 57,000 of the approximately 580,000 citizens living in the poorest 100 cantons. Using mobile phone and satellite data, we identified the poorest cantons and poorest people living in them. We randomized the beneficiaries among the poorest phone owners. After registration, every month and for five months, eligible women receive a cash transfer of 8,620 F CFA ($15.5 US) and eligible men, a transfer of 7,450 F CFA ($13.5 US). We conduct a telephone survey at the end of the intervention to measure a wide range of outcomes, including consumption, food security, labor supply, access to health care, education, psychological well-being, financial inclusion and the perception of poverty. We also have access to administrative data of mobile phone companies in Togo, which will allow us to exploit phone usage behaviors and build other types of outcomes, such as adoption and use of the mobile money services, migration or predicted poverty.",
368
- "full_text": "",
369
- "authors": [],
370
- "doi": "10.1257/rct.7590-1.1",
371
- "year": null,
372
- "item_type": "journalArticle",
373
- "url": "https://www.socialscienceregistry.org/trials/7590"
374
- },
375
- {
376
- "key": "VASWRVLM",
377
- "title": "Information campaigns and migration perceptions: Evidence from Senegal",
378
- "abstract": "The research studies the effect of information campaigns on irregular immigration on the intentions to migrate irregularly among high school students in Dakar. We analyze which actors and information content are effective the most in shaping students' intention to migrate and migration perceptions.",
379
- "full_text": "",
380
- "authors": [],
381
- "doi": "10.1257/rct.8829-1.2",
382
- "year": null,
383
- "item_type": "journalArticle",
384
- "url": "https://www.socialscienceregistry.org/trials/8829"
385
- },
386
- {
387
- "key": "M9UMATUQ",
388
- "title": "Information and Irregular Migration: Evidence from a Field Experiment in Nigeria",
389
- "abstract": "Policy projections and recent research suggest that large numbers of irregular migrants from sub-Saharan Africa will continue to attempt to make their way to Europe over the next few decades. In response, European countries have made and continue to make significant investments in information campaigns designed to discourage irregular African migration. These campaigns are frequently accompanied by evaluations of some sort but, to our knowledge, none have involved a well-powered, randomized controlled trial with a representative sample and actual migration as an outcome. This pre-analysis plan describes the design of field experiment that addresses the following core questions: Are beliefs about migration-related risks, interest in attempting irregular migration, and actual migration decisions responsive to information campaigns highlighting the risks of the migration journey? The project takes place in Edo and Delta states, in the South-South region of Nigeria, a major Sub-Saharan African source of irregular migrants to Europe.",
390
- "full_text": "",
391
- "authors": [],
392
- "doi": "10.1257/rct.8718-1.0",
393
- "year": null,
394
- "item_type": "journalArticle",
395
- "url": "https://www.socialscienceregistry.org/trials/8718"
396
- },
397
- {
398
- "key": "YFJ6ZQ94",
399
- "title": "Informing Risky Migration: Evidence from a field experiment in Guinea",
400
- "abstract": "Migrants from Western Africa go through risky migration routes to reach Europe. In addition, African migration to Europe often results in low economic outcomes. Potential migrants might be poorly informed about benefits and costs of migration. We then propose to use a Randomized Control Trial (RCT) to answer the following questions: (i) Are migrants are about their earning opportunities in Europe and risks connected to the journey? (ii) Can an intervention providing information about earnings and risks changes potential migrants? beliefs and so influences their migration choices?",
401
- "full_text": "",
402
- "authors": [],
403
- "doi": "10.1257/rct.4062-1.1",
404
- "year": null,
405
- "item_type": "journalArticle",
406
- "url": "https://www.socialscienceregistry.org/trials/4062"
407
- },
408
- {
409
- "key": "ED6H9RXQ",
410
- "title": "Micro-credit programs and off-farm migration in China",
411
- "abstract": "This paper seeks to evaluate effects of micro-credit projects on the poor. We utilize data that we collected in Sichuan Province in 1999 to investigate whether micro-credit projects have targeted the poor and whether participation in the micro-credit project increases the likelihood of migration and switching to off-farm jobs. We find that, although the micro-credit programs did not help increase assets of the participants, it did help to move one or more of their members into an off-farm job. Our findings indicate that there is a great deal of benefit in supporting microcredit programs. \u00a9 2004 Blackwell Publishing Ltd.",
412
- "full_text": "",
413
- "authors": [
414
- "Li H",
415
- "Rozelle S",
416
- "Zhang L"
417
- ],
418
- "doi": "10.1111/j.1468-0106.2004.00245.x",
419
- "year": null,
420
- "item_type": "journalArticle",
421
- "url": "https://www.scopus.com/inward/record.uri?eid=2-s2.0-9744253083&doi=10.1111%2fj.1468-0106.2004.00245.x&partnerID=40&md5=f2a50ff821db79c4dfabfa1decc3b723"
422
- },
423
- {
424
- "key": "I4MY83RJ",
425
- "title": "Experimental long-term effects of early-childhood and school-age exposure to a conditional cash transfer program",
426
- "abstract": "Numerous evaluations of conditional cash transfer (CCT) programs show positive short-term impacts, but there is only limited evidence on whether these benefits translate into sustained longer-term gains. This paper uses the municipal-level randomized assignment of a CCT program implemented for five years in Honduras to estimate long-term effects 13 years after the program began. We estimate intent-to-treat effects using individual-level data from the population census, which allows assignment of individuals to their municipality of birth, thereby circumventing migration selection concerns. For the non-indigenous, we find positive and robust impacts on educational outcomes for cohorts of a very wide age range. These include increases of more than 50 percent for secondary school completion rates and the probability of reaching university studies for those exposed at school-going ages. They also include substantive gains for grades attained and current enrollment for others exposed during early childhood, raising the possibility of further gains going forward. Educational gains are, however, more limited for the indigenous. Finally, exposure to the CCT increased the probability of international migration for young men, from 3 to 7 percentage points, also stronger for the non-indigenous. Both early childhood exposure to the nutrition and health components of the CCT as well as exposure during school-going ages to the educational components led to sustained increases in human capital. \u00a9 2019 The Authors",
427
- "full_text": "",
428
- "authors": [
429
- "Molina Mill\u00e1n",
430
- "T",
431
- "Macours K",
432
- "Maluccio J A",
433
- "Tejerina L"
434
- ],
435
- "doi": "10.1016/j.jdeveco.2019.102385",
436
- "year": null,
437
- "item_type": "journalArticle",
438
- "url": "https://www.scopus.com/inward/record.uri?eid=2-s2.0-85072577537&doi=10.1016%2fj.jdeveco.2019.102385&partnerID=40&md5=cbacf9b13921db5e8346649371e83718"
439
- },
440
- {
441
- "key": "N6R57IVT",
442
- "title": "Bilateral labor agreements and the migration of Filipinos: An instrumental variable approach",
443
- "abstract": "Bilateral labor agreements (BLAs) are preferred policy models for regulating migration by many governments around the world. The Philippines has been a leader in both agreement conclusion and exporting labor. A recent Congressional evocation is pushing bureaucrats and academics alike to investigate this policy strategy for outcomes and effectiveness. The following analysis answers the question \"Do BLAs affect the migration outflows of Overseas Filipino Workers (OFWs)?\"using a plausibly exogenous variation to isolate a causal effect. I test for effects of BLAs using two instrumental variables (IVs), such as Bilateral Investment Treaties (BITs) and Formal Alliances, and an original dataset of land-based and sea-based Filipino BLAs and migrant stock in 213 unique areas from 1960 to 2018. I do not find any empirical evidence that these treaties drive migration. However, BLAs have statistically significant effects on gross domestic product (GDP) per capita and exports, suggesting other important channels through which these agreements affect economic outcomes. These null results are critically important for policymakers and diplomats because the resources spent on negotiation are wasted if the primary goal is to increase migration. \u00a9 2021 Brianna O'Steen, published by Sciendo.",
444
- "full_text": "",
445
- "authors": [
446
- "O'Steen B"
447
- ],
448
- "doi": "10.2478/izajodm-2021-0011",
449
- "year": null,
450
- "item_type": "journalArticle",
451
- "url": "https://www.scopus.com/inward/record.uri?eid=2-s2.0-85117392815&doi=10.2478%2fizajodm-2021-0011&partnerID=40&md5=4d168fa69f58fc0804c9b0a8a8fd3172"
452
- },
453
- {
454
- "key": "64F7ET9X",
455
- "title": "Children on the Move : Progressive Redistribution of Humanitarian Cash Transfers among Refugees",
456
- "abstract": "This paper evaluates the impact of the Emergency Social Safety Net (ESSN) in Turkey, the largest cash transfer program for international refugees in the world. The paper provides prima facie evidence that the program quickly caused substantial changes in household size and composition, with a net movement of primarily school-age children from larger ineligible households to smaller eligible ones. A sharp decline in inequality is observed in the entire study population: the Gini index declined by four percentage points (or 15 percent) within six months of program rollout, and the poverty headcount at the $3.20/day international poverty line declined by more than 50 percent after one year. ESSN caused a moderate increase in the diversity and frequency of food consumption among eligible households, and although there was no statistically significant effect on overall school enrollment, there were meaningful gains among the most vulnerable beneficiary households. To strike the right balance between transfer size and coverage, key parameters in the design of any cash transfer program, policy makers should consider the possibility that refugee populations may respond to their eligibility status by altering their household structure and living arrangements.",
457
- "full_text": "",
458
- "authors": [
459
- "Ozler Berk",
460
- "Celik Cigdem",
461
- "Cunningham Scott",
462
- "Cuevas Pablo Facundo",
463
- "Parisotto Luca"
464
- ],
465
- "doi": "",
466
- "year": null,
467
- "item_type": "journalArticle",
468
- "url": "https://search.ebscohost.com/login.aspx?direct=true&db=edsrep&AN=edsrep.p.wbk.wbrwps.9471&site=eds-live"
469
- },
470
- {
471
- "key": "82H99VKZ",
472
- "title": "Do higher salaries lower physician migration?",
473
- "abstract": "It is believed that low wages are an important reason why doctors and nurses in developing countries migrate, and this has led to a call for higher wages for health professionals in developing countries. In this paper, we provide some of the first estimates of the impact of raising health workers' salaries on migration. Using aggregate panel data on the stock of foreign doctors in 16 Organization for Economic Cooperation and Development countries, we explore the effect of a wage increase programme in Ghana on physician migration. We find evidence that 6 years after the implementation of this programme, the foreign stock of Ghanaian doctors abroad had fallen by approximately 10% relative to the estimated counterfactual. This result should be interpreted with caution, however, given the sensitivity of the results to changes in model specification. \u00a9 2013 The Author.",
474
- "full_text": "",
475
- "authors": [
476
- "Okeke E N"
477
- ],
478
- "doi": "10.1093/heapol/czt046",
479
- "year": null,
480
- "item_type": "journalArticle",
481
- "url": "https://www.scopus.com/inward/record.uri?eid=2-s2.0-84906064721&doi=10.1093%2fheapol%2fczt046&partnerID=40&md5=094f43a6ca58fdbf70d99377b4c2379c"
482
- },
483
- {
484
- "key": "924DRNUJ",
485
- "title": "Aid and Migration: An Analysis of the Impact of Progresa on the Timing and Size of Labour Migration",
486
- "abstract": "This paper models the short and medium-run impact of aid on migration, considering alternatively the effect of nconditional and conditional cash transfers to financially constrained households. Data from the evaluation of a Mexican development program, Progresa, are used to estimate the effect of the potential grant size on migration. The empirical analysis is consistent with model prediction. It shows that the program is associated with an increase in international migration, which is also a positive function of size of potential transfer. The grant may loosen financial constraints. At the same time, fine-tuned conditional grants targeting prospective migrants (in the form of secondary school subsidies) reduce the short-term migration probability. As regards medium-term migration, secondary school beneficiaries are not more likely to migrate than the control group after they complete the subsidised education cycle.",
487
- "full_text": "",
488
- "authors": [
489
- "Manuela Angelucci"
490
- ],
491
- "doi": "",
492
- "year": null,
493
- "item_type": "journalArticle",
494
- "url": "https://docs.iza.org/dp1187.pdf"
495
- },
496
- {
497
- "key": "UK8I5GM4",
498
- "title": "Medical worker migration and origin-country human capital: Evidence from us visa policy",
499
- "abstract": "We exploit changes in U.S. visa policies for nurses to measure the origin-country human capital response to international migration opportunities. Combining data on all migrant departures and postsecondary institutions in the Philippines, we show that nursing enrollment and graduation increased substantially in response to greater U.S. demand for nurses. The supply of nursing programs expanded. Nurse quality, measured by licensure exam pass rates, declined. Despite this, for each nurse migrant, 9 additional nurses were licensed. New nurses switched from other degree types, but graduated at higher rates than they would have otherwise, increasing the human capital stock in the Philippines.",
500
- "full_text": "",
501
- "authors": [],
502
- "doi": "10.1162/rest_a_01131",
503
- "year": null,
504
- "item_type": "journalArticle",
505
- "url": ""
506
- },
507
- {
508
- "key": "GIQRAEI7",
509
- "title": "Returns to International Migration: Evidence from a Bangladesh-Malaysia Visa Lottery",
510
- "abstract": "We follow 3,512 (of 1.4 million) applicants to a government lottery that randomly allocated visas to Bangladeshis for low-skilled, temporary labor contracts in Malaysia. Most lottery winners migrate, and their remittance substantially raises their family's standard of living in Bangladesh. The migrant's absence pauses demographic changes (marriage, childbirth, household formation), and shifts decision-making power towards females. Migration removes enterprising individuals, lowering household entrepreneurship, but does not crowd out other family members' labor supply. One group of applicants were offered deferred migration that never materialized. Improved migration prospects induce pre-migration investments in skills that generate no returns in the domestic market.",
511
- "full_text": "",
512
- "authors": [
513
- "Mobarak Ahmed Mushfiq",
514
- "Sharif Iffath",
515
- "Shrestha Maheshwor"
516
- ],
517
- "doi": "",
518
- "year": null,
519
- "item_type": "journalArticle",
520
- "url": ""
521
- },
522
- {
523
- "key": "DEFXYBQK",
524
- "title": "Role models and migration intentions",
525
- "abstract": "Role models\u2014those individuals who resemble us but have achieved more than us\u2014 are thought to impact our aspirations. In this paper, we study the impact of role models on intentions to migrate. Specifically, we implement a randomized controlled trial to show documentaries in rural villages of Mali (Kayes region). These documentaries focus on economic opportunities and show either negative or positive portraits of migrants, or portraits of local people who have successfully set up flourishing businesses without ever considering migration. This paper adds to the larger debate about the efficiency of information provision. We find very few significant impacts, none of which hold when attrition is controlled for using nonparametric Lee bounds. We also implement a treatment heterogeneity analysis using a causal forest algorithm, which aside from confirming our average treatment effects suggests the presence of heterogeneity. It appears that individuals with living conditions that could facilitate migration are less likely to be significantly impacted. The high aspirations to improve living conditions, coupled with a strong feeling of lack of control over the future may help explaining the fact that confrontations with real life experiences do not significantly modify average aspirations to migrate.",
526
- "full_text": "",
527
- "authors": [
528
- "Mespl\u00e9-Somps S",
529
- "Nilsson B",
530
- "d'Aiglepierre R"
531
- ],
532
- "doi": "",
533
- "year": null,
534
- "item_type": "journalArticle",
535
- "url": "https://www.cairn-int.info/journal-afd-research-papers-2021-200-page-1.htm"
536
- },
537
- {
538
- "key": "RQXKP6NP",
539
- "title": "Broken Promises: Evaluating an Incomplete Cash Transfer Program",
540
- "abstract": "Interventions in highly insecure and fragile contexts are always confronted with the latent risk of not being able to implement the program as intended. Despite its high policy relevance, little is known about the impacts of program disruption or cancellation on beneficiaries. This study uses the unplanned cancellation of the South Sudan Youth Business Start-Up Grant Program to assess the socioeconomic, behavioral, and psychological consequences of a program that fails to be implemented as intended. Originally planned as a randomized trial, the Youth Startup Business Grant Program consisted of an unconditional cash grant combined with a business and life skills training targeting the youth in South Sudan. Due to the intensification of violence in the country, the disbursement of the grant was terminated in late 2016 before most of the intended beneficiaries had accessed the grant. The study uses survey data from face-to-face interviews and experimental data from lotteries, trust games, and a list experiment to assess the consequences of the cancellation in a comprehensive form. The empirical analysis employs instrumental variable regressions to control for individual characteristics that might have made it more likely to access the grant before disbursement was frozen. The results show that participants who received the originally planned treatment displayed significant improvements in their consumption, savings, and psychological well-being. However, participants who vainly expected to receive the cash grant showed reduced levels of consumption and women among this subgroup also experienced strong reductions in their trust level. In addition, the study finds some evidence that these women were less likely to migrate.",
541
- "full_text": "",
542
- "authors": [
543
- "Muller Angelika",
544
- "Pape Utz Johann",
545
- "Ralston Laura R"
546
- ],
547
- "doi": "",
548
- "year": null,
549
- "item_type": "journalArticle",
550
- "url": ""
551
- },
552
- {
553
- "key": "G79BE4TK",
554
- "title": "The Benefits and Costs of Guest Worker Programs: Experimental Evidence from The India-UAE Migration Corridor",
555
- "abstract": "We estimate the returns to temporary migration programs using a randomized control trial with several thousand job seekers in India applying to guest worker jobs in the United Arab \u2026",
556
- "full_text": "",
557
- "authors": [
558
- "Naidu S",
559
- "Nyarko Y",
560
- "Wang SY"
561
- ],
562
- "doi": "",
563
- "year": null,
564
- "item_type": "journalArticle",
565
- "url": "https://econ.cms.arts.ubc.ca/wp-content/uploads/sites/38/2022/09/20221123_Suresh-Naidu.pdf"
566
- },
567
- {
568
- "key": "5J2Z3L7J",
569
- "title": "Do social protection programs foster short-term and long-term migration adaptation strategies?",
570
- "abstract": "We examine how migration is influenced by temperature and precipitation variability, and the extent to which the receipt of a cash transfer affects the use of migration as an adaptation strategy. Climate data is merged with georeferenced panel data (2010\u20132014) on individual migration collected from the Zambian Child Grant Program (CGP) sites. We use the person-year dataset to identify the direct and heterogeneous causal effects of the CGP on mobility. Having access to cash transfers doubles the rate of male, short-distance moves during cool periods, irrespective of wealth. Receipt of cash transfers (among wealthier households) during extreme heat causes an additional retention of males. Cash transfers positively spur long-distance migration under normal climate conditions in the long term. They also facilitate short-distance responses to climate, but not long-distance responses that might be demanded by future climate change.",
571
- "full_text": "",
572
- "authors": [
573
- "Mueller Valerie",
574
- "Gray Clark",
575
- "Handa Sudhanshu",
576
- "Seidenfeld David"
577
- ],
578
- "doi": "10.1017/S1355770X19000214",
579
- "year": null,
580
- "item_type": "journalArticle",
581
- "url": "https://www.cambridge.org/core/product/identifier/S1355770X19000214/type/journal_article https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7062362/pdf/nihms-1558213.pdf"
582
- },
583
- {
584
- "key": "EX4XE7QK",
585
- "title": "Labor Productivity, Remittance Use, and the Impact of the Poverty Alleviation Fund (PAF) Program in Nepal",
586
- "abstract": "This dissertation presents three studies related to labor productivity, remittances use, and the effect of an anti-poverty program on migration and remittances. Labor is the biggest endowment available to the poor. Understanding labor issues is important in addressing the problems of poverty, inequality, migration, and economic development. In this dissertation, I estimate the labor productivity of agricultural household because most of the agricultural households in developing countries work in their own farms, it is not possible to observe wages. The first chapter estimates the shadow wage (marginal productivity of labor) of the agricultural household in the context of Nepal. How different is marginal productivity of labor for women compared to men in agricultural households? In developing countries, where most of the families work on their farms, wage or labor-related income cannot be observed directly. This paper contributes to the literature on gender wage difference in labor and development economics by developing a new approach to estimate the shadow wage of agricultural households in Nepal. Using a general functional form, we first derive the shadow wage from a theoretical model. Then, a model with ward-level fixed effects is used to estimate the shadow wage by gender for Nepalese agricultural households. We find that the productivity of women is not that different than that of men. Despite the vast difference in observed market wages for women, the distribution of shadow wages of women is not that different from that of men, calling for policies to increase the market wages for women. The second chapter of this dissertation, attempts to understand the use of remittances among the households of Nepal. Remittances are transfers made by migrant workers to their family and relatives in their country of origin. 
In Nepal, remittances account for 25-30% of the GDP, and the trend of youths seeking work in other countries--mostly in Southeast Asia and the Middle East--has been increasing. Understanding the expenditure pattern of remittances-receiving households compared to non-recipients provides an understanding of the effect of remittances. In this chapter I employ nationally representative data from Nepal to investigate the effect of remittances on household expenditure patterns, and I compare the prevalence of poverty between remittance recipients and non-recipients. The findings that emerge are as follows: households receiving international and both domestic and international remittances have increased expenditure shares on education, suggesting investments in human capital in the household. In contrast I find a decrease in education expenditures for households receiving domestic remittances. Food expenditures share decreases for households receiving all types of remittances. Households receiving remittances increase the expenditure shares on durables and other consumption expenditures. Households receiving remittances have decreased shares in health expenditures. With regards to poverty, the paper shows that receiving remittances reduces the likelihood of being poor. In the third chapter of the dissertation, I evaluate the effects of the Poverty Alleviation Fund program (PAF) on remittances and migration using the data from a quasi-experiment. The PAF is a social fund program that has been providing services to marginalized communities in Nepal through various income-generating activities since 2006. Unlike previous research that has used conditional cash transfer programs (CCTs) to study the role of a development program on migration and remittances, I employ the data from the community-driven anti-poverty program that provides income-generating activities to participants. 
Using a panel dataset collected by Center for Economic Development and Administration (CEDA) of the Tribhuwan University and the PAF, and taking advantage of a quasi-randomized phase-in experimental design, I estimate the causal effects of a development program on remittances, migration, and welfare measures. I show that policy makers should be aware that community-driven development programs have unintended consequences for migration and remittances, which are distinct from the primary goals of the program: alleviating poverty and improving food security. The program results in a decrease of approximately Rs.6000 (approximately six percent of total household consumption) in remittances received, crowding out private transfers in the presence of public transfers. The paper shows an increase in domestic migration, but no change in international migration due to the program.",
587
- "full_text": "",
588
- "authors": [
589
- "Nepal Atul"
590
- ],
591
- "doi": "",
592
- "year": null,
593
- "item_type": "journalArticle",
594
- "url": ""
595
- },
596
- {
597
- "key": "QJH23MCD",
598
- "title": "Safe Migration Awareness Campaign In Rural Communities Of Nigeria, The Procedure And Impacts",
599
- "abstract": "International migration is undergoing unprecedented changes. The traditional determinants of migration such as poverty, food insecurity and climate change are giving way to new motivations. These new issues, that include but are not limited to ambitions to live a foreign lifestyle, incomplete and asymmetric information are capable of underestimating the risk in irregular migration. As the information about foreign lifestyle flow freely and new opportunities open, it becomes very difficult to manage irregular migration through border control. Within the transitional mode of international migration, the use of awareness campaigns, especially in rural areas of home countries that target the most vulnerable groups, \u2013 school-age youths, could become a veritable means of deterring irregular migration. In 2018, Ricosmigration \u2013 Rural Information Campaign on Safe Migration - received funding from the German Foreign Office to investigate why young people from Nigeria migrate irregularly and implement a safe migration awareness campaign for youth in rural secondary schools. This report is from the result of the project which cut across interviews of Nigerian migrants living in Italy, potential migrants in Nigeria, and the result of the awareness campaigns conducted in 10 secondary schools in Edo, Nigeria. In the report, we explain the profile of a potential migrant from Nigeria. We also explore how reduced capabilities to lead the desired life and how the increasing use of social media internet has greatly raised the likelihood of migration in Nigeria. We equally show in the detail how we implemented a randomized experiment to test the efficiency of the migration awareness campaign. The project provides a new dimension to the discourse of the root cause of migration by introducing the role of limited opportunities and freedom (capabilities), low life satisfaction, and incomplete information through social media. 
Additionally, it shows the power of light interventions such as awareness campaigns in reducing irregular migration. In particular, the result showed that about 77.2 per cent of Nigerian youths that responded to the interview have the intention to migrate abroad, and 37.2 per cent would migrate if they win a cash lottery that is enough to cover the migration cost. Our randomized experiment showed that migration awareness campaign could reduce the risk of being a victim of human trafficking by more than 50 per cent. It could also reduce the desire to engage in irregular migration by more than 30 per cent and increased the decision to take necessary steps to avoid human traffickers and follow proper procedures for safe migration by more than 50 per cent. The awareness campaign had a wide coverage reaching about 7000 students in rural communities in Edo State, Nigeria. The success factors of our awareness campaign include the utilization of appropriate channels, delivery of an accurate message that was tailored to the group we spoke to, and the use of respected external facilitators.",
600
- "full_text": "",
601
- "authors": [
602
- "Obi Chinedu"
603
- ],
604
- "doi": "",
605
- "year": null,
606
- "item_type": "journalArticle",
607
- "url": "https://search.ebscohost.com/login.aspx?direct=true&db=edsrep&AN=edsrep.p.osf.socarx.v3kn2&site=eds-live"
608
- },
609
- {
610
- "key": "PM9LIPT3",
611
- "title": "Labour migration and households: A reconsideration of the effects of the social pension on labour supply in South Africa",
612
- "abstract": "This paper re-examines the effect of the South African social pension on the labour supply of working-age adults using data from 1993. We take account of the fact that households may include non-resident members, and therefore the pension may play a role in facilitating migration to work or look for work. We find that rural African women are significantly more likely to be migrant workers when they are members of a household in receipt of a pension, and that it is female pension income that drives this result. We explore a number of possible reasons why pension income might have this effect.",
613
- "full_text": "",
614
- "authors": [
615
- "Posel D",
616
- "Fairburn J A",
617
- "Lund F"
618
- ],
619
- "doi": "10.1016/j.econmod.2005.10.010",
620
- "year": null,
621
- "item_type": "journalArticle",
622
- "url": ""
623
- },
624
- {
625
- "key": "X835X4JX",
626
- "title": "Raising Awareness About the Risk of Irregular Migration: Quasi-Experimental Evidence from Guinea",
627
- "abstract": "In response to mounting evidence of harm inflicted on irregular migrants along their journeys from West Africa to Europe, international organizations, civil society organizations, and governments have scaled up campaigns as a tool for raising awareness about the risks of irregular migration. Campaigns aim to counter misinformation by smugglers and facilitate safe migration decisions. Despite the growing number of interventions, there is limited empirical evidence on the impact and effectiveness of such campaigns. Based on a difference-in-difference design, this study investigates the effect of a mobile cinema and community discussion intervention on the perceptions, knowledge, and intentions of potential irregular migrants in Northern Guinea in 2019. The results show that potential migrants who participated in events were significantly more likely to show awareness gains and less likely to report high intentions to migrate irregularly. While the relative importance of risk perceptions and their impact on migration flows remain unclear, the findings provide evidence supporting the assumption that risk awareness can be a relevant factor in the decision-making process of potential irregular migrants. While campaigns may be an effective tool in certain contexts, effect sizes highlight the need for policymakers to keep realistic expectations.",
628
- "full_text": "",
629
- "authors": [
630
- "Tjaden J",
631
- "Gninafon H"
632
- ],
633
- "doi": "10.1111/padr.12468",
634
- "year": null,
635
- "item_type": "journalArticle",
636
- "url": ""
637
- },
638
- {
639
- "key": "EZTFJFFS",
640
- "title": "Household Structure and Short-Run Economic Change in Nicaragua",
641
- "abstract": "During the economic crises Nicaragua suffered between 2000 and 2002, a conditional cash transfer program targeting poor households began operating. Using panel data on 1,397 households from the program's experimentally designed evaluation, we examined the impact of the program on household structure. Our findings suggest that the program enabled households to avoid reagglomeration during the economic crises, with households in control communities growing more than treated households. These changes were driven primarily by shifts in residence of relatively young men and women with close kinship ties to the household head. In contrast, households that received transfers continued to send off young adult members, suggesting that the program provided resources to overcome the short-term economic pressures on household structure.",
642
- "full_text": "",
643
- "authors": [
644
- "Winters P",
645
- "Stecklov G",
646
- "Todd J"
647
- ],
648
- "doi": "10.1111/j.1741-3737.2009.00628.x",
649
- "year": null,
650
- "item_type": "journalArticle",
651
- "url": ""
652
- },
653
- {
654
- "key": "G9HHCW5Z",
655
- "title": "Empowering Indonesian Migrant Workers to Access Quality Overseas Placement Services",
656
- "abstract": "Nearly 700,000 Indonesians migrate abroad for work each year. The vast majority do so through recruiters and placement agencies that facilitate temporary employment in countries across Asia and the Middle East. These agencies support migrants starting with pre-departure paperwork and training through repatriation, and are therefore a crucial determinant of a worker\u2019s migration experience. Potential migrants ostensibly have a great deal of choice between agencies \u2013 there are over 1,000 formally registered firms in Indonesia \u2013 and in theory, competition between these firms should drive out poorly performing agencies. Yet anecdotal evidence suggests that agency quality is highly variable, and that many agencies engage in exploitative practices. This could be due to market power at the local level, or informational failures, both of which would hamper competitive pressures.\nOur findings from preliminary research suggest that despite the fact that nearly three-quarters of female migrants believe that there is no relationship between the quality of the agencies and the experience with the employer; there is indeed a robust correlation between the two variables. However, migrants attribute the employer quality to \"nasib\", or fate.\nIn 2015, we ran information campaigns designed to transmit information on agency quality to potential migrants. Surveys will be conducted to evaluate the impact of the information campaigns on migrants\u2019 outcomes.",
657
- "full_text": "",
658
- "authors": [
659
- "Schaner Simone",
660
- "Cameron Lisa",
661
- "Bazzi Samuel",
662
- "Kartaadipoetra Firman Witoelar"
663
- ],
664
- "doi": "",
665
- "year": null,
666
- "item_type": "journalArticle",
667
- "url": "https://www.socialscienceregistry.org/trials/630"
668
- },
669
- {
670
- "key": "SNG5JMCQ",
671
- "title": "The impact of providing vocational training to young men on labour outcomes and attitudes towards migration in northern Guinea-Bissau",
672
- "abstract": "As in most of Sub-Saharan Africa, the lack of quality employment among the rural youth is threatening economic development and inclusive growth, with employment search arguably working as an important driver for both internal and international migration. Active labour market policies, such as vocational trainings, are frequently chosen by governments, international institutions and NGOs as a potential solution to these labour market frictions. Nevertheless, the causal evidence determining the impact of these programs on the youth of rural areas is scarce in the sub-region, and inexistent in Guinea-Bissau. In this project, we run a randomized controlled trial within the GOT project implemented by the NGO ENGIM. This project provides vocational training courses on professional skills thought to be particularly relevant in the local context, and facilitates traineeships in local businesses for young men in two northern regions of the country, aiming to prevent their engagement in irregular migration practices. Assessing the impact of the GOT project on the employment rates, income and attitudes towards migrations of its trainees should contribute to a better understanding of the impact of active labour market policies in rural areas of low income countries, facilitating a better design, implementation and evaluation of such programmes in Guinea-Bissau and similar contexts.",
673
- "full_text": "",
674
- "authors": [],
675
- "doi": "10.1257/rct.6890-1.2",
676
- "year": null,
677
- "item_type": "journalArticle",
678
- "url": "https://www.socialscienceregistry.org/trials/6890"
679
- },
680
- {
681
- "key": "QYAQM7H9",
682
- "title": "Weather Insurance and Investment Choice",
683
- "abstract": "Exposure to the risk of extreme weather conditions has been shown to constrain investment\nby subsistence farmers in developing countries and may lead to inefficient production choices.\nThis paper evaluates whether insuring farmers against such risks alters resource allocation decisions. In particular I consider the effects of a Mexican government disaster relief program with\ninsurance-like features. The results, based on a regression discontinuity design, indicate that\ninsurance against losses arising from natural disasters changes how rural households invest in\ntheir farms. Insured farmers utilize more expensive capital inputs and adopt different technologies. Additionally, the insurance changes labor supply patterns. Notably, members of insured\nhouseholds are approximately 10% more likely to migrate internationally. Additional results,\nthat the program matters most when the returns to migration are more unpredictable, are\nconsistent with a model where insurance obviates the need for precautionary savings, allowing\nhouseholds to finance international migration.",
684
- "full_text": "",
685
- "authors": [
686
- "Shapiro J"
687
- ],
688
- "doi": "",
689
- "year": null,
690
- "item_type": "journalArticle",
691
- "url": ""
692
- },
693
- {
694
- "key": "8EFH4AP4",
695
- "title": "Can Public Transfers Reduce Mexican Migration? A study based on randomized experimental data",
696
- "abstract": "Prior research on Mexican migration has shown that social networks and economic incentives play an important role in determining migration outcomes. This study utilizes experimental data on PROGRESA, Mexico's primary poverty reduction program, to evaluate the effects of public cash transfers on migration. Our study complements a growing body of literature aimed at overcoming longstanding hurdles towards the establishment of causal validity in empirical studies of migration. We find that public cash transfers reduce US migration but have little effect on domestic migration. Furthermore, we find that the provision of cash transfers appears to reduce migration partly by reducing the relative deprivation levels of poor households. Finally, we find that the effect of public cash transfers on US migration depend on the size of existing US migration networks. Surprisingly, we see that transfers have larger (more negative) effects on US migration in communities with large existing networks. The results suggest that public transfers may be helpful in managing rural out-migration, particularly to the US. Interestingly, such programs may be most effective if they are targeted towards communities with strong existing migration patterns.",
697
- "full_text": "",
698
- "authors": [
699
- "Stecklov Guy",
700
- "Stampini Marco",
701
- "Davis Benjamin"
702
- ],
703
- "doi": "",
704
- "year": null,
705
- "item_type": "journalArticle",
706
- "url": ""
707
- },
708
- {
709
- "key": "5M6JE7W5",
710
- "title": "Do Conditional Cash Transfers Influence Migration? A Study Using Experimental Data From The Mexican PROGRESA Program",
711
- "abstract": "Prior research on Mexican migration has shown that social networks and economic incentives play an important role in determining migration outcomes. We use experimental data from PROGRESA, Mexico's primary poverty-reduction program, to evaluate the effects of conditional cash transfers on migration both domestically and to the United States. Our study complements a growing body of literature aimed at overcoming longstanding hurdles to the establishment of causal validity in empirical studies of migration. Analysis based on the data collected before and after the program's onset shows that conditional transfers reduce U.S. migration but not domestic migration. The data also enable us to explore the role of existing family and community migration networks. The results show that migration networks strongly influence migration, but that the effect of conditional transfers on migration is apparently not mediated by existing migration network structures. Our results suggest that conditional transfers may be helpful in managing rural out-migration, particularly to the United States.",
712
- "full_text": "",
713
- "authors": [
714
- "Stecklov Guy",
715
- "Winters Paul",
716
- "Stampini Marco",
717
- "Davis Benjamin"
718
- ],
719
- "doi": "",
720
- "year": null,
721
- "item_type": "journalArticle",
722
- "url": "https://www.jstor.org/stable/4147339?seq=1#page_scan_tab_contents"
723
- },
724
- {
725
- "key": "DG5T267N",
726
- "title": "Cash transfers and migration: theory and evidence from a randomized controlled trial",
727
- "abstract": "Will the fast expansion of cash-based programming in developing countries increase international migration? Theoretically, cash transfers may favor international migration by relaxing liquidity, credit, and risk constraints. But transfers, especially those conditional upon staying at home, may also increase the opportunity cost of migrating abroad. This paper evaluates the impact of a cash-for-work program on migration. Randomly selected households in Comoros were offered up to US$320 in cash in exchange for their participation in public works projects. We find that the program increased migration to Mayotte - the neighboring and richer French Island - by 38 percent, from 7.8% to 10.8%. The increase in migration is explained by the alleviation of liquidity and risk constraints, and by the fact that the program did not increase the opportunity cost of migration for likely migrants.",
728
- "full_text": "",
729
- "authors": [
730
- "Sterck O",
731
- "Gazeaud J",
732
- "Mvukiyehe E"
733
- ],
734
- "doi": "",
735
- "year": null,
736
- "item_type": "journalArticle",
737
- "url": "https://www.csae.ox.ac.uk/materials/papers/csae-wps-2019-16.pdf"
738
- },
739
- {
740
- "key": "CAJ5PJQW",
741
- "title": "The impact of Indian SHGs: a long-run field experiment in Jharkhand",
742
- "abstract": "This field experiment randomized access to Self-Help Groups in villages spread over the entire state of Jharkhand and surveyed a sample of 1,080 households three times between 2004 and 2009, in order to evaluate the changes in their living standards. We study impacts on children education and labor, household consumption and risk-coping behavior (including migration), and local credit markets.",
743
- "full_text": "",
744
- "authors": [],
745
- "doi": "10.1257/rct.5570-1.1",
746
- "year": null,
747
- "item_type": "journalArticle",
748
- "url": "https://www.socialscienceregistry.org/trials/5570"
749
- },
750
- {
751
- "key": "MHIY5NJJ",
752
- "title": "The value of forecasts: Experimental evidence from developing-country agriculture",
753
- "abstract": "Climate risk is a key driver of low agricultural productivity in poor countries. We use a cluster-randomized trial to evaluate a novel risk-mitigation approach: long-range forecasts that provide information about the onset of the Indian summer monsoon well in advance of its arrival. In contrast to traditional approaches that allow farmers to cope with risk ex post, this new ex ante technology provides accurate information at least one month in advance of the monsoon's arrival, enabling farmers to alter cropping choices and other up front input decisions. Moreover, forecasts have the potential to be disseminated cheaply, even at scale. We assign 250 villages to one of three groups: a control group; a group that is given an opportunity to purchase the forecast; and a group that is offered insurance. This design allows us to investigate farmers' willingness-to-pay for forecasts; measure how forecasts affect farmer beliefs, up-front investments, and welfare; and study how these effects compare to the canonical ex post loss mitigation tool: weather-based index insurance.",
754
- "full_text": "",
755
- "authors": [],
756
- "doi": "10.1257/rct.8846-1.0",
757
- "year": null,
758
- "item_type": "journalArticle",
759
- "url": "https://www.socialscienceregistry.org/trials/8846"
760
- },
761
- {
762
- "key": "YYL2HIQB",
763
- "title": "Investments in Human Capital: Long-term Effects of Progresa-Oportunidades on Poverty and Migration in Rural Mexico.",
764
- "abstract": "This paper evaluates the effects on poverty reduction and migration of a conditional cash transfer program in Mexico named Oportunidades (previously Progresa). This program, the first in Latin America and the most imitated of its kind, was intended to increase human capital, which would eventually translate into poverty reduction. Linear and non-linear panel models are used to explore whether there are any such effects, and the implications for the effectiveness and evaluation strategies of the program. No significant effects of Progresa-Oportunidades in reducing income poverty or affecting international migration were found at the rural level. However, there is a weak effect in the case of domestic migration. As data continues to become available through Mexico's official Secretariat of Social Development, there will be more opportunities for further exploration of the relationship between this program, poverty alleviation, and migration outcomes. Limitations and further recommendations for this study are also discussed.",
765
- "full_text": "",
766
- "authors": [
767
- "TIRADO J ALEJANDRO"
768
- ],
769
- "doi": "",
770
- "year": null,
771
- "item_type": "journalArticle",
772
- "url": "https://search.ebscohost.com/login.aspx?direct=true&profile=ehost&scope=site&authtype=crawler&jrnl=20672640&AN=113660670&h=tw80U6FJzYcJYu3fTX%2BMPrdXbXrYCOOX2ZDxlHcnJdmg5T13YEukzyFOx75iYT%2F%2BuqTBgG5p2dwmyl3bwBylKg%3D%3D&crl=c"
773
- },
774
- {
775
- "key": "X2YYD8XC",
776
- "title": "The effect of peer-to-peer risk information on potential migrants \u2013 Evidence from a randomized controlled trial in Senegal",
777
- "abstract": "In response to mounting evidence on the dangers of irregular migration from Africa to Europe, the number of information campaigns which aim to raise awareness about the potential risks has rapidly increased. Governments, international organizations and civil society organizations implement a variety of campaigns to counter the spread of misinformation accelerated by smuggling and trafficking networks. The evidence on the effects of such information interventions on potential migrants remains limited and largely anecdotal. More generally, the role of risk perceptions in the decision-making process of potential irregular migrants is rarely explicitly tested, despite the fact that the concept of risk pervades conventional migration models, particularly in the field of economics. We address this gap by assessing the effects of a peer-to-peer information intervention on the perceptions, knowledge and intentions of potential migrants in Dakar, Senegal, using a randomized controlled trial design. The results show that--three months after the intervention--peer-to-peer information events increase potential migrants' subjective information levels, raise risk awareness, and reduce intentions to migrate irregularly. We find no substantial effects on factual migration knowledge. We discuss how the effects may be driven by the trust and identification-enhancing nature of peer-to-peer communication.",
778
- "full_text": "",
779
- "authors": [
780
- "Tjaden Jasper",
781
- "Dunsch Felipe Alexander"
782
- ],
783
- "doi": "10.1016/j.worlddev.2021.105488",
784
- "year": null,
785
- "item_type": "journalArticle",
786
- "url": "https://linkinghub.elsevier.com/retrieve/pii/S0305750X21001005"
787
- },
788
- {
789
- "key": "FC8EU2Q3",
790
- "title": "Universal Basic Income in Kenya",
791
- "abstract": "A universal basic income (UBI) is a recurring, unconditional cash transfer sized to meet basic needs and paid to all members of a society. Proponents argue that a UBI has the potential to eliminate extreme poverty and to counteract the harmful effects of rising inequality in wealthier countries. Interest has surged, with UBI actively debated in countries ranging from Switzerland and Finland to Namibia and India. Yet, while cash transfers in general have a strong evidence base, a full basic income has never been implemented or rigorously evaluated. This study proposes to fill these evidence gaps with an ambitious long-term randomized control trial in Kenya. The core objective is to test the hypothesis that a universal basic income is an effective way to eliminate extreme global poverty. This is a central policy question currently being debated in a number of emerging markets, and is also highly relevant for foreign aid policy given that the global poverty gap has fallen to $80B a year as of 2015, less than half of official development assistance flows. Positive results could shift public spending away from in-kind and means-tested programs, while negative ones could have the opposite effect. As a secondary goal, the study also aims to inform debate on the role of UBI in wealthy countries. While the populations are obviously different from those we will work with in East Africa, many of the core questions about human behavior are the same ? does economic security motivate people to work more or less? To socialize more or less? etc. At a minimum, positive results from a low-cost emerging-market evaluation could be motivator for testing more expensive guarantees in richer countries. Finally, it is worth emphasizing that in the process of producing this evidence the project will also directly benefit some of the poorest people on the planet by delivering cash transfers to them. 
The broader evidence base on cash transfers suggests that these recipients will reap life-changing benefits from participating in the project.",
792
- "full_text": "",
793
- "authors": [],
794
- "doi": "10.1257/rct.1952-2.1",
795
- "year": null,
796
- "item_type": "journalArticle",
797
- "url": "https://www.socialscienceregistry.org/trials/1952"
798
- },
799
- {
800
- "key": "UX36F5C2",
801
- "title": "Liquidity Constraints and Migration: Evidence from Indonesia",
802
- "abstract": "Although liquidity constraints have been seen as both a factor limiting individuals from migrating and a motivation for households to send a migrant, the consequences of relaxing liquidity constraints on migration behavior have not been adequately explored due to data limitations. In this study, we take advantage of an unusual policy, Bantuan Langsung Tunai -- a national-level unconditional cash transfer program targeted toward the poorest households in Indonesia -- to empirically assess the impact of increased liquidity on the migration behavior of poor Indonesian households.With a highly mobile population and a long history of circular migration, Indonesia is an ideal space to study migration. Using panel data from the Indonesian Family Life Survey, the results demonstrate that a positive liquidity shock increases the probability of migration among low-asset households, among households with a migration history, and, most significantly, among lowasset households with a migration history. [ABSTRACT FROM AUTHOR] Copyright of International Migration Review is the property of Sage Publications Inc. and its content may not be copied or emailed to multiple sites or posted to a listserv without the copyright holder's express written permission. However, users may print, download, or email articles for individual use. This abstract may be abridged. No warranty is given about the accuracy of the copy. Users should refer to the original published version of the material for the full abstract. (Copyright applies to all Abstracts.)",
803
- "full_text": "",
804
- "authors": [
805
- "Tiwari Smriti",
806
- "Winters Paul C"
807
- ],
808
- "doi": "10.1177/0197918318768555",
809
- "year": null,
810
- "item_type": "journalArticle",
811
- "url": "http://journals.sagepub.com/doi/10.1177/0197918318768555"
812
- }
813
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/gene_xpert_zotero_items.json CHANGED
@@ -13,7 +13,7 @@
13
  "Hairong Huang"
14
  ],
15
  "doi": "10.21037/jtd.2018.02.60",
16
- "date": "3/2018",
17
  "item_type": "journalArticle",
18
  "url": "http://jtd.amegroups.com/article/view/19447/15515"
19
  },
@@ -33,7 +33,7 @@
33
  "David W. Dowdy"
34
  ],
35
  "doi": "10.1097/QAI.0000000000000712",
36
- "date": "2015-09-1",
37
  "item_type": "journalArticle",
38
  "url": "https://journals.lww.com/00126334-201509010-00011"
39
  },
@@ -52,7 +52,7 @@
52
  "Sydney Rosen"
53
  ],
54
  "doi": "10.1111/j.1365-3156.2012.03028.x",
55
- "date": "09/2012",
56
  "item_type": "journalArticle",
57
  "url": "https://onlinelibrary.wiley.com/doi/10.1111/j.1365-3156.2012.03028.x"
58
  },
@@ -75,7 +75,7 @@
75
  "Keertan Dheda"
76
  ],
77
  "doi": "10.1183/09031936.00145511",
78
- "date": "07/2012",
79
  "item_type": "journalArticle",
80
  "url": "http://erj.ersjournals.com/lookup/doi/10.1183/09031936.00145511"
81
  },
@@ -94,7 +94,7 @@
94
  "Achilles Katamba"
95
  ],
96
  "doi": "10.1186/s12913-016-1804-9",
97
- "date": "12/2016",
98
  "item_type": "journalArticle",
99
  "url": "http://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-016-1804-9"
100
  },
@@ -115,7 +115,7 @@
115
  "A. H. Van'T Hoog"
116
  ],
117
  "doi": "10.5588/ijtld.16.0496",
118
- "date": "2017-04-01",
119
  "item_type": "journalArticle",
120
  "url": "http://www.ingentaconnect.com/content/10.5588/ijtld.16.0496"
121
  },
@@ -138,7 +138,7 @@
138
  "Julio Croda"
139
  ],
140
  "doi": "10.1093/cid/ciaa135",
141
- "date": "2021-03-01",
142
  "item_type": "journalArticle",
143
  "url": "https://academic.oup.com/cid/article/72/5/771/5736588"
144
  },
@@ -154,7 +154,7 @@
154
  "Anete Trajman"
155
  ],
156
  "doi": "10.1590/s1806-37562015000004524",
157
- "date": "12/2015",
158
  "item_type": "journalArticle",
159
  "url": "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132015000600536&lng=en&tlng=en"
160
  },
@@ -181,7 +181,7 @@
181
  "Keertan Dheda"
182
  ],
183
  "doi": "10.1016/S2214-109X(19)30164-0",
184
- "date": "06/2019",
185
  "item_type": "journalArticle",
186
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S2214109X19301640"
187
  },
@@ -206,7 +206,7 @@
206
  "Luis E Cuevas"
207
  ],
208
  "doi": "10.1136/bmjgh-2021-007592",
209
- "date": "02/2022",
210
  "item_type": "journalArticle",
211
  "url": "https://gh.bmj.com/lookup/doi/10.1136/bmjgh-2021-007592"
212
  },
@@ -222,7 +222,7 @@
222
  "Elvira Richter"
223
  ],
224
  "doi": "10.1183/13993003.01333-2015",
225
- "date": "02/2016",
226
  "item_type": "journalArticle",
227
  "url": "http://erj.ersjournals.com/lookup/doi/10.1183/13993003.01333-2015"
228
  },
@@ -244,7 +244,7 @@
244
  "Susan Cleary"
245
  ],
246
  "doi": "10.1371/journal.pone.0251547",
247
- "date": "2021-5-14",
248
  "item_type": "journalArticle",
249
  "url": "https://dx.plos.org/10.1371/journal.pone.0251547"
250
  },
@@ -260,7 +260,7 @@
260
  "M. Shah"
261
  ],
262
  "doi": "10.5588/ijtld.13.0095",
263
- "date": "2013-10-01",
264
  "item_type": "journalArticle",
265
  "url": "http://openurl.ingenta.com/content/xref?genre=article&issn=1027-3719&volume=17&issue=10&spage=1328"
266
  },
@@ -285,7 +285,7 @@
285
  "Jacob Creswell"
286
  ],
287
  "doi": "10.3201/eid2703.204090",
288
- "date": "03/2021",
289
  "item_type": "journalArticle",
290
  "url": "https://wwwnc.cdc.gov/eid/article/27/3/20-4090_article.htm"
291
  },
@@ -300,7 +300,7 @@
300
  "A. Trajman"
301
  ],
302
  "doi": "10.5588/ijtld.13.0637",
303
- "date": "2014-05-01",
304
  "item_type": "journalArticle",
305
  "url": "http://openurl.ingenta.com/content/xref?genre=article&issn=1027-3719&volume=18&issue=5&spage=547"
306
  },
@@ -317,7 +317,7 @@
317
  "Deborah K. Glencross"
318
  ],
319
  "doi": "10.4102/ajlm.v10i1.1229",
320
- "date": "2021-11-30",
321
  "item_type": "journalArticle",
322
  "url": "http://www.ajlmonline.org/index.php/AJLM/article/view/1229"
323
  },
@@ -337,7 +337,7 @@
337
  "Maria Claudia Vater"
338
  ],
339
  "doi": "10.1590/0037-8682-0082-2018",
340
- "date": "10/2018",
341
  "item_type": "journalArticle",
342
  "url": "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0037-86822018000500631&tlng=en"
343
  },
@@ -361,7 +361,7 @@
361
  "Luis E. Cuevas"
362
  ],
363
  "doi": "10.1128/JCM.00864-15",
364
- "date": "08/2015",
365
  "item_type": "journalArticle",
366
  "url": "https://journals.asm.org/doi/10.1128/JCM.00864-15"
367
  },
@@ -386,7 +386,7 @@
386
  "Corinne S. Merle"
387
  ],
388
  "doi": "10.1371/journal.pone.0264206",
389
- "date": "2022-2-22",
390
  "item_type": "journalArticle",
391
  "url": "https://dx.plos.org/10.1371/journal.pone.0264206"
392
  },
@@ -406,7 +406,7 @@
406
  "Prathap Tharyan"
407
  ],
408
  "doi": "10.1371/journal.pone.0205233",
409
- "date": "2018-10-29",
410
  "item_type": "journalArticle",
411
  "url": "https://dx.plos.org/10.1371/journal.pone.0205233"
412
  },
@@ -423,7 +423,7 @@
423
  "Susan E Dorman"
424
  ],
425
  "doi": "10.1186/1471-2334-13-352",
426
- "date": "12/2013",
427
  "item_type": "journalArticle",
428
  "url": "https://bmcinfectdis.biomedcentral.com/articles/10.1186/1471-2334-13-352"
429
  },
@@ -450,7 +450,7 @@
450
  "Frank Cobelens"
451
  ],
452
  "doi": "10.1371/journal.pmed.1001120",
453
- "date": "2011-11-8",
454
  "item_type": "journalArticle",
455
  "url": "https://dx.plos.org/10.1371/journal.pmed.1001120"
456
  },
@@ -468,7 +468,7 @@
468
  "Achilles Katamba"
469
  ],
470
  "doi": "10.1371/journal.pone.0122574",
471
- "date": "2015-4-1",
472
  "item_type": "journalArticle",
473
  "url": "https://dx.plos.org/10.1371/journal.pone.0122574"
474
  },
@@ -486,7 +486,7 @@
486
  "Brittany Moore"
487
  ],
488
  "doi": "10.1007/s40258-018-0397-3",
489
- "date": "8/2018",
490
  "item_type": "journalArticle",
491
  "url": "http://link.springer.com/10.1007/s40258-018-0397-3"
492
  },
@@ -502,7 +502,7 @@
502
  "Charoen Chuchottaworn"
503
  ],
504
  "doi": "10.1016/j.vhri.2019.09.010",
505
- "date": "05/2020",
506
  "item_type": "journalArticle",
507
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S221210992030008X"
508
  },
@@ -519,7 +519,7 @@
519
  "Kevin Schwartzman"
520
  ],
521
  "doi": "10.1371/journal.pone.0150119",
522
- "date": "2016-3-18",
523
  "item_type": "journalArticle",
524
  "url": "https://dx.plos.org/10.1371/journal.pone.0150119"
525
  },
@@ -533,7 +533,7 @@
533
  "Alemayehu Hailu"
534
  ],
535
  "doi": "10.1371/journal.pone.0259056",
536
- "date": "2021-10-25",
537
  "item_type": "journalArticle",
538
  "url": "https://dx.plos.org/10.1371/journal.pone.0259056"
539
  },
@@ -555,7 +555,7 @@
555
  "Esther Turunga"
556
  ],
557
  "doi": "10.1097/QAI.0000000000002371",
558
- "date": "2020-07-1",
559
  "item_type": "journalArticle",
560
  "url": "https://journals.lww.com/10.1097/QAI.0000000000002371"
561
  },
@@ -575,7 +575,7 @@
575
  "Jos\u00e9 Mar\u00eda Ten\u00edas-Burillo"
576
  ],
577
  "doi": "10.1016/j.eimc.2016.06.009",
578
- "date": "08/2017",
579
  "item_type": "journalArticle",
580
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S0213005X16301550"
581
  },
@@ -595,7 +595,7 @@
595
  "Jos\u00e9 Mar\u00eda Ten\u00edas-Burillo"
596
  ],
597
  "doi": "10.1016/j.eimc.2016.06.009",
598
- "date": "08/2017",
599
  "item_type": "journalArticle",
600
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S0213005X16301550"
601
  },
@@ -611,7 +611,7 @@
611
  "Nelson L.S. Lee"
612
  ],
613
  "doi": "10.1016/j.jinf.2014.12.015",
614
- "date": "04/2015",
615
  "item_type": "journalArticle",
616
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S016344531500002X"
617
  }
 
13
  "Hairong Huang"
14
  ],
15
  "doi": "10.21037/jtd.2018.02.60",
16
+ "year": null,
17
  "item_type": "journalArticle",
18
  "url": "http://jtd.amegroups.com/article/view/19447/15515"
19
  },
 
33
  "David W. Dowdy"
34
  ],
35
  "doi": "10.1097/QAI.0000000000000712",
36
+ "year": null,
37
  "item_type": "journalArticle",
38
  "url": "https://journals.lww.com/00126334-201509010-00011"
39
  },
 
52
  "Sydney Rosen"
53
  ],
54
  "doi": "10.1111/j.1365-3156.2012.03028.x",
55
+ "year": null,
56
  "item_type": "journalArticle",
57
  "url": "https://onlinelibrary.wiley.com/doi/10.1111/j.1365-3156.2012.03028.x"
58
  },
 
75
  "Keertan Dheda"
76
  ],
77
  "doi": "10.1183/09031936.00145511",
78
+ "year": null,
79
  "item_type": "journalArticle",
80
  "url": "http://erj.ersjournals.com/lookup/doi/10.1183/09031936.00145511"
81
  },
 
94
  "Achilles Katamba"
95
  ],
96
  "doi": "10.1186/s12913-016-1804-9",
97
+ "year": null,
98
  "item_type": "journalArticle",
99
  "url": "http://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-016-1804-9"
100
  },
 
115
  "A. H. Van'T Hoog"
116
  ],
117
  "doi": "10.5588/ijtld.16.0496",
118
+ "year": null,
119
  "item_type": "journalArticle",
120
  "url": "http://www.ingentaconnect.com/content/10.5588/ijtld.16.0496"
121
  },
 
138
  "Julio Croda"
139
  ],
140
  "doi": "10.1093/cid/ciaa135",
141
+ "year": null,
142
  "item_type": "journalArticle",
143
  "url": "https://academic.oup.com/cid/article/72/5/771/5736588"
144
  },
 
154
  "Anete Trajman"
155
  ],
156
  "doi": "10.1590/s1806-37562015000004524",
157
+ "year": null,
158
  "item_type": "journalArticle",
159
  "url": "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132015000600536&lng=en&tlng=en"
160
  },
 
181
  "Keertan Dheda"
182
  ],
183
  "doi": "10.1016/S2214-109X(19)30164-0",
184
+ "year": null,
185
  "item_type": "journalArticle",
186
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S2214109X19301640"
187
  },
 
206
  "Luis E Cuevas"
207
  ],
208
  "doi": "10.1136/bmjgh-2021-007592",
209
+ "year": null,
210
  "item_type": "journalArticle",
211
  "url": "https://gh.bmj.com/lookup/doi/10.1136/bmjgh-2021-007592"
212
  },
 
222
  "Elvira Richter"
223
  ],
224
  "doi": "10.1183/13993003.01333-2015",
225
+ "year": null,
226
  "item_type": "journalArticle",
227
  "url": "http://erj.ersjournals.com/lookup/doi/10.1183/13993003.01333-2015"
228
  },
 
244
  "Susan Cleary"
245
  ],
246
  "doi": "10.1371/journal.pone.0251547",
247
+ "year": null,
248
  "item_type": "journalArticle",
249
  "url": "https://dx.plos.org/10.1371/journal.pone.0251547"
250
  },
 
260
  "M. Shah"
261
  ],
262
  "doi": "10.5588/ijtld.13.0095",
263
+ "year": null,
264
  "item_type": "journalArticle",
265
  "url": "http://openurl.ingenta.com/content/xref?genre=article&issn=1027-3719&volume=17&issue=10&spage=1328"
266
  },
 
285
  "Jacob Creswell"
286
  ],
287
  "doi": "10.3201/eid2703.204090",
288
+ "year": null,
289
  "item_type": "journalArticle",
290
  "url": "https://wwwnc.cdc.gov/eid/article/27/3/20-4090_article.htm"
291
  },
 
300
  "A. Trajman"
301
  ],
302
  "doi": "10.5588/ijtld.13.0637",
303
+ "year": null,
304
  "item_type": "journalArticle",
305
  "url": "http://openurl.ingenta.com/content/xref?genre=article&issn=1027-3719&volume=18&issue=5&spage=547"
306
  },
 
317
  "Deborah K. Glencross"
318
  ],
319
  "doi": "10.4102/ajlm.v10i1.1229",
320
+ "year": null,
321
  "item_type": "journalArticle",
322
  "url": "http://www.ajlmonline.org/index.php/AJLM/article/view/1229"
323
  },
 
337
  "Maria Claudia Vater"
338
  ],
339
  "doi": "10.1590/0037-8682-0082-2018",
340
+ "year": null,
341
  "item_type": "journalArticle",
342
  "url": "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0037-86822018000500631&tlng=en"
343
  },
 
361
  "Luis E. Cuevas"
362
  ],
363
  "doi": "10.1128/JCM.00864-15",
364
+ "year": null,
365
  "item_type": "journalArticle",
366
  "url": "https://journals.asm.org/doi/10.1128/JCM.00864-15"
367
  },
 
386
  "Corinne S. Merle"
387
  ],
388
  "doi": "10.1371/journal.pone.0264206",
389
+ "year": null,
390
  "item_type": "journalArticle",
391
  "url": "https://dx.plos.org/10.1371/journal.pone.0264206"
392
  },
 
406
  "Prathap Tharyan"
407
  ],
408
  "doi": "10.1371/journal.pone.0205233",
409
+ "year": null,
410
  "item_type": "journalArticle",
411
  "url": "https://dx.plos.org/10.1371/journal.pone.0205233"
412
  },
 
423
  "Susan E Dorman"
424
  ],
425
  "doi": "10.1186/1471-2334-13-352",
426
+ "year": null,
427
  "item_type": "journalArticle",
428
  "url": "https://bmcinfectdis.biomedcentral.com/articles/10.1186/1471-2334-13-352"
429
  },
 
450
  "Frank Cobelens"
451
  ],
452
  "doi": "10.1371/journal.pmed.1001120",
453
+ "year": null,
454
  "item_type": "journalArticle",
455
  "url": "https://dx.plos.org/10.1371/journal.pmed.1001120"
456
  },
 
468
  "Achilles Katamba"
469
  ],
470
  "doi": "10.1371/journal.pone.0122574",
471
+ "year": null,
472
  "item_type": "journalArticle",
473
  "url": "https://dx.plos.org/10.1371/journal.pone.0122574"
474
  },
 
486
  "Brittany Moore"
487
  ],
488
  "doi": "10.1007/s40258-018-0397-3",
489
+ "year": null,
490
  "item_type": "journalArticle",
491
  "url": "http://link.springer.com/10.1007/s40258-018-0397-3"
492
  },
 
502
  "Charoen Chuchottaworn"
503
  ],
504
  "doi": "10.1016/j.vhri.2019.09.010",
505
+ "year": null,
506
  "item_type": "journalArticle",
507
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S221210992030008X"
508
  },
 
519
  "Kevin Schwartzman"
520
  ],
521
  "doi": "10.1371/journal.pone.0150119",
522
+ "year": null,
523
  "item_type": "journalArticle",
524
  "url": "https://dx.plos.org/10.1371/journal.pone.0150119"
525
  },
 
533
  "Alemayehu Hailu"
534
  ],
535
  "doi": "10.1371/journal.pone.0259056",
536
+ "year": null,
537
  "item_type": "journalArticle",
538
  "url": "https://dx.plos.org/10.1371/journal.pone.0259056"
539
  },
 
555
  "Esther Turunga"
556
  ],
557
  "doi": "10.1097/QAI.0000000000002371",
558
+ "year": null,
559
  "item_type": "journalArticle",
560
  "url": "https://journals.lww.com/10.1097/QAI.0000000000002371"
561
  },
 
575
  "Jos\u00e9 Mar\u00eda Ten\u00edas-Burillo"
576
  ],
577
  "doi": "10.1016/j.eimc.2016.06.009",
578
+ "year": null,
579
  "item_type": "journalArticle",
580
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S0213005X16301550"
581
  },
 
595
  "Jos\u00e9 Mar\u00eda Ten\u00edas-Burillo"
596
  ],
597
  "doi": "10.1016/j.eimc.2016.06.009",
598
+ "year": null,
599
  "item_type": "journalArticle",
600
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S0213005X16301550"
601
  },
 
611
  "Nelson L.S. Lee"
612
  ],
613
  "doi": "10.1016/j.jinf.2014.12.015",
614
+ "year": null,
615
  "item_type": "journalArticle",
616
  "url": "https://linkinghub.elsevier.com/retrieve/pii/S016344531500002X"
617
  }
data/kayongo-papers_zotero_items.json DELETED
@@ -1,222 +0,0 @@
1
- [
2
- {
3
- "key": "UNWR6MI4",
4
- "title": "Le\u00e7ons du Rapid Response Service en Ouganda",
5
- "abstract": "R\u00e9sum\u00e9 Le Service de r\u00e9ponse rapide (RRS)\u2013en anglais Rapid Reponse Service\u2013est un service d'application de connaissances. Il r\u00e9pond au besoin de donn\u00e9es des d\u00e9cideurs en \u2026",
6
- "full_text": "",
7
- "authors": [
8
- "I Kawooya",
9
- "I Ddumba",
10
- "E Kayongo",
11
- "..."
12
- ],
13
- "doi": "",
14
- "year": null,
15
- "item_type": "journalArticle",
16
- "url": "https://www.cres-sn.org/wp-content/uploads/2022/03/Ebook-Donnees-probantes.pdf#page=241"
17
- },
18
- {
19
- "key": "QEZLUVIL",
20
- "title": "Brain Magnetic Resonance Imaging and Angiography Findings in Ugandan Children with Sickle Cell Anemia; A Cross Sectional Study",
21
- "abstract": "Abstract Sickle Cell Anemia (SCA) is a leading cause of childhood stroke in sub-Saharan Africa and sickle cell brain vasculopathy manifests either as overt stroke or clinically\" silent \u2026",
22
- "full_text": "",
23
- "authors": [
24
- "R Idro",
25
- "NS Green",
26
- "D Munube",
27
- "LR Buluma",
28
- "B Kebirungi",
29
- "..."
30
- ],
31
- "doi": "",
32
- "year": null,
33
- "item_type": "journalArticle",
34
- "url": "https://ashpublications.org/blood/article/132/Supplement%201/2376/264336"
35
- },
36
- {
37
- "key": "BN43BLJ5",
38
- "title": "Accuracy of the chest x-ray in screening for tuberculosis in Uganda: A cross-sectional study.",
39
- "abstract": "Abstract ABSTRACT BACKGROUND: The WHO END TB strategy requires\u2265 90% case detection to combat tuberculosis (TB). Increased TB case detection requires a more \u2026",
40
- "full_text": "",
41
- "authors": [
42
- "J Nalunjogi",
43
- "F Mugabe",
44
- "I Najjingo",
45
- "P Lusiba",
46
- "F Olweny",
47
- "..."
48
- ],
49
- "doi": "",
50
- "year": null,
51
- "item_type": "journalArticle",
52
- "url": "https://www.researchsquare.com/article/rs-37900/latest.pdf"
53
- },
54
- {
55
- "key": "PF63FYYY",
56
- "title": "Rapidly responding to policy queries with evidence: Learning from rapid response services in Uganda",
57
- "abstract": "Summary The Rapid Response Service (RRS) is a knowledge translation service in Uganda that responds to a decision maker's needs for evidence with synthesised relevant evidence \u2026",
58
- "full_text": "",
59
- "authors": [
60
- "I Kawooya",
61
- "I Ddumba",
62
- "E Kayongo",
63
- "..."
64
- ],
65
- "doi": "",
66
- "year": null,
67
- "item_type": "journalArticle",
68
- "url": "https://library.oapen.org/bitstream/handle/20.500.12657/39511/9780367440121_text.pdf?sequence=1#page=159"
69
- },
70
- {
71
- "key": "VXFKNQIN",
72
- "title": "Pre-diagnostic drop out of presumptive TB patients and its associated factors at Bugembe Health Centre IV in Jinja, Uganda",
73
- "abstract": "Background: Drop out of presumptive TB individuals before making a final diagnosis poses a danger to the individual and their community. We aimed to determine the proportion of \u2026",
74
- "full_text": "",
75
- "authors": [
76
- "G Ekuka",
77
- "I Kawooya",
78
- "E Kayongo",
79
- "R Ssenyonga",
80
- "..."
81
- ],
82
- "doi": "",
83
- "year": null,
84
- "item_type": "journalArticle",
85
- "url": "https://www.ajol.info/index.php/ahs/article/view/197831"
86
- },
87
- {
88
- "key": "KQWSK82P",
89
- "title": "Point\u2010of\u2010care diagnostic tests for sickle cell disease",
90
- "abstract": "Objectives This is a protocol for a Cochrane Review (diagnostic). The objectives are as follows: To determine the accuracy of point\u2010of\u2010care tests (POCT) to screen sickle cell \u2026",
91
- "full_text": "",
92
- "authors": [
93
- "I Kawooya",
94
- "E Kayongo",
95
- "D Munube",
96
- "..."
97
- ],
98
- "doi": "10.1002/14651858.CD014584",
99
- "year": null,
100
- "item_type": "journalArticle",
101
- "url": "https://www.cochranelibrary.com/cdsr/doi/10.1002/14651858.CD014584/abstract"
102
- },
103
- {
104
- "key": "8PMU8JR9",
105
- "title": "of nodding syndrome",
106
- "abstract": "Aims. Nodding syndrome is a poorly understood acquired disorder affecting children in sub-Saharan Africa. The aetiology and pathogenesis are unknown, and no specific treatment is \u2026",
107
- "full_text": "",
108
- "authors": [
109
- "E Kayongo",
110
- "N Gumisiriza",
111
- "A Lanyero",
112
- "..."
113
- ],
114
- "doi": "",
115
- "year": null,
116
- "item_type": "journalArticle",
117
- "url": "https://pdfs.semanticscholar.org/a0a3/7f6c382fe555492d8640855e31bf233881c0.pdf"
118
- },
119
- {
120
- "key": "AN26LWTD",
121
- "title": "Accuracy and incremental yield of the chest X-ray in screening for tuberculosis in Uganda: a cross-sectional study",
122
- "abstract": "The WHO END TB strategy requires\u2265 90% case detection to combat tuberculosis (TB). Increased TB case detection requires a more sensitive and specific screening tool \u2026",
123
- "full_text": "",
124
- "authors": [
125
- "J Nalunjogi",
126
- "F Mugabe",
127
- "I Najjingo",
128
- "P Lusiba",
129
- "..."
130
- ],
131
- "doi": "",
132
- "year": null,
133
- "item_type": "journalArticle",
134
- "url": "https://www.hindawi.com/journals/trt/2021/6622809/"
135
- },
136
- {
137
- "key": "MAYC8H5C",
138
- "title": "Vulnerability of Populations to Malaria after Indoor Residual Spraying is Withdrawn from Areas where its Use has Previously Been Sustained. Protocol for a Systematic \u2026",
139
- "abstract": "Background: With its proven effectiveness, indoor residual spraying (IRS) as a malaria vector control strategy forms one of the reliable vector control strategies, especially when at \u2026",
140
- "full_text": "",
141
- "authors": [
142
- "P Orishaba",
143
- "E Kayongo",
144
- "P Lusiba",
145
- "C Nakalema",
146
- "..."
147
- ],
148
- "doi": "10.1101/2022.05.24.22275507.abstract",
149
- "year": null,
150
- "item_type": "journalArticle",
151
- "url": "https://www.medrxiv.org/content/10.1101/2022.05.24.22275507.abstract"
152
- },
153
- {
154
- "key": "S2FGVY8R",
155
- "title": "Asymptomatic malaria parasitaemia and seizure control in children with nodding syndrome; a cross-sectional study",
156
- "abstract": "Objective Plasmodium falciparum is epileptogenic and in malaria endemic areas, is a leading cause of acute seizures. In these areas, asymptomatic infections are common but \u2026",
157
- "full_text": "",
158
- "authors": [
159
- "R Ogwang",
160
- "R Anguzu",
161
- "P Akun",
162
- "A Ningwa",
163
- "E Kayongo",
164
- "..."
165
- ],
166
- "doi": "",
167
- "year": null,
168
- "item_type": "journalArticle",
169
- "url": "https://bmjopen.bmj.com/content/8/10/e023624.abstract"
170
- },
171
- {
172
- "key": "6KALDEWN",
173
- "title": "Adherence to the MDR-TB intensive phase treatment protocol amongst individuals followed up at central and peripheral health care facilities in Uganda-a descriptive \u2026",
174
- "abstract": "Background: Following initiation of MDR-TB treatment, patients have a choice to receive follow up DOT supervision at either the central initiating facility or at a peripheral facility \u2026",
175
- "full_text": "",
176
- "authors": [
177
- "J Mukasa",
178
- "E Kayongo",
179
- "I Kawooya",
180
- "D Lukoye",
181
- "..."
182
- ],
183
- "doi": "",
184
- "year": null,
185
- "item_type": "journalArticle",
186
- "url": "https://www.ajol.info/index.php/ahs/article/view/197828"
187
- },
188
- {
189
- "key": "76FY87Z6",
190
- "title": "The natural history of nodding syndrome",
191
- "abstract": "Aims. Nodding syndrome is a poorly understood acquired disorder affecting children in sub\u2010Saharan Africa. The aetiology and pathogenesis are unknown, and no specific treatment is \u2026",
192
- "full_text": "",
193
- "authors": [
194
- "R Idro",
195
- "R Ogwang",
196
- "E Kayongo",
197
- "N Gumisiriza",
198
- "..."
199
- ],
200
- "doi": "10.1684/epd.2018.1012",
201
- "year": null,
202
- "item_type": "journalArticle",
203
- "url": "https://onlinelibrary.wiley.com/doi/abs/10.1684/epd.2018.1012"
204
- },
205
- {
206
- "key": "TQQ2BYRK",
207
- "title": "Paper 1: Demand-driven rapid reviews for health policy and systems decision-making: lessons from Lebanon, Ethiopia, and South Africa on researchers and \u2026",
208
- "abstract": "Rapid reviews have emerged as an approach to provide contextualized evidence in a timely and efficient manner. Three rapid review centers were established in Ethiopia, Lebanon \u2026",
209
- "full_text": "",
210
- "authors": [
211
- "RM Mijumbi-Deve",
212
- "I Kawooya",
213
- "E Kayongo",
214
- "R Izizinga",
215
- "..."
216
- ],
217
- "doi": "10.1186/s13643-022-02021-3",
218
- "year": null,
219
- "item_type": "journalArticle",
220
- "url": "https://link.springer.com/article/10.1186/s13643-022-02021-3"
221
- }
222
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/vaccine_coverage_zotero_items.json CHANGED
@@ -17,7 +17,7 @@
17
  "Rania A Tohme"
18
  ],
19
  "doi": "10.15585/mmwr.mm7229a2",
20
- "date": "2023",
21
  "item_type": "journalArticle",
22
  "url": ""
23
  },
@@ -31,7 +31,7 @@
31
  "Edgar Mugema Mulogo"
32
  ],
33
  "doi": "10.21522/TIJPH.2013.09.03.Art019",
34
- "date": "2021",
35
  "item_type": "journalArticle",
36
  "url": ""
37
  },
@@ -45,7 +45,7 @@
45
  "Edgar Mugema Mulogo"
46
  ],
47
  "doi": "10.21522/TIJPH.2013.09.04.Art008",
48
- "date": "2021",
49
  "item_type": "journalArticle",
50
  "url": ""
51
  },
@@ -89,7 +89,7 @@
89
  "G. Shapira"
90
  ],
91
  "doi": "10.1371/journal.pmed.1004070",
92
- "date": "2022",
93
  "item_type": "journalArticle",
94
  "url": "https://www.scopus.com/inward/record.uri?eid=2-s2.0-85137126764&doi=10.1371%2fjournal.pmed.1004070&partnerID=40&md5=32f1cd887f5eb46121eb1e48abfcdaaf"
95
  },
@@ -111,7 +111,7 @@
111
  "Yahaya Gavamukulya"
112
  ],
113
  "doi": "10.1016/j.dib.2019.104269",
114
- "date": "2019",
115
  "item_type": "journalArticle",
116
  "url": ""
117
  },
@@ -137,7 +137,7 @@
137
  "Kirsty Le Doare"
138
  ],
139
  "doi": "10.1136/bmjgh-2021-006102",
140
- "date": "2021",
141
  "item_type": "journalArticle",
142
  "url": "https://search.ebscohost.com/login.aspx?direct=true&db=cmedm&AN=34452941&site=ehost-live&scope=site"
143
  },
@@ -152,7 +152,7 @@
152
  "Tsai-Ching Hsu"
153
  ],
154
  "doi": "",
155
- "date": "2020",
156
  "item_type": "journalArticle",
157
  "url": "https://search.ebscohost.com/login.aspx?direct=true&db=a9h&AN=148721705&site=ehost-live&scope=site"
158
  },
@@ -176,7 +176,7 @@
176
  "E. M. Mulogo"
177
  ],
178
  "doi": "10.1186/s12879-022-07579-w",
179
- "date": "2022",
180
  "item_type": "journalArticle",
181
  "url": ""
182
  },
@@ -192,7 +192,7 @@
192
  "D. Kajungu"
193
  ],
194
  "doi": "10.1186/s12913-023-09875-w",
195
- "date": "2023",
196
  "item_type": "journalArticle",
197
  "url": ""
198
  },
@@ -210,7 +210,7 @@
210
  "C. Banura"
211
  ],
212
  "doi": "10.1186/s12889-022-13113-z",
213
- "date": "2022",
214
  "item_type": "journalArticle",
215
  "url": ""
216
  },
@@ -242,7 +242,7 @@
242
  "N. Fadl"
243
  ],
244
  "doi": "10.1007/s10900-023-01261-1",
245
- "date": "2024",
246
  "item_type": "journalArticle",
247
  "url": ""
248
  },
@@ -264,7 +264,7 @@
264
  "Y. Gavamukulya"
265
  ],
266
  "doi": "10.1016/j.dib.2019.104269",
267
- "date": "2019",
268
  "item_type": "journalArticle",
269
  "url": ""
270
  },
@@ -286,7 +286,7 @@
286
  "Y. Gavamukulya"
287
  ],
288
  "doi": "10.9734/ijtdh/2019/v39i330209",
289
- "date": "2019",
290
  "item_type": "journalArticle",
291
  "url": ""
292
  },
@@ -303,7 +303,7 @@
303
  "H. W. Reynolds"
304
  ],
305
  "doi": "10.3390/vaccines11030647",
306
- "date": "2023",
307
  "item_type": "journalArticle",
308
  "url": ""
309
  },
@@ -320,7 +320,7 @@
320
  "E. Sacks"
321
  ],
322
  "doi": "10.1093/heapol/czaa099",
323
- "date": "2020",
324
  "item_type": "journalArticle",
325
  "url": ""
326
  },
@@ -341,7 +341,7 @@
341
  "L. Berman"
342
  ],
343
  "doi": "10.3390/vaccines11020375",
344
- "date": "2023",
345
  "item_type": "journalArticle",
346
  "url": ""
347
  },
@@ -359,7 +359,7 @@
359
  "P. Waiswa"
360
  ],
361
  "doi": "10.1186/s12913-021-06554-6",
362
- "date": "2021",
363
  "item_type": "journalArticle",
364
  "url": ""
365
  },
@@ -378,7 +378,7 @@
378
  "F. Guillen-Grima"
379
  ],
380
  "doi": "10.3390/vaccines11061103",
381
- "date": "2023",
382
  "item_type": "journalArticle",
383
  "url": ""
384
  }
 
17
  "Rania A Tohme"
18
  ],
19
  "doi": "10.15585/mmwr.mm7229a2",
20
+ "year": null,
21
  "item_type": "journalArticle",
22
  "url": ""
23
  },
 
31
  "Edgar Mugema Mulogo"
32
  ],
33
  "doi": "10.21522/TIJPH.2013.09.03.Art019",
34
+ "year": null,
35
  "item_type": "journalArticle",
36
  "url": ""
37
  },
 
45
  "Edgar Mugema Mulogo"
46
  ],
47
  "doi": "10.21522/TIJPH.2013.09.04.Art008",
48
+ "year": null,
49
  "item_type": "journalArticle",
50
  "url": ""
51
  },
 
89
  "G. Shapira"
90
  ],
91
  "doi": "10.1371/journal.pmed.1004070",
92
+ "year": null,
93
  "item_type": "journalArticle",
94
  "url": "https://www.scopus.com/inward/record.uri?eid=2-s2.0-85137126764&doi=10.1371%2fjournal.pmed.1004070&partnerID=40&md5=32f1cd887f5eb46121eb1e48abfcdaaf"
95
  },
 
111
  "Yahaya Gavamukulya"
112
  ],
113
  "doi": "10.1016/j.dib.2019.104269",
114
+ "year": null,
115
  "item_type": "journalArticle",
116
  "url": ""
117
  },
 
137
  "Kirsty Le Doare"
138
  ],
139
  "doi": "10.1136/bmjgh-2021-006102",
140
+ "year": null,
141
  "item_type": "journalArticle",
142
  "url": "https://search.ebscohost.com/login.aspx?direct=true&db=cmedm&AN=34452941&site=ehost-live&scope=site"
143
  },
 
152
  "Tsai-Ching Hsu"
153
  ],
154
  "doi": "",
155
+ "year": null,
156
  "item_type": "journalArticle",
157
  "url": "https://search.ebscohost.com/login.aspx?direct=true&db=a9h&AN=148721705&site=ehost-live&scope=site"
158
  },
 
176
  "E. M. Mulogo"
177
  ],
178
  "doi": "10.1186/s12879-022-07579-w",
179
+ "year": null,
180
  "item_type": "journalArticle",
181
  "url": ""
182
  },
 
192
  "D. Kajungu"
193
  ],
194
  "doi": "10.1186/s12913-023-09875-w",
195
+ "year": null,
196
  "item_type": "journalArticle",
197
  "url": ""
198
  },
 
210
  "C. Banura"
211
  ],
212
  "doi": "10.1186/s12889-022-13113-z",
213
+ "year": null,
214
  "item_type": "journalArticle",
215
  "url": ""
216
  },
 
242
  "N. Fadl"
243
  ],
244
  "doi": "10.1007/s10900-023-01261-1",
245
+ "year": null,
246
  "item_type": "journalArticle",
247
  "url": ""
248
  },
 
264
  "Y. Gavamukulya"
265
  ],
266
  "doi": "10.1016/j.dib.2019.104269",
267
+ "year": null,
268
  "item_type": "journalArticle",
269
  "url": ""
270
  },
 
286
  "Y. Gavamukulya"
287
  ],
288
  "doi": "10.9734/ijtdh/2019/v39i330209",
289
+ "year": null,
290
  "item_type": "journalArticle",
291
  "url": ""
292
  },
 
303
  "H. W. Reynolds"
304
  ],
305
  "doi": "10.3390/vaccines11030647",
306
+ "year": null,
307
  "item_type": "journalArticle",
308
  "url": ""
309
  },
 
320
  "E. Sacks"
321
  ],
322
  "doi": "10.1093/heapol/czaa099",
323
+ "year": null,
324
  "item_type": "journalArticle",
325
  "url": ""
326
  },
 
341
  "L. Berman"
342
  ],
343
  "doi": "10.3390/vaccines11020375",
344
+ "year": null,
345
  "item_type": "journalArticle",
346
  "url": ""
347
  },
 
359
  "P. Waiswa"
360
  ],
361
  "doi": "10.1186/s12913-021-06554-6",
362
+ "year": null,
363
  "item_type": "journalArticle",
364
  "url": ""
365
  },
 
378
  "F. Guillen-Grima"
379
  ],
380
  "doi": "10.3390/vaccines11061103",
381
+ "year": null,
382
  "item_type": "journalArticle",
383
  "url": ""
384
  }
data/zotero-collection-pastan_zotero_items.json DELETED
The diff for this file is too large to render. See raw diff
 
docs.py DELETED
@@ -1,13 +0,0 @@
1
- description = """
2
- Welcome to the Acres AI RAG API documentation.
3
-
4
- ### RAG Tasks
5
- - Use the `/process_zotero_library_items`: Process zotero library items with your zotero credentials.
6
- - Use the `/get_study_info`: Get number of documents in a zotero study.
7
- - Use the `/study_variables`: Get research summary from the study provided the study variables.
8
- - Use the `/download_csv`: Export the markdown text to a csv file.
9
- """
10
-
11
- tags_metadata = [
12
- {"name": "ACRES RAG", "description": "AI RAG Application"},
13
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
infra/ecs_config.template DELETED
@@ -1,7 +0,0 @@
1
- [deploy]
2
- bucket = 'dev-acres-gradio-bucket'
3
- region = 'us-east-1'
4
- stack_name = 'AcresRag'
5
-
6
- [parameters]
7
- ContainerImageGradio = '224427659xxxx.dkr.ecr.us-east-1.amazonaws.com/gradio-app-prod:latest'
 
 
 
 
 
 
 
 
infra/ecs_fargate.yml DELETED
@@ -1,581 +0,0 @@
1
- AWSTemplateFormatVersion: '2010-09-09'
2
- Description: Deploy Gradio and FastAPI services on AWS ECS Fargate
3
-
4
- Parameters:
5
- Environment:
6
- Type: String
7
- Default: dev
8
- AllowedValues: [dev, prod]
9
-
10
- # VPC Configuration
11
- VpcCIDR:
12
- Type: String
13
- Default: 10.0.0.0/16
14
- PublicSubnet1CIDR:
15
- Type: String
16
- Default: 10.0.1.0/24
17
- PublicSubnet2CIDR:
18
- Type: String
19
- Default: 10.0.2.0/24
20
-
21
- # ECS Configuration
22
- ECSClusterName:
23
- Type: String
24
- Default: rag-ecs-cluster
25
- GradioTaskDefinitionCPU:
26
- Type: Number
27
- Default: 512
28
- GradioTaskDefinitionMemory:
29
- Type: Number
30
- Default: 1024
31
- FastAPITaskDefinitionCPU:
32
- Type: Number
33
- Default: 256
34
- FastAPITaskDefinitionMemory:
35
- Type: Number
36
- Default: 512
37
-
38
- # Container Images
39
- ContainerImageGradio:
40
- Type: String
41
- Description: URI of the Gradio container image in ECR
42
- ContainerImageFastAPI:
43
- Type: String
44
- Description: URI of the FastAPI container image in ECR
45
- # CertificateArn:
46
- # Type: String
47
-
48
- Resources:
49
- # VPC and Networking
50
- VPC:
51
- Type: AWS::EC2::VPC
52
- Properties:
53
- CidrBlock: !Ref VpcCIDR
54
- EnableDnsHostnames: true
55
- EnableDnsSupport: true
56
- Tags:
57
- - Key: Name
58
- Value: !Sub ${Environment}-acres-vpc
59
-
60
- InternetGateway:
61
- Type: AWS::EC2::InternetGateway
62
- Properties:
63
- Tags:
64
- - Key: Name
65
- Value: !Sub ${Environment}-acres-igw
66
-
67
- AttachGateway:
68
- Type: AWS::EC2::VPCGatewayAttachment
69
- Properties:
70
- VpcId: !Ref VPC
71
- InternetGatewayId: !Ref InternetGateway
72
-
73
- PublicSubnet1:
74
- Type: AWS::EC2::Subnet
75
- Properties:
76
- VpcId: !Ref VPC
77
- AvailabilityZone: !Select [0, !GetAZs '']
78
- CidrBlock: !Ref PublicSubnet1CIDR
79
- MapPublicIpOnLaunch: true
80
- Tags:
81
- - Key: Name
82
- Value: !Sub ${Environment}-acres-public-subnet-1
83
-
84
- PublicSubnet2:
85
- Type: AWS::EC2::Subnet
86
- Properties:
87
- VpcId: !Ref VPC
88
- AvailabilityZone: !Select [1, !GetAZs '']
89
- CidrBlock: !Ref PublicSubnet2CIDR
90
- MapPublicIpOnLaunch: true
91
- Tags:
92
- - Key: Name
93
- Value: !Sub ${Environment}-acres-public-subnet-2
94
-
95
- PublicRouteTable:
96
- Type: AWS::EC2::RouteTable
97
- Properties:
98
- VpcId: !Ref VPC
99
- Tags:
100
- - Key: Name
101
- Value: !Sub ${Environment}-acres-public-rt
102
-
103
- PublicRoute:
104
- Type: AWS::EC2::Route
105
- DependsOn: AttachGateway
106
- Properties:
107
- RouteTableId: !Ref PublicRouteTable
108
- DestinationCidrBlock: 0.0.0.0/0
109
- GatewayId: !Ref InternetGateway
110
-
111
- PublicSubnet1RouteTableAssociation:
112
- Type: AWS::EC2::SubnetRouteTableAssociation
113
- Properties:
114
- SubnetId: !Ref PublicSubnet1
115
- RouteTableId: !Ref PublicRouteTable
116
-
117
- PublicSubnet2RouteTableAssociation:
118
- Type: AWS::EC2::SubnetRouteTableAssociation
119
- Properties:
120
- SubnetId: !Ref PublicSubnet2
121
- RouteTableId: !Ref PublicRouteTable
122
-
123
- # Security Groups
124
- GradioSecurityGroup:
125
- Type: AWS::EC2::SecurityGroup
126
- Properties:
127
- GroupDescription: Security group for Gradio service
128
- VpcId: !Ref VPC
129
- SecurityGroupIngress:
130
- - IpProtocol: tcp
131
- FromPort: 7860
132
- ToPort: 7860
133
- CidrIp: 0.0.0.0/0
134
- Description: INTERNET HTTPS
135
- - IpProtocol: tcp
136
- FromPort: 80
137
- ToPort: 80
138
- CidrIp: 0.0.0.0/0
139
- Description: INTERNET HTTP
140
- SecurityGroupEgress:
141
- - IpProtocol: -1
142
- CidrIp: 0.0.0.0/0
143
-
144
- FastAPISecurityGroup:
145
- Type: AWS::EC2::SecurityGroup
146
- Properties:
147
- GroupDescription: Security group for FastAPI service
148
- VpcId: !Ref VPC
149
- SecurityGroupIngress:
150
- - IpProtocol: tcp
151
- FromPort: 8000
152
- ToPort: 8000
153
- CidrIp: 0.0.0.0/0
154
- Description: INTERNET HTTPS
155
- - IpProtocol: tcp
156
- FromPort: 80
157
- ToPort: 80
158
- CidrIp: 0.0.0.0/0
159
- Description: INTERNET HTTP
160
- SecurityGroupEgress:
161
- - IpProtocol: -1
162
- CidrIp: 0.0.0.0/0
163
-
164
-
165
- # IAM Roles and Policies
166
- # Gradio Execution Role - for pulling images and logging
167
- GradioTaskExecutionRole:
168
- Type: AWS::IAM::Role
169
- Properties:
170
- AssumeRolePolicyDocument:
171
- Version: '2012-10-17'
172
- Statement:
173
- - Effect: Allow
174
- Principal:
175
- Service: ecs-tasks.amazonaws.com
176
- Action: sts:AssumeRole
177
- ManagedPolicyArns:
178
- - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy
179
- Policies:
180
- - PolicyName: GradioExecutionPolicy
181
- PolicyDocument:
182
- Version: '2012-10-17'
183
- Statement:
184
- - Effect: Allow
185
- Action:
186
- - ecr:GetAuthorizationToken
187
- - ecr:BatchCheckLayerAvailability
188
- - ecr:GetDownloadUrlForLayer
189
- - ecr:BatchGetImage
190
- Resource: '*'
191
- - Effect: Allow
192
- Action:
193
- - logs:CreateLogStream
194
- - logs:PutLogEvents
195
- Resource:
196
- - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/ecs/${Environment}-acres-gradio:*
197
- - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/ecs/${Environment}-acres-gradio:log-stream:*
198
-
199
- # Gradio Task Role - for runtime permissions
200
- GradioTaskRole:
201
- Type: AWS::IAM::Role
202
- Properties:
203
- AssumeRolePolicyDocument:
204
- Version: '2012-10-17'
205
- Statement:
206
- - Effect: Allow
207
- Principal:
208
- Service: ecs-tasks.amazonaws.com
209
- Action: sts:AssumeRole
210
- Policies:
211
- - PolicyName: GradioTaskPolicy
212
- PolicyDocument:
213
- Version: '2012-10-17'
214
- Statement:
215
- # Add specific permissions needed by your Gradio application at runtime
216
- - Effect: Allow
217
- Action:
218
- - s3:GetObject
219
- - s3:PutObject
220
- Resource: !Sub arn:aws:s3:::${Environment}-acres-gradio-bucket/*
221
-
222
- # FastAPI Execution Role - for pulling images and logging
223
- FastAPITaskExecutionRole:
224
- Type: AWS::IAM::Role
225
- Properties:
226
- AssumeRolePolicyDocument:
227
- Version: '2012-10-17'
228
- Statement:
229
- - Effect: Allow
230
- Principal:
231
- Service: ecs-tasks.amazonaws.com
232
- Action: sts:AssumeRole
233
- ManagedPolicyArns:
234
- - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy
235
- Policies:
236
- - PolicyName: FastAPIExecutionPolicy
237
- PolicyDocument:
238
- Version: '2012-10-17'
239
- Statement:
240
- - Effect: Allow
241
- Action:
242
- - ecr:GetAuthorizationToken
243
- - ecr:BatchCheckLayerAvailability
244
- - ecr:GetDownloadUrlForLayer
245
- - ecr:BatchGetImage
246
- Resource: '*'
247
- - Effect: Allow
248
- Action:
249
- - logs:CreateLogStream
250
- - logs:PutLogEvents
251
- Resource:
252
- - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/ecs/${Environment}-acres-fastapi:*
253
- - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/ecs/${Environment}-acres-fastapi:log-stream:*
254
-
255
- # FastAPI Task Role - for runtime permissions
256
- FastAPITaskRole:
257
- Type: AWS::IAM::Role
258
- Properties:
259
- AssumeRolePolicyDocument:
260
- Version: '2012-10-17'
261
- Statement:
262
- - Effect: Allow
263
- Principal:
264
- Service: ecs-tasks.amazonaws.com
265
- Action: sts:AssumeRole
266
- Policies:
267
- - PolicyName: FastAPITaskPolicy
268
- PolicyDocument:
269
- Version: '2012-10-17'
270
- Statement:
271
- # Add specific permissions needed by your FastAPI application at runtime
272
- - Effect: Allow
273
- Action:
274
- - dynamodb:GetItem
275
- - dynamodb:PutItem
276
- - dynamodb:Query
277
- Resource: !Sub arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${Environment}-acres-fastapi-table
278
- # Allow FastAPI to make HTTP calls to Gradio service
279
- - Effect: Allow
280
- Action:
281
- - execute-api:Invoke
282
- Resource: !Sub arn:aws:execute-api:${AWS::Region}:${AWS::AccountId}:*
283
-
284
- # ECS Cluster
285
- ECSCluster:
286
- Type: AWS::ECS::Cluster
287
- Properties:
288
- ClusterName: !Ref ECSClusterName
289
- Tags:
290
- - Key: Environment
291
- Value: !Ref Environment
292
-
293
- # Load Balancer for Gradio
294
- GradioALB:
295
- Type: AWS::ElasticLoadBalancingV2::LoadBalancer
296
- Properties:
297
- Name: !Sub ${Environment}-acres-gradio-alb
298
- Scheme: internet-facing
299
- LoadBalancerAttributes:
300
- - Key: idle_timeout.timeout_seconds
301
- Value: '60'
302
- Subnets:
303
- - !Ref PublicSubnet1
304
- - !Ref PublicSubnet2
305
- SecurityGroups:
306
- - !Ref GradioSecurityGroup
307
-
308
- GradioTargetGroup:
309
- Type: AWS::ElasticLoadBalancingV2::TargetGroup
310
- Properties:
311
- HealthCheckEnabled: true
312
- HealthCheckIntervalSeconds: 30
313
- HealthCheckPath: /
314
- HealthCheckPort: 7860
315
- HealthCheckTimeoutSeconds: 20
316
- HealthyThresholdCount: 2
317
- Name: !Sub ${Environment}-acres-gradio-tg
318
- Port: 7860
319
- Protocol: HTTP
320
- TargetType: ip
321
- UnhealthyThresholdCount: 5
322
- VpcId: !Ref VPC
323
- TargetGroupAttributes:
324
- - Key: deregistration_delay.timeout_seconds
325
- Value: '30'
326
-
327
- GradioHTTPSListener:
328
- # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticloadbalancingv2-listener.html
329
- Type: AWS::ElasticLoadBalancingV2::Listener
330
- Properties:
331
- DefaultActions:
332
- - Type: forward
333
- TargetGroupArn: !Ref GradioTargetGroup
334
- LoadBalancerArn: !Ref GradioALB
335
- # Certificates:
336
- # - CertificateArn: !Ref CertificateArn
337
- Port: 7860
338
- Protocol: HTTP
339
- # GradioHTTPListener:
340
- # # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticloadbalancingv2-listener.html
341
- # Type: AWS::ElasticLoadBalancingV2::Listener
342
- # Properties:
343
- # Protocol: HTTP
344
- # Port: 80
345
- # LoadBalancerArn: !Ref GradioALB
346
- # DefaultActions:
347
- # - Type: redirect
348
- # RedirectConfig:
349
- # Protocol: "HTTPS"
350
- # Port: 7860
351
- # Host: "#{host}"
352
- # Path: "/#{path}"
353
- # Query: "#{query}"
354
- # StatusCode: "HTTP_301"
355
-
356
- # Load Balancer for FastAPI
357
- FastAPIALB:
358
- Type: AWS::ElasticLoadBalancingV2::LoadBalancer
359
- Properties:
360
- Name: !Sub ${Environment}-acres-fastapi-alb
361
- Scheme: internet-facing
362
- LoadBalancerAttributes:
363
- - Key: idle_timeout.timeout_seconds
364
- Value: '60'
365
- Subnets:
366
- - !Ref PublicSubnet1
367
- - !Ref PublicSubnet2
368
- SecurityGroups:
369
- - !Ref FastAPISecurityGroup
370
-
371
- FastAPITargetGroup:
372
- Type: AWS::ElasticLoadBalancingV2::TargetGroup
373
- Properties:
374
- HealthCheckEnabled: true
375
- HealthCheckIntervalSeconds: 30
376
- HealthCheckPath: /docs # FastAPI's Swagger UI path
377
- HealthCheckPort: 8000
378
- HealthCheckTimeoutSeconds: 20
379
- HealthyThresholdCount: 2
380
- Name: !Sub ${Environment}-acres-fastapi-tg
381
- Port: 8000
382
- Protocol: HTTP
383
- TargetType: ip
384
- UnhealthyThresholdCount: 5
385
- VpcId: !Ref VPC
386
- TargetGroupAttributes:
387
- - Key: deregistration_delay.timeout_seconds
388
- Value: '30'
389
-
390
- FastAPIHTTPSListener:
391
- # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticloadbalancingv2-listener.html
392
- Type: AWS::ElasticLoadBalancingV2::Listener
393
- Properties:
394
- DefaultActions:
395
- - Type: forward
396
- TargetGroupArn: !Ref FastAPITargetGroup
397
- LoadBalancerArn: !Ref FastAPIALB
398
- # Certificates:
399
- # - CertificateArn: !Ref CertificateArn
400
- Port: 8000
401
- Protocol: HTTP
402
- # FastAPIHTTPListener:
403
- # # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticloadbalancingv2-listener.html
404
- # Type: AWS::ElasticLoadBalancingV2::Listener
405
- # Properties:
406
- # Protocol: HTTP
407
- # Port: 80
408
- # LoadBalancerArn: !Ref FastAPIALB
409
- # DefaultActions:
410
- # - Type: redirect
411
- # RedirectConfig:
412
- # Protocol: "HTTPS"
413
- # Port: 8000
414
- # Host: "#{host}"
415
- # Path: "/#{path}"
416
- # Query: "#{query}"
417
- # StatusCode: "HTTP_301"
418
-
419
- # ECS Task Definitions
420
- GradioTaskDefinition:
421
- Type: AWS::ECS::TaskDefinition
422
- Properties:
423
- Family: !Sub ${Environment}-acres-gradio
424
- RequiresCompatibilities:
425
- - FARGATE
426
- Cpu: !Ref GradioTaskDefinitionCPU
427
- Memory: !Ref GradioTaskDefinitionMemory
428
- NetworkMode: awsvpc
429
- ExecutionRoleArn: !GetAtt GradioTaskExecutionRole.Arn
430
- TaskRoleArn: !GetAtt GradioTaskRole.Arn
431
- ContainerDefinitions:
432
- - Name: gradio
433
- Image: !Ref ContainerImageGradio
434
- PortMappings:
435
- - ContainerPort: 7860
436
- LogConfiguration:
437
- LogDriver: awslogs
438
- Options:
439
- awslogs-group: !Ref GradioLogGroup
440
- awslogs-region: !Ref AWS::Region
441
- awslogs-stream-prefix: gradio
442
-
443
- FastAPITaskDefinition:
444
- Type: AWS::ECS::TaskDefinition
445
- Properties:
446
- Family: !Sub ${Environment}-acres-fastapi
447
- RequiresCompatibilities:
448
- - FARGATE
449
- Cpu: !Ref FastAPITaskDefinitionCPU
450
- Memory: !Ref FastAPITaskDefinitionMemory
451
- NetworkMode: awsvpc
452
- ExecutionRoleArn: !GetAtt FastAPITaskExecutionRole.Arn
453
- TaskRoleArn: !GetAtt FastAPITaskRole.Arn
454
- ContainerDefinitions:
455
- - Name: fastapi
456
- Image: !Ref ContainerImageFastAPI
457
- PortMappings:
458
- - ContainerPort: 8000
459
- Environment:
460
- - Name: GRADIO_URL
461
- Value: !Sub http://${GradioALB.DNSName}:7860/
462
- LogConfiguration:
463
- LogDriver: awslogs
464
- Options:
465
- awslogs-group: !Ref FastAPILogGroup
466
- awslogs-region: !Ref AWS::Region
467
- awslogs-stream-prefix: fastapi
468
-
469
- # CloudWatch Log Groups
470
- GradioLogGroup:
471
- Type: AWS::Logs::LogGroup
472
- Properties:
473
- LogGroupName: !Sub /ecs/${Environment}-acres-gradio
474
- RetentionInDays: 30
475
-
476
- FastAPILogGroup:
477
- Type: AWS::Logs::LogGroup
478
- Properties:
479
- LogGroupName: !Sub /ecs/${Environment}-acres-fastapi
480
- RetentionInDays: 30
481
-
482
-
483
- # ECS Services
484
- GradioService:
485
- Type: AWS::ECS::Service
486
- DependsOn:
487
- - GradioHTTPSListener
488
- # - GradioHTTPListener
489
- Properties:
490
- ServiceName: !Sub ${Environment}-acres-gradio
491
- Cluster: !Ref ECSCluster
492
- TaskDefinition: !Ref GradioTaskDefinition
493
- DesiredCount: 1
494
- LaunchType: FARGATE
495
- HealthCheckGracePeriodSeconds: 180
496
- LoadBalancers:
497
- - ContainerName: gradio
498
- ContainerPort: 7860
499
- TargetGroupArn: !Ref GradioTargetGroup
500
- NetworkConfiguration:
501
- AwsvpcConfiguration:
502
- AssignPublicIp: ENABLED
503
- SecurityGroups:
504
- - !Ref GradioSecurityGroup
505
- Subnets:
506
- - !Ref PublicSubnet1
507
- - !Ref PublicSubnet2
508
- DeploymentConfiguration:
509
- DeploymentCircuitBreaker:
510
- Enable: true
511
- Rollback: true
512
- MaximumPercent: 200
513
- MinimumHealthyPercent: 100
514
-
515
- FastAPIService:
516
- Type: AWS::ECS::Service
517
- DependsOn:
518
- - GradioService
519
- - FastAPIHTTPSListener
520
- # - FastAPIHTTPListener
521
- Properties:
522
- ServiceName: !Sub ${Environment}-acres-fastapi
523
- Cluster: !Ref ECSCluster
524
- TaskDefinition: !Ref FastAPITaskDefinition
525
- DesiredCount: 1
526
- LaunchType: FARGATE
527
- HealthCheckGracePeriodSeconds: 180
528
- LoadBalancers:
529
- - ContainerName: fastapi
530
- ContainerPort: 8000
531
- TargetGroupArn: !Ref FastAPITargetGroup
532
- NetworkConfiguration:
533
- AwsvpcConfiguration:
534
- AssignPublicIp: ENABLED
535
- SecurityGroups:
536
- - !Ref FastAPISecurityGroup
537
- Subnets:
538
- - !Ref PublicSubnet1
539
- - !Ref PublicSubnet2
540
- DeploymentConfiguration:
541
- DeploymentCircuitBreaker:
542
- Enable: true
543
- Rollback: true
544
- MaximumPercent: 200
545
- MinimumHealthyPercent: 100
546
- # Add deployment controller for better rollout control
547
- DeploymentController:
548
- Type: ECS
549
-
550
- Outputs:
551
- VpcId:
552
- Description: VPC ID
553
- Value: !Ref VPC
554
-
555
- PublicSubnet1:
556
- Description: Public Subnet 1
557
- Value: !Ref PublicSubnet1
558
-
559
- PublicSubnet2:
560
- Description: Public Subnet 2
561
- Value: !Ref PublicSubnet2
562
-
563
- GradioServiceUrl:
564
- Description: URL for the Gradio service
565
- Value: !Sub http://${GradioALB.DNSName}:7860/
566
-
567
- ECSClusterName:
568
- Description: Name of the ECS cluster
569
- Value: !Ref ECSCluster
570
-
571
- GradioServiceName:
572
- Description: Name of the Gradio service
573
- Value: !GetAtt GradioService.Name
574
-
575
- FastAPIServiceName:
576
- Description: Name of the FastAPI service
577
- Value: !GetAtt FastAPIService.Name
578
-
579
- FastAPIServiceUrl:
580
- Description: URL for the FastAPI service
581
- Value: !Sub http://${FastAPIALB.DNSName}:8000/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
interface.py DELETED
@@ -1,47 +0,0 @@
1
- """
2
- Gradio interface module for ACRES RAG Platform.
3
- Defines the UI components and layout.
4
- """
5
-
6
- # interface.py
7
- import gradio as gr
8
-
9
-
10
- def create_chat_interface() -> gr.Blocks:
11
- """Create the chat interface component."""
12
- with gr.Blocks() as chat_interface:
13
- with gr.Row():
14
- with gr.Column(scale=7):
15
- chat_history = gr.Chatbot(
16
- value=[], elem_id="chatbot", height=600, show_label=False
17
- )
18
- with gr.Column(scale=3):
19
- pdf_preview = gr.Image(label="Source Page", height=600)
20
-
21
- with gr.Row():
22
- with gr.Column(scale=8):
23
- query_input = gr.Textbox(
24
- show_label=False,
25
- placeholder="Ask a question about your documents...",
26
- container=False,
27
- )
28
- with gr.Column(scale=2):
29
- submit_btn = gr.Button("Send", variant="primary")
30
-
31
- with gr.Row():
32
- pdf_files = gr.File(
33
- file_count="multiple", file_types=[".pdf"], label="Upload PDF Files"
34
- )
35
- collection_name = gr.Textbox(
36
- label="Collection Name", placeholder="Name this collection of PDFs..."
37
- )
38
-
39
- return (
40
- chat_interface,
41
- chat_history,
42
- pdf_preview,
43
- query_input,
44
- submit_btn,
45
- pdf_files,
46
- collection_name,
47
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pyproject.toml DELETED
@@ -1,19 +0,0 @@
1
- [tool.black]
2
- include = '\.pyi?$'
3
- exclude = '''
4
- /(
5
- \.git
6
- | \.hg
7
- | \.mypy_cache
8
- | \.tox
9
- | \.venv
10
- | env
11
- |venv
12
- | _build
13
- | buck-out
14
- | build
15
- | dist
16
- | migrations
17
- |alembic
18
- )/
19
- '''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (159 Bytes). View file
 
rag/__pycache__/rag_pipeline.cpython-311.pyc ADDED
Binary file (5.58 kB). View file
 
rag/rag_pipeline.py CHANGED
@@ -1,202 +1,91 @@
1
- # rag/rag_pipeline.py
2
  import json
3
- import logging
4
- import os
5
- import re
6
- from typing import Any, Dict, List, Optional, Tuple
7
-
8
- import chromadb
9
- from dotenv import load_dotenv
10
- from llama_index.core import Document, PromptTemplate, VectorStoreIndex
11
- from llama_index.core.node_parser import SentenceSplitter, SentenceWindowNodeParser
12
  from llama_index.embeddings.openai import OpenAIEmbedding
13
  from llama_index.llms.openai import OpenAI
14
- from llama_index.vector_stores.chroma import ChromaVectorStore
15
-
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logging.getLogger(__name__)
18
-
19
- load_dotenv()
20
 
21
 
22
  class RAGPipeline:
23
- def __init__(
24
- self,
25
- study_json,
26
- collection_name="study_files_rag_collection",
27
- use_semantic_splitter=False,
28
- ):
29
  self.study_json = study_json
30
- self.collection_name = collection_name
31
  self.use_semantic_splitter = use_semantic_splitter
32
  self.documents = None
33
- self.client = chromadb.Client()
34
- self.collection = self.client.get_or_create_collection(self.collection_name)
35
- self.embedding_model = OpenAIEmbedding(
36
- model_name="text-embedding-ada-002", api_key=os.getenv("OPENAI_API_KEY")
37
- )
38
- self.is_pdf = self._check_if_pdf_collection()
39
  self.load_documents()
40
  self.build_index()
41
 
42
- def _check_if_pdf_collection(self) -> bool:
43
- """Check if this is a PDF collection based on the JSON structure."""
44
- try:
45
- with open(self.study_json, "r") as f:
46
- data = json.load(f)
47
- # Check first document for PDF-specific fields
48
- if data and isinstance(data, list) and len(data) > 0:
49
- return "pages" in data[0] and "source_file" in data[0]
50
- return False
51
- except Exception as e:
52
- logger.error(f"Error checking collection type: {str(e)}")
53
- return False
54
-
55
- def extract_page_number_from_query(self, query: str) -> int:
56
- """Extract page number from query text."""
57
- # Look for patterns like "page 3", "p3", "p. 3", etc.
58
- patterns = [
59
- r"page\s*(\d+)",
60
- r"p\.\s*(\d+)",
61
- r"p\s*(\d+)",
62
- r"pg\.\s*(\d+)",
63
- r"pg\s*(\d+)",
64
- ]
65
-
66
- for pattern in patterns:
67
- match = re.search(pattern, query.lower())
68
- if match:
69
- return int(match.group(1))
70
- return None
71
-
72
  def load_documents(self):
73
  if self.documents is None:
74
  with open(self.study_json, "r") as f:
75
  self.data = json.load(f)
76
 
77
  self.documents = []
78
- if self.is_pdf:
79
- # Handle PDF documents
80
- for index, doc_data in enumerate(self.data):
81
- pages = doc_data.get("pages", {})
82
- for page_num, page_content in pages.items():
83
- if isinstance(page_content, dict):
84
- content = page_content.get("text", "")
85
- else:
86
- content = page_content
87
-
88
- doc_content = (
89
- f"Title: {doc_data['title']}\n"
90
- f"Page {page_num} Content:\n{content}\n"
91
- f"Authors: {', '.join(doc_data['authors'])}\n"
92
- )
93
-
94
- metadata = {
95
- "title": doc_data.get("title"),
96
- "authors": ", ".join(doc_data.get("authors", [])),
97
- "year": doc_data.get("date"),
98
- "source_file": doc_data.get("source_file"),
99
- "page_number": int(page_num),
100
- "total_pages": doc_data.get("page_count"),
101
- }
102
 
103
- self.documents.append(
104
- Document(
105
- text=doc_content,
106
- id_=f"doc_{index}_page_{page_num}",
107
- metadata=metadata,
108
- )
109
- )
110
- else:
111
- # Handle Zotero documents
112
- for index, doc_data in enumerate(self.data):
113
- doc_content = (
114
- f"Title: {doc_data.get('title', '')}\n"
115
- f"Abstract: {doc_data.get('abstract', '')}\n"
116
- f"Authors: {', '.join(doc_data.get('authors', []))}\n"
117
- )
118
 
119
- metadata = {
120
- "title": doc_data.get("title"),
121
- "authors": ", ".join(doc_data.get("authors", [])),
122
- "year": doc_data.get("date"),
123
- "doi": doc_data.get("doi"),
124
- }
125
 
126
- self.documents.append(
127
- Document(
128
- text=doc_content, id_=f"doc_{index}", metadata=metadata
129
- )
130
- )
131
 
132
  def build_index(self):
133
- sentence_splitter = SentenceSplitter(chunk_size=2048, chunk_overlap=20)
 
134
 
135
- def _split(text: str) -> List[str]:
136
- return sentence_splitter.split_text(text)
137
 
138
- node_parser = SentenceWindowNodeParser.from_defaults(
139
- sentence_splitter=_split,
140
- window_size=5,
141
- window_metadata_key="window",
142
- original_text_metadata_key="original_text",
143
- )
144
-
145
- # Parse documents into nodes for embedding
146
- nodes = node_parser.get_nodes_from_documents(self.documents)
147
 
148
- # Initialize ChromaVectorStore with the existing collection
149
- vector_store = ChromaVectorStore(chroma_collection=self.collection)
150
-
151
- # Create the VectorStoreIndex using the ChromaVectorStore
152
- self.index = VectorStoreIndex(
153
- nodes, vector_store=vector_store, embed_model=self.embedding_model
154
- )
155
-
156
 
157
  def query(
158
  self, context: str, prompt_template: PromptTemplate = None
159
- ) -> Tuple[str, List[Any]]:
160
  if prompt_template is None:
161
  prompt_template = PromptTemplate(
162
- "Context information is below.\n"
163
- "---------------------\n"
164
- "{context_str}\n"
165
- "---------------------\n"
166
- "Given this information, please answer the question: {query_str}\n"
167
- "Follow these guidelines for your response:\n"
168
- "1. If the answer contains multiple pieces of information (e.g., author names, dates, statistics), "
169
- "present it in a markdown table format.\n"
170
- "2. For single piece information or simple answers, respond in a clear sentence.\n"
171
- "3. Always cite sources using square brackets for EVERY piece of information, e.g. [1], [2], etc.\n"
172
- "4. If the information spans multiple documents or pages, organize it by source.\n"
173
- "5. If you're unsure about something, say so rather than making assumptions.\n"
174
- "\nFormat tables like this:\n"
175
- "| Field | Information | Source |\n"
176
- "|-------|-------------|--------|\n"
177
- "| Title | Example Title | [1] |\n"
178
- )
179
-
180
- # Extract page number for PDF documents
181
- requested_page = (
182
- self.extract_page_number_from_query(context) if self.is_pdf else None
183
- )
184
-
185
  n_documents = len(self.index.docstore.docs)
186
- print(f"n_documents: {n_documents}")
187
  query_engine = self.index.as_query_engine(
188
  text_qa_template=prompt_template,
189
- similarity_top_k=n_documents if n_documents <= 17 else 15,
190
  response_mode="tree_summarize",
191
- llm=OpenAI(model="gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY")),
192
  )
193
 
194
  response = query_engine.query(context)
195
-
196
- # Debug logging
197
- print(f"Response type: {type(response)}")
198
- print(f"Has source_nodes: {hasattr(response, 'source_nodes')}")
199
- if hasattr(response, 'source_nodes'):
200
- print(f"Number of source nodes: {len(response.source_nodes)}")
201
-
202
- return response.response, getattr(response, 'source_nodes', [])
 
 
1
  import json
2
+ from typing import Dict, Any
3
+ from llama_index.core import Document, VectorStoreIndex
4
+ from llama_index.core.node_parser import SentenceWindowNodeParser, SentenceSplitter
5
+ from llama_index.core import PromptTemplate
6
+ from typing import List
 
 
 
 
7
  from llama_index.embeddings.openai import OpenAIEmbedding
8
  from llama_index.llms.openai import OpenAI
 
 
 
 
 
 
9
 
10
 
11
  class RAGPipeline:
12
+ def __init__(self, study_json, use_semantic_splitter=False):
 
 
 
 
 
13
  self.study_json = study_json
 
14
  self.use_semantic_splitter = use_semantic_splitter
15
  self.documents = None
16
+ self.index = None
 
 
 
 
 
17
  self.load_documents()
18
  self.build_index()
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def load_documents(self):
21
  if self.documents is None:
22
  with open(self.study_json, "r") as f:
23
  self.data = json.load(f)
24
 
25
  self.documents = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
+ for index, doc_data in enumerate(self.data):
28
+ doc_content = (
29
+ f"Title: {doc_data['title']}\n"
30
+ f"Abstract: {doc_data['abstract']}\n"
31
+ f"Authors: {', '.join(doc_data['authors'])}\n"
32
+ )
 
 
 
 
 
 
 
 
 
33
 
34
+ metadata = {
35
+ "title": doc_data.get("title"),
36
+ "authors": doc_data.get("authors", []),
37
+ "year": doc_data.get("year"),
38
+ "doi": doc_data.get("doi"),
39
+ }
40
 
41
+ self.documents.append(
42
+ Document(text=doc_content, id_=f"doc_{index}", metadata=metadata)
43
+ )
 
 
44
 
45
  def build_index(self):
46
+ if self.index is None:
47
+ sentence_splitter = SentenceSplitter(chunk_size=2048, chunk_overlap=20)
48
 
49
+ def _split(text: str) -> List[str]:
50
+ return sentence_splitter.split_text(text)
51
 
52
+ node_parser = SentenceWindowNodeParser.from_defaults(
53
+ sentence_splitter=_split,
54
+ window_size=5,
55
+ window_metadata_key="window",
56
+ original_text_metadata_key="original_text",
57
+ )
 
 
 
58
 
59
+ nodes = node_parser.get_nodes_from_documents(self.documents)
60
+ self.index = VectorStoreIndex(
61
+ nodes, embed_model=OpenAIEmbedding(model_name="text-embedding-3-large")
62
+ )
 
 
 
 
63
 
64
  def query(
65
  self, context: str, prompt_template: PromptTemplate = None
66
+ ) -> Dict[str, Any]:
67
  if prompt_template is None:
68
  prompt_template = PromptTemplate(
69
+ "Context information is below.\n"
70
+ "---------------------\n"
71
+ "{context_str}\n"
72
+ "---------------------\n"
73
+ "Given this information, please answer the question provided in the context. "
74
+ "Include all relevant information from the provided context. "
75
+ "If information comes from multiple sources, please mention all of them. "
76
+ "If the information is not available in the context, please state that clearly. "
77
+ "When quoting specific information, please use square brackets to indicate the source, e.g. [1], [2], etc."
78
+ )
79
+
80
+ # This is a hack to index all the documents in the store :)
 
 
 
 
 
 
 
 
 
 
 
81
  n_documents = len(self.index.docstore.docs)
 
82
  query_engine = self.index.as_query_engine(
83
  text_qa_template=prompt_template,
84
+ similarity_top_k=n_documents,
85
  response_mode="tree_summarize",
86
+ llm=OpenAI(model="gpt-4o-mini"),
87
  )
88
 
89
  response = query_engine.query(context)
90
+
91
+ return response
 
 
 
 
 
 
rag/rag_pipeline_backup.py DELETED
@@ -1,93 +0,0 @@
1
- import json
2
- from typing import Any, Dict, List
3
-
4
- from llama_index.core import Document, PromptTemplate, VectorStoreIndex
5
- from llama_index.core.node_parser import SentenceSplitter, SentenceWindowNodeParser
6
- from llama_index.embeddings.openai import OpenAIEmbedding
7
- from llama_index.llms.openai import OpenAI
8
-
9
-
10
- class RAGPipeline:
11
- def __init__(self, study_json, use_semantic_splitter=False):
12
- self.study_json = study_json
13
- self.use_semantic_splitter = use_semantic_splitter
14
- self.documents = None
15
- self.index = None
16
- self.load_documents()
17
- self.build_index()
18
-
19
- def load_documents(self):
20
- if self.documents is None:
21
- with open(self.study_json, "r") as f:
22
- self.data = json.load(f)
23
-
24
- self.documents = []
25
-
26
- for index, doc_data in enumerate(self.data):
27
- doc_content = (
28
- f"Title: {doc_data['title']}\n"
29
- f"Abstract: {doc_data['abstract']}\n"
30
- f"Authors: {', '.join(doc_data['authors'])}\n"
31
- # f"full_text: {doc_data['full_text']}"
32
- )
33
-
34
- metadata = {
35
- "title": doc_data.get("title"),
36
- "authors": doc_data.get("authors", []),
37
- "year": doc_data.get("date"),
38
- "doi": doc_data.get("doi"),
39
- }
40
-
41
- self.documents.append(
42
- Document(text=doc_content, id_=f"doc_{index}", metadata=metadata)
43
- )
44
-
45
- def build_index(self):
46
- if self.index is None:
47
- sentence_splitter = SentenceSplitter(chunk_size=2048, chunk_overlap=20)
48
-
49
- def _split(text: str) -> List[str]:
50
- return sentence_splitter.split_text(text)
51
-
52
- node_parser = SentenceWindowNodeParser.from_defaults(
53
- sentence_splitter=_split,
54
- window_size=5,
55
- window_metadata_key="window",
56
- original_text_metadata_key="original_text",
57
- )
58
-
59
- nodes = node_parser.get_nodes_from_documents(self.documents)
60
- self.index = VectorStoreIndex(
61
- nodes, embed_model=OpenAIEmbedding(model_name="text-embedding-3-large")
62
- )
63
-
64
- def query(
65
- self, context: str, prompt_template: PromptTemplate = None
66
- ) -> Dict[str, Any]:
67
- if prompt_template is None:
68
- prompt_template = PromptTemplate(
69
- "Context information is below.\n"
70
- "---------------------\n"
71
- "{context_str}\n"
72
- "---------------------\n"
73
- "Given this information, please answer the question: {query_str}\n"
74
- "Provide an answer to the question using evidence from the context above. "
75
- "Cite sources using square brackets for EVERY piece of information, e.g. [1], [2], etc. "
76
- "Even if there's only one source, still include the citation. "
77
- "If you're unsure about a source, use [?]. "
78
- "Ensure that EVERY statement from the context is properly cited."
79
- )
80
-
81
- # This is a hack to index all the documents in the store :)
82
- n_documents = len(self.index.docstore.docs)
83
- print(f"n_documents: {n_documents}")
84
- query_engine = self.index.as_query_engine(
85
- text_qa_template=prompt_template,
86
- similarity_top_k=n_documents if n_documents <= 17 else 15,
87
- response_mode="tree_summarize",
88
- llm=OpenAI(model="gpt-4o-mini"),
89
- )
90
-
91
- response = query_engine.query(context)
92
-
93
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements-dev.txt DELETED
@@ -1,3 +0,0 @@
1
- black==24.10.0
2
- isort==5.13.2
3
- flake8==7.1.1
 
 
 
 
requirements.txt CHANGED
@@ -1,25 +1,6 @@
1
- # Core dependencies
2
- chromadb==0.5.20
3
- fastapi==0.115.5
4
- gradio==5.6.0
5
- gradio_client==1.4.3
6
- nest-asyncio==1.6.0
7
- openai==1.57.0
8
- pandas==2.2.3
9
- pydantic==2.9.2
10
- python-dotenv
11
- pyzotero
12
- python-slugify
13
- PyMuPDF==1.23.8
14
- Pillow==10.2.0
15
- sqlmodel==0.0.22
16
- cachetools
17
-
18
- # LlamaIndex ecosystem (pinned to compatible versions)
19
- llama-index-core
20
- docling
21
- llama-index-readers-docling
22
- llama-index-vector-stores-chroma
23
- llama-index-node-parser-docling
24
- llama-index-embeddings-openai
25
- llama-index-llms-openai-like
 
1
+ fastapi==0.112.2
2
+ gradio
3
+ llama-index
4
+ openai
5
+ pandas
6
+ pydantic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
sample_queries.md DELETED
@@ -1,2 +0,0 @@
1
- 1. For each of the studies on vaccine coverage, could you create a table that lists the findings from the vaccine coverage studies, incorporating variables such as STUDYID, TITLE, DOIs, YEAR?
2
-
 
 
 
study_files.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "Ebola Virus": "data/ebola-virus_zotero_items.json",
3
- "GeneXpert": "data/genexpert_zotero_items.json",
4
- "Vaccine coverage": "data/vaccine-coverage_zotero_items.json",
5
- "Concept": "data/concept_zotero_items.json",
6
- "Zotero Collection Pastan": "data/zotero-collection-pastan_zotero_items.json",
7
- "pdf_thequickone": "data/thequickone_20250108_111913_documents.json",
8
- "pdf_aforapples": "data/aforapples_20250108_113044_documents.json",
9
- "pdf_bforbinance": "data/bforbinance_20250108_114459_documents.json",
10
- "pdf_cforcongo": "data/cforcongo_20250108_115233_documents.json",
11
- "pdf_hjhj": "data/hjhj_20250108_115714_documents.json",
12
- "pdf_schooldropouts": "data/schooldropouts_20250108_140257_documents.json"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (161 Bytes). View file
 
utils/__pycache__/prompts.cpython-311.pyc ADDED
Binary file (5.68 kB). View file
 
utils/db.py DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e165937f30a52640195e2a6ee5a71bcf980f0037e901658ebf813d794332ca6c
3
- size 5045
 
 
 
 
utils/helpers.py CHANGED
@@ -1,247 +1,42 @@
1
- # utils/helpers.py
2
-
3
- import json
4
- import os
5
- from typing import Any, Dict, List
6
-
7
- import chromadb
8
- from chromadb.api.types import Document
9
  from llama_index.core import Response
10
 
11
- from rag.rag_pipeline import RAGPipeline
12
- from utils.prompts import (
13
- StudyCharacteristics,
14
- VaccineCoverageVariables,
15
- structured_follow_up_prompt,
16
- )
17
-
18
- # Initialize ChromaDB client
19
- chromadb_client = chromadb.Client()
20
-
21
-
22
- def read_study_files(file_path):
23
- """
24
- Reads a JSON file and returns the parsed JSON data.
25
-
26
- Args:
27
- file_path (str): The path to the JSON file to be read.
28
-
29
- Returns:
30
- dict: The data from the JSON file as a Python dictionary.
31
-
32
- Raises:
33
- FileNotFoundError: If the file is not found at the provided path.
34
- json.JSONDecodeError: If the file contents are not valid JSON.
35
-
36
- Example:
37
- Given a JSON file 'study_files.json' with content like:
38
- {
39
- "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
40
- "Ebola Virus": "data/ebola_virus_zotero_items.json",
41
- "Gene Xpert": "data/gene_xpert_zotero_items.json"
42
- }
43
-
44
- Calling `read_json_file("study_files.json")` will return:
45
- {
46
- "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
47
- "Ebola Virus": "data/ebola_virus_zotero_items.json",
48
- "Gene Xpert": "data/gene_xpert_zotero_items.json"
49
- }
50
- """
51
- try:
52
- with open(file_path, "r") as file:
53
- data = json.load(file)
54
- return data
55
- except FileNotFoundError as e:
56
- raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
57
- except json.JSONDecodeError as e:
58
- raise ValueError(
59
- f"The file at path {file_path} does not contain valid JSON."
60
- ) from e
61
-
62
-
63
- def append_to_study_files(file_path, new_key, new_value):
64
- """
65
- Appends a new key-value entry to an existing JSON file.
66
-
67
- Args:
68
- file_path (str): The path to the JSON file.
69
- new_key (str): The new key to add to the JSON file.
70
- new_value (any): The value associated with the new key (can be any valid JSON data type).
71
-
72
- Raises:
73
- FileNotFoundError: If the file is not found at the provided path.
74
- json.JSONDecodeError: If the file contents are not valid JSON.
75
- IOError: If the file cannot be written.
76
-
77
- Example:
78
- If the file 'study_files.json' initially contains:
79
- {
80
- "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
81
- "Ebola Virus": "data/ebola_virus_zotero_items.json"
82
- }
83
 
84
- Calling `append_to_json_file("study_files.json", "Gene Xpert", "data/gene_xpert_zotero_items.json")`
85
- will modify the file to:
86
- {
87
- "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
88
- "Ebola Virus": "data/ebola_virus_zotero_items.json",
89
- "Gene Xpert": "data/gene_xpert_zotero_items.json"
90
- }
91
- """
92
- try:
93
- # Read the existing data from the file
94
- with open(file_path, "r") as file:
95
- data = json.load(file)
96
 
97
- # Append the new key-value pair to the dictionary
98
- data[new_key] = new_value
99
 
100
- # Write the updated data back to the file
101
- with open(file_path, "w") as file:
102
- json.dump(data, file, indent=4) # indent for pretty printing
103
 
104
- except FileNotFoundError as e:
105
- raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
106
- except json.JSONDecodeError as e:
107
- raise ValueError(
108
- f"The file at path {file_path} does not contain valid JSON."
109
- ) from e
110
- except IOError as e:
111
- raise IOError(f"Failed to write to the file at {file_path}.") from e
112
 
113
 
114
- def generate_follow_up_questions(
115
- rag: RAGPipeline, response: str, query: str, study_name: str
116
- ) -> List[str]:
117
- """
118
- Generates follow-up questions based on the given RAGPipeline, response, query, and study_name.
119
- Args:
120
- rag (RAGPipeline): The RAGPipeline object used for generating follow-up questions.
121
- response (str): The response to the initial query.
122
- query (str): The initial query.
123
- study_name (str): The name of the study.
124
- Returns:
125
- List[str]: A list of generated follow-up questions.
126
- Raises:
127
- None
128
- """
129
 
130
- # Determine the study type based on the study_name
131
- if "Vaccine Coverage" in study_name:
132
- study_type = "Vaccine Coverage"
133
- key_variables = list(VaccineCoverageVariables.__annotations__.keys())
134
- elif "Ebola Virus" in study_name:
135
- study_type = "Ebola Virus"
136
- key_variables = [
137
- "SAMPLE_SIZE",
138
- "PLASMA_TYPE",
139
- "DOSAGE",
140
- "FREQUENCY",
141
- "SIDE_EFFECTS",
142
- "VIRAL_LOAD_CHANGE",
143
- "SURVIVAL_RATE",
144
- ]
145
- elif "Gene Xpert" in study_name:
146
- study_type = "Gene Xpert"
147
- key_variables = [
148
- "OBJECTIVE",
149
- "OUTCOME_MEASURES",
150
- "SENSITIVITY",
151
- "SPECIFICITY",
152
- "COST_COMPARISON",
153
- "TURNAROUND_TIME",
154
- ]
155
  else:
156
- study_type = "General"
157
- key_variables = list(StudyCharacteristics.__annotations__.keys())
158
-
159
- # Add key variables to the context
160
- context = f"Study type: {study_type}\nKey variables to consider: {', '.join(key_variables)}\n\n{response}"
161
-
162
- follow_up_response = rag.query(
163
- structured_follow_up_prompt.format(
164
- context_str=context,
165
- query_str=query,
166
- response_str=response,
167
- study_type=study_type,
168
- )
169
- )
170
-
171
- questions = follow_up_response.response.strip().split("\n")
172
- cleaned_questions = []
173
- for q in questions:
174
- # Remove leading numbers and periods, and strip whitespace
175
- cleaned_q = q.split(". ", 1)[-1].strip()
176
- # Ensure the question ends with a question mark
177
- if cleaned_q and not cleaned_q.endswith("?"):
178
- cleaned_q += "?"
179
- if cleaned_q:
180
- cleaned_questions.append(f"✨ {cleaned_q}")
181
- return cleaned_questions[:3]
182
-
183
-
184
- def add_study_files_to_chromadb(file_path: str, collection_name: str):
185
- """
186
- Reads the study files data from a JSON file and adds it to the specified ChromaDB collection.
187
-
188
- :param file_path: Path to the JSON file containing study files data.
189
- :param collection_name: Name of the ChromaDB collection to store the data.
190
- """
191
- # Load study files data from JSON file
192
- try:
193
- with open(file_path, "r") as f:
194
- study_files_data = json.load(f)
195
- except FileNotFoundError:
196
- print(f"File '{file_path}' not found.")
197
- return
198
-
199
- if not study_files_data:
200
- return
201
-
202
- # Get or create the collection in ChromaDB
203
- collection = chromadb_client.get_or_create_collection(collection_name)
204
-
205
- # Prepare lists for ids, texts, and metadata to batch insert
206
- ids = []
207
- documents = []
208
- metadatas = []
209
-
210
- # Populate lists with data from the JSON file
211
- for name, file_path in study_files_data.items():
212
- ids.append(name) # Document ID
213
- documents.append("") # Optional text, can be left empty if not used
214
- metadatas.append({"file_path": file_path}) # Metadata with file path
215
-
216
- # Add documents to the collection in batch
217
- collection.add(ids=ids, documents=documents, metadatas=metadatas)
218
-
219
- print("All study files have been successfully added to ChromaDB.")
220
-
221
-
222
- def create_directory(directory_path):
223
- """
224
- Create a directory.
225
- Does not raise an error if the directory already exists.
226
-
227
- Args:
228
- directory_path (str): Path of the directory to create
229
 
230
- Returns:
231
- bool: True if directory was created or already exists, False if creation failed
232
- """
233
- try:
234
- # Use exist_ok=True to prevent error if directory exists
235
- os.makedirs(directory_path, exist_ok=True)
236
- return True
237
- except PermissionError:
238
- print(f"Permission denied: Cannot create directory {directory_path}")
239
- return False
240
- except Exception as e:
241
- print(f"An unexpected error occurred: {e}")
242
- return False
243
 
 
 
 
244
 
245
- if __name__ == "__main__":
246
- # Usage example
247
- add_study_files_to_chromadb("study_files.json", "study_files_collection")
 
1
+ from typing import Dict, Any
 
 
 
 
 
 
 
2
  from llama_index.core import Response
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ def process_response(response: Response) -> Dict[str, Any]:
6
+ source_nodes = response.source_nodes
7
+ sources = {}
8
+ for i, node in enumerate(source_nodes, 1):
9
+ source = format_source(node.metadata)
10
+ if source not in sources.values():
11
+ sources[i] = source
 
 
 
 
 
12
 
13
+ markdown_text = response.response + "\n\n### Sources\n\n"
14
+ raw_text = response.response + "\n\nSources:\n"
15
 
16
+ for i, source in sources.items():
17
+ markdown_text += f"{i}. {source}\n"
18
+ raw_text += f"[{i}] {source}\n"
19
 
20
+ return {"markdown": markdown_text, "raw": raw_text, "sources": sources}
 
 
 
 
 
 
 
21
 
22
 
23
+ def format_source(metadata: Dict[str, Any]) -> str:
24
+ authors = metadata.get("authors", "Unknown Author")
25
+ year = metadata.get("year", "n.d.")
26
+ title = metadata.get("title", "Untitled")
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ author_list = authors.split(",")
29
+ if len(author_list) > 2:
30
+ formatted_authors = f"{author_list[0].strip()} et al."
31
+ elif len(author_list) == 2:
32
+ formatted_authors = f"{author_list[0].strip()} and {author_list[1].strip()}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  else:
34
+ formatted_authors = author_list[0].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ year = "n.d." if year is None or year == "None" else str(year)
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ max_title_length = 250
39
+ if len(title) > max_title_length:
40
+ title = title[:max_title_length] + "..."
41
 
42
+ return f"{formatted_authors} ({year}). {title}"
 
 
utils/pdf_processor.py DELETED
@@ -1,222 +0,0 @@
1
- # utils/pdf_processor.py
2
-
3
- """
4
- PDF processing module for ACRES RAG Platform.
5
- Handles PDF file processing, text extraction, and page rendering.
6
- """
7
-
8
- import datetime
9
- import json
10
- import logging
11
- import os
12
- import re
13
- from typing import Dict, List, Optional
14
- from llama_index.readers.docling import DoclingReader
15
-
16
-
17
- import fitz
18
- from PIL import Image
19
- from slugify import slugify
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- reader = DoclingReader()
25
-
26
-
27
- class PDFProcessor:
28
- def __init__(self, upload_dir: str = "data/uploads"):
29
- """Initialize PDFProcessor with upload directory."""
30
- self.upload_dir = upload_dir
31
- os.makedirs(upload_dir, exist_ok=True)
32
- self.current_page = 0
33
-
34
- def is_references_page(self, text: str) -> bool:
35
- """
36
- Check if the page appears to be a references/bibliography page.
37
- """
38
- # Common section headers for references
39
- ref_headers = [
40
- r"^references\s*$",
41
- r"^bibliography\s*$",
42
- r"^works cited\s*$",
43
- r"^citations\s*$",
44
- r"^cited literature\s*$",
45
- ]
46
-
47
- # Check first few lines of the page
48
- first_lines = text.lower().split("\n")[:3]
49
- first_block = " ".join(first_lines)
50
-
51
- # Check for reference headers
52
- for header in ref_headers:
53
- if re.search(header, first_block, re.IGNORECASE):
54
- return True
55
-
56
- # Check for reference-like patterns (e.g., [1] Author, et al.)
57
- ref_patterns = [
58
- r"^\[\d+\]", # [1] style
59
- r"^\d+\.", # 1. style
60
- r"^[A-Z][a-z]+,\s+[A-Z]\.", # Author, I. style
61
- ]
62
-
63
- ref_pattern_count = 0
64
- lines = text.split("\n")[:10] # Check first 10 lines
65
- for line in lines:
66
- line = line.strip()
67
- if any(re.match(pattern, line) for pattern in ref_patterns):
68
- ref_pattern_count += 1
69
-
70
- # If multiple reference-like patterns are found, likely a references page
71
- return ref_pattern_count >= 3
72
-
73
- def detect_references_start(self, doc: fitz.Document) -> Optional[int]:
74
- """
75
- Detect the page where references section starts.
76
- Returns the page number or None if not found.
77
- """
78
- for page_num in range(len(doc)):
79
- page = doc[page_num]
80
- text = page.get_text()
81
- if self.is_references_page(text):
82
- logger.info(f"Detected references section starting at page {page_num}")
83
- return page_num
84
- return None
85
-
86
- def process_pdfs(self, file_paths: List[str], collection_name: str) -> str:
87
- """Process multiple PDF files and store their content."""
88
- processed_docs = []
89
-
90
- for file_path in file_paths:
91
- try:
92
- doc_data = self.extract_text_from_pdf(file_path)
93
- processed_docs.append(doc_data)
94
- logger.info(
95
- f"Successfully processed {file_path} ({doc_data['content_pages']} content pages)"
96
- )
97
- except Exception as e:
98
- logger.error(f"Error processing {file_path}: {str(e)}")
99
- continue
100
-
101
- if not processed_docs:
102
- raise ValueError("No documents were successfully processed")
103
-
104
- # Save to JSON file
105
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
106
- output_filename = f"{slugify(collection_name)}_{timestamp}_documents.json"
107
- output_path = os.path.join("data", output_filename)
108
-
109
- # Ensure the data directory exists
110
- os.makedirs("data", exist_ok=True)
111
-
112
- with open(output_path, "w", encoding="utf-8") as f:
113
- json.dump(processed_docs, f, indent=2, ensure_ascii=False)
114
-
115
- logger.info(f"Saved processed documents to {output_path}")
116
- return output_path
117
-
118
- def extract_text_from_pdf(self, file_path: str) -> Dict:
119
- """
120
- Extract text and metadata from a PDF file using DoclingReader.
121
- Maintains accurate page numbers for source citation.
122
- """
123
- try:
124
- # Use DoclingReader for main content extraction
125
- reader = DoclingReader()
126
- documents = reader.load_data(file_path)
127
- text = documents[0].text if documents else ""
128
-
129
- # Use PyMuPDF to get accurate page count
130
- doc = fitz.open(file_path)
131
- total_pages = len(doc)
132
-
133
- # Extract title from document
134
- title = os.path.basename(file_path)
135
- title_match = re.search(r'#+ (.+?)\n', text)
136
- if title_match:
137
- title = title_match.group(1).strip()
138
-
139
- # Extract abstract
140
- abstract = ""
141
- abstract_match = re.search(r'Abstract:?(.*?)(?=\n\n|Keywords:|$)', text, re.DOTALL | re.IGNORECASE)
142
- if abstract_match:
143
- abstract = abstract_match.group(1).strip()
144
-
145
- # Extract authors
146
- authors = []
147
- author_section = re.search(r'\n(.*?)\n.*?Department', text)
148
- if author_section:
149
- author_text = author_section.group(1)
150
- authors = [a.strip() for a in author_text.split(',') if a.strip()]
151
-
152
- # Remove references section
153
- content = text
154
- ref_patterns = [r'\nReferences\n', r'\nBibliography\n', r'\nWorks Cited\n']
155
- for pattern in ref_patterns:
156
- split_text = re.split(pattern, content, flags=re.IGNORECASE)
157
- if len(split_text) > 1:
158
- content = split_text[0]
159
- break
160
-
161
- # Map content to pages using PyMuPDF for accurate page numbers
162
- pages = {}
163
- for page_num in range(total_pages):
164
- page = doc[page_num]
165
- page_text = page.get_text()
166
-
167
- # Skip if this appears to be a references page
168
- if self.is_references_page(page_text):
169
- logger.info(f"Skipping references page {page_num}")
170
- continue
171
-
172
- # Look for this page's content in the Docling-extracted text
173
- # This is a heuristic approach - we look for unique phrases from the page
174
- key_phrases = self._get_key_phrases(page_text)
175
- page_content = self._find_matching_content(content, key_phrases)
176
-
177
- if page_content:
178
- pages[str(page_num)] = {
179
- 'text': page_content,
180
- 'page_number': page_num + 1 # 1-based page numbers for human readability
181
- }
182
-
183
- # Create structured document with page-aware content
184
- document = {
185
- "title": title,
186
- "authors": authors,
187
- "date": "", # Could be extracted if needed
188
- "abstract": abstract,
189
- "full_text": content,
190
- "source_file": file_path,
191
- "pages": pages,
192
- "page_count": total_pages,
193
- "content_pages": len(pages) # Number of non-reference pages
194
- }
195
-
196
- doc.close()
197
- return document
198
-
199
- except Exception as e:
200
- logger.error(f"Error processing PDF {file_path}: {str(e)}")
201
- raise
202
-
203
- def _get_key_phrases(self, text: str, phrase_length: int = 10) -> List[str]:
204
- """Extract key phrases from text for matching."""
205
- words = text.split()
206
- phrases = []
207
- for i in range(0, len(words), phrase_length):
208
- phrase = ' '.join(words[i:i + phrase_length])
209
- if len(phrase.strip()) > 20: # Only use substantial phrases
210
- phrases.append(phrase)
211
- return phrases
212
-
213
- def _find_matching_content(self, docling_text: str, key_phrases: List[str]) -> Optional[str]:
214
- """Find the corresponding content in Docling text using key phrases."""
215
- for phrase in key_phrases:
216
- if phrase in docling_text:
217
- # Find the paragraph or section containing this phrase
218
- paragraphs = docling_text.split('\n\n')
219
- for para in paragraphs:
220
- if phrase in para:
221
- return para
222
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/prompts.py CHANGED
@@ -1,10 +1,7 @@
1
- # utils/prompts.py
2
-
3
- from typing import List, Optional
4
-
5
  from llama_index.core import PromptTemplate
6
- from llama_index.core.prompts import PromptTemplate
7
  from pydantic import BaseModel, Field
 
8
 
9
 
10
  class StudyCharacteristics(BaseModel):
@@ -74,8 +71,18 @@ vaccine_coverage_prompt = PromptTemplate(
74
  )
75
 
76
  sample_questions = {
77
- "Vaccine coverage": [
78
  "What are the vaccine coverage rates reported in the study?",
 
 
 
 
 
 
 
 
 
 
79
  "Are there any reported adverse events following immunization (AEFI)?",
80
  "How does the study account for different vaccine types or schedules?",
81
  "Extract and present in a tabular format the following variables for each vaccine coverage study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, IMMUNISABLE_DISEASE_UNDER_STUDY, ROUTE_OF_VACCINE_ADMINISTRATION, DURATION_OF_STUDY, DURATION_IN_RELATION_TO_COVID19, VACCINE_COVERAGE_RATES, PROPORTION_ADMINISTERED_WITHIN_RECOMMENDED_AGE, IMMUNISATION_UPTAKE, VACCINE_DROP_OUT_RATES, INTENTIONS_TO_VACCINATE, VACCINE_CONFIDENCE, HESITANCY_FACTORS, DEMOGRAPHIC_DIFFERENCES, INTERVENTIONS, EQUITY_CONSIDERATIONS, GEOGRAPHICAL_SCOPE, AEFI, VACCINE_TYPES, and STUDY_COMMENTS.",
@@ -83,21 +90,37 @@ sample_questions = {
83
  "Ebola Virus": [
84
  "What is the sample size of the study?",
85
  "What is the type of plasma used in the study?",
 
 
 
 
 
 
 
 
 
 
86
  "What biosafety measures were implemented during the study?",
87
  "Were there any ethical considerations or challenges reported?",
88
  "Create a structured table for each Ebola virus study, including the following information: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, SAMPLE_SIZE, PLASMA_TYPE, DOSAGE, FREQUENCY, SIDE_EFFECTS, VIRAL_LOAD_CHANGE, SURVIVAL_RATE, INCLUSION_CRITERIA, EXCLUSION_CRITERIA, SUBGROUP_ANALYSES, FOLLOW_UP_DURATION, LONG_TERM_OUTCOMES, DISEASE_SEVERITY_ASSESSMENT, BIOSAFETY_MEASURES, ETHICAL_CONSIDERATIONS, and STUDY_COMMENTS.",
89
  ],
90
- "GeneXpert": [
91
  "What is the main objective of the study?",
92
  "What is the study design?",
93
  "What disease condition is being studied?",
 
 
 
 
 
 
 
 
 
 
 
94
  "Extract and present in a tabular format the following variables for each Gene Xpert study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, DISEASE_CONDITION, OBJECTIVE, OUTCOME_MEASURES, SENSITIVITY, SPECIFICITY, COST_COMPARISON, TURNAROUND_TIME, IMPLEMENTATION_CHALLENGES, PERFORMANCE_VARIATIONS, QUALITY_CONTROL, EQUIPMENT_ISSUES, PATIENT_OUTCOME_IMPACT, TRAINING_REQUIREMENTS, SCALABILITY_CONSIDERATIONS, and STUDY_COMMENTS.",
95
  ],
96
- "General": [
97
- "What is the main objective of the study?",
98
- "What is the study design?",
99
- "Extract and present in a tabular format the following variables for each study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, OBJECTIVE, and STUDY_COMMENTS.",
100
- ],
101
  }
102
 
103
 
@@ -119,25 +142,5 @@ evidence_based_prompt = PromptTemplate(
119
  "---------------------\n"
120
  "Given this information, please answer the question: {query_str}\n"
121
  "Provide an answer to the question using evidence from the context above. "
122
- "Cite sources using square brackets for EVERY piece of information, e.g. [1], [2], etc. "
123
- "Even if there's only one source, still include the citation. "
124
- "If you're unsure about a source, use [?]. "
125
- "Ensure that EVERY statement from the context is properly cited."
126
- )
127
-
128
-
129
- structured_follow_up_prompt = PromptTemplate(
130
- "Context information is below.\n"
131
- "---------------------\n"
132
- "{context_str}\n"
133
- "---------------------\n"
134
- "Original question: {query_str}\n"
135
- "Response: {response_str}\n"
136
- "Study type: {study_type}\n"
137
- "Based on the above information and the study type, generate 3 follow-up questions that help extract key variables or information from the study. "
138
- "Focus on the following aspects:\n"
139
- "1. Any missing key variables that are typically reported in this type of study.\n"
140
- "2. Clarification on methodology or results that might affect the interpretation of the study.\n"
141
- "3. Potential implications or applications of the study findings.\n"
142
- "Ensure each question is specific, relevant to the study type, and ends with a question mark."
143
  )
 
 
 
 
 
1
  from llama_index.core import PromptTemplate
2
+ from typing import Optional, List
3
  from pydantic import BaseModel, Field
4
+ from llama_index.core.prompts import PromptTemplate
5
 
6
 
7
  class StudyCharacteristics(BaseModel):
 
71
  )
72
 
73
  sample_questions = {
74
+ "Vaccine Coverage": [
75
  "What are the vaccine coverage rates reported in the study?",
76
+ "What proportion of vaccines were administered within the recommended age range?",
77
+ "What is the immunisation uptake reported in the study?",
78
+ "What are the vaccine drop-out rates mentioned in the document?",
79
+ "What are the intentions to vaccinate reported in the study?",
80
+ "How is vaccine confidence described in the document?",
81
+ "What factors influencing vaccine hesitancy are identified in the study?",
82
+ "Are there any demographic differences in vaccine coverage or uptake?",
83
+ "What interventions, if any, were implemented to improve vaccine coverage?",
84
+ "How does the study address equity in vaccine distribution and access?",
85
+ "What is the geographical scope of the study (e.g., urban, rural, national)?",
86
  "Are there any reported adverse events following immunization (AEFI)?",
87
  "How does the study account for different vaccine types or schedules?",
88
  "Extract and present in a tabular format the following variables for each vaccine coverage study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, IMMUNISABLE_DISEASE_UNDER_STUDY, ROUTE_OF_VACCINE_ADMINISTRATION, DURATION_OF_STUDY, DURATION_IN_RELATION_TO_COVID19, VACCINE_COVERAGE_RATES, PROPORTION_ADMINISTERED_WITHIN_RECOMMENDED_AGE, IMMUNISATION_UPTAKE, VACCINE_DROP_OUT_RATES, INTENTIONS_TO_VACCINATE, VACCINE_CONFIDENCE, HESITANCY_FACTORS, DEMOGRAPHIC_DIFFERENCES, INTERVENTIONS, EQUITY_CONSIDERATIONS, GEOGRAPHICAL_SCOPE, AEFI, VACCINE_TYPES, and STUDY_COMMENTS.",
 
90
  "Ebola Virus": [
91
  "What is the sample size of the study?",
92
  "What is the type of plasma used in the study?",
93
+ "What is the dosage and frequency of administration of the plasma?",
94
+ "Are there any reported side effects?",
95
+ "What is the change in viral load after treatment?",
96
+ "How many survivors were there in the intervention group compared to the control group?",
97
+ "What was the study design (e.g., RCT, observational)?",
98
+ "What were the inclusion and exclusion criteria for participants?",
99
+ "Were there any subgroup analyses performed?",
100
+ "What was the duration of follow-up?",
101
+ "Were there any reported long-term outcomes or sequelae?",
102
+ "How was the severity of Ebola virus disease assessed?",
103
  "What biosafety measures were implemented during the study?",
104
  "Were there any ethical considerations or challenges reported?",
105
  "Create a structured table for each Ebola virus study, including the following information: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, SAMPLE_SIZE, PLASMA_TYPE, DOSAGE, FREQUENCY, SIDE_EFFECTS, VIRAL_LOAD_CHANGE, SURVIVAL_RATE, INCLUSION_CRITERIA, EXCLUSION_CRITERIA, SUBGROUP_ANALYSES, FOLLOW_UP_DURATION, LONG_TERM_OUTCOMES, DISEASE_SEVERITY_ASSESSMENT, BIOSAFETY_MEASURES, ETHICAL_CONSIDERATIONS, and STUDY_COMMENTS.",
106
  ],
107
+ "Gene Xpert": [
108
  "What is the main objective of the study?",
109
  "What is the study design?",
110
  "What disease condition is being studied?",
111
+ "What are the main outcome measures in the study?",
112
+ "What is the sensitivity and specificity of the Gene Xpert test?",
113
+ "How does the cost of the Gene Xpert testing strategy compare to other methods?",
114
+ "What is the turnaround time for Gene Xpert results compared to conventional methods?",
115
+ "Are there any reported challenges in implementing Gene Xpert in the study setting?",
116
+ "How does Gene Xpert performance vary across different sample types or patient populations?",
117
+ "What quality control measures were implemented in the study?",
118
+ "Were there any reported equipment failures or technical issues?",
119
+ "How does the study address the impact of Gene Xpert on patient outcomes or clinical decision-making?",
120
+ "What training or human resource requirements were reported for Gene Xpert implementation?",
121
+ "How does the study consider the scalability and sustainability of Gene Xpert use?",
122
  "Extract and present in a tabular format the following variables for each Gene Xpert study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, DISEASE_CONDITION, OBJECTIVE, OUTCOME_MEASURES, SENSITIVITY, SPECIFICITY, COST_COMPARISON, TURNAROUND_TIME, IMPLEMENTATION_CHALLENGES, PERFORMANCE_VARIATIONS, QUALITY_CONTROL, EQUIPMENT_ISSUES, PATIENT_OUTCOME_IMPACT, TRAINING_REQUIREMENTS, SCALABILITY_CONSIDERATIONS, and STUDY_COMMENTS.",
123
  ],
 
 
 
 
 
124
  }
125
 
126
 
 
142
  "---------------------\n"
143
  "Given this information, please answer the question: {query_str}\n"
144
  "Provide an answer to the question using evidence from the context above. "
145
+ "Cite sources using square brackets."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  )