kltn20133118 commited on
Commit
dbaa71b
·
verified ·
1 Parent(s): 3b1c8a8

Upload 337 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. obsei_module/.github/ISSUE_TEMPLATE/bug_report.md +27 -0
  3. obsei_module/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. obsei_module/.github/dependabot.yml +7 -0
  5. obsei_module/.github/release-drafter.yml +33 -0
  6. obsei_module/.github/workflows/build.yml +54 -0
  7. obsei_module/.github/workflows/pypi_publish.yml +35 -0
  8. obsei_module/.github/workflows/release_draft.yml +15 -0
  9. obsei_module/.github/workflows/sdk_docker_publish.yml +50 -0
  10. obsei_module/.github/workflows/ui_docker_publish.yml +50 -0
  11. obsei_module/.gitignore +148 -0
  12. obsei_module/.pre-commit-config.yaml +21 -0
  13. obsei_module/.pyup.yml +5 -0
  14. obsei_module/ATTRIBUTION.md +18 -0
  15. obsei_module/CITATION.cff +14 -0
  16. obsei_module/CNAME +1 -0
  17. obsei_module/CODE_OF_CONDUCT.md +128 -0
  18. obsei_module/CONTRIBUTING.md +103 -0
  19. obsei_module/CONTRIBUTOR_LICENSE_AGREEMENT.md +3 -0
  20. obsei_module/Dockerfile +38 -0
  21. obsei_module/LICENSE +201 -0
  22. obsei_module/MANIFEST.in +3 -0
  23. obsei_module/README.md +1067 -0
  24. obsei_module/SECURITY.md +5 -0
  25. obsei_module/__init__.py +0 -0
  26. obsei_module/__pycache__/__init__.cpython-311.pyc +0 -0
  27. obsei_module/_config.yml +9 -0
  28. obsei_module/_includes/head-custom-google-analytics.html +9 -0
  29. obsei_module/binder/requirements.txt +2 -0
  30. obsei_module/example/app_store_scrapper_example.py +41 -0
  31. obsei_module/example/daily_get_example.py +77 -0
  32. obsei_module/example/elasticsearch_example.py +69 -0
  33. obsei_module/example/email_source_example.py +36 -0
  34. obsei_module/example/facebook_example.py +19 -0
  35. obsei_module/example/google_news_example.py +58 -0
  36. obsei_module/example/jira_example.py +77 -0
  37. obsei_module/example/maps_review_scrapper_example.py +22 -0
  38. obsei_module/example/pandas_sink_example.py +49 -0
  39. obsei_module/example/pandas_source_example.py +27 -0
  40. obsei_module/example/pii_analyzer_example.py +33 -0
  41. obsei_module/example/play_store_reviews_example.py +4 -0
  42. obsei_module/example/playstore_scrapper_example.py +40 -0
  43. obsei_module/example/playstore_scrapper_translator_example.py +86 -0
  44. obsei_module/example/reddit_example.py +50 -0
  45. obsei_module/example/reddit_scrapper_example.py +30 -0
  46. obsei_module/example/sdk.yaml +97 -0
  47. obsei_module/example/slack_example.py +66 -0
  48. obsei_module/example/twitter_source_example.py +98 -0
  49. obsei_module/example/web_crawler_example.py +43 -0
  50. obsei_module/example/with_sdk_config_file.py +28 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ obsei_module/images/obsei_flow.gif filter=lfs diff=lfs merge=lfs -text
37
+ obsei_module/obsei-master/images/obsei_flow.gif filter=lfs diff=lfs merge=lfs -text
obsei_module/.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve
4
+ title: "[BUG]"
5
+ labels: bug
6
+ assignees: lalitpagaria
7
+
8
+ ---
9
+
10
+ **Describe the bug**
11
+ A clear and concise description of what the bug is.
12
+
13
+ **To Reproduce**
14
+ Steps to reproduce the behavior:
15
+
16
+ **Expected behavior**
17
+ A clear and concise description of what you expected to happen.
18
+
19
+ **Stacktrace**
20
+ If applicable, add stacktrace to help explain your problem.
21
+
22
+ **Please complete the following information:**
23
+ - OS:
24
+ - Version:
25
+
26
+ **Additional context**
27
+ Add any other context about the problem here.
obsei_module/.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: enhancement
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
obsei_module/.github/dependabot.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ version: 2
2
+ updates:
3
+ # Maintain dependencies for GitHub Actions
4
+ - package-ecosystem: "github-actions"
5
+ directory: "/"
6
+ schedule:
7
+ interval: "daily"
obsei_module/.github/release-drafter.yml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name-template: 'v$RESOLVED_VERSION 🌈'
2
+ tag-template: 'v$RESOLVED_VERSION'
3
+ categories:
4
+ - title: '🚀 Features'
5
+ labels:
6
+ - 'feature'
7
+ - 'enhancement'
8
+ - title: '🐛 Bug Fixes'
9
+ labels:
10
+ - 'fix'
11
+ - 'bugfix'
12
+ - 'bug'
13
+ - title: '🧰 Maintenance'
14
+ label: 'chore'
15
+ - title: '⚠️Breaking Changes'
16
+ label: 'breaking changes'
17
+ change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
18
+ change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
19
+ version-resolver:
20
+ major:
21
+ labels:
22
+ - 'major'
23
+ minor:
24
+ labels:
25
+ - 'minor'
26
+ patch:
27
+ labels:
28
+ - 'patch'
29
+ default: patch
30
+ template: |
31
+ ## Changes
32
+
33
+ $CHANGES
obsei_module/.github/workflows/build.yml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflow will install Python dependencies, run test and lint with a single version of Python
2
+ # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3
+
4
+ name: CI
5
+
6
+ on:
7
+ push:
8
+ branches: [ master ]
9
+ pull_request:
10
+ branches: [ master ]
11
+
12
+ jobs:
13
+ type-check:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: '3.10'
20
+ - name: Test with mypy
21
+ run: |
22
+ pip install mypy
23
+ # Refer http://mypy-lang.blogspot.com/2021/06/mypy-0900-released.html
24
+ pip install mypy types-requests types-python-dateutil types-PyYAML types-dateparser types-protobuf types-pytz
25
+ mypy obsei
26
+
27
+ build-and-test:
28
+ needs: type-check
29
+ runs-on: ${{ matrix.os }}
30
+ strategy:
31
+ fail-fast: false
32
+ matrix:
33
+ os: [ ubuntu-latest, macos-latest, windows-latest ]
34
+ python-version: ['3.8', '3.9', '3.10', '3.11']
35
+
36
+ steps:
37
+ - uses: actions/checkout@v4
38
+ - name: Set up Python ${{ matrix.python-version }}
39
+ uses: actions/setup-python@v5
40
+ with:
41
+ python-version: ${{ matrix.python-version }}
42
+
43
+ - name: Install dependencies
44
+ run: |
45
+ python -m pip install --upgrade pip
46
+ pip install '.[dev,all]'
47
+ pip install --upgrade --upgrade-strategy eager trafilatura
48
+ python -m spacy download en_core_web_lg
49
+ python -m spacy download en_core_web_sm
50
+
51
+ - name: Test with pytest
52
+ run: |
53
+ coverage run -m pytest
54
+ coverage report -m
obsei_module/.github/workflows/pypi_publish.yml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflows will upload a Python Package using Twine when a release is created
2
+ # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3
+
4
+ name: Upload Python Package
5
+
6
+ on:
7
+ workflow_dispatch:
8
+ release:
9
+ types: [published]
10
+
11
+ jobs:
12
+ deploy-pypi-artifact:
13
+ runs-on: ubuntu-latest
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: '3.8'
22
+
23
+ - name: Install dependencies
24
+ run: |
25
+ python -m pip install --upgrade pip
26
+ pip install setuptools wheel twine hatch
27
+
28
+ - name: publish to PyPI
29
+ if: github.event_name != 'pull_request'
30
+ env:
31
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
32
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
33
+ run: |
34
+ hatch build
35
+ twine upload dist/*
obsei_module/.github/workflows/release_draft.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: release draft
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ draft-release:
8
+ # if: startsWith(github.ref, 'refs/tags/')
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: release-drafter/release-drafter@v6
12
+ with:
13
+ config-name: release-drafter.yml
14
+ env:
15
+ GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFT_TOKEN }}
obsei_module/.github/workflows/sdk_docker_publish.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflows will upload a Python Package using Twine when a release is created
2
+ # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3
+
4
+ name: Publish SDK docker image
5
+
6
+ on:
7
+ workflow_dispatch:
8
+ inputs:
9
+ tag:
10
+ description: 'Image tag'
11
+ required: true
12
+ release:
13
+ types: [published]
14
+
15
+ jobs:
16
+ deploy-sdk-docker:
17
+ runs-on: ubuntu-latest
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Docker meta
22
+ id: docker_meta
23
+ uses: docker/metadata-action@v5
24
+ with:
25
+ images: obsei/obsei-sdk
26
+
27
+ - name: Set up QEMU
28
+ uses: docker/setup-qemu-action@v3
29
+
30
+ - name: Set up Docker Buildx
31
+ uses: docker/setup-buildx-action@v3
32
+
33
+ - name: Login to DockerHub
34
+ if: github.event_name != 'pull_request'
35
+ uses: docker/login-action@v3
36
+ with:
37
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
38
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
39
+
40
+ - name: Build and push
41
+ uses: docker/build-push-action@v5
42
+ with:
43
+ context: ./
44
+ file: ./Dockerfile
45
+ push: ${{ github.event_name != 'pull_request' }}
46
+ tags: ${{ steps.docker_meta.outputs.tags }}
47
+ labels: ${{ steps.docker_meta.outputs.labels }}
48
+
49
+ - name: Image digest
50
+ run: echo ${{ steps.docker_build.outputs.digest }}
obsei_module/.github/workflows/ui_docker_publish.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflows will upload a Python Package using Twine when a release is created
2
+ # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3
+
4
+ name: Publish UI Docker image
5
+
6
+ on:
7
+ workflow_dispatch:
8
+ inputs:
9
+ tag:
10
+ description: 'Image tag'
11
+ required: true
12
+ release:
13
+ types: [published]
14
+
15
+ jobs:
16
+ deploy-ui-docker:
17
+ runs-on: ubuntu-latest
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Docker meta
22
+ id: docker_meta
23
+ uses: docker/metadata-action@v5
24
+ with:
25
+ images: obsei/obsei-ui-demo
26
+
27
+ - name: Set up QEMU
28
+ uses: docker/setup-qemu-action@v3
29
+
30
+ - name: Set up Docker Buildx
31
+ uses: docker/setup-buildx-action@v3
32
+
33
+ - name: Login to DockerHub
34
+ if: github.event_name != 'pull_request'
35
+ uses: docker/login-action@v3
36
+ with:
37
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
38
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
39
+
40
+ - name: Build and push
41
+ uses: docker/build-push-action@v5
42
+ with:
43
+ context: "{{defaultContext}}:sample-ui"
44
+ file: Dockerfile
45
+ push: ${{ github.event_name != 'pull_request' }}
46
+ tags: ${{ steps.docker_meta.outputs.tags }}
47
+ labels: ${{ steps.docker_meta.outputs.labels }}
48
+
49
+ - name: Image digest
50
+ run: echo ${{ steps.docker_build.outputs.digest }}
obsei_module/.gitignore ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
133
+
134
+ # pytype static type analyzer
135
+ .pytype/
136
+
137
+ # Cython debug symbols
138
+ cython_debug/
139
+
140
+ /.idea/*
141
+ *.db
142
+ models*
143
+
144
+ # OSX custom attributes
145
+ .DS_Store
146
+
147
+ # VS code configuration
148
+ .vscode/*
obsei_module/.pre-commit-config.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.3.0
4
+ hooks:
5
+ - id: check-yaml
6
+ - id: trailing-whitespace
7
+ - id: requirements-txt-fixer
8
+ - id: end-of-file-fixer
9
+
10
+ - repo: https://github.com/psf/black
11
+ rev: 22.10.0
12
+ hooks:
13
+ - id: black
14
+
15
+ - repo: https://github.com/pre-commit/mirrors-mypy
16
+ rev: v0.991
17
+ hooks:
18
+ - id: mypy
19
+ args: [--ignore-missing-imports]
20
+ additional_dependencies: [types-all]
21
+ files: ^obsei/
obsei_module/.pyup.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # autogenerated pyup.io config file
2
+ # see https://pyup.io/docs/configuration/ for all available options
3
+
4
+ schedule: ''
5
+ update: insecure
obsei_module/ATTRIBUTION.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This could not have been possible without following open source software -
2
+ - [searchtweets-v2](https://github.com/twitterdev/search-tweets-python): For Twitter's API v2 wrapper
3
+ - [vaderSentiment](https://github.com/cjhutto/vaderSentiment): For rule-based sentiment analysis
4
+ - [transformers](https://github.com/huggingface/transformers): For text-classification pipeline
5
+ - [atlassian-python-api](https://github.com/atlassian-api/atlassian-python-api): To interact with Jira
6
+ - [elasticsearch](https://github.com/elastic/elasticsearch-py): To interact with Elasticsearch
7
+ - [pydantic](https://github.com/samuelcolvin/pydantic): For data validation
8
+ - [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy): As SQL toolkit to access DB storage
9
+ - [google-play-scraper](https://github.com/JoMingyu/google-play-scraper): To fetch the Google Play Store review without authentication
10
+ - [praw](https://github.com/praw-dev/praw): For Reddit client
11
+ - [reddit-rss-reader](https://github.com/lalitpagaria/reddit-rss-reader): For Reddit scrapping
12
+ - [app-store-reviews-reader](https://github.com/lalitpagaria/app_store_reviews_reader): For App Store reviews scrapping
13
+ - [slack-sdk](https://github.com/slackapi/python-slack-sdk): For slack integration
14
+ - [presidio-anonymizer](https://github.com/microsoft/presidio): Personal information anonymizer
15
+ - [GoogleNews](https://github.com/Iceloof/GoogleNews): For Google News integration
16
+ - [python-facebook-api](https://github.com/sns-sdks/python-facebook): For facebook integration
17
+ - [youtube-comment-downloader](https://github.com/egbertbouman/youtube-comment-downloader): For Youtube video comments extraction code
18
+ - [dateparser](https://github.com/scrapinghub/dateparser): To parse date properly (where format is ambiguous)
obsei_module/CITATION.cff ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YAML 1.2
2
+ ---
3
+ authors:
4
+ -
5
+ family-names: Pagaria
6
+ given-names: Lalit
7
+
8
+ cff-version: "1.1.0"
9
+ license: "Apache-2.0"
10
+ message: "If you use this software, please cite it using this metadata."
11
+ repository-code: "https://github.com/obsei/obsei"
12
+ title: "Obsei - a low code AI powered automation tool"
13
+ version: "0.0.10"
14
+ ...
obsei_module/CNAME ADDED
@@ -0,0 +1 @@
 
 
1
+ www.obsei.com
obsei_module/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, religion, or sexual identity
10
+ and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ - Demonstrating empathy and kindness toward other people
21
+ - Being respectful of differing opinions, viewpoints, and experiences
22
+ - Giving and gracefully accepting constructive feedback
23
+ - Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ - Focusing on what is best not just for us as individuals, but for the
26
+ overall community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ - The use of sexualized language or imagery, and sexual attention or
31
+ advances of any kind
32
+ - Trolling, insulting or derogatory comments, and personal or political attacks
33
+ - Public or private harassment
34
+ - Publishing others' private information, such as a physical or email
35
+ address, without their explicit permission
36
+ - Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official e-mail address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement at
63
+ obsei.tool@gmail.com
64
+ All complaints will be reviewed and investigated promptly and fairly.
65
+
66
+ All community leaders are obligated to respect the privacy and security of the
67
+ reporter of any incident.
68
+
69
+ ## Enforcement Guidelines
70
+
71
+ Community leaders will follow these Community Impact Guidelines in determining
72
+ the consequences for any action they deem in violation of this Code of Conduct:
73
+
74
+ ### 1. Correction
75
+
76
+ **Community Impact**: Use of inappropriate language or other behavior deemed
77
+ unprofessional or unwelcome in the community.
78
+
79
+ **Consequence**: A written warning from community leaders, providing
80
+ clarity around the nature of the violation and an explanation of why the
81
+ behavior was inappropriate. A public apology may be requested.
82
+
83
+ ### 2. Warning
84
+
85
+ **Community Impact**: A violation through a single incident or series
86
+ of actions.
87
+
88
+ **Consequence**: A warning with consequences for continued behavior. No
89
+ interaction with the people involved, including unsolicited interaction with
90
+ those enforcing the Code of Conduct, for a specified period of time. This
91
+ includes avoiding interactions in community spaces as well as external channels
92
+ like social media. Violating these terms may lead to a temporary or
93
+ permanent ban.
94
+
95
+ ### 3. Temporary Ban
96
+
97
+ **Community Impact**: A serious violation of community standards, including
98
+ sustained inappropriate behavior.
99
+
100
+ **Consequence**: A temporary ban from any sort of interaction or public
101
+ communication with the community for a specified period of time. No public or
102
+ private interaction with the people involved, including unsolicited interaction
103
+ with those enforcing the Code of Conduct, is allowed during this period.
104
+ Violating these terms may lead to a permanent ban.
105
+
106
+ ### 4. Permanent Ban
107
+
108
+ **Community Impact**: Demonstrating a pattern of violation of community
109
+ standards, including sustained inappropriate behavior, harassment of an
110
+ individual, or aggression toward or disparagement of classes of individuals.
111
+
112
+ **Consequence**: A permanent ban from any sort of public interaction within
113
+ the community.
114
+
115
+ ## Attribution
116
+
117
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118
+ version 2.0, available at
119
+ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120
+
121
+ Community Impact Guidelines were inspired by [Mozilla's code of conduct
122
+ enforcement ladder](https://github.com/mozilla/diversity).
123
+
124
+ [homepage]: https://www.contributor-covenant.org
125
+
126
+ For answers to common questions about this code of conduct, see the FAQ at
127
+ https://www.contributor-covenant.org/faq. Translations are available at
128
+ https://www.contributor-covenant.org/translations.
obsei_module/CONTRIBUTING.md ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 👐 Contributing to Obsei
2
+
3
+ First off, thank you for even considering contributing to this package, every contribution big or small is greatly appreciated.
4
+ Community contributions are what keep projects like this fueled and constantly improving, so a big thanks to you!
5
+
6
+ Below are some sections detailing the guidelines we'd like you to follow to make your contribution as seamless as possible.
7
+
8
+ - [Code of Conduct](#coc)
9
+ - [Asking a Question and Discussions](#question)
10
+ - [Issues, Bugs, and Feature Requests](#issue)
11
+ - [Submission Guidelines](#submit)
12
+ - [Code Style and Formatting](#code)
13
+ - [Contributor License Agreement](#cla)
14
+
15
+ ## 📜 <a name="coc"></a> Code of Conduct
16
+
17
+ The [Code of Conduct](https://github.com/obsei/obsei/blob/master/CODE_OF_CONDUCT.md) applies within all community spaces.
18
+ If you are not familiar with our Code of Conduct policy, take a minute to read the policy before starting with your first contribution.
19
+
20
+ ## 🗣️ <a name="question"></a> Query or Discussion
21
+
22
+ We would like to use [Github discussions](https://github.com/obsei/obsei/discussions) as the central hub for all
23
+ community discussions, questions, and everything else in between. While Github discussions is a new service (as of 2021)
24
+ we believe that it really helps keep this repo as one single source to find all relevant information. Our hope is that
25
+ discussion page functions as a record of all the conversations that help contribute to the project's development.
26
+
27
+ If you are new to [Github discussions](https://github.com/obsei/obsei/discussions) it is a very similar experience
28
+ to Stack Overflow with an added element of general discussion and discourse rather than solely being question and answer based.
29
+
30
+ ## 🪲 <a name="issue"></a> Issues, Bugs, and Feature Requests
31
+
32
+ We are very open to community contributions and appreciate anything that improves **Obsei**. This includes fixings typos, adding missing documentation, fixing bugs or adding new features.
33
+ To avoid unnecessary work on either side, please stick to the following process:
34
+
35
+ 1. If you feel like your issue is not specific and more of a general question about a design decision, or algorithm implementation maybe start a [discussion](https://github.com/obsei/obsei/discussions) instead, this helps keep the issues less cluttered and encourages more open-ended conversation.
36
+ 2. Check if there is already [an related issue](https://github.com/obsei/obsei/issues).
37
+ 3. If there is not, open a new one to start a discussion. Some features might be a nice idea, but don't fit in the scope of Obsei and we hate to close finished PRs.
38
+ 4. If we came to the conclusion to move forward with your issue, we will be happy to accept a pull request. Make sure you create a pull request in an early draft version and ask for feedback.
39
+ 5. Verify that all tests in the CI pass (and add new ones if you implement anything new)
40
+
41
+ See [below](#submit) for some guidelines.
42
+
43
+ ## ✉️ <a name="submit"></a> Submission Guidelines
44
+
45
+ ### Submitting an Issue
46
+
47
+ Before you submit your issue search the archive, maybe your question was already answered.
48
+
49
+ If your issue appears to be a bug, and hasn't been reported, open a new issue.
50
+ Help us to maximize the effort we can spend fixing issues and adding new
51
+ features, by not reporting duplicate issues. Providing the following information will increase the
52
+ chances of your issue being dealt with quickly:
53
+
54
+ - **Describe the bug** - A clear and concise description of what the bug is.
55
+ - **To Reproduce**- Steps to reproduce the behavior.
56
+ - **Expected behavior** - A clear and concise description of what you expected to happen.
57
+ - **Environment**
58
+ - Obsei version
59
+ - Python version
60
+ - OS
61
+ - **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be
62
+ causing the problem (line of code or commit)
63
+
64
+ When you submit a PR you will be presented with a PR template, please fill this in as best you can.
65
+
66
+ ### Submitting a Pull Request
67
+
68
+ Before you submit your pull request consider the following guidelines:
69
+
70
+ - Search [GitHub](https://github.com/obsei/obsei/pulls) for an open or closed Pull Request
71
+ that relates to your submission. You don't want to duplicate effort.
72
+ - Fork the main repo if not already done
73
+ - Rebase fork with `upstream master`
74
+ - Create new branch and add the changes in that branch
75
+ - Add supporting test cases
76
+ - Follow our [Coding Rules](#rules).
77
+ - Avoid checking in files that shouldn't be tracked (e.g `dist`, `build`, `.tmp`, `.idea`).
78
+ We recommend using a [global](#global-gitignore) gitignore for this.
79
+ - Before you commit please run the test suite and make sure all tests are passing.
80
+ - Format your code appropriately:
81
+ - This package uses [black](https://black.readthedocs.io/en/stable/) as its formatter.
82
+ In order to format your code with black run `black . ` from the root of the package.
83
+ - Run `pre-commit run --all-files` if you're adding new hooks to pre-commit config file. By default, pre-commit will run on modified files when commiting changes.
84
+ - Commit your changes using a descriptive commit message.
85
+ - In GitHub, send a pull request to `obsei:master`.
86
+ - If we suggest changes then:
87
+ - Make the required updates.
88
+ - Rebase your branch and force push to your GitHub repository (this will update your Pull Request):
89
+
90
+ That's it! Thank you for your contribution!
91
+
92
+ ## ✅ <a name="rules"></a> Coding Rules
93
+
94
+ We generally follow the [Google Python style guide](http://google.github.io/styleguide/pyguide.html).
95
+
96
+ ## 📝 <a name="cla"></a> Contributor License Agreement
97
+
98
+ That we do not have any potential problems later it is sadly necessary to sign a [Contributor License Agreement](CONTRIBUTOR_LICENSE_AGREEMENT.md). That can be done literally with the push of a button.
99
+
100
+ ---
101
+
102
+ _This guide was inspired by the [transformers-interpret](https://github.com/cdpierse/transformers-interpret/blob/master/CONTRIBUTING.md),
103
+ [Haystack](https://github.com/deepset-ai/haystack/blob/master/CONTRIBUTING.md) and [n8n](https://github.com/n8n-io/n8n/blob/master/CONTRIBUTOR_LICENSE_AGREEMENT.md)_
obsei_module/CONTRIBUTOR_LICENSE_AGREEMENT.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Obsei Contributor License Agreement
2
+
3
+ I give Obsei's Creator permission to license my contributions to any terms they like. I am giving them this license in order to make it possible for them to accept my contributions into their project.
obsei_module/Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is Docker file to Obsei SDK with dependencies installed
2
+ FROM python:3.10-slim-bullseye
3
+
4
+ RUN useradd --create-home user
5
+ WORKDIR /home/user
6
+
7
+ # env variable
8
+ ENV PIP_DISABLE_PIP_VERSION_CHECK 1
9
+ ENV PIP_NO_CACHE_DIR 1
10
+ ENV WORKFLOW_SCRIPT '/home/user/obsei/process_workflow.py'
11
+ ENV OBSEI_CONFIG_PATH ""
12
+ ENV OBSEI_CONFIG_FILENAME ""
13
+
14
+
15
+ # Hack to install jre on debian
16
+ RUN mkdir -p /usr/share/man/man1
17
+
18
+ # install few required tools
19
+ RUN apt-get update && apt-get install -y --no-install-recommends curl git pkg-config cmake libncurses5 g++ \
20
+ && apt-get clean autoclean && apt-get autoremove -y \
21
+ && rm -rf /var/lib/{apt,dpkg,cache,log}/
22
+
23
+ # install as a package
24
+ COPY pyproject.toml README.md /home/user/
25
+ RUN pip install --upgrade pip
26
+
27
+ # copy README
28
+ COPY README.md /home/user/
29
+
30
+ # copy code
31
+ COPY obsei /home/user/obsei
32
+ RUN pip install -e .[all]
33
+
34
+
35
+ USER user
36
+
37
+ # cmd for running the API
38
+ CMD ["sh", "-c", "python ${WORKFLOW_SCRIPT}"]
obsei_module/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2020-2022 Oraika Technologies Private Limited (https://www.oraika.com)
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
obsei_module/MANIFEST.in ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ include LICENSE
2
+ include requirements.txt
3
+ include README.md
obsei_module/README.md ADDED
@@ -0,0 +1,1067 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/obsei/obsei-resources/master/images/obsei-flyer.png" />
3
+ </p>
4
+
5
+ ---
6
+ <p align="center">
7
+ <a href="https://www.oraika.com">
8
+ <img src="https://static.wixstatic.com/media/59bc4e_971f153f107e48c7912b9b2d4cd1b1a4~mv2.png/v1/fill/w_177,h_49,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/3_edited.png" />
9
+ </a>
10
+ </p>
11
+ <p align="center">
12
+ <a href="https://github.com/obsei/obsei/actions">
13
+ <img alt="Test" src="https://github.com/obsei/obsei/workflows/CI/badge.svg?branch=master">
14
+ </a>
15
+ <a href="https://github.com/obsei/obsei/blob/master/LICENSE">
16
+ <img alt="License" src="https://img.shields.io/pypi/l/obsei">
17
+ </a>
18
+ <a href="https://pypi.org/project/obsei">
19
+ <img src="https://img.shields.io/pypi/pyversions/obsei" alt="PyPI - Python Version" />
20
+ </a>
21
+ <a href="https://pypi.org/project/obsei/">
22
+ <img alt="Release" src="https://img.shields.io/pypi/v/obsei">
23
+ </a>
24
+ <a href="https://pepy.tech/project/obsei">
25
+ <img src="https://pepy.tech/badge/obsei/month" alt="Downloads" />
26
+ </a>
27
+ <a href="https://huggingface.co/spaces/obsei/obsei-demo">
28
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue" alt="HF Spaces" />
29
+ </a>
30
+ <a href="https://github.com/obsei/obsei/commits/master">
31
+ <img alt="Last commit" src="https://img.shields.io/github/last-commit/obsei/obsei">
32
+ </a>
33
+ <a href="https://github.com/obsei/obsei">
34
+ <img alt="Github stars" src="https://img.shields.io/github/stars/obsei/obsei?style=social">
35
+ </a>
36
+ <a href="https://www.youtube.com/channel/UCqdvgro1BzU13tkAfX3jCJA">
37
+ <img alt="YouTube Channel Subscribers" src="https://img.shields.io/youtube/channel/subscribers/UCqdvgro1BzU13tkAfX3jCJA?style=social">
38
+ </a>
39
+ <a href="https://join.slack.com/t/obsei-community/shared_invite/zt-r0wnuz02-FAkAmhTAUoc6pD4SLB9Ikg">
40
+ <img src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/Slack_join.svg" height="30">
41
+ </a>
42
+ <a href="https://twitter.com/ObseiAI">
43
+ <img src="https://img.shields.io/twitter/follow/ObseiAI?style=social">
44
+ </a>
45
+ </p>
46
+
47
+ ---
48
+
49
+ ![](https://raw.githubusercontent.com/obsei/obsei-resources/master/gifs/obsei_flow.gif)
50
+
51
+ ---
52
+
53
+ <span style="color:red">
54
+ <b>Note</b>: Obsei is still in alpha stage, so use it carefully in production. Also, as it is constantly under development, the master branch may contain many breaking changes. Please use the released version.
55
+ </span>
56
+
57
+ ---
58
+
59
+ **Obsei** (pronounced "Ob see" | /əb-'sē/) is an open-source, low-code, AI powered automation tool. _Obsei_ consists of -
60
+
61
+ - **Observer**: Collect unstructured data from various sources like tweets from Twitter, Subreddit comments on Reddit, page post's comments from Facebook, App Stores reviews, Google reviews, Amazon reviews, News, Website, etc.
62
+ - **Analyzer**: Analyze unstructured data collected with various AI tasks like classification, sentiment analysis, translation, PII, etc.
63
+ - **Informer**: Send analyzed data to various destinations like ticketing platforms, data storage, dataframe, etc so that the user can take further actions and perform analysis on the data.
64
+
65
+ All the Observers can store their state in databases (Sqlite, Postgres, MySQL, etc.), making Obsei suitable for scheduled jobs or serverless applications.
66
+
67
+ ![Obsei diagram](https://raw.githubusercontent.com/obsei/obsei-resources/master/images/Obsei_diagram.png)
68
+
69
+ ### Future direction -
70
+
71
+ - Text, Image, Audio, Documents and Video oriented workflows
72
+ - Collect data from every possible private and public channels
73
+ - Add every possible workflow to an AI downstream application to automate manual cognitive workflows
74
+
75
+ ## Use cases
76
+
77
+ _Obsei_ use cases are following, but not limited to -
78
+
79
+ - Social listening: Listening about social media posts, comments, customer feedback, etc.
80
+ - Alerting/Notification: To get auto-alerts for events such as customer complaints, qualified sales leads, etc.
81
+ - Automatic customer issue creation based on customer complaints on Social Media, Email, etc.
82
+ - Automatic assignment of proper tags to tickets based content of customer complaint for example login issue, sign up issue, delivery issue, etc.
83
+ - Extraction of deeper insight from feedbacks on various platforms
84
+ - Market research
85
+ - Creation of dataset for various AI tasks
86
+ - Many more based on creativity 💡
87
+
88
+ ## Installation
89
+
90
+ ### Prerequisite
91
+
92
+ Install the following (if not present already) -
93
+
94
+ - Install [Python 3.7+](https://www.python.org/downloads/)
95
+ - Install [PIP](https://pip.pypa.io/en/stable/installing/)
96
+
97
+ ### Install Obsei
98
+
99
+ You can install Obsei either via PIP or Conda based on your preference.
100
+ To install latest released version -
101
+
102
+ ```shell
103
+ pip install obsei[all]
104
+ ```
105
+
106
+ Install from master branch (if you want to try the latest features) -
107
+
108
+ ```shell
109
+ git clone https://github.com/obsei/obsei.git
110
+ cd obsei
111
+ pip install --editable .[all]
112
+ ```
113
+
114
+ Note: `all` option will install all the dependencies which might not be needed for your workflow, alternatively
115
+ following options are available to install minimal dependencies as per need -
116
+ - `pip install obsei[source]`: To install dependencies related to all observers
117
+ - `pip install obsei[sink]`: To install dependencies related to all informers
118
+ - `pip install obsei[analyzer]`: To install dependencies related to all analyzers, it will install pytorch as well
119
+ - `pip install obsei[twitter-api]`: To install dependencies related to Twitter observer
120
+ - `pip install obsei[google-play-scraper]`: To install dependencies related to Play Store review scrapper observer
121
+ - `pip install obsei[google-play-api]`: To install dependencies related to Google official play store review API based observer
122
+ - `pip install obsei[app-store-scraper]`: To install dependencies related to Apple App Store review scrapper observer
123
+ - `pip install obsei[reddit-scraper]`: To install dependencies related to Reddit post and comment scrapper observer
124
+ - `pip install obsei[reddit-api]`: To install dependencies related to Reddit official api based observer
125
+ - `pip install obsei[pandas]`: To install dependencies related to TSV/CSV/Pandas based observer and informer
126
+ - `pip install obsei[google-news-scraper]`: To install dependencies related to Google news scrapper observer
127
+ - `pip install obsei[facebook-api]`: To install dependencies related to Facebook official page post and comments api based observer
128
+ - `pip install obsei[atlassian-api]`: To install dependencies related to Jira official api based informer
129
+ - `pip install obsei[elasticsearch]`: To install dependencies related to elasticsearch informer
130
+ - `pip install obsei[slack-api]`: To install dependencies related to Slack official api based informer
131
+
132
+ You can also mix multiple dependencies together in single installation command. For example to install dependencies
133
+ Twitter observer, all analyzer, and Slack informer use following command -
134
+ ```shell
135
+ pip install obsei[twitter-api, analyzer, slack-api]
136
+ ```
137
+
138
+
139
+ ## How to use
140
+
141
+ Expand the following steps and create a workflow -
142
+
143
+ <details><summary><b>Step 1: Configure Source/Observer</b></summary>
144
+
145
+ <table ><tbody ><tr></tr><tr>
146
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/twitter.png" width="20" height="20"><b>Twitter</b></summary><hr>
147
+
148
+ ```python
149
+ from obsei.source.twitter_source import TwitterCredentials, TwitterSource, TwitterSourceConfig
150
+
151
+ # initialize twitter source config
152
+ source_config = TwitterSourceConfig(
153
+ keywords=["issue"], # Keywords, @user or #hashtags
154
+ lookup_period="1h", # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
155
+ cred_info=TwitterCredentials(
156
+ # Enter your twitter consumer key and secret. Get it from https://developer.twitter.com/en/apply-for-access
157
+ consumer_key="<twitter_consumer_key>",
158
+ consumer_secret="<twitter_consumer_secret>",
159
+ bearer_token='<ENTER BEARER TOKEN>',
160
+ )
161
+ )
162
+
163
+ # initialize tweets retriever
164
+ source = TwitterSource()
165
+ ```
166
+
167
+ </details>
168
+ </td>
169
+ </tr>
170
+ <tr>
171
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/Youtube.png" width="20" height="20"><b>Youtube Scrapper</b></summary><hr>
172
+
173
+ ```python
174
+ from obsei.source.youtube_scrapper import YoutubeScrapperSource, YoutubeScrapperConfig
175
+
176
+ # initialize Youtube source config
177
+ source_config = YoutubeScrapperConfig(
178
+ video_url="https://www.youtube.com/watch?v=uZfns0JIlFk", # Youtube video URL
179
+ fetch_replies=True, # Fetch replies to comments
180
+ max_comments=10, # Total number of comments and replies to fetch
181
+ lookup_period="1Y", # Lookup period from current time, format: `<number><d|h|m|M|Y>` (day|hour|minute|month|year)
182
+ )
183
+
184
+ # initialize Youtube comments retriever
185
+ source = YoutubeScrapperSource()
186
+ ```
187
+
188
+ </details>
189
+ </td>
190
+ </tr>
191
+ <tr>
192
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/facebook.png" width="20" height="20"><b>Facebook</b></summary><hr>
193
+
194
+ ```python
195
+ from obsei.source.facebook_source import FacebookCredentials, FacebookSource, FacebookSourceConfig
196
+
197
+ # initialize facebook source config
198
+ source_config = FacebookSourceConfig(
199
+ page_id="110844591144719", # Facebook page id, for example this one for Obsei
200
+ lookup_period="1h", # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
201
+ cred_info=FacebookCredentials(
202
+ # Enter your facebook app_id, app_secret and long_term_token. Get it from https://developers.facebook.com/apps/
203
+ app_id="<facebook_app_id>",
204
+ app_secret="<facebook_app_secret>",
205
+ long_term_token="<facebook_long_term_token>",
206
+ )
207
+ )
208
+
209
+ # initialize facebook post comments retriever
210
+ source = FacebookSource()
211
+ ```
212
+
213
+ </details>
214
+ </td>
215
+ </tr>
216
+ <tr>
217
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/gmail.png" width="20" height="20"><b>Email</b></summary><hr>
218
+
219
+ ```python
220
+ from obsei.source.email_source import EmailConfig, EmailCredInfo, EmailSource
221
+
222
+ # initialize email source config
223
+ source_config = EmailConfig(
224
+ # List of IMAP servers for most commonly used email providers
225
+ # https://www.systoolsgroup.com/imap/
226
+ # Also, if you're using a Gmail account then make sure you allow less secure apps on your account -
227
+ # https://myaccount.google.com/lesssecureapps?pli=1
228
+ # Also enable IMAP access -
229
+ # https://mail.google.com/mail/u/0/#settings/fwdandpop
230
+ imap_server="imap.gmail.com", # Enter IMAP server
231
+ cred_info=EmailCredInfo(
232
+ # Enter your email account username and password
233
+ username="<email_username>",
234
+ password="<email_password>"
235
+ ),
236
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
237
+ )
238
+
239
+ # initialize email retriever
240
+ source = EmailSource()
241
+ ```
242
+
243
+ </details>
244
+ </td>
245
+ </tr>
246
+ <tr>
247
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/google_maps.png" width="20" height="20"><b>Google Maps Reviews Scrapper</b></summary><hr>
248
+
249
+ ```python
250
+ from obsei.source.google_maps_reviews import OSGoogleMapsReviewsSource, OSGoogleMapsReviewsConfig
251
+
252
+ # initialize Outscrapper Maps review source config
253
+ source_config = OSGoogleMapsReviewsConfig(
254
+ # Collect API key from https://outscraper.com/
255
+ api_key="<Enter Your API Key>",
256
+ # Enter Google Maps link or place id
257
+ # For example below is for the "Taj Mahal"
258
+ queries=["https://www.google.co.in/maps/place/Taj+Mahal/@27.1751496,78.0399535,17z/data=!4m5!3m4!1s0x39747121d702ff6d:0xdd2ae4803f767dde!8m2!3d27.1751448!4d78.0421422"],
259
+ number_of_reviews=10,
260
+ )
261
+
262
+
263
+ # initialize Outscrapper Maps review retriever
264
+ source = OSGoogleMapsReviewsSource()
265
+ ```
266
+
267
+ </details>
268
+ </td>
269
+ </tr>
270
+ <tr>
271
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/appstore.png" width="20" height="20"><b>AppStore Reviews Scrapper</b></summary><hr>
272
+
273
+ ```python
274
+ from obsei.source.appstore_scrapper import AppStoreScrapperConfig, AppStoreScrapperSource
275
+
276
+ # initialize app store source config
277
+ source_config = AppStoreScrapperConfig(
278
+ # Need two parameters app_id and country.
279
+ # `app_id` can be found at the end of the url of app in app store.
280
+ # For example - https://apps.apple.com/us/app/xcode/id497799835
281
+ # `310633997` is the app_id for xcode and `us` is country.
282
+ countries=["us"],
283
+ app_id="310633997",
284
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
285
+ )
286
+
287
+
288
+ # initialize app store reviews retriever
289
+ source = AppStoreScrapperSource()
290
+ ```
291
+
292
+ </details>
293
+ </td>
294
+ </tr>
295
+ <tr>
296
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/playstore.png" width="20" height="20"><b>Play Store Reviews Scrapper</b></summary><hr>
297
+
298
+ ```python
299
+ from obsei.source.playstore_scrapper import PlayStoreScrapperConfig, PlayStoreScrapperSource
300
+
301
+ # initialize play store source config
302
+ source_config = PlayStoreScrapperConfig(
303
+ # Need two parameters package_name and country.
304
+ # `package_name` can be found at the end of the url of app in play store.
305
+ # For example - https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en&gl=US
306
+ # `com.google.android.gm` is the package_name for xcode and `us` is country.
307
+ countries=["us"],
308
+ package_name="com.google.android.gm",
309
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
310
+ )
311
+
312
+ # initialize play store reviews retriever
313
+ source = PlayStoreScrapperSource()
314
+ ```
315
+
316
+ </details>
317
+ </td>
318
+ </tr>
319
+ <tr>
320
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/reddit.png" width="20" height="20"><b>Reddit</b></summary><hr>
321
+
322
+ ```python
323
+ from obsei.source.reddit_source import RedditConfig, RedditSource, RedditCredInfo
324
+
325
+ # initialize reddit source config
326
+ source_config = RedditConfig(
327
+ subreddits=["wallstreetbets"], # List of subreddits
328
+ # Reddit account username and password
329
+ # You can also enter reddit client_id and client_secret or refresh_token
330
+ # Create credential at https://www.reddit.com/prefs/apps
331
+ # Also refer https://praw.readthedocs.io/en/latest/getting_started/authentication.html
332
+ # Currently Password Flow, Read Only Mode and Saved Refresh Token Mode are supported
333
+ cred_info=RedditCredInfo(
334
+ username="<reddit_username>",
335
+ password="<reddit_password>"
336
+ ),
337
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
338
+ )
339
+
340
+ # initialize reddit retriever
341
+ source = RedditSource()
342
+ ```
343
+
344
+ </details>
345
+ </td>
346
+ </tr>
347
+ <tr>
348
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/reddit.png" width="20" height="20"><b>Reddit Scrapper</b></summary><hr>
349
+
350
+ <i>Note: Reddit heavily rate limit scrappers, hence use it to fetch small data during long period</i>
351
+
352
+ ```python
353
+ from obsei.source.reddit_scrapper import RedditScrapperConfig, RedditScrapperSource
354
+
355
+ # initialize reddit scrapper source config
356
+ source_config = RedditScrapperConfig(
357
+ # Reddit subreddit, search etc rss url. For proper url refer following link -
358
+ # Refer https://www.reddit.com/r/pathogendavid/comments/tv8m9/pathogendavids_guide_to_rss_and_reddit/
359
+ url="https://www.reddit.com/r/wallstreetbets/comments/.rss?sort=new",
360
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
361
+ )
362
+
363
+ # initialize reddit retriever
364
+ source = RedditScrapperSource()
365
+ ```
366
+
367
+ </details>
368
+ </td>
369
+ </tr>
370
+ <tr>
371
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/googlenews.png" width="20" height="20"><b>Google News</b></summary><hr>
372
+
373
+ ```python
374
+ from obsei.source.google_news_source import GoogleNewsConfig, GoogleNewsSource
375
+
376
+ # initialize Google News source config
377
+ source_config = GoogleNewsConfig(
378
+ query='bitcoin',
379
+ max_results=5,
380
+ # To fetch full article text enable `fetch_article` flag
381
+ # By default google news gives title and highlight
382
+ fetch_article=True,
383
+ # proxy='http://127.0.0.1:8080'
384
+ )
385
+
386
+ # initialize Google News retriever
387
+ source = GoogleNewsSource()
388
+ ```
389
+
390
+ </details>
391
+ </td>
392
+ </tr>
393
+ <tr>
394
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/webcrawler.png" width="20" height="20"><b>Web Crawler</b></summary><hr>
395
+
396
+ ```python
397
+ from obsei.source.website_crawler_source import TrafilaturaCrawlerConfig, TrafilaturaCrawlerSource
398
+
399
+ # initialize website crawler source config
400
+ source_config = TrafilaturaCrawlerConfig(
401
+ urls=['https://obsei.github.io/obsei/']
402
+ )
403
+
404
+ # initialize website text retriever
405
+ source = TrafilaturaCrawlerSource()
406
+ ```
407
+
408
+ </details>
409
+ </td>
410
+ </tr>
411
+ <tr>
412
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/pandas.svg" width="20" height="20"><b>Pandas DataFrame</b></summary><hr>
413
+
414
+ ```python
415
+ import pandas as pd
416
+ from obsei.source.pandas_source import PandasSource, PandasSourceConfig
417
+
418
+ # Initialize your Pandas DataFrame from your sources like csv, excel, sql etc
419
+ # In following example we are reading csv which have two columns title and text
420
+ csv_file = "https://raw.githubusercontent.com/deepset-ai/haystack/master/tutorials/small_generator_dataset.csv"
421
+ dataframe = pd.read_csv(csv_file)
422
+
423
+ # initialize pandas sink config
424
+ sink_config = PandasSourceConfig(
425
+ dataframe=dataframe,
426
+ include_columns=["score"],
427
+ text_columns=["name", "degree"],
428
+ )
429
+
430
+ # initialize pandas sink
431
+ sink = PandasSource()
432
+ ```
433
+
434
+ </details>
435
+ </td>
436
+ </tr>
437
+ </tbody>
438
+ </table>
439
+
440
+ </details>
441
+
442
+ <details><summary><b>Step 2: Configure Analyzer</b></summary>
443
+
444
+ <i>Note: To run transformers in an offline mode, check [transformers offline mode](https://huggingface.co/transformers/installation.html#offline-mode).</i>
445
+
446
+ <p>Some analyzer support GPU and to utilize pass <b>device</b> parameter.
447
+ List of possible values of <b>device</b> parameter (default value <i>auto</i>):
448
+ <ol>
449
+ <li> <b>auto</b>: GPU (cuda:0) will be used if available otherwise CPU will be used
450
+ <li> <b>cpu</b>: CPU will be used
451
+ <li> <b>cuda:{id}</b> - GPU will be used with provided CUDA device id
452
+ </ol>
453
+ </p>
454
+
455
+ <table ><tbody ><tr></tr><tr>
456
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/classification.png" width="20" height="20"><b>Text Classification</b></summary><hr>
457
+
458
+ Text classification: Classify text into user provided categories.
459
+
460
+ ```python
461
+ from obsei.analyzer.classification_analyzer import ClassificationAnalyzerConfig, ZeroShotClassificationAnalyzer
462
+
463
+ # initialize classification analyzer config
464
+ # It can also detect sentiments if "positive" and "negative" labels are added.
465
+ analyzer_config=ClassificationAnalyzerConfig(
466
+ labels=["service", "delay", "performance"],
467
+ )
468
+
469
+ # initialize classification analyzer
470
+ # For supported models refer https://huggingface.co/models?filter=zero-shot-classification
471
+ text_analyzer = ZeroShotClassificationAnalyzer(
472
+ model_name_or_path="typeform/mobilebert-uncased-mnli",
473
+ device="auto"
474
+ )
475
+ ```
476
+
477
+ </details>
478
+ </td>
479
+ </tr>
480
+ <tr>
481
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/sentiment.png" width="20" height="20"><b>Sentiment Analyzer</b></summary><hr>
482
+
483
+ Sentiment Analyzer: Detect the sentiment of the text. Text classification can also perform sentiment analysis, but if you don't want to use a heavy-duty NLP model, then use the less resource-hungry, dictionary-based Vader sentiment detector.
484
+
485
+ ```python
486
+ from obsei.analyzer.sentiment_analyzer import VaderSentimentAnalyzer
487
+
488
+ # Vader does not need any configuration settings
489
+ analyzer_config=None
490
+
491
+ # initialize vader sentiment analyzer
492
+ text_analyzer = VaderSentimentAnalyzer()
493
+ ```
494
+
495
+ </details>
496
+ </td>
497
+ </tr>
498
+ <tr>
499
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/ner.png" width="20" height="20"><b>NER Analyzer</b></summary><hr>
500
+
501
+ NER (Named-Entity Recognition) Analyzer: Extract information and classify named entities mentioned in text into pre-defined categories such as person names, organizations, locations, medical codes, time expressions, quantities, monetary values, percentages, etc
502
+
503
+ ```python
504
+ from obsei.analyzer.ner_analyzer import NERAnalyzer
505
+
506
+ # NER analyzer does not need configuration settings
507
+ analyzer_config=None
508
+
509
+ # initialize ner analyzer
510
+ # For supported models refer https://huggingface.co/models?filter=token-classification
511
+ text_analyzer = NERAnalyzer(
512
+ model_name_or_path="elastic/distilbert-base-cased-finetuned-conll03-english",
513
+ device = "auto"
514
+ )
515
+ ```
516
+
517
+ </details>
518
+ </td>
519
+ </tr>
520
+ <tr>
521
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/translator.png" width="20" height="20"><b>Translator</b></summary><hr>
522
+
523
+ ```python
524
+ from obsei.analyzer.translation_analyzer import TranslationAnalyzer
525
+
526
+ # Translator does not need analyzer config
527
+ analyzer_config = None
528
+
529
+ # initialize translator
530
+ # For supported models refer https://huggingface.co/models?pipeline_tag=translation
531
+ analyzer = TranslationAnalyzer(
532
+ model_name_or_path="Helsinki-NLP/opus-mt-hi-en",
533
+ device = "auto"
534
+ )
535
+ ```
536
+
537
+ </details>
538
+ </td>
539
+ </tr>
540
+ <tr>
541
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/pii.png" width="20" height="20"><b>PII Anonymizer</b></summary><hr>
542
+
543
+ ```python
544
+ from obsei.analyzer.pii_analyzer import PresidioEngineConfig, PresidioModelConfig, \
545
+ PresidioPIIAnalyzer, PresidioPIIAnalyzerConfig
546
+
547
+ # initialize pii analyzer's config
548
+ analyzer_config = PresidioPIIAnalyzerConfig(
549
+ # Whether to return only pii analysis or anonymize text
550
+ analyze_only=False,
551
+ # Whether to return detail information about anonymization decision
552
+ return_decision_process=True
553
+ )
554
+
555
+ # initialize pii analyzer
556
+ analyzer = PresidioPIIAnalyzer(
557
+ engine_config=PresidioEngineConfig(
558
+ # spacy and stanza nlp engines are supported
559
+ # For more info refer
560
+ # https://microsoft.github.io/presidio/analyzer/developing_recognizers/#utilize-spacy-or-stanza
561
+ nlp_engine_name="spacy",
562
+ # Update desired spacy model and language
563
+ models=[PresidioModelConfig(model_name="en_core_web_lg", lang_code="en")]
564
+ )
565
+ )
566
+ ```
567
+
568
+ </details>
569
+ </td>
570
+ </tr>
571
+ <tr>
572
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/dummy.png" width="20" height="20"><b>Dummy Analyzer</b></summary><hr>
573
+
574
+ Dummy Analyzer: Does nothing. It's simply used for transforming the input (TextPayload) to output (TextPayload) and adding the user-supplied dummy data.
575
+
576
+ ```python
577
+ from obsei.analyzer.dummy_analyzer import DummyAnalyzer, DummyAnalyzerConfig
578
+
579
+ # initialize dummy analyzer's configuration settings
580
+ analyzer_config = DummyAnalyzerConfig()
581
+
582
+ # initialize dummy analyzer
583
+ analyzer = DummyAnalyzer()
584
+ ```
585
+
586
+ </details>
587
+ </td>
588
+ </tr>
589
+ </tbody>
590
+ </table>
591
+
592
+ </details>
593
+
594
+ <details><summary><b>Step 3: Configure Sink/Informer</b></summary>
595
+
596
+ <table ><tbody ><tr></tr><tr>
597
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/slack.svg" width="25" height="25"><b>Slack</b></summary><hr>
598
+
599
+ ```python
600
+ from obsei.sink.slack_sink import SlackSink, SlackSinkConfig
601
+
602
+ # initialize slack sink config
603
+ sink_config = SlackSinkConfig(
604
+ # Provide slack bot/app token
605
+ # For more detail refer https://slack.com/intl/en-de/help/articles/215770388-Create-and-regenerate-API-tokens
606
+ slack_token="<Slack_app_token>",
607
+ # To get channel id refer https://stackoverflow.com/questions/40940327/what-is-the-simplest-way-to-find-a-slack-team-id-and-a-channel-id
608
+ channel_id="C01LRS6CT9Q"
609
+ )
610
+
611
+ # initialize slack sink
612
+ sink = SlackSink()
613
+ ```
614
+
615
+ </details>
616
+ </td>
617
+ </tr>
618
+ <tr>
619
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/zendesk.png" width="20" height="20"><b>Zendesk</b></summary><hr>
620
+
621
+ ```python
622
+ from obsei.sink.zendesk_sink import ZendeskSink, ZendeskSinkConfig, ZendeskCredInfo
623
+
624
+ # initialize zendesk sink config
625
+ sink_config = ZendeskSinkConfig(
626
+ # provide zendesk domain
627
+ domain="zendesk.com",
628
+ # provide subdomain if you have one
629
+ subdomain=None,
630
+ # Enter zendesk user details
631
+ cred_info=ZendeskCredInfo(
632
+ email="<zendesk_user_email>",
633
+ password="<zendesk_password>"
634
+ )
635
+ )
636
+
637
+ # initialize zendesk sink
638
+ sink = ZendeskSink()
639
+ ```
640
+
641
+ </details>
642
+ </td>
643
+ </tr>
644
+ <tr>
645
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/jira.png" width="20" height="20"><b>Jira</b></summary><hr>
646
+
647
+ ```python
648
+ from obsei.sink.jira_sink import JiraSink, JiraSinkConfig
649
+
650
+ # For testing purpose you can start jira server locally
651
+ # Refer https://developer.atlassian.com/server/framework/atlassian-sdk/atlas-run-standalone/
652
+
653
+ # initialize Jira sink config
654
+ sink_config = JiraSinkConfig(
655
+ url="http://localhost:2990/jira", # Jira server url
656
+ # Jira username & password for user who have permission to create issue
657
+ username="<username>",
658
+ password="<password>",
659
+ # Which type of issue to be created
660
+ # For more information refer https://support.atlassian.com/jira-cloud-administration/docs/what-are-issue-types/
661
+ issue_type={"name": "Task"},
662
+ # Under which project issue to be created
663
+ # For more information refer https://support.atlassian.com/jira-software-cloud/docs/what-is-a-jira-software-project/
664
+ project={"key": "CUS"},
665
+ )
666
+
667
+ # initialize Jira sink
668
+ sink = JiraSink()
669
+ ```
670
+
671
+ </details>
672
+ </td>
673
+ </tr>
674
+ <tr>
675
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/elastic.png" width="20" height="20"><b>ElasticSearch</b></summary><hr>
676
+
677
+ ```python
678
+ from obsei.sink.elasticsearch_sink import ElasticSearchSink, ElasticSearchSinkConfig
679
+
680
+ # For testing purpose you can start Elasticsearch server locally via docker
681
+ # `docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:8.5.0`
682
+
683
+ # initialize Elasticsearch sink config
684
+ sink_config = ElasticSearchSinkConfig(
685
+ # Elasticsearch server
686
+ hosts="http://localhost:9200",
687
+ # Index name, it will create if not exist
688
+ index_name="test",
689
+ )
690
+
691
+ # initialize Elasticsearch sink
692
+ sink = ElasticSearchSink()
693
+ ```
694
+
695
+ </details>
696
+ </td>
697
+ </tr>
698
+ <tr>
699
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/http_api.png" width="20" height="20"><b>Http</b></summary><hr>
700
+
701
+ ```python
702
+ from obsei.sink.http_sink import HttpSink, HttpSinkConfig
703
+
704
+ # For testing purpose you can create mock http server via postman
705
+ # For more details refer https://learning.postman.com/docs/designing-and-developing-your-api/mocking-data/setting-up-mock/
706
+
707
+ # initialize http sink config (Currently only POST call is supported)
708
+ sink_config = HttpSinkConfig(
709
+ # provide http server url
710
+ url="https://localhost:8080/api/path",
711
+ # Here you can add headers you would like to pass with request
712
+ headers={
713
+ "Content-type": "application/json"
714
+ }
715
+ )
716
+
717
+ # To modify or converting the payload, create convertor class
718
+ # Refer obsei.sink.dailyget_sink.PayloadConvertor for example
719
+
720
+ # initialize http sink
721
+ sink = HttpSink()
722
+ ```
723
+
724
+ </details>
725
+ </td>
726
+ </tr>
727
+ <tr>
728
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/pandas.svg" width="20" height="20"><b>Pandas DataFrame</b></summary><hr>
729
+
730
+ ```python
731
+ from pandas import DataFrame
732
+ from obsei.sink.pandas_sink import PandasSink, PandasSinkConfig
733
+
734
+ # initialize pandas sink config
735
+ sink_config = PandasSinkConfig(
736
+ dataframe=DataFrame()
737
+ )
738
+
739
+ # initialize pandas sink
740
+ sink = PandasSink()
741
+ ```
742
+
743
+ </details>
744
+ </td>
745
+ </tr>
746
+ <tr>
747
+ <td><details ><summary><img style="vertical-align:middle;margin:2px 10px" src="https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/logger.png" width="20" height="20"><b>Logger</b></summary><hr>
748
+
749
+ This is useful for testing and dry running the pipeline.
750
+
751
+ ```python
752
+ from obsei.sink.logger_sink import LoggerSink, LoggerSinkConfig
753
+ import logging
754
+ import sys
755
+
756
+ logger = logging.getLogger("Obsei")
757
+ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
758
+
759
+ # initialize logger sink config
760
+ sink_config = LoggerSinkConfig(
761
+ logger=logger,
762
+ level=logging.INFO
763
+ )
764
+
765
+ # initialize logger sink
766
+ sink = LoggerSink()
767
+ ```
768
+
769
+ </details>
770
+ </td>
771
+ </tr>
772
+ </tbody>
773
+ </table>
774
+
775
+ </details>
776
+
777
+ <details><summary><b>Step 4: Join and create workflow</b></summary>
778
+
779
+ `source` will fetch data from the selected source, then feed it to the `analyzer` for processing, whose output we feed into a `sink` to get notified at that sink.
780
+
781
+ ```python
782
+ # Uncomment if you want logger
783
+ # import logging
784
+ # import sys
785
+ # logger = logging.getLogger(__name__)
786
+ # logging.basicConfig(stream=sys.stdout, level=logging.INFO)
787
+
788
+ # This will fetch information from configured source ie twitter, app store etc
789
+ source_response_list = source.lookup(source_config)
790
+
791
+ # Uncomment if you want to log source response
792
+ # for idx, source_response in enumerate(source_response_list):
793
+ # logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
794
+
795
+ # This will execute analyzer (Sentiment, classification etc) on source data with provided analyzer_config
796
+ analyzer_response_list = text_analyzer.analyze_input(
797
+ source_response_list=source_response_list,
798
+ analyzer_config=analyzer_config
799
+ )
800
+
801
+ # Uncomment if you want to log analyzer response
802
+ # for idx, an_response in enumerate(analyzer_response_list):
803
+ # logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
804
+
805
+ # Analyzer output added to segmented_data
806
+ # Uncomment to log it
807
+ # for idx, an_response in enumerate(analyzer_response_list):
808
+ # logger.info(f"analyzed_data#'{idx}'='{an_response.segmented_data.__dict__}'")
809
+
810
+ # This will send analyzed output to configure sink ie Slack, Zendesk etc
811
+ sink_response_list = sink.send_data(analyzer_response_list, sink_config)
812
+
813
+ # Uncomment if you want to log sink response
814
+ # for sink_response in sink_response_list:
815
+ # if sink_response is not None:
816
+ # logger.info(f"sink_response='{sink_response}'")
817
+ ```
818
+
819
+ </details>
820
+
821
+ <details><summary><b>Step 5: Execute workflow</b></summary>
822
+ Copy the code snippets from <b>Steps 1 to 4</b> into a python file, for example <code>example.py</code> and execute the following command -
823
+
824
+ ```shell
825
+ python example.py
826
+ ```
827
+
828
+ </details>
829
+
830
+ ## Demo
831
+
832
+ We have a minimal [streamlit](https://streamlit.io/) based UI that you can use to test Obsei.
833
+
834
+ ![Screenshot](https://raw.githubusercontent.com/obsei/obsei-resources/master/images/obsei-ui-demo.png)
835
+
836
+ ### Watch UI demo video
837
+
838
+ [![Introductory and demo video](https://img.youtube.com/vi/GTF-Hy96gvY/2.jpg)](https://www.youtube.com/watch?v=GTF-Hy96gvY)
839
+
840
+ Check demo at [![](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/obsei/obsei-demo)
841
+
842
+ (**Note**: Sometimes the Streamlit demo might not work due to rate limiting, use the docker image (locally) in such cases.)
843
+
844
+ To test locally, just run
845
+
846
+ ```
847
+ docker run -d --name obsei-ui -p 8501:8501 obsei/obsei-ui-demo
848
+
849
+ # You can find the UI at http://localhost:8501
850
+ ```
851
+
852
+ **To run Obsei workflow easily using GitHub Actions (no sign ups and cloud hosting required), refer to this [repo](https://github.com/obsei/demo-workflow-action)**.
853
+
854
+ ## Companies/Projects using Obsei
855
+
856
+ Here are some companies/projects (alphabetical order) using Obsei. To add your company/project to the list, please raise a PR or contact us via [email](mailto:contact@obsei.com).
857
+
858
+ - [Oraika](https://www.oraika.com): Contextually understand customer feedback
859
+ - [1Page](https://www.get1page.com/): Giving a better context in meetings and calls
860
+ - [Spacepulse](http://spacepulse.in/): The operating system for spaces
861
+ - [Superblog](https://superblog.ai/): A blazing fast alternative to WordPress and Medium
862
+ - [Zolve](https://zolve.com/): Creating a financial world beyond borders
863
+ - [Utilize](https://www.utilize.app/): No-code app builder for businesses with a deskless workforce
864
+
865
+ ## Articles
866
+
867
+ <table>
868
+ <thead>
869
+ <tr class="header">
870
+ <th>Sr. No.</th>
871
+ <th>Title</th>
872
+ <th>Author</th>
873
+ </tr>
874
+ </thead>
875
+ <tbody>
876
+ <tr>
877
+ <td>1</td>
878
+ <td>
879
+ <a href="https://reenabapna.medium.com/ai-based-comparative-customer-feedback-analysis-using-deep-learning-models-def0dc77aaee">AI based Comparative Customer Feedback Analysis Using Obsei</a>
880
+ </td>
881
+ <td>
882
+ <a href="https://www.linkedin.com/in/reena-bapna-66a8691a">Reena Bapna</a>
883
+ </td>
884
+ </tr>
885
+ <tr>
886
+ <td>2</td>
887
+ <td>
888
+ <a href="https://medium.com/mlearning-ai/linkedin-app-user-feedback-analysis-9c9f98464daa">LinkedIn App - User Feedback Analysis</a>
889
+ </td>
890
+ <td>
891
+ <a href="http://www.linkedin.com/in/himanshusharmads">Himanshu Sharma</a>
892
+ </td>
893
+ </tr>
894
+ </tbody>
895
+ </table>
896
+
897
+ ## Tutorials
898
+
899
+ <table>
900
+ <thead>
901
+ <tr class="header">
902
+ <th>Sr. No.</th>
903
+ <th>Workflow</th>
904
+ <th>Colab</th>
905
+ <th>Binder</th>
906
+ </tr>
907
+ </thead>
908
+ <tbody>
909
+ <tr>
910
+ <td rowspan="2">1</td>
911
+ <td colspan="3">Observe app reviews from Google play store, Analyze them by performing text classification and then Inform them on console via logger</td>
912
+ </tr>
913
+ <tr>
914
+ <td>PlayStore Reviews → Classification → Logger</td>
915
+ <td>
916
+ <a href="https://colab.research.google.com/github/obsei/obsei/blob/master/tutorials/01_PlayStore_Classification_Logger.ipynb">
917
+ <img alt="Colab" src="https://colab.research.google.com/assets/colab-badge.svg">
918
+ </a>
919
+ </td>
920
+ <td>
921
+ <a href="https://mybinder.org/v2/gh/obsei/obsei/HEAD?filepath=tutorials%2F01_PlayStore_Classification_Logger.ipynb">
922
+ <img alt="Colab" src="https://mybinder.org/badge_logo.svg">
923
+ </a>
924
+ </td>
925
+ </tr>
926
+ <tr>
927
+ <td rowspan="2">2</td>
928
+ <td colspan="3">Observe app reviews from Google play store, PreProcess text via various text cleaning functions, Analyze them by performing text classification, Inform them to Pandas DataFrame and store resultant CSV to Google Drive</td>
929
+ </tr>
930
+ <tr>
931
+ <td>PlayStore Reviews → PreProcessing → Classification → Pandas DataFrame → CSV in Google Drive</td>
932
+ <td>
933
+ <a href="https://colab.research.google.com/github/obsei/obsei/blob/master/tutorials/02_PlayStore_PreProc_Classification_Pandas.ipynb">
934
+ <img alt="Colab" src="https://colab.research.google.com/assets/colab-badge.svg">
935
+ </a>
936
+ </td>
937
+ <td>
938
+ <a href="https://mybinder.org/v2/gh/obsei/obsei/HEAD?filepath=tutorials%2F02_PlayStore_PreProc_Classification_Pandas.ipynb">
939
+ <img alt="Colab" src="https://mybinder.org/badge_logo.svg">
940
+ </a>
941
+ </td>
942
+ </tr>
943
+ <tr>
944
+ <td rowspan="2">3</td>
945
+ <td colspan="3">Observe app reviews from Apple app store, PreProcess text via various text cleaning function, Analyze them by performing text classification, Inform them to Pandas DataFrame and store resultant CSV to Google Drive</td>
946
+ </tr>
947
+ <tr>
948
+ <td>AppStore Reviews → PreProcessing → Classification → Pandas DataFrame → CSV in Google Drive</td>
949
+ <td>
950
+ <a href="https://colab.research.google.com/github/obsei/obsei/blob/master/tutorials/03_AppStore_PreProc_Classification_Pandas.ipynb">
951
+ <img alt="Colab" src="https://colab.research.google.com/assets/colab-badge.svg">
952
+ </a>
953
+ </td>
954
+ <td>
955
+ <a href="https://mybinder.org/v2/gh/obsei/obsei/HEAD?filepath=tutorials%2F03_AppStore_PreProc_Classification_Pandas.ipynb">
956
+ <img alt="Colab" src="https://mybinder.org/badge_logo.svg">
957
+ </a>
958
+ </td>
959
+ </tr>
960
+ <tr>
961
+ <td rowspan="2">4</td>
962
+ <td colspan="3">Observe news article from Google news, PreProcess text via various text cleaning function, Analyze them via performing text classification while splitting text in small chunks and later computing final inference using given formula</td>
963
+ </tr>
964
+ <tr>
965
+ <td>Google News → Text Cleaner → Text Splitter → Classification → Inference Aggregator</td>
966
+ <td>
967
+ <a href="https://colab.research.google.com/github/obsei/obsei/blob/master/tutorials/04_GoogleNews_Cleaner_Splitter_Classification_Aggregator.ipynb">
968
+ <img alt="Colab" src="https://colab.research.google.com/assets/colab-badge.svg">
969
+ </a>
970
+ </td>
971
+ <td>
972
+ <a href="https://mybinder.org/v2/gh/obsei/obsei/HEAD?filepath=tutorials%2F04_GoogleNews_Cleaner_Splitter_Classification_Aggregator.ipynb">
973
+ <img alt="Colab" src="https://mybinder.org/badge_logo.svg">
974
+ </a>
975
+ </td>
976
+ </tr>
977
+ </tbody>
978
+ </table>
979
+
980
+ <details><summary><b>💡Tips: Handle large text classification via Obsei</b></summary>
981
+
982
+ ![](https://raw.githubusercontent.com/obsei/obsei-resources/master/gifs/Long_Text_Classification.gif)
983
+
984
+ </details>
985
+
986
+ ## Documentation
987
+
988
+ For detailed installation instructions, usages and examples, refer to our [documentation](https://obsei.github.io/obsei/).
989
+
990
+ ## Support and Release Matrix
991
+
992
+ <table>
993
+ <thead>
994
+ <tr class="header">
995
+ <th></th>
996
+ <th>Linux</th>
997
+ <th>Mac</th>
998
+ <th>Windows</th>
999
+ <th>Remark</th>
1000
+ </tr>
1001
+ </thead>
1002
+ <tbody>
1003
+ <tr>
1004
+ <td>Tests</td>
1005
+ <td style="text-align:center">✅</td>
1006
+ <td style="text-align:center">✅</td>
1007
+ <td style="text-align:center">✅</td>
1008
+ <td>Low Coverage as difficult to test 3rd party libs</td>
1009
+ </tr>
1010
+ <tr>
1011
+ <td>PIP</td>
1012
+ <td style="text-align:center">✅</td>
1013
+ <td style="text-align:center">✅</td>
1014
+ <td style="text-align:center">✅</td>
1015
+ <td>Fully Supported</td>
1016
+ </tr>
1017
+ <tr>
1018
+ <td>Conda</td>
1019
+ <td style="text-align:center">❌</td>
1020
+ <td style="text-align:center">❌</td>
1021
+ <td style="text-align:center">❌</td>
1022
+ <td>Not Supported</td>
1023
+ </tr>
1024
+ </tbody>
1025
+ </table>
1026
+
1027
+ ## Discussion forum
1028
+
1029
+ Discussion about _Obsei_ can be done at [community forum](https://github.com/obsei/obsei/discussions)
1030
+
1031
+ ## Changelogs
1032
+
1033
+ Refer [releases](https://github.com/obsei/obsei/releases) for changelogs
1034
+
1035
+ ## Security Issue
1036
+
1037
+ For any security issue please contact us via [email](mailto:contact@oraika.com)
1038
+
1039
+ ## Stargazers over time
1040
+
1041
+ [![Stargazers over time](https://starchart.cc/obsei/obsei.svg)](https://starchart.cc/obsei/obsei)
1042
+
1043
+ ## Maintainers
1044
+
1045
+ This project is being maintained by [Oraika Technologies](https://www.oraika.com). [Lalit Pagaria](https://github.com/lalitpagaria) and [Girish Patel](https://github.com/GirishPatel) are maintainers of this project.
1046
+
1047
+ ## License
1048
+
1049
+ - Copyright holder: [Oraika Technologies](https://www.oraika.com)
1050
+ - Overall Apache 2.0 and you can read [License](https://github.com/obsei/obsei/blob/master/LICENSE) file.
1051
+ - Multiple other secondary permissive or weak copyleft licenses (LGPL, MIT, BSD etc.) for third-party components refer [Attribution](https://github.com/obsei/obsei/blob/master/ATTRIBUTION.md).
1052
+ - To make the project more commercial friendly, we avoid third-party components which have strong copyleft licenses (GPL, AGPL, etc.) in the project.
1053
+
1054
+ ## Attribution
1055
+
1056
+ This could not have been possible without these [open source softwares](https://github.com/obsei/obsei/blob/master/ATTRIBUTION.md).
1057
+
1058
+ ## Contribution
1059
+
1060
+ First off, thank you for even considering contributing to this package, every contribution big or small is greatly appreciated.
1061
+ Please refer our [Contribution Guideline](https://github.com/obsei/obsei/blob/master/CONTRIBUTING.md) and [Code of Conduct](https://github.com/obsei/obsei/blob/master/CODE_OF_CONDUCT.md).
1062
+
1063
+ Thanks so much to all our contributors
1064
+
1065
+ <a href="https://github.com/obsei/obsei/graphs/contributors">
1066
+ <img src="https://contrib.rocks/image?repo=obsei/obsei" />
1067
+ </a>
obsei_module/SECURITY.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Security Policy
2
+
3
+ ## Reporting a Vulnerability
4
+
5
+ For any security issue please report it via [email](mailto:contact@oraika.com).
obsei_module/__init__.py ADDED
File without changes
obsei_module/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (166 Bytes). View file
 
obsei_module/_config.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ theme: jekyll-theme-primer
2
+ markdown: CommonMarkGhPages
3
+ commonmark:
4
+ options: ["UNSAFE", "SMART", "FOOTNOTES"]
5
+ extensions: ["strikethrough", "autolink", "table", "tagfilter"]
6
+ title: "Obsei: An open-source low-code AI powered automation tool"
7
+ description: "Obsei is an open-source low-code AI powered automation tool"
8
+
9
+ google_analytics: G-0E2FTKBK4T
obsei_module/_includes/head-custom-google-analytics.html ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Global site tag (gtag.js) - Google Analytics -->
2
+ <script async src="https://www.googletagmanager.com/gtag/js?id=G-0E2FTKBK4T"></script>
3
+ <script>
4
+ window.dataLayer = window.dataLayer || [];
5
+ function gtag(){dataLayer.push(arguments);}
6
+ gtag('js', new Date());
7
+
8
+ gtag('config', 'G-0E2FTKBK4T');
9
+ </script>
obsei_module/binder/requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ git+https://github.com/obsei/obsei@master#egg=obsei[all]
2
+ trafilatura
obsei_module/example/app_store_scrapper_example.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Example: scrape Apple App Store reviews and run zero-shot classification.

Pipeline: AppStoreScrapperSource (observe) -> ZeroShotClassificationAnalyzer
(analyze) -> log the results. This is a demo script with module-level side
effects (network scraping and model download) by design.
"""
import logging
import sys
from datetime import datetime, timedelta, timezone

from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)
from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.appstore_scrapper import (
    AppStoreScrapperConfig,
    AppStoreScrapperSource,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# BUG FIX: the original used `datetime.utcnow().astimezone(pytz.utc)`.
# `utcnow()` returns a *naive* datetime holding UTC wall-clock time, and
# `astimezone()` interprets a naive datetime as *local* time, so the result
# was shifted by the machine's UTC offset. `datetime.now(timezone.utc)`
# yields the correct timezone-aware UTC timestamp directly (and removes the
# pytz dependency, which this script no longer needs).
since_time = datetime.now(timezone.utc) + timedelta(days=-5)

# Fetch at most 10 reviews of the Gmail iOS app newer than `since_time`.
source_config = AppStoreScrapperConfig(
    app_url='https://apps.apple.com/us/app/gmail-email-by-google/id422689480',
    lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
    max_count=10,
)

source = AppStoreScrapperSource()

# Zero-shot classifier; device="auto" selects GPU (cuda:0) when available,
# otherwise CPU.
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

# Classify each review against the user-provided candidate labels.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["interface", "slow", "battery"],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
obsei_module/example/daily_get_example.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import sys
from pathlib import Path

from obsei.sink.dailyget_sink import DailyGetSink, DailyGetSinkConfig
from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Sink setup: every credential/identifier is read from environment variables,
# and the partner/consumer pair is repeated inside the HTTP base payload.
sink_config = DailyGetSinkConfig(
    url=os.environ["DAILYGET_URL"],
    partner_id=os.environ["DAILYGET_PARTNER_ID"],
    consumer_phone_number=os.environ["DAILYGET_CONSUMER_NUMBER"],
    source_information="Twitter " + os.environ["DAILYGET_QUERY"],
    base_payload={
        "partnerId": os.environ["DAILYGET_PARTNER_ID"],
        "consumerPhoneNumber": os.environ["DAILYGET_CONSUMER_NUMBER"],
    },
)

dir_path = Path(__file__).resolve().parent.parent

# Source setup: search tweets matching the configured query over the lookup period.
source_config = TwitterSourceConfig(
    keywords=[os.environ["DAILYGET_QUERY"]],
    lookup_period=os.environ["DAILYGET_LOOKUP_PERIOD"],
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
)

source = TwitterSource()
sink = DailyGetSink()
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers",
    # model_name_or_path="joeddav/xlm-roberta-large-xnli",
)

# Fetch tweets and log the raw payloads.
source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

# Zero-shot classify each tweet against the complaint categories below.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=[
            "service",
            "delay",
            "tracking",
            "no response",
            "missing items",
            "delivery",
            "mask",
        ],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")

# HTTP Sink
sink_response_list = sink.send_data(analyzer_response_list, sink_config)
for sink_response in sink_response_list:
    if sink_response is not None:
        logger.info(f"sink_response='{sink_response.__dict__}'")
obsei_module/example/elasticsearch_example.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys
from pathlib import Path

from obsei.sink.elasticsearch_sink import ElasticSearchSink, ElasticSearchSinkConfig
from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

dir_path = Path(__file__).resolve().parent.parent

# Pull the last hour of tweets mentioning the handle.
source_config = TwitterSourceConfig(
    keywords="@Handle",
    lookup_period="1h",  # 1 Hour
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
)

source = TwitterSource()
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers",
)

# Start Elasticsearch server locally
# `docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2`
sink_config = ElasticSearchSinkConfig(
    host="localhost",
    port=9200,
    index_name="test",
)

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

# Classify tweets, then index the classified documents into Elasticsearch.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=[
            "service",
            "delay",
            "tracking",
            "no response",
            "missing items",
            "delivery",
            "mask",
        ],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")

sink = ElasticSearchSink()
sink_response = sink.send_data(analyzer_response_list, sink_config)
logger.info(f"sink_response='{sink_response}'")
obsei_module/example/email_source_example.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import sys
from datetime import datetime, timedelta

import pytz

from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.email_source import EmailConfig, EmailCredInfo, EmailSource

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# FIX: the original used datetime.utcnow().astimezone(pytz.utc). utcnow() is naive,
# so astimezone() treated it as *local* time and shifted it by the local UTC offset,
# skewing the lookup window on non-UTC machines. datetime.now(pytz.utc) is correct.
since_time = datetime.now(pytz.utc) + timedelta(hours=-10)

# List of IMAP servers for most commonly used email providers
# https://www.systoolsgroup.com/imap/
# Also, if you're using a Gmail account then make sure you allow less secure apps on your account -
# https://myaccount.google.com/lesssecureapps?pli=1
# Also enable IMAP access -
# https://mail.google.com/mail/u/0/#settings/fwdandpop
source_config = EmailConfig(
    imap_server="imap.gmail.com",
    cred_info=EmailCredInfo(
        # It will fetch username and password from environment variable
        username=os.environ.get("email_username"),
        password=os.environ.get("email_password"),
    ),
    lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
)

source = EmailSource()
source_response_list = source.lookup(source_config)

for source_response in source_response_list:
    logger.info(source_response.__dict__)
obsei_module/example/facebook_example.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys

from obsei.source.facebook_source import FacebookSource, FacebookSourceConfig

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Fetch posts from the given Facebook page over the last two months ("2M").
source_config = FacebookSourceConfig(page_id="110844591144719", lookup_period="2M")
source = FacebookSource()
source_response_list = source.lookup(source_config)

# Dump the full response objects first, then only the processed text of each post.
logger.info("DETAILS:")
for response in source_response_list:
    logger.info(response)

logger.info("TEXT:")
for response in source_response_list:
    logger.info(response.processed_text)
obsei_module/example/google_news_example.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)
from obsei.source.google_news_source import GoogleNewsConfig, GoogleNewsSource

# Only fetch title and highlight
source_config_without_full_text = GoogleNewsConfig(
    query="ai",
    max_results=150,
    after_date='2023-12-01',
    before_date='2023-12-31',
)

# Fetch full news article
source_config_with_full_text = GoogleNewsConfig(
    query="ai",
    max_results=5,
    fetch_article=True,
    lookup_period="1d",
    # proxy="http://127.0.0.1:8080"
)

source = GoogleNewsSource()

# Shared zero-shot classification setup for both runs.
analyzer_config = ClassificationAnalyzerConfig(
    labels=["buy", "sell", "going up", "going down"],
)

text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

# Run both lookups: headlines-only, then full-article fetch.
news_articles_without_full_text = source.lookup(source_config_without_full_text)

news_articles_with_full_text = source.lookup(source_config_with_full_text)


analyzer_responses_without_full_text = text_analyzer.analyze_input(
    source_response_list=news_articles_without_full_text,
    analyzer_config=analyzer_config,
)

analyzer_responses_with_full_text = text_analyzer.analyze_input(
    source_response_list=news_articles_with_full_text, analyzer_config=analyzer_config
)

# Print raw articles and their classifications for each variant in turn.
for article in news_articles_without_full_text:
    print(article.__dict__)

for response in analyzer_responses_without_full_text:
    print(response.__dict__)

for article in news_articles_with_full_text:
    print(article.__dict__)

for response in analyzer_responses_with_full_text:
    print(response.__dict__)
obsei_module/example/jira_example.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Jira Sink
import logging
import os
import sys
from pathlib import Path

from pydantic import SecretStr

from obsei.sink.jira_sink import JiraSink, JiraSinkConfig
from obsei.source.twitter_source import (
    TwitterCredentials,
    TwitterSource,
    TwitterSourceConfig,
)
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

dir_path = Path(__file__).resolve().parent.parent

# Search the last hour of tweets containing "facing issue"; credentials are
# read from environment variables and wrapped in SecretStr.
source_config = TwitterSourceConfig(
    keywords=["facing issue"],
    lookup_period="1h",
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
    cred_info=TwitterCredentials(
        consumer_key=SecretStr(os.environ["twitter_consumer_key"]),
        consumer_secret=SecretStr(os.environ["twitter_consumer_secret"]),
    ),
)

source = TwitterSource()

# To start jira server locally `atlas-run-standalone --product jira`
jira_sink_config = JiraSinkConfig(
    url="http://localhost:2990/jira",
    username=SecretStr("admin"),
    password=SecretStr("admin"),
    issue_type={"name": "Task"},
    project={"key": "CUS"},
)
jira_sink = JiraSink()

text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers"
)

# Fetch tweets, classify them, then file one Jira issue per analyzed tweet.
source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["service", "delay", "performance"],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")

sink_response_list = jira_sink.send_data(analyzer_response_list, jira_sink_config)
for sink_response in sink_response_list:
    if sink_response is not None:
        logger.info(f"sink_response='{sink_response}'")
obsei_module/example/maps_review_scrapper_example.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys

from obsei.source.google_maps_reviews import (OSGoogleMapsReviewsConfig,
                                              OSGoogleMapsReviewsSource)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Scrape 3 Google Maps reviews for the given place URL via the Outscraper API.
source_config = OSGoogleMapsReviewsConfig(
    api_key="<Enter Your API Key>",  # Get API key from https://outscraper.com/
    queries=[
        "https://www.google.co.in/maps/place/Taj+Mahal/@27.1751496,78.0399535,17z/data=!4m5!3m4!1s0x39747121d702ff6d:0xdd2ae4803f767dde!8m2!3d27.1751448!4d78.0421422"
    ],
    number_of_reviews=3,
)

source = OSGoogleMapsReviewsSource()

# Log the raw payload of each fetched review.
source_response_list = source.lookup(source_config)
for review in source_response_list:
    logger.info(review.__dict__)
obsei_module/example/pandas_sink_example.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys

from pandas import DataFrame

from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)
from obsei.sink.pandas_sink import PandasSink, PandasSinkConfig
from obsei.source.playstore_scrapper import (
    PlayStoreScrapperConfig,
    PlayStoreScrapperSource,
)


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Scrape 3 US Play Store reviews for the given package.
source_config = PlayStoreScrapperConfig(
    countries=["us"], package_name="com.apcoaconnect", max_count=3
)

source = PlayStoreScrapperSource()

text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

# initialize pandas sink config
sink_config = PandasSinkConfig(dataframe=DataFrame())

# initialize pandas sink
sink = PandasSink()

source_response_list = source.lookup(source_config)

# Classify the reviews, collect results into the sink's DataFrame, and dump as CSV.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["no parking", "registration issue", "app issue", "payment issue"],
    ),
)

dataframe = sink.send_data(
    analyzer_responses=analyzer_response_list, config=sink_config
)

print(dataframe.to_csv())
obsei_module/example/pandas_source_example.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd

from obsei.source.pandas_source import (
    PandasSourceConfig,
    PandasSource,
)
import logging
import sys

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Initialize your Pandas DataFrame from your sources like csv, excel, sql etc
# In following example we are reading csv which have two columns title and text
csv_file = "https://raw.githubusercontent.com/deepset-ai/haystack/master/tutorials/small_generator_dataset.csv"
dataframe = pd.read_csv(csv_file)

# "text" supplies the payload text; "title" is carried along as metadata.
source_config = PandasSourceConfig(
    dataframe=dataframe,
    include_columns=["title"],
    text_columns=["text"],
)
source = PandasSource()

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
obsei_module/example/pii_analyzer_example.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys

from obsei.payload import TextPayload
from obsei.analyzer.pii_analyzer import (
    PresidioEngineConfig,
    PresidioModelConfig,
    PresidioPIIAnalyzer,
    PresidioPIIAnalyzerConfig,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# analyze_only=False also anonymizes; return_decision_process exposes why each
# entity was flagged.
analyzer_config = PresidioPIIAnalyzerConfig(
    analyze_only=False, return_decision_process=True
)
analyzer = PresidioPIIAnalyzer(
    engine_config=PresidioEngineConfig(
        nlp_engine_name="spacy",
        models=[PresidioModelConfig(model_name="en_core_web_lg", lang_code="en")],
    )
)

text_to_anonymize = "His name is Mr. Jones and his phone number is 212-555-5555"

analyzer_results = analyzer.analyze_input(
    source_response_list=[TextPayload(processed_text=text_to_anonymize)],
    analyzer_config=analyzer_config,
)

for analyzer_result in analyzer_results:
    # FIX: the original called logging.info (the root logger) here while the rest
    # of the script logs through the module-level `logger` configured above.
    logger.info(analyzer_result.to_dict())
obsei_module/example/play_store_reviews_example.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# TDB

# Need proper service account file to test the changes :(
# Placeholder until real Play Store review fetching is wired up.
message = "TBD"
print(message)
obsei_module/example/playstore_scrapper_example.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys

from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

from obsei.source.playstore_scrapper import (
    PlayStoreScrapperConfig,
    PlayStoreScrapperSource,
)


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Scrape 3 recent Play Store reviews for Gmail directly from its store URL.
source_config = PlayStoreScrapperConfig(
    app_url='https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en_IN&gl=US',
    max_count=3
)

source = PlayStoreScrapperSource()

# Zero-shot classifier; "auto" selects GPU when available.
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

# Tag each review against the candidate labels.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["interface", "slow", "battery"],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
obsei_module/example/playstore_scrapper_translator_example.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import logging
import sys
from datetime import datetime, timedelta

import pytz

from obsei.payload import TextPayload
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)
from obsei.analyzer.translation_analyzer import TranslationAnalyzer
from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.playstore_scrapper import (
    PlayStoreScrapperConfig,
    PlayStoreScrapperSource,
)


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
source = PlayStoreScrapperSource()


def source_fetch():
    """Scrape up to 5 US Play Store reviews for the app from the last day."""
    # FIX: the original used datetime.utcnow().astimezone(pytz.utc); utcnow() is
    # naive, so astimezone() re-interpreted it as local time and skewed the
    # lookup window by the local UTC offset. datetime.now(pytz.utc) is correct.
    since_time = datetime.now(pytz.utc) + timedelta(days=-1)
    source_config = PlayStoreScrapperConfig(
        countries=["us"],
        package_name="com.color.apps.hindikeyboard.hindi.language",
        lookup_period=since_time.strftime(
            DATETIME_STRING_PATTERN
        ),  # todo should be optional
        max_count=5,
    )
    return source.lookup(source_config)


def translate_text(text_list):
    """Translate Hindi payloads to English and re-wrap them as TextPayloads."""
    translate_analyzer = TranslationAnalyzer(
        model_name_or_path="Helsinki-NLP/opus-mt-hi-en", device="auto"
    )
    source_responses = [
        TextPayload(processed_text=text.processed_text, source_name="sample")
        for text in text_list
    ]
    analyzer_responses = translate_analyzer.analyze_input(
        source_response_list=source_responses
    )
    return [
        TextPayload(
            processed_text=response.segmented_data["translated_text"],
            source_name="translator",
        )
        for response in analyzer_responses
    ]


def classify_text(text_list):
    """Zero-shot classify the (translated) payloads against fixed labels."""
    text_analyzer = ZeroShotClassificationAnalyzer(
        model_name_or_path="joeddav/bart-large-mnli-yahoo-answers", device="cpu"
    )

    return text_analyzer.analyze_input(
        source_response_list=text_list,
        analyzer_config=ClassificationAnalyzerConfig(
            labels=["no parking", "registration issue", "app issue", "payment issue"],
        ),
    )


def print_list(text_name, text_list):
    """Log each payload as pretty-printed JSON under the given name."""
    for idx, text in enumerate(text_list):
        json_response = json.dumps(text.__dict__, indent=4, sort_keys=True, default=str)
        logger.info(f"\n{text_name}#'{idx}'='{json_response}'")


logger.info("Started...")

# Pipeline: scrape -> translate (hi->en) -> classify, logging each stage.
source_responses_list = source_fetch()
translated_text_list = translate_text(source_responses_list)
analyzer_response_list = classify_text(translated_text_list)

print_list("source_response", source_responses_list)
print_list("translator_response", translated_text_list)
print_list("classifier_response", analyzer_response_list)
obsei_module/example/reddit_example.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys
import time
from datetime import datetime, timedelta

import pytz

from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.reddit_source import RedditConfig, RedditSource
from obsei.workflow.store import WorkflowStore
from obsei.workflow.workflow import Workflow, WorkflowConfig


def print_state(id: str):
    """Log the persisted source state for the given workflow id."""
    logger.info(f"Source State: {source.store.get_source_state(id)}")


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# FIX: the original used datetime.utcnow().astimezone(pytz.utc); utcnow() is naive,
# so astimezone() re-interpreted it as local time and shifted the lookup window by
# the local UTC offset. datetime.now(pytz.utc) gives the intended aware timestamp.
since_time = datetime.now(pytz.utc) + timedelta(hours=-2)
# Credentials will be fetched from env variable named reddit_client_id and reddit_client_secret
source_config = RedditConfig(
    subreddits=["wallstreetbets"],
    lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
)

source = RedditSource(store=WorkflowStore())

workflow = Workflow(
    config=WorkflowConfig(
        source_config=source_config,
    ),
)
source.store.add_workflow(workflow)


# Poll up to 3 times; the workflow store remembers where the last lookup stopped,
# so each iteration only returns new submissions.
for i in range(1, 4):
    print_state(workflow.id)
    source_response_list = source.lookup(source_config, id=workflow.id)

    if source_response_list is None or len(source_response_list) == 0:
        break

    for source_response in source_response_list:
        logger.info(source_response.__dict__)

    time.sleep(10)

print_state(workflow.id)
obsei_module/example/reddit_scrapper_example.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys
from datetime import datetime, timedelta

import pytz

from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.reddit_scrapper import RedditScrapperConfig, RedditScrapperSource


def print_state(id: str):
    """Log the persisted source state for the given workflow id.

    NOTE(review): never called here, and `source` is built without a store —
    calling it would likely fail; kept for parity with the other examples.
    """
    logger.info(f"Source State: {source.store.get_source_state(id)}")


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# FIX: the original used datetime.utcnow().astimezone(pytz.utc); utcnow() is naive,
# so astimezone() re-interpreted it as local time and shifted the lookup window by
# the local UTC offset. datetime.now(pytz.utc) gives the intended aware timestamp.
since_time = datetime.now(pytz.utc) + timedelta(days=-1)

# Scrape the subreddit's RSS feed for posts newer than since_time.
source_config = RedditScrapperConfig(
    url="https://www.reddit.com/r/wallstreetbets/comments/.rss?sort=new",
    user_agent="testscript by u/FitStatistician7378",
    lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
)

source = RedditScrapperSource()

source_response_list = source.lookup(source_config)
for source_response in source_response_list:
    logger.info(source_response.__dict__)
obsei_module/example/sdk.yaml ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Obsei SDK sample configuration. Each top-level key names a component and its
# `_target_` gives the fully-qualified class to instantiate with the remaining keys.

# Twitter source: last day of tweets mentioning @sample.
twitter_source:
  _target_: obsei.source.twitter_source.TwitterSourceConfig
  keywords:
    - "@sample"
  lookup_period: "1d"
  tweet_fields:
    - "author_id"
    - "conversation_id"
    - "created_at"
    - "id"
    - "public_metrics"
    - "text"
  user_fields:
    - "id"
    - "name"
    - "public_metrics"
    - "username"
    - "verified"
  expansions:
    - "author_id"
  place_fields: []
  max_tweets: 10
  credential:
    _target_: obsei.source.twitter_source.TwitterCredentials
    bearer_token: "bearer_token"

# Play Store source via the official reviews API (needs a service account).
play_store_source:
  _target_: obsei.source.playstore_reviews.PlayStoreConfig
  package_name: "com.company.package"
  max_results: 10
  num_retries: 2
  cred_info:
    _target_: obsei.source.playstore_reviews.GoogleCredInfo
    service_cred_file: "foo/credential.json"
    developer_key: "test_key"

# DailyGet HTTP sink; note the values below are literal placeholder strings,
# not references to the keys above.
daily_get_sink:
  _target_: obsei.sink.dailyget_sink.DailyGetSinkConfig
  url: "http://localhost:8080/sample"
  partner_id: "123456"
  consumer_phone_number: "1234567890"
  source_information: "Twitter @sample"
  base_payload:
    partnerId: daily_get_sink.partner_id
    consumerPhoneNumber: daily_get_sink.consumer_phone_number

# Generic HTTP sink.
http_sink:
  _target_: obsei.sink.http_sink.HttpSinkConfig
  url: "http://localhost:8080/sample"

# Local Elasticsearch sink.
elasticsearch_sink:
  _target_: obsei.sink.elasticsearch_sink.ElasticSearchSinkConfig
  host: "localhost"
  port: 9200
  index_name: "test"

# Local Jira sink (atlas-run-standalone defaults).
jira_sink:
  _target_: obsei.sink.jira_sink.JiraSinkConfig
  url: "http://localhost:2990/jira"
  username: "user"
  password: "pass"
  issue_type:
    name: "Task"
  project:
    key: "CUS"

# Zero-shot classification labels (positive/negative labels disabled).
analyzer_config:
  _target_: obsei.analyzer.classification_analyzer.ClassificationAnalyzerConfig
  labels:
    - "service"
    - "delay"
    - "tracking"
    - "no response"
  add_positive_negative_labels: false

analyzer:
  _target_: obsei.analyzer.classification_analyzer.ZeroShotClassificationAnalyzer
  model_name_or_path: "typeform/mobilebert-uncased-mnli"
  device: "auto"

# Slack sink; the jinja template renders the payload as a fenced key/value dump.
slack_sink:
  _target_: obsei.sink.SlackSink

slack_sink_config:
  _target_: obsei.sink.SlackSinkConfig
  slack_token: 'Enter token'
  channel_id: 'slack channel id'
  jinja_template: |
    ```
    {%- for key, value in payload.items() recursive%}
    {%- if value is mapping -%}
    {{loop(value.items())}}
    {%- else %}
    {{key}}: {{value}}
    {%- endif %}
    {%- endfor%}
    ```
obsei_module/example/slack_example.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import sys

from obsei.analyzer.dummy_analyzer import DummyAnalyzer, DummyAnalyzerConfig
from obsei.processor import Processor
from obsei.sink.slack_sink import SlackSink, SlackSinkConfig
from obsei.source.playstore_scrapper import (PlayStoreScrapperConfig,
                                             PlayStoreScrapperSource)
from obsei.workflow.store import WorkflowStore
from obsei.workflow.workflow import Workflow, WorkflowConfig


def print_state(identifier: str):
    """Log the persisted source state for the given workflow id."""
    logger.info(f"Source State: {source.store.get_source_state(identifier)}")


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)


# Shared store so source and sink track progress under the same workflow.
workflow_store = WorkflowStore()

# Scrape 3 recent Play Store reviews for Gmail.
source_config = PlayStoreScrapperConfig(
    app_url='https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en_IN&gl=US',
    max_count=3
)

source = PlayStoreScrapperSource(store=workflow_store)

# Slack token comes from the environment; the jinja template renders each
# payload as a fenced key/value dump.
sink_config = SlackSinkConfig(
    slack_token=os.environ["SLACK_TOKEN"],
    channel_id="C01TUPZ23NZ",
    jinja_template="""
```
{%- for key, value in payload.items() recursive%}
{%- if value is mapping -%}
{{loop(value.items())}}
{%- else %}
{{key}}: {{value}}
{%- endif %}
{%- endfor%}
```
"""
)
sink = SlackSink(store=workflow_store)

# Dummy analyzer: passes payloads through untouched.
analyzer_config = DummyAnalyzerConfig()
analyzer = DummyAnalyzer()

workflow = Workflow(
    config=WorkflowConfig(
        source_config=source_config,
        sink_config=sink_config,
        analyzer_config=analyzer_config,
    ),
)
workflow_store.add_workflow(workflow)

# Processor wires source -> analyzer -> sink and runs the whole workflow.
processor = Processor(
    analyzer=analyzer, sink=sink, source=source, analyzer_config=analyzer_config
)

processor.process(workflow=workflow)

print_state(workflow.id)
obsei_module/example/twitter_source_example.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import sys

from obsei.analyzer.classification_analyzer import ZeroShotClassificationAnalyzer, ClassificationAnalyzerConfig
from obsei.sink.slack_sink import SlackSinkConfig, SlackSink
from obsei.source.twitter_source import TwitterSourceConfig, TwitterSource, TwitterCredentials

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

twitter_cred_info = None

# Enter your twitter credentials
# Get it from https://developer.twitter.com/en/apply-for-access
# Currently it will fetch from environment variables: twitter_bearer_token, twitter_consumer_key, twitter_consumer_secret
# Uncomment below lines if you like to pass credentials directly instead of env variables

# twitter_cred_info = TwitterCredentials(
#     bearer_token='<Enter bearer_token>',
#     consumer_key="<Enter consumer_key>",
#     consumer_secret="<Enter consumer_secret>"
# )

source_config = TwitterSourceConfig(
    query="bitcoin",
    lookup_period="1h",
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
    # FIX: `twitter_cred_info or None` was redundant — the variable is already
    # None unless the credentials block above is uncommented.
    cred_info=twitter_cred_info,
)

source = TwitterSource()


sink_config = SlackSinkConfig(
    # Uncomment below lines if you like to pass credentials directly instead of env variables
    # slack_token="SLACK_TOKEN",
    # channel_id="CHANNEL_ID",
    jinja_template="""
:bell: Hi there!, a new `<{{payload['meta']['tweet_url']}}|tweet>` of interest is found by *Obsei*
>📝 Content:
```{{payload['meta']['text']}}```
>ℹ️Information:
```
User Name: {{payload['meta']['author_info']['name']}} ({{payload['meta']['author_info']['user_url']}})
Tweet Metrics: Retweets={{payload['meta']['public_metrics']['retweet_count']}}, Likes={{payload['meta']['public_metrics']['like_count']}}
Author Metrics: Verified={{payload['meta']['author_info']['verified']}}, Followers={{payload['meta']['author_info']['public_metrics']['followers_count']}}
```
>🧠 AI Engine Data:
```
{%- for key, value in payload['segmented_data']['classifier_data'].items() recursive%}
{%- if value is mapping -%}
{{loop(value.items())}}
{%- else %}
{{key}}: {{value}}
{%- endif %}
{%- endfor%}
```
"""
)
sink = SlackSink()

text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

analyzer_config = ClassificationAnalyzerConfig(
    labels=["going up", "going down"],
    add_positive_negative_labels=False,
)

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=analyzer_config,
)

for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")

# FIX: the original passed `id=id`, i.e. the Python *builtin* `id` function, as
# the workflow identifier — meaningless and potentially confusing downstream.
# No workflow is used in this example, so the argument is dropped.
sink_response_list = sink.send_data(
    analyzer_responses=analyzer_response_list, config=sink_config
)
for idx, sink_response in enumerate(sink_response_list):
    logger.info(f"source_response#'{idx}'='{sink_response.__dict__}'")
obsei_module/example/web_crawler_example.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Fetch full news article
2
+ from obsei.source.website_crawler_source import (
3
+ TrafilaturaCrawlerConfig,
4
+ TrafilaturaCrawlerSource,
5
+ )
6
+
7
+
8
def print_list(response_list):
    """Dump each response's attribute dictionary to stdout, one per line."""
    for item in response_list:
        print(vars(item))
11
+
12
+
13
# Crawl configurations to exercise, in order. Feed and sitemap crawls
# can take a long time to finish.
crawl_configs = [
    # Single URL
    TrafilaturaCrawlerConfig(urls=["https://obsei.github.io/obsei/"]),
    # RSS feed (Note it will take lot of time)
    TrafilaturaCrawlerConfig(
        urls=["https://news.google.com/rss/search?q=bitcoin&hl=en&gl=US&ceid=US:en"],
        is_feed=True,
    ),
    # Full website (Note it will take lot of time)
    TrafilaturaCrawlerConfig(
        urls=["https://haystack.deepset.ai/"],
        is_sitemap=True,
    ),
]

# Run each crawl with a fresh source instance and print the responses.
for source_config in crawl_configs:
    source = TrafilaturaCrawlerSource()
    source_response_list = source.lookup(source_config)
    print_list(source_response_list)
obsei_module/example/with_sdk_config_file.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+
4
+ from obsei.configuration import ObseiConfiguration
5
+
6
logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Every pipeline component is declared in ../example/sdk.yaml and
# instantiated here by its YAML key instead of being built in code.
obsei_configuration = ObseiConfiguration(
    config_path="../example",
    config_filename="sdk.yaml",
)

# Analyzer plus its runtime configuration.
text_analyzer = obsei_configuration.initialize_instance("analyzer")
analyzer_config = obsei_configuration.initialize_instance("analyzer_config")

# Slack sink and its configuration.
slack_source_config = obsei_configuration.initialize_instance("slack_sink_config")
slack_sink = obsei_configuration.initialize_instance("slack_sink")

# Observer (source) configurations.
play_store_source_config = obsei_configuration.initialize_instance("play_store_source")
twitter_source_config = obsei_configuration.initialize_instance("twitter_source")

# Remaining sink configurations.
http_sink_config = obsei_configuration.initialize_instance("http_sink")
daily_get_sink_config = obsei_configuration.initialize_instance("daily_get_sink")
# docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2
elasticsearch_sink_config = obsei_configuration.initialize_instance("elasticsearch_sink")
# Start jira server locally `atlas-run-standalone --product jira`
jira_sink_config = obsei_configuration.initialize_instance("jira_sink")