aliasgerovs committed on
Commit
28cd001
1 Parent(s): 9bf5d9f
Files changed (3) hide show
  1. app.py +2 -2
  2. highlighter.py +1 -1
  3. nohup.out +261 -0
app.py CHANGED
@@ -20,9 +20,9 @@ with open("config.yaml", "r") as file:
20
  model_list = params["MC_OUTPUT_LABELS"]
21
 
22
 
23
- analyze_and_highlight_bc = partial(segmented_higlighter, bias_buster_selected, model_type="bc")
24
  analyze_and_highlight_quillbot = partial(
25
- segmented_higlighter, bias_buster_selected, model_type="quillbot"
26
  )
27
 
28
 
 
20
  model_list = params["MC_OUTPUT_LABELS"]
21
 
22
 
23
+ analyze_and_highlight_bc = partial(segmented_higlighter, model_type="bc")
24
  analyze_and_highlight_quillbot = partial(
25
+ segmented_higlighter, model_type="quillbot"
26
  )
27
 
28
 
highlighter.py CHANGED
@@ -26,7 +26,7 @@ def explainer(text, model_type):
26
  return sentences_weights, sentences, exp
27
 
28
 
29
- def analyze_and_highlight(text, bias_buster_selected, model_type):
30
  highlighted_text = ""
31
  sentences_weights, sentences, _ = explainer(text, model_type)
32
  positive_weights = [weight for weight in sentences_weights.values() if weight >= 0]
 
26
  return sentences_weights, sentences, exp
27
 
28
 
29
+ def analyze_and_highlight(text, model_type):
30
  highlighted_text = ""
31
  sentences_weights, sentences, _ = explainer(text, model_type)
32
  positive_weights = [weight for weight in sentences_weights.values() if weight >= 0]
nohup.out CHANGED
@@ -722,3 +722,264 @@ Traceback (most recent call last):
722
  chunk = analyze_and_highlight(segment, model_type)
723
  TypeError: analyze_and_highlight() missing 1 required positional argument: 'model_type'
724
  WARNING: Invalid HTTP request received.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722
  chunk = analyze_and_highlight(segment, model_type)
723
  TypeError: analyze_and_highlight() missing 1 required positional argument: 'model_type'
724
  WARNING: Invalid HTTP request received.
725
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
726
+ warnings.warn(
727
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
728
+ warnings.warn(
729
+ 2024-05-24 14:47:48.224818: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
730
+ To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
731
+ [nltk_data] Downloading package punkt to /root/nltk_data...
732
+ [nltk_data] Package punkt is already up-to-date!
733
+ [nltk_data] Downloading package stopwords to /root/nltk_data...
734
+ [nltk_data] Package stopwords is already up-to-date!
735
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
736
+ warnings.warn(
737
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
738
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
739
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
740
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
741
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
742
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
743
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
744
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
745
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
746
+ Framework not specified. Using pt to export the model.
747
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
748
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
749
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
750
+ Using the export variant default. Available variants are:
751
+ - default: The default ONNX variant.
752
+
753
+ ***** Exporting submodel 1/1: RobertaForSequenceClassification *****
754
+ Using framework PyTorch: 2.3.0+cu121
755
+ Overriding 1 configuration item(s)
756
+ - use_cache -> False
757
+ Framework not specified. Using pt to export the model.
758
+ Using the export variant default. Available variants are:
759
+ - default: The default ONNX variant.
760
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
761
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
762
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
763
+ warnings.warn(
764
+
765
+ ***** Exporting submodel 1/3: T5Stack *****
766
+ Using framework PyTorch: 2.3.0+cu121
767
+ Overriding 1 configuration item(s)
768
+ - use_cache -> False
769
+
770
+ ***** Exporting submodel 2/3: T5ForConditionalGeneration *****
771
+ Using framework PyTorch: 2.3.0+cu121
772
+ Overriding 1 configuration item(s)
773
+ - use_cache -> True
774
+ /usr/local/lib/python3.9/dist-packages/transformers/modeling_utils.py:1017: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
775
+ if causal_mask.shape[1] < attention_mask.shape[1]:
776
+
777
+ ***** Exporting submodel 3/3: T5ForConditionalGeneration *****
778
+ Using framework PyTorch: 2.3.0+cu121
779
+ Overriding 1 configuration item(s)
780
+ - use_cache -> True
781
+ /usr/local/lib/python3.9/dist-packages/transformers/models/t5/modeling_t5.py:503: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
782
+ elif past_key_value.shape[2] != key_value_states.shape[1]:
783
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
784
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
785
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
786
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
787
+ [nltk_data] Downloading package cmudict to /root/nltk_data...
788
+ [nltk_data] Package cmudict is already up-to-date!
789
+ [nltk_data] Downloading package punkt to /root/nltk_data...
790
+ [nltk_data] Package punkt is already up-to-date!
791
+ [nltk_data] Downloading package stopwords to /root/nltk_data...
792
+ [nltk_data] Package stopwords is already up-to-date!
793
+ [nltk_data] Downloading package wordnet to /root/nltk_data...
794
+ [nltk_data] Package wordnet is already up-to-date!
795
+ Collecting en-core-web-sm==3.7.1
796
+ Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
797
+ Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.9/dist-packages (from en-core-web-sm==3.7.1) (3.7.2)
798
+ Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.25.1)
799
+ Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)
800
+ Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)
801
+ Requirement already satisfied: thinc<8.3.0,>=8.1.8 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)
802
+ Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)
803
+ Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)
804
+ Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)
805
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.4)
806
+ Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)
807
+ Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.4)
808
+ Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)
809
+ Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (52.0.0)
810
+ Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.1)
811
+ Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.4.0)
812
+ Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)
813
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.0)
814
+ Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)
815
+ Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)
816
+ Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)
817
+ Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.4)
818
+ Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.9/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)
819
+ Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.9/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.1)
820
+ Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)
821
+ Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.2)
822
+ Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.11.0)
823
+ Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)
824
+ Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)
825
+ Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.9/dist-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)
826
+ Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /usr/local/lib/python3.9/dist-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)
827
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)
828
+ ✔ Download and installation successful
829
+ You can now load the package via spacy.load('en_core_web_sm')
830
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
831
+ warnings.warn(
832
+ Traceback (most recent call last):
833
+ File "/home/aliasgarov/copyright_checker/app.py", line 23, in <module>
834
+ analyze_and_highlight_bc = partial(segmented_higlighter, bias_buster_selected, model_type="bc")
835
+ NameError: name 'bias_buster_selected' is not defined
836
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
837
+ warnings.warn(
838
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
839
+ warnings.warn(
840
+ 2024-05-24 14:53:05.309820: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
841
+ To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
842
+ [nltk_data] Downloading package punkt to /root/nltk_data...
843
+ [nltk_data] Package punkt is already up-to-date!
844
+ [nltk_data] Downloading package stopwords to /root/nltk_data...
845
+ [nltk_data] Package stopwords is already up-to-date!
846
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
847
+ warnings.warn(
848
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
849
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
850
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
851
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
852
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
853
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
854
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
855
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
856
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
857
+ Framework not specified. Using pt to export the model.
858
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
859
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
860
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
861
+ Using the export variant default. Available variants are:
862
+ - default: The default ONNX variant.
863
+
864
+ ***** Exporting submodel 1/1: RobertaForSequenceClassification *****
865
+ Using framework PyTorch: 2.3.0+cu121
866
+ Overriding 1 configuration item(s)
867
+ - use_cache -> False
868
+ Framework not specified. Using pt to export the model.
869
+ Using the export variant default. Available variants are:
870
+ - default: The default ONNX variant.
871
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
872
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
873
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
874
+ warnings.warn(
875
+
876
+ ***** Exporting submodel 1/3: T5Stack *****
877
+ Using framework PyTorch: 2.3.0+cu121
878
+ Overriding 1 configuration item(s)
879
+ - use_cache -> False
880
+
881
+ ***** Exporting submodel 2/3: T5ForConditionalGeneration *****
882
+ Using framework PyTorch: 2.3.0+cu121
883
+ Overriding 1 configuration item(s)
884
+ - use_cache -> True
885
+ /usr/local/lib/python3.9/dist-packages/transformers/modeling_utils.py:1017: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
886
+ if causal_mask.shape[1] < attention_mask.shape[1]:
887
+
888
+ ***** Exporting submodel 3/3: T5ForConditionalGeneration *****
889
+ Using framework PyTorch: 2.3.0+cu121
890
+ Overriding 1 configuration item(s)
891
+ - use_cache -> True
892
+ /usr/local/lib/python3.9/dist-packages/transformers/models/t5/modeling_t5.py:503: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
893
+ elif past_key_value.shape[2] != key_value_states.shape[1]:
894
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
895
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
896
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
897
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
898
+ [nltk_data] Downloading package cmudict to /root/nltk_data...
899
+ [nltk_data] Package cmudict is already up-to-date!
900
+ [nltk_data] Downloading package punkt to /root/nltk_data...
901
+ [nltk_data] Package punkt is already up-to-date!
902
+ [nltk_data] Downloading package stopwords to /root/nltk_data...
903
+ [nltk_data] Package stopwords is already up-to-date!
904
+ [nltk_data] Downloading package wordnet to /root/nltk_data...
905
+ [nltk_data] Package wordnet is already up-to-date!
906
+ Collecting en-core-web-sm==3.7.1
907
+ Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
908
+ Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.9/dist-packages (from en-core-web-sm==3.7.1) (3.7.2)
909
+ Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)
910
+ Requirement already satisfied: thinc<8.3.0,>=8.1.8 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)
911
+ Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.25.1)
912
+ Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)
913
+ Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)
914
+ Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.4)
915
+ Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (52.0.0)
916
+ Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)
917
+ Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)
918
+ Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)
919
+ Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.4.0)
920
+ Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)
921
+ Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)
922
+ Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)
923
+ Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)
924
+ Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.4)
925
+ Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.1)
926
+ Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)
927
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.0)
928
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.4)
929
+ Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.9/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)
930
+ Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.9/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.1)
931
+ Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.11.0)
932
+ Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.2)
933
+ Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)
934
+ Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)
935
+ Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)
936
+ Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.9/dist-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)
937
+ Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /usr/local/lib/python3.9/dist-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)
938
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)
939
+ ✔ Download and installation successful
940
+ You can now load the package via spacy.load('en_core_web_sm')
941
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
942
+ warnings.warn(
943
+ Token indices sequence length is longer than the specified maximum sequence length for this model (608 > 512). Running this sequence through the model will result in indexing errors
944
+ Traceback (most recent call last):
945
+ File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
946
+ response = await route_utils.call_process_api(
947
+ File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 261, in call_process_api
948
+ output = await app.get_blocks().process_api(
949
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1786, in process_api
950
+ result = await self.call_function(
951
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1338, in call_function
952
+ prediction = await anyio.to_thread.run_sync(
953
+ File "/usr/local/lib/python3.9/dist-packages/anyio/to_thread.py", line 56, in run_sync
954
+ return await get_async_backend().run_sync_in_worker_thread(
955
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
956
+ return await future
957
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
958
+ result = context.run(func, *args)
959
+ File "/usr/local/lib/python3.9/dist-packages/gradio/utils.py", line 759, in wrapper
960
+ response = f(*args, **kwargs)
961
+ File "/home/aliasgarov/copyright_checker/highlighter.py", line 76, in segmented_higlighter
962
+ chunk = analyze_and_highlight(segment, model_type)
963
+ TypeError: analyze_and_highlight() missing 1 required positional argument: 'model_type'
964
+ /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
965
+ hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
966
+ Traceback (most recent call last):
967
+ File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
968
+ response = await route_utils.call_process_api(
969
+ File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 261, in call_process_api
970
+ output = await app.get_blocks().process_api(
971
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1786, in process_api
972
+ result = await self.call_function(
973
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1338, in call_function
974
+ prediction = await anyio.to_thread.run_sync(
975
+ File "/usr/local/lib/python3.9/dist-packages/anyio/to_thread.py", line 56, in run_sync
976
+ return await get_async_backend().run_sync_in_worker_thread(
977
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
978
+ return await future
979
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
980
+ result = context.run(func, *args)
981
+ File "/usr/local/lib/python3.9/dist-packages/gradio/utils.py", line 759, in wrapper
982
+ response = f(*args, **kwargs)
983
+ File "/home/aliasgarov/copyright_checker/highlighter.py", line 76, in segmented_higlighter
984
+ chunk = analyze_and_highlight(segment, model_type)
985
+ TypeError: analyze_and_highlight() missing 1 required positional argument: 'model_type'