Prajwal Kailas committed on
Commit
5d427be
1 Parent(s): 45c1511

change scispacy versions

Browse files
ner_datasets/preprocessing/preprocessing_loader.py CHANGED
@@ -20,6 +20,8 @@ class PreprocessingLoader(object):
20
  """
21
  if sentencizer == 'en_core_sci_lg':
22
  return SpacySentencizer(spacy_model='en_core_sci_lg')
 
 
23
  elif sentencizer == 'en_core_web_sm':
24
  return SpacySentencizer(spacy_model='en_core_web_sm')
25
  elif sentencizer == 'note':
@@ -55,9 +57,8 @@ class PreprocessingLoader(object):
55
  elif tokenizer == 'clinical':
56
  # Abbreviations - we won't split tokens that match these (e.g 18F-FDG)
57
  if abbreviations is None:
58
- return ClinicalSpacyTokenizer(spacy_model='en_core_sci_lg', abbreviations=abbreviations)
59
  else:
60
-
61
- return ClinicalSpacyTokenizer(spacy_model='en_core_sci_lg', abbreviations=abbreviations)
62
  else:
63
  raise ValueError('Invalid tokenizer - does not exist')
 
20
  """
21
  if sentencizer == 'en_core_sci_lg':
22
  return SpacySentencizer(spacy_model='en_core_sci_lg')
23
+ elif sentencizer == 'en_core_sci_sm':
24
+ return SpacySentencizer(spacy_model='en_core_sci_sm')
25
  elif sentencizer == 'en_core_web_sm':
26
  return SpacySentencizer(spacy_model='en_core_web_sm')
27
  elif sentencizer == 'note':
 
57
  elif tokenizer == 'clinical':
58
  # Abbreviations - we won't split tokens that match these (e.g 18F-FDG)
59
  if abbreviations is None:
60
+ return ClinicalSpacyTokenizer(spacy_model='en_core_sci_sm', abbreviations=abbreviations)
61
  else:
62
+ return ClinicalSpacyTokenizer(spacy_model='en_core_sci_sm', abbreviations=abbreviations)
 
63
  else:
64
  raise ValueError('Invalid tokenizer - does not exist')
requirements.txt CHANGED
@@ -19,4 +19,5 @@ scispacy
19
  datasets
20
  pytorch-crf
21
  allennlp
22
- pycorenlp
 
 
19
  datasets
20
  pytorch-crf
21
  allennlp
22
+ pycorenlp
23
+ https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_sm-0.4.0.tar.gz