derek-thomas HF staff committed on
Commit
a5a27a1
1 Parent(s): dcc0ff0

Abstracting nomic deletion

Browse files
Files changed (1) hide show
  1. src/build_nomic.py +13 -12
src/build_nomic.py CHANGED
@@ -20,6 +20,18 @@ def count_words(text):
20
  return len(words)
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def build_nomic(dataset):
24
  df = dataset['train'].to_pandas()
25
 
@@ -42,19 +54,8 @@ def build_nomic(dataset):
42
 
43
  df['word_count'] = df['content'].apply(count_words)
44
 
 
45
 
46
- logger.info(f"Trying to delete old version of nomic Atlas...")
47
- try:
48
- ac = AtlasClass()
49
- atlas_id = ac._get_dataset_by_slug_identifier("derek2/boru-subreddit-neural-search")['id']
50
- ac._delete_project_by_id(atlas_id)
51
- logger.info(f"Succeeded in deleting old version of nomic Atlas.")
52
- logger.info(f"Sleeping for 60s to wait for old version deletion on the server-side")
53
- time.sleep(60)
54
- except:
55
- logger.info(f"Failed to delete old version of nomic Atlas.")
56
-
57
-
58
  # Create Atlas project
59
  logger.info(f"Trying to create new version of Atlas...")
60
  project = atlas.map_data(embeddings=np.stack(df['embedding'].values),
 
20
  return len(words)
21
 
22
 
23
def delete_old_nomic():
    """Best-effort removal of the previous nomic Atlas project.

    Looks up the dataset registered under the slug
    ``derek2/boru-subreddit-neural-search``, deletes it by id and, when the
    deletion call succeeds, sleeps for 60 seconds before returning.

    Failures are logged and swallowed, so a missing or undeletable old
    project never blocks the caller. Interpreter-level signals
    (``KeyboardInterrupt``, ``SystemExit``) are deliberately NOT swallowed.

    Returns:
        None
    """
    logger.info("Trying to delete old version of nomic Atlas...")
    try:
        # NOTE(review): these are underscore-prefixed (private) client
        # methods; presumably they may change between nomic releases.
        ac = AtlasClass()
        atlas_id = ac._get_dataset_by_slug_identifier("derek2/boru-subreddit-neural-search")['id']
        ac._delete_project_by_id(atlas_id)
    except Exception as err:
        # Keep this best-effort, but record why the deletion failed.
        logger.info(f"Failed to delete old version of nomic Atlas: {err!r}")
    else:
        logger.info("Succeeded in deleting old version of nomic Atlas.")
        logger.info("Sleeping for 60s to wait for old version deletion on the server-side")
        # Outside the try so an interrupt during the wait is not
        # misreported as a failed deletion.
        time.sleep(60)
34
+
35
  def build_nomic(dataset):
36
  df = dataset['train'].to_pandas()
37
 
 
54
 
55
  df['word_count'] = df['content'].apply(count_words)
56
 
57
+ delete_old_nomic()
58
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  # Create Atlas project
60
  logger.info(f"Trying to create new version of Atlas...")
61
  project = atlas.map_data(embeddings=np.stack(df['embedding'].values),