File size: 28,561 Bytes
debd61e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
#speckle utils
import json 
import pandas as pd
import numpy as np
import specklepy
from specklepy.api.client import SpeckleClient
from specklepy.api.credentials import get_default_account, get_local_accounts
from specklepy.transports.server import ServerTransport
from specklepy.api import operations
from specklepy.objects.geometry import Polyline, Point, Mesh

from specklepy.api.wrapper import StreamWrapper
try:
    import openai
except:
    pass

import requests
from datetime import datetime
import copy


# HELP FUNCTION ===============================================================
def helper():
    """
    Prints out the help message for this module.
    """
    print("This module contains a set of utility functions for speckle streams.")
    print("______________________________________________________________________")
    print("It requires the specklepy package to be installed -> !pip install specklepy")
    print("the following functions are available:")
    print("getSpeckleStream(stream_id, branch_name, client)")
    print("getSpeckleGlobals(stream_id, client)")
    print("get_dataframe(objects_raw, return_original_df)")
    print("updateStreamAnalysis(stream_id, new_data, branch_name, geometryGroupPath, match_by_id, openai_key, return_original)")
    print("there are some more function available not documented fully yet, including updating a notion database")
    print("______________________________________________________________________")
    print("for detailed help call >>> help(speckle_utils.function_name) <<< ")
    print("______________________________________________________________________")
    print("standard usage:")
    print("______________________________________________________________________")
    print("retreiving data")
    print("1. import speckle_utils & speckle related libaries from specklepy")
    print("2. create a speckle client -> client = SpeckleClient(host='https://speckle.xyz/')" )
    print("                              client.authenticate_with_token(token='your_token_here')")
    print("3. get a speckle stream -> stream = speckle_utils.getSpeckleStream(stream_id, branch_name, client)")
    print("4. get the stream data -> data = stream['pth']['to']['data']")
    print("5. transform data to dataframe -> df = speckle_utils.get_dataframe(data, return_original_df=False)")
    print("______________________________________________________________________")
    print("updating data")
    print("1. call updateStreamAnalysis --> updateStreamAnalysis(new_data, stream_id, branch_name, geometryGroupPath, match_by_id, openai_key, return_original)")


#==============================================================================

def updateSpeckleStream(stream_id,
                        branch_name,
                        client,
                        data_object,
                        commit_message="Updated the data object",
                        ):
    """
    Updates a speckle stream with a new data object.

    Args:
        stream_id (str): The ID of the speckle stream.
        branch_name (str): The name of the branch within the speckle stream.
        client (specklepy.api.client.Client): A speckle client.
        data_object (dict): The data object to send to the speckle stream.
        commit_message (str): The commit message. Defaults to "Updated the data object".
    """
    # set stream and branch
    branch = client.branch.get(stream_id, branch_name)
    # Get transport
    transport = ServerTransport(client=client, stream_id=stream_id)
    # Send the data object to the speckle stream
    object_id = operations.send(data_object, [transport])

    # Create a new commit with the new object
    commit_id = client.commit.create(
        stream_id,
        object_id= object_id,
        message=commit_message,
        branch_name=branch_name,
    )

    return commit_id
def getSpeckleStream(stream_id,
                     branch_name,
                     client,
                     commit_id=""
                     ):
    """
    Retrieves data from a specific branch of a speckle stream.

    Args:
        stream_id (str): The ID of the speckle stream.
        branch_name (str): The name of the branch within the speckle stream.
        client (specklepy.api.client.Client, optional): A speckle client. Defaults to a global `client`.
        commit_id (str): id of a commit, if nothing is specified, the latest commit will be fetched

    Returns:
        dict: The speckle stream data received from the specified branch.

    This function retrieves the last commit from a specific branch of a speckle stream.
    It uses the provided speckle client to get the branch and commit information, and then 
    retrieves the speckle stream data associated with the last commit.
    It prints out the branch details and the creation dates of the last three commits for debugging purposes.
    """

    print("updated A")

    # set stream and branch
    try:
        branch = client.branch.get(stream_id, branch_name, 3)
        print(branch)
    except:
        branch = client.branch.get(stream_id, branch_name, 1)
        print(branch)

    print("last three commits:")
    [print(ite.createdAt) for ite in branch.commits.items]

    if commit_id == "":
        latest_commit = branch.commits.items[0]
        choosen_commit_id = latest_commit.id
        commit = client.commit.get(stream_id, choosen_commit_id)
        print("latest commit ", branch.commits.items[0].createdAt, " was choosen")
    elif type(commit_id) == type("s"): # string, commit uuid
        choosen_commit_id = commit_id
        commit = client.commit.get(stream_id, choosen_commit_id)
        print("provided commit ", choosen_commit_id, " was choosen")
    elif type(commit_id) == type(1): #int 
        latest_commit = branch.commits.items[commit_id]
        choosen_commit_id = latest_commit.id
        commit = client.commit.get(stream_id, choosen_commit_id)


    print(commit)
    print(commit.referencedObject)
    # get transport
    transport = ServerTransport(client=client, stream_id=stream_id)
    #speckle stream
    res = operations.receive(commit.referencedObject, transport)

    return res
 
def getSpeckleGlobals(stream_id, client):
    """
    Retrieves global analysis information from the "globals" branch of a speckle stream.

    Args:
        stream_id (str): The ID of the speckle stream.
        client (specklepy.api.client.Client, optional): A speckle client. Defaults to a global `client`.

    Returns:
        analysisInfo (dict or None): The analysis information retrieved from globals. None if no globals found.
        analysisGroups (list or None): The analysis groups retrieved from globals. None if no globals found.

    This function attempts to retrieve and parse the analysis information from the "globals" 
    branch of the specified speckle stream. It accesses and parses the "analysisInfo" and "analysisGroups" 
    global attributes, extracts analysis names and UUIDs.
    If no globals are found in the speckle stream, it returns None for both analysisInfo and analysisGroups.
    """
    # get the latest commit
    try:
        # speckle stream globals
        branchGlob = client.branch.get(stream_id, "globals")
        latest_commit_Glob = branchGlob.commits.items[0]
        transport = ServerTransport(client=client, stream_id=stream_id)

        globs = operations.receive(latest_commit_Glob.referencedObject, transport)
        
        # access and parse globals
        #analysisInfo = json.loads(globs["analysisInfo"]["@{0;0;0;0}"][0].replace("'", '"'))
        #analysisGroups = [json.loads(gr.replace("'", '"')) for gr in globs["analysisGroups"]["@{0}"]]

        def get_error_context(e, context=100):
            start = max(0, e.pos - context)
            end = e.pos + context
            error_line = e.doc[start:end]
            pointer_line = ' ' * (e.pos - start - 1) + '^'
            return error_line, pointer_line

        try:
            analysisInfo = json.loads(globs["analysisInfo"]["@{0;0;0;0}"][0].replace("'", '"').replace("None", "null"))
        except json.JSONDecodeError as e:
            print(f"Error decoding analysisInfo: {e}")
            error_line, pointer_line = get_error_context(e)
            print("Error position and surrounding text:")
            print(error_line)
            print(pointer_line)
            analysisInfo = None

        try:
            analysisGroups = [json.loads(gr.replace("'", '"').replace("None", "null")) for gr in globs["analysisGroups"]["@{0}"]]
        except json.JSONDecodeError as e:
            print(f"Error decoding analysisGroups: {e}")
            error_line, pointer_line = get_error_context(e)
            print("Error position and surrounding text:")
            print(error_line)
            print(pointer_line)
            analysisGroups = None



        # extract analysis names 
        analysis_names = []
        analysis_uuid = []
        [(analysis_names.append(key.split("++")[0]),analysis_uuid.append(key.split("++")[1]) ) for key in analysisInfo.keys()]


        # print extracted results
        print("there are global dictionaries with additional information for each analysis")
        print("<analysisGroups> -> ", [list(curgrp.keys()) for curgrp in analysisGroups])
        print("<analysis_names> -> ", analysis_names)                       
        print("<analysis_uuid>  -> ", analysis_uuid)
    except Exception as e:  # catch exception as 'e'
        analysisInfo = None
        analysisGroups = None
        print("No GlOBALS FOUND")
        print(f"Error: {e}")  # print error description
  
    return analysisInfo, analysisGroups



#function to extract non geometry data from speckle 
def get_dataframe(objects_raw, return_original_df=False):
    """
    Creates a pandas DataFrame from a list of raw Speckle objects.

    Args:
        objects_raw (list): List of raw Speckle objects.
        return_original_df (bool, optional): If True, the function also returns the original DataFrame before any conversion to numeric. Defaults to False.

    Returns:
        pd.DataFrame or tuple: If return_original_df is False, returns a DataFrame where all numeric columns have been converted to their respective types, 
                               and non-numeric columns are left unchanged. 
                               If return_original_df is True, returns a tuple where the first item is the converted DataFrame, 
                               and the second item is the original DataFrame before conversion.

    This function iterates over the raw Speckle objects, creating a dictionary for each object that excludes the '@Geometry' attribute. 
    These dictionaries are then used to create a pandas DataFrame. 
    The function attempts to convert each column to a numeric type if possible, and leaves it unchanged if not. 
    Non-convertible values in numeric columns are replaced with their original values.
    """
    # dataFrame
    df_data = []
    # Iterate over speckle objects
    for obj_raw in objects_raw:
        obj = obj_raw.__dict__
        df_obj = {k: v for k, v in obj.items() if k != '@Geometry'}
        df_data.append(df_obj)

    # Create DataFrame and GeoDataFrame
    df = pd.DataFrame(df_data)
    # Convert columns to float or int if possible, preserving non-convertible values <-
    df_copy = df.copy()
    for col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')
        df[col].fillna(df_copy[col], inplace=True)

    if return_original_df:
        return df, df_copy
    else:
        return df
    

def updateStreamAnalysis(
          client,
          new_data,
          stream_id,
          branch_name,
          geometryGroupPath=None,
          match_by_id="",
          openai_key ="",
          return_original = False
      ):
  

    """
    Updates Stream Analysis by modifying object attributes based on new data.

    Args:
        new_data (pandas.DataFrame): DataFrame containing new data.
        stream_id (str): Stream ID.
        branch_name (str): Branch name.
        geometry_group_path (list, optional): Path to geometry group. Defaults to ["@Data", "@{0}"].
        match_by_id (str, optional): key for column that should be used for matching. If empty, the index is used.
        openai_key (str, optional): OpenAI key. If empty no AI commit message is generated Defaults to an empty string.
        return_original (bool, optional): Determines whether to return original speckle stream objects. Defaults to False.

    Returns:
        list:  original speckle stream objects as backup if return_original is set to True.

    This function retrieves the latest commit from a specified branch, obtains the 
    necessary geometry objects, and matches new data with existing objects using 
    an ID mapper. The OpenAI GPT model is optionally used to create a commit summary 
    message. Changes are sent back to the server and a new commit is created, with 
    the original objects returned as a backup if return_original is set to True. 
    The script requires active server connection, necessary permissions, and relies 
    on Speckle and OpenAI's GPT model libraries.
    """
    print("1")
    if geometryGroupPath == None:
        geometryGroupPath = ["@Speckle", "Geometry"]

    branch = client.branch.get(stream_id, branch_name, 2)

    latest_commit = branch.commits.items[0]
    commitID = latest_commit.id 

    commit = client.commit.get(stream_id, commitID)

    # get objects
    transport = ServerTransport(client=client, stream_id=stream_id)

    #speckle stream
    res = operations.receive(commit.referencedObject, transport)

    # get geometry objects (they carry the attributes)
    objects_raw = res[geometryGroupPath[0]][geometryGroupPath[1]]
    res_new = copy.deepcopy(res)
    print("2")
    # map ids 
    id_mapper = {}
    if match_by_id != "":
        for i, obj in enumerate(objects_raw):
            id_mapper[obj[match_by_id]] = i
    else:
        for i, obj in enumerate(objects_raw):
            id_mapper[str(i)] = i
    print("3")
    # iterate through rows (objects)
    for index, row in new_data.iterrows():
        #determin target object 
        if match_by_id != "":
            local_id = row[match_by_id]
        else:
            local_id = index
        target_id = id_mapper[local_id]     

        #iterate through columns (attributes)
        for col_name in new_data.columns:
            res_new[geometryGroupPath[0]][geometryGroupPath[1]][target_id][col_name] = row[col_name]

    print("4")
    # ======================== OPEN AI FUN ===========================
    """
    try:
        try:
            answer_summary = gptCommitMessage(objects_raw, new_data,openai_key)
            if answer_summary == None:
                _, answer_summary = compareStats(get_dataframe(objects_raw),new_data)
        except:
            _, answer_summary = compareStats(get_dataframe(objects_raw),new_data)
    except:
        answer_summary = ""
    """
    answer_summary = ""
    # ================================================================
    print("5")
    new_objects_raw_speckle_id = operations.send(base=res_new, transports=[transport])
    print("6")
    # You can now create a commit on your stream with this object
    commit_id = client.commit.create(
        stream_id=stream_id,
        branch_name=branch_name,
        object_id=new_objects_raw_speckle_id,
        message="Updated item in colab -" + answer_summary,
        )
    print("7")
    print("Commit created!")
    if return_original:
        return objects_raw #as back-up

def custom_describe(df):
    # Convert columns to numeric if possible
    df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

    # Initial describe with 'include = all'
    desc = df.describe(include='all')

    # Desired statistics
    desired_stats = ['count', 'unique', 'mean', 'min', 'max']

    # Filter for desired statistics
    result = desc.loc[desired_stats, :].copy()
    return result

def compareStats(df_before, df_after):
  """
    Compares the descriptive statistics of two pandas DataFrames before and after some operations.

    Args:
        df_before (pd.DataFrame): DataFrame representing the state of data before operations.
        df_after (pd.DataFrame): DataFrame representing the state of data after operations.

    Returns:
        The CSV string includes column name, intervention type, and before and after statistics for each column.
        The summary string provides a count of updated and new columns.

    This function compares the descriptive statistics of two DataFrames: 'df_before' and 'df_after'. 
    It checks the columns in both DataFrames and categorizes them as either 'updated' or 'new'.
    The 'updated' columns exist in both DataFrames while the 'new' columns exist only in 'df_after'.
    For 'updated' columns, it compares the statistics before and after and notes the differences.
    For 'new' columns, it lists the 'after' statistics and marks the 'before' statistics as 'NA'.
    The function provides a summary with the number of updated and new columns, 
    and a detailed account in CSV format of changes in column statistics.
  """
   
  desc_before = custom_describe(df_before)
  desc_after = custom_describe(df_after)

  # Get union of all columns
  all_columns = set(desc_before.columns).union(set(desc_after.columns))

  # Track number of updated and new columns
  updated_cols = 0
  new_cols = 0

  # Prepare DataFrame output
  output_data = []

  for column in all_columns:
      row_data = {'column': column}
      stat_diff = False  # Track if there's a difference in stats for a column

      # Check if column exists in both dataframes
      if column in desc_before.columns and column in desc_after.columns:
          updated_cols += 1
          row_data['interventionType'] = 'updated'
          for stat in desc_before.index:
              before_val = round(desc_before.loc[stat, column], 1) if pd.api.types.is_number(desc_before.loc[stat, column]) else desc_before.loc[stat, column]
              after_val = round(desc_after.loc[stat, column], 1) if pd.api.types.is_number(desc_after.loc[stat, column]) else desc_after.loc[stat, column]
              if before_val != after_val:
                  stat_diff = True
                  row_data[stat+'_before'] = before_val
                  row_data[stat+'_after'] = after_val
      elif column in desc_after.columns:
          new_cols += 1
          stat_diff = True
          row_data['interventionType'] = 'new'
          for stat in desc_after.index:
              row_data[stat+'_before'] = 'NA'
              after_val = round(desc_after.loc[stat, column], 1) if pd.api.types.is_number(desc_after.loc[stat, column]) else desc_after.loc[stat, column]
              row_data[stat+'_after'] = after_val

      # Only add to output_data if there's actually a difference in the descriptive stats between "before" and "after".
      if stat_diff:
          output_data.append(row_data)

  output_df = pd.DataFrame(output_data)
  csv_output = output_df.to_csv(index=False)
  print (output_df)
  # Add summary to beginning of output
  summary = f"Summary:\n  Number of updated columns: {updated_cols}\n  Number of new columns: {new_cols}\n\n"
  csv_output = summary + csv_output

  return csv_output, summary



# Function to call ChatGPT API
def ask_chatgpt(prompt, model="gpt-3.5-turbo", max_tokens=300, n=1, stop=None, temperature=0.3):
    import openai
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpfull assistant,."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        n=n,
        stop=stop,
        temperature=temperature,
    )
    return response.choices[0].message['content']




def gptCommitMessage(objects_raw, new_data,openai_key):
    # the idea is to automatically create commit messages. Commits coming through this channel are all
    # about updating or adding a dataTable. So we can compare the descriptive stats of a before and after
    # data frame 
    #try:
    try:
        import openai
        openai.api_key = openai_key
    except NameError as ne:
        if str(ne) == "name 'openai' is not defined":
            print("No auto commit message: openai module not imported. Please import the module before setting the API key.")
        elif str(ne) == "name 'openai_key' is not defined":
            print("No auto commit message: openai_key is not defined. Please define the variable before setting the API key.")
        else:
            raise ne

    report, summary = compareStats(get_dataframe(objects_raw),new_data)

    # prompt
    prompt = f"""Given the following changes in my tabular data structure, generate a 
    precise and informative commit message. The changes involve updating or adding 
    attribute keys and values. The provided summary statistics detail the changes in 
    the data from 'before' to 'after'. 
    The CSV format below demonstrates the structure of the summary:

    Summary:
    Number of updated columns: 2
    Number of new columns: 1
    column,interventionType,count_before,count_after,unique_before,unique_after,mean_before,mean_after,min_before,min_after,max_before,max_after
    A,updated,800,800,2,3,,nan,nan,nan,nan,nan
    B,updated,800,800,3,3,,nan,nan,nan,nan,nan
    C,new,NA,800,NA,4,NA,nan,NA,nan,NA,nan

    For the commit message, your focus should be on changes in the data structure, not the interpretation of the content. Be precise, state the facts, and highlight significant differences or trends in the statistics, such as shifts in mean values or an increase in unique entries.

    Based on the above guidance, draft a commit message using the following actual summary statistics:

    {report}

    Your commit message should follow this structure:

    1. Brief description of the overall changes.
    2. Significant changes in summary statistics (count, unique, mean, min, max).
    3. Conclusion, summarizing the most important findings with the strucutre:
    # changed columns: , comment: ,
    # added Columns:  , comment: ,
    # Chaged statistic: ,  coment: ,

    Mark the beginning of the conclusion with ">>>" and ensure to emphasize hard facts and significant findings. 
    """

    try:
        answer = ask_chatgpt(prompt)
        answer_summery = answer.split(">>>")[1]
        if answer == None:
            answer_summery = summary
    except:
        answer_summery = summary

    print(answer_summery)
    return answer_summery

def specklePolyline_to_BokehPatches(speckle_objs, pth_to_geo="curves", id_key="ids"):
  """
  Takes a list of speckle objects, extracts the polyline geometry at the specified path, and returns a dataframe of x and y coordinates for each polyline.
  This format is compatible with the Bokeh Patches object for plotting.
  
  Args:
    speckle_objs (list): A list of Speckle Objects
    pth_to_geo (str): Path to the geometry in the Speckle Object
    id_key (str): The key to use for the uuid in the dataframe. Defaults to "uuid"
    
  Returns:
    pd.DataFrame: A Pandas DataFrame with columns "uuid", "patches_x" and "patches_y"
  """
  patchesDict = {"uuid":[], "patches_x":[], "patches_y":[]}
  
  for obj in speckle_objs:
    obj_geo = obj[pth_to_geo]
    obj_pts = Polyline.as_points(obj_geo)
    coorX = []
    coorY = []
    for pt in obj_pts:
      coorX.append(pt.x)
      coorY.append(pt.y)
    
    patchesDict["patches_x"].append(coorX)
    patchesDict["patches_y"].append(coorY)
    patchesDict["uuid"].append(obj[id_key])

  return pd.DataFrame(patchesDict)



def rebuildAnalysisInfoDict(analysisInfo):
    """rebuild the analysisInfo dictionary to remove the ++ from the keys

    Args:
        analysisInfo (list): a list containing the analysisInfo dictionary

    Returns:
        dict: a dictionary containing the analysisInfo dictionary with keys without the ++

    """
    analysisInfoDict = {}
    for curKey in analysisInfo[0]:
        newkey = curKey.split("++")[0]
        analysisInfoDict[newkey] = analysisInfo[0][curKey]
    return analysisInfoDict


def specklePolyline2Patches(speckle_objs, pth_to_geo="curves", id_key=None):
    """
    Converts Speckle objects' polyline information into a format suitable for Bokeh patches.

    Args:
        speckle_objs (list): A list of Speckle objects.
        pth_to_geo (str, optional): The path to the polyline geometric information in the Speckle objects. Defaults to "curves".
        id_key (str, optional): The key for object identification. Defaults to "uuid".

    Returns:
        DataFrame: A pandas DataFrame with three columns - "uuid", "patches_x", and "patches_y". Each row corresponds to a Speckle object.
                    "uuid" column contains the object's identifier.
                    "patches_x" and "patches_y" columns contain lists of x and y coordinates of the polyline points respectively.

    This function iterates over the given Speckle objects, retrieves the polyline geometric information and the object's id from each Speckle object, 
    and formats this information into a format suitable for Bokeh or matplotlib patches. The formatted information is stored in a dictionary with three lists 
    corresponding to the "uuid", "patches_x", and "patches_y", and this dictionary is then converted into a pandas DataFrame.
    """
    patchesDict = {"patches_x":[], "patches_y":[]}
    if id_key != None:
        patchesDict[id_key] = []

    for obj in speckle_objs:
        obj_geo = obj[pth_to_geo]
        
        coorX = []
        coorY = []
        
        if isinstance(obj_geo, Mesh):
            # For meshes, we'll just use the vertices for now
            for pt in obj_geo.vertices:
                coorX.append(pt.x)
                coorY.append(pt.y)
        else:
            # For polylines, we'll use the existing logic
            obj_pts = Polyline.as_points(obj_geo)
            for pt in obj_pts:
                coorX.append(pt.x)
                coorY.append(pt.y)

        patchesDict["patches_x"].append(coorX)
        patchesDict["patches_y"].append(coorY)
        if id_key != None:
            patchesDict[id_key].append(obj[id_key])

    return pd.DataFrame(patchesDict)


#================= NOTION INTEGRATION ============================
headers = {
    "Notion-Version": "2022-06-28",
    "Content-Type": "application/json"
}

def get_page_id(token, database_id, name):
    headers['Authorization'] = "Bearer " + token
    # Send a POST request to the Notion API
    response = requests.post(f"https://api.notion.com/v1/databases/{database_id}/query", headers=headers)

    # Load the response data
    data = json.loads(response.text)

    # Check each page in the results
    for page in data['results']:
        # If the name matches, return the ID
        if page['properties']['name']['title'][0]['text']['content'] == name:
            return page['id']

    # If no match was found, return None
    return None

def add_or_update_page(token, database_id, name, type, time_updated, comment, speckle_link):
    # Format time_updated as a string 'YYYY-MM-DD'
    date_string = time_updated.strftime('%Y-%m-%d')

    # Construct the data payload
    data = {
        'parent': {'database_id': database_id},
        'properties': {
            'name': {'title': [{'text': {'content': name}}]},
            'type': {'rich_text': [{'text': {'content': type}}]},
            'time_updated': {'date': {'start': date_string}},
            'comment': {'rich_text': [{'text': {'content': comment}}]},
            'speckle_link': {'rich_text': [{'text': {'content': speckle_link}}]}
        }
    }

    # Check if a page with this name already exists
    page_id = get_page_id(token, database_id, name)

    headers['Authorization'] = "Bearer " + token
    if page_id:
        # If the page exists, send a PATCH request to update it
        response = requests.patch(f"https://api.notion.com/v1/pages/{page_id}", headers=headers, data=json.dumps(data))
    else:
        # If the page doesn't exist, send a POST request to create it
        response = requests.post("https://api.notion.com/v1/pages", headers=headers, data=json.dumps(data))
    
    print(response.text)

# Use the function
#add_or_update_page('your_token', 'your_database_id', 'New Title', 'New Type', datetime.now(), 'This is a comment', 'https://your-link.com')