import specklepy
from specklepy.api.client import SpeckleClient
from specklepy.api.credentials import get_default_account, get_local_accounts
from specklepy.transports.server import ServerTransport
from specklepy.api import operations
from specklepy.objects.geometry import Polyline, Point, Mesh
import json
import pandas as pd
import numpy as np
from specklepy.api.wrapper import StreamWrapper
import requests
from datetime import datetime
import copy
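

# Usage sketch (added; not part of the original module): a minimal way to obtain
# an authenticated client for the helpers below, assuming a local account has
# been added via the Speckle Manager. The host URL is a placeholder.
def example_get_client(host="https://speckle.xyz"):
    client = SpeckleClient(host=host)
    account = get_default_account()
    client.authenticate_with_account(account)
    return client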
def get_dataframe(objects_raw, return_original_df=False):
    """
    Creates a pandas DataFrame from a list of raw Speckle objects.

    Args:
        objects_raw (list): List of raw Speckle objects.
        return_original_df (bool, optional): If True, also returns the original
            DataFrame from before the numeric conversion. Defaults to False.

    Returns:
        pd.DataFrame or tuple: If return_original_df is False, returns a DataFrame
            in which every column that can be interpreted as numeric has been
            converted, while non-numeric columns are left unchanged.
            If return_original_df is True, returns a tuple of (converted DataFrame,
            original DataFrame before conversion).

    The function iterates over the raw Speckle objects, building a dictionary for
    each object that excludes the '@Geometry' attribute. These dictionaries are
    used to create a pandas DataFrame. Each column is converted to a numeric type
    where possible; values that cannot be converted keep their original value.
    """
    df_data = []
    # Iterate over the Speckle objects, dropping the geometry attribute
    for obj_raw in objects_raw:
        obj = obj_raw.__dict__
        df_obj = {k: v for k, v in obj.items() if k != '@Geometry'}
        df_data.append(df_obj)

    # Create the DataFrame
    df = pd.DataFrame(df_data)

    # Convert columns to float or int where possible, preserving non-convertible values
    df_copy = df.copy()
    for col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(df_copy[col])

    if return_original_df:
        return df, df_copy
    else:
        return df
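

# Usage sketch (added; hypothetical data): get_dataframe only needs objects with
# a __dict__, so a SimpleNamespace stands in for a received Speckle object here.
def example_get_dataframe():
    from types import SimpleNamespace
    obj = SimpleNamespace(area="12.5", name="room_a")
    setattr(obj, "@Geometry", None)  # excluded by get_dataframe
    df = get_dataframe([obj])
    print(df.dtypes)  # 'area' becomes float64, 'name' stays object
    return df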
def aggregate_data_optimized(df_a, df_b, uuid_col_name, ref_col_name, exclude_columns):
    """
    Merges df_a into df_b, matching df_b[ref_col_name] against df_a[uuid_col_name],
    and overwrites the shared columns of df_b with the values from df_a.
    Columns listed in exclude_columns are left untouched. Returns the merged
    DataFrame together with a log dictionary summarizing the match counts.
    """
    # Ensure uuid_col_name is included for the merging process
    columns_to_use = [col for col in df_a.columns if col not in exclude_columns or col == uuid_col_name]
    df_a_filtered = df_a[columns_to_use]

    # Merge; overlapping columns coming from df_a receive a '_y' suffix so they
    # can be copied over the originals afterwards
    df_merged = pd.merge(df_b, df_a_filtered, how='left', left_on=ref_col_name, right_on=uuid_col_name, suffixes=(None, '_y'))

    # Initialize a dictionary for logging
    log_dict = {
        'info': [],
        'warning': [],
        'summary': {}
    }

    # Log matched and unmatched counts
    matched_count = df_merged[ref_col_name].notnull().sum()
    unmatched_count = df_b.shape[0] - matched_count
    log_dict['summary'] = {
        'matched_count': matched_count,
        'unmatched_count': unmatched_count,
        'total_rows_processed': df_b.shape[0]
    }
    log_dict['info'].append("Data aggregation completed successfully.")

    # Explicitly overwrite columns in df_b with those from df_a, based on the merge
    for col in columns_to_use:
        if col not in exclude_columns and col != uuid_col_name and f'{col}_y' in df_merged:
            df_merged[col] = df_merged.pop(f'{col}_y')

    # Drop any remaining '_y' columns that were not explicitly handled
    df_merged = df_merged.loc[:, ~df_merged.columns.str.endswith('_y')]

    # If uuid_col_name is not part of the original df_b columns and was only
    # used for matching, remove it again
    if uuid_col_name not in df_b.columns:
        df_merged.drop(columns=[uuid_col_name], inplace=True, errors='ignore')

    return df_merged, log_dict
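

# Usage sketch (added; hypothetical column names): df_a carries an 'id' column
# that df_b references through 'source_id'; 'note' is deliberately excluded.
def example_aggregate():
    df_a = pd.DataFrame({'id': ['a', 'b'], 'area': [10.0, 20.0], 'note': ['x', 'y']})
    df_b = pd.DataFrame({'source_id': ['a', 'b', 'c'], 'area': [0.0, 0.0, 0.0]})
    merged, log = aggregate_data_optimized(df_a, df_b, 'id', 'source_id', exclude_columns=['note'])
    print(log['summary'])  # 'area' is overwritten for matched rows, NaN otherwise
    return merged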
def updateStreamAnalysisFast(client, new_data, stream_id, branch_name, geometryGroupPath=None, match_by_id="", return_original=False, comm_message=""):
    """
    Receives the latest commit of a branch, updates the received objects with the
    values from the new_data DataFrame (matched by match_by_id, or by position if
    no ID column is given), and sends the result back as a new commit.
    """
    if geometryGroupPath is None:
        geometryGroupPath = ["@Speckle", "Geometry"]

    branch = client.branch.get(stream_id, branch_name, 2)
    latest_commit = branch.commits.items[0]
    commit = client.commit.get(stream_id, latest_commit.id)

    transport = ServerTransport(client=client, stream_id=stream_id)
    res = operations.receive(commit.referencedObject, transport)
    objects_raw = res[geometryGroupPath[0]][geometryGroupPath[1]]

    # Pre-build a string-keyed mapping from IDs to objects for fast lookup;
    # fall back to positional indices when no ID column is given
    if match_by_id:
        id_to_object_map = {str(obj[match_by_id]): obj for obj in objects_raw}
        new_data.set_index(match_by_id, inplace=True)
    else:
        id_to_object_map = {str(i): obj for i, obj in enumerate(objects_raw)}

    # Write the DataFrame values onto the matching Speckle objects
    for local_id, updates in new_data.iterrows():
        target_object = id_to_object_map.get(str(local_id))
        if target_object:
            for col_name, value in updates.items():
                target_object[col_name] = value

    # Send the updated objects back to Speckle and create a new commit
    new_objects_raw_speckle_id = operations.send(base=res, transports=[transport])
    commit_id = client.commit.create(stream_id=stream_id, branch_name=branch_name, object_id=new_objects_raw_speckle_id, message=comm_message + "#+SourceCommit: " + latest_commit.id)
    print("commit created")

    if return_original:
        return objects_raw  # as back-up
    return commit_id
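

# Usage sketch (added; hypothetical call): push per-object results back to the
# stream, matching objects by a 'uuid' attribute. The stream ID, branch name,
# and column name are placeholders.
def example_update(client, results_df):
    return updateStreamAnalysisFast(
        client,
        results_df,
        stream_id="your-stream-id",
        branch_name="main",
        match_by_id="uuid",
        comm_message="analysis results",
    )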
def getSpeckleStream(stream_id,
                     branch_name,
                     client,
                     commit_id=""):
    """
    Retrieves data from a specific branch of a Speckle stream.

    Args:
        stream_id (str): The ID of the Speckle stream.
        branch_name (str): The name of the branch within the Speckle stream.
        client (specklepy.api.client.SpeckleClient): An authenticated Speckle client.
        commit_id (str or int, optional): The ID of a commit, or an integer index
            into the branch's commit list. If empty, the latest commit is fetched.

    Returns:
        Base: The Speckle stream data received from the specified commit.

    This function resolves a commit on the given branch (latest, by ID, or by
    index), then receives and returns the Speckle data referenced by that commit.
    """
    # Determine how many commits to fetch: enough to index into the commit list
    # when commit_id is given as an integer index
    commits_limit = commit_id + 1 if isinstance(commit_id, int) else 1

    # Get the branch (retry once on transient failures)
    try:
        branch = client.branch.get(stream_id, branch_name, commits_limit)
    except Exception:
        branch = client.branch.get(stream_id, branch_name, commits_limit)
    print("branch info:", branch)

    if commit_id == "":
        # No commit specified: use the latest commit on the branch
        latest_commit = branch.commits.items[0]
        chosen_commit_id = latest_commit.id
        commit = client.commit.get(stream_id, chosen_commit_id)
        print("latest commit ", branch.commits.items[0].createdAt, " was chosen")
    elif isinstance(commit_id, str):
        # Commit given by its ID
        chosen_commit_id = commit_id
        commit = client.commit.get(stream_id, chosen_commit_id)
        print("provided commit ", chosen_commit_id, " was chosen")
    elif isinstance(commit_id, int):
        # Commit given as an index into the branch's commit list
        latest_commit = branch.commits.items[commit_id]
        chosen_commit_id = latest_commit.id
        commit = client.commit.get(stream_id, chosen_commit_id)

    print(commit)
    print(commit.referencedObject)

    # Get the transport and receive the Speckle stream data
    transport = ServerTransport(client=client, stream_id=stream_id)
    res = operations.receive(commit.referencedObject, transport)

    return res
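

# Usage sketch (added; hypothetical round trip): receive a stream and tabulate
# its objects. The stream ID and branch name are placeholders, and the
# "@Speckle"/"Geometry" path assumes the default layout used by
# updateStreamAnalysisFast above.
def example_receive_and_tabulate(client):
    res = getSpeckleStream("your-stream-id", "main", client)
    objects_raw = res["@Speckle"]["Geometry"]
    return get_dataframe(objects_raw)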