Spaces:

serJD
/

speckleAggregateBranches

Sleeping

App Files Files Community

speckleAggregateBranches / utils.py

serJD

Update utils.py

d476945 verified over 1 year ago

raw

history blame contribute delete

8.07 kB

	import specklepy
	from specklepy.api.client import SpeckleClient
	from specklepy.api.credentials import get_default_account, get_local_accounts
	from specklepy.transports.server import ServerTransport
	from specklepy.api import operations
	from specklepy.objects.geometry import Polyline, Point, Mesh
	import json
	import pandas as pd
	import numpy as n
	from specklepy.api.wrapper import StreamWrapper
	import requests
	from datetime import datetime
	import copy

	def get_dataframe(objects_raw, return_original_df=False):
	    """
	    Build a pandas DataFrame from a list of raw Speckle objects.

	    Each object contributes one row made of the entries of its ``__dict__``,
	    with the ``'@Geometry'`` entry left out. Every column is then passed
	    through ``pd.to_numeric(..., errors='coerce')``; cells that could not be
	    converted are filled back in with their original values, so a column can
	    end up holding both numbers and the untouched non-numeric values.

	    Args:
	        objects_raw (list): Objects whose attributes are reachable through
	            ``__dict__``.
	        return_original_df (bool, optional): If True, also return the
	            DataFrame as it was before the numeric conversion.
	            Defaults to False.

	    Returns:
	        pd.DataFrame or tuple: The converted DataFrame, or the tuple
	        ``(converted_df, original_df)`` when ``return_original_df`` is True.
	    """
	    # One row per object, without the '@Geometry' entry.
	    rows = [
	        {key: value for key, value in obj_raw.__dict__.items() if key != '@Geometry'}
	        for obj_raw in objects_raw
	    ]
	    df = pd.DataFrame(rows)

	    # Snapshot taken before conversion: it is the source for restoring
	    # non-convertible cells and the optional second return value.
	    df_original = df.copy()

	    for col in df.columns:
	        numeric = pd.to_numeric(df[col], errors='coerce')
	        # Assign the result back instead of calling fillna(inplace=True) on
	        # df[col]: the in-place call on a column selection is a chained
	        # assignment that is not guaranteed to reach df.
	        df[col] = numeric.fillna(df_original[col])

	    if return_original_df:
	        return df, df_original
	    return df

	def aggregate_data_optimized(df_a, df_b, uuid_col_name, ref_col_name, exclude_columns):
	    """
	    Left-merge the columns of ``df_a`` onto ``df_b`` and report match counts.

	    Rows of ``df_b`` are matched to rows of ``df_a`` where
	    ``df_b[ref_col_name] == df_a[uuid_col_name]``. Columns that exist in both
	    frames are overwritten in the result with the values coming from
	    ``df_a`` (rows without a partner therefore hold NaN in those columns);
	    columns that only exist in ``df_a`` are appended.

	    Args:
	        df_a (pd.DataFrame): Source of the values to bring over.
	        df_b (pd.DataFrame): Frame that is kept row-wise (left side of the merge).
	        uuid_col_name (str): Key column in ``df_a``.
	        ref_col_name (str): Column in ``df_b`` holding the referenced keys.
	        exclude_columns (list): Columns of ``df_a`` that must not be brought
	            over. ``uuid_col_name`` is always used for matching even if it is
	            listed here.

	    Returns:
	        tuple: ``(df_merged, log_dict)``. ``log_dict`` has the keys ``'info'``
	        (list of messages), ``'warning'`` (list, left empty here) and
	        ``'summary'`` with ``matched_count``, ``unmatched_count`` and
	        ``total_rows_processed`` as plain ints. ``matched_count`` is the
	        number of ``df_b`` rows whose reference value is non-null and occurs
	        in ``df_a[uuid_col_name]``.
	    """
	    merge_suffix = '_y'

	    # Keep the uuid column even when it is excluded: it is the merge key.
	    columns_to_use = [col for col in df_a.columns if col not in exclude_columns or col == uuid_col_name]
	    df_a_filtered = df_a[columns_to_use]

	    # Name clashes get the suffix on the df_a side only; they are resolved below.
	    df_merged = pd.merge(df_b, df_a_filtered, how='left', left_on=ref_col_name, right_on=uuid_col_name, suffixes=(None, merge_suffix))

	    # Count real matches on df_b itself. In a left merge the reference column
	    # is always carried over, so its non-null count says nothing about
	    # whether a partner row was found in df_a.
	    ref_values = df_b[ref_col_name]
	    has_partner = ref_values.notna() & ref_values.isin(df_a_filtered[uuid_col_name])
	    matched_count = int(has_partner.sum())
	    total_rows = int(df_b.shape[0])

	    log_dict = {
	        'info': [],
	        'warning': [],
	        'summary': {
	            'matched_count': matched_count,
	            'unmatched_count': total_rows - matched_count,
	            'total_rows_processed': total_rows
	        }
	    }
	    log_dict['info'].append("Data aggregation completed successfully.")

	    # Resolve only the columns the merge itself suffixed, i.e. df_a columns
	    # whose name also exists in df_b. Columns of either frame that merely
	    # happen to end in '_y' are left alone.
	    shared_key = uuid_col_name == ref_col_name
	    df_b_columns = set(df_b.columns)
	    for col in columns_to_use:
	        if col not in df_b_columns:
	            continue  # no clash, the column was appended under its own name
	        suffixed = f'{col}{merge_suffix}'
	        if col == uuid_col_name:
	            # df_b has its own column of that name: keep it and discard the
	            # df_a key copy. With a shared key name no copy is created.
	            if not shared_key:
	                df_merged = df_merged.drop(columns=[suffixed], errors='ignore')
	            continue
	        if suffixed in df_merged.columns:
	            # Overwrite df_b's column in place so it keeps its position.
	            df_merged[col] = df_merged.pop(suffixed)

	    # The uuid column was only needed for matching when df_b did not have it.
	    if uuid_col_name not in df_b_columns:
	        df_merged = df_merged.drop(columns=[uuid_col_name], errors='ignore')

	    return df_merged, log_dict



	def updateStreamAnalysisFast(client, new_data, stream_id, branch_name, geometryGroupPath=None, match_by_id="", return_original=False, comm_message=""):
	    """
	    Write the rows of ``new_data`` onto the objects of the latest commit of a
	    branch and send the result back as a new commit on the same branch.

	    Args:
	        client: Speckle client used for the branch/commit calls and for the
	            server transport.
	        new_data (pd.DataFrame): One row per object to update; every column
	            is written onto the matched object under the column name. The
	            frame passed in is not modified.
	        stream_id (str): Id of the stream.
	        branch_name (str): Name of the branch to read from and commit to.
	        geometryGroupPath (list, optional): Keys that lead from the received
	            root object to the list of objects. Defaults to
	            ``["@Speckle", "Geometry"]``.
	        match_by_id (str, optional): Name of the attribute/column used to
	            pair rows with objects. If empty, the row index label is paired
	            with the position of the object in the list.
	        return_original (bool, optional): If True, return the object list
	            instead of the result of the commit call.
	        comm_message (str, optional): Commit message; the id of the source
	            commit is appended to it.

	    Returns:
	        The value returned by ``client.commit.create``, or the object list
	        when ``return_original`` is True.
	    """
	    if geometryGroupPath is None:
	        geometryGroupPath = ["@Speckle", "Geometry"]

	    branch = client.branch.get(stream_id, branch_name, 2)
	    latest_commit = branch.commits.items[0]
	    commit = client.commit.get(stream_id, latest_commit.id)
	    transport = ServerTransport(client=client, stream_id=stream_id)
	    res = operations.receive(commit.referencedObject, transport)

	    # Walk down the group path; works for the default two keys and for any
	    # other depth.
	    objects_raw = res
	    for path_key in geometryGroupPath:
	        objects_raw = objects_raw[path_key]

	    # Keys are normalised to str on both sides of the lookup, so ids match
	    # regardless of being stored as int or str, and positional matching
	    # (no match_by_id) pairs index label 0 with object 0, and so on.
	    if match_by_id:
	        id_to_object_map = {str(obj[match_by_id]): obj for obj in objects_raw}
	        # Indexed copy: the caller's DataFrame keeps its columns.
	        updates_by_id = new_data.set_index(match_by_id)
	    else:
	        id_to_object_map = {str(position): obj for position, obj in enumerate(objects_raw)}
	        updates_by_id = new_data

	    for local_id, updates in updates_by_id.iterrows():
	        target_object = id_to_object_map.get(str(local_id))
	        if target_object is None:
	            continue  # row has no counterpart among the received objects
	        for col_name, value in updates.items():
	            target_object[col_name] = value

	    # Send updated objects back to Speckle
	    new_objects_raw_speckle_id = operations.send(base=res, transports=[transport])
	    commit_id = client.commit.create(stream_id=stream_id, branch_name=branch_name, object_id=new_objects_raw_speckle_id, message=comm_message + "#+SourceCommit: "+latest_commit.id)
	    print("commit created")
	    if return_original:
	        # NOTE(review): these are the same objects that were updated in
	        # place above, so they carry the new values rather than a
	        # pre-update snapshot.
	        return objects_raw

	    return commit_id




	def getSpeckleStream(stream_id,
	                     branch_name,
	                     client,
	                     commit_id=""
	                     ):
	    """
	    Receive the data referenced by one commit of a Speckle branch.

	    Args:
	        stream_id (str): The id of the Speckle stream.
	        branch_name (str): The name of the branch within the stream.
	        client: Speckle client used to query the branch and the commit and
	            to build the server transport.
	        commit_id (str | int | None): Selects the commit.
	            ``""`` or ``None``: the first commit listed for the branch.
	            Non-empty ``str``: used directly as the commit id.
	            ``int``: index into the commits listed for the branch.

	    Returns:
	        The object returned by ``operations.receive`` for the commit's
	        referenced object.

	    Raises:
	        TypeError: If ``commit_id`` is neither a string, an int nor None.

	    The branch, the selected commit and its referenced object id are printed
	    for debugging purposes.
	    """

	    print("updated A")

	    selects_by_index = isinstance(commit_id, int) and not isinstance(commit_id, bool)
	    if commit_id is not None and not isinstance(commit_id, str) and not selects_by_index:
	        # Previously this case fell through every branch below and failed
	        # later on an unbound local variable.
	        raise TypeError("commit_id must be a commit id string, an integer index or None, got " + type(commit_id).__name__)

	    # Ask for enough commits to reach the requested index; a single commit
	    # is sufficient in every other case.
	    commits_limit = max(1, commit_id + 1) if selects_by_index else 1

	    # set stream and branch (the request is retried once if it fails)
	    try:
	        branch = client.branch.get(stream_id, branch_name, commits_limit)
	    except Exception:
	        branch = client.branch.get(stream_id, branch_name, commits_limit)
	    print(branch)

	    print("branch info:", branch)

	    selection_note = None
	    if commit_id is None or commit_id == "":
	        latest_commit = branch.commits.items[0]
	        choosen_commit_id = latest_commit.id
	        selection_note = ("latest commit ", latest_commit.createdAt, " was choosen")
	    elif isinstance(commit_id, str):  # string, commit uuid
	        choosen_commit_id = commit_id
	        selection_note = ("provided commit ", choosen_commit_id, " was choosen")
	    else:  # int, position in the branch's commit list
	        choosen_commit_id = branch.commits.items[commit_id].id

	    commit = client.commit.get(stream_id, choosen_commit_id)
	    if selection_note is not None:
	        print(*selection_note)

	    print(commit)
	    print(commit.referencedObject)
	    # get transport
	    transport = ServerTransport(client=client, stream_id=stream_id)
	    # speckle stream
	    res = operations.receive(commit.referencedObject, transport)

	    return res