Spaces:

SHEN1017
/

Citelab

Sleeping

App Files Files Community

Citelab / parser.py

SHEN1017

Upload 97 files

96b6673 verified 22 days ago

raw

history blame contribute delete

16.4 kB

	import hashlib
	import copy
	import json
	from citekit.prompt.prompt import Prompt, ALCEDocPrompt,DocPrompt,NewALCEVanillaPrompt

	try:
	example_results = json.load(open('res.json'))
	except:
	example_results = []


	def weight_check(module, target):
	if module.model_type == 'verifier':
	test_module = copy.deepcopy(module)
	test_module.last_message = True
	true_target = test_module.send()
	if str(target) == str(true_target) or ('output' in str(target).lower() and 'output' in str(true_target).lower()):
	return 1
	test_module.last_message = False
	true_target = test_module.send()
	if str(target) == str(true_target) or ('output' in str(target).lower() and 'output' in str(true_target).lower()):
	return -1

	return 0

	def get_params(module):
	import copy
	componets = copy.deepcopy(module.prompt_maker.components if hasattr(module, 'prompt_maker') and hasattr(module.prompt_maker, 'components') else None)
	if componets:
	if isinstance(componets, tuple):
	componets = componets[0]
	componets.update(module.self_prompt)
	destination = module.get_destinations()
	if len(destination) == 1:
	destination = str(destination[0])
	elif not destination:
	destination = 'N/A'
	else:
	destination = 'Not Available for multiple destinations'

	if hasattr(module, 'iterative'):
	mode = 'iterative' if module.iterative else ('parallel' if module.parallel else None)
	else:
	mode = None

	if hasattr(module, 'if_add_output_to_head') and hasattr(module, 'head_key'):
	global_prompt = 'N/A' if module.if_add_output_to_head == False else module.head_key
	else:
	global_prompt = 'N/A'


	params = {
	'type': module.model_type,
	'Model': getattr(module, 'model_name', None),
	'Mode': mode,
	'Max Turn': getattr(module, 'max_turn', None),
	'Destination': destination,
	'Prompt': module.prompt_maker.make_prompt(componets) if hasattr(module,'prompt_maker') and module.prompt_maker else None,
	'Global Prompt': global_prompt,
	}
	print(params)
	non_empty_params = {}
	for k, v in params.items():
	if v:
	non_empty_params[k] = v
	return non_empty_params

	class PipelineGraph:

	def __init__(self, pipeline = None):
	self.pipeline = pipeline
	self.nodes = {'input': {}, 'output': {}}
	self.edges = {} # 邻接表：{from_node: {to_node: weight}}
	self.node_count = 0
	if pipeline:
	self.load_pipeline(pipeline)
	print('FINDING ATTR:', [str(module) for module in pipeline.module])

	def get_auto_node_name(self):
	"""生成自动节点名。"""
	self.node_count += 1
	return f"Node{self.node_count}"

	def update(self):
	self.__init__(pipeline=self.pipeline)

	def load_pipeline(self, pipeline):
	"""从现有Pipeline对象加载图。"""
	initial_module = pipeline.get_initial_module()
	processed_nodes = []
	if not initial_module:
	initial_module = pipeline.llm
	self.add_node(name = str(initial_module),
	params = get_params(module=initial_module))
	self.add_edge(from_node='input', to_node=str(initial_module), weight=0)
	processed_nodes.append(initial_module)
	subnodes = initial_module.get_destinations()
	if not subnodes:
	self.add_edge(from_node=str(initial_module), to_node='output', weight=0)
	return
	# print([str(subnode) for subnode in subnodes])
	for subnode in subnodes:
	weight = weight_check(initial_module, subnode)
	self.load_node(subnode, processed_nodes=processed_nodes)
	self.add_edge(from_node=str(initial_module), to_node=str(subnode), weight=0)
	if hasattr(initial_module,'output_cond') and initial_module.output_cond:
	weight = weight_check(initial_module, 'output')
	self.add_edge(from_node=str(initial_module), to_node='output', weight=weight)


	def load_node(self, module, processed_nodes = None):
	if module in processed_nodes:
	return
	self.add_node(name = str(module),
	params = get_params(module=module))
	processed_nodes.append(module)
	subnodes = module.get_destinations()
	if not subnodes:
	self.add_edge(from_node=str(module), to_node='output', weight=0)
	return
	for subnode in subnodes:
	self.load_node(subnode, processed_nodes)
	weight = weight_check(module, subnode)
	self.add_edge(from_node=str(module), to_node=str(subnode), weight=weight)
	if hasattr(module,'output_cond') and module.output_cond:
	weight = weight_check(module, 'output')
	self.add_edge(from_node=str(module), to_node='output', weight=0)


	def export(self):
	"""将图结构导出为字典格式，包含节点和边的信息。"""
	nodes_export = {name: params.copy() for name, params in self.nodes.items()}
	edges_export = []
	for from_node, connections in self.edges.items():
	for to_node, weight in connections.items():
	edges_export.append({
	'from': from_node,
	'to': to_node,
	'weight': weight
	})
	return {
	'nodes': nodes_export,
	'edges': edges_export
	}

	@classmethod
	def import_from_dict(cls, data):
	"""从字典数据创建新的PipelineGraph实例。"""
	graph = cls() # 创建新实例，此时包含默认的input/output节点
	# 清空默认节点（假设导入数据包含完整节点信息）
	graph.nodes = {}
	# 导入节点
	for name, params in data['nodes'].items():
	graph.add_node(name, params)
	# 导入边
	for edge in data['edges']:
	graph.add_edge(edge['from'], edge['to'], edge['weight'])
	return graph

	def add_node(self, name, params=None):
	"""添加或更新节点，参数可选。"""
	if params is None:
	params = {}
	self.nodes[name] = params.copy()

	def remove_node(self, name):
	"""删除节点，自动移除相关边。不能删除input/output节点。"""
	if name in ['input', 'output']:
	raise ValueError("Cannot remove 'input' or 'output' nodes.")
	if name not in self.nodes:
	return
	del self.nodes[name]
	# 移除该节点作为起点的边
	if name in self.edges:
	del self.edges[name]
	# 移除其他节点到该节点的边
	for from_node in list(self.edges.keys()):
	if name in self.edges[from_node]:
	del self.edges[from_node][name]
	# 若该节点无其他出边，删除空字典以保持整洁
	if not self.edges[from_node]:
	del self.edges[from_node]

	def add_edge(self, from_node, to_node, weight):
	"""添加或更新边，权重必须是0、-1或+1。"""
	if weight not in {0, -1, 1}:
	raise ValueError("Weight must be 0, -1, or 1.")
	if from_node not in self.nodes:
	raise KeyError(f"Node '{from_node}' does not exist.")
	if to_node not in self.nodes:
	raise KeyError(f"Node '{to_node}' does not exist.")
	if from_node not in self.edges:
	self.edges[from_node] = {}
	self.edges[from_node][to_node] = weight

	def remove_edge(self, from_node, to_node):
	"""删除指定边。"""
	if from_node in self.edges and to_node in self.edges[from_node]:
	del self.edges[from_node][to_node]
	# 清理空字典
	if not self.edges[from_node]:
	del self.edges[from_node]

	def get_nodes(self):
	"""返回所有节点名。"""
	return list(self.nodes.keys())

	def get_edges(self):
	"""返回所有边的列表，形式为(from, to, weight)。"""
	edges = []
	for from_node in self.edges:
	for to_node, weight in self.edges[from_node].items():
	edges.append((from_node, to_node, weight))
	return edges

	def update_node_params(self, name, params):
	"""更新节点的参数表。"""
	if name not in self.nodes:
	raise KeyError(f"Node '{name}' does not exist.")
	self.nodes[name].update(params.copy())

	def visualize(self, use_graphviz=True):
	"""
	可视化图结构：
	- 默认尝试用graphviz生成矢量图（需安装graphviz）
	- 若未安装graphviz，则用文本模式输出
	"""
	# 尝试用graphviz生成专业图表
	if use_graphviz:
	try:
	import graphviz
	except ImportError:
	print("Graphviz未安装，切换到文本模式。安装命令：pip install graphviz")
	return self._visualize_text()

	# 创建有向图，设置排版方向为从左到右
	dot = graphviz.Digraph(comment='Pipeline Graph', graph_attr={'rankdir': 'LR'})

	# 添加所有节点（包含参数信息）
	for node_name in self.nodes:
	params = self.nodes[node_name]
	# 将参数字典转换为易读字符串（例如 "size=2, color=red"）
	params_str = ", ".join([f"{k}={v}" for k, v in params.items()])

	# 特殊样式标记input/output节点
	if node_name in ('input', 'output'):
	dot.node(
	node_name,
	label=f"{node_name}\n({params_str})" if params_str else node_name,
	shape='doublecircle', # 双圆表示端点
	style='filled', # 填充颜色
	fillcolor='#e6f3ff' # 浅蓝色背景
	)
	else:
	dot.node(
	node_name,
	label=f"{node_name}\n({params_str})" if params_str else node_name,
	shape='box', # 方框表示普通节点
	style='rounded,filled',# 圆角填充
	fillcolor='#f0f0f0' # 浅灰色背景
	)

	# 添加所有带权重的边
	for from_node, connections in self.edges.items():
	for to_node, weight in connections.items():
	dot.edge(
	from_node,
	to_node,
	label=str(weight), # 显示权重
	color='#666666', # 边颜色
	fontcolor='#ff5555' # 权重文字颜色
	)

	# 生成并自动打开可视化文件（Mac默认用Preview打开PDF）
	dot.render('pipeline_graph', view=True, cleanup=True)
	return

	# 文本模式回退方案
	self._visualize_text()

	def _visualize_text(self):
	"""纯文本模式的可视化（备用方案）"""
	print("="40 + "\nPipeline Graph 文本视图\n" + "="40)

	# 列出所有节点及其参数
	print("\n[节点列表]")
	for node, params in self.nodes.items():
	param_desc = " ".join([f"{k}={v}" for k, v in params.items()])
	print(f"· {node:10} {param_desc}")

	# 列出所有边及其权重
	print("\n[边列表]")
	if not self.edges:
	print("（暂无边连接）")
	else:
	for from_node, connections in self.edges.items():
	for to_node, weight in connections.items():
	arrow = {
	1: "──(+)──>", # +1权重用(+)表示
	-1: "──(-)──>", # -1权重用(-)表示
	0: "───────>" # 0权重无特殊标记
	}[weight]
	print(f"{from_node:10} {arrow} {to_node}")
	print("="*40 + "\n")



	def generate_html(self, results = example_results):
	assert isinstance(results, str)

	import json
	result_path = results
	results = json.load(open(results))
	"""生成交互式可视化HTML，使用D3.js力导向图布局"""
	nodes_data = []
	for name in self.nodes:
	params = self.nodes[name]
	nodes_data.append({
	"id": name,
	"type": params.get("type", name),
	"params": params
	})

	edges_data = []
	for from_node, to_nodes in self.edges.items():
	for to_node, weight in to_nodes.items():
	edges_data.append({
	"source": from_node,
	"target": to_node,
	"weight": weight
	})

	nodes_js = json.dumps(nodes_data)
	edges_js = json.dumps(edges_data)

	with open('htmls/html_templates/pipeline_template.txt', 'r', encoding='utf-8') as f:
	template = f.read()

	template = template.replace('<NODE_JS>', nodes_js).replace('<EDGE_JS>', edges_js).replace('<FILE_PATH>', result_path).replace('<OPTIONS>', "".join(f'<option value="{i}">Result {i+1}</option>' for i in range(len(results))))
	return template


	def get_nodes(self):
	nodes_data = []
	for name in self.nodes:
	params = self.nodes[name]
	nodes_data.append({
	"id": name,
	"type": params.get("type", name),
	"params": params
	})
	return json.dumps(nodes_data)

	def get_edges(self):
	edges_data = []
	for from_node, to_nodes in self.edges.items():
	for to_node, weight in to_nodes.items():
	edges_data.append({
	"source": from_node,
	"target": to_node,
	"weight": weight
	})
	return json.dumps(edges_data)

	def get_json(self):
	return {
	'nodes': self.get_nodes(),
	'edges': self.get_edges()
	}

	def generate_html_embed(self, results = example_results):
	assert isinstance(results, str) or isinstance(results, list)

	import json
	result_path = results
	if isinstance(results, str):
	results = json.load(open(results))
	"""生成交互式可视化HTML，使用D3.js力导向图布局"""
	nodes_data = []
	for name in self.nodes:
	params = self.nodes[name]
	nodes_data.append({
	"id": name,
	"type": params.get("type", name),
	"params": params
	})

	edges_data = []
	for from_node, to_nodes in self.edges.items():
	for to_node, weight in to_nodes.items():
	edges_data.append({
	"source": from_node,
	"target": to_node,
	"weight": weight
	})

	nodes_js = json.dumps(nodes_data)
	edges_js = json.dumps(edges_data)

	with open('htmls/html_templates/pipeline_template_emb.txt', 'r', encoding='utf-8') as f:
	template = f.read()

	template = template.replace('<NODE_JS>', nodes_js).replace('<EDGE_JS>', edges_js).replace('<RESULTS>', json.dumps(results)).replace('<OPTIONS>', "".join(f'<option value="{i}">Result {i+1}</option>' for i in range(len(results))))
	return template

	if __name__ == '__main__':
	# 初始化图
	graph = PipelineGraph()

	# 添加节点
	graph.add_node('processor', {'type': 'transform', 'rate': 0.8})
	graph.add_node('validator', {'threshold': 0.5})

	# 添加边
	graph.add_edge('input', 'processor', 1)
	graph.add_edge('processor', 'validator', -1)
	graph.add_edge('validator', 'output', 0)

	# 更新参数
	graph.update_node_params('processor', {'rate': 1.0})

	graph.visualize()