Werli committed
Commit 5818eb3 · verified · 1 Parent(s): 222c734

New changes!


A lot of things have changed: I split parts of the code into modules, which makes it easier to work on instead of getting lost in a mess of words in a single file. Every module works correctly and as expected. Also added a new tab called "Tag Categorizer": it works exactly like when the WD model finishes tagging and then categorizes the tags, so it will help you fix uncategorized tags if you already have some... A little bit of performance improvement, and fixed a lot of things, including the Llama models not working (not recommended to use anyway).
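For a quick sense of how the new Tag Categorizer tab plugs into the module split, here is a minimal usage sketch based on the imports and Gradio wiring in the diff below; the exact return shape of process_tags is an assumption inferred from the two outputs it feeds.

# Minimal sketch (assumption: process_tags takes the comma-separated Danbooru
# tag string from the input Textbox and returns two values, one per Gradio
# output it is wired to: a categorized string and a JSON-able mapping).
from modules.classifyTags import process_tags

categorized_string, categorized_json = process_tags("1girl, cat, horns, blue hair")
print(categorized_string)  # tags grouped by category, as in the "Categorized (string)" box
print(categorized_json)    # the same grouping for the gr.JSON component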

Files changed (4)
  1. app.py +23 -157
  2. modules/classifyTags.py +179 -0
  3. modules/florence2.py +102 -0
  4. modules/llama_loader.py +189 -0
app.py CHANGED
@@ -1,58 +1,29 @@
1
  import os
2
- import io,copy,requests,numpy as np,spaces,gradio as gr
3
- from transformers import AutoProcessor,AutoModelForCausalLM,AutoModelForCausalLM,AutoProcessor
4
- from transformers.dynamic_module_utils import get_imports
5
  from PIL import Image,ImageDraw,ImageFont
6
- import matplotlib.pyplot as plt,matplotlib.patches as patches
7
  from unittest.mock import patch
8
  import argparse,huggingface_hub,onnxruntime as rt,pandas as pd,traceback,tempfile,zipfile,re,ast,time
9
  from datetime import datetime,timezone
10
  from collections import defaultdict
11
- from classifyTags import classify_tags
12
  from apscheduler.schedulers.background import BackgroundScheduler
13
  import json
14
  os.environ['PYTORCH_ENABLE_MPS_FALLBACK']='1'
15
 
16
- def fixed_get_imports(filename:str|os.PathLike)->list[str]:
17
- if not str(filename).endswith('/modeling_florence2.py'):return get_imports(filename)
18
- imports=get_imports(filename)
19
- if'flash_attn'in imports:imports.remove('flash_attn')
20
- return imports
21
- @spaces.GPU
22
- def get_device_type():
23
- import torch
24
- if torch.cuda.is_available():return'cuda'
25
- elif torch.backends.mps.is_available()and torch.backends.mps.is_built():return'mps'
26
- else:return'cpu'
27
-
28
- model_id = 'MiaoshouAI/Florence-2-base-PromptGen-v2.0'
29
-
30
- import subprocess
31
- device = get_device_type()
32
- if (device == "cuda"):
33
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
34
- model = AutoModelForCausalLM.from_pretrained("MiaoshouAI/Florence-2-base-PromptGen-v2.0", trust_remote_code=True)
35
- processor = AutoProcessor.from_pretrained("MiaoshouAI/Florence-2-base-PromptGen-v2.0", trust_remote_code=True)
36
- model.to(device)
37
- else:
38
- with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
39
- model = AutoModelForCausalLM.from_pretrained("MiaoshouAI/Florence-2-base-PromptGen-v2.0", trust_remote_code=True)
40
- processor = AutoProcessor.from_pretrained("MiaoshouAI/Florence-2-base-PromptGen-v2.0", trust_remote_code=True)
41
- model.to(device)
42
-
43
  TITLE = "Multi-Tagger"
44
  DESCRIPTION = """
45
- Multi-Tagger is a versatile application combining Waifu Diffusion and Florence 2 models for advanced image analysis and captioning. Ideal for AI artists, researchers, and enthusiasts, it offers:
46
 
47
- - Batch processing for multiple images.
48
- - Multi-category tagging.
49
- - Structured tag display.
50
- - Image captioning with Florence 2, supporting CUDA, MPS, or CPU.
51
- - Various captioning tasks (Caption, Detailed Caption, Object Detection) with visual outputs.
52
 
53
  Example image by [me.](https://huggingface.co/Werli)
54
  """
55
- colormap=['blue','orange','green','purple','brown','pink','gray','olive','cyan','red','lime','indigo','violet','aqua','magenta','coral','gold','tan','skyblue']
56
 
57
  # Dataset v3 series of models:
58
  SWINV2_MODEL_DSV3_REPO = "SmilingWolf/wd-swinv2-tagger-v3"
@@ -72,14 +43,12 @@ SWINV2_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-swinv2-tagger-v1"
72
  # Files to download from the repos
73
  MODEL_FILENAME = "model.onnx"
74
  LABEL_FILENAME = "selected_tags.csv"
75
- # LLAMA model
76
- META_LLAMA_3_3B_REPO = "jncraton/Llama-3.2-3B-Instruct-ct2-int8"
77
- META_LLAMA_3_8B_REPO = "avans06/Meta-Llama-3.2-8B-Instruct-ct2-int8_float16"
78
 
79
  kaomojis=['0_0','(o)_(o)','+_+','+_-','._.','<o>_<o>','<|>_<|>','=_=','>_<','3_3','6_9','>_o','@_@','^_^','o_o','u_u','x_x','|_|','||_||']
80
  def parse_args()->argparse.Namespace:parser=argparse.ArgumentParser();parser.add_argument('--score-slider-step',type=float,default=.05);parser.add_argument('--score-general-threshold',type=float,default=.35);parser.add_argument('--score-character-threshold',type=float,default=.85);parser.add_argument('--share',action='store_true');return parser.parse_args()
81
  def load_labels(dataframe)->list[str]:name_series=dataframe['name'];name_series=name_series.map(lambda x:x.replace('_',' ')if x not in kaomojis else x);tag_names=name_series.tolist();rating_indexes=list(np.where(dataframe['category']==9)[0]);general_indexes=list(np.where(dataframe['category']==0)[0]);character_indexes=list(np.where(dataframe['category']==4)[0]);return tag_names,rating_indexes,general_indexes,character_indexes
82
  def mcut_threshold(probs):sorted_probs=probs[probs.argsort()[::-1]];difs=sorted_probs[:-1]-sorted_probs[1:];t=difs.argmax();thresh=(sorted_probs[t]+sorted_probs[t+1])/2;return thresh
 
83
  class Timer:
84
  def __init__(self):self.start_time=time.perf_counter();self.checkpoints=[('Start',self.start_time)]
85
  def checkpoint(self,label='Checkpoint'):now=time.perf_counter();self.checkpoints.append((label,now))
@@ -92,51 +61,6 @@ class Timer:
92
  for(label,curr_time)in self.checkpoints[1:]:elapsed=curr_time-prev_time;print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds");prev_time=curr_time
93
  total_time=self.checkpoints[-1][1]-self.start_time;print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n");self.checkpoints.clear()
94
  def restart(self):self.start_time=time.perf_counter();self.checkpoints=[('Start',self.start_time)]
95
- # Llama
96
- class Llama3Reorganize:
97
- def __init__(self,repoId:str,device:str=None,loadModel:bool=False):
98
- self.modelPath=self.download_model(repoId)
99
- if device is None:
100
- import torch;self.totalVram=0
101
- if torch.cuda.is_available():
102
- try:deviceId=torch.cuda.current_device();self.totalVram=torch.cuda.get_device_properties(deviceId).total_memory/1073741824
103
- except Exception as e:print(traceback.format_exc());print('Error detect vram: '+str(e))
104
- device='cuda'if self.totalVram>(8 if'8B'in repoId else 4)else'cpu'
105
- else:device='cpu'
106
- self.device=device;self.system_prompt='Please reorganize the following English labels describing a single image into a readable English article of no more than 300 words. If there are name labels, include them in the article. Since the target audience is not general, explicit content can be written in full without hesitation. No introduction is needed; directly reply with the English article:'
107
- if loadModel:self.load_model()
108
- def download_model(self,repoId):
109
- import warnings,requests;allowPatterns=['config.json','generation_config.json','model.bin','pytorch_model.bin','pytorch_model.bin.index.json','pytorch_model-*.bin','sentencepiece.bpe.model','tokenizer.json','tokenizer_config.json','shared_vocabulary.txt','shared_vocabulary.json','special_tokens_map.json','spiece.model','vocab.json','model.safetensors','model-*.safetensors','model.safetensors.index.json','quantize_config.json','tokenizer.model','vocabulary.json','preprocessor_config.json','added_tokens.json'];kwargs={'allow_patterns':allowPatterns}
110
- try:return huggingface_hub.snapshot_download(repoId,**kwargs)
111
- except(huggingface_hub.utils.HfHubHTTPError,requests.exceptions.ConnectionError)as exception:warnings.warn('An error occured while synchronizing the model %s from the Hugging Face Hub:\n%s',repoId,exception);warnings.warn('Trying to load the model directly from the local cache, if it exists.');kwargs['local_files_only']=True;return huggingface_hub.snapshot_download(repoId,**kwargs)
112
- def load_model(self):
113
- import ctranslate2,transformers
114
- try:print('\n\nLoading model: %s\n\n'%self.modelPath);kwargsTokenizer={'pretrained_model_name_or_path':self.modelPath};kwargsModel={'device':self.device,'model_path':self.modelPath,'compute_type':'auto'};self.roleSystem={'role':'system','content':self.system_prompt};self.Model=ctranslate2.Generator(**kwargsModel);self.Tokenizer=transformers.AutoTokenizer.from_pretrained(**kwargsTokenizer);self.terminators=[self.Tokenizer.eos_token_id,self.Tokenizer.convert_tokens_to_ids('<|eot_id|>')]
115
- except Exception as e:self.release_vram();raise e
116
- def release_vram(self):
117
- try:
118
- import torch
119
- if torch.cuda.is_available():
120
- if getattr(self,'Model',None)is not None and getattr(self.Model,'unload_model',None)is not None:self.Model.unload_model()
121
- if getattr(self,'Tokenizer',None)is not None:del self.Tokenizer
122
- if getattr(self,'Model',None)is not None:del self.Model
123
- import gc;gc.collect()
124
- try:torch.cuda.empty_cache()
125
- except Exception as e:print(traceback.format_exc());print('\tcuda empty cache, error: '+str(e))
126
- print('release vram end.')
127
- except Exception as e:print(traceback.format_exc());print('Error release vram: '+str(e))
128
- def reorganize(self,text:str,max_length:int=400):
129
- output=None;result=None
130
- try:
131
- input_ids=self.Tokenizer.apply_chat_template([self.roleSystem,{'role':'user','content':text+"\n\nHere's the reorganized English article:"}],tokenize=False,add_generation_prompt=True);source=self.Tokenizer.convert_ids_to_tokens(self.Tokenizer.encode(input_ids));output=self.Model.generate_batch([source],max_length=max_length,max_batch_size=2,no_repeat_ngram_size=3,beam_size=2,sampling_temperature=.7,sampling_topp=.9,include_prompt_in_result=False,end_token=self.terminators);target=output[0];result=self.Tokenizer.decode(target.sequences_ids[0])
132
- if len(result)>2:
133
- if result[0]=='"'and result[len(result)-1]=='"':result=result[1:-1]
134
- elif result[0]=="'"and result[len(result)-1]=="'":result=result[1:-1]
135
- elif result[0]=='「'and result[len(result)-1]=='」':result=result[1:-1]
136
- elif result[0]=='『'and result[len(result)-1]=='』':result=result[1:-1]
137
- except Exception as e:print(traceback.format_exc());print('Error reorganize text: '+str(e))
138
- return result
139
- # End Llama
140
  class Predictor:
141
  def __init__(self):
142
  self.model_target_size = None
@@ -258,7 +182,7 @@ class Predictor:
258
 
259
  if llama3_reorganize_model_repo:
260
  print(f"Llama3 reorganize load model {llama3_reorganize_model_repo}")
261
- llama3_reorganize = Llama3Reorganize(llama3_reorganize_model_repo, loadModel=True)
262
  current_progress += progressRatio/progressTotal;
263
  progress(current_progress, desc="Initialize llama3 model finished")
264
  timer.checkpoint(f"Initialize llama3 model")
@@ -367,7 +291,7 @@ class Predictor:
367
 
368
  if llama3_reorganize_model_repo:
369
  print(f"Starting reorganize with llama3...")
370
- reorganize_strings = llama3_reorganize.reorganize(sorted_general_strings)
371
  reorganize_strings = re.sub(r" *Title: *", "", reorganize_strings)
372
  reorganize_strings = re.sub(r"\n+", ",", reorganize_strings)
373
  reorganize_strings = re.sub(r",,+", ",", reorganize_strings)
@@ -406,7 +330,7 @@ class Predictor:
406
  download.append(downloadZipPath)
407
  # End zip creation logic
408
  if llama3_reorganize_model_repo:
409
- llama3_reorganize.release_vram()
410
  del llama3_reorganize
411
 
412
  progress(1, desc=f"Predict completed")
@@ -442,73 +366,8 @@ def remove_image_from_gallery(gallery:list,selected_image:str):
442
  selected_image=ast.literal_eval(selected_image)
443
  if selected_image in gallery:gallery.remove(selected_image)
444
  return gallery
445
- def fig_to_pil(fig):buf=io.BytesIO();fig.savefig(buf,format='png');buf.seek(0);return Image.open(buf)
446
- @spaces.GPU
447
- def run_example(task_prompt,image,text_input=None):
448
- if text_input is None:prompt=task_prompt
449
- else:prompt=task_prompt+text_input
450
- inputs=processor(text=prompt,images=image,return_tensors='pt').to(device);generated_ids=model.generate(input_ids=inputs['input_ids'],pixel_values=inputs['pixel_values'],max_new_tokens=1024,early_stopping=False,do_sample=False,num_beams=3);generated_text=processor.batch_decode(generated_ids,skip_special_tokens=False)[0];parsed_answer=processor.post_process_generation(generated_text,task=task_prompt,image_size=(image.width,image.height));return parsed_answer
451
- def plot_bbox(image,data):
452
- fig,ax=plt.subplots();ax.imshow(image)
453
- for(bbox,label)in zip(data['bboxes'],data['labels']):x1,y1,x2,y2=bbox;rect=patches.Rectangle((x1,y1),x2-x1,y2-y1,linewidth=1,edgecolor='r',facecolor='none');ax.add_patch(rect);plt.text(x1,y1,label,color='white',fontsize=8,bbox=dict(facecolor='red',alpha=.5))
454
- ax.axis('off');return fig
455
- def draw_polygons(image,prediction,fill_mask=False):
456
- draw=ImageDraw.Draw(image);scale=1
457
- for(polygons,label)in zip(prediction['polygons'],prediction['labels']):
458
- color=random.choice(colormap);fill_color=random.choice(colormap)if fill_mask else None
459
- for _polygon in polygons:
460
- _polygon=np.array(_polygon).reshape(-1,2)
461
- if len(_polygon)<3:print('Invalid polygon:',_polygon);continue
462
- _polygon=(_polygon*scale).reshape(-1).tolist()
463
- if fill_mask:draw.polygon(_polygon,outline=color,fill=fill_color)
464
- else:draw.polygon(_polygon,outline=color)
465
- draw.text((_polygon[0]+8,_polygon[1]+2),label,fill=color)
466
- return image
467
- def convert_to_od_format(data):bboxes=data.get('bboxes',[]);labels=data.get('bboxes_labels',[]);od_results={'bboxes':bboxes,'labels':labels};return od_results
468
- def draw_ocr_bboxes(image,prediction):
469
- scale=1;draw=ImageDraw.Draw(image);bboxes,labels=prediction['quad_boxes'],prediction['labels']
470
- for(box,label)in zip(bboxes,labels):color=random.choice(colormap);new_box=(np.array(box)*scale).tolist();draw.polygon(new_box,width=3,outline=color);draw.text((new_box[0]+8,new_box[1]+2),'{}'.format(label),align='right',fill=color)
471
- return image
472
- def convert_to_od_format(data):bboxes=data.get('bboxes',[]);labels=data.get('bboxes_labels',[]);od_results={'bboxes':bboxes,'labels':labels};return od_results
473
- def draw_ocr_bboxes(image,prediction):
474
- scale=1;draw=ImageDraw.Draw(image);bboxes,labels=prediction['quad_boxes'],prediction['labels']
475
- for(box,label)in zip(bboxes,labels):color=random.choice(colormap);new_box=(np.array(box)*scale).tolist();draw.polygon(new_box,width=3,outline=color);draw.text((new_box[0]+8,new_box[1]+2),'{}'.format(label),align='right',fill=color)
476
- return image
477
-
478
- def process_image(image,task_prompt,text_input=None):
479
- if isinstance(image,str):image=Image.open(image)
480
- else:image=Image.fromarray(image)
481
- if task_prompt=='Caption':task_prompt='<CAPTION>';results=run_example(task_prompt,image);return results[task_prompt],None
482
- elif task_prompt=='Detailed Caption':task_prompt='<DETAILED_CAPTION>';results=run_example(task_prompt,image);return results[task_prompt],None
483
- elif task_prompt=='More Detailed Caption':task_prompt='<MORE_DETAILED_CAPTION>';results=run_example(task_prompt,image);return results,None
484
- elif task_prompt=='Caption + Grounding':task_prompt='<CAPTION>';results=run_example(task_prompt,image);text_input=results[task_prompt];task_prompt='<CAPTION_TO_PHRASE_GROUNDING>';results=run_example(task_prompt,image,text_input);results['<CAPTION>']=text_input;fig=plot_bbox(image,results['<CAPTION_TO_PHRASE_GROUNDING>']);return results,fig_to_pil(fig)
485
- elif task_prompt=='Detailed Caption + Grounding':task_prompt='<DETAILED_CAPTION>';results=run_example(task_prompt,image);text_input=results[task_prompt];task_prompt='<CAPTION_TO_PHRASE_GROUNDING>';results=run_example(task_prompt,image,text_input);results['<DETAILED_CAPTION>']=text_input;fig=plot_bbox(image,results['<CAPTION_TO_PHRASE_GROUNDING>']);return results,fig_to_pil(fig)
486
- elif task_prompt=='More Detailed Caption + Grounding':task_prompt='<MORE_DETAILED_CAPTION>';results=run_example(task_prompt,image);text_input=results[task_prompt];task_prompt='<CAPTION_TO_PHRASE_GROUNDING>';results=run_example(task_prompt,image,text_input);results['<MORE_DETAILED_CAPTION>']=text_input;fig=plot_bbox(image,results['<CAPTION_TO_PHRASE_GROUNDING>']);return results,fig_to_pil(fig)
487
- elif task_prompt=='Object Detection':task_prompt='<OD>';results=run_example(task_prompt,image);fig=plot_bbox(image,results['<OD>']);return results,fig_to_pil(fig)
488
- elif task_prompt=='Dense Region Caption':task_prompt='<DENSE_REGION_CAPTION>';results=run_example(task_prompt,image);fig=plot_bbox(image,results['<DENSE_REGION_CAPTION>']);return results,fig_to_pil(fig)
489
- elif task_prompt=='Region Proposal':task_prompt='<REGION_PROPOSAL>';results=run_example(task_prompt,image);fig=plot_bbox(image,results['<REGION_PROPOSAL>']);return results,fig_to_pil(fig)
490
- elif task_prompt=='Caption to Phrase Grounding':task_prompt='<CAPTION_TO_PHRASE_GROUNDING>';results=run_example(task_prompt,image,text_input);fig=plot_bbox(image,results['<CAPTION_TO_PHRASE_GROUNDING>']);return results,fig_to_pil(fig)
491
- elif task_prompt=='Referring Expression Segmentation':task_prompt='<REFERRING_EXPRESSION_SEGMENTATION>';results=run_example(task_prompt,image,text_input);output_image=copy.deepcopy(image);output_image=draw_polygons(output_image,results['<REFERRING_EXPRESSION_SEGMENTATION>'],fill_mask=True);return results,output_image
492
- elif task_prompt=='Region to Segmentation':task_prompt='<REGION_TO_SEGMENTATION>';results=run_example(task_prompt,image,text_input);output_image=copy.deepcopy(image);output_image=draw_polygons(output_image,results['<REGION_TO_SEGMENTATION>'],fill_mask=True);return results,output_image
493
- elif task_prompt=='Open Vocabulary Detection':task_prompt='<OPEN_VOCABULARY_DETECTION>';results=run_example(task_prompt,image,text_input);bbox_results=convert_to_od_format(results['<OPEN_VOCABULARY_DETECTION>']);fig=plot_bbox(image,bbox_results);return results,fig_to_pil(fig)
494
- elif task_prompt=='Region to Category':task_prompt='<REGION_TO_CATEGORY>';results=run_example(task_prompt,image,text_input);return results,None
495
- elif task_prompt=='Region to Description':task_prompt='<REGION_TO_DESCRIPTION>';results=run_example(task_prompt,image,text_input);return results,None
496
- elif task_prompt=='OCR':task_prompt='<OCR>';results=run_example(task_prompt,image);return results,None
497
- elif task_prompt=='OCR with Region':task_prompt='<OCR_WITH_REGION>';results=run_example(task_prompt,image);output_image=copy.deepcopy(image);output_image=draw_ocr_bboxes(output_image,results['<OCR_WITH_REGION>']);return results,output_image
498
- else:return'',None # Return empty string and None for unknown task prompts
499
-
500
- single_task_list=['Caption','Detailed Caption','More Detailed Caption','Object Detection','Dense Region Caption','Region Proposal','Caption to Phrase Grounding','Referring Expression Segmentation','Region to Segmentation','Open Vocabulary Detection','Region to Category','Region to Description','OCR','OCR with Region']
501
- cascaded_task_list=['Caption + Grounding','Detailed Caption + Grounding','More Detailed Caption + Grounding']
502
-
503
- def update_task_dropdown(choice):
504
- if choice == 'Cascaded task':
505
- return gr.Dropdown(choices=cascaded_task_list, value='Caption + Grounding')
506
- else:
507
- return gr.Dropdown(choices=single_task_list, value='Caption')
508
-
509
  args = parse_args()
510
  predictor = Predictor()
511
-
512
  dropdown_list = [
513
  EVA02_LARGE_MODEL_DSV3_REPO,
514
  SWINV2_MODEL_DSV3_REPO,
@@ -525,7 +384,6 @@ dropdown_list = [
525
  SWINV2_MODEL_IS_DSV1_REPO,
526
  EVA02_LARGE_MODEL_IS_DSV1_REPO,
527
  ]
528
- llama_list=[META_LLAMA_3_3B_REPO,META_LLAMA_3_8B_REPO]
529
 
530
  def _restart_space():
531
  HF_TOKEN=os.getenv('HF_TOKEN')
@@ -539,7 +397,6 @@ next_run_time_utc=restart_space_job.next_run_time.astimezone(timezone.utc)
539
  NEXT_RESTART=f"Next Restart: {next_run_time_utc.strftime('%Y-%m-%d %H:%M:%S')} (UTC) - The space will restart every 2 days to ensure stability and performance. It uses a background scheduler to handle the restart process."
540
 
541
  css = """
542
- div.progress-level div.progress-level-inner {text-align: left !important; width: 55.5% !important;}
543
  #output {height: 500px; overflow: auto; border: 1px solid #ccc;}
544
  label.float.svelte-i3tvor {position: relative !important;}
545
  .reduced-height.svelte-11chud3 {height: calc(80% - var(--size-10));}
@@ -686,6 +543,15 @@ with gr.Blocks(title=TITLE, css=css, theme="Werli/Multi-Tagger", fill_width=True
686
  character_mcut_enabled,
687
  ],
688
  )
689
  with gr.Tab(label="Florence 2 Image Captioning"):
690
  with gr.Row():
691
  with gr.Column(variant="panel"):
 
1
  import os
2
+ import io,copy,requests,spaces,gradio as gr,numpy as np
3
+ from transformers import AutoProcessor,AutoModelForCausalLM
 
4
  from PIL import Image,ImageDraw,ImageFont
 
5
  from unittest.mock import patch
6
  import argparse,huggingface_hub,onnxruntime as rt,pandas as pd,traceback,tempfile,zipfile,re,ast,time
7
  from datetime import datetime,timezone
8
  from collections import defaultdict
 
9
  from apscheduler.schedulers.background import BackgroundScheduler
10
  import json
11
+ from modules.classifyTags import classify_tags,process_tags
12
+ from modules.florence2 import process_image,single_task_list,update_task_dropdown
13
+ from modules.llama_loader import llama_list,llama3reorganize
14
  os.environ['PYTORCH_ENABLE_MPS_FALLBACK']='1'
15
 
16
  TITLE = "Multi-Tagger"
17
  DESCRIPTION = """
18
+ Multi-Tagger is a versatile application that combines the Waifu Diffusion and Florence 2 models for advanced image analysis and captioning. Perfect for AI artists and enthusiasts, it offers a range of features:
19
 
20
+ - Batch processing for multiple images
21
+ - Multi-category tagging with structured tag display.
22
+ - CUDA or CPU support.
23
+ - Image tagging, various captioning tasks which includes: Caption, Detailed Caption, Object Detection with visual outputs and much more.
 
24
 
25
  Example image by [me.](https://huggingface.co/Werli)
26
  """
 
27
 
28
  # Dataset v3 series of models:
29
  SWINV2_MODEL_DSV3_REPO = "SmilingWolf/wd-swinv2-tagger-v3"
 
43
  # Files to download from the repos
44
  MODEL_FILENAME = "model.onnx"
45
  LABEL_FILENAME = "selected_tags.csv"
46
 
47
  kaomojis=['0_0','(o)_(o)','+_+','+_-','._.','<o>_<o>','<|>_<|>','=_=','>_<','3_3','6_9','>_o','@_@','^_^','o_o','u_u','x_x','|_|','||_||']
48
  def parse_args()->argparse.Namespace:parser=argparse.ArgumentParser();parser.add_argument('--score-slider-step',type=float,default=.05);parser.add_argument('--score-general-threshold',type=float,default=.35);parser.add_argument('--score-character-threshold',type=float,default=.85);parser.add_argument('--share',action='store_true');return parser.parse_args()
49
  def load_labels(dataframe)->list[str]:name_series=dataframe['name'];name_series=name_series.map(lambda x:x.replace('_',' ')if x not in kaomojis else x);tag_names=name_series.tolist();rating_indexes=list(np.where(dataframe['category']==9)[0]);general_indexes=list(np.where(dataframe['category']==0)[0]);character_indexes=list(np.where(dataframe['category']==4)[0]);return tag_names,rating_indexes,general_indexes,character_indexes
50
  def mcut_threshold(probs):sorted_probs=probs[probs.argsort()[::-1]];difs=sorted_probs[:-1]-sorted_probs[1:];t=difs.argmax();thresh=(sorted_probs[t]+sorted_probs[t+1])/2;return thresh
51
+
52
  class Timer:
53
  def __init__(self):self.start_time=time.perf_counter();self.checkpoints=[('Start',self.start_time)]
54
  def checkpoint(self,label='Checkpoint'):now=time.perf_counter();self.checkpoints.append((label,now))
 
61
  for(label,curr_time)in self.checkpoints[1:]:elapsed=curr_time-prev_time;print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds");prev_time=curr_time
62
  total_time=self.checkpoints[-1][1]-self.start_time;print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n");self.checkpoints.clear()
63
  def restart(self):self.start_time=time.perf_counter();self.checkpoints=[('Start',self.start_time)]
64
  class Predictor:
65
  def __init__(self):
66
  self.model_target_size = None
 
182
 
183
  if llama3_reorganize_model_repo:
184
  print(f"Llama3 reorganize load model {llama3_reorganize_model_repo}")
185
+ llama3_reorganize = llama3reorganize(llama3_reorganize_model_repo, loadModel=True)
186
  current_progress += progressRatio/progressTotal;
187
  progress(current_progress, desc="Initialize llama3 model finished")
188
  timer.checkpoint(f"Initialize llama3 model")
 
291
 
292
  if llama3_reorganize_model_repo:
293
  print(f"Starting reorganize with llama3...")
294
+ reorganize_strings = llama_loader.llama3_reorganize.reorganize(sorted_general_strings)
295
  reorganize_strings = re.sub(r" *Title: *", "", reorganize_strings)
296
  reorganize_strings = re.sub(r"\n+", ",", reorganize_strings)
297
  reorganize_strings = re.sub(r",,+", ",", reorganize_strings)
 
330
  download.append(downloadZipPath)
331
  # End zip creation logic
332
  if llama3_reorganize_model_repo:
333
+ llama_loader.llama3_reorganize.release_vram()
334
  del llama3_reorganize
335
 
336
  progress(1, desc=f"Predict completed")
 
366
  selected_image=ast.literal_eval(selected_image)
367
  if selected_image in gallery:gallery.remove(selected_image)
368
  return gallery
369
  args = parse_args()
370
  predictor = Predictor()
 
371
  dropdown_list = [
372
  EVA02_LARGE_MODEL_DSV3_REPO,
373
  SWINV2_MODEL_DSV3_REPO,
 
384
  SWINV2_MODEL_IS_DSV1_REPO,
385
  EVA02_LARGE_MODEL_IS_DSV1_REPO,
386
  ]
 
387
 
388
  def _restart_space():
389
  HF_TOKEN=os.getenv('HF_TOKEN')
 
397
  NEXT_RESTART=f"Next Restart: {next_run_time_utc.strftime('%Y-%m-%d %H:%M:%S')} (UTC) - The space will restart every 2 days to ensure stability and performance. It uses a background scheduler to handle the restart process."
398
 
399
  css = """
 
400
  #output {height: 500px; overflow: auto; border: 1px solid #ccc;}
401
  label.float.svelte-i3tvor {position: relative !important;}
402
  .reduced-height.svelte-11chud3 {height: calc(80% - var(--size-10));}
 
543
  character_mcut_enabled,
544
  ],
545
  )
546
+ with gr.Tab(label="Tag Categorizer"):
547
+ with gr.Row():
548
+ with gr.Column(variant="panel"):
549
+ input_tags = gr.Textbox(label="Input Tags (Danbooru comma-separated)", placeholder="1girl, cat, horns, blue hair, ...")
550
+ submit_button = gr.Button(value="Submit", variant="primary", size="lg")
551
+ with gr.Column(variant="panel"):
552
+ categorized_string = gr.Textbox(label="Categorized (string)", show_label=True, show_copy_button=True, lines=8)
553
+ categorized_json = gr.JSON(label="Categorized (tags) - JSON")
554
+ submit_button.click(process_tags, inputs=[input_tags], outputs=[categorized_string, categorized_json])
555
  with gr.Tab(label="Florence 2 Image Captioning"):
556
  with gr.Row():
557
  with gr.Column(variant="panel"):
modules/classifyTags.py ADDED
@@ -0,0 +1,179 @@
1
+ from collections import defaultdict
2
+
3
+ # Define grouping rules (categories and keywords)
4
+ # Provided categories and reversed_categories
5
+ categories = {
6
+ "Explicit" : ["sex", "69", "paizuri", "cum", "precum", "areola_slip", "hetero", "erection", "oral", "fellatio", "yaoi", "ejaculation", "ejaculating", "masturbation", "handjob", "bulge", "rape", "_rape", "doggystyle", "threesome", "missionary", "object_insertion", "nipple", "nipples", "pussy", "anus", "penis", "groin", "testicles", "testicle", "anal", "cameltoe", "areolae", "dildo", "clitoris", "top-down_bottom-up", "gag", "groping", "gagged", "gangbang", "orgasm", "femdom", "incest", "bukkake", "breast_out", "vaginal", "vagina", "public_indecency", "breast_sucking", "folded", "cunnilingus", "_cunnilingus", "foreskin", "bestiality", "footjob", "uterus", "womb", "flaccid", "defloration", "butt_plug", "cowgirl_position", "reverse_cowgirl_position", "squatting_cowgirl_position", "reverse_upright_straddle", "irrumatio", "deepthroat", "pokephilia", "gaping", "orgy", "cleft_of_venus", "futanari", "futasub", "futa", "cumdrip", "fingering", "vibrator", "partially_visible_vulva", "penetration", "penetrated", "cumshot", "exhibitionism", "breast_milk", "grinding", "clitoral", "urethra", "phimosis", "cervix", "impregnation", "tribadism", "molestation", "pubic_hair", "clothed_female_nude_male", "clothed_male_nude_female", "clothed_female_nude_female", "clothed_male_nude_male", "sex_machine", "milking_machine", "ovum", "chikan", "pussy_juice_drip_through_clothes", "ejaculating_while_penetrated", "suspended_congress", "reverse_suspended_congress", "spread_pussy_under_clothes", "anilingus", "reach-around", "humping", "consensual_tentacles", "tentacle_pit", "cum_in_", ],
7
+ #外観状態/外觀狀態
8
+ "Appearance Status" : ["backless", "bandaged_neck", "bleeding", "blood", "_blood", "blush", "body_writing", "bodypaint", "bottomless", "breath", "bruise", "butt_crack", "cold", "covered_mouth", "crack", "cross-section", "crotchless", "crying", "curvy", "cuts", "dirty", "dripping", "drunk", "from_mouth", "glowing", "hairy", "halterneck", "hot", "injury", "latex", "leather", "levitation", "lipstick_mark", "_markings", "makeup", "mole", "moles", "no_bra", "nosebleed", "nude", "outfit", "pantylines", "peeing", "piercing", "_piercing", "piercings", "pregnant", "public_nudity", "reverse", "_skin", "_submerged", "saliva", "scar", "scratches", "see-through", "shadow", "shibari", "sideless", "skindentation", "sleeping","tan", "soap_bubbles", "steam", "steaming_body", "stitches", "sweat", "sweatdrop", "sweaty", "tanlines", "tattoo", "tattoo", "tears", "topless", "transparent", "trefoil", "trembling", "veins", "visible_air", "wardrobe_malfunction", "wet", "x-ray", "unconscious", "handprint", ],
9
+ #動作姿勢/動作姿勢
10
+ "Action Pose" : ["afloat", "afterimage", "against_fourth_wall", "against_wall", "aiming", "all_fours", "another's_mouth", "arm_", "arm_support", "arms_", "arms_behind_back", "asphyxiation", "attack", "back", "ballet", "bara", "bathing", "battle", "bdsm", "beckoning", "bent_over", "bite_mark", "biting", "bondage", "breast_suppress", "breathing", "burning", "bust_cup", "carry", "carrying", "caught", "chained", "cheek_squash", "chewing", "cigarette", "clapping", "closed_eye", "come_hither", "cooking", "covering", "cuddling", "dancing", "_docking", "destruction", "dorsiflexion", "dreaming", "dressing", "drinking", "driving", "dropping", "eating", "exercise", "expansion", "exposure", "facing", "failure", "fallen_down", "falling", "feeding", "fetal_position", "fighting", "finger_on_trigger", "finger_to_cheek", "finger_to_mouth", "firing", "fishing", "flashing", "fleeing", "flexible", "flexing", "floating", "flying", "fourth_wall", "freediving", "frogtie", "_grab", "girl_on_top", "giving", "grabbing", "grabbing_", "gymnastics", "_hold", "hadanugi_dousa", "hairdressing", "hand_", "hand_on", "hand_on_wall", "hands_", "headpat", "hiding", "holding", "hug", "hugging", "imagining", "in_container", "in_mouth", "in_palm", "jealous", "jumping", "kabedon", "kicking", "kiss", "kissing", "kneeling", "_lift", "lactation", "laundry", "licking", "lifted_by_self", "looking", "lowleg", "lying", "melting", "midair", "moaning", "_open", "on_back", "on_bed", "on_ground", "on_lap", "on_one_knee", "one_eye_closed", "open_", "over_mouth", "own_mouth", "_peek", "_pose", "_press", "_pull", "padding", "paint", "painting_(action)", "palms_together", "pee", "peeking", "pervert", "petting", "pigeon-toed", "piggyback", "pinching", "pinky_out", "pinned", "plantar_flexion", "planted", "playing", "pocky", "pointing", "poke", "poking", "pouring", "pov", "praying", "presenting", "profanity", "pulled_by_self", "pulling", "pump_action", "punching", "_rest", "raised", "reaching", "reading", "reclining", "reverse_grip", "riding", "running", "_slip", "salute", "screaming", "seiza", "selfie", "sewing", "shaking", "shoe_dangle", "shopping", "shouting", "showering", "shushing", "singing", "sitting", "slapping", "smell", "smelling", "smoking", "smother", "solo", "spanked", "spill", "spilling", "spinning", "splashing", "split", "squatting", "squeezed", "breasts_squeezed_together", "standing", "standing_on_", "staring", "straddling", "strangling", "stretching", "surfing", "suspension", "swimming", "talking", "teardrop", "tearing_clothes", "throwing", "tied_up", "tiptoes", "toe_scrunch", "toothbrush", "trigger_discipline", "tripping", "tsundere", "turning_head", "twitching", "two-handed", "tying", "_up", "unbuttoned", "undressed", "undressing", "unsheathed", "unsheathing", "unzipped", "unzipping", "upright_straddle", "v", "V", "vore", "_wielding","wading", "walk-in", "walking", "wariza", "waving", "wedgie", "wrestling", "writing", "yawning", "yokozuwari", "_conscious", "massage", "struggling", "shrugging", "drugged", "tentacles_under_clothes", "restrained_by_tentacles", "tentacles_around_arms", "tentacles_around_legs", "restrained_legs", "restrained_tail", "restrained_arms", "tentacles_on_female", "archery", "cleaning", "tempura", "facepalm", "sadism", ],
11
+ #頭部装飾/頭部服飾
12
+ "Headwear" : ["antennae", "antlers", "aura", "bandaged_head", "bandana", "bandeau", "beanie", "beanie", "beret", "bespectacled", "blindfold", "bonnet", "_cap", "circlet", "crown", "_drill", "_drills", "diadem", "_eyewear", "ear_covers", "ear_ornament", "ear_tag", "earbuds", "earclip", "earmuffs", "earphones", "earpiece", "earring", "earrings", "eyeliner", "eyepatch", "eyewear_on_head", "facial", "fedora", "glasses", "goggles", "_headwear", "hachimaki", "hair_bobbles", "hair_ornament", "hair_rings", "hair_tie", "hairband", "hairclip", "hairpin", "hairpods", "halo", "hat", "head-mounted_display", "head_wreath", "headband", "headdress", "headgear", "headphones", "headpiece", "headset", "helm", "helmet", "hood", "kabuto_(helmet)", "kanzashi", "_mask", "maid_headdress", "mask", "mask", "mechanical_ears", "mechanical_eye", "mechanical_horns", "mob_cap", "monocle", "neck_ruff", "nightcap", "on_head", "pince-nez", "qingdai_guanmao", "scarf_over_mouth", "scrunchie", "sunglasses", "tam_o'_shanter", "tate_eboshi", "tiara", "topknot", "turban", "veil", "visor", "wig", "mitre", "tricorne", "bicorne", ],
13
+ #手部装飾/手部服飾
14
+ "Handwear" : ["arm_warmers", "armband", "armlet", "bandaged_arm", "bandaged_fingers", "bandaged_hand", "bandaged_wrist", "bangle", "bracelet", "bracelets", "bracer", "cuffs", "elbow_pads", "_gauntlets", "_glove", "_gloves", "gauntlets", "gloves", "kote", "kurokote", "mechanical_arm", "mechanical_arms", "mechanical_hands", "mittens", "mitts", "nail_polish", "prosthetic_arm", "wrist_cuffs", "wrist_guards", "wristband", "yugake", ],
15
+ #ワンピース衣装/一件式服裝
16
+ "One-Piece Outfit" : ["bodystocking", "bodysuit", "dress", "furisode", "gown", "hanfu", "jumpsuit", "kimono", "leotard", "microdress", "one-piece", "overalls", "robe", "spacesuit", "sundress", "yukata", ],
17
+ #上半身衣装/上半身服裝
18
+ "Upper Body Clothing" : ["aiguillette", "apron", "_apron", "armor", "_armor", "ascot", "babydoll", "bikini", "_bikini", "blazer", "_blazer", "blouse", "_blouse", "bowtie", "_bowtie", "bra", "_bra", "breast_curtain", "breast_curtains", "breast_pocket", "breastplate", "bustier", "camisole", "cape", "capelet", "cardigan", "center_opening", "chemise", "chest_jewel", "choker", "cloak", "coat", "coattails", "collar", "_collar", "corset", "criss-cross_halter", "crop_top", "dougi", "feather_boa", "gakuran", "hagoromo", "hanten_(clothes)", "haori", "harem_pants", "harness", "hoodie", "jacket", "_jacket", "japanese_clothes", "kappougi", "kariginu", "lapels", "lingerie", "_lingerie", "maid", "mechanical_wings", "mizu_happi", "muneate", "neckerchief", "necktie", "negligee", "nightgown", "pajamas", "_pajamas", "pauldron", "pauldrons", "plunging_neckline", "raincoat", "rei_no_himo", "sailor_collar", "sarashi", "scarf", "serafuku", "shawl", "shirt", "shoulder_", "sleepwear", "sleeve", "sleeveless", "sleeves", "_sleeves", "sode", "spaghetti_strap", "sportswear", "strapless", "suit", "sundress", "suspenders", "sweater", "swimsuit", "_top", "_torso", "t-shirt", "tabard", "tailcoat", "tank_top", "tasuki", "tie_clip", "tunic", "turtleneck", "tuxedo", "_uniform", "undershirt", "uniform", "v-neck", "vambraces", "vest", "waistcoat", ],
19
+ #下半身衣装/下半身服裝
20
+ "Lower Body Clothing" : ["bare_hips", "bloomers", "briefs", "buruma", "crotch_seam", "cutoffs", "denim", "faulds", "fundoshi", "g-string", "garter_straps", "hakama", "hip_vent", "jeans", "knee_pads", "loincloth", "mechanical_tail", "microskirt", "miniskirt", "overskirt", "panties", "pants", "pantsu", "panty_straps", "pelvic_curtain", "petticoat", "sarong", "shorts", "side_slit", "skirt", "sweatpants", "swim_trunks", "thong", "underwear", "waist_cape", ],
21
+ #足元・レッグウェア/腳與腿部服飾
22
+ "Foot & Legwear" : ["anklet", "bandaged_leg", "boot", "boots", "_footwear", "flats", "flip-flops", "geta", "greaves", "_heels", "kneehigh", "kneehighs", "_legwear", "leg_warmers", "leggings", "loafers", "mary_janes", "mechanical_legs", "okobo", "over-kneehighs", "pantyhose", "prosthetic_leg", "pumps", "_shoe", "_sock", "sandals", "shoes", "skates", "slippers", "sneakers", "socks", "spikes", "tabi", "tengu-geta", "thigh_strap", "thighhighs", "uwabaki", "zouri", "legband", "ankleband", ],
23
+ #その他の装飾/其他服飾
24
+ "Other Accessories" : ["alternate_", "anklet", "badge", "beads", "belt", "belts", "bow", "brooch", "buckle", "button", "buttons", "_clothes", "_costume", "_cutout", "casual", "charm", "clothes_writing", "clothing_aside", "costume", "cow_print", "cross", "d-pad", "double-breasted", "drawstring", "epaulettes", "fabric", "fishnets", "floral_print", "formal", "frills", "_garter", "gem", "holster", "jewelry", "_knot", "lace", "lanyard", "leash", "magatama", "mechanical_parts", "medal", "medallion", "naked_bandage", "necklace", "_ornament", "(ornament)", "o-ring", "obi", "obiage", "obijime", "_pin", "_print", "padlock", "patterned_clothing", "pendant", "piercing", "plaid", "pocket", "polka_dot", "pom_pom_(clothes)", "pom_pom_(clothes)", "pouch", "ribbon", "_ribbon", "_stripe", "_stripes", "sash", "shackles", "shimenawa", "shrug_(clothing)", "skin_tight", "spandex", "strap", "sweatband", "_trim", "tassel", "zettai_ryouiki", "zipper", ],
25
+ #表情/表情
26
+ "Facial Expression" : ["ahegao", "anger_vein", "angry", "annoyed", "confused", "drooling", "embarrassed", "expressionless", "eye_contact", "_face", "frown", "fucked_silly", "furrowed_brow", "glaring", "gloom_(expression)", "grimace", "grin", "happy", "jitome", "laughing", "_mouth", "nervous", "notice_lines", "o_o", "parted_lips", "pout", "puff_of_air", "restrained", "sad", "sanpaku", "scared", "scowl", "serious", "shaded_face", "shy", "sigh", "sleepy", "smile", "smirk", "smug", "snot", "spoken_ellipsis", "spoken_exclamation_mark", "spoken_interrobang", "spoken_question_mark", "squiggle", "surprised", "tareme", "tearing_up", "thinking", "tongue", "tongue_out", "torogao", "tsurime", "turn_pale", "wide-eyed", "wince", "worried", "heartbeat", ],
27
+ #絵文字/表情符號
28
+ "Facial Emoji" : ["!!", "!", "!?", "+++", "+_+", "...", "...?", "._.", "03:00", "0_0", ":/", ":3", ":<", ":>", ":>=", ":d", ":i", ":o", ":p", ":q", ":t", ":x", ":|", ";(", ";)", ";3", ";d", ";o", ";p", ";q", "=_=", ">:(", ">:)", ">_<", ">_o", ">o<", "?", "??", "@_@", "\m/", "\n/", "\o/", "\||/", "^^^", "^_^", "c:", "d:", "o_o", "o3o", "u_u", "w", "x", "x_x", "xd", "zzz", "|_|", ],
29
+ #頭部/頭部
30
+ "Head" : ["afro", "ahoge", "animal_ear_fluff", "_bangs", "_bun", "bald", "beard", "blunt_bangs", "blunt_ends", "bob_cut", "bowl_cut", "braid", "braids", "buzz_cut", "circle_cut", "colored_tips", "cowlick", "dot_nose", "dreadlocks", "_ear", "_ears", "_eye", "_eyes", "enpera", "eyeball", "eyebrow", "eyebrow_cut", "eyebrows", "eyelashes", "eyeshadow", "faceless", "facepaint", "facial_mark", "fang", "forehead", "freckles", "goatee", "_hair", "_horn", "_horns", "hair_", "hair_bun", "hair_flaps", "hair_intakes", "hair_tubes", "half_updo", "head_tilt", "heterochromia", "hime_cut", "hime_cut", "horns", "in_eye", "inverted_bob", "kemonomimi_mode", "lips", "mascara", "mohawk", "mouth_", "mustache", "nose", "one-eyed", "one_eye", "one_side_up", "_pupils", "parted_bangs", "pompadour", "ponytail", "ringlets", "_sclera", "sideburns", "sidecut", "sidelock", "sidelocks", "skull", "snout", "stubble", "swept_bangs", "tails", "teeth", "third_eye", "twintails", "two_side_up", "undercut", "updo", "v-shaped_eyebrows", "whiskers", "tentacle_hair", ],
31
+ #手部/手部
32
+ "Hands" : ["_arm", "_arms", "claws", "_finger", "_fingers", "fingernails", "_hand", "_nail", "_nails", "palms", "rings", "thumbs_up", ],
33
+ #上半身/上半身
34
+ "Upper Body" : ["abs", "armpit", "armpits", "backboob", "belly", "biceps", "breast_rest", "breasts", "button_gap", "cleavage", "collarbone", "dimples_of_venus", "downblouse", "flat_chest", "linea_alba", "median_furrow", "midriff", "nape", "navel", "pectorals", "ribs", "_shoulder", "_shoulders", "shoulder_blades", "sideboob", "sidetail", "spine", "stomach", "strap_gap", "toned", "underboob", "underbust", ],
35
+ #下半身/下半身
36
+ "Lower Body" : ["ankles", "ass", "barefoot", "crotch", "feet", "highleg", "hip_bones", "hooves", "kneepits", "knees", "legs", "soles", "tail", "thigh_gap", "thighlet", "thighs", "toenail", "toenails", "toes", "wide_hips", ],
37
+ #生物/生物
38
+ "Creature" : ["(animal)", "anglerfish", "animal", "bear", "bee", "bird", "bug", "butterfly", "cat", "chick", "chicken", "chinese_zodiac", "clownfish", "coral", "crab", "creature", "crow", "dog", "dove", "dragon", "duck", "eagle", "fish", "fish", "fox", "fox", "frog", "frog", "goldfish", "hamster", "horse", "jellyfish", "ladybug", "lion", "mouse", "octopus", "owl", "panda", "penguin", "pig", "pigeon", "rabbit", "rooster", "seagull", "shark", "sheep", "shrimp", "snail", "snake", "squid", "starfish", "tanuki", "tentacles", "goo_tentacles", "plant_tentacles", "crotch_tentacles", "mechanical_tentacles", "squidward_tentacles", "suction_tentacles", "penis_tentacles", "translucent_tentacles", "back_tentacles", "red_tentacles", "green_tentacles", "blue_tentacles", "black_tentacles", "pink_tentacles", "purple_tentacles", "face_tentacles", "tentacles_everywhere", "milking_tentacles", "tiger", "turtle", "weasel", "whale", "wolf", "parrot", "sparrow", "unicorn", ],
39
+ #植物/植物
40
+ "Plant" : ["bamboo", "bouquet", "branch", "bush", "cherry_blossoms", "clover", "daisy", "(flower)", "flower", "flower", "gourd", "hibiscus", "holly", "hydrangea", "leaf", "lily_pad", "lotus", "moss", "palm_leaf", "palm_tree", "petals", "plant", "plum_blossoms", "rose", "spider_lily", "sunflower", "thorns", "tree", "tulip", "vines", "wisteria", "acorn", ],
41
+ #食べ物/食物
42
+ "Food" : ["apple", "baguette", "banana", "baozi", "beans", "bento", "berry", "blueberry", "bread", "broccoli", "burger", "cabbage", "cake", "candy", "carrot", "cheese", "cherry", "chili_pepper", "chocolate", "coconut", "cookie", "corn", "cream", "crepe", "cucumber", "cucumber", "cupcake", "curry", "dango", "dessert", "doughnut", "egg", "eggplant", "_(food)", "_(fruit)", "food", "french_fries", "fruit", "grapes", "ice_cream", "icing", "lemon", "lettuce", "lollipop", "macaron", "mandarin_orange", "meat", "melon", "mochi", "mushroom", "noodles", "omelet", "omurice", "onigiri", "onion", "pancake", "parfait", "pasties", "pastry", "peach", "pineapple", "pizza", "popsicle", "potato", "pudding", "pumpkin", "radish", "ramen", "raspberry", "rice", "roasted_sweet_potato", "sandwich", "sausage", "seaweed", "skewer", "spitroast", "spring_onion", "strawberry", "sushi", "sweet_potato", "sweets", "taiyaki", "takoyaki", "tamagoyaki", "tempurakanbea", "toast", "tomato", "vegetable", "wagashi", "wagashi", "watermelon", "jam", "popcorn", ],
43
+ #飲み物/飲品
44
+ "Beverage" : ["alcohol", "beer", "coffee", "cola", "drink", "juice", "juice_box", "milk", "sake", "soda", "tea", "_tea", "whiskey", "wine", "cocktail", ],
45
+ #音楽/音樂
46
+ "Music" : ["band", "baton_(conducting)", "beamed", "cello", "concert", "drum", "drumsticks", "eighth_note", "flute", "guitar", "harp", "horn", "(instrument)", "idol", "instrument", "k-pop", "lyre", "(music)", "megaphone", "microphone", "music", "musical_note", "phonograph", "piano", "plectrum", "quarter_note", "recorder", "sixteenth_note", "sound_effects", "trumpet", "utaite", "violin", "whistle", ],
47
+ #武器・装備/武器・裝備
48
+ "Weapons & Equipment" : ["ammunition", "arrow_(projectile)", "axe", "bandolier", "baseball_bat", "beretta_92", "bolt_action", "bomb", "bullet", "bullpup", "cannon", "chainsaw", "crossbow", "dagger", "energy_sword", "explosive", "fighter_jet", "gohei", "grenade", "gun", "hammer", "handgun", "holstered", "jet", "katana", "knife", "kunai", "lance", "mallet", "nata_(tool)", "polearm", "quiver", "rapier", "revolver", "rifle", "rocket_launcher", "scabbard", "scope", "scythe", "sheath", "sheathed", "shield", "shotgun", "shuriken", "spear", "staff", "suppressor", "sword", "tank", "tantou", "torpedo", "trident", "(weapon)", "wand", "weapon", "whip", "yumi_(bow)", "h&k_hk416", "rocket_launcher", "heckler_&_koch", "_weapon", ],
49
+ #乗り物/交通器具
50
+ "Vehicles" : ["aircraft", "airplane", "bicycle", "boat", "car", "caterpillar_tracks", "flight_deck", "helicopter", "motor_vehicle", "motorcycle", "ship", "spacecraft", "spoiler_(automobile)", "train", "truck", "watercraft", "wheel", "wheelbarrow", "wheelchair", "inflatable_raft", ],
51
+ #建物/建物
52
+ "Buildings" : ["apartment", "aquarium", "architecture", "balcony", "building", "cafe", "castle", "church", "gym", "hallway", "hospital", "house", "library", "(place)", "porch", "restaurant", "restroom", "rooftop", "shop", "skyscraper", "stadium", "stage", "temple", "toilet", "tower", "train_station", "veranda", ],
53
+ #室内/室內
54
+ "Indoor" : ["bath", "bathroom", "bathtub", "bed", "bed_sheet", "bedroom", "blanket", "bookshelf", "carpet", "ceiling", "chair", "chalkboard", "classroom", "counter", "cupboard", "curtains", "cushion", "dakimakura", "desk", "door", "doorway", "drawer", "_floor", "floor", "futon", "indoors", "interior", "kitchen", "kotatsu", "locker", "mirror", "pillow", "room", "rug", "school_desk", "shelf", "shouji", "sink", "sliding_doors", "stairs", "stool", "storeroom", "table", "tatami", "throne", "window", "windowsill", "bathhouse", "chest_of_drawers", ],
55
+ #屋外/室外
56
+ "Outdoor" : ["alley", "arch", "beach", "bridge", "bus_stop", "bush", "cave", "(city)", "city", "cliff", "crescent", "crosswalk", "day", "desert", "fence", "ferris_wheel", "field", "forest", "grass", "graveyard", "hill", "lake", "lamppost", "moon", "mountain", "night", "ocean", "onsen", "outdoors", "path", "pool", "poolside", "railing", "railroad", "river", "road", "rock", "sand", "shore", "sky", "smokestack", "snow", "snowball", "snowman", "street", "sun", "sunlight", "sunset", "tent", "torii", "town", "tree", "turret", "utility_pole", "valley", "village", "waterfall", ],
57
+ #物品/物品
58
+ "Objects" : ["anchor", "android", "armchair", "(bottle)", "backpack", "bag", "ball", "balloon", "bandages", "bandaid", "bandaids", "banknote", "banner", "barcode", "barrel", "baseball", "basket", "basketball", "beachball", "bell", "bench", "binoculars", "board_game", "bone", "book", "bottle", "bowl", "box", "box_art", "briefcase", "broom", "bucket", "(chess)", "(computer)", "(computing)", "(container)", "cage", "calligraphy_brush", "camera", "can", "candle", "candlestand", "cane", "card", "cartridge", "cellphone", "chain", "chandelier", "chess", "chess_piece", "choko_(cup)", "chopsticks", "cigar", "clipboard", "clock", "clothesline", "coin", "comb", "computer", "condom", "controller", "cosmetics", "couch", "cowbell", "crazy_straw", "cup", "cutting_board", "dice", "digital_media_player", "doll", "drawing_tablet", "drinking_straw", "easel", "electric_fan", "emblem", "envelope", "eraser", "feathers", "figure", "fire", "fishing_rod", "flag", "flask", "folding_fan", "fork", "frying_pan", "(gemstone)", "game_console", "gears", "gemstone", "gift", "glass", "glowstick", "gold", "handbag", "handcuffs", "handheld_game_console", "hose", "id_card", "innertube", "iphone", "jack-o'-lantern", "jar", "joystick", "key", "keychain", "kiseru", "ladder", "ladle", "lamp", "lantern", "laptop", "letter", "letterboxed", "lifebuoy", "lipstick", "liquid", "lock", "lotion", "_machine", "map", "marker", "model_kit", "money", "monitor", "mop", "mug", "needle", "newspaper", "nintendo", "nintendo_switch", "notebook", "(object)", "ofuda", "orb", "origami", "(playing_card)", "pack", "paddle", "paintbrush", "pan", "paper", "parasol", "patch", "pc", "pen", "pencil", "pencil", "pendant_watch", "phone", "pill", "pinwheel", "plate", "playstation", "pocket_watch", "pointer", "poke_ball", "pole", "quill", "racket", "randoseru", "remote_control", "ring", "rope", "sack", "saddle", "sakazuki", "satchel", "saucer", "scissors", "scroll", "seashell", "seatbelt", "shell", "shide", "shopping_cart", "shovel", "shower_head", "silk", "sketchbook", "smartphone", "soap", "sparkler", "spatula", "speaker", "spoon", "statue", "stethoscope", "stick", "sticker", "stopwatch", "string", "stuffed_", "stylus", "suction_cups", "suitcase", "surfboard", "syringe", "talisman", "tanzaku", "tape", "teacup", "teapot", "teddy_bear", "television", "test_tube", "tiles", "tokkuri", "tombstone", "torch", "towel", "toy", "traffic_cone", "tray", "treasure_chest", "uchiwa", "umbrella", "vase", "vial", "video_game", "viewfinder", "volleyball", "wallet", "watch", "watch", "whisk", "whiteboard", "wreath", "wrench", "wristwatch", "yunomi", "ace_of_hearts", "inkwell", "compass", "ipod", "sunscreen", "rocket", "cobblestone", ],
59
+ #キャラクター設定/角色設定
60
+ "Character Design" : ["+boys", "+girls", "1other", "39", "_boys", "_challenge", "_connection", "_female", "_fur", "_girls", "_interface", "_male", "_man", "_person", "abyssal_ship", "age_difference", "aged_down", "aged_up", "albino", "alien", "alternate_muscle_size", "ambiguous_gender", "amputee", "androgynous", "angel", "animalization", "ass-to-ass", "assault_visor", "au_ra", "baby", "bartender", "beak", "bishounen", "borrowed_character", "boxers", "boy", "breast_envy", "breathing_fire", "bride", "broken", "brother_and_sister", "brothers", "camouflage", "cheating_(relationship)", "cheerleader", "chibi", "child", "clone", "command_spell", "comparison", "contemporary", "corpse", "corruption", "cosplay", "couple", "creature_and_personification", "crossdressing", "crossover", "cyberpunk", "cyborg", "cyclops", "damaged", "dancer", "danmaku", "darkness", "death", "defeat", "demon", "disembodied_", "draph", "drone", "duel", "dwarf", "egyptian", "electricity", "elezen", "elf", "enmaided", "erune", "everyone", "evolutionary_line", "expressions", "fairy", "family", "fangs", "fantasy", "fashion", "fat", "father_and_daughter", "father_and_son", "fewer_digits", "fins", "flashback", "fluffy", "fumo_(doll)", "furry", "fusion", "fuuin_no_tsue", "gameplay_mechanics", "genderswap", "ghost", "giant", "giantess", "gibson_les_paul", "girl", "goblin", "groom", "guro", "gyaru", "habit", "harem", "harpy", "harvin", "heads_together", "health_bar", "height_difference", "hitodama", "horror_(theme)", "humanization", "husband_and_wife", "hydrokinesis", "hypnosis", "hyur", "idol", "insignia", "instant_loss", "interracial", "interspecies", "japari_bun", "jeweled_branch_of_hourai", "jiangshi", "jirai_kei", "joints", "karakasa_obake", "keyhole", "kitsune", "knight", "kodona", "kogal", "kyuubi", "lamia", "left-handed", "loli", "lolita", "look-alike", "machinery", "magic", "male_focus", "manly", "matching_outfits", "mature_female", "mecha", "mermaid", "meta", "miko", "milestone_celebration", "military", "mind_control", "miniboy", "minigirl", "miqo'te", "monster", "monsterification", "mother_and_daughter", "mother_and_son", "multiple_others", "muscular", "nanodesu_(phrase)", "narrow_waist", "nekomata", "netorare", "ninja", "no_humans", "nontraditional", "nun", "nurse", "object_namesake", "obliques", "office_lady", "old", "on_body", "onee-shota", "oni", "orc", "others", "otoko_no_ko", "oversized_object", "paint_splatter", "pantyshot", "pawpads", "persona", "personality", "personification", "pet_play", "petite", "pirate", "playboy_bunny", "player_2", "plugsuit", "plump", "poi", "pokemon", "police", "policewoman", "pom_pom_(cheerleading)", "princess", "prosthesis", "pun", "puppet", "race_queen", "radio_antenna", "real_life_insert", "redesign", "reverse_trap", "rigging", "robot", "rod_of_remorse", "sailor", "salaryman", "samurai", "sangvis_ferri", "scales", "scene_reference", "school", "sheikah", "shota", "shrine", "siblings", "side-by-side", "sidesaddle", "sisters", "size_difference", "skeleton", "skinny", "slave", "slime_(substance)", "soldier", "spiked_shell", "spokencharacter", "steampunk", "streetwear", "striker_unit", "strongman", "submerged", "suggestive", "super_saiyan", "superhero", "surreal", "take_your_pick", "tall", "talons", "taur", "teacher", "team_rocket", "three-dimensional_maneuver_gear", "time_paradox", "tomboy", "traditional_youkai", "transformation", "trick_or_treat", "tusks", "twins", "ufo", "under_covers", "v-fin", "v-fin", "vampire", "virtual_youtuber", "waitress", "watching_television", "wedding", 
"what", "when_you_see_it", "wife_and_wife", "wing", "wings", "witch", "world_war_ii", "yandere", "year_of", "yes", "yin_yang", "yordle", "you're_doing_it_wrong", "you_gonna_get_raped", "yukkuri_shiteitte_ne", "yuri", "zombie", "(alice_in_wonderland)", "(arknights)", "(blue_archive)", "(cosplay)", "(creature)", "(emblem)", "(evangelion)", "(fate)", "(fate/stay_night)", "(ff11)", "(fire_emblem)", "(genshin_impact)", "(grimm)", "(houseki_no_kuni)", "(hyouka)", "(idolmaster)", "(jojo)", "(kancolle)", "(kantai_collection)", "(kill_la_kill)", "(league_of_legends)", "(legends)", "(lyomsnpmp)", "(machimazo)", "(madoka_magica)", "(mecha)", "(meme)", "(nier:automata)", "(organ)", "(overwatch)", "(pokemon)", "(project_moon)", "(project_sekai)", "(sao)", "(senran_kagura)", "(splatoon)", "(touhou)", "(tsukumo_sana)", "(youkai_watch)", "(yu-gi-oh!_gx)", "(zelda)", "sextuplets", "imperial_japanese_army", "extra_faces", "_miku", ],
61
+ #構図/構圖
62
+ "Composition" : ["abstract", "anime_coloring", "animification", "back-to-back", "bad_anatomy", "blurry", "border", "bound", "cameo", "cheek-to-cheek", "chromatic_aberration", "close-up", "collage", "color_guide", "colorful", "comic", "contrapposto", "cover", "cowboy_shot", "crosshatching", "depth_of_field", "dominatrix", "dutch_angle", "_focus", "face-to-face", "fake_screenshot", "film_grain", "fisheye", "flat_color", "foreshortening", "from_above", "from_behind", "from_below", "from_side", "full_body", "glitch", "greyscale", "halftone", "head_only", "heads-up_display", "high_contrast", "horizon", "_inset", "inset", "jaggy_lines", "1koma", "2koma", "3koma", "4koma", "5koma", "leaning", "leaning_forward", "leaning_to_the_side", "left-to-right_manga", "lens_flare", "limited_palette", "lineart", "lineup", "lower_body", "(medium)", "marker_(medium)", "meme", "mixed_media", "monochrome", "multiple_views", "muted_color", "oekaki", "on_side", "out_of_frame", "outline", "painting", "parody", "partially_colored", "partially_underwater_shot", "perspective", "photorealistic", "picture_frame", "pillarboxed", "portrait", "poster_(object)", "product_placement", "profile", "realistic", "recording", "retro_artstyle", "(style)", "_style", "sandwiched", "science_fiction", "sepia", "shikishi", "side-by-side", "sideways", "sideways_glance", "silhouette", "sketch", "spot_color", "still_life", "straight-on", "symmetry", "(texture)", "tachi-e", "taking_picture", "tegaki", "too_many", "traditional_media", "turnaround", "underwater", "upper_body", "upside-down", "upskirt", "variations", "wide_shot", "_design", "symbolism", "rounded_corners", "surrounded", ],
63
+ #季節/季節
64
+ "Season" : ["akeome", "anniversary", "autumn", "birthday", "christmas", "_day", "festival", "halloween", "kotoyoro", "nengajou", "new_year", "spring_(season)", "summer", "tanabata", "valentine", "winter", ],
65
+ # Background
66
+ "Background" : ["_background", "backlighting", "bloom", "bokeh", "brick_wall", "bubble", "cable", "caustics", "cityscape", "cloud", "confetti", "constellation", "contrail", "crowd", "crystal", "dark", "debris", "dusk", "dust", "egasumi", "embers", "emphasis_lines", "energy", "evening", "explosion", "fireworks", "fog", "footprints", "glint", "graffiti", "ice", "industrial_pipe", "landscape", "light", "light_particles", "light_rays", "lightning", "lights", "moonlight", "motion_blur", "motion_lines", "mountainous_horizon", "nature", "(planet)", "pagoda", "people", "pillar", "planet", "power_lines", "puddle", "rain", "rainbow", "reflection", "ripples", "rubble", "ruins", "scenery", "shade", "shooting_star", "sidelighting", "smoke", "snowflakes", "snowing", "space", "sparkle", "sparks", "speed_lines", "spider_web", "spotlight", "star_(sky)", "stone_wall", "sunbeam", "sunburst", "sunrise", "_theme", "tile_wall", "twilight", "wall_clock", "wall_of_text", "water", "waves", "wind", "wire", "wooden_wall", "lighthouse", ],
67
+ # Patterns
68
+ "Patterns" : ["arrow", "bass_clef", "blank_censor", "circle", "cube", "heart", "hexagon", "hexagram", "light_censor", "(pattern)", "pattern", "pentagram", "roman_numeral", "(shape)", "(symbol)", "shape", "sign", "symbol", "tally", "treble_clef", "triangle", "tube", "yagasuri", ],
69
+ # Censorship
70
+ "Censorship" : ["blur_censor", "_censor", "_censoring", "censored", "character_censor", "convenient", "hair_censor", "heart_censor", "identity_censor", "maebari", "novelty_censor", "soap_censor", "steam_censor", "tail_censor", "uncensored", ],
71
+ # Others
72
+ "Others" : ["2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023", "2024", "artist", "artist_name", "artistic_error", "asian", "(company)", "character_name", "content_rating", "copyright", "cover_page", "dated", "english_text", "japan", "layer", "logo", "name", "numbered", "page_number", "pixiv_id", "ranguage", "reference_sheet", "signature", "speech_bubble", "subtitled", "text", "thank_you", "typo", "username", "wallpaper", "watermark", "web_address", "screwdriver", "translated", ],
73
+ "Quality Tags" : ["masterpiece", "_quality", "highres", "absurdres", "ultra-detailed", "lowres", ],
74
+ }
75
+
76
+ reversed_categories = {value: key for key, values in categories.items() for value in values}
77
+
78
+ # Precompute keyword lengths
79
+ keyword_lengths = {keyword: len(keyword) for keyword in reversed_categories}
80
+
81
+ # Trie for efficient keyword matching
82
+ class TrieNode:
83
+ def __init__(self):
84
+ self.children = {}
85
+ self.category = None
86
+
87
+ def build_trie(keywords):
88
+ root = TrieNode()
89
+ for keyword, category in keywords.items(): # use the mapping passed in rather than the module-level global
90
+ node = root
91
+ for char in keyword:
92
+ if char not in node.children:
93
+ node.children[char] = TrieNode()
94
+ node = node.children[char]
95
+ node.category = category
96
+ return root
97
+
98
+ trie_root = build_trie(reversed_categories)
99
+
100
+ def find_category(trie_root, tag):
101
+ node = trie_root
102
+ for char in tag:
103
+ if char in node.children:
104
+ node = node.children[char]
105
+ if node.category:
106
+ return node.category
107
+ else:
108
+ break
109
+ return None
110
+
111
+ def classify_tags(tags: list[str], local_test: bool = False):
112
+ # Dictionary for automatic classification
113
+ classified_tags: defaultdict[str, list] = defaultdict(list)
114
+ fuzzy_match_tags: defaultdict[str, list] = defaultdict(list)
115
+ unclassified_tags: list[str] = []
116
+
117
+ # Logic for automatic grouping
118
+ for tag in tags:
119
+ classified = False
120
+ tag_new = tag.replace(" ", "_").replace("-", "_").replace("\\(", "(").replace("\\)", ")") # Normalize: spaces and hyphens to underscores, unescape parentheses
121
+
122
+ # Exact match using the trie
123
+ category = find_category(trie_root, tag_new)
124
+ if category:
125
+ classified = True
126
+ else:
127
+ # Fuzzy match
+ for keyword, keyword_length in keyword_lengths.items():
+ if keyword in tag_new and keyword_length > 3: # Adjust the threshold if needed
+ classified = True
+ category = reversed_categories[keyword]
+ fuzzy_match_tags[category].append(tag) # Record the fuzzy match so the local_test report is populated
+ break
134
+
135
+ if classified and tag not in classified_tags[category]: # Avoid duplicates
136
+ classified_tags[category].append(tag)
137
+ elif not classified and tag not in unclassified_tags:
138
+ unclassified_tags.append(tag) # Unclassified tags
139
+
140
+ if local_test:
141
+ # Output the grouping result
142
+ for category, tags in classified_tags.items():
143
+ print(f"{category}:")
144
+ print(", ".join(tags))
145
+ print()
146
+
147
+ print()
148
+ print("Fuzzy match:")
149
+ for category, tags in fuzzy_match_tags.items():
150
+ print(f"{category}:")
151
+ print(", ".join(tags))
152
+ print()
153
+ print()
154
+
155
+ if len(unclassified_tags) > 0:
156
+ print(f"\nUnclassified tags: {len(unclassified_tags)}")
157
+ print(f"{unclassified_tags[:200]}") # Display some unclassified tags
158
+
159
+ return classified_tags, unclassified_tags
160
+
161
+ return classified_tags, unclassified_tags
162
+
163
+ # Code for "Tag Categorizer" tab
164
+ def process_tags(input_tags: str):
165
+ # Clean and split the input tags <- Fix later
166
+ # tags = [tag.strip().split()[0] for tag in input_tags.split('?') if tag.strip()]
167
+ # tags = [tag.replace('_', ' ') for tag in tags]
168
+ tags = [tag.strip() for tag in input_tags.split(',') if tag.strip()]
169
+ classified_tags, unclassified_tags = classify_tags(tags)
170
+
171
+ categorized_string = ', '.join([tag for category in classified_tags.values() for tag in category])
172
+ categorized_json = {category: tags for category, tags in classified_tags.items()}
173
+
174
+ return categorized_string, categorized_json
175
+
176
+ tags = []
177
+ if __name__ == "__main__":
178
+ classify_tags(tags, True)
179
+ process_tags(", ".join(tags)) # feed the example list above as the comma-separated string process_tags expects
modules/florence2.py ADDED
@@ -0,0 +1,102 @@
1
+ import os
2
+ from transformers import AutoProcessor,AutoModelForCausalLM
3
+ import copy
4
+ from PIL import Image,ImageDraw,ImageFont
5
+ import io,spaces,matplotlib.pyplot as plt,matplotlib.patches as patches,random,numpy as np
6
+ from unittest.mock import patch
7
+ import gradio as gr # needed for gr.Dropdown in update_task_dropdown below
8
+ from transformers.dynamic_module_utils import get_imports
9
+
10
+ def fixed_get_imports(filename:str|os.PathLike)->list[str]:
11
+ if not str(filename).endswith('/modeling_florence2.py'):return get_imports(filename)
12
+ imports=get_imports(filename)
13
+ if'flash_attn'in imports:imports.remove('flash_attn')
14
+ return imports
15
+ @spaces.GPU
16
+ def get_device_type():
17
+ import torch
18
+ if torch.cuda.is_available():return'cuda'
19
+ elif torch.backends.mps.is_available()and torch.backends.mps.is_built():return'mps'
20
+ else:return'cpu'
21
+
22
+ model_id = 'MiaoshouAI/Florence-2-base-PromptGen-v2.0'
23
+
24
+ import subprocess
25
+ device = get_device_type()
26
+ if (device == "cuda"):
27
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
28
+ model = AutoModelForCausalLM.from_pretrained("MiaoshouAI/Florence-2-base-PromptGen-v2.0", trust_remote_code=True)
29
+ processor = AutoProcessor.from_pretrained("MiaoshouAI/Florence-2-base-PromptGen-v2.0", trust_remote_code=True)
30
+ model.to(device)
31
+ else:
32
+ with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
33
+ model = AutoModelForCausalLM.from_pretrained("MiaoshouAI/Florence-2-base-PromptGen-v2.0", trust_remote_code=True)
34
+ processor = AutoProcessor.from_pretrained("MiaoshouAI/Florence-2-base-PromptGen-v2.0", trust_remote_code=True)
35
+ model.to(device)
36
+
37
+ colormap=['blue','orange','green','purple','brown','pink','gray','olive','cyan','red','lime','indigo','violet','aqua','magenta','coral','gold','tan','skyblue']
38
+
39
+ def fig_to_pil(fig):buf=io.BytesIO();fig.savefig(buf,format='png');buf.seek(0);return Image.open(buf)
40
+ @spaces.GPU
41
+ def run_example(task_prompt,image,text_input=None):
42
+ if text_input is None:prompt=task_prompt
43
+ else:prompt=task_prompt+text_input
44
+ inputs=processor(text=prompt,images=image,return_tensors='pt').to(device);generated_ids=model.generate(input_ids=inputs['input_ids'],pixel_values=inputs['pixel_values'],max_new_tokens=1024,early_stopping=False,do_sample=False,num_beams=3);generated_text=processor.batch_decode(generated_ids,skip_special_tokens=False)[0];parsed_answer=processor.post_process_generation(generated_text,task=task_prompt,image_size=(image.width,image.height));return parsed_answer
45
+ def plot_bbox(image,data):
46
+ fig,ax=plt.subplots();ax.imshow(image)
47
+ for(bbox,label)in zip(data['bboxes'],data['labels']):x1,y1,x2,y2=bbox;rect=patches.Rectangle((x1,y1),x2-x1,y2-y1,linewidth=1,edgecolor='r',facecolor='none');ax.add_patch(rect);plt.text(x1,y1,label,color='white',fontsize=8,bbox=dict(facecolor='red',alpha=.5))
48
+ ax.axis('off');return fig
49
+ def draw_polygons(image,prediction,fill_mask=False):
50
+ draw=ImageDraw.Draw(image);scale=1
51
+ for(polygons,label)in zip(prediction['polygons'],prediction['labels']):
52
+ color=random.choice(colormap);fill_color=random.choice(colormap)if fill_mask else None
53
+ for _polygon in polygons:
54
+ _polygon=np.array(_polygon).reshape(-1,2)
55
+ if len(_polygon)<3:print('Invalid polygon:',_polygon);continue
56
+ _polygon=(_polygon*scale).reshape(-1).tolist()
57
+ if fill_mask:draw.polygon(_polygon,outline=color,fill=fill_color)
58
+ else:draw.polygon(_polygon,outline=color)
59
+ draw.text((_polygon[0]+8,_polygon[1]+2),label,fill=color)
60
+ return image
61
+
62
+ def draw_ocr_bboxes(image,prediction):
63
+ scale=1;draw=ImageDraw.Draw(image);bboxes,labels=prediction['quad_boxes'],prediction['labels']
64
+ for(box,label)in zip(bboxes,labels):color=random.choice(colormap);new_box=(np.array(box)*scale).tolist();draw.polygon(new_box,width=3,outline=color);draw.text((new_box[0]+8,new_box[1]+2),'{}'.format(label),align='right',fill=color)
65
+ return image
66
+ def convert_to_od_format(data):bboxes=data.get('bboxes',[]);labels=data.get('bboxes_labels',[]);od_results={'bboxes':bboxes,'labels':labels};return od_results
67
+
68
+ def process_image(image,task_prompt,text_input=None):
69
+ if isinstance(image,str):image=Image.open(image)
70
+ else:image=Image.fromarray(image)
71
+ if task_prompt=='Caption':task_prompt='<CAPTION>';results=run_example(task_prompt,image);return results[task_prompt],None
72
+ elif task_prompt=='Detailed Caption':task_prompt='<DETAILED_CAPTION>';results=run_example(task_prompt,image);return results[task_prompt],None
73
+ elif task_prompt=='More Detailed Caption':task_prompt='<MORE_DETAILED_CAPTION>';results=run_example(task_prompt,image);return results,None
74
+ elif task_prompt=='Caption + Grounding':task_prompt='<CAPTION>';results=run_example(task_prompt,image);text_input=results[task_prompt];task_prompt='<CAPTION_TO_PHRASE_GROUNDING>';results=run_example(task_prompt,image,text_input);results['<CAPTION>']=text_input;fig=plot_bbox(image,results['<CAPTION_TO_PHRASE_GROUNDING>']);return results,fig_to_pil(fig)
75
+ elif task_prompt=='Detailed Caption + Grounding':task_prompt='<DETAILED_CAPTION>';results=run_example(task_prompt,image);text_input=results[task_prompt];task_prompt='<CAPTION_TO_PHRASE_GROUNDING>';results=run_example(task_prompt,image,text_input);results['<DETAILED_CAPTION>']=text_input;fig=plot_bbox(image,results['<CAPTION_TO_PHRASE_GROUNDING>']);return results,fig_to_pil(fig)
76
+ elif task_prompt=='More Detailed Caption + Grounding':task_prompt='<MORE_DETAILED_CAPTION>';results=run_example(task_prompt,image);text_input=results[task_prompt];task_prompt='<CAPTION_TO_PHRASE_GROUNDING>';results=run_example(task_prompt,image,text_input);results['<MORE_DETAILED_CAPTION>']=text_input;fig=plot_bbox(image,results['<CAPTION_TO_PHRASE_GROUNDING>']);return results,fig_to_pil(fig)
77
+ elif task_prompt=='Object Detection':task_prompt='<OD>';results=run_example(task_prompt,image);fig=plot_bbox(image,results['<OD>']);return results,fig_to_pil(fig)
78
+ elif task_prompt=='Dense Region Caption':task_prompt='<DENSE_REGION_CAPTION>';results=run_example(task_prompt,image);fig=plot_bbox(image,results['<DENSE_REGION_CAPTION>']);return results,fig_to_pil(fig)
79
+ elif task_prompt=='Region Proposal':task_prompt='<REGION_PROPOSAL>';results=run_example(task_prompt,image);fig=plot_bbox(image,results['<REGION_PROPOSAL>']);return results,fig_to_pil(fig)
80
+ elif task_prompt=='Caption to Phrase Grounding':task_prompt='<CAPTION_TO_PHRASE_GROUNDING>';results=run_example(task_prompt,image,text_input);fig=plot_bbox(image,results['<CAPTION_TO_PHRASE_GROUNDING>']);return results,fig_to_pil(fig)
81
+ elif task_prompt=='Referring Expression Segmentation':task_prompt='<REFERRING_EXPRESSION_SEGMENTATION>';results=run_example(task_prompt,image,text_input);output_image=copy.deepcopy(image);output_image=draw_polygons(output_image,results['<REFERRING_EXPRESSION_SEGMENTATION>'],fill_mask=True);return results,output_image
82
+ elif task_prompt=='Region to Segmentation':task_prompt='<REGION_TO_SEGMENTATION>';results=run_example(task_prompt,image,text_input);output_image=copy.deepcopy(image);output_image=draw_polygons(output_image,results['<REGION_TO_SEGMENTATION>'],fill_mask=True);return results,output_image
83
+ elif task_prompt=='Open Vocabulary Detection':task_prompt='<OPEN_VOCABULARY_DETECTION>';results=run_example(task_prompt,image,text_input);bbox_results=convert_to_od_format(results['<OPEN_VOCABULARY_DETECTION>']);fig=plot_bbox(image,bbox_results);return results,fig_to_pil(fig)
84
+ elif task_prompt=='Region to Category':task_prompt='<REGION_TO_CATEGORY>';results=run_example(task_prompt,image,text_input);return results,None
85
+ elif task_prompt=='Region to Description':task_prompt='<REGION_TO_DESCRIPTION>';results=run_example(task_prompt,image,text_input);return results,None
86
+ elif task_prompt=='OCR':task_prompt='<OCR>';results=run_example(task_prompt,image);return results,None
87
+ elif task_prompt=='OCR with Region':task_prompt='<OCR_WITH_REGION>';results=run_example(task_prompt,image);output_image=copy.deepcopy(image);output_image=draw_ocr_bboxes(output_image,results['<OCR_WITH_REGION>']);return results,output_image
88
+ else:return'',None # Return empty string and None for unknown task prompts
89
+
90
+ single_task_list=['Caption','Detailed Caption','More Detailed Caption','Object Detection','Dense Region Caption','Region Proposal','Caption to Phrase Grounding','Referring Expression Segmentation','Region to Segmentation','Open Vocabulary Detection','Region to Category','Region to Description','OCR','OCR with Region']
91
+ cascaded_task_list=['Caption + Grounding','Detailed Caption + Grounding','More Detailed Caption + Grounding']
92
+
93
+ def update_task_dropdown(choice):
94
+ if choice == 'Cascaded task':
95
+ return gr.Dropdown(choices=cascaded_task_list, value='Caption + Grounding')
96
+ else:
97
+ return gr.Dropdown(choices=single_task_list, value='Caption')
98
+
99
+ if __name__ == "__main__":
100
+ # Quick sanity checks; point process_image at a real image file to exercise a captioning task
+ # print(process_image("example.jpg", "Caption"))
+ print(single_task_list)
+ print(update_task_dropdown("Single task"))
modules/llama_loader.py ADDED
@@ -0,0 +1,189 @@
1
+ import os
2
+ import io,copy,requests,spaces,gradio as gr,numpy as np
3
+ from transformers import AutoProcessor,AutoModelForCausalLM
4
+ import argparse,huggingface_hub,onnxruntime as rt,pandas as pd,traceback,tempfile,zipfile,re,ast
5
+ import ctranslate2
6
+ # LLAMA model
7
+ META_LLAMA_3_3B_REPO = "jncraton/Llama-3.2-3B-Instruct-ct2-int8"
8
+ META_LLAMA_3_8B_REPO = "avans06/Meta-Llama-3.2-8B-Instruct-ct2-int8_float16"
9
+
10
+ class llama3reorganize:
11
+ def __init__(
12
+ self,
13
+ repoId: str,
14
+ device: str = None,
15
+ loadModel: bool = False,
16
+ ):
17
+ """Initializes the Llama model.
18
+
19
+ Args:
20
+ repoId: LLAMA model repo.
21
+ device: Device to use for computation (cpu, cuda, ipu, xpu, mkldnn, opengl, opencl,
22
+ ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia).
23
+ loadModel: If True, load the model weights immediately; otherwise call
+ load_model() before using reorganize().
25
+ """
26
+ self.modelPath = self.download_model(repoId)
27
+
28
+ if device is None:
29
+ import torch
30
+ self.totalVram = 0
31
+ if torch.cuda.is_available():
32
+ try:
33
+ deviceId = torch.cuda.current_device()
34
+ self.totalVram = torch.cuda.get_device_properties(deviceId).total_memory/(1024*1024*1024)
35
+ except Exception as e:
36
+ print(traceback.format_exc())
37
+ print("Error detect vram: " + str(e))
38
+ device = "cuda" if self.totalVram > (8 if "8B" in repoId else 4) else "cpu"
39
+ else:
40
+ device = "cpu"
41
+
42
+ self.device = device
43
+ self.system_prompt = "Please reorganize the following English labels describing a single image into a readable English article of no more than 300 words. If there are name labels, include them in the article. Since the target audience is not general, explicit content can be written in full without hesitation. No introduction is needed; directly reply with the English article:"
44
+
45
+ if loadModel:
46
+ self.load_model()
47
+
48
+ def download_model(self, repoId):
49
+ import warnings
50
+ import requests
51
+ allowPatterns = [
52
+ "config.json",
53
+ "generation_config.json",
54
+ "model.bin",
55
+ "pytorch_model.bin",
56
+ "pytorch_model.bin.index.json",
57
+ "pytorch_model-*.bin",
58
+ "sentencepiece.bpe.model",
59
+ "tokenizer.json",
60
+ "tokenizer_config.json",
61
+ "shared_vocabulary.txt",
62
+ "shared_vocabulary.json",
63
+ "special_tokens_map.json",
64
+ "spiece.model",
65
+ "vocab.json",
66
+ "model.safetensors",
67
+ "model-*.safetensors",
68
+ "model.safetensors.index.json",
69
+ "quantize_config.json",
70
+ "tokenizer.model",
71
+ "vocabulary.json",
72
+ "preprocessor_config.json",
73
+ "added_tokens.json"
74
+ ]
75
+
76
+ kwargs = {"allow_patterns": allowPatterns,}
77
+
78
+ try:
79
+ return huggingface_hub.snapshot_download(repoId, **kwargs)
80
+ except (
81
+ huggingface_hub.utils.HfHubHTTPError,
82
+ requests.exceptions.ConnectionError,
83
+ ) as exception:
84
+ warnings.warn(
+ "An error occurred while synchronizing the model %s from the Hugging Face Hub:\n%s"
+ % (repoId, exception)
+ )
89
+ warnings.warn(
90
+ "Trying to load the model directly from the local cache, if it exists."
91
+ )
92
+
93
+ kwargs["local_files_only"] = True
94
+ return huggingface_hub.snapshot_download(repoId, **kwargs)
95
+
96
+
97
+ def load_model(self):
98
+ import ctranslate2
99
+ import transformers
100
+ try:
101
+ print('\n\nLoading model: %s\n\n' % self.modelPath)
102
+ kwargsTokenizer = {"pretrained_model_name_or_path": self.modelPath}
103
+ kwargsModel = {"device": self.device, "model_path": self.modelPath, "compute_type": "auto"}
104
+ self.roleSystem = {"role": "system", "content": self.system_prompt}
105
+ self.Model = ctranslate2.Generator(**kwargsModel)
106
+
107
+ self.Tokenizer = transformers.AutoTokenizer.from_pretrained(**kwargsTokenizer)
108
+ self.terminators = [self.Tokenizer.eos_token_id, self.Tokenizer.convert_tokens_to_ids("<|eot_id|>")]
109
+
110
+ except Exception as e:
111
+ self.release_vram()
112
+ raise e
113
+
114
+
115
+ def release_vram(self):
116
+ try:
117
+ import torch
118
+ if torch.cuda.is_available():
119
+ if getattr(self, "Model", None) is not None and getattr(self.Model, "unload_model", None) is not None:
120
+ self.Model.unload_model()
121
+
122
+ if getattr(self, "Tokenizer", None) is not None:
123
+ del self.Tokenizer
124
+ if getattr(self, "Model", None) is not None:
125
+ del self.Model
126
+ import gc
127
+ gc.collect()
128
+ try:
129
+ torch.cuda.empty_cache()
130
+ except Exception as e:
131
+ print(traceback.format_exc())
132
+ print("\tcuda empty cache, error: " + str(e))
133
+ print("release vram end.")
134
+ except Exception as e:
135
+ print(traceback.format_exc())
136
+ print("Error release vram: " + str(e))
137
+
138
+ def reorganize(self, text: str, max_length: int = 400):
139
+ output = None
140
+ result = None
141
+ try:
142
+ input_ids = self.Tokenizer.apply_chat_template([self.roleSystem, {"role": "user", "content": text + "\n\nHere's the reorganized English article:"}], tokenize=False, add_generation_prompt=True)
143
+ source = self.Tokenizer.convert_ids_to_tokens(self.Tokenizer.encode(input_ids))
144
+ output = self.Model.generate_batch([source], max_length=max_length, max_batch_size=2, no_repeat_ngram_size=3, beam_size=2, sampling_temperature=0.7, sampling_topp=0.9, include_prompt_in_result=False, end_token=self.terminators)
145
+ target = output[0]
146
+ result = self.Tokenizer.decode(target.sequences_ids[0])
147
+
148
+ if len(result) > 2:
149
+ if result[0] == "\"" and result[len(result) - 1] == "\"":
150
+ result = result[1:-1]
151
+ elif result[0] == "'" and result[len(result) - 1] == "'":
152
+ result = result[1:-1]
153
+ elif result[0] == "「" and result[len(result) - 1] == "」":
154
+ result = result[1:-1]
155
+ elif result[0] == "『" and result[len(result) - 1] == "』":
156
+ result = result[1:-1]
157
+ except Exception as e:
158
+ print(traceback.format_exc())
159
+ print("Error reorganize text: " + str(e))
160
+
161
+ return result
184
+
185
+ llama_list=[META_LLAMA_3_3B_REPO,META_LLAMA_3_8B_REPO]
186
+
187
+ if __name__ == "__main__":
188
+ print(llama_list)
+ # Instantiating downloads the model files, so keep it opt-in:
+ # llama3reorganize(META_LLAMA_3_3B_REPO, loadModel=True)