#!/usr/bin/env python3
"""
Compact BERT + FAISS slogan database.

Input:  a product / business description
Output: matching advertising slogans
"""

import numpy as np
import faiss
import json
from sentence_transformers import SentenceTransformer
from datasets import Dataset
import pandas as pd


class SloganDatabase:
    """Semantic slogan lookup backed by a sentence encoder and a FAISS index.

    Attributes:
        encoder: ``SentenceTransformer`` used to embed both the catalogue
            texts and the queries.
        index: FAISS inner-product index, or ``None`` until ``build_index``
            or a successful ``load`` has run.
        slogans: list of record dicts; position ``i`` corresponds to vector
            ``i`` in ``index``.
    """

    def __init__(self):
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
        self.index = None
        self.slogans = []

    def create_dataset(self):
        """Create the slogan dataset (jewelry / luxury-goods domain).

        Returns:
            A list of dicts with the keys ``brand``, ``category``,
            ``description``, ``slogan`` and ``search_text`` (brand, category
            and description joined by single spaces).
        """
        # Sample rows: [brand, category, description, slogan]
        data = [
            # Top-tier jewelry houses
            ["Tiffany & Co.", "jewelry", "luxury diamond jewelry and engagement rings", "A Diamond is Forever"],
            ["Cartier", "luxury_jewelry", "high-end jewelry watches and accessories", "L'art de vivre"],
            ["Van Cleef & Arpels", "jewelry", "French luxury jewelry and watches", "Poetry of Time"],
            ["Harry Winston", "jewelry", "rare diamonds and luxury jewelry", "Rare Jewels of the World"],
            ["Bulgari", "jewelry", "Italian luxury jewelry and watches", "Italian Excellence"],
            ["Chopard", "jewelry", "Swiss luxury jewelry and watches", "Happy Diamonds"],
            ["Graff", "jewelry", "exceptional diamonds and jewelry", "The Most Fabulous Jewels in the World"],
            ["Piaget", "jewelry", "Swiss luxury watches and jewelry", "Possession"],
            ["Boucheron", "jewelry", "French high jewelry and luxury watches", "Le Joaillier Depuis 1858"],
            ["Mikimoto", "jewelry", "cultured pearl jewelry", "The Originator of Cultured Pearls"],

            # Luxury fashion houses
            ["Louis Vuitton", "luxury_fashion", "luxury leather goods and fashion", "The Art of Travel"],
            ["Hermès", "luxury_fashion", "French luxury goods and accessories", "Luxury in the making"],
            ["Chanel", "luxury_fashion", "haute couture and luxury fashion", "Inside every woman there is a flower and a cat"],
            ["Gucci", "luxury_fashion", "Italian luxury fashion and accessories", "Quality is remembered long after price is forgotten"],
            ["Prada", "luxury_fashion", "Italian luxury fashion house", "Prada"],
            ["Dior", "luxury_fashion", "French luxury fashion and beauty", "Miss Dior"],
            ["Versace", "luxury_fashion", "Italian luxury fashion design", "Virtus"],
            ["Saint Laurent", "luxury_fashion", "French luxury fashion house", "Saint Laurent Paris"],
            ["Balenciaga", "luxury_fashion", "Spanish luxury fashion house", "Balenciaga"],
            ["Bottega Veneta", "luxury_fashion", "Italian luxury leather goods", "When your own initials are enough"],

            # Watch brands
            ["Rolex", "luxury_watches", "Swiss luxury watches and timepieces", "Perpetual, Spirit of Excellence"],
            ["Patek Philippe", "luxury_watches", "Swiss luxury watch manufacturer", "You never actually own a Patek Philippe"],
            ["Audemars Piguet", "luxury_watches", "Swiss luxury watch brand", "To break the rules, you must first master them"],
            ["Omega", "luxury_watches", "Swiss luxury watch manufacturer", "Precision"],
            ["TAG Heuer", "luxury_watches", "Swiss luxury watches", "Don't crack under pressure"],
            ["Breitling", "luxury_watches", "Swiss luxury watchmaker", "Instruments for Professionals"],
            ["IWC", "luxury_watches", "Swiss luxury watch company", "Engineered for men"],
            ["Jaeger-LeCoultre", "luxury_watches", "Swiss luxury watch manufacturer", "The World's Most Complicated Watches"],
            ["Vacheron Constantin", "luxury_watches", "Swiss luxury watch manufacturer", "One of Not Many"],
            ["A. Lange & Söhne", "luxury_watches", "German luxury watch manufacturer", "When nothing else will do"],

            # Fashion jewelry
            ["Pandora", "fashion_jewelry", "Danish jewelry brand charm bracelets", "Be Love"],
            ["Swarovski", "fashion_jewelry", "Austrian crystal jewelry and accessories", "Unleash Your Light"],
            ["Daniel Wellington", "fashion_watches", "Swedish watch brand minimalist design", "Live the moment"],
            ["Alex and Ani", "fashion_jewelry", "American jewelry brand spiritual bracelets", "Positive Energy"],
            ["Kendra Scott", "fashion_jewelry", "American jewelry designer colorful stones", "Live colorfully"],
            ["Monica Vinader", "fashion_jewelry", "British jewelry brand contemporary design", "Everyday luxury"],
            ["Mejuri", "fashion_jewelry", "Canadian jewelry brand everyday luxury", "Everyday fine"],
            ["Gorjana", "fashion_jewelry", "California jewelry brand layered necklaces", "Live your layer"],
            ["Kate Spade", "fashion_jewelry", "American fashion accessories jewelry", "Live colorfully"],
            ["Marc Jacobs", "fashion_jewelry", "American fashion designer accessories", "Marc Jacobs"],

            # Bespoke / diamond retailers
            ["Blue Nile", "diamond_jewelry", "online diamond jewelry retailer", "Extraordinary diamonds for extraordinary moments"],
            ["James Allen", "diamond_jewelry", "online engagement ring retailer", "See it. Love it. Own it."],
            ["Brilliant Earth", "diamond_jewelry", "ethical diamond jewelry", "Brilliant Earth"],
            ["With Clarity", "diamond_jewelry", "lab-grown diamond jewelry", "Diamonds. Redefined."],
            ["Clean Origin", "diamond_jewelry", "lab-created diamond jewelry", "Grown with love"],
            ["Ritani", "diamond_jewelry", "engagement rings and wedding bands", "Love is in the details"],
            ["Vrai", "diamond_jewelry", "lab-grown diamond jewelry", "Created, not mined"],
            ["Catbird", "jewelry", "Brooklyn-based jewelry designer", "Made in Brooklyn"],
            ["Wwake", "jewelry", "contemporary fine jewelry designer", "Wwake"],
            ["Jacquie Aiche", "jewelry", "California jewelry designer bohemian luxury", "Jacquie Aiche"],

            # Chinese jewelry brands
            ["周大福", "jewelry", "香港珠宝品牌黄金钻石", "心意足金"],
            ["周生生", "jewelry", "香港珠宝品牌传统工艺", "传承经典"],
            ["老凤祥", "jewelry", "中国传统珠宝品牌黄金首饰", "老凤祥,真金不怕火炼"],
            ["六福珠宝", "jewelry", "香港珠宝品牌时尚设计", "六福临门"],
            ["潘多拉", "jewelry", "丹麦珠宝品牌串珠手链", "表达你的故事"],
            ["周大生", "jewelry", "中国珠宝品牌钻石首饰", "爱就在一起"],
            ["金伯利", "jewelry", "中国钻石珠宝品牌", "只为更好的你"],
            ["戴比尔斯", "diamond_jewelry", "钻石开采珠宝品牌", "钻石恒久远,一颗永流传"],
            ["施华洛世奇", "crystal_jewelry", "奥地利水晶珠宝品牌", "释放你的光芒"],
            ["谢瑞麟", "jewelry", "香港珠宝设计师品牌", "艺术珠宝"],

            # Luxury accessories
            ["Goyard", "luxury_accessories", "French luxury leather goods", "Goyard"],
            ["Moynat", "luxury_accessories", "French luxury leather goods", "Moynat"],
            ["Berluti", "luxury_accessories", "French luxury leather goods", "Berluti"],
            ["Valextra", "luxury_accessories", "Italian luxury leather goods", "Milanese excellence since 1937"],
            ["Loewe", "luxury_accessories", "Spanish luxury leather goods", "Craft"],
            ["Brunello Cucinelli", "luxury_fashion", "Italian luxury fashion cashmere", "Humanistic Enterprise"],
            ["Loro Piana", "luxury_fashion", "Italian luxury textile and clothing", "Excellence in natural fibers"],
            ["Kiton", "luxury_fashion", "Italian luxury menswear", "The most beautiful thing made by man"],
            ["Zegna", "luxury_fashion", "Italian luxury menswear", "What makes a man"],
            ["Brioni", "luxury_fashion", "Italian luxury menswear", "Roman style"],

            # Emerging luxury brands
            ["Jacquemus", "luxury_fashion", "French luxury fashion house", "La Montagne"],
            ["Ganni", "luxury_fashion", "Danish fashion brand", "Ganni"],
            ["Staud", "luxury_fashion", "American fashion brand", "Staud"],
            ["Cult Gaia", "luxury_accessories", "American accessories brand", "Cult Gaia"],
            ["Rosantica", "jewelry", "Italian jewelry brand", "Rosantica"],
            ["Alighieri", "jewelry", "British jewelry brand", "The Inferno"],
            ["Lizzie Fortunato", "jewelry", "American jewelry brand", "Lizzie Fortunato"],
            ["Aurate", "jewelry", "American jewelry brand", "Accessible luxury"],
            ["AUrate New York", "jewelry", "New York jewelry brand", "Radically responsible luxury"],
            ["Missoma", "jewelry", "British jewelry brand", "Missoma"]
        ]

        # Convert to a DataFrame so the search text can be built column-wise.
        df = pd.DataFrame(data, columns=['brand', 'category', 'description', 'slogan'])

        # The text that gets embedded: brand + category + description.
        df['search_text'] = df['brand'] + ' ' + df['category'] + ' ' + df['description']

        return df.to_dict('records')

    def build_index(self, data):
        """Embed every record's ``search_text`` and build the FAISS index.

        Args:
            data: list of record dicts, each with a ``search_text`` key
                (the shape returned by ``create_dataset``).

        Raises:
            ValueError: if ``data`` is empty.
        """
        if not data:
            raise ValueError("Cannot build an index from an empty dataset")

        print("🔨 Building FAISS index...")

        texts = [item['search_text'] for item in data]

        # Unit-length vectors make the inner product below equal to cosine
        # similarity, which is what the reported score is meant to be.
        embeddings = self.encoder.encode(
            texts,
            show_progress_bar=True,
            normalize_embeddings=True,
        )
        embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

        # Take the dimension from the encoder output instead of hard-coding
        # it, so swapping the model does not silently break the index.
        self.index = faiss.IndexFlatIP(embeddings.shape[1])
        self.index.add(embeddings)

        # Keep the records; row i of the index corresponds to data[i].
        self.slogans = data

        print(f"✅ Index built with {len(data)} slogans")

    def search(self, query: str, k: int = 5):
        """Return up to ``k`` records most similar to ``query``.

        Args:
            query: free-text product / business description.
            k: maximum number of results.

        Returns:
            A list of copies of the stored record dicts, best match first,
            each with an added ``similarity_score`` float. Fewer than ``k``
            items are returned when the index holds fewer vectors; an empty
            list is returned when ``k`` is not positive.

        Raises:
            ValueError: if no index has been built or loaded.
        """
        if self.index is None:
            raise ValueError("Index not built yet!")

        # Never ask FAISS for more neighbours than it holds: the surplus
        # slots come back as index -1 with a sentinel score.
        limit = min(k, self.index.ntotal)
        if limit <= 0:
            return []

        query_embedding = self.encoder.encode([query], normalize_embeddings=True)
        query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)

        scores, indices = self.index.search(query_embedding, limit)

        results = []
        for score, idx in zip(scores[0], indices[0]):
            # The lower bound matters: -1 would otherwise pass the upper
            # bound check and Python would return the *last* slogan.
            if 0 <= idx < len(self.slogans):
                result = self.slogans[int(idx)].copy()
                result['similarity_score'] = float(score)
                results.append(result)

        return results

    def save(self, path: str = "slogan_db"):
        """Persist the index to ``{path}.faiss`` and records to ``{path}.json``.

        Raises:
            ValueError: if no index has been built or loaded.
        """
        if self.index is None:
            raise ValueError("Index not built yet!")

        faiss.write_index(self.index, f"{path}.faiss")

        with open(f"{path}.json", 'w', encoding='utf-8') as f:
            json.dump(self.slogans, f, ensure_ascii=False, indent=2)

        print(f"💾 Database saved to {path}")

    def load(self, path: str = "slogan_db") -> bool:
        """Load a database previously written by ``save``.

        Both files are read into locals first and only assigned to the
        instance once they are known to be consistent, so a failed load
        leaves the object's previous state untouched.

        Returns:
            ``True`` on success, ``False`` if either file is missing,
            unreadable, or the two files disagree on the number of entries.
        """
        try:
            index = faiss.read_index(f"{path}.faiss")

            with open(f"{path}.json", 'r', encoding='utf-8') as f:
                slogans = json.load(f)

            if index.ntotal != len(slogans):
                raise ValueError(
                    f"index holds {index.ntotal} vectors but "
                    f"{len(slogans)} slogans were read"
                )
        except Exception:
            # Best-effort load: any ordinary failure means "no usable
            # database", while KeyboardInterrupt/SystemExit still propagate.
            print(f"❌ Failed to load database from {path}")
            return False

        self.index = index
        self.slogans = slogans
        print(f"📂 Database loaded from {path}")
        return True


def main():
    """Build (or load) the slogan database and run a few demo queries."""
    print("🚀 Creating Slogan Database...")

    db = SloganDatabase()

    # Reuse a database saved by an earlier run; otherwise build and save one.
    if not db.load():
        print("📊 Creating new database...")

        data = db.create_dataset()
        db.build_index(data)
        db.save()

    # Demo queries (mixed Chinese and English).
    test_queries = [
        "钻石订婚戒指",
        "奢侈品手袋",
        "瑞士手表品牌",
        "珍珠首饰",
        "黄金项链",
        "时尚耳环",
        "luxury jewelry brand",
        "designer handbag",
        "crystal accessories",
        "wedding rings"
    ]

    print("\n🔍 Testing searches...")
    for query in test_queries:
        print(f"\n查询: {query}")
        print("-" * 40)

        results = db.search(query, k=3)
        for i, result in enumerate(results, 1):
            print(f"{i}. {result['brand']} ({result['category']})")
            print(f" 描述: {result['description']}")
            print(f" 标语: {result['slogan']}")
            print(f" 相似度: {result['similarity_score']:.3f}")
            print()


if __name__ == "__main__":
    main()