google-labs-jules[bot] committed on
Commit
c359015
·
1 Parent(s): 35d2b81

Fix: Replace failing embedding model with ko-sroberta-multitask

Browse files

The previous `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`
model was causing errors in the Streamlit community server environment.

This commit replaces it with `jhgan/ko-sroberta-multitask`, a model
optimized for Korean language tasks.

Additionally, the model loading logic in `app.py` has been corrected:
- The global `EMBEDDING_MODEL` variable was renamed to `EMBEDDING_MODEL_ID`
and now stores the model identifier string.
- The `load_embedding_model` function now correctly uses this ID to
initialize the `SentenceTransformer` instance. This ensures that
`st.cache_resource` caches the model loading process effectively and
avoids potential issues with re-instantiating an already loaded model.

No changes to `requirements.txt` were necessary as the new model is
compatible with the existing `sentence-transformers==2.2.2` version.

Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -84,7 +84,7 @@ def generate_youtube_embed_html(youtube_url: str, timestamp: str) -> str | None:
84
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY","pcsk_PZHLK_TRAvMCyNmJM4FKGCX7rbbY22a58fhnWYasx1mf3WL6sRasoASZXfsbnJYvCQ13w") # Load from environment variable
85
  PINECONE_ENV = os.getenv("PINECONE_ENV", "us-east-1")
86
  INDEX_NAME = "video-embeddings"
87
- EMBEDDING_MODEL = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
88
 
89
  # OpenAI 설정
90
  OPENAI_API_KEY = "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"
@@ -109,8 +109,8 @@ def init_pinecone():
109
  def load_embedding_model():
110
  """Sentence Transformer 모델을 로드합니다."""
111
  try:
112
- model = SentenceTransformer(EMBEDDING_MODEL)
113
- logger.info(f"Successfully loaded embedding model: {EMBEDDING_MODEL}")
114
  return model
115
  except Exception as e:
116
  st.error(f"임베딩 모델 로딩 중 오류 발생: {e}")
 
84
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY","pcsk_PZHLK_TRAvMCyNmJM4FKGCX7rbbY22a58fhnWYasx1mf3WL6sRasoASZXfsbnJYvCQ13w") # Load from environment variable
85
  PINECONE_ENV = os.getenv("PINECONE_ENV", "us-east-1")
86
  INDEX_NAME = "video-embeddings"
87
+ EMBEDDING_MODEL_ID = 'jhgan/ko-sroberta-multitask' # ๋ชจ๋ธ ID๋กœ ๋ณ€๊ฒฝ
88
 
89
  # OpenAI 설정
90
  OPENAI_API_KEY = "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"
 
109
  def load_embedding_model():
110
  """Sentence Transformer 모델을 로드합니다."""
111
  try:
112
+ model = SentenceTransformer(EMBEDDING_MODEL_ID) # 수정된 모델 ID 사용
113
+ logger.info(f"Successfully loaded embedding model: {EMBEDDING_MODEL_ID}") # 수정된 모델 ID 사용
114
  return model
115
  except Exception as e:
116
  st.error(f"임베딩 모델 로딩 중 오류 발생: {e}")