ALVHB95 committed on
Commit
918e81f
·
1 Parent(s): 865160b

update langchain

Browse files
Files changed (2) hide show
  1. app.py +14 -97
  2. url_list.py +84 -0
app.py CHANGED
@@ -39,6 +39,9 @@ from langchain.memory import ConversationBufferMemory
39
 
40
  from pydantic.v1 import BaseModel, Field
41
 
 
 
 
42
 
43
  """
44
  =========================================================
@@ -54,7 +57,7 @@ class_labels = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
54
  def predict_image(input_image):
55
  """
56
  Resize the user-uploaded image and preprocess it so that it can be fed
57
- into the EfficientNetB0 model. The model then returns a dictionary of
58
  class probabilities.
59
  """
60
  # Resize the image (note the target dimensions)
@@ -84,7 +87,6 @@ image_gradio_app = gr.Interface(
84
  theme=theme
85
  )
86
 
87
-
88
  """
89
  =========================================================
90
  3) CHATBOT MODEL SETUP
@@ -94,86 +96,6 @@ image_gradio_app = gr.Interface(
94
  user_agent = UserAgent().random
95
  header_template = {"User-Agent": user_agent}
96
 
97
- # 3.2) List of URLs to load for retrieval
98
- URLS = [
99
- "https://www.epa.gov/recycle/frequent-questions-recycling",
100
- "https://www.whitehorsedc.gov.uk/vale-of-white-horse-district-council/recycling-rubbish-and-waste/lets-get-real-about-recycling/",
101
- "https://www.teimas.com/blog/13-preguntas-y-respuestas-sobre-la-ley-de-residuos-07-2022",
102
- "https://www.molok.com/es/blog/gestion-de-residuos-solidos-urbanos-rsu-10-dudas-comunes",
103
- "https://espanol.epa.gov/espanol/el-reciclaje#valelapena",
104
- "https://espanol.epa.gov/espanol/preguntas-frecuentes-sobre-reciclado-de-plastico-y-elaboracion-de-abono-vegetal",
105
- "https://espanol.epa.gov/espanol/consejo-del-dia-como-reciclo-mis",
106
- "https://espanol.epa.gov/espanol/recursos-para-reciclar-dispositivos-electronicos",
107
- "https://www.epa.gov/recycle/electronics-donation-and-recycling",
108
- "https://reducereutilizarecicla.org/que-es-el-reciclaje/",
109
- "https://reducereutilizarecicla.org/contenedores-de-reciclaje/",
110
- "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-amarillo/",
111
- "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-azul/",
112
- "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-verde/",
113
- "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-marron-organico/",
114
- "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-gris-restos/",
115
- "https://reducereutilizarecicla.org/contenedores-de-reciclaje/punto-limpio/",
116
- "https://reducereutilizarecicla.org/donde-tirar-auriculares/",
117
- "https://reducereutilizarecicla.org/donde-tirar-sartenes/",
118
- "https://reducereutilizarecicla.org/donde-tirar-aceite-usado/",
119
- "https://reducereutilizarecicla.org/como-se-reciclan-los-envases-tipo-brik/",
120
- "https://reducereutilizarecicla.org/los-envases-del-verano/",
121
- "https://reducereutilizarecicla.org/donde-tirar-radiografias/",
122
- "https://reducereutilizarecicla.org/envases-ecologicos/",
123
- "https://reducereutilizarecicla.org/donde-tirar-los-restos-de-pintura/",
124
- "https://reducereutilizarecicla.org/valorizacion-de-residuos/",
125
- "https://reducereutilizarecicla.org/como-reciclar-pilas/",
126
- "https://reducereutilizarecicla.org/como-reciclar-capsulas-de-cafe/",
127
- "https://reducereutilizarecicla.org/reciclando-cd/",
128
- "https://reducereutilizarecicla.org/donde-tirar-neumaticos/",
129
- "https://reducereutilizarecicla.org/como-reciclar-una-canasta-de-mimbre/",
130
- "https://reducereutilizarecicla.org/como-funciona-el-contenedor-amarillo/",
131
- "https://reducereutilizarecicla.org/donde-se-tiran-los-vapers/",
132
- "https://reducereutilizarecicla.org/cuanto-tarda-una-bolsa-biodegradable-en-degradarse/",
133
- "https://reducereutilizarecicla.org/donde-se-reciclan-los-juguetes/",
134
- "https://reducereutilizarecicla.org/objetos-que-se-pueden-reutilizar/",
135
- "https://reducereutilizarecicla.org/la-parafina-se-puede-reutilizar/",
136
- "https://reducereutilizarecicla.org/planta-de-reciclaje-de-papel/",
137
- "https://reducereutilizarecicla.org/como-saber-si-un-envase-es-reciclable/",
138
- "https://reducereutilizarecicla.org/reutilizar-vasos-de-vela/",
139
- "https://reducereutilizarecicla.org/bolsas-frio-calor/",
140
- "https://reducereutilizarecicla.org/reciclar-y-reutilizar-materiales-de-construccion/",
141
- "https://reducereutilizarecicla.org/que-es-exactamente-el-pet/",
142
- "https://reducereutilizarecicla.org/tipos-de-reciclaje/",
143
- "https://reducereutilizarecicla.org/que-hacer-con-palets-reciclados/",
144
- "https://reducereutilizarecicla.org/vertederos-controlados/",
145
- "https://reducereutilizarecicla.org/donde-tirar-escombros/",
146
- "https://reducereutilizarecicla.org/como-reciclar-los-residuos-de-ps-poliestireno/",
147
- "https://reducereutilizarecicla.org/tirar-la-basura-sin-bolsas/",
148
- "https://reducereutilizarecicla.org/tirar-el-palo-de-la-fregona/",
149
- "https://reducereutilizarecicla.org/la-mejor-manera-de-reciclar-una-pala-de-padel/",
150
- "https://reducereutilizarecicla.org/sabes-donde-tirar-las-llantas-viejas-de-un-coche/",
151
- "https://reducereutilizarecicla.org/sabes-donde-tirar-el-arbol-de-navidad/",
152
- "https://reducereutilizarecicla.org/clavos-tornillos-herramientas-donde-tirar-hierro/",
153
- "https://reducereutilizarecicla.org/donde-tirar-un-secador-de-pelo-contenedor-o-punto-limpio/",
154
- "https://reducereutilizarecicla.org/donde-tirar-electrodomesticos/",
155
- "https://reducereutilizarecicla.org/donde-puedo-tirar-ramas-de-arboles/",
156
- "https://reducereutilizarecicla.org/donde-tirar-escombros/",
157
- "https://reducereutilizarecicla.org/donde-se-tira-el-muerdago-quemado/",
158
- "https://reducereutilizarecicla.org/sandalias-caucho-reciclado-neumaticos/",
159
- "https://reducereutilizarecicla.org/ideas-para-reciclar-aspas-de-ventilador-de-techo/",
160
- "https://reducereutilizarecicla.org/reciclar-sacos-dormir/",
161
- "https://reducereutilizarecicla.org/reciclar-sillas-playa/",
162
- "https://reducereutilizarecicla.org/donde-tirar-antipolillas/",
163
- "https://reducereutilizarecicla.org/que-hacer-con-los-juguetes-viejos/",
164
- "https://reducereutilizarecicla.org/como-utilizar-las-mascarillas-y-el-gel-hidroalcoholico-en-la-playa/",
165
- "https://reducereutilizarecicla.org/ideas-para-reciclar-un-ventilador-de-pie/",
166
- "https://reducereutilizarecicla.org/donde-tirar-gasoil/",
167
- "https://reducereutilizarecicla.org/donde-puedo-tirar-basura-electronica/",
168
- "https://reducereutilizarecicla.org/donde-tirar-agujas/",
169
- "https://reducereutilizarecicla.org/donde-tirar-residuos-peligrosos/",
170
- "https://reducereutilizarecicla.org/donde-tirar-los-cables/",
171
- "https://reducereutilizarecicla.org/donde-tirar-bicicletas/",
172
- "https://reducereutilizarecicla.org/donde-tirar-maletas/",
173
- "https://reducereutilizarecicla.org/como-reciclar-una-pantalla/",
174
- "https://reducereutilizarecicla.org/donde-tirar-ropa-usada/"
175
- ]
176
-
177
 
178
  @tenacity.retry(
179
  wait=tenacity.wait_fixed(3), # wait 3 seconds between retries
@@ -182,7 +104,7 @@ URLS = [
182
  )
183
  def load_url(url):
184
  """
185
- Use the WebBaseLoader for a single URL.
186
  The function is retried if it fails due to connection issues.
187
  """
188
  loader = WebBaseLoader(
@@ -194,7 +116,7 @@ def load_url(url):
194
 
195
  def safe_load_all_urls(urls):
196
  """
197
- Safely load documents from a list of URLs.
198
  Any URL that fails after the specified number of retries is skipped.
199
  """
200
  all_docs = []
@@ -207,11 +129,10 @@ def safe_load_all_urls(urls):
207
  print(f"Skipping URL due to error: {link}\nError: {e}\n")
208
  return all_docs
209
 
210
-
211
- # 3.3) Actually load the data from all URLs
212
  all_loaded_docs = safe_load_all_urls(URLS)
213
 
214
- # 3.4) Split the documents into manageable chunks
215
  text_splitter = RecursiveCharacterTextSplitter(
216
  chunk_size=1024,
217
  chunk_overlap=150,
@@ -219,27 +140,26 @@ text_splitter = RecursiveCharacterTextSplitter(
219
  )
220
  docs = text_splitter.split_documents(all_loaded_docs)
221
 
222
- # 3.5) Create embeddings
223
  embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
224
 
225
- # 3.6) Create a persistent directory to store vector DB
226
  persist_directory = 'docs/chroma/'
227
  shutil.rmtree(persist_directory, ignore_errors=True) # remove old DB files
228
 
229
- # 3.7) Build Chroma vector store
230
  vectordb = Chroma.from_documents(
231
  documents=docs,
232
  embedding=embeddings,
233
  persist_directory=persist_directory
234
  )
235
 
236
- # 3.8) Create a retriever
237
  retriever = vectordb.as_retriever(
238
  search_kwargs={"k": 2},
239
  search_type="mmr"
240
  )
241
 
242
-
243
  """
244
  =========================================================
245
  4) PROMPT & CHAIN SETUP
@@ -254,8 +174,8 @@ parser = PydanticOutputParser(pydantic_object=FinalAnswer)
254
 
255
  # 4.2) Prompt template: system instructions
256
  template = """
257
- Your name is Greta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
258
- Has sido diseñado y creado por el Grupo 1 del Máster en Data Science & Big Data de la promoción 2023/2024 de la Universidad Complutense de Madrid. Este grupo está fromado por Rocío, María Guillermo, Alejandra, Paloma y Álvaro /
259
  Use the following pieces of context to answer the question /
260
  If the question is English answer in English /
261
  If the question is Spanish answer in Spanish /
@@ -304,7 +224,6 @@ qa_chain = ConversationalRetrievalChain.from_llm(
304
  output_key='output'
305
  )
306
 
307
-
308
  def chat_interface(question, history):
309
  """
310
  This function processes the user's question through the qa_chain,
@@ -330,7 +249,6 @@ chatbot_gradio_app = gr.ChatInterface(
330
  title="<span style='color: rgb(243, 239, 224);'>Green Greta</span>"
331
  )
332
 
333
-
334
  """
335
  =========================================================
336
  5) BANNER / WELCOME TAB
@@ -359,7 +277,6 @@ banner_tab_content = """
359
  """
360
  banner_tab = gr.Markdown(banner_tab_content)
361
 
362
-
363
  """
364
  =========================================================
365
  6) GRADIO FINAL APP: TABS
 
39
 
40
  from pydantic.v1 import BaseModel, Field
41
 
42
+ # Import the separate file that contains our list of URLs
43
+ from url_list import URLS
44
+
45
 
46
  """
47
  =========================================================
 
57
  def predict_image(input_image):
58
  """
59
  Resize the user-uploaded image and preprocess it so that it can be fed
60
+ into the EfficientNetB0 model. The model then returns a dictionary of
61
  class probabilities.
62
  """
63
  # Resize the image (note the target dimensions)
 
87
  theme=theme
88
  )
89
 
 
90
  """
91
  =========================================================
92
  3) CHATBOT MODEL SETUP
 
96
  user_agent = UserAgent().random
97
  header_template = {"User-Agent": user_agent}
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  @tenacity.retry(
101
  wait=tenacity.wait_fixed(3), # wait 3 seconds between retries
 
104
  )
105
  def load_url(url):
106
  """
107
+ Use the WebBaseLoader for a single URL.
108
  The function is retried if it fails due to connection issues.
109
  """
110
  loader = WebBaseLoader(
 
116
 
117
  def safe_load_all_urls(urls):
118
  """
119
+ Safely load documents from a list of URLs.
120
  Any URL that fails after the specified number of retries is skipped.
121
  """
122
  all_docs = []
 
129
  print(f"Skipping URL due to error: {link}\nError: {e}\n")
130
  return all_docs
131
 
132
+ # 3.2) Actually load the data from all URLs (imported from url_list.py)
 
133
  all_loaded_docs = safe_load_all_urls(URLS)
134
 
135
+ # 3.3) Split the documents into manageable chunks
136
  text_splitter = RecursiveCharacterTextSplitter(
137
  chunk_size=1024,
138
  chunk_overlap=150,
 
140
  )
141
  docs = text_splitter.split_documents(all_loaded_docs)
142
 
143
+ # 3.4) Create embeddings
144
  embeddings = HuggingFaceEmbeddings(model_name='thenlper/gte-small')
145
 
146
+ # 3.5) Create a persistent directory to store vector DB
147
  persist_directory = 'docs/chroma/'
148
  shutil.rmtree(persist_directory, ignore_errors=True) # remove old DB files
149
 
150
+ # 3.6) Build Chroma vector store
151
  vectordb = Chroma.from_documents(
152
  documents=docs,
153
  embedding=embeddings,
154
  persist_directory=persist_directory
155
  )
156
 
157
+ # 3.7) Create a retriever
158
  retriever = vectordb.as_retriever(
159
  search_kwargs={"k": 2},
160
  search_type="mmr"
161
  )
162
 
 
163
  """
164
  =========================================================
165
  4) PROMPT & CHAIN SETUP
 
174
 
175
  # 4.2) Prompt template: system instructions
176
  template = """
177
+ Your name is Greta and you are a recycling chatbot with the objective to answer questions from user in English or Spanish /
178
+ Has sido diseñado y creado por el Grupo 1 del Máster en Data Science & Big Data de la promoción 2023/2024 de la Universidad Complutense de Madrid. Este grupo está formado por Rocío, María Guillermo, Alejandra, Paloma y Álvaro /
179
  Use the following pieces of context to answer the question /
180
  If the question is English answer in English /
181
  If the question is Spanish answer in Spanish /
 
224
  output_key='output'
225
  )
226
 
 
227
  def chat_interface(question, history):
228
  """
229
  This function processes the user's question through the qa_chain,
 
249
  title="<span style='color: rgb(243, 239, 224);'>Green Greta</span>"
250
  )
251
 
 
252
  """
253
  =========================================================
254
  5) BANNER / WELCOME TAB
 
277
  """
278
  banner_tab = gr.Markdown(banner_tab_content)
279
 
 
280
  """
281
  =========================================================
282
  6) GRADIO FINAL APP: TABS
url_list.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # url_list.py
2
+
3
+ """
4
+ This file contains the list of URLs to be loaded by the main app.py file.
5
+ """
6
+
7
+ URLS = [
8
+ "https://www.epa.gov/recycle/frequent-questions-recycling",
9
+ "https://www.whitehorsedc.gov.uk/vale-of-white-horse-district-council/recycling-rubbish-and-waste/lets-get-real-about-recycling/",
10
+ "https://www.teimas.com/blog/13-preguntas-y-respuestas-sobre-la-ley-de-residuos-07-2022",
11
+ "https://www.molok.com/es/blog/gestion-de-residuos-solidos-urbanos-rsu-10-dudas-comunes",
12
+ "https://espanol.epa.gov/espanol/el-reciclaje#valelapena",
13
+ "https://espanol.epa.gov/espanol/preguntas-frecuentes-sobre-reciclado-de-plastico-y-elaboracion-de-abono-vegetal",
14
+ "https://espanol.epa.gov/espanol/consejo-del-dia-como-reciclo-mis",
15
+ "https://espanol.epa.gov/espanol/recursos-para-reciclar-dispositivos-electronicos",
16
+ "https://www.epa.gov/recycle/electronics-donation-and-recycling",
17
+ "https://reducereutilizarecicla.org/que-es-el-reciclaje/",
18
+ "https://reducereutilizarecicla.org/contenedores-de-reciclaje/",
19
+ "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-amarillo/",
20
+ "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-azul/",
21
+ "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-verde/",
22
+ "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-marron-organico/",
23
+ "https://reducereutilizarecicla.org/contenedores-de-reciclaje/contenedor-gris-restos/",
24
+ "https://reducereutilizarecicla.org/contenedores-de-reciclaje/punto-limpio/",
25
+ "https://reducereutilizarecicla.org/donde-tirar-auriculares/",
26
+ "https://reducereutilizarecicla.org/donde-tirar-sartenes/",
27
+ "https://reducereutilizarecicla.org/donde-tirar-aceite-usado/",
28
+ "https://reducereutilizarecicla.org/como-se-reciclan-los-envases-tipo-brik/",
29
+ "https://reducereutilizarecicla.org/los-envases-del-verano/",
30
+ "https://reducereutilizarecicla.org/donde-tirar-radiografias/",
31
+ "https://reducereutilizarecicla.org/envases-ecologicos/",
32
+ "https://reducereutilizarecicla.org/donde-tirar-los-restos-de-pintura/",
33
+ "https://reducereutilizarecicla.org/valorizacion-de-residuos/",
34
+ "https://reducereutilizarecicla.org/como-reciclar-pilas/",
35
+ "https://reducereutilizarecicla.org/como-reciclar-capsulas-de-cafe/",
36
+ "https://reducereutilizarecicla.org/reciclando-cd/",
37
+ "https://reducereutilizarecicla.org/donde-tirar-neumaticos/",
38
+ "https://reducereutilizarecicla.org/como-reciclar-una-canasta-de-mimbre/",
39
+ "https://reducereutilizarecicla.org/como-funciona-el-contenedor-amarillo/",
40
+ "https://reducereutilizarecicla.org/donde-se-tiran-los-vapers/",
41
+ "https://reducereutilizarecicla.org/cuanto-tarda-una-bolsa-biodegradable-en-degradarse/",
42
+ "https://reducereutilizarecicla.org/donde-se-reciclan-los-juguetes/",
43
+ "https://reducereutilizarecicla.org/objetos-que-se-pueden-reutilizar/",
44
+ "https://reducereutilizarecicla.org/la-parafina-se-puede-reutilizar/",
45
+ "https://reducereutilizarecicla.org/planta-de-reciclaje-de-papel/",
46
+ "https://reducereutilizarecicla.org/como-saber-si-un-envase-es-reciclable/",
47
+ "https://reducereutilizarecicla.org/reutilizar-vasos-de-vela/",
48
+ "https://reducereutilizarecicla.org/bolsas-frio-calor/",
49
+ "https://reducereutilizarecicla.org/reciclar-y-reutilizar-materiales-de-construccion/",
50
+ "https://reducereutilizarecicla.org/que-es-exactamente-el-pet/",
51
+ "https://reducereutilizarecicla.org/tipos-de-reciclaje/",
52
+ "https://reducereutilizarecicla.org/que-hacer-con-palets-reciclados/",
53
+ "https://reducereutilizarecicla.org/vertederos-controlados/",
54
+ "https://reducereutilizarecicla.org/donde-tirar-escombros/",
55
+ "https://reducereutilizarecicla.org/como-reciclar-los-residuos-de-ps-poliestireno/",
56
+ "https://reducereutilizarecicla.org/tirar-la-basura-sin-bolsas/",
57
+ "https://reducereutilizarecicla.org/tirar-el-palo-de-la-fregona/",
58
+ "https://reducereutilizarecicla.org/la-mejor-manera-de-reciclar-una-pala-de-padel/",
59
+ "https://reducereutilizarecicla.org/sabes-donde-tirar-las-llantas-viejas-de-un-coche/",
60
+ "https://reducereutilizarecicla.org/sabes-donde-tirar-el-arbol-de-navidad/",
61
+ "https://reducereutilizarecicla.org/clavos-tornillos-herramientas-donde-tirar-hierro/",
62
+ "https://reducereutilizarecicla.org/donde-tirar-un-secador-de-pelo-contenedor-o-punto-limpio/",
63
+ "https://reducereutilizarecicla.org/donde-tirar-electrodomesticos/",
64
+ "https://reducereutilizarecicla.org/donde-puedo-tirar-ramas-de-arboles/",
65
+ "https://reducereutilizarecicla.org/donde-tirar-escombros/",
66
+ "https://reducereutilizarecicla.org/donde-se-tira-el-muerdago-quemado/",
67
+ "https://reducereutilizarecicla.org/sandalias-caucho-reciclado-neumaticos/",
68
+ "https://reducereutilizarecicla.org/ideas-para-reciclar-aspas-de-ventilador-de-techo/",
69
+ "https://reducereutilizarecicla.org/reciclar-sacos-dormir/",
70
+ "https://reducereutilizarecicla.org/reciclar-sillas-playa/",
71
+ "https://reducereutilizarecicla.org/donde-tirar-antipolillas/",
72
+ "https://reducereutilizarecicla.org/que-hacer-con-los-juguetes-viejos/",
73
+ "https://reducereutilizarecicla.org/como-utilizar-las-mascarillas-y-el-gel-hidroalcoholico-en-la-playa/",
74
+ "https://reducereutilizarecicla.org/ideas-para-reciclar-un-ventilador-de-pie/",
75
+ "https://reducereutilizarecicla.org/donde-tirar-gasoil/",
76
+ "https://reducereutilizarecicla.org/donde-puedo-tirar-basura-electronica/",
77
+ "https://reducereutilizarecicla.org/donde-tirar-agujas/",
78
+ "https://reducereutilizarecicla.org/donde-tirar-residuos-peligrosos/",
79
+ "https://reducereutilizarecicla.org/donde-tirar-los-cables/",
80
+ "https://reducereutilizarecicla.org/donde-tirar-bicicletas/",
81
+ "https://reducereutilizarecicla.org/donde-tirar-maletas/",
82
+ "https://reducereutilizarecicla.org/como-reciclar-una-pantalla/",
83
+ "https://reducereutilizarecicla.org/donde-tirar-ropa-usada/"
84
+ ]