Remove some old codes

Files changed:
- LISA_mini.ipynb (+19 −108)
- app.py (+125 −127)
- documents.py (+12 −21)
- llms.py (+7 −0)
- models.py (+5 −263)
LISA_mini.ipynb
CHANGED
@@ -12,10 +12,9 @@
  "\n",
  "from dotenv import load_dotenv\n",
  "from langchain.document_loaders import PyPDFLoader\n",
- "
- "from langchain.
- "from langchain.
- "from langchain.vectorstores import FAISS, Chroma\n",
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+ "from langchain.embeddings import HuggingFaceEmbeddings\n",
+ "from langchain.vectorstores import FAISS\n",
  "from langchain.chains import ConversationalRetrievalChain\n",
  "from langchain.llms import HuggingFaceTextGenInference\n",
  "from langchain.chains.conversation.memory import (\n",
@@ -38,23 +37,10 @@
  },
  {
  "cell_type": "code",
- "execution_count":
+ "execution_count": null,
  "id": "ffd3db32",
  "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/mnt/data2/yinghanz/codes/machine_learning_projects/llm/venv/hftest/lib/python3.10/site-packages/pydantic/_internal/_fields.py:151: UserWarning: Field \"model_id\" has conflict with protected namespace \"model_\".\n",
- "\n",
- "You may be able to resolve this warning by setting `model_config['protected_namespaces'] = ()`.\n",
- " warnings.warn(\n",
- "/mnt/data2/yinghanz/codes/machine_learning_projects/llm/venv/hftest/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "# Set inference link, use this online one for easier reproduce\n",
  "inference_api_url = 'https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta'\n",
@@ -89,6 +75,8 @@
  "source": [
  "# Function for reading and chunking text\n",
  "def load_pdf_as_docs(pdf_path, loader_module=None):\n",
+ " \"\"\"Load and parse pdf files.\"\"\"\n",
+ " \n",
  " if pdf_path.endswith('.pdf'): # single file\n",
  " pdf_docs = [pdf_path]\n",
  " else: # a directory\n",
@@ -125,8 +113,7 @@
  "outputs": [],
  "source": [
  "# Specify the directory containing your PDFs\n",
- "
- "directory = \"FestbattLiterature\" # change to your pdf dictory\n",
+ "directory = \"data/documents\" # change to your pdf directory\n",
  "\n",
  "# Find and parse all PDFs in the directory\n",
  "pdf_docs = load_pdf_as_docs(directory, PyPDFLoader)\n",
@@ -136,26 +123,17 @@
  },
  {
  "cell_type": "code",
- "execution_count":
+ "execution_count": null,
  "id": "7bf62c76",
  "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/mnt/data2/yinghanz/codes/machine_learning_projects/llm/venv/hftest/lib/python3.10/site-packages/torch/cuda/__init__.py:141: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 11040). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.)\n",
- " return torch._C._cuda_getDeviceCount() > 0\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "# Set embedding\n",
  "embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5') # choose the one you like\n",
  "\n",
  "# Set vectorstore, e.g. FAISS\n",
  "texts = [\"LISA - Lithium Ion Solid-state Assistant\"]\n",
- "vectorstore = FAISS.from_texts(texts, embeddings) # this is a workaround as FAISS cannot be
+ "vectorstore = FAISS.from_texts(texts, embeddings) # this is a workaround as FAISS cannot be initialized by 'FAISS(embedding_function=embeddings)', waiting for Langchain fix\n",
  "# You may also use Chroma\n",
  "# vectorstore = Chroma(embedding_function=embeddings)"
  ]
@@ -186,8 +164,8 @@
  "# For local storage, ref: https://stackoverflow.com/questions/77385587/persist-parentdocumentretriever-of-langchain\n",
  "store = InMemoryStore()\n",
  "\n",
- "parent_splitter = RecursiveCharacterTextSplitter(separators=[\"\\n\\n\", \"\\n\"], chunk_size=512, chunk_overlap=
- "child_splitter = RecursiveCharacterTextSplitter(separators=[\"\\n\\n\", \"\\n\"], chunk_size=256, chunk_overlap=
+ "parent_splitter = RecursiveCharacterTextSplitter(separators=[\"\\n\\n\", \"\\n\"], chunk_size=512, chunk_overlap=128)\n",
+ "child_splitter = RecursiveCharacterTextSplitter(separators=[\"\\n\\n\", \"\\n\"], chunk_size=256, chunk_overlap=64)\n",
  "\n",
  "parent_doc_retriver = ParentDocumentRetriever(\n",
  " vectorstore=vectorstore,\n",
@@ -213,23 +191,10 @@
  },
  {
  "cell_type": "code",
- "execution_count":
+ "execution_count": null,
  "id": "2eb8bc8f",
  "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "config.json: 100%|██████████| 801/801 [00:00<00:00, 2.96MB/s]\n",
- "model.safetensors: 100%|██████████| 2.24G/2.24G [00:06<00:00, 359MB/s]\n",
- "tokenizer_config.json: 100%|██████████| 443/443 [00:00<00:00, 2.68MB/s]\n",
- "sentencepiece.bpe.model: 100%|██████████| 5.07M/5.07M [00:00<00:00, 405MB/s]\n",
- "tokenizer.json: 100%|██████████| 17.1M/17.1M [00:00<00:00, 354MB/s]\n",
- "special_tokens_map.json: 100%|██████████| 279/279 [00:00<00:00, 1.31MB/s]\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "# 3. Rerank\n",
  "\"\"\"\n",
@@ -367,26 +332,10 @@
  },
  {
  "cell_type": "code",
- "execution_count":
+ "execution_count": null,
  "id": "59159951",
  "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/mnt/data2/yinghanz/codes/machine_learning_projects/llm/venv/hftest/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
- " warn_deprecated(\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " Two common solid electrolytes are LLZO (lithium lanthanum zirconate titanate) and sulfide-based solid electrolytes, as mentioned in the context provided.\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "# Now begin to ask question\n",
  "question = \"Please name two common solid electrolytes.\"\n",
@@ -404,48 +353,10 @@
  },
  {
  "cell_type": "code",
- "execution_count":
+ "execution_count": null,
  "id": "d736960b",
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Running on local URL: http://127.0.0.1:7860\n",
- "Running on public URL: https://3a0ee58b7378104912.gradio.live\n",
- "\n",
- "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<div><iframe src=\"https://3a0ee58b7378104912.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
- ],
- "text/plain": [
- "<IPython.core.display.HTML object>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Answer: Two common solid electrolytes used in lithium metal batteries are poly(ethylene oxide)-based solid electrolytes and lithium phosphate/phosphite or lithium sulfate/sulfite layers.\n",
-
"Source document: [Document(page_content='electrolytes (SEs) and in contrast to many liquid electrolytes, SEs are stable under high\\nelectrochemical oxidation potentials up to 5.0 V vs Li/Li+[8]. In addition, solid composite', metadata={'source': 'FestbattLiteraturemini/materials-14-03472-v2.pdf', 'page': 0, 'relevance_score': 0.35495195}), Document(page_content='chieflyforapplicationinelectricvehicles,callsforanodematerialswith\\nimproved practical specific capacity as compared to the theoretical\\ncapacityof372mAhg−1ofgraphite[1,2].Overcominglimitationsof\\norganiccarbonate-basedliquidelectrolytesduetothecomplexinter-\\nfacialchemistryandflammabilityisalsofundamentalindesigningsafer\\nLIBs[3].Inthisregard,researcheffortsaredevotedtoreplacetheli-\\nquid electrolytes with highly-conductive solid electrolytes aiming to', metadata={'source': 'FestbattLiteraturemini/1-s2.0-S1388248120301582-main.pdf', 'page': 0, 'relevance_score': 0.024606787}), Document(page_content='and power density, Li metal as a high energy density anode can be employed with solid\\nelectrolytes (SEs) and in contrast to many liquid electrolytes, SEs are stable under high', metadata={'source': 'FestbattLiteraturemini/materials-14-03472-v2.pdf', 'page': 0, 'relevance_score': 0.014535204}), Document(page_content='+depletion and concentration polarization, immobilized anions,\\nsingle-ion versus dual-ion conduction, Li+diffusion versus Li+migration, limiting current, Li dendrites\\n1. INTRODUCTION\\nSolid electrolytes are currently regarded as the most promising\\nenabler of lithium metal batteries (LMBs), which, at least\\ntheoretically can o ffer enhanced speci fic energies and energy\\ndensities compared to state-of-the-art liquid electrolyte Li-ionbatteries (LIBs).\\n1−4The poly(ethylene oxide)-based solid', metadata={'source': 'FestbattLiteraturemini/stolz-et-al-2022-single-ion-versus-dual-ion-conducting-electrolytes-the-relevance-of-concentration-polarization-in.pdf', 'page': 0, 'relevance_score': 0.013416832}), Document(page_content='J. Effective Optimization of High Voltage Solid State LithiumBatteries by Using Poly(ethylene oxide) Based Polymer Electrolytewith Semi-Interpenetrating Network. Adv. Funct. Mater. 2020 ,30,\\n2006289.\\n( 1 8 )H o m a n n ,G . ;S t o l z ,L . ;W i n t e r ,M . ;K a s n a t s c h e e w ,J .\\nElimination of “Voltage Noise ”of Poly (Ethylene Oxide)-Based\\nSolid Electrolytes in High-Voltage Lithium Batteries: Linear versusNetwork Polymers. iScience 2020 ,23, 101225.', metadata={'source': 'FestbattLiteraturemini/stolz-et-al-2022-single-ion-versus-dual-ion-conducting-electrolytes-the-relevance-of-concentration-polarization-in.pdf', 'page': 6, 'relevance_score': 0.0091508655}), Document(page_content='electrolytes, whichmayinsituformahomogeneous lithium\\nphosphate/phosphite orlithium sulfate/sulfite layerare\\npromising forthemodification ofnewelectrolytes. These\\nresultshighlight thepossibility ofsolvinginterfacial prob-', metadata={'source': 'FestbattLiteraturemini/Angew Chem Int Ed - 2022 - Zuo - Impact of the Chlorination of Lithium Argyrodites on the Electrolyte Cathode Interface in.pdf', 'page': 6, 'relevance_score': 0.0059685726}), Document(page_content='“k” is Boltzmann ’s constant. Note that the “ σ” data obtained at \\nSmall 2020, 16, 2000279\\nFigure 1. Schematic illustration of the interface between LLZO SE and \\nLFP cathode. 
“magnified view” shows the ILE interlayer between the \\ncathode and the solid electrolyte enabling fast ionic transport.', metadata={'source': 'FestbattLiteraturemini/Small - 2020 - Pervez - Overcoming the Interfacial Limitations Imposed by the Solid Solid Interface in Solid‐State.pdf', 'page': 1, 'relevance_score': 0.0007537542}), Document(page_content='affecttheelectrochemical decomposition behavior aswellas\\ntheinterfacial reaction between SEandNCM.Thus,new\\nelectrolytes, whichmayinsituformahomogeneous lithium\\nphosphate/phosphite orlithium sulfate/sulfite layerare', metadata={'source': 'FestbattLiteraturemini/Angew Chem Int Ed - 2022 - Zuo - Impact of the Chlorination of Lithium Argyrodites on the Electrolyte Cathode Interface in.pdf', 'page': 6, 'relevance_score': 0.0005863567}), Document(page_content='From a managerial point of view, it is key that the two efforts grow simultaneously as a combined solution to tribology’s digital transformation.\\nCoping with the challenge of defining the common terms which describe tribological specimens, equipment', metadata={'source': 'FestbattLiteraturemini/s41597-022-01429-9.pdf', 'page': 1, 'relevance_score': 7.6203854e-05})]\n",
- "Answer: Unfortunately, the provided context does not include information on how to synthesize gc-LPSC. You may need to consult additional resources or contact the authors of the context provided for more information.\n",
-
"Source document: [Document(page_content='or high temperature steps which may affect the scalability of the \\nsynthesis process and increase the cost.Li-garnets are promising inorganic ceramic solid electrolytes for lithium metal', metadata={'source': 'FestbattLiteraturemini/Small - 2020 - Pervez - Overcoming the Interfacial Limitations Imposed by the Solid Solid Interface in Solid‐State.pdf', 'page': 0, 'relevance_score': 0.2680533}), Document(page_content='enabler of lithium metal batteries (LMBs), which, at least\\ntheoretically can o ffer enhanced speci fic energies and energy\\ndensities compared to state-of-the-art liquid electrolyte Li-ionbatteries (LIBs).\\n1−4The poly(ethylene oxide)-based solid', metadata={'source': 'FestbattLiteraturemini/stolz-et-al-2022-single-ion-versus-dual-ion-conducting-electrolytes-the-relevance-of-concentration-polarization-in.pdf', 'page': 0, 'relevance_score': 0.14643796}), Document(page_content='Lithium metal batteries (LMBs) promise higher energy densities\\nand speci fic energies compared to the state-of-the-art (SOTA) Li\\nion batteries (LIBs) [1–4]. However, a suitable solid electrolyte\\nor liquid electrolyte/separator system for high-performance andsafe cell (-stack) operation remains the key for application andis the predominant actual focus of research and development(R&D) [5–11].\\nThe organic -,i.e.solid polymer-based electrolytes (SPEs) are', metadata={'source': 'FestbattLiteraturemini/1-s2.0-S1369702120304521-main.pdf', 'page': 0, 'relevance_score': 0.046960726}), Document(page_content='Performance of Solid Polymer Electrolytes for Use in Solid-StateLithium Batteries. iScience 2020 ,23, 101597.\\n(8) Jung, K. N.; Shin, H. S.; Park, M. S.; Lee, J. W. Solid-State\\nLithium Batteries: Bipolar Design, Fabrication, and Electrochemistry.\\nChemElectroChem 2019 ,6, 3842−3859.\\n(9) Simonetti, E.; Carewska, M.; Di Carli, M.; Moreno, M.; De\\nFrancesco, M.; Appetecchi, G. B. Towards improvement of the\\nelectrochemical properties of ionic liquid-containing polyethylene', metadata={'source': 'FestbattLiteraturemini/stolz-et-al-2022-single-ion-versus-dual-ion-conducting-electrolytes-the-relevance-of-concentration-polarization-in.pdf', 'page': 6, 'relevance_score': 0.01367707}), Document(page_content='adjusted to ensure a balance between the number of active charge carriers (Li ions) and viscosity of the IL. 
The ILE was further dried at 60 °C under vacuum to decrease the water content below 5 ppm, as \\ndetected by Karl–Fischer measurements.\\nPreparation of LFP Positive Electrodes (Cathodes): Carbon-coated LFP \\nwas synthesized via a solid state method.\\n[56] Stoichiometric amounts \\nof lithium carbonate (Li 2CO 3; Aldrich, 99.95%), ammonium hydrogen', metadata={'source': 'FestbattLiteraturemini/Small - 2020 - Pervez - Overcoming the Interfacial Limitations Imposed by the Solid Solid Interface in Solid‐State.pdf', 'page': 8, 'relevance_score': 0.0011098508}), Document(page_content='avoidanyshortcircuit,astheSi-FLGdiskhasØ18mm).TheBLPEfortheionicconductivitymeasurementwaspreparedfollowingthesameprocedureusedforSi-FLG/BLPE,butnoelectrodewasusedinthiscase.\\nThe Si-FLG/BLPE was assembled in an ECC-Std cell (EL-cell,\\nGermany) with a 18 mm Li metal disk anode (200 µm thick,\\nAlbermarle)inatwo-electrodesconfiguration.TheLi||Si-FLGcellwith\\nIL_liqwasassembledusingaglasswoolWhatmanseparatordrenched\\nwith200µLofelectrolyte.Testcellsweregalvanostaticallycycled(GC)', metadata={'source': 'FestbattLiteraturemini/1-s2.0-S1388248120301582-main.pdf', 'page': 1, 'relevance_score': 0.0005449906}), Document(page_content='LiNbO 3layer was deposited on the garnet type lithium ion conductor Li 6.45Al0.05La3Zr1.6Ta0.4O12(LLZTO) to improve its\\ninterface to lithium metal and reduce dendrite formation. The application of the thin film reduced the interface resistance between', metadata={'source': 'FestbattLiteraturemini/Mann_2022_J._Electrochem._Soc._169_040564.pdf', 'page': 1, 'relevance_score': 8.970482e-05}), Document(page_content='Zenodo (CERN & OpenAIRE 2013). The listed repositories are all generic and represent only a \\nselection of the existing open-source systems (Amorim et al. 2017).\\nA second type of system in addition to the repositories, which is also increasingly used in', metadata={'source': 'FestbattLiteraturemini/kadi4mat.pdf', 'page': 1, 'relevance_score': 7.65131e-05}), Document(page_content='A second type of system in addition to the repositories, which is also increasingly used in \\nexperimentally oriented research areas, are the electronic lab notebooks (ELN) (Rubacha, Rattan', metadata={'source': 'FestbattLiteraturemini/kadi4mat.pdf', 'page': 1, 'relevance_score': 7.6393466e-05})]\n",
- "Answer: Yes, the paper \"Kadi4Mat: A Research Data Infrastructure for Materials Science\" by C, Schoof, E, Tosato, G, Zhao, Y, Zschumme, P, and Selzer, M, published in the Data Science Journal in 2021, provides an overview of Kadi4Mat, a research data infrastructure for materials science. It discusses the components of Kadi4Mat, including the electronic laboratory notebook (ELN), data management, and data analysis, and provides examples of how Kadi4Mat has been used in materials science research. This paper can help you gain a deeper understanding of Kadi4Mat and its potential applications in materials science research.\n",
-
"Source document: [Document(page_content='deeper understanding of the phenomena that govern friction and wear. Missing community-wide data', metadata={'source': 'FestbattLiteraturemini/s41597-022-01429-9.pdf', 'page': 0, 'relevance_score': 0.06298193}), Document(page_content='32. Brandt, N. et al. Kadi4mat: A research data infrastructure for materials science. Data Sci. J. 20, 1–14 (2021).\\n 33. Brandt, N. et al. Managing FAIR tribological data using Kadi4Mat. Data 7, 15 (2022).\\n 34. Garabedian, N. et al . FAIR Data Package of a Tribological Showcase Pin-on-Disk Experiment. Zenodo https://doi.org/10.5281/\\nzenodo.5720626 (2021).\\n 35. Garabedian, N. et al. Generating FAIR research data in experimental tribology. Zenodo https://doi.org/10.5281/zenodo.6349293 (2022).', metadata={'source': 'FestbattLiteraturemini/s41597-022-01429-9.pdf', 'page': 10, 'relevance_score': 0.03710895}), Document(page_content='C, Schoof, E, Tosato, G, Zhao, \\nY, Zschumme, P and Selzer, M. \\n2021. Kadi4Mat: A Research \\nData Infrastructure for \\nMaterials Science. Data Science \\nJournal , 20: 8, pp. 1– 14. DOI: \\nhttps://doi.org/10.5334/dsj-\\n2021-008\\nSubmitted: 16 October 2020 \\nAccepted: 27 January 2021 \\nPublished: 10 February 2021\\nCOPYRIGHT: \\n© 2021 The Author(s). This is an \\nopen-access article distributed \\nunder the terms of the Creative \\nCommons Attribution 4.0 \\nInternational License (CC-BY', metadata={'source': 'FestbattLiteraturemini/kadi4mat.pdf', 'page': 13, 'relevance_score': 0.03163605}), Document(page_content='Brandt, N. 2020. Kadi4Mat – Karlsruhe Data Infrastructure for Materials Science . URL: https://kadi.iam-cms.\\nkit.edu (visited on Sept. 30, 2020).\\nBrandt, N, et al. Oct. 16, 2020. IAM-CMS/Kadi: Kadi4Mat. Version 0.2.0. Zenodo . DOI: https://doi.\\norg/10.5281/ZENODO.4088270\\nCantor, S and Scavo, T. 2005. Shibboleth Architecture. Protocols and Profiles, 10: 16. DOI: https://doi.\\norg/10.26869/TI.66.1\\nCARPi, N, Minges, A and Piel, M. Apr. 14, 2017. eLabFTW: An Open Source Laboratory Notebook for', metadata={'source': 'FestbattLiteraturemini/kadi4mat.pdf', 'page': 11, 'relevance_score': 0.01203158}), Document(page_content='various tools and technical infrastructures. The components can be used by web- and desktop-\\nbased applications, via uniform interfaces. Both a graphical and a programmatic interface \\nare provided, using machine-readable formats and various exchange protocols. In Figure 2 , a \\nconceptual overview of the infrastructure of Kadi4Mat is presented.\\n2.1 ELECTRONIC LAB NOTEBOOK\\nIn the ELN component, the so-called workflows are of particular importance. A workflow is a', metadata={'source': 'FestbattLiteraturemini/kadi4mat.pdf', 'page': 2, 'relevance_score': 0.004907727}), Document(page_content='plinarity of the field: many seemingly trivial tribological problems require a deep, but still holistic, understanding of processes and mechanisms that act between, at, and underneath contacting surfaces\\n12. A tribological response', metadata={'source': 'FestbattLiteraturemini/s41597-022-01429-9.pdf', 'page': 0, 'relevance_score': 7.9162426e-05}), Document(page_content='alumina crucibles. A photo of the sintered LLZO pellet is shown \\nin the inset of Figure 2a while a low magnification SEM image is shown in Figure S2, Supporting Information. 
The sintering \\nand pellet pressing conditions were optimized to get the pure', metadata={'source': 'FestbattLiteraturemini/Small - 2020 - Pervez - Overcoming the Interfacial Limitations Imposed by the Solid Solid Interface in Solid‐State.pdf', 'page': 1, 'relevance_score': 7.627518e-05}), Document(page_content='mitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/. © The Author(s) 2022', metadata={'source': 'FestbattLiteraturemini/s41597-022-01429-9.pdf', 'page': 10, 'relevance_score': 7.626042e-05}), Document(page_content='at 1100 °C for 3 h. To get desired dimensions, the pellets were polished with Si-carbide sand paper (grit size 400) under argon environment to obtain a thickness of ≈500 µm and a geometric area of ≈0.785 cm\\n2 for \\neach side.', metadata={'source': 'FestbattLiteraturemini/Small - 2020 - Pervez - Overcoming the Interfacial Limitations Imposed by the Solid Solid Interface in Solid‐State.pdf', 'page': 7, 'relevance_score': 7.619601e-05})]\n",
- "now reading document\n",
- "file is located at /tmp/gradio/4067b227cf47cb8a25bd94e77cfd2193637b225e/10.5445IR1000071294.pdf\n",
- "now creating vectordatabase\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "# The rests are for Gradio GUI\n",
  "\n",
app.py
CHANGED
@@ -5,7 +5,8 @@ from pathlib import Path
  from dotenv import load_dotenv
  import pickle

- from
+ from llms import get_groq_chat
+
  import gradio as gr

  from huggingface_hub import login
@@ -36,11 +37,13 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
  database_root = "./data/db"
  document_path = "./data/documents"

+
  # Load cached db
  def load_from_pickle(filename):
  with open(filename, "rb") as file:
  return pickle.load(file)

+
  # Load docs
  docs = load_from_pickle(os.path.join(database_root, "docs.pkl"))

@@ -54,7 +57,11 @@ embeddings = get_jinaai_embeddings(device="auto")
  print("embedding loaded")

  # Load vectorstore
- vectorstore = FAISS.load_local(
+ vectorstore = FAISS.load_local(
+ os.path.join(database_root, "faiss_index"),
+ embeddings,
+ allow_dangerous_deserialization=True,
+ )
  print("vectorestore loaded")

  # Load or create retrievers
@@ -62,7 +69,11 @@ from retrievers import get_parent_doc_retriever, get_rerank_retriever

  docstore = load_from_pickle(os.path.join(database_root, "docstore.pkl"))
  parent_doc_retriver = get_parent_doc_retriever(
- docs,
+ docs,
+ vectorstore,
+ save_path_root=database_root,
+ docstore=docstore,
+ add_documents=False,
  )

  # Hybrid-search
@@ -78,39 +89,15 @@ ensemble_retriever = EnsembleRetriever(
  )
  # Reranker
  from rerank import BgeRerank
+
  reranker = BgeRerank()
  rerank_retriever = get_rerank_retriever(ensemble_retriever, reranker)
  print("rerank loaded")


  # Create LLM model
-
-
- #chat_model_name = os.environ["chat_model_name"] # llm server model name
- #inference_server_url = os.environ["inference_server_url"] # openai-like api
- #llm = get_llm_openai_chat(chat_model_name, inference_server_url)
- llm = ChatGroq(temperature=0, model_name="llama-3.1-70b-versatile") # llama3-70b-8192
-
- # # Tmp test
- # from langchain.llms import HuggingFacePipeline
- # from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
- # model_path = "/mnt/localstorage/yinghan/llm/OpenHermes-2.5-Mistral-7B"
- # model_path = "stabilityai/stablelm-2-zephyr-1_6b"
- # model_path = "openbmb/MiniCPM-2B-dpo-bf16"
- # model_path = "cognitivecomputations/dolphin-2_6-phi-2"
- # model_path = "stabilityai/stablelm-3b-4e1t"
-
- # model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True,)#, load_in_8bit=True)
- # tokenizer = AutoTokenizer.from_pretrained(model_path)
- # pipe = pipeline(
- # "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=1024, model_kwargs={"temperature":0}
- # )
- # llm = HuggingFacePipeline(pipeline=pipe)
-
- # Tmp test for vllm -> do not use this, slow and not good
- # from langchain.llms import VLLM
- # llm = VLLM(model="mistralai/Mistral-7B-Instruct-v0.2", download_dir="/mnt/localstorage/yinghan/llm/vllms", dtype="half",
- # trust_remote_code=True, tensor_parallel_size=2)#, tensor_parallel_size=4)
+ llm = get_groq_chat(model_name="llama-3.1-70b-versatile")
+

  # # # Create conversation qa chain (Note: conversation is not supported yet)
  from models import RAGChain
@@ -121,8 +108,13 @@ lisa_qa_conversation = rag_chain.create(rerank_retriever, llm, add_citation=True
  # Web search rag chain
  from langchain_community.retrievers import TavilySearchAPIRetriever
  from langchain.chains import RetrievalQAWithSourcesChain
-
-
+
+ web_search_retriever = TavilySearchAPIRetriever(
+ k=4
+ ) # , include_raw_content=True)#, include_raw_content=True)
+ web_qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
+ llm, retriever=web_search_retriever, return_source_documents=True
+ )
  print("chain loaded")


@@ -146,7 +138,7 @@ def add_text(history, text):

  def postprocess_remove_cite_misinfo(text, allowed_max_cite_num=6):
  """Exp.-based removal of misinfo. of citations."""
-
+
  # Remove trailing references at end of text
  if "References:\n[" in text:
  text = text.split("References:\n")[0]
@@ -157,35 +149,45 @@ def postprocess_remove_cite_misinfo(text, allowed_max_cite_num=6):

  # Define the custom function for replacement
  def replace_and_increment(match):
-
+
  match_str = match.group(1)
  # print("match str", match_str)
-
- # Delete
+
+ # Delete anything like [[10–14]]
  if "–" in match_str or "-" in match_str:
  return ""
-
+
  # Delete anything like [i]
  if "i" in match_str:
  return ""
-
+
  # Find number in match_str
  # pattern = r'\[(\d+)\]'
- pattern = r
+ pattern = r"(\d+)"
  nums = re.findall(pattern, match_str)
  if nums:
  nums_list = []
  for n in nums:
  if int(n) <= allowed_max_cite_num: # maxmium num. of inputs for llm
- nums_list.append(
- #num = int(num[0])
+ nums_list.append("[[" + n + "]]")
+ # num = int(num[0])
  else: # no number found
  return ""
-
- if re.search(
- return
-
-
+
+ if re.search("^,", match_str):
+ return (
+ '<sup><span style="color:#F27F0C">'
+ + ", "
+ + ", ".join(nums_list)
+ + "</span></sup>"
+ )
+
+ return (
+ '<sup><span style="color:#F27F0C">'
+ + " "
+ + ", ".join(nums_list)
+ + "</span></sup>"
+ )

  # Replace all matches with itself plus 1
  new_text = re.sub(pattern, replace_and_increment, text)
@@ -198,13 +200,15 @@ def postprocess_remove_cite_misinfo(text, allowed_max_cite_num=6):

  # Remove unnecessary white space etc.
  new_text = new_text.strip()
-
+
  return new_text

+
  def postprocess_citation(text, source_docs):
  """Postprocess text for extracting citations."""
+
  # return "test putout for debug {}".format(xxx)
-
+
  source_ids = re.findall(r"\[(\d*)\]", text) # List[Char]
  # print(f"source ids by re: {source_ids}")
  # source_ids = re.findall(r"\[\[(.*?)\]\]", text) # List[Char]
@@ -240,19 +244,23 @@ def postprocess_citation(text, source_docs):
  # """ # collapsible section (fold)
  # item = f"**[[{index}]] source: {source}**\n> {content}\n\n" # shift index+1
  output_markdown += item
- #print("in add citaiton funciton output
- #output_markdown = "this is just a test before real
+ # print("in add citaiton funciton output markdown", output_markdown)
+ # output_markdown = "this is just a test before real markdown pops out."
  return output_markdown


  def postprocess_web_citation(text, qa_result):
  """Postprocess text for extracting web citations."""
-
-
-
+
+ # TODO: Simple implementation, to be improved
+
+ if qa_result["sources"]: # source_documents
  # '<https://www.extremetech.com/energy/what-is-a-solid-state-battery-how-they-work-explained>,'
  web_sources = qa_result["sources"].split(",")
- web_sources = [
+ web_sources = [
+ s.strip().replace(">", "").replace("<", "").replace(",", "")
+ for s in web_sources
+ ] # simple cleaning
  else: # if no qa_results["sources"]
  web_sources = [doc.metadata["source"] for doc in qa_result["source_documents"]]
  output_markdown = "" # """**References**\n\n"""
@@ -264,8 +272,8 @@ def postprocess_web_citation(text, qa_result):

  """
  output_markdown += item
- return output_markdown
-
+ return output_markdown
+

  def bot_lisa(history, flag_web_search):
  """Get answer from LLM."""
@@ -305,7 +313,7 @@
  answer_text = result["answer"].strip()
  citation_text = postprocess_web_citation(answer_text, result)

- #
+ # no stream style
  # history[-1][1] = answer_text
  # return history, citation_text
  # fake stream style
@@ -337,9 +345,9 @@ def bot(history, qa_conversation):
  answer_text = result["answer"].strip()
  # Remove misinfo in text
  answer_text = postprocess_remove_cite_misinfo(answer_text)
-
+
  citation_text = postprocess_citation(answer_text, result["source_documents"])
-
+
  history[-1][1] = "" # Fake stream, TODO: implement streaming
  for character in answer_text:
  time.sleep(0.002)
@@ -370,8 +378,6 @@ def document_changes(doc_path):
  elif file_extension == ".xml":
  documents.extend(load_xml_as_docs(doc))

- # embeddings = HuggingFaceEmbeddings()
-
  print("now creating vectordatabase")

  vectorstore = get_faiss_vectorestore(embeddings)
@@ -380,11 +386,8 @@ def document_changes(doc_path):

  print("now getting llm model")

-
-
- # llm = get_llm_openai_chat(chat_model_name, inference_server_url)
- llm = ChatGroq(temperature=0, model_name="llama-3.1-70b-versatile") # llama3-70b-8192
- # llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0.1, "max_new_tokens":250})
+ llm = get_groq_chat(model_name="llama-3.1-70b-versatile")
+
  rag_chain = RAGChain()

  # global qa_conversation
@@ -406,9 +409,11 @@ def main():
  # LISA chat tab
  # Title info
  gr.Markdown("## LISA - Lithium Ion Solid-state Assistant")
- gr.Markdown(
+ gr.Markdown(
+ """
  Q&A research assistant for efficient Knowledge Management not only in Battery Science.
- Based on RAG-architecture and powered by Large Language Models (LLMs)."""
+ Based on RAG-architecture and powered by Large Language Models (LLMs)."""
+ )

  with gr.Tab("LISA ⚡"):
  with gr.Row():
@@ -420,24 +425,26 @@ def main():
  label="Document Assistant",
  bubble_full_width=False,
  show_copy_button=True,
- #likeable=True,
+ # likeable=True,
  ) # .style(height=750)
-
+
  user_txt = gr.Textbox(
  label="Question", # show_label=False,
  placeholder="Type in the question and press Enter/click Submit",
  ) # .style(container=False)
-
+
  with gr.Accordion("Advanced", open=False):
- flag_web_search = gr.Checkbox(
-
+ flag_web_search = gr.Checkbox(
+ label="Search web", info="Search information from Internet"
+ )
+
  with gr.Row():
- #with gr.Column(scale=8):
+ # with gr.Column(scale=8):
  with gr.Column(scale=1):
  submit_btn = gr.Button("Submit", variant="primary")
  with gr.Column(scale=1):
  clear_btn = gr.Button("Clear", variant="stop")
-
+
  # citations test place
  # doc_citation = gr.Markdown("References used in answering the question will be displayed below.")
  # Examples
@@ -465,7 +472,9 @@ def main():
  # Reference (citations) and other settings
  with gr.Column(scale=3):
  with gr.Tab("References"):
- doc_citation = gr.HTML(
+ doc_citation = gr.HTML(
+ "<p>References used in answering the question will be displayed below.</p>"
+ ) # gr.Markdown("References used in answering the question will be displayed below.")
  # gr.Markdown("nothing test")
  with gr.Tab("Setting"):
  # checkbox for allowing web search
@@ -493,7 +502,9 @@ def main():
  # Document-based QA

  with gr.Tab("Upload document 📚"):
- qa_conversation = gr.State(
+ qa_conversation = gr.State(
+ "placeholder", time_to_live=3600
+ ) # clean state after 1h, is , is time_to_live=3600 needed?

  with gr.Row():
  with gr.Column(scale=7, variant="chat_panel"):
@@ -559,46 +570,11 @@ def main():
  )
  load_document = gr.Button("Load file")
  with gr.Tab("References"):
- doc_citation_user_doc = gr.HTML(
+ doc_citation_user_doc = gr.HTML(
+ "References used in answering the question will be displayed below."
+ )
  with gr.Tab("Setting"):
  gr.Markdown("More in DEV...")
-
- # with gr.Row():
- # # with gr.Column(scale=3, min_width=357, variant="panel"):
- # with gr.Column(scale=3, variant="load_file_panel"):
- # with gr.Row():
- # gr.HTML(
- # "Upload pdf/xml file(s), click the Load file button. After preprocessing, you can start asking questions about the document."
- # )
- # with gr.Row():
- # uploaded_doc = gr.File(
- # label="Upload pdf/xml (max. 3) file(s)",
- # file_count="multiple",
- # file_types=[".pdf", ".xml"],
- # type="filepath",
- # height=100,
- # )
-
- # repo_id = gr.Dropdown(
- # label="LLM",
- # choices=[
- # "default", # open hermes mistral 7b
- # "more-on-the-way",
- # # "OpenAssistant/oasst-sft-1-pythia-12b",
- # # "bigscience/bloomz",
- # ],
- # value="default",
- # )
-
-
- # pdf_loader = gr_PDF(label="Document")
-
- # repo_id.change(
- # pdf_changes,
- # inputs=[uploaded_doc, repo_id],
- # outputs=[langchain_status],
- # queue=False,
- # )

  load_document.click(
  document_changes,
@@ -611,26 +587,45 @@ def main():
  )

  docqa_question.submit(check_input_text, docqa_question).success(
- add_text,
-
-
+ add_text,
+ [chatbot_docqa, docqa_question],
+ [chatbot_docqa, docqa_question],
+ ).then(
+ bot,
+ [chatbot_docqa, qa_conversation],
+ [chatbot_docqa, doc_citation_user_doc],
+ )
+
  docqa_submit_btn.click(check_input_text, docqa_question).success(
- add_text,
-
+ add_text,
+ [chatbot_docqa, docqa_question],
+ [chatbot_docqa, docqa_question],
+ ).then(
+ bot,
+ [chatbot_docqa, qa_conversation],
+ [chatbot_docqa, doc_citation_user_doc],
+ )

-
+ ##########################
+ # Preview tab
  with gr.Tab("Preview feature 🔬"):
  with gr.Tab("Vision LM 🖼"):
- vision_tmp_link =
+ vision_tmp_link = (
+ "https://kadi-iam-lisa-vlm.hf.space/" # vision model link
+ )
  with gr.Blocks(css="""footer {visibility: hidden};""") as preview_tab:
- gr.HTML(
+ gr.HTML(
+ """<iframe src="{}" style="width:100%; height:1024px; overflow:auto"></iframe>""".format(
+ vision_tmp_link
+ )
+ )
  # gr.Markdown("placeholder")
-
+
  # with gr.Tab("Knowledge Graph RAG 🔎"):
  # graph_tmp_link = "https://ea0feb6eb6495e8b2d.gradio.live/" # vision model link
  # with gr.Blocks(css="""footer {visibility: hidden};""") as preview_tab:
  # gr.HTML("""<iframe src="{}" style="width:100%; height:1024px; overflow:auto"></iframe>""".format(graph_tmp_link))
-
+
  # About information
  with gr.Tab("About 📝"):
  with gr.Tab("Dev. info"):
@@ -668,10 +663,13 @@ def main():
  *Notes: The model may produce incorrect statements. Users should treat these outputs as suggestions or starting points, not as definitive or accurate facts.
  """
  )
-
+
  with gr.Tab("What's included?"):
  from paper_list import paper_list_str
-
+
+ gr.Markdown(
+ f"Currently, LISA includes the following open/free access pulications/documents/websites:\n\n {paper_list_str}"
+ )

  # pdf_loader.change(pdf_changes, inputs=[pdf_loader, repo_id], outputs=[langchain_status], queue=False)

documents.py
CHANGED
@@ -1,25 +1,17 @@
  import os
-
+ import shutil
+
  from langchain.document_loaders import (
- TextLoader,
- UnstructuredHTMLLoader,
- PyPDFLoader,
  PyMuPDFLoader,
- PyPDFDirectoryLoader,
  )
- from langchain.
-
- from langchain.vectorstores import
- from langchain.memory import ConversationBufferMemory
- from langchain.chains import ConversationalRetrievalChain
- from langchain.llms import HuggingFaceTextGenInference
+ from langchain.docstore.document import Document
+
+ from langchain.vectorstores import Chroma

  from langchain.text_splitter import (
- CharacterTextSplitter,
  RecursiveCharacterTextSplitter,
  SpacyTextSplitter,
  )
- import shutil

  def load_pdf_as_docs(pdf_path, loader_module=None, load_kwargs=None):
  """Load and parse pdf file(s)."""
@@ -87,13 +79,12 @@ def load_xml_as_docs(xml_path, loader_module=None, load_kwargs=None):
  source_info = "unknown"

  # maybe even better TODO: discuss with Jens
- #first_author = soup.find("author")
- #publication_year = soup.find("date", attrs={'type': 'published'})
- #title = soup.find("title")
+ # first_author = soup.find("author")
+ # publication_year = soup.find("date", attrs={'type': 'published'})
+ # title = soup.find("title")
  # source_info = [first_author, publication_year, title]
- #source_info_str = "_".join([info.text.strip() if info is not None else "unknown" for info in source_info])
+ # source_info_str = "_".join([info.text.strip() if info is not None else "unknown" for info in source_info])

- from langchain.docstore.document import Document
  doc = [Document(page_content=parsed_text_grouped, metadata={"source": source_info})]#, metadata={"source": "local"})

  docs.extend(doc)
@@ -105,13 +96,13 @@ def get_doc_chunks(docs, splitter=None):
  """Split docs into chunks."""

  if splitter is None:
- #splitter = RecursiveCharacterTextSplitter(
+ # splitter = RecursiveCharacterTextSplitter(
  # # separators=["\n\n", "\n"], chunk_size=1024, chunk_overlap=256
  # separators=["\n\n", "\n"], chunk_size=256, chunk_overlap=128
- #)
+ # )
  splitter = SpacyTextSplitter.from_tiktoken_encoder(
  chunk_size=512,
- chunk_overlap=
+ chunk_overlap=128,
  )
  chunks = splitter.split_documents(docs)

llms.py
CHANGED
@@ -12,6 +12,8 @@ from transformers import (
 from transformers import LlamaForCausalLM, AutoModelForCausalLM, LlamaTokenizer
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate
+from langchain_groq import ChatGroq
+

 # model_path = "/mnt/localstorage/yinghan/llm/orca_mini_v3_13b"
 # model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto")#, load_in_8bit=True)
@@ -137,3 +139,8 @@ def get_chat_vllm(model_name, inference_server_url, langfuse_callback=None):
     # vllm = LangchainLLM(llm=chat)
     # return vllm
     return chat
+
+def get_groq_chat(model_name="llama-3.1-70b-versatile"):
+
+    llm = ChatGroq(temperature=0, model_name=model_name)
+    return llm
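The functional addition to llms.py is the Groq-backed chat constructor. A minimal usage sketch, not part of the commit, assuming the langchain-groq package is installed and GROQ_API_KEY is set in the environment (for example via the project's .env file):

# Sketch only: get_groq_chat returns a ChatGroq instance with temperature=0.
from llms import get_groq_chat

chat = get_groq_chat()  # defaults to "llama-3.1-70b-versatile"
reply = chat.invoke("In one sentence, what does a conversational retrieval chain do?")
print(reply.content)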
models.py
CHANGED
@@ -1,242 +1,12 @@
-# from langchain import HuggingFaceHub, LLMChain
 from langchain.chains import LLMChain
-
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    pipeline,
-    T5Tokenizer,
-    T5ForConditionalGeneration,
-    GPT2TokenizerFast,
-)
-from transformers import LlamaForCausalLM, AutoModelForCausalLM, LlamaTokenizer
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
 from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate

-# model_path = "/mnt/localstorage/yinghan/llm/orca_mini_v3_13b"
-# model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto")#, load_in_8bit=True)
-# tokenizer = AutoTokenizer.from_pretrained(model_path)
-from langchain.chat_models import ChatOpenAI
-# from langchain_openai import ChatOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import Chroma
-from langchain.text_splitter import (
-    CharacterTextSplitter,
-    RecursiveCharacterTextSplitter,
-)
-from langchain.document_loaders import TextLoader, UnstructuredHTMLLoader, PyPDFLoader
-from langchain.chains.retrieval_qa.base import RetrievalQA
-from langchain.llms import HuggingFaceHub
-from dotenv import load_dotenv
-from langchain.llms import HuggingFaceTextGenInference
-from langchain.chains.question_answering import load_qa_chain
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chains.conversation.memory import (
     ConversationBufferMemory,
     ConversationBufferWindowMemory,
 )
-# from ragas.llms import LangchainLLM
-
-
-def get_llm_hf_online(inference_api_url=""):
-    if not inference_api_url:  # default api url
-        inference_api_url = (
-            "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
-        )
-
-    llm = HuggingFaceTextGenInference(
-        # cache=None,  # Optional: whether to use a cache
-        verbose=True,  # Provides detailed logs of operation
-        # callbacks=[StreamingStdOutCallbackHandler()],  # Handling streams
-        max_new_tokens=1024,  # Maximum number of tokens that can be generated.
-        # top_k=2,  # Number of top-k tokens to consider during generation
-        top_p=0.95,  # Threshold for controlling randomness in text generation process.
-        typical_p=0.95,  #
-        temperature=0.1,  # For choosing probable words.
-        # repetition_penalty=None,  # Repetition penalty during generation
-        # truncate=None,  # Truncates the input tokens to the given size
-        # stop_sequences=None,  # A list of stop sequences for generation
-        inference_server_url=inference_api_url,  # URL of the inference server
-        timeout=10,  # Timeout for connection with the url
-        # streaming=True,  # Streaming the answer
-    )
-
-    return llm
-
-
-def get_llm_hf_local(model_path):
-    # model_path = "/mnt/localstorage/yinghan/llm/orca_mini_v3_13b"
-    # model_path = "/mnt/localstorage/yinghan/llm/zephyr-7b-beta"
-    model = LlamaForCausalLM.from_pretrained(
-        model_path, device_map="auto"
-    )  # , load_in_8bit=True)
-    # model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")#, load_in_8bit=True)  # which is better?
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-
-    # print('making a pipeline...')
-    # max_length has typically been deprecated for max_new_tokens
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=1024,
-        model_kwargs={"temperature": 0.1},
-    )
-    llm = HuggingFacePipeline(pipeline=pipe)
-
-    return llm
-
-
-def get_llm_hf_local_zephyr(model_path):
-    # model_path = "/mnt/localstorage/yinghan/llm/orca_mini_v3_13b"
-    # model_path = "/mnt/localstorage/yinghan/llm/zephyr-7b-beta"
-    model = LlamaForCausalLM.from_pretrained(
-        model_path, device_map="auto"
-    )  # , load_in_8bit=True)
-    # import torch
-    # model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)#, load_in_8bit=True)  # which is better?
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-
-    # print('making a pipeline...')
-    # max_length has typically been deprecated for max_new_tokens
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=1024,
-        temperature=0.1,
-        # top_p=0.8,
-        # do_sample=True,
-        # repetition_penalty=1.1,
-        return_full_text=True
-        # model_kwargs={"temperature": 0.1},
-    )
-    llm = HuggingFacePipeline(pipeline=pipe)
-
-    return llm
-
-
-def get_chat_vllm(model_name, inference_server_url, langfuse_callback=None):
-
-    # to fix
-    # Create vLLM Langchain instance
-
-    # Some defaults
-    # chat_model_name = "openchat/openchat_3.5"
-    # inference_server_url = "http://localhost:8080/v1"
-    chat = ChatOpenAI(
-        model=model_name,
-        openai_api_key="EMPTY",
-        openai_api_base=inference_server_url,
-        max_tokens=512,  # better setting?
-        temperature=0.1,  # default 0.7, better setting?
-        # callbacks=[langfuse_callback],
-    )
-
-    # The following is not required for building a normal llm
-    # use the Ragas LangchainLLM wrapper to create a RagasLLM instance
-    # vllm = LangchainLLM(llm=chat)
-    # return vllm
-    return chat
-
-def get_chat_vllm_stream(model_name, inference_server_url, langfuse_callback=None):
-
-    # to fix
-    # Create vLLM Langchain instance
-
-    # Some defaults
-    # chat_model_name = "openchat/openchat_3.5"
-    # inference_server_url = "http://localhost:8080/v1"
-    chat = ChatOpenAI(
-        model=model_name,
-        openai_api_key="EMPTY",
-        openai_api_base=inference_server_url,
-        max_tokens=512,  # better setting?
-        temperature=0.1,  # default 0.7, better setting?
-        streaming=True,
-        callbacks=[StreamingStdOutCallbackHandler(), langfuse_callback],
-    )
-
-    # The following is not required for building a normal llm
-    # use the Ragas LangchainLLM wrapper to create a RagasLLM instance
-    # vllm = LangchainLLM(llm=chat)
-    # return vllm
-    return chat
-
-
-def get_chat_vllm_stream_TODO(model_name, inference_server_url, streaming=True):
-
-    # to fix
-    # Create vLLM Langchain instance
-
-    if streaming:
-        streaming_callback = StreamingStdOutCallbackHandler()
-    else:
-        streaming_callback = None
-
-    from langchain.callbacks.manager import CallbackManager
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-    # Some defaults
-    # chat_model_name = "openchat/openchat_3.5"
-    # inference_server_url = "http://localhost:8080/v1"
-    chat = ChatOpenAI(
-        model=model_name,
-        openai_api_key="EMPTY",
-        openai_api_base=inference_server_url,
-        max_tokens=512,  # better setting?
-        temperature=0.1,  # default 0.7, better setting?
-        streaming=streaming,
-        callbacks=[streaming_callback],
-        callback_manager=callback_manager,
-        stream=True,
-    )
-
-    from langchain_community.llms import VLLMOpenAI
-    from langchain.callbacks.manager import CallbackManager
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-
-    llm = VLLMOpenAI(
-        openai_api_key="EMPTY",
-        openai_api_base=inference_server_url,
-        model=model_name,
-        max_tokens=512,  # better setting?
-        temperature=0.1,  # default 0.7, better setting?
-        streaming=True,
-        stream=True,  # necessary?
-        callbacks=[streaming_callback],
-        callback_manager=callback_manager,
-    )
-
-    # The following is not required for building a normal llm
-    # use the Ragas LangchainLLM wrapper to create a RagasLLM instance
-    # vllm = LangchainLLM(llm=chat)
-    # return vllm
-    return chat
-
-
-
-def _get_llm_hf_local(model_path):
-    model_path = "/mnt/localstorage/yinghan/llm/orca_mini_v3_13b"
-    model_path = "/mnt/localstorage/yinghan/llm/zephyr-7b-beta"
-    model = LlamaForCausalLM.from_pretrained(
-        model_path, device_map="auto"
-    )  # , load_in_8bit=True)
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-
-    # print('making a pipeline...')
-    # max_length has typically been deprecated for max_new_tokens
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=1024,
-        model_kwargs={"temperature": 0},
-    )
-    llm = HuggingFacePipeline(pipeline=pipe)
-
-    return llm
-


 from langchain.chains import RetrievalQAWithSourcesChain, StuffDocumentsChain
@@ -317,32 +87,6 @@ def get_cite_combine_docs_chain(llm):
     return combine_docs_chain


-class ConversationChainFactory_bp:
-    def __init__(
-        self, memory_key="chat_history", output_key="answer", return_messages=True
-    ):
-        self.memory_key = memory_key
-        self.output_key = output_key
-        self.return_messages = return_messages
-
-    def create(self, vectorstore, llm):
-        memory = ConversationBufferWindowMemory(  # ConversationBufferMemory(
-            memory_key=self.memory_key,
-            return_messages=self.return_messages,
-            output_key=self.output_key,
-        )
-
-        # https://github.com/langchain-ai/langchain/issues/4608
-        conversation_chain = ConversationalRetrievalChain.from_llm(
-            llm=llm,
-            retriever=vectorstore.as_retriever(),  # search_kwargs={"k": 8}),
-            memory=memory,
-            return_source_documents=True,
-        )
-
-        return conversation_chain
-
-
 class ConversationChainFactory:
     def __init__(
         self, memory_key="chat_history", output_key="answer", return_messages=True
@@ -351,7 +95,7 @@ class ConversationChainFactory:
         self.output_key = output_key
         self.return_messages = return_messages

-    def create(self,
+    def create(self, retriever, llm):
         memory = ConversationBufferWindowMemory(  # ConversationBufferMemory(
             memory_key=self.memory_key,
             return_messages=self.return_messages,
@@ -419,13 +163,12 @@ class ConversationChainFactory:
         # https://github.com/langchain-ai/langchain/issues/4608
         conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
-            retriever=
+            retriever=retriever,
             memory=memory,
             return_source_documents=True,
             # return_generated_question=True,  # for debug
             rephrase_question=False,  # Disable rephrase, for test purpose
             get_chat_history=lambda x: x,
-            # callbacks=[langfuse_callback]
             # verbose=True,
             # combine_docs_chain_kwargs={"prompt": PROMPT},
             # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
@@ -444,7 +187,7 @@ class ConversationChainFactoryDev:
         self.output_key = output_key
         self.return_messages = return_messages

-    def create(self,
+    def create(self, retriever, llm):
         memory = ConversationBufferWindowMemory(  # ConversationBufferMemory(
             memory_key=self.memory_key,
             return_messages=self.return_messages,
@@ -515,13 +258,12 @@ class ConversationChainFactoryDev:

         conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
-            retriever=
+            retriever=retriever,
             memory=memory,
             return_source_documents=True,
             # return_generated_question=True,  # for debug
             rephrase_question=False,  # Disable rephrase, for test purpose
             get_chat_history=lambda x: x,
-            # callbacks=[langfuse_callback]
             # verbose=True,
             # combine_docs_chain_kwargs={"prompt": PROMPT},
             # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
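After this commit, models.py keeps only the chain factories, and create() takes a ready-made retriever plus an LLM instead of building the retriever from a vectorstore internally. A call-site sketch, not part of the commit; the embeddings, vectorstore, and question are illustrative, and `chunks` stands for the output of get_doc_chunks above:

# Sketch only: demonstrates the new create(retriever, llm) signature.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

from llms import get_groq_chat
from models import ConversationChainFactory

embeddings = HuggingFaceEmbeddings()                      # any embedding model will do here
vectorstore = Chroma.from_documents(chunks, embeddings)   # `chunks` from get_doc_chunks

factory = ConversationChainFactory()
# The removed ConversationChainFactory_bp received the vectorstore and called .as_retriever() itself.
chain = factory.create(vectorstore.as_retriever(), get_groq_chat())

result = chain({"question": "Which topics do the indexed papers cover?"})
print(result["answer"])

Passing the retriever in, rather than the vectorstore, keeps retrieval settings (search type, k, any re-ranking) outside the factory, which seems to be the point of the signature change.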