#!/usr/bin/env python

# chat.py - load a previously indexed set of embeddings and query them
# see: https://github.com/sunny2309/llamaindex_tutorials/

# Eric Lease Morgan
# (c) University of Notre Dame; distributed under a GNU Public License

# March 22, 2024 - got it working using LlamaIndex and Ollama
# March 23, 2024 - started playing with prompts and added study carrels; gotta stop
# April 26, 2024 - implemented Ollama embeddings, thus removing network necessity
# May 5, 2024   - added Web interface; kewl!


# pre-configure
LLMODEL     = 'llama2'
SPEAKER     = 'chatbot'
AUDIENCE    = 'freshman'
TEMPERATURE = 0.25
SIMILARITY  = 4
CARREL      = 'climate-change'
EXAMPLES    = [ 'What are some of the causes of climate change?',
                'What might happen if climate change is not abated?',
                'What can be done to reverse the effects of climate change?' ]
TITLE       = "Climate Change"
DESCRIPTION = "Ask anything. Your question will be addressed through the analysis of a couple hundred scholarly articles on the topic of climate change. For more detail, see the about page."

# configure
HEIGHT        = 300
DEBUG         = True
SHARE         = True
CONFIGURATION = 'localLibrary'
PERSONAS      = {
    'chatbot'         : 'a helpful person whose answers are limited to one or two sentences',
    'secondGrader'    : 'a child in the second grade',
    'freshman'        : 'a freshman at a college or university',
    'senior'          : 'a senior at a college or university',
    'graduateStudent' : 'a graduate student at a college or university',
    'librarian'       : 'a sarcastic college or university librarian',
    'professor'       : 'a college or university professor',
    'emeritus'        : 'a college or university professor emeritus'
}
TEMPLATE = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n---------------------\n"
    "Given the context information and not prior knowledge, answer the query.\n"
    "Write the answer in the style of {speaker} and intended for {audience}. \n"
    "Query: {query_str} \n"
    "Answer: \n"
)
EMBEDDINGS = 'all-minilm'
STREAMING  = False
ERROR      = 'Apparently, the LLM has yet to be created for this carrel. Call Eric or create one.'
TIMEOUT    = 600
REFERENCES = '\n\nReferences:\n'
LLM        = 'llm'

# require
from llama_index.core import Settings, VectorStoreIndex, StorageContext, load_index_from_storage, PromptTemplate
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from pathlib import Path
from rdr import configuration, ETC
from sys import stderr, argv, exit
import gradio
import time


# chat; process the input
def chat( message, history ) :

    # log input
    stderr.write( message + '\n\n' )

    # submit the message (question)
    response = str( engine.query( message ) )

    # create a list of references (titles)
    references = {}
    for node in engine.retrieve( message ) :

        # parse; create a reference
        metadata  = node.metadata
        author    = str( metadata.get( 'author', 'None' ) )
        title     = str( metadata.get( 'title', 'None' ) )
        date      = str( metadata.get( 'date', 'None' ) )
        file      = str( metadata.get( 'file', 'None' ) )
        reference = title + ' / ' + author + ' (' + date + ') in ' + file

        # update
        if reference in references : references[ reference ] += 1
        else                       : references[ reference ] = 1

    # sort and transform the references into something more human-readable
    references = dict( sorted( references.items(), key=lambda x:x[ 1 ], reverse=True ) )
    references = [ key + ' (' + str( references[ key ] ) + ')' for key in references.keys() ]

    # create a list of referenced items
    items = []
    for index, reference in enumerate( references ) : items.append( ' ' + str( index + 1 ) + '. ' + reference )

    # build the final result and log
    result = response + REFERENCES + '\n'.join( items )
    stderr.write( result + '\n\n\n' )

    # output; done
    for i in range( len( result ) ) :
        time.sleep( 0.025 )
        yield result[ : i + 1 ]


# initialize
library              = Path( configuration( CONFIGURATION ) )
Settings.embed_model = OllamaEmbedding( model_name=EMBEDDINGS )
Settings.llm         = Ollama( model=LLMODEL, request_timeout=TIMEOUT, temperature=TEMPERATURE )
template             = PromptTemplate( TEMPLATE ).partial_format( speaker=PERSONAS[ SPEAKER ] ).partial_format( audience=PERSONAS[ AUDIENCE ] )

# load the index
try    : index = load_index_from_storage( StorageContext.from_defaults( persist_dir=library/CARREL/ETC/LLM ) )
except : exit( ERROR )

# initialize the query engine and user interface
engine    = index.as_query_engine( text_qa_template=template, similarity_top_k=SIMILARITY, streaming=STREAMING )
interface = gradio.ChatInterface( chat, chatbot=gradio.Chatbot( height=HEIGHT ), examples=EXAMPLES, cache_examples=False, title=TITLE, description=DESCRIPTION )

# launch the interface (server) and done
interface.launch( share=SHARE, debug=DEBUG )
exit()
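
# The persisted index loaded above (persist_dir=library/CARREL/ETC/LLM) must be
# built ahead of time. What follows is a minimal, hypothetical sketch of how
# such an index might be created with standard LlamaIndex calls
# (SimpleDirectoryReader, VectorStoreIndex.from_documents, and
# storage_context.persist). The 'txt' subdirectory holding the carrel's
# plain-text files is an assumption, and the real indexing step presumably
# also attaches the author/title/date/file metadata used by chat(). Run
# something like this once, from a separate script, before using chat.py:
#
#   from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
#   from llama_index.embeddings.ollama import OllamaEmbedding
#
#   Settings.embed_model = OllamaEmbedding( model_name=EMBEDDINGS )
#   documents            = SimpleDirectoryReader( library/CARREL/'txt' ).load_data()
#   index                = VectorStoreIndex.from_documents( documents )
#   index.storage_context.persist( persist_dir=library/CARREL/ETC/LLM )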