#!/usr/bin/env python

# chat.py - load a previously indexed set of embeddings and query them
# see: https://github.com/sunny2309/llamaindex_tutorials/

# Eric Lease Morgan
# (c) University of Notre Dame; distributed under a GNU Public License

# March 22, 2024 - got it working using LlamaIndex and Ollama
# March 23, 2024 - started playing with prompts and added study carrels; gotta stop
# April 26, 2024 - implemented Ollama embeddings, thus removing network necessity
# May 5, 2024   - added Web interface; kewl!


# pre-configure
LLMODEL     = 'llama2'
SPEAKER     = 'chatbot'
AUDIENCE    = 'freshman'
TEMPERATURE = 0.25
SIMILARITY  = 4
CARREL      = 'climate-change'
EXAMPLES    = [ 'What are some of the causes of climate change?',
                'What might happen if climate change is not abated?',
                'What can be done to reverse the effects of climate change?' ]
TITLE       = "Climate Change"
DESCRIPTION = "Ask anything. Your question will be addressed through the analysis of a couple hundred scholarly articles on the topic of climate change. For more detail, see the about page."

# configure
HEIGHT        = 300
DEBUG         = True
SHARE         = True
CONFIGURATION = 'localLibrary'
PERSONAS      = {
    'chatbot'         : 'a helpful person whose answers are limited to one or two sentences',
    'secondGrader'    : 'a child in the second grade',
    'freshman'        : 'a freshman at a college or university',
    'senior'          : 'a senior at a college or university',
    'graduateStudent' : 'a graduate student at a college or university',
    'librarian'       : 'a sarcastic college or university librarian',
    'professor'       : 'a college or university professor',
    'emeritus'        : 'a college or university professor emeritus'
}
TEMPLATE = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n---------------------\n"
    "Given the context information and not prior knowledge, answer the query.\n"
    "Write the answer in the style of {speaker} and intended for {audience}. \n"
    "Query: {query_str} \n"
    "Answer: \n"
)
EMBEDDINGS = 'all-minilm'
STREAMING  = False
ERROR      = 'Apparently, the LLM has yet to be created for this carrel. Call Eric or create one.'
TIMEOUT    = 600
REFERENCES = '\n\nReferences:\n'
LLM        = 'llm'

# require
from llama_index.core import Settings, VectorStoreIndex, StorageContext, load_index_from_storage, PromptTemplate
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from pathlib import Path
from rdr import configuration, ETC
from sys import stderr, argv, exit
import gradio
import time


# chat; process the input
def chat( message, history ) :

    # log input
    stderr.write( message + '\n\n' )

    # submit the message (question)
    response = str( engine.query( message ) )

    # create a list of references (titles)
    references = {}
    for node in engine.retrieve( message ) :

        # parse; create a reference
        metadata  = node.metadata
        author    = str( metadata.get( 'author', 'None' ) )
        title     = str( metadata.get( 'title', 'None' ) )
        date      = str( metadata.get( 'date', 'None' ) )
        file      = str( metadata.get( 'file', 'None' ) )
        reference = title + ' / ' + author + ' (' + date + ') in ' + file

        # update
        if reference in references : references[ reference ] += 1
        else                       : references[ reference ] = 1

    # sort and transform the references into something more human-readable
    references = dict( sorted( references.items(), key=lambda x:x[ 1 ], reverse=True ) )
    references = [ key + ' (' + str( references[ key ] ) + ')' for key in references.keys() ]

    # create a list of referenced items
    items = []
    for index, reference in enumerate( references ) : items.append( ' ' + str( index + 1 ) + '. ' + reference )

    # build the final result and log
    result = response + REFERENCES + '\n'.join( items )
    stderr.write( result + '\n\n\n' )

    # output; done
    for i in range( len( result ) ) :
        time.sleep( 0.025 )
        yield result[ : i + 1 ]


# initialize
library              = Path( configuration( CONFIGURATION ) )
Settings.embed_model = OllamaEmbedding( model_name=EMBEDDINGS )
Settings.llm         = Ollama( model=LLMODEL, request_timeout=TIMEOUT, temperature=TEMPERATURE )
template             = PromptTemplate( TEMPLATE ).partial_format( speaker=PERSONAS[ SPEAKER ] ).partial_format( audience=PERSONAS[ AUDIENCE ] )

# load the index
try    : index = load_index_from_storage( StorageContext.from_defaults( persist_dir=library/CARREL/ETC/LLM ) )
except : exit( ERROR )

# initialize the query engine and user interface
engine    = index.as_query_engine( text_qa_template=template, similarity_top_k=SIMILARITY, streaming=STREAMING )
interface = gradio.ChatInterface( chat, chatbot=gradio.Chatbot( height=HEIGHT ), examples=EXAMPLES, cache_examples=False, title=TITLE, description=DESCRIPTION )

# launch the interface (server) and done
interface.launch( share=SHARE, debug=DEBUG )
exit()
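
# The persisted index loaded above (persist_dir=library/CARREL/ETC/LLM) must be
# built ahead of time. What follows is a minimal, hypothetical sketch of how
# such an index might be created with standard LlamaIndex calls
# (SimpleDirectoryReader, VectorStoreIndex.from_documents, and
# storage_context.persist). The 'txt' subdirectory holding the carrel's
# plain-text files is an assumption, and the real indexing step presumably
# also attaches the author/title/date/file metadata used by chat(). Run
# something like this once, from a separate script, before using chat.py:
#
#   from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
#   from llama_index.embeddings.ollama import OllamaEmbedding
#
#   Settings.embed_model = OllamaEmbedding( model_name=EMBEDDINGS )
#   documents            = SimpleDirectoryReader( library/CARREL/'txt' ).load_data()
#   index                = VectorStoreIndex.from_documents( documents )
#   index.storage_context.persist( persist_dir=library/CARREL/ETC/LLM )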