We will begin by reading the local directory for all of the files we scraped earlier. These files are in “.txt” format and are stored in the “scraped/” folder.
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    os.path.join(self.config.data_path, "scraped")
).load_data()
Now that we have loaded the files from the local directory, we need to define the format of the prompt that the user will provide as a query, as well as the fixed prompt that will guide the LLM to behave in a certain way. The SYSTEM_PROMPT is responsible for defining the nature of the LLM's interactions, and the QUERY_PROMPT is responsible for casting the user's queries into a template the LLM can understand.
SYSTEM_PROMPT = """
You're a Q&A assistant. Your aim is to reply questions as
precisely as doable based mostly on the directions and context supplied.
"""QUERY_PROMPT = "<|USER|>{query_str}<|ASSISTANT|>"
To make this prompt compatible with LlamaIndex functions, we need to wrap it in a Prompt class.
from llama_index.core.prompts.prompts import SimpleInputPrompt

query_wrapper_prompt = SimpleInputPrompt(QUERY_PROMPT)
To bring in the model from HuggingFace, we need to create a HuggingFaceLLM instance with all of the parameters and the system prompt. This class is responsible for loading an LLM from HuggingFace with the defined parameters and loading it into memory in a format compatible with LlamaIndex. We also need an embedding model to convert user queries as well as the context into embeddings; in this case we will be using LangChain embeddings, more specifically HuggingFace embeddings.
from llama_index.llms.huggingface import HuggingFaceLLM
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

llm = HuggingFaceLLM(
    context_window=self.config.context_window,
    max_new_tokens=self.config.max_new_tokens,
    generate_kwargs={"temperature": self.config.temperature,
                     "do_sample": False},
    system_prompt=SYSTEM_PROMPT,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=self.config.llm,
    model_name=self.config.llm,
    device_map=self.config.system,
    model_kwargs={"torch_dtype": torch.float16,
                  "load_in_4bit": self.config.bit_4,
                  "token": self.config.hf_token}
)
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name=self.config.embedding)
)
These are several processing steps that need to be brought together into a single service context that links the embedding model and the LLM, ready for inference. To make things easier, LlamaIndex provides a ServiceContext class for doing just that.
from llama_index.core import ServiceContext

service_context = ServiceContext.from_defaults(
    chunk_size=self.config.chunk_size,
    llm=llm,
    embed_model=embed_model
)
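Note that ServiceContext has been deprecated in more recent LlamaIndex releases (0.10 and later) in favor of the global Settings object. If you are on a newer version, a roughly equivalent setup (a sketch, not taken from the original project) would be:

from llama_index.core import Settings

# Register the LLM and embedding model globally instead of passing a ServiceContext around
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = self.config.chunk_size

# Later calls such as VectorStoreIndex.from_documents(documents) will pick these settings up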
Now that we have the service context, we need an indexing method that uses the documents loaded earlier together with the service context to perform query search over a set of chunks. This index will then be converted into a query engine for running the queries and getting responses.
from llama_index.core import VectorStoreIndex

index = VectorStoreIndex.from_documents(documents,
                                         service_context=service_context)
query_engine = index.as_query_engine()
The response can then simply be retrieved using:
response = query_engine.query(query)
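The query engine returns a Response object rather than a plain string; converting it with str() (or reading its response attribute) gives the generated answer text. As a quick check, assuming query holds a user question:

query = "What career paths suit a data science background?"  # hypothetical example query
response = query_engine.query(query)
print(str(response))  # the generated answer as plain text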
The entire implementation in a single class (with utility functions included) will be:
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext
)
import torch
import os

from careerbot.constants import SYSTEM_PROMPT, QUERY_PROMPT
from utils.common import load_key
from careerbot.entity import LLMConfig
from logger import logger
class LLM:
    def __init__(self, config: LLMConfig):
        '''
        creates an instance of the LLM class for building an LLM pipeline
        ## Parameters:
        config: LLMConfig
            configuration for the llm
        '''
        self.config = config
        if not self.config.hf_token:
            self.config.hf_token = load_key("HF_TOKEN")
        self.query_engine = None
    def prepare_llm(self):
        '''
        configures and loads the LLM with RAG integration
        '''
        documents = SimpleDirectoryReader(
            os.path.join(self.config.data_path, "scraped")
        ).load_data()
        query_wrapper_prompt = SimpleInputPrompt(QUERY_PROMPT)
        service_context = self.__load_service_context(query_wrapper_prompt)
        index = VectorStoreIndex.from_documents(documents,
                                                service_context=service_context)
        query_engine = index.as_query_engine()
        if self.query_engine:
            logger.info("Reloading Query Engine!!!")
        self.query_engine = query_engine
    def generate_response(self, query: str) -> str:
        '''
        generates a response for the given query. If the query engine has not
        been initiated, the initiation process will be started first.
        ## Parameters:
        query: str
            the user query for the llm
        ## Returns:
        response: str
            the response generated by the llm model
        '''
        if not self.query_engine:
            logger.info("Query Engine not found, initiating Query Engine!!!")
            self.prepare_llm()
        response = self.query_engine.query(query)
        return response
    def __load_service_context(self, query_wrapper_prompt):
        llm = HuggingFaceLLM(
            context_window=self.config.context_window,
            max_new_tokens=self.config.max_new_tokens,
            generate_kwargs={"temperature": self.config.temperature,
                             "do_sample": False},
            system_prompt=SYSTEM_PROMPT,
            query_wrapper_prompt=query_wrapper_prompt,
            tokenizer_name=self.config.llm,
            model_name=self.config.llm,
            device_map=self.config.system,
            model_kwargs={"torch_dtype": torch.float16,
                          "load_in_4bit": self.config.bit_4,
                          "token": self.config.hf_token}
        )
        embed_model = LangchainEmbedding(
            HuggingFaceEmbeddings(model_name=self.config.embedding)
        )
        service_context = ServiceContext.from_defaults(
            chunk_size=self.config.chunk_size,
            llm=llm,
            embed_model=embed_model
        )
        return service_context
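As a quick illustration of how this class might be used (the LLMConfig field values below are illustrative placeholders, not the project's real configuration), the flow is simply: build the config, construct the LLM wrapper, prepare the query engine, and ask questions.

# hypothetical usage sketch; the LLMConfig values are placeholders
config = LLMConfig(
    data_path="artifacts/data",
    context_window=4096,
    max_new_tokens=256,
    temperature=0.1,
    llm="meta-llama/Llama-2-7b-chat-hf",
    system="cuda",
    bit_4=True,
    hf_token=None,
    embedding="sentence-transformers/all-MiniLM-L6-v2",
    chunk_size=1024,
)

bot = LLM(config)
bot.prepare_llm()  # loads documents and the model, then builds the query engine
print(bot.generate_response("Which careers fit a mechanical engineering degree?"))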
You can find the complete source code, with the full project structure and end-to-end layout, at https://github.com/AI-DS-Club-BetaLabs/careerbot