forked from SangHui48/GitHub-QA-Chatbot-with-Langchain
-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
122 lines (109 loc) · 5.16 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import time
import requests
from common import *
from PIL import Image
from io import BytesIO
import streamlit as st
from githubqa.vector_db import *
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from githubqa.data_processing import dictionary_to_docs, create_retriever
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from githubqa.get_info_from_api import get_avatar_info, get_repo_list, github_api_call
# 1. Session setup and donation widget (both helpers come from the
#    project's wildcard imports).
initialize_session()
buy_me_tea()

# 2. Sidebar: application title and GitHub username field.
st.sidebar.title('`Gitter`:feather:')
# The text box is seeded with the name already held in session state; whatever
# it returns is written back so later sections read the current input.
typed_user_name = st.sidebar.text_input(
    'GitHub User:',
    value=st.session_state["user_name"],
    key="github_user_input",
    on_change=handling_user_change,
)
st.session_state["user_name"] = typed_user_name
# 3. Sidebar: repository picker and user avatar.
if st.session_state["user_name"]:
    user_name = st.session_state['user_name']
    repo_list = get_repo_list(user_name)[0]
    user_info = get_avatar_info(user_name)
    if repo_list:
        repo_list = [DEFAULT_SELECT_VALUE] + repo_list

        # Pre-select the repository remembered in session state. If that name
        # is not in this user's list, fall back to the placeholder entry
        # rather than letting list.index() raise ValueError.
        stored_repo = st.session_state["repo_name"]
        selected_index = repo_list.index(stored_repo) if stored_repo in repo_list else 0
        st.session_state["repo_name"] = st.sidebar.selectbox(
            f"Select {user_name}'s repository", repo_list,
            key="repo_select",
            index=selected_index,
        )
        if st.session_state["repo_name"] != DEFAULT_SELECT_VALUE:
            st.session_state["repo_url"] = (
                f"https://github.com/{st.session_state['user_name']}/{st.session_state['repo_name']}"
            )

        # The avatar is purely decorative, so a slow or failed download must
        # not hang or crash the page: bound the request with a timeout and
        # degrade to a sidebar warning on network/HTTP/decoding errors.
        avatar_url = user_info['avatar_url']
        try:
            image_response = requests.get(avatar_url, timeout=10)
            image_response.raise_for_status()
            image = Image.open(BytesIO(image_response.content)).resize((250, 250))
        except (requests.RequestException, OSError):
            # PIL raises OSError subclasses when the payload is not an image.
            st.sidebar.warning("Could not load the profile image.")
        else:
            st.sidebar.image(image, use_column_width='always', caption=f"{user_name}'s profile")
    else:
        # An empty repository list is reported as an invalid user.
        st.error("Invalid user ID")
st.sidebar.info('Made with by [오미자차](https://github.com/SangHui48/KDT_AI_B3)')
# 4. Main screen: index the selected repository, then run the chat loop.
st.header("`Chatbot`")
if st.session_state['repo_url']:
    repo_url = st.session_state['repo_url']

    # Retrievers are cached per repository URL. A single shared slot would
    # hand back the most recently built retriever when the user returns to a
    # repository selected earlier in the session.
    if 'retriever_by_repo' not in st.session_state:
        st.session_state['retriever_by_repo'] = {}
    retriever_by_repo = st.session_state['retriever_by_repo']

    first_visit = repo_url not in st.session_state['visitied_list']
    if first_visit or repo_url not in retriever_by_repo:
        # Streamlit re-executes this script on every interaction, so the
        # fetch -> chunk -> embed pipeline runs only when a retriever
        # actually has to be built.
        # NOTE(review): assumes github_api_call / dictionary_to_docs have no
        # side effects that later reruns depend on - confirm.
        with st.spinner('Analyzing Repository...'):
            # github_info_dict maps "File_name" -> "File_content".
            github_info_dict, structure_content, _, user_content = github_api_call(repo_url)

        with st.spinner('Embedding to VectorSpace...'):
            # Chunk the fetched content into documents: [Doc1, Doc2, ...].
            docs = dictionary_to_docs(
                github_info_dict, structure_content, user_content,
                chunking_size=1000, overlap_size=0,
                model_name=MODEL_NAME
            )
            # Embed the chunks once and wrap them in a retriever.
            embedding_model = OpenAIEmbeddings(model='text-embedding-ada-002')
            retriever = create_retriever(embedding_model, docs)

        retriever_by_repo[repo_url] = retriever
        if first_visit:
            st.session_state['visitied_list'].append(repo_url)
            # A repository seen for the first time starts with an empty transcript.
            st.session_state['messages'] = []
    else:
        retriever = retriever_by_repo[repo_url]
    # Keep the original single-slot key up to date for any code that reads it.
    st.session_state['retriever'] = retriever

    open_ai_model = ChatOpenAI(model_name=MODEL_NAME)
    # NOTE(review): chat_memory is not cleared here when the repository
    # changes - confirm that is handled elsewhere (e.g. handling_user_change).
    if not st.session_state['chat_memory']:
        st.session_state['chat_memory'] = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    memory = st.session_state['chat_memory']
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=open_ai_model,
        memory=memory,
        retriever=retriever,
        get_chat_history=lambda h: h,
        verbose=True,
    )

    # Replay the stored transcript so earlier turns stay visible after a rerun.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Ask questions about the GitHub repository!"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.spinner('Generating answers...'):
            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                full_response = ""
                answer = qa_chain({"question": prompt})['answer']  # QA chain
                # Reveal the answer one character at a time to imitate
                # streaming; the trailing block acts as a cursor.
                for char in answer:
                    full_response += char
                    time.sleep(0.02)
                    message_placeholder.markdown(full_response + "▌")
                message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})
else:
    # Landing page shown until a repository has been selected.
    st.info('Hit your **GITHUB NAME** and **REPO** to the left side bar.')
    st.info("""
I am an analysis tool for question-answering built on LangChain.\n
Given GitHub informations, I will analyze the repository using LangChain and store it in vectorDB.
And I will answer questions about GitHub through openAI.\n
For example, I respond to information about the structure of the git repository, what it does, and what functions are in.\n
Here is the video of example.
""")
    st.video("video/GITTER_DEMO.webm", format="video/webm")