Spaces:
Build error
Build error
| import difflib | |
| import json | |
| import numpy as np | |
| import streamlit as st | |
| from pyserini.search.lucene import LuceneSearcher | |
def read_json(file_name):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The deserialized value produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)
class SearchApplication:
    """Streamlit page that searches a Lucene index of ChatGPT repositories.

    Constructing an instance renders the whole page: the header, the query
    box and search button, the row of popular-word shortcut buttons, and
    finally either the matching repositories or spelling suggestions.
    """

    # Labels of the shortcut buttons; pressing one sets the query to the
    # lower-cased label.
    POPULAR_WORDS = (
        "Prompt",
        "Chatbot",
        "Langchain",
        "Extension",
        "LLMs",
        "API",
    )
    # Maximum number of hits requested from the searcher.
    MAX_SEARCH_RESULTS = 500
    # Number of best-scoring candidate words kept as suggestions.
    NUM_CANDIDATE_WORDS = 6

    def __init__(self):
        """Configure the page, build the widgets and render the results."""
        self.title = "Awesome ChatGPT repositories search"
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input("Search English words", value="")
        with col2:
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [500+ "
            " repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
        )
        st.write("#")

        candidate_words_file = "candidate_words.json"
        candidate_words_json = read_json(candidate_words_file)
        self.candidate_words = candidate_words_json["candidate_words"]

        # The shortcut buttons may overwrite self.query, so they must be
        # rendered before the results are computed.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Apply the page title, icon and layout."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self):
        """Return a ``LuceneSearcher`` opened on the ``indexes/docs`` index."""
        return LuceneSearcher("indexes/docs")

    def show_popular_words(self):
        """Render one shortcut button per popular word.

        A pressed button replaces ``self.query`` with the lower-cased label.
        """
        st.caption("Popular words")
        columns = st.columns(len(self.POPULAR_WORDS))
        for column, label in zip(columns, self.POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = label.lower()

    def show_search_results(self):
        """Run the search and render the hits, or suggestions when none match.

        Nothing is rendered unless a query is present or the search button
        was pressed.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")
        search_results = self.searcher.search(
            self.query, k=self.MAX_SEARCH_RESULTS
        )
        num_search_results = len(search_results)
        st.write(f"A total of {num_search_results} repositories found.")

        if num_search_results > 0:
            self._show_repositories(search_results)
        elif self.query:
            self._show_suggestions()

    def _show_repositories(self, search_results):
        """Render each hit, ordered by its ``freq`` field, highest first."""
        repositories = [json.loads(result.raw) for result in search_results]
        repositories.sort(key=lambda repo: repo["freq"], reverse=True)

        for repo in repositories:
            st.write("---")
            st.subheader(f"[{repo['project_name']}]({repo['url']})")
            st.write(repo["description"])
            st.caption(self._format_repository_info(repo))

    @staticmethod
    def _format_repository_info(json_data):
        """Return the ``"<language> / <license>"`` caption for a repository.

        A missing, ``None`` or empty language and a missing or ``None``
        license are replaced by explicit "Unknown" placeholders.
        """
        language = json_data.get("language")
        # Named license_info so the ``license`` builtin is not shadowed.
        license_info = json_data.get("license")

        info = [
            language if language else "Language: Unknown",
            (
                license_info["name"]
                if license_info is not None
                else "License: Unknown"
            ),
        ]
        return " / ".join(info)

    def _suggest_words(self):
        """Return the candidate words most similar to ``self.query``.

        Candidates are scored with ``difflib.SequenceMatcher``; the
        ``NUM_CANDIDATE_WORDS`` best are kept, best first, and duplicates
        among them are dropped while preserving order.
        """
        scores = [
            difflib.SequenceMatcher(None, self.query, word).ratio()
            for word in self.candidate_words
        ]
        best_indexes = np.argsort(scores)[::-1][: self.NUM_CANDIDATE_WORDS]
        best_words = [self.candidate_words[i] for i in best_indexes]
        # dict.fromkeys keeps the first occurrence of each word, in order.
        return list(dict.fromkeys(best_words))

    def _show_suggestions(self):
        """Render a numbered list of suggested query words."""
        st.caption("Suggestions")
        for i, word in enumerate(self._suggest_words(), start=1):
            st.write(f"{i}: {word}")
def main():
    """Instantiate the search application; its constructor renders the page."""
    SearchApplication()
# Start the application only when this file is executed as a script.
if __name__ == "__main__":
    main()