eosphoros-ai
diff --git a/‎.github/workflows/pylint.yml
+13-4 b/‎.github/workflows/pylint.yml
+13-4
diff --git a/‎README.md
+4 b/‎README.md
+4
diff --git a/‎README.zh.md
+4 b/‎README.zh.md
+4
diff --git a/‎examples/app.py
+29-19 b/‎examples/app.py
+29-19
diff --git a/‎examples/embdserver.py
+10-15 b/‎examples/embdserver.py
+10-15
diff --git a/‎examples/gpt_index.py
+4-4 b/‎examples/gpt_index.py
+4-4
diff --git a/‎examples/gradio_test.py
+4-2 b/‎examples/gradio_test.py
+4-2
diff --git a/‎examples/knowledge_embedding/csv_embedding_test.py
+9-4 b/‎examples/knowledge_embedding/csv_embedding_test.py
+9-4
diff --git a/‎examples/knowledge_embedding/pdf_embedding_test.py
+9-2 b/‎examples/knowledge_embedding/pdf_embedding_test.py
+9-2
diff --git a/‎examples/knowledge_embedding/url_embedding_test.py
+9-2 b/‎examples/knowledge_embedding/url_embedding_test.py
+9-2
@@ -1,6 +1,15 @@
 name: Pylint
 
-on: [push]
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.event.number || github.run_id }}
+  cancel-in-progress: true
 
 jobs:
   build:
@@ -17,7 +26,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install pylint
-    - name: Analysing the code with pylint
+        pip install -U black isort
+    - name: check the code lint
       run: |
-        pylint $(git ls-files '*.py')
+        black . --check
@@ -215,6 +215,10 @@ The achievements of this project are thanks to the technical community, especial
 - [ChatGLM](https://github.com/THUDM/ChatGLM-6B) as the base model
 - [llama_index](https://github.com/jerryjliu/llama_index) for enhancing database-related knowledge using [in-context learning](https://arxiv.org/abs/2301.00234) based on existing knowledge bases.
 
+## Contribution
+
+- Please run `black .` before submitting your code; the lint workflow checks formatting with `black . --check`.
+
 <!-- GITCONTRIBUTOR_START -->
 
 ## Contributors
 
@@ -218,6 +218,10 @@ python tools/knowledge_init.py
 - [ChatGLM](https://github.com/THUDM/ChatGLM-6B) 基础模型
 - [llama-index](https://github.com/jerryjliu/llama_index) 基于现有知识库进行[In-Context Learning](https://arxiv.org/abs/2301.00234)来对其进行数据库相关知识的增强。
 
+## 贡献
+
+- 提交代码前请先执行 `black .`
+
 <!-- GITCONTRIBUTOR_START -->
 
 ## 贡献者
 
@@ -2,52 +2,63 @@
 # -*- coding:utf-8 -*-
 
 import gradio as gr
-from langchain.agents import (
-    load_tools,
-    initialize_agent,
-    AgentType
-)
-from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM
-from llama_index import LLMPredictor, LangchainEmbedding, ServiceContext
+from langchain.agents import AgentType, initialize_agent, load_tools
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from llama_index import Document, GPTSimpleVectorIndex
+from llama_index import (
+    Document,
+    GPTSimpleVectorIndex,
+    LangchainEmbedding,
+    LLMPredictor,
+    ServiceContext,
+)
+
+from pilot.model.vicuna_llm import VicunaEmbeddingLLM, VicunaRequestLLM
+
 
 def agent_demo():
     llm = VicunaRequestLLM()
 
-    tools = load_tools(['python_repl'], llm=llm)
-    agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
-    agent.run(
-        "Write a SQL script that Query 'select count(1)!'"
+    tools = load_tools(["python_repl"], llm=llm)
+    agent = initialize_agent(
+        tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True
     )
+    agent.run("Write a SQL script that Query 'select count(1)!'")
+
 
 def knowledged_qa_demo(text_list):
     llm_predictor = LLMPredictor(llm=VicunaRequestLLM())
     hfemb = VicunaEmbeddingLLM()
     embed_model = LangchainEmbedding(hfemb)
     documents = [Document(t) for t in text_list]
 
-    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
-    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context) 
+    service_context = ServiceContext.from_defaults(
+        llm_predictor=llm_predictor, embed_model=embed_model
+    )
+    index = GPTSimpleVectorIndex.from_documents(
+        documents, service_context=service_context
+    )
     return index
 
 
 def get_answer(q):
-    base_knowledge = """ """ 
+    base_knowledge = """ """
     text_list = [base_knowledge]
     index = knowledged_qa_demo(text_list)
     response = index.query(q)
     return response.response
 
+
 def get_similar(q):
     from pilot.vector_store.extract_tovec import knownledge_tovec, knownledge_tovec_st
+
     docsearch = knownledge_tovec_st("./datasets/plan.md")
     docs = docsearch.similarity_search_with_score(q, k=1)
 
     for doc in docs:
-        dc, s = doc 
+        dc, s = doc
         print(s)
-        yield dc.page_content 
+        yield dc.page_content
+
 
 if __name__ == "__main__":
     # agent_demo()
@@ -58,8 +69,7 @@ def get_similar(q):
             text_input = gr.TextArea()
             text_output = gr.TextArea()
             text_button = gr.Button()
-        
+
         text_button.click(get_similar, inputs=text_input, outputs=text_output)
 
     demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
-   
 
@@ -1,30 +1,29 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 
-import requests
 import json
-import time
-import uuid
 import os
 import sys
 from urllib.parse import urljoin
+
 import gradio as gr
+import requests
 
 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(ROOT_PATH)
 
 
-from pilot.configs.config import Config
-from pilot.conversation import conv_qa_prompt_template, conv_templates
 from langchain.prompts import PromptTemplate
 
+from pilot.configs.config import Config
+from pilot.conversation import conv_qa_prompt_template, conv_templates
 
 llmstream_stream_path = "generate_stream"
 
 CFG = Config()
 
-def generate(query):
 
+def generate(query):
     template_name = "conv_one_shot"
     state = conv_templates[template_name].copy()
 
@@ -47,7 +46,7 @@ def generate(query):
         "prompt": prompt,
         "temperature": 1.0,
         "max_new_tokens": 1024,
-        "stop": "###"
+        "stop": "###",
     }
 
     response = requests.post(
@@ -57,19 +56,18 @@ def generate(query):
     skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("</s>") * 3
 
     for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
-
         if chunk:
             data = json.loads(chunk.decode())
             if data["error_code"] == 0:
-
                 if "vicuna" in CFG.LLM_MODEL:
                     output = data["text"][skip_echo_len:].strip()
                 else:
                     output = data["text"].strip()
 
                 state.messages[-1][-1] = output + "▌"
-                yield(output) 
- 
+                yield (output)
+
+
 if __name__ == "__main__":
     print(CFG.LLM_MODEL)
     with gr.Blocks() as demo:
@@ -78,10 +76,7 @@ def generate(query):
             text_input = gr.TextArea()
             text_output = gr.TextArea()
             text_button = gr.Button("提交")
-        
 
         text_button.click(generate, inputs=text_input, outputs=text_output)
 
-    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0") 
-
-    
+    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
@@ -1,19 +1,19 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import os
 import logging
 import sys
 
-from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex
+from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader
+
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
 
 # read the document of data dir
 documents = SimpleDirectoryReader("data").load_data()
-# split the document to chunk, max token size=500, convert chunk to vector 
+# split the documents into chunks (max token size=500) and convert each chunk to a vector
 
 index = GPTSimpleVectorIndex(documents)
 
 # save index
-index.save_to_disk("index.json")
+index.save_to_disk("index.json")
@@ -3,17 +3,19 @@
 
 import gradio as gr
 
+
 def change_tab():
     return gr.Tabs.update(selected=1)
 
+
 with gr.Blocks() as demo:
     with gr.Tabs() as tabs:
         with gr.TabItem("Train", id=0):
             t = gr.Textbox()
         with gr.TabItem("Inference", id=1):
             i = gr.Image()
-    
+
     btn = gr.Button()
     btn.click(change_tab, None, tabs)
 
-demo.launch()
+demo.launch()
@@ -1,5 +1,3 @@
-
-
 from pilot.source_embedding.csv_embedding import CSVEmbedding
 
 # path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
@@ -8,6 +6,13 @@
 vector_store_path = "your_path/"
 
 
-pdf_embedding = CSVEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
+pdf_embedding = CSVEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "url",
+        "vector_store_path": vector_store_path,  # use the variable, not its name
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")
@@ -6,6 +6,13 @@
 vector_store_path = "your_path/"
 
 
-pdf_embedding = PDFEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "ob-pdf", "vector_store_path": vector_store_path})
+pdf_embedding = PDFEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "ob-pdf",
+        "vector_store_path": vector_store_path,
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")
@@ -5,6 +5,13 @@
 vector_store_path = "your_path"
 
 
-pdf_embedding = URLEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
+pdf_embedding = URLEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "url",
+        "vector_store_path": vector_store_path,  # use the variable, not its name
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")