Skip to content

feat: bedrock support #126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ minify-html = "0.15.0"
free-proxy = "1.1.1"
langchain-groq = "0.1.3"
playwright = "^1.43.0"
langchain-aws = "^0.1.2"


[tool.poetry.dev-dependencies]
pytest = "8.0.0"
Expand Down
22 changes: 19 additions & 3 deletions scrapegraphai/graphs/abstract_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"""
from abc import ABC, abstractmethod
from typing import Optional
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq

from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq, Bedrock
from ..helpers import models_tokens


Expand All @@ -25,7 +26,8 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):

# Set common configuration parameters
self.verbose = True if config is None else config.get("verbose", False)
self.headless = True if config is None else config.get("headless", True)
self.headless = True if config is None else config.get(
"headless", True)

# Create the graph
self.graph = self._create_graph()
Expand Down Expand Up @@ -92,12 +94,26 @@ def _create_llm(self, llm_config: dict):
return HuggingFace(llm_params)
elif "groq" in llm_params["model"]:
llm_params["model"] = llm_params["model"].split("/")[-1]

try:
self.model_token = models_tokens["groq"][llm_params["model"]]
except KeyError:
raise KeyError("Model not supported")
return Groq(llm_params)
elif "bedrock" in llm_params["model"]:
llm_params["model"] = llm_params["model"].split("/")[-1]
model_id = llm_params["model"]

try:
self.model_token = models_tokens["bedrock"][llm_params["model"]]
except KeyError:
raise KeyError("Model not supported")
return Bedrock({
"model_id": model_id,
"model_kwargs": {
"temperature": llm_params["temperature"],
}
})
else:
raise ValueError(
"Model provided by the configuration not supported")
Expand Down
17 changes: 17 additions & 0 deletions scrapegraphai/helpers/models_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,22 @@
"claude2": 9000,
"claude2.1": 200000,
"claude3": 200000
},
"bedrock": {
"anthropic.claude-3-haiku-20240307-v1:0": 200000,
"anthropic.claude-3-sonnet-20240229-v1:0": 200000,
"anthropic.claude-3-opus-20240229-v1:0": 200000,
"anthropic.claude-v2:1": 200000,
"anthropic.claude-v2": 100000,
"anthropic.claude-instant-v1": 100000,
"meta.llama3-8b-instruct-v1:0": 8192,
"meta.llama3-70b-instruct-v1:0": 8192,
"meta.llama2-13b-chat-v1": 4096,
"meta.llama2-70b-chat-v1": 4096,
"mistral.mistral-7b-instruct-v0:2": 32768,
"mistral.mixtral-8x7b-instruct-v0:1": 32768,
"mistral.mistral-large-2402-v1:0": 32768,
"cohere.embed-english-v3": 512,
"cohere.embed-multilingual-v3": 512
}
}
1 change: 1 addition & 0 deletions scrapegraphai/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
from .ollama import Ollama
from .hugging_face import HuggingFace
from .groq import Groq
from .bedrock import Bedrock
19 changes: 19 additions & 0 deletions scrapegraphai/models/bedrock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
bedrock configuration wrapper
"""
from langchain_aws import ChatBedrock


class Bedrock(ChatBedrock):
    """Thin project-level wrapper around ``langchain_aws.ChatBedrock``.

    Exists so the rest of the codebase can depend on a local ``Bedrock``
    type (e.g. for ``isinstance`` checks) instead of importing the
    third-party class directly; it may grow extra behavior later.
    """

    def __init__(self, llm_config: dict):
        """Initialize the underlying ``ChatBedrock`` model.

        Args:
            llm_config (dict): Keyword configuration forwarded verbatim to
                ``ChatBedrock`` (e.g. ``model_id``, ``model_kwargs``).
        """
        # No local state: unpack the config straight into the parent class.
        super().__init__(**llm_config)
12 changes: 9 additions & 3 deletions scrapegraphai/nodes/rag_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
from langchain.docstore.document import Document
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
from langchain_aws.embeddings.bedrock import BedrockEmbeddings
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.embeddings import HuggingFaceHubEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace

from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace, Bedrock
from .base_node import BaseNode


Expand Down Expand Up @@ -42,7 +44,8 @@ def __init__(self, input: str, output: List[str], node_config: dict, node_name:
super().__init__(node_name, "node", input, output, 2, node_config)
self.llm_model = node_config["llm"]
self.embedder_model = node_config.get("embedder_model", None)
self.verbose = True if node_config is None else node_config.get("verbose", False)
self.verbose = True if node_config is None else node_config.get(
"verbose", False)

def execute(self, state):
"""
Expand Down Expand Up @@ -82,7 +85,7 @@ def execute(self, state):
},
)
chunked_docs.append(doc)

if self.verbose:
print("--- (updated chunks metadata) ---")

Expand All @@ -104,6 +107,9 @@ def execute(self, state):
embeddings = OllamaEmbeddings(**params)
elif isinstance(embedding_model, HuggingFace):
embeddings = HuggingFaceHubEmbeddings(model=embedding_model.model)
elif isinstance(embedding_model, Bedrock):
embeddings = BedrockEmbeddings(
client=None, model_id=embedding_model.model_id)
else:
raise ValueError("Embedding Model missing or not supported")

Expand Down
Loading