-
Notifications
You must be signed in to change notification settings - Fork 12k
Updated server_queue to delete tasks from the queue when the server is shut down. Feature Request #6421 #6941
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
rahsuri
wants to merge
4
commits into
ggml-org:master
Choose a base branch
from
rahsuri:master
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Updated server_queue to delete tasks from the queue when the server is shut down. Feature Request #6421 #6941
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,8 @@ | |
#include <thread> | ||
#include <signal.h> | ||
#include <memory> | ||
#include <iostream> | ||
#include <boost/asio.hpp> | ||
|
||
using json = nlohmann::ordered_json; | ||
|
||
|
@@ -201,7 +203,7 @@ struct server_slot { | |
|
||
double t_prompt_processing; // ms | ||
double t_token_generation; // ms | ||
|
||
void reset() { | ||
n_prompt_tokens = 0; | ||
generated_text = ""; | ||
|
@@ -463,6 +465,50 @@ struct server_queue { | |
condition_tasks.notify_all(); | ||
} | ||
|
||
//adding server health checking | ||
std::string hostname_health = "127.0.0.1"; | ||
std::string port_health = "8080"; | ||
|
||
bool check_server_health(const std::string& server, const std::string& port) { | ||
using namespace boost::asio; | ||
io_service svc; | ||
ip::tcp::socket socket(svc); | ||
ip::tcp::resolver resolver(svc); | ||
boost::system::error_code ec; | ||
|
||
// Try to connect | ||
connect(socket, resolver.resolve({server, port}), ec); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is nonsense to inner call the server itself. You have everything needed inside the process. |
||
if (ec) { | ||
std::cout << "Connection failed: " << ec.message() << std::endl; | ||
return false; | ||
} | ||
|
||
// Send HTTP GET request to /health endpoint | ||
std::string request = "GET /health HTTP/1.1\r\nHost: " + server + "\r\n\r\n"; | ||
write(socket, buffer(request), ec); | ||
if (ec) { | ||
std::cout << "Write failed: " << ec.message() << std::endl; | ||
return false; | ||
} | ||
|
||
// Read the response | ||
boost::asio::streambuf response; | ||
read_until(socket, response, "\r\n", ec); | ||
std::istream response_stream(&response); | ||
std::string http_version; | ||
unsigned int status_code; | ||
response_stream >> http_version >> status_code; | ||
|
||
bool server_status_ok = false; | ||
|
||
// Check HTTP response status code | ||
if (status_code == 200 || status_code == 500 || status_code == 503) { | ||
server_status_ok = true; | ||
} | ||
|
||
return server_status_ok | ||
} | ||
|
||
/** | ||
* Main loop consists of these steps: | ||
* - Wait until a new task arrives | ||
|
@@ -474,6 +520,13 @@ struct server_queue { | |
running = true; | ||
|
||
while (true) { | ||
bool health_check = check_server_health(hostname_health, port_health); | ||
if (health_check == false) { | ||
while(!queue_tasks.empty()) { | ||
queue_tasks.erase(queue_tasks.begin()); | ||
} | ||
break; | ||
} | ||
LOG_VERBOSE("new task may arrive", {}); | ||
|
||
while (true) { | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import threading | ||
import requests | ||
|
||
# Stats | ||
total_requests = 0 | ||
requests_executed = 0 | ||
requests_cancelled = 0 | ||
requests_remaining = 0 | ||
|
||
class StoppableThread(threading.Thread):
    """Thread subclass that carries a stop flag.

    The flag is a ``threading.Event`` stored on ``stop_event``. Nothing in this
    class interrupts the running target; ``stop()`` only sets the flag and
    ``stopped()`` reports it.
    """

    def __init__(self, *args, **kwargs):
        # All arguments are forwarded unchanged to threading.Thread.
        super().__init__(*args, **kwargs)
        self.stop_event = threading.Event()

    def stop(self):
        """Set the stop flag."""
        self.stop_event.set()

    def stopped(self):
        """Return True once the stop flag has been set."""
        return self.stop_event.is_set()
|
||
def send_request(stop_event):
    """POST one small completion request to the local server and print the reply.

    Args:
        stop_event: a ``threading.Event``. It is checked once, before the
            request is sent; if it is already set, nothing is sent. It is not
            consulted again once the POST has started.

    Side effects:
        Increments the module-level ``requests_executed`` counter after a
        response has been received and printed. Timeouts and any other
        exception are reported on stdout and swallowed.
    """
    global requests_executed
    try:
        if stop_event.is_set():
            return
        url = 'http://127.0.0.1:8080/completion'
        data = {
            'prompt': 'Hello llama',
            'n_predict': 2
        }
        response = requests.post(url, json=data, timeout=60)  # Reduced timeout for testing
        print('Response:', response.text)
        requests_executed += 1
    except requests.exceptions.Timeout:
        print('Request timed out')
    except Exception as e:
        print('An error occurred:', str(e))
|
||
def get_health():
    """GET the local server's /health endpoint.

    Returns:
        The HTTP status code of the response, or ``None`` when the request
        times out or fails for any other reason (the failure is printed).
    """
    url = 'http://127.0.0.1:8080/health'
    try:
        response = requests.get(url, timeout=10)
    except requests.exceptions.Timeout:
        print('Health check timed out')
        return None
    except Exception as e:
        print('An error occurred during health check:', str(e))
        return None
    return response.status_code
|
||
|
||
# User input for the number of requests
num_requests = int(input("How many requests would you like to post?\n"))

total_requests = num_requests

# Launching multiple requests, one at a time
for i in range(num_requests):
    health = get_health()
    # Our server status: these three codes are treated as "server is running";
    # anything else (including None from a failed health check) stops the run.
    ok_status = health in (200, 500, 503)

    if not ok_status:
        print(f"Server is not running. Status:{health}. Exiting now...\n")
        # Everything from this iteration onward is never sent.
        requests_cancelled = total_requests - i
        break

    stop_event = threading.Event()
    req_thread = StoppableThread(target=send_request, args=(stop_event,))
    req_thread.start()

    input("Press Enter when request is complete or you would like to stop the request!\n")
    # Setting an already-set Event is a no-op, so no is_set() check is needed.
    stop_event.set()

    req_thread.join()  # Ensure the thread finishes

# Requests that were started but never produced a response (timeout, error,
# or skipped because the stop event was already set) end up counted here.
requests_remaining = total_requests - requests_executed - requests_cancelled

print("\nSummary:")
print(f"Total requests: {total_requests}")
print(f"Requests executed: {requests_executed}")
print(f"Requests cancelled: {requests_cancelled}")
print(f"Requests remaining: {requests_remaining}")
|
||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Boost will never be required