Skip to content

[Llava] Add csv image loading in C++ runner #5380

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 47 additions & 22 deletions .ci/scripts/test_llava.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ if [[ "${TARGET_OS_lower}" == "android" ]]; then
exit 1
fi
fi
PTE=llava.pte
IMAGE_PT=image.pt
IMAGE_CSV=image.csv
TOKENIZER=tokenizer.bin

# Number of processes for a parallel build
NPROC=8
Expand Down Expand Up @@ -96,7 +100,7 @@ cmake_build_llava_runner_for_android() {
-DANDROID_PLATFORM=android-23 \
${LLAVA_COMMON_CMAKE_ARGS} \
-DCMAKE_PREFIX_PATH="$python_lib" \
-DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \
-DBUILD_LLAVA_RUNNER_WITHOUT_TORCH=ON \
-B${BUILD_DIR}/${dir} \
${dir}

Expand All @@ -106,51 +110,56 @@ cmake_build_llava_runner_for_android() {
# only export the one without custom op for now since it's
export_llava() {
echo "Starting to export Llava. This will take about 6 mins"
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name ${PTE} --with-artifacts
}

# Download a new image with different size, to test if the model can handle different image sizes
prepare_image_tensor() {
echo "Downloading image"
curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path image.pt
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path ${IMAGE_PT} --output-csv ${IMAGE_CSV}
}

run_and_verify() {
NOW=$(date +"%H:%M:%S")
echo "Starting to run llava runner at ${NOW}"
if [[ ! -f "llava.pte" ]]; then
echo "Export failed. Abort"
if [[ ! -f "${PTE}" ]]; then
echo "Could not file PTE: ${PTE}. Export failed. Aborting."
exit 1
fi
if [[ ! -f "image.pt" ]]; then
echo "image.pt is missing."
if [[ ! -f "${IMAGE_PT}" ]]; then
echo ".pt Image file: ${IMAGE_PT} is missing."
exit 1
fi
if [[ ! -f "tokenizer.bin" ]]; then
echo "tokenizer.bin is missing."
if [[ ! -f "${IMAGE_CSV}" ]]; then
echo "csv Image file ${IMAGE_CSV} is missing."
exit 1
fi
if [[ ! -f "${TOKENIZER}" ]]; then
echo "Tokenizer: ${TOKENIZER} is missing."
exit 1
fi



RUNTIME_ARGS="--model_path=llava.pte \
--tokenizer_path=tokenizer.bin \
--image_path=image.pt \
--prompt=ASSISTANT: \
--temperature=0 \
RUNTIME_ARGS="--model_path=${PTE} \
--tokenizer_path=${TOKENIZER} \
--prompt=ASSISTANT: \
--temperature=0 \
--seq_len=650"

if [[ "${TARGET_OS_lower}" == "android" ]]; then
echo "Transfer relevant files to the phone via ADB and run llava_main with following args,"
echo "$ llava_main ${RUNTIME_ARGS} "
echo "Transfer relevant files to the phone via ADB and run llava_main binary."
exit 0;
fi

${BUILD_DIR}/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt
${BUILD_DIR}/examples/models/llava/llava_main ${RUNTIME_ARGS} \
--image_path=${IMAGE_PT} > result.txt

${BUILD_DIR}/examples/models/llava/llava_main ${RUNTIME_ARGS} \
--image_path=${IMAGE_CSV} --is_csv_image=true > result_csv.txt

# verify result.txt
RESULT=$(cat result.txt)
RESULT_CSV=$(cat result_csv.txt)
# set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
if [[ "$(uname)" == "Darwin" ]]; then
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. One of the players is dribbling the ball, while the others are in various"
Expand All @@ -161,12 +170,25 @@ run_and_verify() {
if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Actual result (csv): ${RESULT_CSV}"
echo "Success"
exit 0
else
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Failure; results not the same"
echo "Actual result (csv): ${RESULT_CSV}"
echo "Failure; generated text is not as expected"
exit 1
fi

if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
echo ".pt and .csv image produced same output"
echo "Success"
exit 0
else
echo ".pt and .csv image inputs produced different outputs"
echo ".pt image produced: ${RESULT}"
echo ".csv image produced: ${RESULT_CSV}"
echo "Failure"
exit 1
fi
}
Expand All @@ -187,4 +209,7 @@ export_llava

# Step3. Run
prepare_image_tensor
run_and_verify
# Only run if building for native
if [[ "${TARGET_OS_lower}" == "native" ]]; then
run_and_verify
fi
11 changes: 5 additions & 6 deletions examples/models/llava/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ project(llava)
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

# This is a temporary hack to get around Torch dep so we can test this on android
option(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE "Hack option to feed dummy image to remove torch.load dep" OFF)
option(BUILD_LLAVA_RUNNER_WITHOUT_TORCH "Build Llava runner without torch dep" OFF)

include(CMakeDependentOption)
#
Expand Down Expand Up @@ -74,10 +74,9 @@ set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

# Avoid torch dep from torch.load()-ing the image.
# This is a temporary hack.
if(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE)
add_definitions(-DLLAVA_NO_TORCH_DUMMY_IMAGE=1)
message("Buidling the runner without Torch, feeding a dummy image!")
# Use a CSV formatted image instead
if(BUILD_LLAVA_RUNNER_WITHOUT_TORCH)
add_definitions(-DBUILD_LLAVA_RUNNER_WITHOUT_TORCH=1)
else()
find_package(Torch CONFIG REQUIRED)
endif()
Expand Down Expand Up @@ -106,7 +105,7 @@ endif()
add_subdirectory(runner)

set(LINK_LIBS gflags)
if(NOT LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE)
if(NOT BUILD_LLAVA_RUNNER_WITHOUT_TORCH)
list(APPEND LINK_LIBS torch)
endif()
set(link_libraries ${LINK_LIBS})
Expand Down
39 changes: 39 additions & 0 deletions examples/models/llava/image_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,38 @@ def serialize_image(image: torch.Tensor, path: str) -> None:
logging.info(f"Saved image tensor to {path}")


def image_to_csv(image: torch.Tensor, path: str) -> None:
im = torch.tensor(image)

# List of restrictions on the image tensor to ensure our naive csv converter works
assert im.dim() == 3, "Image must be 3D"
assert im.shape[0] == 3, "Image must have 3 channels"
assert im.dtype == torch.uint8, "Image data must be uint8"
assert im.device == torch.device("cpu"), "Image data must be on CPU"
assert im.is_contiguous(), "Image data must be contiguous"
assert im.dim_order() == (0, 1, 2), "Image data must be in CHW format"

# write header
with open(path, "w") as f:
# header
# dims, shape[...], sizeof(dtype)
f.write(f"{im.dim()},")
for i in range(im.dim()):
f.write(f"{im.shape[i]},")
f.write("1\n") # sizeof(uint8_t), add a newline to end header

# append data as bytes
with open(path, "ab") as f:
# data as bytes
for i in range(im.shape[0]):
for j in range(im.shape[1]):
for k in range(im.shape[2]):
b = int(im[i][j][k]).to_bytes(1, byteorder="little")
f.write(b)

logging.info(f"Saved image csv to {path}")


def main():
parser = ArgumentParser()
parser.add_argument(
Expand All @@ -67,12 +99,19 @@ def main():
"--output-path",
default="image.pt",
)
parser.add_argument(
"--output-csv",
required=False,
)
args = parser.parse_args()

image = Image.open(args.image_path)
image_tensor = prepare_image(image, target_h=336, target_w=336)
serialize_image(image_tensor, args.output_path)

if args.output_csv:
image_to_csv(image_tensor, args.output_csv)


if __name__ == "__main__":
main()
Loading
Loading