 _SMOLLM_FROM_META = {
     "tok_embeddings.weight": "tok_embeddings.weight",
     "norm.weight": "norm.scale",
-    "output.weight": "output.weight",
     "layers.{}.attention.wk.weight": "layers.{}.attn.k_proj.weight",
     "layers.{}.attention.wq.weight": "layers.{}.attn.q_proj.weight",
     "layers.{}.attention.wv.weight": "layers.{}.attn.v_proj.weight",
@@ -41,10 +40,32 @@ def smollm_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
     for key, value in state_dict.items():
         new_key = get_mapped_key(key, inverted_mapping_dict)
         converted_state_dict[new_key] = value
+    # SmolLM ties its LM head to the token embeddings, so the Meta-format
+    # checkpoint reuses tok_embeddings.weight as output.weight.
+    converted_state_dict["output.weight"] = converted_state_dict[
+        "tok_embeddings.weight"
+    ]
 
     return converted_state_dict
 
 
+def convert_weights(input_dir: str, output_file: str) -> None:
+    # We don't strictly need the torchtune checkpointer here; we could just
+    # aggregate the checkpoint files ourselves.
+    checkpointer = FullModelHFCheckpointer(
+        checkpoint_dir=input_dir,
+        checkpoint_files=["model.safetensors"],
+        output_dir=".",
+        model_type="LLAMA3",
+    )
+
+    print("Loading checkpoint...")
+    sd = checkpointer.load_checkpoint()
+    print("Converting checkpoint...")
+    sd = smollm_tune_to_meta(sd["model"])
+    print("Saving checkpoint...")
+    torch.save(sd, output_file)
+    print("Done.")
+
+
 def main():
     parser = argparse.ArgumentParser(
         description="Convert SmolLM weights to Meta format."
@@ -57,23 +78,7 @@ def main():
     parser.add_argument("output", type=str, help="Path to the output checkpoint")
 
     args = parser.parse_args()
-
-    # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves.
-    checkpointer = FullModelHFCheckpointer(
-        checkpoint_dir=args.input_dir,
-        checkpoint_files=["model.safetensors"],
-        output_dir=".",
-        model_type="LLAMA",
-    )
-
-    print("Loading checkpoint...")
-    sd = checkpointer.load_checkpoint()
-
-    print("Converting checkpoint...")
-    sd = smollm_tune_to_meta(sd["model"])
-
-    torch.save(sd, args.output)
-    print(f"Checkpoint saved to {args.output}")
+    convert_weights(args.input_dir, args.output)
 
 
 if __name__ == "__main__":
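
A note on the substantive change: SmolLM ties its LM head to the token-embedding matrix, so the HF checkpoint carries no separate output weight. The converter therefore drops the "output.weight" entry from the mapping and instead fills that slot by copying tok_embeddings.weight. A minimal sanity check on a converted file might look like the sketch below; the smollm_meta.pth path is hypothetical, standing in for whatever output path you pass to the script.

import torch

# Hypothetical output path, e.g. from: python convert_smollm.py <input_dir> smollm_meta.pth
sd = torch.load("smollm_meta.pth", map_location="cpu")

# With tied embeddings, the Meta-format LM head must equal the
# token-embedding matrix exactly.
assert torch.equal(sd["output.weight"], sd["tok_embeddings.weight"])
print("output.weight is tied to tok_embeddings.weight")

Factoring the body of main() into convert_weights() also makes the conversion callable from other Python code rather than only through the CLI.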