# Qwen-0.5B Model
# Exports Qwen/Qwen1.5-0.5B to ONNX under onnx_models/qwen_onnx/, then
# quantizes that export with the --arm64 preset into
# onnx_models/qwen_onnx_quantized/.
# The steps are chained with `&&` so quantization is skipped when the export
# fails, instead of running against a missing or stale export directory; the
# failing step's non-zero status is left in $?.
# NOTE(review): the log message says "Static", but no calibration data is
# passed to the quantize command — confirm whether the --arm64 preset performs
# static or dynamic quantization and adjust the message if needed.
echo "Exporting Qwen-0.5B..."
optimum-cli export onnx --model Qwen/Qwen1.5-0.5B --task text-generation-with-past onnx_models/qwen_onnx/ &&
  echo "Quantizing Qwen-0.5B for ARM64 (Static)..." &&
  optimum-cli onnxruntime quantize --onnx_model onnx_models/qwen_onnx/ --arm64 -o onnx_models/qwen_onnx_quantized/
# TinyLlama-1.1B
# Exports TinyLlama/TinyLlama-1.1B-Chat-v1.0 to ONNX under
# onnx_models/tinyllama_onnx/, then attempts to quantize that export with the
# --arm64 preset into onnx_models/tinyllama_onnx_quantized/.
# The steps are chained with `&&` so quantization is skipped when the export
# fails, instead of running against a missing or stale export directory; the
# failing step's non-zero status is left in $?.
# NOTE(review): the log message says "Static", but no calibration data is
# passed to the quantize command — confirm whether the --arm64 preset performs
# static or dynamic quantization and adjust the message if needed.
echo "Exporting TinyLlama-1.1B..."
optimum-cli export onnx --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --task text-generation-with-past onnx_models/tinyllama_onnx/ &&
  echo "Attempting TinyLlama-1.1B quantization for ARM64 (Static)..." &&
  optimum-cli onnxruntime quantize --onnx_model onnx_models/tinyllama_onnx/ --arm64 -o onnx_models/tinyllama_onnx_quantized/
# Phi-1.5 Model
# Exports microsoft/phi-1_5 to ONNX under onnx_models/phi_onnx/, then
# quantizes that export with the --arm64 preset into
# onnx_models/phi_onnx_quantized/.
# The steps are chained with `&&` so quantization is skipped when the export
# fails, instead of running against a missing or stale export directory; the
# failing step's non-zero status is left in $?.
# NOTE(review): the log message says "Static", but no calibration data is
# passed to the quantize command — confirm whether the --arm64 preset performs
# static or dynamic quantization and adjust the message if needed.
echo "Exporting Phi-1.5..."
optimum-cli export onnx --model microsoft/phi-1_5 --task text-generation-with-past onnx_models/phi_onnx/ &&
  echo "Quantizing Phi-1.5 for ARM64 (Static)..." &&
  optimum-cli onnxruntime quantize --onnx_model onnx_models/phi_onnx/ --arm64 -o onnx_models/phi_onnx_quantized/
# Falcon-1B Model
# Exports tiiuae/falcon-rw-1b to ONNX under onnx_models/falcon_onnx/, then
# quantizes that export with the --arm64 preset into
# onnx_models/falcon_onnx_quantized/.
# The steps are chained with `&&` so quantization is skipped when the export
# fails, instead of running against a missing or stale export directory; the
# failing step's non-zero status is left in $?.
echo "Exporting Falcon-1B..."
optimum-cli export onnx --model tiiuae/falcon-rw-1b --task text-generation-with-past onnx_models/falcon_onnx/ &&
  echo "Quantizing Falcon-1B for ARM64..." &&
  optimum-cli onnxruntime quantize --onnx_model onnx_models/falcon_onnx/ --arm64 -o onnx_models/falcon_onnx_quantized/
# GPT-2 Medium Model
# Exports gpt2-medium to ONNX under onnx_models/gpt2_onnx/, then quantizes
# that export with the --arm64 preset into onnx_models/gpt2_onnx_quantized/.
# The steps are chained with `&&` so quantization is skipped when the export
# fails, instead of running against a missing or stale export directory; the
# failing step's non-zero status is left in $?.
echo "Exporting GPT2-Medium..."
optimum-cli export onnx --model gpt2-medium --task text-generation-with-past onnx_models/gpt2_onnx/ &&
  echo "Quantizing GPT2-Medium for ARM64..." &&
  optimum-cli onnxruntime quantize --onnx_model onnx_models/gpt2_onnx/ --arm64 -o onnx_models/gpt2_onnx_quantized/
# OPT-350M Model
# Exports facebook/opt-350m to ONNX under onnx_models/opt_onnx/, then
# quantizes that export with the --arm64 preset into
# onnx_models/opt_onnx_quantized/.
# The steps are chained with `&&` so quantization is skipped when the export
# fails, instead of running against a missing or stale export directory; the
# failing step's non-zero status is left in $?.
echo "Exporting OPT-350M..."
optimum-cli export onnx --model facebook/opt-350m --task text-generation-with-past onnx_models/opt_onnx/ &&
  echo "Quantizing OPT-350M for ARM64..." &&
  optimum-cli onnxruntime quantize --onnx_model onnx_models/opt_onnx/ --arm64 -o onnx_models/opt_onnx_quantized/
# Bloom-560M Model
# Exports bigscience/bloom-560m to ONNX under onnx_models/bloom_onnx/, then
# quantizes that export with the --arm64 preset into
# onnx_models/bloom_onnx_quantized/.
# The steps are chained with `&&` so quantization is skipped when the export
# fails, instead of running against a missing or stale export directory; the
# failing step's non-zero status is left in $?.
echo "Exporting Bloom-560M..."
optimum-cli export onnx --model bigscience/bloom-560m --task text-generation-with-past onnx_models/bloom_onnx/ &&
  echo "Quantizing Bloom-560M for ARM64..." &&
  optimum-cli onnxruntime quantize --onnx_model onnx_models/bloom_onnx/ --arm64 -o onnx_models/bloom_onnx_quantized/