Update app.py

app.py CHANGED
@@ -1,57 +1,61 @@
-import transformers
-import torch
-import os
-
-os.environ["HF_TOKEN"] = st.secrets["HF_TOKEN"]
-os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
-# os.environ["USE_FLASH_ATTENTION"] = "1"
-
-print(f"Device name: {torch.cuda.get_device_properties('cuda').name}")
-print(f"FlashAttention available: {torch.backends.cuda.flash_sdp_enabled()}")
-print(f"torch version: {torch.version}")
-
-
-# model_id = "meta-llama/Meta-Llama-3-8B"
-
-# pipeline = transformers.pipeline(
-#     "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto"
-# )
-# pipeline("Hey how are you doing today?")
-
-model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-
-pipeline = transformers.pipeline(
-    "text-generation",
-    model=model_id,
-    model_kwargs={"torch_dtype": torch.bfloat16},
-    device_map="auto",
-)
-
-messages = [
-    {
-        "role": "system",
-        "content": "You are a pirate chatbot who always responds in pirate speak!",
-    },
-    {"role": "user", "content": "Who are you?"},
-]
-
-prompt = pipeline.tokenizer.apply_chat_template(
-    messages, tokenize=False, add_generation_prompt=True
-)
-
-terminators = [
-    pipeline.tokenizer.eos_token_id,
-    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
-]
-
-outputs = pipeline(
-    prompt,
-    max_new_tokens=256,
-    eos_token_id=terminators,
-    do_sample=True,
-    temperature=0.6,
-    top_p=0.9,
-)
-print(outputs[0]["generated_text"][len(prompt) :])
-
-print("hello")
+import gradio as gr
+
+gr.load("models/davidkim205/Rhea-72b-v0.5").launch()
+
+# import transformers
+# import torch
+# import os
+
+# os.environ["HF_TOKEN"] = st.secrets["HF_TOKEN"]
+# os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
+# # os.environ["USE_FLASH_ATTENTION"] = "1"
+
+# print(f"Device name: {torch.cuda.get_device_properties('cuda').name}")
+# print(f"FlashAttention available: {torch.backends.cuda.flash_sdp_enabled()}")
+# print(f"torch version: {torch.version}")
+
+
+# # model_id = "meta-llama/Meta-Llama-3-8B"
+
+# # pipeline = transformers.pipeline(
+# #     "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto"
+# # )
+# # pipeline("Hey how are you doing today?")
+
+# model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+
+# pipeline = transformers.pipeline(
+#     "text-generation",
+#     model=model_id,
+#     model_kwargs={"torch_dtype": torch.bfloat16},
+#     device_map="auto",
+# )
+
+# messages = [
+#     {
+#         "role": "system",
+#         "content": "You are a pirate chatbot who always responds in pirate speak!",
+#     },
+#     {"role": "user", "content": "Who are you?"},
+# ]
+
+# prompt = pipeline.tokenizer.apply_chat_template(
+#     messages, tokenize=False, add_generation_prompt=True
+# )
+
+# terminators = [
+#     pipeline.tokenizer.eos_token_id,
+#     pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+# ]
+
+# outputs = pipeline(
+#     prompt,
+#     max_new_tokens=256,
+#     eos_token_id=terminators,
+#     do_sample=True,
+#     temperature=0.6,
+#     top_p=0.9,
+# )
+# print(outputs[0]["generated_text"][len(prompt) :])
+
+# print("hello")
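Note on the new one-liner: `gr.load()` with a `models/...` name builds a ready-made Gradio UI backed by the model's hosted Hugging Face inference endpoint rather than loading weights locally, which is why the transformers/torch setup could be dropped. A minimal sketch of the same pattern; the `hf_token` argument and the `HF_TOKEN` variable name are illustrative assumptions, not part of this commit, and a token is only needed for gated or private models:

import os

import gradio as gr

# The "models/..." prefix tells gr.load() to wrap the model's hosted
# inference endpoint; no local GPU, weights, or transformers install needed.
demo = gr.load(
    "models/davidkim205/Rhea-72b-v0.5",
    hf_token=os.environ.get("HF_TOKEN"),  # assumption: only needed for gated/private models
)
demo.launch()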
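Note on the commented-out token setup: the old code read `st.secrets` without ever importing streamlit, so it would raise a NameError if re-enabled as-is. On Spaces, secrets configured in the Space settings are injected as environment variables, so a plain `os.environ` lookup suffices; a sketch assuming a secret named `HF_TOKEN` has been configured:

import os

# Spaces exposes repository secrets as environment variables,
# so no streamlit (st.secrets) dependency is required.
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None:
    print("HF_TOKEN is not set; gated or private models will be unreachable.")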