AkramOM606 committed
Commit f252528 · verified · 1 Parent(s): 697933a

Update app.py

Files changed (1): app.py +54 -50
app.py CHANGED
@@ -1,57 +1,61 @@
-import transformers
-import torch
-import os
+import gradio as gr
 
-os.environ["HF_TOKEN"] = st.secrets["HF_TOKEN"]
-os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
-# os.environ["USE_FLASH_ATTENTION"] = "1"
+gr.load("models/davidkim205/Rhea-72b-v0.5").launch()
 
-print(f"Device name: {torch.cuda.get_device_properties('cuda').name}")
-print(f"FlashAttention available: {torch.backends.cuda.flash_sdp_enabled()}")
-print(f"torch version: {torch.version}")
+# import transformers
+# import torch
+# import os
 
+# os.environ["HF_TOKEN"] = st.secrets["HF_TOKEN"]
+# os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
+# # os.environ["USE_FLASH_ATTENTION"] = "1"
 
-# model_id = "meta-llama/Meta-Llama-3-8B"
+# print(f"Device name: {torch.cuda.get_device_properties('cuda').name}")
+# print(f"FlashAttention available: {torch.backends.cuda.flash_sdp_enabled()}")
+# print(f"torch version: {torch.version}")
+
+
+# # model_id = "meta-llama/Meta-Llama-3-8B"
+
+# # pipeline = transformers.pipeline(
+# #     "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto"
+# # )
+# # pipeline("Hey how are you doing today?")
+
+# model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
 # pipeline = transformers.pipeline(
-#     "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto"
+#     "text-generation",
+#     model=model_id,
+#     model_kwargs={"torch_dtype": torch.bfloat16},
+#     device_map="auto",
+# )
+
+# messages = [
+#     {
+#         "role": "system",
+#         "content": "You are a pirate chatbot who always responds in pirate speak!",
+#     },
+#     {"role": "user", "content": "Who are you?"},
+# ]
+
+# prompt = pipeline.tokenizer.apply_chat_template(
+#     messages, tokenize=False, add_generation_prompt=True
 # )
-# pipeline("Hey how are you doing today?")
-
-model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-
-pipeline = transformers.pipeline(
-    "text-generation",
-    model=model_id,
-    model_kwargs={"torch_dtype": torch.bfloat16},
-    device_map="auto",
-)
-
-messages = [
-    {
-        "role": "system",
-        "content": "You are a pirate chatbot who always responds in pirate speak!",
-    },
-    {"role": "user", "content": "Who are you?"},
-]
-
-prompt = pipeline.tokenizer.apply_chat_template(
-    messages, tokenize=False, add_generation_prompt=True
-)
-
-terminators = [
-    pipeline.tokenizer.eos_token_id,
-    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
-]
-
-outputs = pipeline(
-    prompt,
-    max_new_tokens=256,
-    eos_token_id=terminators,
-    do_sample=True,
-    temperature=0.6,
-    top_p=0.9,
-)
-print(outputs[0]["generated_text"][len(prompt) :])
-
-print("hello")
+
+# terminators = [
+#     pipeline.tokenizer.eos_token_id,
+#     pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+# ]
+
+# outputs = pipeline(
+#     prompt,
+#     max_new_tokens=256,
+#     eos_token_id=terminators,
+#     do_sample=True,
+#     temperature=0.6,
+#     top_p=0.9,
+# )
+# print(outputs[0]["generated_text"][len(prompt) :])
+
+# print("hello")
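For context: gr.load("models/davidkim205/Rhea-72b-v0.5") does not download the 72B checkpoint into the Space; it builds a Gradio demo that forwards requests to the hosted model over the Hugging Face Inference API. Below is a rough sketch of an equivalent explicit app, assuming huggingface_hub's InferenceClient and an HF_TOKEN secret exposed as an environment variable; the generate helper and the max_new_tokens value are illustrative, not part of this commit. Unlike the commented-out legacy block, it reads the token via os.getenv rather than st.secrets, which the old code used without ever importing streamlit.

import os

import gradio as gr
from huggingface_hub import InferenceClient

# Hypothetical explicit equivalent of gr.load(...).launch(): the remote
# endpoint does the heavy lifting, so no torch/transformers import is needed.
client = InferenceClient(model="davidkim205/Rhea-72b-v0.5", token=os.getenv("HF_TOKEN"))


def generate(prompt: str) -> str:
    # text_generation() sends the prompt to the hosted model and returns
    # the generated continuation as a string.
    return client.text_generation(prompt, max_new_tokens=256)


demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()

Keeping inference remote is what lets a small CPU Space front a 72B model at all; the trade-off is dependence on the Inference API's availability and rate limits.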