tomo2chin2 commited on
Commit
45b1af9
·
verified ·
1 Parent(s): 82163a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -140
app.py CHANGED
@@ -14,15 +14,15 @@ from io import BytesIO
14
  import tempfile, time, os, logging
15
  from huggingface_hub import hf_hub_download
16
 
17
- # Google Gen AI SDK 1.x -----------------------------------------------
18
- from google import genai # 新 SDK
19
- from google.genai import types # ThinkingConfig 用
20
- # ---------------------------------------------------------------------
21
 
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
- # ------------------------- 入出力スキーマ ---------------------------
26
  class GeminiRequest(BaseModel):
27
  text: str
28
  extension_percentage: float = 10.0
@@ -34,169 +34,193 @@ class ScreenshotRequest(BaseModel):
34
  html_code: str
35
  extension_percentage: float = 10.0
36
  trim_whitespace: bool = True
 
37
 
38
- # ------------------------- 補助関数 -------------------------------
39
  def enhance_font_awesome_layout(html_code: str) -> str:
40
- """FontAwesome のアイコン詰まりを CSS で解消"""
41
- css = """
42
- <style>
43
- [class*="fa-"]{display:inline-block!important;margin-right:8px!important;vertical-align:middle!important;}
44
- h1 [class*="fa-"],h2 [class*="fa-"],h3 [class*="fa-"],
45
- h4 [class*="fa-"],h5 [class*="fa-"],h6 [class*="fa-"]{margin-right:10px!important;}
46
- li [class*="fa-"],p [class*="fa-"]{margin-right:10px!important;}
47
- </style>
48
- """
49
  if "<head>" in html_code:
50
- return html_code.replace("</head>", f"{css}</head>")
51
- return f"<html><head>{css}</head>{html_code}</html>"
52
 
53
  def load_system_instruction(style="standard") -> str:
54
- valid = ["standard", "cute", "resort", "cool", "dental"]
55
- style = style if style in valid else "standard"
 
56
  local = os.path.join(os.path.dirname(__file__), style, "prompt.txt")
57
  if os.path.exists(local):
58
- return open(local, encoding="utf-8").read()
59
- path = hf_hub_download("tomo2chin2/GURAREKOstlyle",
60
- filename=f"{style}/prompt.txt", repo_type="dataset")
61
- return open(path, encoding="utf-8").read()
62
-
63
- # ----------------- Gemini HTML(thinking_off 分岐) ----------------
64
- def generate_html_from_text(text, temperature=0.3, style="standard"):
65
- api_key = os.environ["GEMINI_API_KEY"]
66
- model_name = os.environ.get("GEMINI_MODEL", "gemini-1.5-pro")
67
- client = genai.Client(api_key=api_key)
68
-
69
- cfg_kwargs = dict(
70
- temperature=temperature, top_p=0.7, top_k=20,
71
- max_output_tokens=8192, candidate_count=1
72
  )
73
- if model_name == "gemini-2.5-flash-preview-04-17": # thinking OFF
74
- cfg_kwargs["thinking_config"] = types.ThinkingConfig(thinking_budget=0)
 
 
 
 
 
 
 
 
75
 
76
- resp = client.models.generate_content(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  model=model_name,
78
- contents=f"{load_system_instruction(style)}\n\n{text}",
79
- config=types.GenerateContentConfig(**cfg_kwargs),
80
- safety_settings=[
81
- {"category":"HARM_CATEGORY_HARASSMENT","threshold":"BLOCK_MEDIUM_AND_ABOVE"},
82
- {"category":"HARM_CATEGORY_HATE_SPEECH","threshold":"BLOCK_MEDIUM_AND_ABOVE"},
83
- {"category":"HARM_CATEGORY_SEXUALLY_EXPLICIT","threshold":"BLOCK_MEDIUM_AND_ABOVE"},
84
- {"category":"HARM_CATEGORY_DANGEROUS_CONTENT","threshold":"BLOCK_MEDIUM_AND_ABOVE"},
85
- ]
86
- )
87
- raw = resp.text
88
  s, e = raw.find("```html"), raw.rfind("```")
89
- return enhance_font_awesome_layout(raw[s+7:e].strip()) if s!=-1<e else raw
90
-
91
- # ------------------------- 画像トリミング ---------------------------
92
- def trim_image_whitespace(img: Image.Image, th=248, pad=20):
93
- g = img.convert("L"); w,h = g.size; d=list(g.getdata())
94
- mat=[d[i*w:(i+1)*w] for i in range(h)]
95
- xs=[x for y in range(h) for x in range(w) if mat[y][x]<th]
96
- ys=[y for y in range(h) for x in range(w) if mat[y][x]<th]
97
- if not xs: return img
98
- minx,maxx,miny,maxy = max(min(xs)-pad,0),min(max(xs)+pad,w-1),max(min(ys)-pad,0),min(max(ys)+pad,h-1)
99
- return img.crop((minx,miny,maxx+1,maxy+1))
100
-
101
- # ------------- HTML フルページ PNG (Selenium) ----------------------
102
- def render_fullpage_screenshot(html, ext=6.0, trim=True):
103
- tmp, drv = None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  try:
105
- with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html", encoding="utf-8") as f:
106
- f.write(html); tmp=f.name
107
- opt=Options(); opt.add_argument("--headless"); opt.add_argument("--no-sandbox")
108
- opt.add_argument("--disable-dev-shm-usage")
109
- drv=webdriver.Chrome(options=opt)
110
- drv.set_window_size(1200,1000); drv.get("file://"+tmp)
111
- WebDriverWait(drv,15).until(EC.presence_of_element_located((By.TAG_NAME,"body")))
112
- time.sleep(3)
113
- h=drv.execute_script("return Math.max(document.body.scrollHeight,document.documentElement.scrollHeight)")
114
- vh=drv.execute_script("return window.innerHeight")
115
- for i in range(max(1,h//vh)+1):
116
- drv.execute_script(f"window.scrollTo(0,{i*(vh-200)})"); time.sleep(0.2)
117
- drv.execute_script("window.scrollTo(0,0)")
118
- drv.execute_script("document.documentElement.style.overflow='hidden';document.body.style.overflow='hidden'")
119
- w=drv.execute_script("return Math.max(document.body.scrollWidth,document.documentElement.scrollWidth)")
120
- drv.set_window_size(w,int(h*(1+ext/100))); time.sleep(1)
121
- img=Image.open(BytesIO(drv.get_screenshot_as_png()))
 
 
 
 
122
  return trim_image_whitespace(img) if trim else img
123
  except Exception as e:
124
  logger.error(e, exc_info=True)
125
- return Image.new("RGB",(1,1))
126
  finally:
127
- drv.quit() if drv else None
128
- os.remove(tmp) if tmp and os.path.exists(tmp) else None
 
129
 
130
- def text_to_screenshot(txt, ext, temp=0.3, trim=True, style="standard"):
131
- return render_fullpage_screenshot(generate_html_from_text(txt,temp,style), ext, trim)
 
132
 
133
- # ---------------------------- FastAPI -------------------------------
134
  app = FastAPI()
135
- app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
136
- allow_methods=["*"], allow_headers=["*"])
137
- gradio_dir = os.path.dirname(gr.__file__)
138
- app.mount("/static", StaticFiles(directory=os.path.join(gradio_dir,"templates/frontend/static")),name="static")
139
 
140
  @app.post("/api/screenshot", response_class=StreamingResponse)
141
- async def api_ss(req: ScreenshotRequest):
142
  img = render_fullpage_screenshot(req.html_code, req.extension_percentage, req.trim_whitespace)
143
- buf=BytesIO(); img.save(buf,"PNG"); buf.seek(0); return StreamingResponse(buf,media_type="image/png")
 
144
 
145
  @app.post("/api/text-to-screenshot", response_class=StreamingResponse)
146
- async def api_t2s(req: GeminiRequest):
147
- img = text_to_screenshot(req.text, req.extension_percentage, req.temperature,
148
- req.trim_whitespace, req.style)
149
- buf=BytesIO(); img.save(buf,"PNG"); buf.seek(0); return StreamingResponse(buf,media_type="image/png")
150
-
151
- # ---------------------------- Gradio UI -----------------------------
 
 
152
  def process(mode, txt, ext, temp, trim, style):
153
- return render_fullpage_screenshot(txt, ext, trim) if mode=="HTML入力" else text_to_screenshot(txt, ext, temp, trim, style)
154
-
155
- with gr.Blocks(title="Full Page Screenshot (Gemini 2.5 Flash)", theme=gr.themes.Base()) as iface:
156
- # ヘッダー
157
- gr.Markdown(
158
- "<h1 style='text-align:center;'>HTMLビューア & テキスト→インフォグラフィック変換</h1>"
159
- "<p style='text-align:center;'>HTML をレンダリングするか、テキストを Gemini API で変換して画像化します。</p>"
160
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
- # ------- 入力モード + テキストボックス -------
163
- with gr.Row():
164
- input_mode = gr.Radio(["HTML入力", "テキスト入力"], value="HTML入力", label="入力モード")
165
- input_text = gr.Textbox(lines=15, label="入力",
166
- placeholder="HTMLコードまたは説明テキストを入力")
167
-
168
- # ------- オプションエリア -------
169
- with gr.Row():
170
- with gr.Column(scale=1):
171
- style_dd = gr.Dropdown(["standard","cute","resort","cool","dental"], value="standard",
172
- label="デザインスタイル", visible=False)
173
- with gr.Column(scale=2):
174
- ext_sl = gr.Slider(0,30,value=10,step=1,label="上下高さ拡張率(%)")
175
- temp_sl = gr.Slider(0.0,1.0,value=0.5,step=0.1,label="生成時の温度",visible=False)
176
-
177
- trim_cb = gr.Checkbox(value=True, label="余白を自動トリミング")
178
- gen_btn = gr.Button("生成")
179
-
180
- # ------- 出力 -------
181
- out_img = gr.Image(type="pil", label="ページ全体のスクリーンショット")
182
-
183
- # ------- 可視性トグル -------
184
- def toggle_vis(mode):
185
- show = mode=="テキスト入力"
186
- return [{"visible":show,"__type__":"update"},{"visible":show,"__type__":"update"}]
187
- input_mode.change(toggle_vis, input_mode, [temp_sl, style_dd])
188
-
189
- # ------- 実行 -------
190
- gen_btn.click(process, [input_mode,input_text,ext_sl,temp_sl,trim_cb,style_dd], out_img)
191
-
192
- # ------- フッター -------
193
  gr.Markdown(
194
- f"<small>使用モデル: <code>{os.environ.get('GEMINI_MODEL','gemini-1.5-pro')}</code> / "
195
- "API: <code>/api/screenshot</code> ・ <code>/api/text-to-screenshot</code></small>"
 
 
196
  )
197
 
198
- # FastAPI へマウント
199
- app = gr.mount_gradio_app(app, iface, path="/")
200
 
201
  if __name__ == "__main__":
202
- import uvicorn; uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
14
  import tempfile, time, os, logging
15
  from huggingface_hub import hf_hub_download
16
 
17
+ # ---------- Gemini SDK (v1.x) ----------
18
+ from google import genai
19
+ from google.genai import types
20
+ # ---------------------------------------
21
 
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
+ # ---------- Pydantic モデル ----------
26
  class GeminiRequest(BaseModel):
27
  text: str
28
  extension_percentage: float = 10.0
 
34
  html_code: str
35
  extension_percentage: float = 10.0
36
  trim_whitespace: bool = True
37
+ # --------------------------------------
38
 
39
+ # ---------- ユーティリティ ----------
def enhance_font_awesome_layout(html_code: str) -> str:
    """Inject CSS that loosens the spacing around Font Awesome icons.

    The style block is inserted immediately before the first closing
    ``</head>`` tag (matched case-insensitively). When the input has no
    closing head tag it is treated as a fragment and wrapped in a minimal
    ``<html><head>...</head>...</html>`` shell that carries the style block.

    Args:
        html_code: HTML document or fragment.

    Returns:
        The HTML with exactly one copy of the spacing fix added.
    """
    # Local import keeps this function self-contained.
    import re

    fix_css = """
    <style>[class*="fa-"]{display:inline-block!important;margin-right:8px!important;vertical-align:middle!important;}
    h1 [class*="fa-"],h2 [class*="fa-"],h3 [class*="fa-"],h4 [class*="fa-"],h5 [class*="fa-"],h6 [class*="fa-"]{
    vertical-align:middle!important;margin-right:10px!important;}
    .fa+span,.fas+span,.far+span,.fab+span,span+.fa,span+.fas,span+.far,span+.fab{
    display:inline-block!important;margin-left:5px!important;}
    li [class*="fa-"],p [class*="fa-"]{margin-right:10px!important;}</style>"""

    # Look for the tag that is actually edited (the closing one). Testing for
    # a literal "<head>" misses "<head lang=...>" and upper-case markup, which
    # would then be wrapped in a second <html><head> shell.
    closing_head = re.search(r"</head\s*>", html_code, flags=re.IGNORECASE)
    if closing_head is not None:
        # Insert once, at the first match, so a "</head>" that appears later
        # (e.g. inside a script string) does not receive a second copy.
        cut = closing_head.start()
        return f"{html_code[:cut]}{fix_css}{html_code[cut:]}"
    return f"<html><head>{fix_css}</head>{html_code}</html>"
51
 
def load_system_instruction(style="standard") -> str:
    """Return the system prompt text for the requested design style.

    Unknown style names fall back to "standard". A ``prompt.txt`` inside a
    directory named after the style, next to this file, is preferred; when it
    does not exist the prompt is fetched with ``hf_hub_download`` from the
    ``tomo2chin2/GURAREKOstlyle`` dataset repository.
    """
    known_styles = ("standard", "cute", "resort", "cool", "dental")
    chosen = style if style in known_styles else "standard"

    prompt_path = os.path.join(os.path.dirname(__file__), chosen, "prompt.txt")
    if not os.path.exists(prompt_path):
        # In the dataset repo the standard prompt sits at the root; every
        # other style has its own sub-directory.
        if chosen == "standard":
            remote_name = "prompt.txt"
        else:
            remote_name = f"{chosen}/prompt.txt"
        prompt_path = hf_hub_download(
            repo_id="tomo2chin2/GURAREKOstlyle",
            filename=remote_name,
            repo_type="dataset",
        )

    with open(prompt_path, encoding="utf-8") as handle:
        return handle.read()
68
+ # --------------------------------------
69
+
70
+ # ---------- Gemini → HTML ----------
def generate_html_from_text(text, temperature=0.3, style="standard") -> str:
    """Ask Gemini to turn ``text`` into HTML and return the extracted markup.

    The model name is read from the ``GEMINI_MODEL`` environment variable
    (default ``gemini-1.5-pro``) and the key from ``GEMINI_API_KEY``.

    Args:
        text: User text appended after the style's system prompt.
        temperature: Sampling temperature passed to the model.
        style: Design style name forwarded to ``load_system_instruction``.

    Returns:
        The contents of the fenced html block in the reply, passed through
        ``enhance_font_awesome_layout``. If the reply has no such fenced
        block, the raw reply text is returned unchanged.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` is not set, or the response
            carries no text at all.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY 未設定")
    model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-pro")

    client = genai.Client(api_key=api_key)
    prompt = f"{load_system_instruction(style)}\n\n{text}"

    # Build the shared generation settings once; only the thinking budget
    # differs between models.
    cfg_kwargs = dict(
        temperature=temperature,
        top_p=0.7,
        top_k=20,
        max_output_tokens=8192,
        candidate_count=1,
    )
    if model_name == "gemini-2.5-flash-preview-04-17":
        # Disable "thinking" for this preview model.
        cfg_kwargs["thinking_config"] = types.ThinkingConfig(thinking_budget=0)

    response = client.models.generate_content(
        model=model_name,
        contents=prompt,
        config=types.GenerateContentConfig(**cfg_kwargs),
    )
    raw = response.text
    if raw is None:
        # NOTE(review): per the SDK docs ``text`` can be None when the reply
        # has no text parts (e.g. blocked output). Fail with a clear message
        # rather than an AttributeError on ``raw.find`` below.
        raise ValueError(f"Gemini model {model_name!r} returned no text")

    fence_start = raw.find("```html")
    fence_end = raw.rfind("```")
    # fence_start < fence_end guarantees a distinct closing fence exists.
    if fence_start != -1 and fence_start < fence_end:
        html = raw[fence_start + 7:fence_end].strip()
        return enhance_font_awesome_layout(html)
    return raw
103
+ # --------------------------------------
104
+
105
+ # ---------- 画像トリミング ----------
def trim_image_whitespace(img: Image.Image, threshold=248, padding=20):
    """Crop away near-white margins around the content of ``img``.

    A pixel counts as content when its grayscale value is strictly below
    ``threshold``. The crop box is the bounding box of all content pixels,
    grown by ``padding`` pixels on every side and clamped to the image.

    Args:
        img: Source image.
        threshold: Grayscale level (0-255) below which a pixel is content.
        padding: Margin in pixels kept around the content.

    Returns:
        The cropped image, or ``img`` itself when no pixel is below the
        threshold.
    """
    # Mark content pixels as 255 and everything else as 0, then let Pillow
    # compute the bounding box natively. This avoids materialising the
    # coordinates of every dark pixel in Python lists, which is costly for
    # full-page screenshots.
    content_mask = img.convert("L").point(
        lambda level: 255 if level < threshold else 0
    )
    bbox = content_mask.getbbox()
    if bbox is None:
        return img

    # getbbox() returns (left, upper, right, lower) with right/lower exclusive.
    left, top, right, bottom = bbox
    width, height = img.size
    return img.crop((
        max(0, left - padding),
        max(0, top - padding),
        min(width, right + padding),
        min(height, bottom + padding),
    ))
122
+ # --------------------------------------
123
+
124
+ # ---------- Selenium スクショ ----------
def render_fullpage_screenshot(html, ext=6.0, trim=True) -> Image.Image:
    """Render an HTML string in headless Chrome and capture the whole page.

    The HTML is written to a temporary ``.html`` file, loaded through a
    ``file://`` URL, scrolled through once, and then the window is resized to
    the measured page size before the screenshot is taken.

    Args:
        html: HTML source to render.
        ext: Extra window height, in percent of the measured page height.
        trim: When true, the result is passed through
            ``trim_image_whitespace``.

    Returns:
        The screenshot as a PIL image. On any exception the error is logged
        and a 1x1 RGB placeholder image is returned instead of raising.
    """
    tmp, driver = None, None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".html", mode="w", encoding="utf-8") as f:
            # Record the path before writing so the file is still removed
            # in ``finally`` if the write itself fails.
            tmp = f.name
            f.write(html)

        opts = Options()
        opts.add_argument("--headless=new")
        opts.add_argument("--no-sandbox")
        opts.add_argument("--disable-dev-shm-usage")
        driver = webdriver.Chrome(options=opts)
        driver.set_window_size(1200, 1000)
        driver.get("file://" + tmp)
        WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        time.sleep(1)

        total = driver.execute_script("return Math.max(document.body.scrollHeight,document.documentElement.scrollHeight)")
        vp = driver.execute_script("return window.innerHeight")
        # Scroll in steps that overlap by 200px. For a viewport of 200px or
        # less that step would be zero/negative (range() raises on 0 and
        # yields nothing on negatives), so fall back to whole-viewport steps.
        step = vp - 200 if vp > 200 else max(vp, 1)
        # Presumably this pass triggers lazily loaded content — TODO confirm.
        for y in range(0, total, step):
            driver.execute_script(f"window.scrollTo(0,{y})")
            time.sleep(0.1)
        driver.execute_script("window.scrollTo(0,0)")

        # Hide scrollbars, then size the window to the full document.
        driver.execute_script("document.documentElement.style.overflow='hidden'")
        w = driver.execute_script("return document.documentElement.scrollWidth")
        h = driver.execute_script("return document.documentElement.scrollHeight")
        driver.set_window_size(w, int(h * (1 + ext / 100)))
        time.sleep(0.5)

        img = Image.open(BytesIO(driver.get_screenshot_as_png()))
        return trim_image_whitespace(img) if trim else img
    except Exception as e:
        logger.error(e, exc_info=True)
        return Image.new("RGB", (1, 1))
    finally:
        try:
            if driver:
                driver.quit()
        finally:
            # Runs even if quit() raises, so the temp file never leaks.
            if tmp and os.path.exists(tmp):
                os.remove(tmp)
156
+ # --------------------------------------
157
 
def text_to_screenshot(text, ext, temp, trim, style):
    """Generate HTML for ``text`` with Gemini and render it to an image.

    ``temp`` and ``style`` are forwarded to ``generate_html_from_text``;
    ``ext`` and ``trim`` are forwarded to ``render_fullpage_screenshot``.
    """
    return render_fullpage_screenshot(
        generate_html_from_text(text, temp, style),
        ext,
        trim,
    )
161
 
162
+ # ---------- FastAPI ----------
# FastAPI application with CORS opened to every origin, method and header.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended for the deployment.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
168
 
@app.post("/api/screenshot", response_class=StreamingResponse)
async def api_screen(req: ScreenshotRequest):
    """Render the posted HTML and stream the screenshot back as a PNG."""
    # Local import keeps this handler self-contained.
    import asyncio

    # The renderer drives Selenium and sleeps; run it in the default executor
    # so the event loop stays free to serve other requests meanwhile.
    loop = asyncio.get_running_loop()
    img = await loop.run_in_executor(
        None,
        render_fullpage_screenshot,
        req.html_code,
        req.extension_percentage,
        req.trim_whitespace,
    )

    buf = BytesIO()
    img.save(buf, format="PNG")
    buf.seek(0)
    return StreamingResponse(buf, media_type="image/png")
174
 
@app.post("/api/text-to-screenshot", response_class=StreamingResponse)
async def api_text(req: GeminiRequest):
    """Turn the posted text into an image and stream it back as a PNG."""
    # Local import keeps this handler self-contained.
    import asyncio

    # Generation and rendering both block (Gemini request, Selenium, sleeps);
    # run them in the default executor so the event loop is not stalled.
    loop = asyncio.get_running_loop()
    img = await loop.run_in_executor(
        None,
        text_to_screenshot,
        req.text,
        req.extension_percentage,
        req.temperature,
        req.trim_whitespace,
        req.style,
    )

    buf = BytesIO()
    img.save(buf, format="PNG")
    buf.seek(0)
    return StreamingResponse(buf, media_type="image/png")
181
+ # --------------------------------------
182
+
183
+ # ---------- Gradio UI ----------
def process(mode, txt, ext, temp, trim, style):
    """Gradio callback: produce the preview image for the chosen input mode.

    In "HTML入力" mode ``txt`` is rendered directly as HTML; in any other
    mode it is first converted to HTML through ``text_to_screenshot``.
    """
    if mode != "HTML入力":
        return text_to_screenshot(txt, ext, temp, trim, style)
    return render_fullpage_screenshot(txt, ext, trim)
188
+
189
+ with gr.Blocks(theme=gr.themes.Base(), title="HTML Viewer & Text→Infographic") as demo:
190
+ gr.Markdown("## HTMLビューア & テキスト→インフォグラフィック変換") # central heading
191
+
192
+ with gr.Row(): # 横一列配置
193
+ mode = gr.Radio(["HTML入力", "テキスト入力"], value="HTML入力", label="入力モード")
194
+
195
+ with gr.Row(): # 入力パネル & 出力画像
196
+ with gr.Column(scale=5):
197
+ txt = gr.Textbox(lines=15, label="入力")
198
+ with gr.Row():
199
+ style_dd = gr.Dropdown(["standard","cute","resort","cool","dental"],
200
+ value="standard", label="デザインスタイル", visible=False)
201
+ temp_sl = gr.Slider(0,1,step=0.1,value=0.5,label="生成温度",visible=False)
202
+ ext_sl = gr.Slider(0,30,step=1,value=10,label="高さ拡張率(%)")
203
+ trim_cb = gr.Checkbox(value=True,label="余白トリミング")
204
+ gen_btn = gr.Button("生成", variant="primary")
205
+ with gr.Column(scale=7):
206
+ out_img = gr.Image(type="pil", label="プレビュー", height=540)
207
+
208
+ # モード切替で可視/不可視を更新
209
+ def _toggle(m): vis = m=="テキスト入力"; return [gr.update(visible=vis), gr.update(visible=vis)]
210
+ mode.change(_toggle, mode, [temp_sl, style_dd])
211
+
212
+ gen_btn.click(process, [mode, txt, ext_sl, temp_sl, trim_cb, style_dd], out_img)
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  gr.Markdown(
215
+ f"""
216
+ **使用モデル** : `{os.getenv('GEMINI_MODEL','gemini-1.5-pro')}`
217
+ `/api/screenshot` ・ `/api/text-to-screenshot`
218
+ """
219
  )
220
 
221
+ # ---------- マウント ----------
222
+ demo_app = gr.mount_gradio_app(app, demo, path="/")
223
 
224
  if __name__ == "__main__":
225
+ import uvicorn
226
+ uvicorn.run(demo_app, host="0.0.0.0", port=7860)