import { GoogleGenAI, Type } from "@google/genai";
import type { GeminiAnalysisResult } from '../types';
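
// Resolve the Gemini API key: prefer a user-supplied key stored in
// localStorage, then fall back to the Vite build-time environment variable.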
const getApiKey = (): string => {
  const storedKey = localStorage.getItem('gemini_api_key');
  if (storedKey) {
    return storedKey;
  }

  const envKey = import.meta.env.VITE_GEMINI_API_KEY;
  if (envKey) {
    return envKey;
  }

  throw new Error("No API key found. Please configure your Gemini API key.");
};
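
// System prompt: ask the model to profile the voice and return a 10-band EQ
// preset plus an Audacity-compatible XML curve as structured JSON.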
const systemInstruction = `You are an expert audio engineer specializing in vocal processing.
Analyze the provided audio sample to determine the speaker's vocal characteristics.
Identify the fundamental frequency range, prominent harmonics, and any problematic frequencies (e.g., sibilance, plosives, muddiness).
Based on this analysis, generate a 10-band graphic EQ preset to enhance vocal clarity, presence, and warmth. The preset should be suitable for a standard podcast or voice-over.
Provide the output in JSON format with three main keys: 'vocalProfile', 'eqPreset', and 'audacityXml'.
- 'vocalProfile' should be an object containing 'description' (a paragraph summarizing the voice), 'fundamentalRange' (e.g., '100Hz - 250Hz'), and 'keyCharacteristics' (an array of strings like 'Slightly sibilant', 'Warm low-mids').
- 'eqPreset' should be an array of objects, where each object has 'frequency' (in Hz) and 'gain' (in dB).
- 'audacityXml' should be a string containing a valid Audacity EQ preset in XML format. The curve should be named 'Gemini Vocal Preset' and contain <point> elements for each frequency and gain setting.`;
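
// Structured-output schema mirroring the JSON shape requested in the prompt.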
const responseSchema = {
  type: Type.OBJECT,
  properties: {
    vocalProfile: {
      type: Type.OBJECT,
      properties: {
        description: { type: Type.STRING },
        fundamentalRange: { type: Type.STRING },
        keyCharacteristics: {
          type: Type.ARRAY,
          items: { type: Type.STRING }
        }
      },
      required: ["description", "fundamentalRange", "keyCharacteristics"]
    },
    eqPreset: {
      type: Type.ARRAY,
      items: {
        type: Type.OBJECT,
        properties: {
          frequency: { type: Type.NUMBER },
          gain: { type: Type.NUMBER }
        },
        required: ["frequency", "gain"]
      }
    },
    audacityXml: { type: Type.STRING }
  },
  required: ["vocalProfile", "eqPreset", "audacityXml"]
};
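
/**
 * Sends a base64-encoded audio sample to Gemini and returns the parsed
 * vocal profile, EQ preset, and Audacity XML curve.
 *
 * @example
 * // Caller-side sketch; `fileToBase64` is an assumed helper, not part of this module.
 * const { base64, mimeType } = await fileToBase64(file);
 * const analysis = await analyzeAudio(base64, mimeType);
 * console.log(analysis.vocalProfile.description, analysis.eqPreset.length);
 */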
export async function analyzeAudio(audioBase64: string, mimeType: string): Promise<GeminiAnalysisResult> {
  try {
    const apiKey = getApiKey();
    const ai = new GoogleGenAI({ apiKey });

    // Package the recording as inline data so it travels with the request.
    const audioPart = {
      inlineData: {
        data: audioBase64,
        mimeType: mimeType,
      },
    };

    const response = await ai.models.generateContent({
      model: 'gemini-2.5-pro',
      contents: { parts: [audioPart] },
      config: {
        systemInstruction,
        responseMimeType: 'application/json',
        responseSchema: responseSchema,
      }
    });

    const text = response.text;

    if (!text) {
      throw new Error('Gemini returned an empty response.');
    }

    // responseSchema constrains the output, so the parsed JSON should match GeminiAnalysisResult.
    return JSON.parse(text) as GeminiAnalysisResult;
  } catch (error) {
    console.error("Error calling Gemini API:", error);
    if (error instanceof Error && error.message.includes('SAFETY')) {
      throw new Error("The audio could not be processed due to safety settings. Please try a different audio sample.");
    }
    throw new Error("Failed to get analysis from Gemini. Please check the console for more details.");
  }
}