Skip to main content

fotos_lib/ai/
llm.rs

//! Cloud LLM vision analysis.
//!
//! Supports Claude (Anthropic) and Gemini (Google) — providers whose wire
//! formats differ from the OpenAI-compatible standard. OpenAI-compatible
//! endpoints (OpenAI, Ollama, llama-server, etc.) are handled by
//! `openai_compat::analyze`.
7use anyhow::{bail, Result};
8use serde::{Deserialize, Serialize};
9use std::time::{Duration, Instant};
10
/// Request timeout, in seconds, given to the HTTP client that `analyze` builds.
const TIMEOUT_SECS: u64 = 30;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub enum LlmProvider {
15    Claude { model: String },
16    Gemini { model: String },
17}
18
/// Result of a single provider call made through `analyze`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LlmOutput {
    /// Text extracted from the provider's reply (empty when none was found).
    pub response: String,
    /// Model identifier that was passed in for the request.
    pub model: String,
    /// Token count reported by the provider's usage metadata (0 when absent).
    pub tokens_used: u32,
    /// Wall-clock time in milliseconds from just before the request was sent
    /// until the reply had been read and parsed.
    pub latency_ms: u64,
}
25
26/// Analyze an image with a named LLM provider (Claude or Gemini).
27///
28/// `image_b64` must be a base64-encoded JPEG (from `compress::compress_for_llm`).
29pub async fn analyze(
30    image_b64: &str,
31    prompt: &str,
32    provider: &LlmProvider,
33    api_key: &str,
34) -> Result<LlmOutput> {
35    let client = reqwest::Client::builder()
36        .timeout(Duration::from_secs(TIMEOUT_SECS))
37        .build()?;
38
39    match provider {
40        LlmProvider::Claude { model } => {
41            analyze_claude(&client, image_b64, prompt, model, api_key).await
42        }
43        LlmProvider::Gemini { model } => {
44            analyze_gemini(&client, image_b64, prompt, model, api_key).await
45        }
46    }
47}
48
49async fn analyze_claude(
50    client: &reqwest::Client,
51    image_b64: &str,
52    prompt: &str,
53    model: &str,
54    api_key: &str,
55) -> Result<LlmOutput> {
56    let body = serde_json::json!({
57        "model": model,
58        "max_tokens": 1024,
59        "messages": [{
60            "role": "user",
61            "content": [
62                {
63                    "type": "image",
64                    "source": {
65                        "type": "base64",
66                        "media_type": "image/jpeg",
67                        "data": image_b64
68                    }
69                },
70                {
71                    "type": "text",
72                    "text": prompt
73                }
74            ]
75        }]
76    });
77
78    let start = Instant::now();
79    let resp = client
80        .post("https://api.anthropic.com/v1/messages")
81        .header("x-api-key", api_key)
82        .header("anthropic-version", "2023-06-01")
83        .header("content-type", "application/json")
84        .json(&body)
85        .send()
86        .await?;
87
88    let status = resp.status();
89    let json: serde_json::Value = resp.json().await?;
90
91    if !status.is_success() {
92        let msg = json["error"]["message"].as_str().unwrap_or("unknown error");
93        bail!("Anthropic API error {status}: {msg}");
94    }
95
96    let response = json["content"]
97        .as_array()
98        .and_then(|a| a.first())
99        .and_then(|c| c["text"].as_str())
100        .unwrap_or("")
101        .to_string();
102
103    let tokens_used = (json["usage"]["input_tokens"].as_u64().unwrap_or(0)
104        + json["usage"]["output_tokens"].as_u64().unwrap_or(0)) as u32;
105
106    Ok(LlmOutput {
107        response,
108        model: model.to_string(),
109        tokens_used,
110        latency_ms: start.elapsed().as_millis() as u64,
111    })
112}
113
114async fn analyze_gemini(
115    client: &reqwest::Client,
116    image_b64: &str,
117    prompt: &str,
118    model: &str,
119    api_key: &str,
120) -> Result<LlmOutput> {
121    let url = format!(
122        "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
123    );
124
125    let body = serde_json::json!({
126        "contents": [{
127            "parts": [
128                {
129                    "inlineData": {
130                        "mimeType": "image/jpeg",
131                        "data": image_b64
132                    }
133                },
134                {
135                    "text": prompt
136                }
137            ]
138        }]
139    });
140
141    let start = Instant::now();
142    let resp = client.post(&url).json(&body).send().await?;
143
144    let status = resp.status();
145    let json: serde_json::Value = resp.json().await?;
146
147    if !status.is_success() {
148        let msg = json["error"]["message"].as_str().unwrap_or("unknown error");
149        bail!("Gemini API error {status}: {msg}");
150    }
151
152    let response = json["candidates"]
153        .as_array()
154        .and_then(|a| a.first())
155        .and_then(|c| c["content"]["parts"].as_array())
156        .and_then(|p| p.first())
157        .and_then(|p| p["text"].as_str())
158        .unwrap_or("")
159        .to_string();
160
161    let tokens_used = json["usageMetadata"]["totalTokenCount"]
162        .as_u64()
163        .unwrap_or(0) as u32;
164
165    Ok(LlmOutput {
166        response,
167        model: model.to_string(),
168        tokens_used,
169        latency_ms: start.elapsed().as_millis() as u64,
170    })
171}