Skip to main content

robonix_pilot/
history.rs

1// SPDX-License-Identifier: MulanPSL-2.0
2// Author: wheatfox <wheatfox17@icloud.com>
3//
4// Two concerns live here:
5//   1. Mapping an executor tool result (JSON string) back into one or more
6//      `Message`s the LLM can ingest. OpenAI-compatible endpoints reject
7//      images on `tool` role, so when a tool returns an image we keep the
8//      tool result textual and append a synthetic `user` vision message.
9//   2. Pre-flight cleanup of `Vec<Message>` before we hand it to the LLM:
10//      trim to MAX_HISTORY and drop tool messages whose preceding assistant
11//      tool_call was already evicted (which would otherwise be rejected).
12
13use crate::vlm::Message;
14use std::collections::HashSet;
15
16/// Output of `tool_result_to_messages`: messages that go in `tool` role,
17/// plus optional follow-up `user` messages (e.g. for an image attachment).
18pub struct ToolResultHistory {
19    pub tool_messages: Vec<Message>,
20    pub followup_messages: Vec<Message>,
21}
22
23/// Build history messages from one executor tool result.
24///
25/// Normal path: a single `Message` with `role: "tool"` and `tool_call_id = call_id`,
26/// carrying `output` (or a short placeholder) in `content`.
27///
28/// Image path: OpenAI-compatible APIs do not accept image payloads on `tool` messages.
29/// We still emit a `tool` line with a text placeholder, then add a synthetic `user`
30/// message with `image_base64` so `build_openai_messages` can attach a vision part.
31pub fn tool_result_to_messages(call_id: &str, output: &str) -> ToolResultHistory {
32    // One `tool` message in the typical case; image-shaped results add follow-up `user`
33    // vision messages (see doc above).
34    let Ok(v) = serde_json::from_str::<serde_json::Value>(output) else {
35        return ToolResultHistory {
36            tool_messages: vec![Message::tool_result(call_id, output)],
37            followup_messages: vec![],
38        };
39    };
40
41    if let Some(b64) = v.get("image_base64").and_then(|x| x.as_str()) {
42        let fmt = v.get("format").and_then(|x| x.as_str()).unwrap_or("jpeg");
43        return ToolResultHistory {
44            tool_messages: vec![Message::tool_result(
45                call_id,
46                &format!("[{fmt} image attached]"),
47            )],
48            followup_messages: vec![Message::user_with_image(
49                "Executor feedback: the previous capability call returned this image. Use it as observation data for the current task; this is not a new user request.",
50                b64.to_string(),
51            )],
52        };
53    }
54
55    // sensor_msgs/msg/Image — matches camera_snapshot / camera_depth_snapshot.
56    // Skip encoding="error" (placeholder) and any payload missing real data.
57    let img_encoding = v.get("encoding").and_then(|e| e.as_str());
58    if v.get("width").is_some()
59        && v.get("height").is_some()
60        && img_encoding.is_some()
61        && img_encoding != Some("error")
62        && v.get("data")
63            .and_then(|d| d.as_str())
64            .is_some_and(|s| !s.is_empty())
65    {
66        let enc = img_encoding.unwrap_or("jpeg");
67        let b64 = v.get("data").and_then(|d| d.as_str()).unwrap_or("");
68        return ToolResultHistory {
69            tool_messages: vec![Message::tool_result(
70                call_id,
71                &format!("[sensor_msgs/Image encoding={enc}]"),
72            )],
73            followup_messages: vec![Message::user_with_image(
74                "Executor feedback: the previous capability call returned this image. Use it as observation data for the current task; this is not a new user request.",
75                b64.to_string(),
76            )],
77        };
78    }
79
80    ToolResultHistory {
81        tool_messages: vec![Message::tool_result(call_id, output)],
82        followup_messages: vec![],
83    }
84}
85
86/// Drop the oldest messages so `history.len() <= max`. No-op if already short.
87pub fn trim(history: &mut Vec<Message>, max: usize) {
88    if history.len() > max {
89        let remove = history.len() - max;
90        history.drain(0..remove);
91    }
92}
93
94/// Filter `history` to a form OpenAI-compatible endpoints accept:
95/// every `tool` message must be preceded by an `assistant` whose
96/// `tool_calls` lists its `tool_call_id`. Orphans (e.g. left over from
97/// a trim that dropped the assistant) are quietly removed.
98pub fn sanitize_for_vlm(history: &[Message]) -> Vec<Message> {
99    let mut out: Vec<Message> = Vec::with_capacity(history.len());
100    let mut open_tool_call_ids: HashSet<String> = Default::default();
101
102    for msg in history {
103        match msg.role.as_str() {
104            "assistant" => {
105                open_tool_call_ids.clear();
106                if let Some(calls) = &msg.tool_calls {
107                    for tc in calls {
108                        open_tool_call_ids.insert(tc.id.clone());
109                    }
110                }
111                out.push(msg.clone());
112            }
113            "tool" => {
114                let Some(call_id) = msg.tool_call_id.as_ref() else {
115                    continue;
116                };
117                if open_tool_call_ids.remove(call_id) {
118                    out.push(msg.clone());
119                }
120            }
121            _ => {
122                open_tool_call_ids.clear();
123                out.push(msg.clone());
124            }
125        }
126    }
127    out
128}
129
130/// Extract `std_msgs/String.data` from a tool's JSON output. Accepts either
131/// raw text (returned verbatim) or `{"data": "..."}`.
132pub fn decode_string_output(output: &str) -> String {
133    serde_json::from_str::<serde_json::Value>(output)
134        .ok()
135        .and_then(|v| {
136            v.get("data")
137                .and_then(|x| x.as_str())
138                .map(ToString::to_string)
139        })
140        .unwrap_or_else(|| output.to_string())
141}