nixos/patches/flare/0002-feat-messages-Implement-formatted-messages.patch

From 45b21cee00bfc5545aea6fbc9a4f991cfd781cff Mon Sep 17 00:00:00 2001
From: Simon Gardling <titaniumtown@proton.me>
Date: Wed, 29 Apr 2026 19:13:52 -0400
Subject: [PATCH 2/6] feat(messages): Implement formatted messages

- Display Signal BodyRange styles (bold, italic, strikethrough,
  spoiler, monospace) on incoming messages by translating them into
  pango attributes alongside the existing mention rendering, making
  the offset accounting work for mention substitutions and
  surrogate-pair text alike.
- Parse a markdown-style formatting syntax on outbound messages and
  send the resulting BodyRanges with the cleaned body text. The
  parser lives in its own module with unit tests covering the
  supported markers, nesting, unmatched markers, and non-BMP UTF-16
  offsets.
- Update the message-input tooltip to surface the supported markers.
---
 CHANGELOG.md                           |   2 +
 data/resources/ui/channel_messages.blp |   2 +-
 src/backend/message/formatting.rs      | 287 +++++++++++++++++++++++++
 src/backend/message/mod.rs             |   2 +
 src/backend/message/text_message.rs    | 200 +++++++++++++----
 5 files changed, 447 insertions(+), 46 deletions(-)
 create mode 100644 src/backend/message/formatting.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2bde927..50cd5f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 - Send typing indicators while composing a message and display them above the message input.
 - Settings to enable or disable sending and showing typing indicators.
+- Render formatted message styles (bold, italic, strikethrough, spoiler, monospace) on incoming messages.
+- Send formatted messages with markdown-style markers (`**bold**`, `*italic*` or `_italic_`, `~~strike~~`, `||spoiler||`, `` `monospace` ``).

 ## [0.20.4] - 2026-04-22

diff --git a/data/resources/ui/channel_messages.blp b/data/resources/ui/channel_messages.blp
index 7f438e4..6c3948f 100644
--- a/data/resources/ui/channel_messages.blp
+++ b/data/resources/ui/channel_messages.blp
@@ -301,7 +301,7 @@ template $FlChannelMessages: Box {
               activate => $send_message() swapped;
               paste-file => $paste_file() swapped;
               paste-texture => $paste_texture() swapped;
-              tooltip-text: C_("tooltip", "Message input");
+              tooltip-text: C_("tooltip", "Message input. Use **bold**, *italic*, ~~strike~~, ||spoiler|| or `monospace` to format text.");
             }

             Button button_send {
diff --git a/src/backend/message/formatting.rs b/src/backend/message/formatting.rs
new file mode 100644
index 0000000..5a1d596
--- /dev/null
+++ b/src/backend/message/formatting.rs
@@ -0,0 +1,287 @@
+//! Lightweight markdown-style formatting parser for outgoing messages.
+//!
+//! Supported syntax (mirroring the way Signal Desktop and iOS render
+//! formatted messages):
+//!
+//! - `**text**` for bold
+//! - `*text*` or `_text_` for italic
+//! - `~~text~~` for strikethrough
+//! - `||text||` for spoiler
+//! - `` `text` `` for monospace
+//!
+//! Parsing is forgiving: any marker without a matching counterpart is left
+//! verbatim in the resulting text. Markers may nest as long as the inner
+//! marker is a different kind from the outer one.
+//!
+//! The function returns the cleaned message body plus the corresponding
+//! `BodyRange`s with offsets in UTF-16 code units, as required by the
+//! Signal protocol.
+
+use std::collections::HashMap;
+
+use libsignal_service::proto::BodyRange;
+use libsignal_service::proto::body_range::{AssociatedValue, Style as BodyRangeStyle};
+
+/// Kind of inline formatting that a marker token in the typed text stands for.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+enum Marker {
+    Bold, // `**`
+    Italic, // `*` or `_`
+    Strikethrough, // `~~`
+    Spoiler, // `||`
+    Monospace, // a single backtick
+}
+impl Marker {
+    fn style(self) -> BodyRangeStyle {
+        match self {
+            Self::Bold => BodyRangeStyle::Bold,
+            Self::Italic => BodyRangeStyle::Italic,
+            Self::Strikethrough => BodyRangeStyle::Strikethrough,
+            Self::Spoiler => BodyRangeStyle::Spoiler,
+            Self::Monospace => BodyRangeStyle::Monospace,
+        }
+    }
+}
+
+/// Try to consume a marker starting at `chars[i]`, returning its kind and token
+/// length. A `_` between two alphanumerics is literal, so `snake_case` survives.
+fn detect_marker(chars: &[char], i: usize) -> Option<(Marker, usize)> {
+    match (*chars.get(i)?, chars.get(i + 1).copied()) {
+        ('*', Some('*')) => Some((Marker::Bold, 2)),
+        ('~', Some('~')) => Some((Marker::Strikethrough, 2)),
+        ('|', Some('|')) => Some((Marker::Spoiler, 2)),
+        // Intraword underscores (identifiers, URLs) must not be eaten as italics.
+        ('_', Some(n)) if i > 0 && chars[i - 1].is_alphanumeric() && n.is_alphanumeric() => None,
+        ('*', _) | ('_', _) => Some((Marker::Italic, 1)),
+        ('`', _) => Some((Marker::Monospace, 1)),
+        _ => None,
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+struct MatchedSpan {
+    marker: Marker, // kind shared by the opening and the closing token
+    open_pos: usize, // char index at which the opening token starts
+    close_pos: usize, // char index at which the closing token starts
+    marker_len: usize, // token length in chars, as detected for the opener
+}
+
+/// Pair markers of the same kind in one left-to-right pass: a kind's first token
+/// opens a span, its next token closes it, and partnerless tokens yield no span.
+fn detect_matched_markers(chars: &[char]) -> Vec<MatchedSpan> {
+    let mut pending: HashMap<Marker, (usize, usize)> = HashMap::new();
+    let mut matched = Vec::new();
+    let mut pos = 0;
+    while pos < chars.len() {
+        let Some((marker, width)) = detect_marker(chars, pos) else {
+            pos += 1;
+            continue;
+        };
+        match pending.remove(&marker) {
+            Some((open_pos, marker_len)) => matched.push(MatchedSpan {
+                marker,
+                open_pos,
+                close_pos: pos,
+                marker_len,
+            }),
+            None => {
+                pending.insert(marker, (pos, width));
+            }
+        }
+        pos += width;
+    }
+    matched
+}
+
+/// Strip matched markdown-style markers from `input` and describe the styled
+/// regions as Signal [BodyRange]s whose offsets count UTF-16 code units.
+///
+/// A matched pair with nothing between its tokens (e.g. `****`) yields no range.
+pub fn parse_formatting(input: &str) -> (String, Vec<BodyRange>) {
+    let chars: Vec<char> = input.chars().collect();
+    let spans = detect_matched_markers(&chars);
+
+    if spans.is_empty() {
+        // Nothing matched, so the text is returned exactly as given.
+        return (input.to_owned(), Vec::new());
+    }
+
+    // Flag every character that belongs to a matched opening or closing token;
+    // flagged characters are left out of the cleaned text.
+    let mut is_marker = vec![false; chars.len()];
+    for sp in &spans {
+        for token_start in [sp.open_pos, sp.close_pos] {
+            let token_end = (token_start + sp.marker_len).min(chars.len());
+            is_marker[token_start..token_end].fill(true);
+        }
+    }
+
+    // Copy the surviving characters and record, for every input position, how
+    // many UTF-16 code units of cleaned text precede it. One extra trailing
+    // slot holds the total length of the cleaned text.
+    let mut cleaned = String::with_capacity(input.len());
+    let mut units_before: Vec<u32> = Vec::with_capacity(chars.len() + 1);
+    let mut emitted: u32 = 0;
+    for (&ch, &dropped) in chars.iter().zip(&is_marker) {
+        units_before.push(emitted);
+        if !dropped {
+            cleaned.push(ch);
+            emitted += ch.len_utf16() as u32;
+        }
+    }
+    units_before.push(emitted);
+
+    // Each span becomes a range over the text between its two tokens; spans
+    // with no text in between are skipped.
+    let mut ranges: Vec<BodyRange> = spans
+        .iter()
+        .filter_map(|sp| {
+            let start = units_before[sp.open_pos + sp.marker_len];
+            let end = units_before[sp.close_pos];
+            (end > start).then(|| BodyRange {
+                start: Some(start),
+                length: Some(end - start),
+                associated_value: Some(AssociatedValue::Style(sp.marker.style() as i32)),
+            })
+        })
+        .collect();
+
+    // The sort is stable, so ranges with equal starts keep their match order.
+    ranges.sort_by_key(|r| r.start);
+
+    (cleaned, ranges)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn ranges_summary(ranges: &[BodyRange]) -> Vec<(u32, u32, BodyRangeStyle)> {
+        ranges
+            .iter()
+            .map(|r| {
+                let style = match r.associated_value {
+                    Some(AssociatedValue::Style(s)) => {
+                        BodyRangeStyle::try_from(s).unwrap_or(BodyRangeStyle::None)
+                    }
+                    _ => BodyRangeStyle::None, // range carries no style value
+                };
+                (r.start.unwrap_or(0), r.length.unwrap_or(0), style) // (start, length, style)
+            })
+            .collect()
+    }
+
+    #[test]
+    fn no_markers() {
+        let (text, ranges) = parse_formatting("hello world");
+        assert_eq!(text, "hello world");
+        assert!(ranges.is_empty());
+    }
+
+    #[test]
+    fn bold() {
+        let (text, ranges) = parse_formatting("**bold**");
+        assert_eq!(text, "bold");
+        assert_eq!(ranges_summary(&ranges), vec![(0, 4, BodyRangeStyle::Bold)]);
+    }
+
+    #[test]
+    fn italic_asterisk() {
+        let (text, ranges) = parse_formatting("*italic*");
+        assert_eq!(text, "italic");
+        assert_eq!(
+            ranges_summary(&ranges),
+            vec![(0, 6, BodyRangeStyle::Italic)]
+        );
+    }
+
+    #[test]
+    fn italic_underscore() {
+        let (text, ranges) = parse_formatting("_italic_");
+        assert_eq!(text, "italic");
+        assert_eq!(
+            ranges_summary(&ranges),
+            vec![(0, 6, BodyRangeStyle::Italic)]
+        );
+    }
+
+    #[test]
+    fn strikethrough() {
+        let (text, ranges) = parse_formatting("~~strike~~");
+        assert_eq!(text, "strike");
+        assert_eq!(
+            ranges_summary(&ranges),
+            vec![(0, 6, BodyRangeStyle::Strikethrough)]
+        );
+    }
+
+    #[test]
+    fn spoiler() {
+        let (text, ranges) = parse_formatting("||hidden||");
+        assert_eq!(text, "hidden");
+        assert_eq!(
+            ranges_summary(&ranges),
+            vec![(0, 6, BodyRangeStyle::Spoiler)]
+        );
+    }
+
+    #[test]
+    fn monospace() {
+        let (text, ranges) = parse_formatting("`code`");
+        assert_eq!(text, "code");
+        assert_eq!(
+            ranges_summary(&ranges),
+            vec![(0, 4, BodyRangeStyle::Monospace)]
+        );
+    }
+
+    #[test]
+    fn bold_and_italic_nested() {
+        let (text, ranges) = parse_formatting("**bold *italic***");
+        assert_eq!(text, "bold italic");
+        let summary = ranges_summary(&ranges);
+        assert!(summary.contains(&(0, 11, BodyRangeStyle::Bold))); // whole cleaned text
+        assert!(summary.contains(&(5, 6, BodyRangeStyle::Italic))); // "italic" after "bold "
+    }
+
+    #[test]
+    fn unmatched_open_left_literal() {
+        let (text, ranges) = parse_formatting("**only one start");
+        assert_eq!(text, "**only one start"); // lone marker stays in the text
+        assert!(ranges.is_empty());
+    }
+
+    #[test]
+    fn surrounding_text_preserved() {
+        let (text, ranges) = parse_formatting("hello **world**!");
+        assert_eq!(text, "hello world!");
+        assert_eq!(ranges_summary(&ranges), vec![(6, 5, BodyRangeStyle::Bold)]);
+    }
+
+    #[test]
+    fn multiple_pairs() {
+        let (text, ranges) = parse_formatting("**a**b**c**");
+        assert_eq!(text, "abc");
+        let summary = ranges_summary(&ranges);
+        assert_eq!(summary.len(), 2);
+        assert_eq!(summary[0], (0, 1, BodyRangeStyle::Bold));
+        assert_eq!(summary[1], (2, 1, BodyRangeStyle::Bold));
+    }
+
+    #[test]
+    fn empty_pair_dropped() {
+        let (text, ranges) = parse_formatting("****");
+        assert_eq!(text, "");
+        assert!(ranges.is_empty());
+    }
+
+    #[test]
+    fn utf16_offsets_for_non_bmp() {
+        // "𝟚" (U+1D7DA) lies outside the BMP and is encoded as a surrogate
+        // pair, i.e. two UTF-16 code units, so the Bold range covering it
+        // must report a `length` of 2 rather than 1.
+        let (text, ranges) = parse_formatting("**𝟚**");
+        assert_eq!(text, "𝟚");
+        assert_eq!(ranges_summary(&ranges), vec![(0, 2, BodyRangeStyle::Bold)]);
+    }
+}
diff --git a/src/backend/message/mod.rs b/src/backend/message/mod.rs
index 74952ac..4e0f584 100644
--- a/src/backend/message/mod.rs
+++ b/src/backend/message/mod.rs
@@ -1,12 +1,14 @@
 mod call_message;
 mod deletion_message;
 mod display_message;
+mod formatting;
 mod reaction_message;
 mod text_message;

 pub use call_message::{CallMessage, CallMessageType};
 pub use deletion_message::DeletionMessage;
 pub use display_message::{DisplayMessage, DisplayMessageExt};
+pub use formatting::parse_formatting;
 pub use reaction_message::ReactionMessage;
 pub use text_message::TextMessage;

diff --git a/src/backend/message/text_message.rs b/src/backend/message/text_message.rs
index a9adb04..c06bcfa 100644
--- a/src/backend/message/text_message.rs
+++ b/src/backend/message/text_message.rs
@@ -2,9 +2,9 @@ use crate::prelude::*;

 use libsignal_service::content::Reaction;
 use libsignal_service::proto::DataMessage;
-use libsignal_service::proto::body_range::AssociatedValue;
+use libsignal_service::proto::body_range::{AssociatedValue, Style as BodyRangeStyle};
 use libsignal_service::proto::data_message::Delete;
-use pango::{AttrColor, AttrList};
+use pango::{AttrColor, AttrInt, AttrList, AttrString, Style as PangoStyle, Weight};

 use crate::backend::timeline::{TimelineItem, TimelineItemExt};
 use crate::backend::{Attachment, Channel, Contact};
@@ -19,6 +19,48 @@ gtk::glib::wrapper! {
 const MENTION_CHAR: char = '@';
 const MENTION_COLOR: (u16, u16, u16) = (0, 0, u16::MAX);

+/// Build the pango attributes that render a Signal [BodyRangeStyle] over the
+/// index range `start_byte..end_byte`. Pango has no spoiler primitive, so a
+/// spoiler is approximated as black text on a black background.
+fn style_to_pango_attrs(
+    style: BodyRangeStyle,
+    start_byte: u32,
+    end_byte: u32,
+) -> Vec<pango::Attribute> {
+    fn span<A: Into<pango::Attribute>>(attr: A, start: u32, end: u32) -> pango::Attribute {
+        let mut attr: pango::Attribute = attr.into();
+        attr.set_start_index(start);
+        attr.set_end_index(end);
+        attr // now restricted to `start..end`
+    }
+
+    match style {
+        BodyRangeStyle::Bold => vec![span(
+            AttrInt::new_weight(Weight::Bold),
+            start_byte,
+            end_byte,
+        )],
+        BodyRangeStyle::Italic => vec![span(
+            AttrInt::new_style(PangoStyle::Italic),
+            start_byte,
+            end_byte,
+        )],
+        BodyRangeStyle::Strikethrough => {
+            vec![span(AttrInt::new_strikethrough(true), start_byte, end_byte)]
+        }
+        BodyRangeStyle::Monospace => vec![span(
+            AttrString::new_family("monospace"),
+            start_byte,
+            end_byte,
+        )],
+        BodyRangeStyle::Spoiler => vec![
+            span(AttrColor::new_foreground(0, 0, 0), start_byte, end_byte), // black text ...
+            span(AttrColor::new_background(0, 0, 0), start_byte, end_byte), // ... on black
+        ],
+        BodyRangeStyle::None => Vec::new(), // nothing to render
+    }
+}
+
 impl TextMessage {
     pub fn from_text_channel_sender<S: AsRef<str>>(
         text: S,
@@ -65,14 +107,16 @@ impl TextMessage {
             .build();

         let text_owned = text.as_ref().to_owned();
-        let body = if text_owned.is_empty() {
-            None
+        let (body, body_ranges) = if text_owned.is_empty() {
+            (None, Vec::new())
         } else {
-            Some(text_owned)
+            let (cleaned, ranges) = super::parse_formatting(&text_owned);
+            (Some(cleaned), ranges)
         };

         let message = DataMessage {
             body,
+            body_ranges,
             timestamp: Some(timestamp),
             ..Default::default()
         };
@@ -245,10 +289,17 @@ impl TextMessage {
         self.notify_body();
     }

-    /// Formats the message body based on its ranges, e.g. to insert mention names.
+    /// Format the message body based on its body ranges.
+    ///
+    /// This both substitutes mentions with the resolved participant name and
+    /// applies styling (bold, italic, monospace, strikethrough, spoiler) as
+    /// pango attributes on the resulting text.
     ///
-    /// Returns the resulting strings and an [AttrList] that can be used in labels to highlight areas.
-    /// Be carefull when editing this function and note that Signal uses UTF-16 byte offsets, while Rust uses UTF-8 byte offsets.
+    /// Note that Signal addresses ranges in UTF-16 code units, while Rust
+    /// strings and pango attributes use UTF-8 byte offsets. The implementation
+    /// keeps an explicit map from each UTF-16 index to its byte offset in the
+    /// resulting string, so that styles applied to a range that survives a
+    /// mention substitution still land on the right bytes.
     async fn format_body(&self) -> (Option<String>, AttrList) {
         let Some(body) = self.internal_data().and_then(|m| m.body) else {
             return (None, AttrList::new());
@@ -264,53 +315,112 @@ impl TextMessage {

         let channel = self.channel();

-        // Sort by growing start index
+        // Sort by growing start index so mention substitutions happen left-to-right.
         ranges.sort_unstable_by_key(|r| r.start());

-        let attrs = AttrList::new();
-
-        // Signal (Java) uses UTF-16 body and therefore also UTF-16 offsets, while Flare (Rust) uses UTF-8. Need to convert.
-        let body_utf16: Vec<u16> = body.encode_utf16().collect();
-
-        let mut result_utf8 = String::new();
-        let mut index_utf16 = 0;
-        let mut index_utf8 = 0;
-        for r in ranges {
-            let start = r.start() as usize;
-            let end = start + r.length() as usize;
-            let uuid = match r.associated_value {
+        // Resolve mention names asynchronously up front so the rest of the
+        // formatting can be a synchronous walk.
+        let mut mentions: Vec<(usize, usize, String)> = Vec::new();
+        for r in &ranges {
+            let uuid = match &r.associated_value {
                 Some(AssociatedValue::MentionAci(u)) => u.parse().ok(),
                 Some(AssociatedValue::MentionAciBinary(u)) => {
-                    u.try_into().ok().map(Uuid::from_bytes)
+                    u.clone().try_into().ok().map(Uuid::from_bytes)
                 }
                 _ => None,
             };
-            let Some(uuid) = uuid else {
+            if let Some(uuid) = uuid {
+                let start = r.start() as usize;
+                let end = r.start().saturating_add(r.length()) as usize; // must not overflow
+                let name = format!(
+                    "{}{}",
+                    MENTION_CHAR,
+                    channel.participant_by_uuid(uuid).await.title()
+                );
+                mentions.push((start, end, name));
+            }
+        }
+        // Well-formed mentions never overlap; order them by start for the walk below.
+        mentions.sort_unstable_by_key(|(s, _, _)| *s);
+
+        let body_utf16: Vec<u16> = body.encode_utf16().collect();
+        let attrs = AttrList::new();
+
+        // Build the result string while constructing a per-utf16-index map
+        // into the resulting UTF-8 byte offsets.
+        let mut byte_at: Vec<usize> = Vec::with_capacity(body_utf16.len() + 1);
+        let mut result_utf8 = String::new();
+        let mut mention_iter = mentions.into_iter().peekable();
+
+        let mut i = 0;
+        while i < body_utf16.len() {
+            // Drop mentions starting inside already-consumed text (overlapping or
+            // mid-surrogate); such an entry would otherwise block all later ones.
+            while mention_iter.next_if(|m| m.0 < i).is_some() {}
+            // Inject mention substitutions at their start position.
+            if let Some((m_start, m_end, name)) = mention_iter.next_if(|m| m.0 == i) {
+                // `m_end` is sender-controlled: clamp it into `m_start..=len` so a
+                // bogus length can neither rewind `i` nor bloat the offset map.
+                let m_end = m_end.clamp(m_start, body_utf16.len());
+                let mention_byte_start = result_utf8.len();
+                // Map every UTF-16 index of the span to the substituted text's start.
+                for _ in m_start..m_end {
+                    byte_at.push(mention_byte_start);
+                }
+                result_utf8.push_str(&name);
+
+                let mut highlight =
+                    AttrColor::new_foreground(MENTION_COLOR.0, MENTION_COLOR.1, MENTION_COLOR.2);
+                highlight.set_start_index(mention_byte_start as u32);
+                highlight.set_end_index(result_utf8.len() as u32);
+                attrs.insert(highlight);
+
+                i = m_end.min(body_utf16.len());
                 continue;
-            };
-            let name = format!(
-                "{}{}",
-                MENTION_CHAR,
-                channel.participant_by_uuid(uuid).await.title()
-            );
-            let to_add_body = String::from_utf16_lossy(&body_utf16[index_utf16..start]);
-            result_utf8.push_str(&to_add_body);
-            result_utf8.push_str(&name);
-            index_utf16 = end;
-
-            let index_start_highlight = index_utf8 + to_add_body.len();
-            index_utf8 += to_add_body.len() + name.len();
-            let index_end_highlight = index_utf8;
-
-            let (red, green, blue) = MENTION_COLOR;
-            let mut highlight = AttrColor::new_foreground(red, green, blue);
-            highlight.set_start_index(index_start_highlight as u32);
-            highlight.set_end_index(index_end_highlight as u32);
-            attrs.insert(highlight);
+            }
+
+            byte_at.push(result_utf8.len());
+            let unit = body_utf16[i];
+            if (0xD800..=0xDBFF).contains(&unit) && i + 1 < body_utf16.len() {
+                // High surrogate: consume the pair as one codepoint.
+                let pair = [unit, body_utf16[i + 1]];
+                let decoded = char::decode_utf16(pair.iter().copied())
+                    .next()
+                    .and_then(|r| r.ok())
+                    .unwrap_or('\u{FFFD}');
+                result_utf8.push(decoded);
+                byte_at.push(result_utf8.len());
+                i += 2;
+            } else {
+                let decoded = char::decode_utf16([unit].iter().copied())
+                    .next()
+                    .and_then(|r| r.ok())
+                    .unwrap_or('\u{FFFD}');
+                result_utf8.push(decoded);
+                i += 1;
+            }
         }
+        byte_at.push(result_utf8.len());

-        if index_utf16 < body_utf16.len() {
-            result_utf8.push_str(&String::from_utf16_lossy(&body_utf16[index_utf16..]))
+        // Apply style ranges via the byte-offset map, clamping untrusted offsets.
+        for r in ranges {
+            let Some(AssociatedValue::Style(s)) = r.associated_value else {
+                continue;
+            };
+            let style = match BodyRangeStyle::try_from(s) {
+                Ok(BodyRangeStyle::None) | Err(_) => continue,
+                Ok(other) => other,
+            };
+            let start_utf16 = (r.start() as usize).min(byte_at.len() - 1);
+            let end_utf16 = (r.start().saturating_add(r.length()) as usize).min(byte_at.len() - 1);
+            if start_utf16 >= end_utf16 {
+                continue;
+            }
+            let start_byte = byte_at[start_utf16] as u32;
+            let end_byte = byte_at[end_utf16] as u32;
+            for attr in style_to_pango_attrs(style, start_byte, end_byte) {
+                attrs.insert(attr);
+            }
         }

         (Some(result_utf8), attrs)
--
2.53.0