From 42828d68e9c8d2ed88688898d62f6727f787ac0a Mon Sep 17 00:00:00 2001 From: Hubert Hirtz Date: Wed, 2 Sep 2020 17:06:17 +0200 Subject: [PATCH] Make sure that WebSocket messages are valid UTF-8 ... by replacing invalid bytes with the REPLACEMENT CHARACTER U+FFFD. This is better than: - discarding the whole message, since the user would not see it, - removing invalid bytes, since the user would not see their presence, - converting the encoding (this is actually not possible). Contrary to its documentation, strings.ToValidUTF8 doesn't copy the string if it's valid UTF-8; it returns the input string unchanged, so already-valid messages incur no extra copy. --- conn.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/conn.go b/conn.go index 6914179..c14f13e 100644 --- a/conn.go +++ b/conn.go @@ -5,8 +5,10 @@ import ( "fmt" "io" "net" + "strings" "sync" "time" + "unicode" "gopkg.in/irc.v3" "nhooyr.io/websocket" @@ -62,7 +64,7 @@ func (wic websocketIRCConn) ReadMessage() (*irc.Message, error) { } func (wic websocketIRCConn) WriteMessage(msg *irc.Message) error { - b := []byte(msg.String()) + b := []byte(strings.ToValidUTF8(msg.String(), string(unicode.ReplacementChar))) ctx := context.Background() if !wic.writeDeadline.IsZero() { var cancel context.CancelFunc