Add support for the SEARCH extension
This commit is contained in:
parent
b67b9b3cd2
commit
683cfe0615
104
doc/ext/search.md
Normal file
104
doc/ext/search.md
Normal file
@ -0,0 +1,104 @@
|
||||
# search
|
||||
|
||||
This is a work-in-progress specification.
|
||||
|
||||
## Description
|
||||
|
||||
This document describes the format of the `search` extension. This enables clients to run a server-side search of messages according to specified selectors.
|
||||
|
||||
This specification lets clients run an efficient search query on a bouncer or server that has quick access to the client's message history, instead of having to download all logs and run the search locally.
|
||||
|
||||
The server as mentioned in this document may refer to either an IRC server or an IRC bouncer.
|
||||
|
||||
## Implementation
|
||||
|
||||
The `search` extension uses the `soju.im/search` capability and introduces a new command, `SEARCH`, and batch type, `soju.im/search`.
|
||||
|
||||
Full support for this extension requires support for the batch, server-time and message-tags capabilities. However, limited functionality is available to clients without support for these CAPs. Servers SHOULD NOT enforce that clients support all related capabilities before using the search extension.
|
||||
|
||||
The `soju.im/search` capability MUST be negotiated.
|
||||
|
||||
### `SEARCH` Command
|
||||
|
||||
The client can request a message search by sending the `SEARCH` command to the server. This command has the following general syntax:
|
||||
|
||||
SEARCH <attributes>
|
||||
|
||||
If the batch capability was negotiated, the server MUST reply to a successful SEARCH command using a batch with batch type `soju.im/search`. If no content exists to return, the server SHOULD return an empty batch in order to avoid the client waiting for a reply.
|
||||
|
||||
The server then replies with a batch of batch type `soju.im/search` containing messages matching all the specified attributes. These messages MUST be `PRIVMSG` or `NOTICE` messages.
|
||||
|
||||
### Returned message notes
|
||||
|
||||
The order of returned messages within the batch is implementation-defined, but SHOULD be ascending time order or some approximation thereof, regardless of the attributes specified. The server-time tag on each message SHOULD be the time at which the message was received by the IRC server. When provided, the msgid tag that identifies each individual message in a response MUST be the msgid tag as originally sent by the IRC server.
|
||||
|
||||
Servers SHOULD provide clients with a consistent message order that is valid across the lifetime of a single connection, and which determinately orders any two messages (even if they share a timestamp). This order SHOULD coincide with the order in which messages are returned within a response batch. It need not coincide with the delivery order of messages when they were relayed on any particular server.
|
||||
|
||||
#### Errors and Warnings
|
||||
|
||||
Errors are returned using the standard replies syntax.
|
||||
|
||||
If the specified attributes are invalid, the `INVALID_PARAMS` error code SHOULD be returned.
|
||||
|
||||
FAIL SEARCH INVALID_PARAMS [invalid_parameters] :Invalid parameters
|
||||
|
||||
If the search cannot be run due to an internal error, the `INTERNAL_ERROR` error code SHOULD be returned.
|
||||
|
||||
FAIL SEARCH INTERNAL_ERROR [extra_context] :The search could not be run
|
||||
|
||||
### Standard search attributes
|
||||
|
||||
Servers MUST recognise the following attributes.
|
||||
|
||||
The following attributes are considered a match when:
|
||||
* `in`: the message was sent to this target (channel or user).
|
||||
* `from`: the message was sent with this nick.
|
||||
* `after`: the message was sent at or after this time (same format as the `server-time` specification).
|
||||
* `before`: the message was sent at or before this time (same format as the `server-time` specification).
|
||||
* `text`: the message text matches the specified text. The actual algorithm used for matching the text is implementation defined.
|
||||
|
||||
If `after` is specified, messages SHOULD be searched from that time. Otherwise, messages SHOULD be searched from the `before` time, which defaults to the current server time.
|
||||
|
||||
Additionally, the following attributes MUST be recognized:
|
||||
* `limit`: a number representing an upper bound on the count of messages to return. The server MAY return fewer messages than this number.
|
||||
|
||||
### Examples
|
||||
|
||||
Searching messages sent by `jackie` in `#chan`
|
||||
~~~~
|
||||
[c] SEARCH from=jackie;in=#chan
|
||||
[s] :irc.host BATCH +ID soju.im/search
|
||||
[s] @batch=ID;msgid=1234;time=2019-01-04T14:33:26.123Z :jackie!indent@host PRIVMSG #chan :Be what you want
|
||||
[s] @batch=ID;msgid=1235;time=2019-01-04T14:35:26.123Z :jackie!indent@host PRIVMSG #chan :Want what you be
|
||||
[s] :irc.host BATCH -ID
|
||||
~~~~
|
||||
|
||||
Searching messages matching the text `fast` in `#chan`, returning up to 2 messages
|
||||
~~~~
|
||||
[c] SEARCH text=fast;in=#chan;limit=2
|
||||
[s] :irc.host BATCH +ID soju.im/search
|
||||
[s] @batch=ID;msgid=1234;time=2019-01-04T14:33:26.123Z :bill!indent@host PRIVMSG #chan :That was fast!
|
||||
[s] @batch=ID;msgid=1235;time=2019-01-04T14:35:26.123Z :jackie!indent@host PRIVMSG #chan :Fasting is hard.
|
||||
[s] :irc.host BATCH -ID
|
||||
~~~~
|
||||
|
||||
Searching messages when none match
|
||||
~~~~
|
||||
[c] SEARCH before=2010-01-01T00:00:00.000Z;in=#chan
|
||||
[s] :irc.host BATCH +ID soju.im/search
|
||||
[s] :irc.host BATCH -ID
|
||||
~~~~
|
||||
|
||||
## Use Cases
|
||||
|
||||
Clients can run a fast server-side search across months of history and channels without having to download all their logs and run the search locally.
|
||||
|
||||
This enables client interfaces to provide a search feature with quick matches. Additional context can be fetched thanks to the separate `CHATHISTORY` extension.
|
||||
|
||||
## Implementation Considerations
|
||||
|
||||
Server implementations may use different algorithms for matching messages against the specified `text`. Some implementations may choose to match by substrings, by whole words, or by other algorithms such as what is offered by their database (e.g. SQLite full-text search). The comparison may be case-insensitive or case-sensitive.
|
||||
|
||||
## Security Considerations
|
||||
|
||||
Processing logs can be slow, and arbitrary regular expressions can take a virtually infinite amount of time when maliciously crafted, even on small input sizes. Servers offering this feature should implement a timeout on their total request time, including regular expression compile time, as well as message fetching, parsing and selecting.
|
@ -361,6 +361,7 @@ func newDownstreamConn(srv *Server, ic ircConn, id uint64) *downstreamConn {
|
||||
// chatHistoryMessageStore
|
||||
if srv.Config().LogPath != "" {
|
||||
dc.caps.Available["draft/chathistory"] = ""
|
||||
dc.caps.Available["soju.im/search"] = ""
|
||||
}
|
||||
return dc
|
||||
}
|
||||
@ -2961,6 +2962,92 @@ func (dc *downstreamConn) handleMessageRegistered(ctx context.Context, msg *irc.
|
||||
})
|
||||
}
|
||||
})
|
||||
case "SEARCH":
|
||||
store, ok := dc.user.msgStore.(searchMessageStore)
|
||||
if !ok {
|
||||
return ircError{&irc.Message{
|
||||
Command: irc.ERR_UNKNOWNCOMMAND,
|
||||
Params: []string{dc.nick, "SEARCH", "Unknown command"},
|
||||
}}
|
||||
}
|
||||
var attrsStr string
|
||||
if err := parseMessageParams(msg, &attrsStr); err != nil {
|
||||
return err
|
||||
}
|
||||
attrs := irc.ParseTags(attrsStr)
|
||||
|
||||
var uc *upstreamConn
|
||||
const searchMaxLimit = 100
|
||||
opts := searchOptions{
|
||||
limit: searchMaxLimit,
|
||||
}
|
||||
for name, v := range attrs {
|
||||
value := string(v)
|
||||
switch name {
|
||||
case "before", "after":
|
||||
timestamp, err := time.Parse(serverTimeLayout, value)
|
||||
if err != nil {
|
||||
return ircError{&irc.Message{
|
||||
Command: "FAIL",
|
||||
Params: []string{"SEARCH", "INVALID_PARAMS", name, "Invalid criteria"},
|
||||
}}
|
||||
}
|
||||
switch name {
|
||||
case "after":
|
||||
opts.start = timestamp
|
||||
case "before":
|
||||
opts.end = timestamp
|
||||
}
|
||||
case "from":
|
||||
opts.from = value
|
||||
case "in":
|
||||
u, upstreamName, err := dc.unmarshalEntity(value)
|
||||
if err != nil {
|
||||
return ircError{&irc.Message{
|
||||
Command: "FAIL",
|
||||
Params: []string{"SEARCH", "INVALID_PARAMS", name, "Invalid criteria"},
|
||||
}}
|
||||
}
|
||||
uc = u
|
||||
opts.in = u.network.casemap(upstreamName)
|
||||
case "text":
|
||||
opts.text = value
|
||||
case "limit":
|
||||
limit, err := strconv.Atoi(value)
|
||||
if err != nil || limit <= 0 {
|
||||
return ircError{&irc.Message{
|
||||
Command: "FAIL",
|
||||
Params: []string{"SEARCH", "INVALID_PARAMS", name, "Invalid limit"},
|
||||
}}
|
||||
}
|
||||
opts.limit = limit
|
||||
}
|
||||
}
|
||||
if uc == nil {
|
||||
return ircError{&irc.Message{
|
||||
Command: "FAIL",
|
||||
Params: []string{"SEARCH", "INVALID_PARAMS", "in", "The in parameter is mandatory"},
|
||||
}}
|
||||
}
|
||||
if opts.limit > searchMaxLimit {
|
||||
opts.limit = searchMaxLimit
|
||||
}
|
||||
|
||||
messages, err := store.Search(ctx, &uc.network.Network, opts)
|
||||
if err != nil {
|
||||
dc.logger.Printf("failed fetching messages for search: %v", err)
|
||||
return ircError{&irc.Message{
|
||||
Command: "FAIL",
|
||||
Params: []string{"SEARCH", "INTERNAL_ERROR", "Messages could not be retrieved"},
|
||||
}}
|
||||
}
|
||||
|
||||
dc.SendBatch("soju.im/search", nil, nil, func(batchRef irc.TagValue) {
|
||||
for _, msg := range messages {
|
||||
msg.Tags["batch"] = batchRef
|
||||
dc.SendMessage(dc.marshalMessage(msg, uc.network))
|
||||
}
|
||||
})
|
||||
case "BOUNCER":
|
||||
var subcommand string
|
||||
if err := parseMessageParams(msg, &subcommand); err != nil {
|
||||
|
18
msgstore.go
18
msgstore.go
@ -51,6 +51,24 @@ type chatHistoryMessageStore interface {
|
||||
LoadAfterTime(ctx context.Context, network *Network, entity string, start, end time.Time, limit int, events bool) ([]*irc.Message, error)
|
||||
}
|
||||
|
||||
// searchOptions carries the criteria of a single message search.
//
// Zero values mean "not specified": a zero start or end leaves that side of
// the time range open, and an empty from or text disables that filter.
type searchOptions struct {
	// start and end bound the time range that is searched.
	start, end time.Time
	// limit is the maximum number of messages to return.
	limit int
	// from filters on the message sender, in is the target whose history
	// is searched, and text is the text to look for in the message body.
	from, in, text string
}
|
||||
|
||||
// searchMessageStore is a message store that supports server-side search
|
||||
// operations.
|
||||
type searchMessageStore interface {
|
||||
messageStore
|
||||
|
||||
// Search returns messages matching the specified options.
|
||||
Search(ctx context.Context, network *Network, search searchOptions) ([]*irc.Message, error)
|
||||
}
|
||||
|
||||
type msgIDType uint
|
||||
|
||||
const (
|
||||
|
@ -88,6 +88,7 @@ type fsMessageStore struct {
|
||||
|
||||
var _ messageStore = (*fsMessageStore)(nil)
|
||||
var _ chatHistoryMessageStore = (*fsMessageStore)(nil)
|
||||
var _ searchMessageStore = (*fsMessageStore)(nil)
|
||||
|
||||
func newFSMessageStore(root string, user *User) *fsMessageStore {
|
||||
return &fsMessageStore{
|
||||
@ -398,7 +399,7 @@ func (ms *fsMessageStore) parseMessage(line string, network *Network, entity str
|
||||
return msg, t, nil
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) parseMessagesBefore(network *Network, entity string, ref time.Time, end time.Time, events bool, limit int, afterOffset int64) ([]*irc.Message, error) {
|
||||
func (ms *fsMessageStore) parseMessagesBefore(network *Network, entity string, ref time.Time, end time.Time, events bool, limit int, afterOffset int64, selector func(m *irc.Message) bool) ([]*irc.Message, error) {
|
||||
path := ms.logPath(network, entity, ref)
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
@ -430,6 +431,9 @@ func (ms *fsMessageStore) parseMessagesBefore(network *Network, entity string, r
|
||||
} else if !t.Before(ref) {
|
||||
break
|
||||
}
|
||||
if selector != nil && !selector(msg) {
|
||||
continue
|
||||
}
|
||||
|
||||
historyRing[cur%limit] = msg
|
||||
cur++
|
||||
@ -454,7 +458,7 @@ func (ms *fsMessageStore) parseMessagesBefore(network *Network, entity string, r
|
||||
}
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) parseMessagesAfter(network *Network, entity string, ref time.Time, end time.Time, events bool, limit int) ([]*irc.Message, error) {
|
||||
func (ms *fsMessageStore) parseMessagesAfter(network *Network, entity string, ref time.Time, end time.Time, events bool, limit int, selector func(m *irc.Message) bool) ([]*irc.Message, error) {
|
||||
path := ms.logPath(network, entity, ref)
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
@ -476,6 +480,9 @@ func (ms *fsMessageStore) parseMessagesAfter(network *Network, entity string, re
|
||||
} else if !t.Before(end) {
|
||||
break
|
||||
}
|
||||
if selector != nil && !selector(msg) {
|
||||
continue
|
||||
}
|
||||
|
||||
history = append(history, msg)
|
||||
}
|
||||
@ -486,14 +493,18 @@ func (ms *fsMessageStore) parseMessagesAfter(network *Network, entity string, re
|
||||
return history, nil
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) LoadBeforeTime(ctx context.Context, network *Network, entity string, start time.Time, end time.Time, limit int, events bool) ([]*irc.Message, error) {
|
||||
start = start.In(time.Local)
|
||||
func (ms *fsMessageStore) getBeforeTime(ctx context.Context, network *Network, entity string, start time.Time, end time.Time, limit int, events bool, selector func(m *irc.Message) bool) ([]*irc.Message, error) {
|
||||
if start.IsZero() {
|
||||
start = time.Now()
|
||||
} else {
|
||||
start = start.In(time.Local)
|
||||
}
|
||||
end = end.In(time.Local)
|
||||
history := make([]*irc.Message, limit)
|
||||
messages := make([]*irc.Message, limit)
|
||||
remaining := limit
|
||||
tries := 0
|
||||
for remaining > 0 && tries < fsMessageStoreMaxTries && end.Before(start) {
|
||||
buf, err := ms.parseMessagesBefore(network, entity, start, end, events, remaining, -1)
|
||||
buf, err := ms.parseMessagesBefore(network, entity, start, end, events, remaining, -1, selector)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -502,7 +513,7 @@ func (ms *fsMessageStore) LoadBeforeTime(ctx context.Context, network *Network,
|
||||
} else {
|
||||
tries = 0
|
||||
}
|
||||
copy(history[remaining-len(buf):], buf)
|
||||
copy(messages[remaining-len(buf):], buf)
|
||||
remaining -= len(buf)
|
||||
year, month, day := start.Date()
|
||||
start = time.Date(year, month, day, 0, 0, 0, 0, start.Location()).Add(-1)
|
||||
@ -512,17 +523,25 @@ func (ms *fsMessageStore) LoadBeforeTime(ctx context.Context, network *Network,
|
||||
}
|
||||
}
|
||||
|
||||
return history[remaining:], nil
|
||||
return messages[remaining:], nil
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) LoadAfterTime(ctx context.Context, network *Network, entity string, start time.Time, end time.Time, limit int, events bool) ([]*irc.Message, error) {
|
||||
func (ms *fsMessageStore) LoadBeforeTime(ctx context.Context, network *Network, entity string, start time.Time, end time.Time, limit int, events bool) ([]*irc.Message, error) {
|
||||
return ms.getBeforeTime(ctx, network, entity, start, end, limit, events, nil)
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) getAfterTime(ctx context.Context, network *Network, entity string, start time.Time, end time.Time, limit int, events bool, selector func(m *irc.Message) bool) ([]*irc.Message, error) {
|
||||
start = start.In(time.Local)
|
||||
end = end.In(time.Local)
|
||||
var history []*irc.Message
|
||||
if end.IsZero() {
|
||||
end = time.Now()
|
||||
} else {
|
||||
end = end.In(time.Local)
|
||||
}
|
||||
var messages []*irc.Message
|
||||
remaining := limit
|
||||
tries := 0
|
||||
for remaining > 0 && tries < fsMessageStoreMaxTries && start.Before(end) {
|
||||
buf, err := ms.parseMessagesAfter(network, entity, start, end, events, remaining)
|
||||
buf, err := ms.parseMessagesAfter(network, entity, start, end, events, remaining, selector)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -531,7 +550,7 @@ func (ms *fsMessageStore) LoadAfterTime(ctx context.Context, network *Network, e
|
||||
} else {
|
||||
tries = 0
|
||||
}
|
||||
history = append(history, buf...)
|
||||
messages = append(messages, buf...)
|
||||
remaining -= len(buf)
|
||||
year, month, day := start.Date()
|
||||
start = time.Date(year, month, day+1, 0, 0, 0, 0, start.Location())
|
||||
@ -540,7 +559,11 @@ func (ms *fsMessageStore) LoadAfterTime(ctx context.Context, network *Network, e
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return history, nil
|
||||
return messages, nil
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) LoadAfterTime(ctx context.Context, network *Network, entity string, start time.Time, end time.Time, limit int, events bool) ([]*irc.Message, error) {
|
||||
return ms.getAfterTime(ctx, network, entity, start, end, limit, events, nil)
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) LoadLatestID(ctx context.Context, network *Network, entity, id string, limit int) ([]*irc.Message, error) {
|
||||
@ -569,7 +592,7 @@ func (ms *fsMessageStore) LoadLatestID(ctx context.Context, network *Network, en
|
||||
offset = afterOffset
|
||||
}
|
||||
|
||||
buf, err := ms.parseMessagesBefore(network, entity, t, time.Time{}, false, remaining, offset)
|
||||
buf, err := ms.parseMessagesBefore(network, entity, t, time.Time{}, false, remaining, offset, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -670,6 +693,24 @@ func (ms *fsMessageStore) ListTargets(ctx context.Context, network *Network, sta
|
||||
return targets, nil
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) Search(ctx context.Context, network *Network, opts searchOptions) ([]*irc.Message, error) {
|
||||
text := strings.ToLower(opts.text)
|
||||
selector := func(m *irc.Message) bool {
|
||||
if opts.from != "" && m.User != opts.from {
|
||||
return false
|
||||
}
|
||||
if text != "" && !strings.Contains(strings.ToLower(m.Params[1]), text) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
if !opts.start.IsZero() {
|
||||
return ms.getAfterTime(ctx, network, opts.in, opts.start, opts.end, opts.limit, false, selector)
|
||||
} else {
|
||||
return ms.getBeforeTime(ctx, network, opts.in, opts.end, opts.start, opts.limit, false, selector)
|
||||
}
|
||||
}
|
||||
|
||||
func (ms *fsMessageStore) RenameNetwork(oldNet, newNet *Network) error {
|
||||
oldDir := filepath.Join(ms.root, escapeFilename(oldNet.GetName()))
|
||||
newDir := filepath.Join(ms.root, escapeFilename(newNet.GetName()))
|
||||
|
Loading…
Reference in New Issue
Block a user