monorepo/native/desktop/maplefile/internal/service/search/search.go

512 lines
16 KiB
Go

// Package search provides full-text search functionality using Bleve.
//
// This package implements a local full-text search index for files and collections
// using the Bleve search library (https://blevesearch.com/). The search index is
// stored per-user in their local application data directory.
//
// Key features:
// - Case-insensitive substring matching (e.g., "mesh" matches "meshtastic")
// - Support for Bleve query syntax (+, -, "", *, ?)
// - Deduplication of search results by document ID
// - Batch indexing for efficient rebuilds
// - User-isolated indexes (each user has their own search index)
//
// Location: monorepo/native/desktop/maplefile/internal/service/search/search.go
package search
import (
"context"
"fmt"
"os"
"strings"
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/mapping"
"go.uber.org/zap"
"codeberg.org/mapleopentech/monorepo/native/desktop/maplefile/internal/config"
)
// SearchService provides full-text search capabilities over file and
// collection documents.
//
// The implementation in this package returns a "search index not initialized"
// error from the indexing, deletion, search, and statistics methods until
// Initialize or RebuildIndex has successfully opened an index.
type SearchService interface {
// Initialize opens the search index for the specified user email, creating
// it when it does not exist yet. An empty userEmail is rejected.
Initialize(ctx context.Context, userEmail string) error
// Close closes the search index. Calling it when no index is open is a
// no-op that returns nil.
Close() error
// IndexFile adds or updates a file in the search index, keyed by file.ID.
IndexFile(file *FileDocument) error
// IndexCollection adds or updates a collection in the search index, keyed
// by collection.ID.
IndexCollection(collection *CollectionDocument) error
// DeleteFile removes the document stored under fileID from the search index.
DeleteFile(fileID string) error
// DeleteCollection removes the document stored under collectionID from the
// search index.
DeleteCollection(collectionID string) error
// Search performs a full-text search across files and collections and
// returns at most limit hits. In this package's implementation a limit
// that is <= 0 or > 100 is replaced by 50.
Search(query string, limit int) (*SearchResult, error)
// RebuildIndex discards the index of the given user and rebuilds it from
// scratch with the supplied files and collections.
RebuildIndex(userEmail string, files []*FileDocument, collections []*CollectionDocument) error
// GetIndexSize returns the size of the search index in bytes.
GetIndexSize() (int64, error)
// GetDocumentCount returns the number of documents in the index.
GetDocumentCount() (uint64, error)
}
// FileDocument represents a file document in the search index.
//
// The same struct is used both as the value handed to the index and as the
// shape of file hits returned in SearchResult.Files.
type FileDocument struct {
// ID is the key under which the document is stored in the index.
ID string `json:"id"`
// Filename is the file's name; mapped as an analyzed text field.
Filename string `json:"filename"`
// Description is free text about the file; mapped as an analyzed text field.
Description string `json:"description"`
// CollectionID identifies the collection the file belongs to.
CollectionID string `json:"collection_id"`
CollectionName string `json:"collection_name"` // Denormalized for search
// Tags are labels attached to the file; mapped as an analyzed text field.
Tags []string `json:"tags"`
// Size is the file size (presumably in bytes — confirm with the producer).
Size int64 `json:"size"`
// CreatedAt is the creation timestamp; mapped as a datetime field.
CreatedAt time.Time `json:"created_at"`
// Type discriminates document kinds. IndexFile and RebuildIndex overwrite
// it with "file" before the document is indexed.
Type string `json:"type"` // "file"
}
// CollectionDocument represents a collection document in the search index.
//
// The same struct is used both as the value handed to the index and as the
// shape of collection hits returned in SearchResult.Collections.
type CollectionDocument struct {
// ID is the key under which the document is stored in the index.
ID string `json:"id"`
// Name is the collection's name; mapped as an analyzed text field.
Name string `json:"name"`
// Description is free text about the collection; mapped as an analyzed
// text field.
Description string `json:"description"`
// Tags are labels attached to the collection; mapped as an analyzed text
// field.
Tags []string `json:"tags"`
// FileCount is the number of files in the collection as supplied by the
// caller; the search service does not compute it.
FileCount int `json:"file_count"`
// CreatedAt is the creation timestamp; mapped as a datetime field.
CreatedAt time.Time `json:"created_at"`
// Type discriminates document kinds. IndexCollection and RebuildIndex
// overwrite it with "collection" before the document is indexed.
Type string `json:"type"` // "collection"
}
// SearchResult contains the search results produced by Search.
type SearchResult struct {
// Files holds the file hits of the returned page; never nil.
Files []*FileDocument `json:"files"`
// Collections holds the collection hits of the returned page; never nil.
Collections []*CollectionDocument `json:"collections"`
// TotalFiles is len(Files), i.e. the number of file hits in this page,
// not the number of matching files in the whole index.
TotalFiles int `json:"total_files"`
// TotalCollections is len(Collections), i.e. the number of collection hits
// in this page.
TotalCollections int `json:"total_collections"`
// TotalHits is the total reported by the underlying index search.
TotalHits uint64 `json:"total_hits"`
// MaxScore is the maximum score reported by the underlying index search.
MaxScore float64 `json:"max_score"`
// Took is the duration reported by the underlying index search.
Took time.Duration `json:"took"`
// Query is the query string exactly as passed to Search (before
// lowercasing or wildcard wrapping).
Query string `json:"query"`
}
// searchService implements SearchService on top of a single Bleve index.
type searchService struct {
// index is the currently open Bleve index. It is nil until Initialize or
// RebuildIndex succeeds and is reset to nil by Close.
index bleve.Index
// configService resolves the per-user search index directory.
configService config.ConfigService
// logger receives informational, warning, and error messages.
logger *zap.Logger
}
// New returns a SearchService backed by the given configuration service and
// logger. No index is opened here; the returned service holds a nil index
// until Initialize or RebuildIndex is called.
func New(configService config.ConfigService, logger *zap.Logger) SearchService {
	svc := new(searchService)
	svc.configService = configService
	svc.logger = logger
	return svc
}
// Initialize opens the search index for the specified user, creating it when
// no index exists at the user's index path yet.
//
// If this service already has an index open, that index is closed first so
// that calling Initialize again (for example after switching users) does not
// leak the previous handle. A failure to close the previous index is logged
// and does not abort initialization, matching what RebuildIndex does.
//
// It returns an error when userEmail is empty, when the index path cannot be
// resolved or is empty, or when the index can neither be opened nor created.
// On error the service is left without an open index if a previous one had
// to be closed.
func (s *searchService) Initialize(ctx context.Context, userEmail string) error {
	if userEmail == "" {
		return fmt.Errorf("user email is required")
	}

	// Resolve and validate the target directory before touching any state.
	indexPath, err := s.configService.GetUserSearchIndexDir(ctx, userEmail)
	if err != nil {
		return fmt.Errorf("failed to get search index path: %w", err)
	}
	if indexPath == "" {
		return fmt.Errorf("search index path is empty")
	}

	// Release an index left open by an earlier Initialize/RebuildIndex call;
	// otherwise the assignment below would drop the only reference to it
	// without ever closing it.
	if s.index != nil {
		s.logger.Info("Closing previously opened search index")
		if closeErr := s.index.Close(); closeErr != nil {
			s.logger.Warn("Error closing previous search index", zap.Error(closeErr))
		}
		s.index = nil
	}

	s.logger.Info("Initializing search index", zap.String("path", indexPath))

	// Try to open an existing index; fall back to creating one only when the
	// path does not exist yet.
	index, err := bleve.Open(indexPath)
	if err == bleve.ErrorIndexPathDoesNotExist {
		s.logger.Info("Creating new search index")
		index, err = bleve.New(indexPath, buildIndexMapping())
		if err != nil {
			return fmt.Errorf("failed to create search index: %w", err)
		}
	} else if err != nil {
		return fmt.Errorf("failed to open search index: %w", err)
	}

	s.index = index
	s.logger.Info("Search index initialized successfully")
	return nil
}
// Close releases the open search index, if any, and clears the service's
// reference to it. It returns the error reported by the index's Close, or nil
// when no index was open.
func (s *searchService) Close() error {
	if s.index == nil {
		return nil
	}
	open := s.index
	closeErr := open.Close()
	// The reference is dropped even when Close fails.
	s.index = nil
	return closeErr
}
// IndexFile adds or updates a file in the search index, keyed by file.ID.
//
// The document's Type field is overwritten with "file" before indexing, so the
// caller's struct is modified. An error is returned when the index has not
// been initialized, when file is nil, or when the underlying index rejects
// the document.
func (s *searchService) IndexFile(file *FileDocument) error {
	if s.index == nil {
		return fmt.Errorf("search index not initialized")
	}
	// Guard against a nil document instead of panicking on the field write.
	if file == nil {
		return fmt.Errorf("file document is required")
	}
	file.Type = "file"
	return s.index.Index(file.ID, file)
}
// IndexCollection adds or updates a collection in the search index, keyed by
// collection.ID.
//
// The document's Type field is overwritten with "collection" before indexing,
// so the caller's struct is modified. An error is returned when the index has
// not been initialized, when collection is nil, or when the underlying index
// rejects the document.
func (s *searchService) IndexCollection(collection *CollectionDocument) error {
	if s.index == nil {
		return fmt.Errorf("search index not initialized")
	}
	// Guard against a nil document instead of panicking on the field write.
	if collection == nil {
		return fmt.Errorf("collection document is required")
	}
	collection.Type = "collection"
	return s.index.Index(collection.ID, collection)
}
// DeleteFile removes the document stored under fileID from the search index.
// It fails with "search index not initialized" when no index is open;
// otherwise it returns whatever the index's Delete reports.
func (s *searchService) DeleteFile(fileID string) error {
	if idx := s.index; idx != nil {
		return idx.Delete(fileID)
	}
	return fmt.Errorf("search index not initialized")
}
// DeleteCollection removes the document stored under collectionID from the
// search index. It fails with "search index not initialized" when no index is
// open; otherwise it returns whatever the index's Delete reports.
func (s *searchService) DeleteCollection(collectionID string) error {
	if idx := s.index; idx != nil {
		return idx.Delete(collectionID)
	}
	return fmt.Errorf("search index not initialized")
}
// Search performs a full-text search across files and collections.
//
// The search supports:
//   - Simple queries: every whitespace-separated term is wrapped with
//     wildcards for substring matching
//   - Advanced queries: use Bleve query syntax directly (+, -, "", *, ?)
//
// Examples:
//   - "mesh" → matches "meshtastic", "mesh_config", etc.
//   - "mesh antenna" → matches documents containing "mesh" or "antenna" as a
//     substring of any indexed term
//   - "\"exact phrase\"" → matches exact phrase only
//   - "+required -excluded" → must contain "required", must not contain "excluded"
//
// limit caps the number of hits; a value that is <= 0 or > 100 is replaced by
// 50. The returned SearchResult echoes the original query string, and its
// TotalFiles/TotalCollections count the hits in the returned page only.
//
// An error is returned when the index has not been initialized or when the
// underlying index search fails.
func (s *searchService) Search(query string, limit int) (*SearchResult, error) {
	if s.index == nil {
		return nil, fmt.Errorf("search index not initialized")
	}
	if limit <= 0 || limit > 100 {
		limit = 50
	}

	searchRequest := bleve.NewSearchRequest(bleve.NewQueryStringQuery(buildSearchQueryString(query)))
	searchRequest.Size = limit
	searchRequest.Fields = []string{"*"}
	// No highlighting is requested: SearchResult has nowhere to carry
	// fragments, so computing them for every hit would be wasted work.

	searchResults, err := s.index.Search(searchRequest)
	if err != nil {
		return nil, fmt.Errorf("search failed: %w", err)
	}

	result := &SearchResult{
		Files:       make([]*FileDocument, 0),
		Collections: make([]*CollectionDocument, 0),
		TotalHits:   searchResults.Total,
		MaxScore:    searchResults.MaxScore,
		Took:        searchResults.Took,
		Query:       query,
	}

	// Track IDs already emitted so a document never appears twice.
	seenFileIDs := make(map[string]bool)
	seenCollectionIDs := make(map[string]bool)

	for _, hit := range searchResults.Hits {
		// Hits without a stored string "type" cannot be classified; skip them.
		docType, ok := hit.Fields["type"].(string)
		if !ok {
			continue
		}

		switch docType {
		case "file":
			if seenFileIDs[hit.ID] {
				s.logger.Warn("Duplicate file in search results", zap.String("id", hit.ID))
				continue
			}
			seenFileIDs[hit.ID] = true

			file := &FileDocument{
				ID:             hit.ID,
				Filename:       getStringField(hit.Fields, "filename"),
				Description:    getStringField(hit.Fields, "description"),
				CollectionID:   getStringField(hit.Fields, "collection_id"),
				CollectionName: getStringField(hit.Fields, "collection_name"),
				Tags:           getStringArrayField(hit.Fields, "tags"),
				Size:           getInt64Field(hit.Fields, "size"),
				Type:           docType,
			}
			// A missing or unparsable timestamp leaves CreatedAt at its zero value.
			if createdAt, ok := hit.Fields["created_at"].(string); ok {
				if parsed, parseErr := time.Parse(time.RFC3339, createdAt); parseErr == nil {
					file.CreatedAt = parsed
				}
			}
			result.Files = append(result.Files, file)

		case "collection":
			if seenCollectionIDs[hit.ID] {
				s.logger.Warn("Duplicate collection in search results", zap.String("id", hit.ID))
				continue
			}
			seenCollectionIDs[hit.ID] = true

			collection := &CollectionDocument{
				ID:          hit.ID,
				Name:        getStringField(hit.Fields, "name"),
				Description: getStringField(hit.Fields, "description"),
				Tags:        getStringArrayField(hit.Fields, "tags"),
				FileCount:   getIntField(hit.Fields, "file_count"),
				Type:        docType,
			}
			// A missing or unparsable timestamp leaves CreatedAt at its zero value.
			if createdAt, ok := hit.Fields["created_at"].(string); ok {
				if parsed, parseErr := time.Parse(time.RFC3339, createdAt); parseErr == nil {
					collection.CreatedAt = parsed
				}
			}
			result.Collections = append(result.Collections, collection)
		}
	}

	result.TotalFiles = len(result.Files)
	result.TotalCollections = len(result.Collections)
	return result, nil
}

// buildSearchQueryString converts user input into the Bleve query string that
// Search executes.
//
// The input is lowercased for case-insensitive matching. If it contains any of
// the characters * ? + - " it is treated as an advanced query and returned
// as-is (lowercased). Otherwise each whitespace-separated term is wrapped in
// wildcards individually: wrapping only the whole string would turn
// "foo bar" into the two terms "*foo" and "bar*", and surrounding whitespace
// would yield a bare "*" term that matches everything. A blank query keeps
// its historical meaning and becomes the match-anything pattern "**".
func buildSearchQueryString(query string) string {
	lowered := strings.ToLower(query)
	if strings.ContainsAny(lowered, "*?+-\"") {
		return lowered
	}

	terms := strings.Fields(lowered)
	if len(terms) == 0 {
		return "**"
	}
	for i, term := range terms {
		terms[i] = "*" + term + "*"
	}
	return strings.Join(terms, " ")
}
// RebuildIndex rebuilds the entire search index from scratch.
//
// This method:
//  1. Resolves and validates the user's index directory
//  2. Closes the existing index (if any)
//  3. Deletes the index directory completely
//  4. Creates a fresh new index
//  5. Batch-indexes all provided files and collections
//
// This approach ensures no stale or duplicate documents remain in the index.
// The userEmail is required to locate the user-specific index directory.
//
// Each document's Type field is overwritten ("file" / "collection") before it
// is queued, so the caller's structs are modified. Nil entries in files or
// collections are skipped with a warning. A document that cannot be added to
// the batch is logged and skipped; it does not abort the rebuild.
//
// An error is returned when userEmail is empty, when the index path cannot be
// resolved or is empty, when the old index directory cannot be removed, when
// the fresh index cannot be created, or when executing the batch fails. After
// a failure past step 2 the service has no usable previous index.
func (s *searchService) RebuildIndex(userEmail string, files []*FileDocument, collections []*CollectionDocument) error {
	s.logger.Info("Rebuilding search index from scratch",
		zap.Int("files", len(files)),
		zap.Int("collections", len(collections)))

	if userEmail == "" {
		return fmt.Errorf("user email is required for rebuild")
	}

	// Resolve the target directory before closing anything, so a
	// configuration failure leaves the currently open index usable.
	// The interface carries no context, hence context.Background().
	indexPath, err := s.configService.GetUserSearchIndexDir(context.Background(), userEmail)
	if err != nil {
		return fmt.Errorf("failed to get search index path: %w", err)
	}
	// Same validation as Initialize: an empty path does not identify a
	// per-user index directory.
	if indexPath == "" {
		return fmt.Errorf("search index path is empty")
	}

	// Close the current index so its files can be removed.
	if s.index != nil {
		s.logger.Info("Closing current index before rebuild")
		if closeErr := s.index.Close(); closeErr != nil {
			s.logger.Warn("Error closing index before rebuild", zap.Error(closeErr))
		}
		s.index = nil
	}

	// os.RemoveAll already returns nil when the path does not exist, so any
	// error here is a genuine failure (e.g. permissions) and must surface.
	s.logger.Info("Deleting existing index", zap.String("path", indexPath))
	if err := os.RemoveAll(indexPath); err != nil {
		return fmt.Errorf("failed to delete existing search index: %w", err)
	}

	s.logger.Info("Creating fresh index", zap.String("path", indexPath))
	index, err := bleve.New(indexPath, buildIndexMapping())
	if err != nil {
		return fmt.Errorf("failed to create fresh index: %w", err)
	}
	s.index = index

	// Queue every document in one batch and execute it once.
	batch := s.index.NewBatch()

	filesQueued := 0
	for _, file := range files {
		if file == nil {
			s.logger.Warn("Skipping nil file document during rebuild")
			continue
		}
		file.Type = "file"
		if err := batch.Index(file.ID, file); err != nil {
			s.logger.Error("Failed to batch index file", zap.String("id", file.ID), zap.Error(err))
			continue
		}
		filesQueued++
	}

	collectionsQueued := 0
	for _, collection := range collections {
		if collection == nil {
			s.logger.Warn("Skipping nil collection document during rebuild")
			continue
		}
		collection.Type = "collection"
		if err := batch.Index(collection.ID, collection); err != nil {
			s.logger.Error("Failed to batch index collection", zap.String("id", collection.ID), zap.Error(err))
			continue
		}
		collectionsQueued++
	}

	if err := s.index.Batch(batch); err != nil {
		return fmt.Errorf("failed to execute batch index: %w", err)
	}

	// The count is only used for the log line, so a failure to read it is
	// reported but does not fail the rebuild.
	finalCount, err := s.index.DocCount()
	if err != nil {
		s.logger.Warn("Failed to read document count after rebuild", zap.Error(err))
	}
	s.logger.Info("Search index rebuilt successfully",
		zap.Uint64("documents", finalCount),
		zap.Int("files_indexed", filesQueued),
		zap.Int("collections_indexed", collectionsQueued))
	return nil
}
// GetIndexSize returns the size of the search index in bytes.
//
// The value is read from the index's own statistics (the "CurOnDiskBytes"
// entry of the "index" section of StatsMap) rather than the document count,
// which is not a byte size.
// NOTE(review): the statistic is expected from Bleve's default on-disk
// (scorch) index type — confirm against the Bleve version in use.
//
// An error is returned when the index has not been initialized or when the
// index does not expose an on-disk size statistic.
func (s *searchService) GetIndexSize() (int64, error) {
	if s.index == nil {
		return 0, fmt.Errorf("search index not initialized")
	}

	// Indexing a nil map and asserting on a nil interface are both safe, so
	// a missing section or key simply falls through to the error below.
	indexStats, _ := s.index.StatsMap()["index"].(map[string]interface{})
	switch size := indexStats["CurOnDiskBytes"].(type) {
	case uint64:
		return int64(size), nil
	case int64:
		return size, nil
	case int:
		return int64(size), nil
	case float64:
		return int64(size), nil
	}
	return 0, fmt.Errorf("search index size is unavailable")
}
// GetDocumentCount reports how many documents the open index contains. It
// fails with "search index not initialized" when no index is open, and
// returns 0 together with the index's error when the count cannot be read.
func (s *searchService) GetDocumentCount() (uint64, error) {
	idx := s.index
	if idx == nil {
		return 0, fmt.Errorf("search index not initialized")
	}

	total, countErr := idx.DocCount()
	if countErr != nil {
		return 0, countErr
	}
	return total, nil
}
// buildIndexMapping creates the Bleve index mapping for files and collections.
//
// Field types:
//   - Text fields (filename, description, name, tags): Analyzed with "standard" analyzer
//     for good tokenization without stemming (better for substring matching)
//   - Keyword fields (collection_id, type): Exact match only, no analysis
//   - Numeric fields (size, file_count): Stored as numbers for range queries
//   - Date fields (created_at): Stored as datetime for date-based queries
//
// The mapping's type field is set to "Type" so that a document's Type struct
// field ("file" or "collection") selects the matching document mapping below.
// Without it the documents — which implement no type-classifier method — fall
// back to the default mapping and the per-type field mappings never apply.
// NOTE(review): Bleve is understood to resolve the type field on structs by Go
// field name rather than JSON tag, hence "Type" and not "type" — confirm.
// NOTE(review): indexes created before this change presumably keep the mapping
// they were created with until RebuildIndex is run — confirm.
func buildIndexMapping() mapping.IndexMapping {
	indexMapping := bleve.NewIndexMapping()
	indexMapping.TypeField = "Type"

	// Use standard analyzer (not English) for better substring matching.
	// The English analyzer applies stemming which can interfere with partial matches.
	textFieldMapping := bleve.NewTextFieldMapping()
	textFieldMapping.Analyzer = "standard"

	// Keyword fields are indexed verbatim (no analysis).
	keywordFieldMapping := bleve.NewKeywordFieldMapping()
	numericFieldMapping := bleve.NewNumericFieldMapping()
	dateFieldMapping := bleve.NewDateTimeFieldMapping()

	// File document mapping; property names follow the structs' JSON tags.
	fileMapping := bleve.NewDocumentMapping()
	fileMapping.AddFieldMappingsAt("filename", textFieldMapping)
	fileMapping.AddFieldMappingsAt("description", textFieldMapping)
	fileMapping.AddFieldMappingsAt("collection_name", textFieldMapping)
	fileMapping.AddFieldMappingsAt("tags", textFieldMapping)
	fileMapping.AddFieldMappingsAt("collection_id", keywordFieldMapping)
	fileMapping.AddFieldMappingsAt("size", numericFieldMapping)
	fileMapping.AddFieldMappingsAt("created_at", dateFieldMapping)
	fileMapping.AddFieldMappingsAt("type", keywordFieldMapping)

	// Collection document mapping.
	collectionMapping := bleve.NewDocumentMapping()
	collectionMapping.AddFieldMappingsAt("name", textFieldMapping)
	collectionMapping.AddFieldMappingsAt("description", textFieldMapping)
	collectionMapping.AddFieldMappingsAt("tags", textFieldMapping)
	collectionMapping.AddFieldMappingsAt("file_count", numericFieldMapping)
	collectionMapping.AddFieldMappingsAt("created_at", dateFieldMapping)
	collectionMapping.AddFieldMappingsAt("type", keywordFieldMapping)

	// The names registered here must equal the values written to the
	// documents' Type field by IndexFile/IndexCollection/RebuildIndex.
	indexMapping.AddDocumentMapping("file", fileMapping)
	indexMapping.AddDocumentMapping("collection", collectionMapping)
	return indexMapping
}
// Helper functions to extract fields from search results.

// getStringField looks up key in fields and returns its value when that value
// is a string. A missing key or a value of any other type yields "".
func getStringField(fields map[string]interface{}, key string) string {
	text, isString := fields[key].(string)
	if !isString {
		return ""
	}
	return text
}
// getStringArrayField looks up key in fields and returns its value as a slice
// of strings. The result is never nil.
//
// Accepted shapes:
//   - []interface{}: every string element is kept, other elements are dropped
//   - []string: returned as a copy
//   - string: returned as a one-element slice. A stored field that holds
//     exactly one value is reported by the search layer as a bare string
//     rather than a slice, so without this case a document with a single tag
//     would come back with no tags.
//
// A missing key or a value of any other type yields an empty slice.
func getStringArrayField(fields map[string]interface{}, key string) []string {
	switch val := fields[key].(type) {
	case string:
		return []string{val}
	case []string:
		// Copy so the caller cannot alias the hit's internal slice.
		return append(make([]string, 0, len(val)), val...)
	case []interface{}:
		result := make([]string, 0, len(val))
		for _, v := range val {
			if str, ok := v.(string); ok {
				result = append(result, str)
			}
		}
		return result
	}
	return []string{}
}
// getIntField looks up key in fields and, when the value is a float64,
// returns it converted to int (fractional part discarded). A missing key or a
// value of any other type yields 0.
func getIntField(fields map[string]interface{}, key string) int {
	number, isFloat := fields[key].(float64)
	if !isFloat {
		return 0
	}
	return int(number)
}
// getInt64Field looks up key in fields and, when the value is a float64,
// returns it converted to int64 (fractional part discarded). A missing key or
// a value of any other type yields 0.
func getInt64Field(fields map[string]interface{}, key string) int64 {
	number, isFloat := fields[key].(float64)
	if !isFloat {
		return 0
	}
	return int64(number)
}