// Package search provides full-text search functionality using Bleve. // // This package implements a local full-text search index for files and collections // using the Bleve search library (https://blevesearch.com/). The search index is // stored per-user in their local application data directory. // // Key features: // - Case-insensitive substring matching (e.g., "mesh" matches "meshtastic") // - Support for Bleve query syntax (+, -, "", *, ?) // - Deduplication of search results by document ID // - Batch indexing for efficient rebuilds // - User-isolated indexes (each user has their own search index) // // Location: monorepo/native/desktop/maplefile/internal/service/search/search.go package search import ( "context" "fmt" "os" "strings" "time" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/mapping" "go.uber.org/zap" "codeberg.org/mapleopentech/monorepo/native/desktop/maplefile/internal/config" ) // SearchService provides full-text search capabilities type SearchService interface { // Initialize opens or creates the search index for the specified user email Initialize(ctx context.Context, userEmail string) error // Close closes the search index Close() error // IndexFile adds or updates a file in the search index IndexFile(file *FileDocument) error // IndexCollection adds or updates a collection in the search index IndexCollection(collection *CollectionDocument) error // DeleteFile removes a file from the search index DeleteFile(fileID string) error // DeleteCollection removes a collection from the search index DeleteCollection(collectionID string) error // Search performs a full-text search Search(query string, limit int) (*SearchResult, error) // RebuildIndex rebuilds the entire search index from scratch RebuildIndex(userEmail string, files []*FileDocument, collections []*CollectionDocument) error // GetIndexSize returns the size of the search index in bytes GetIndexSize() (int64, error) // GetDocumentCount returns the number of documents in the index GetDocumentCount() (uint64, error) } // FileDocument represents a file document in the search index type FileDocument struct { ID string `json:"id"` Filename string `json:"filename"` Description string `json:"description"` CollectionID string `json:"collection_id"` CollectionName string `json:"collection_name"` // Denormalized for search Tags []string `json:"tags"` Size int64 `json:"size"` CreatedAt time.Time `json:"created_at"` Type string `json:"type"` // "file" } // CollectionDocument represents a collection document in the search index type CollectionDocument struct { ID string `json:"id"` Name string `json:"name"` Description string `json:"description"` Tags []string `json:"tags"` FileCount int `json:"file_count"` CreatedAt time.Time `json:"created_at"` Type string `json:"type"` // "collection" } // SearchResult contains the search results type SearchResult struct { Files []*FileDocument `json:"files"` Collections []*CollectionDocument `json:"collections"` TotalFiles int `json:"total_files"` TotalCollections int `json:"total_collections"` TotalHits uint64 `json:"total_hits"` MaxScore float64 `json:"max_score"` Took time.Duration `json:"took"` Query string `json:"query"` } // searchService implements SearchService type searchService struct { index bleve.Index configService config.ConfigService logger *zap.Logger } // New creates a new search service func New(configService config.ConfigService, logger *zap.Logger) SearchService { return &searchService{ configService: configService, logger: logger, } } // Initialize opens or creates the search index for the specified user func (s *searchService) Initialize(ctx context.Context, userEmail string) error { if userEmail == "" { return fmt.Errorf("user email is required") } // Get search index path indexPath, err := s.configService.GetUserSearchIndexDir(ctx, userEmail) if err != nil { return fmt.Errorf("failed to get search index path: %w", err) } if indexPath == "" { return fmt.Errorf("search index path is empty") } s.logger.Info("Initializing search index", zap.String("path", indexPath)) // Try to open existing index index, err := bleve.Open(indexPath) if err == bleve.ErrorIndexPathDoesNotExist { // Create new index s.logger.Info("Creating new search index") indexMapping := buildIndexMapping() index, err = bleve.New(indexPath, indexMapping) if err != nil { return fmt.Errorf("failed to create search index: %w", err) } } else if err != nil { return fmt.Errorf("failed to open search index: %w", err) } s.index = index s.logger.Info("Search index initialized successfully") return nil } // Close closes the search index func (s *searchService) Close() error { if s.index != nil { err := s.index.Close() s.index = nil return err } return nil } // IndexFile adds or updates a file in the search index func (s *searchService) IndexFile(file *FileDocument) error { if s.index == nil { return fmt.Errorf("search index not initialized") } file.Type = "file" return s.index.Index(file.ID, file) } // IndexCollection adds or updates a collection in the search index func (s *searchService) IndexCollection(collection *CollectionDocument) error { if s.index == nil { return fmt.Errorf("search index not initialized") } collection.Type = "collection" return s.index.Index(collection.ID, collection) } // DeleteFile removes a file from the search index func (s *searchService) DeleteFile(fileID string) error { if s.index == nil { return fmt.Errorf("search index not initialized") } return s.index.Delete(fileID) } // DeleteCollection removes a collection from the search index func (s *searchService) DeleteCollection(collectionID string) error { if s.index == nil { return fmt.Errorf("search index not initialized") } return s.index.Delete(collectionID) } // Search performs a full-text search across files and collections. // // The search supports: // - Simple queries: automatically wrapped with wildcards for substring matching // - Advanced queries: use Bleve query syntax directly (+, -, "", *, ?) // // Examples: // - "mesh" → matches "meshtastic", "mesh_config", etc. // - "\"exact phrase\"" → matches exact phrase only // - "+required -excluded" → must contain "required", must not contain "excluded" func (s *searchService) Search(query string, limit int) (*SearchResult, error) { if s.index == nil { return nil, fmt.Errorf("search index not initialized") } if limit <= 0 || limit > 100 { limit = 50 } // Convert to lowercase for case-insensitive search searchQueryStr := strings.ToLower(query) // For simple queries (no operators), wrap with wildcards to enable substring matching. // This allows "mesh" to match "meshtastic_antenna.png". // If the user provides operators or wildcards, use their query as-is. if !strings.Contains(searchQueryStr, "*") && !strings.Contains(searchQueryStr, "?") && !strings.Contains(searchQueryStr, "+") && !strings.Contains(searchQueryStr, "-") && !strings.Contains(searchQueryStr, "\"") { searchQueryStr = "*" + searchQueryStr + "*" } searchQuery := bleve.NewQueryStringQuery(searchQueryStr) searchRequest := bleve.NewSearchRequest(searchQuery) searchRequest.Size = limit searchRequest.Fields = []string{"*"} searchRequest.Highlight = bleve.NewHighlight() // Execute search searchResults, err := s.index.Search(searchRequest) if err != nil { return nil, fmt.Errorf("search failed: %w", err) } // Parse results with deduplication result := &SearchResult{ Files: make([]*FileDocument, 0), Collections: make([]*CollectionDocument, 0), TotalHits: searchResults.Total, MaxScore: searchResults.MaxScore, Took: searchResults.Took, Query: query, } // Use maps to deduplicate by ID seenFileIDs := make(map[string]bool) seenCollectionIDs := make(map[string]bool) for _, hit := range searchResults.Hits { docType, ok := hit.Fields["type"].(string) if !ok { continue } if docType == "file" { // Skip if we've already seen this file ID if seenFileIDs[hit.ID] { s.logger.Warn("Duplicate file in search results", zap.String("id", hit.ID)) continue } seenFileIDs[hit.ID] = true file := &FileDocument{ ID: hit.ID, Filename: getStringField(hit.Fields, "filename"), Description: getStringField(hit.Fields, "description"), CollectionID: getStringField(hit.Fields, "collection_id"), CollectionName: getStringField(hit.Fields, "collection_name"), Tags: getStringArrayField(hit.Fields, "tags"), Size: getInt64Field(hit.Fields, "size"), } if createdAt, ok := hit.Fields["created_at"].(string); ok { file.CreatedAt, _ = time.Parse(time.RFC3339, createdAt) } result.Files = append(result.Files, file) } else if docType == "collection" { // Skip if we've already seen this collection ID if seenCollectionIDs[hit.ID] { s.logger.Warn("Duplicate collection in search results", zap.String("id", hit.ID)) continue } seenCollectionIDs[hit.ID] = true collection := &CollectionDocument{ ID: hit.ID, Name: getStringField(hit.Fields, "name"), Description: getStringField(hit.Fields, "description"), Tags: getStringArrayField(hit.Fields, "tags"), FileCount: getIntField(hit.Fields, "file_count"), } if createdAt, ok := hit.Fields["created_at"].(string); ok { collection.CreatedAt, _ = time.Parse(time.RFC3339, createdAt) } result.Collections = append(result.Collections, collection) } } result.TotalFiles = len(result.Files) result.TotalCollections = len(result.Collections) return result, nil } // RebuildIndex rebuilds the entire search index from scratch. // // This method: // 1. Closes the existing index (if any) // 2. Deletes the index directory completely // 3. Creates a fresh new index // 4. Batch-indexes all provided files and collections // // This approach ensures no stale or duplicate documents remain in the index. // The userEmail is required to locate the user-specific index directory. func (s *searchService) RebuildIndex(userEmail string, files []*FileDocument, collections []*CollectionDocument) error { s.logger.Info("Rebuilding search index from scratch", zap.Int("files", len(files)), zap.Int("collections", len(collections))) if userEmail == "" { return fmt.Errorf("user email is required for rebuild") } // Close the current index if s.index != nil { s.logger.Info("Closing current index before rebuild") if err := s.index.Close(); err != nil { s.logger.Warn("Error closing index before rebuild", zap.Error(err)) } s.index = nil } // Get the index path from config ctx := context.Background() indexPath, err := s.configService.GetUserSearchIndexDir(ctx, userEmail) if err != nil { return fmt.Errorf("failed to get search index path: %w", err) } // Delete the existing index directory s.logger.Info("Deleting existing index", zap.String("path", indexPath)) // We don't check for error here because the directory might not exist // and that's okay - we're about to create it os.RemoveAll(indexPath) // Create a fresh index s.logger.Info("Creating fresh index", zap.String("path", indexPath)) indexMapping := buildIndexMapping() index, err := bleve.New(indexPath, indexMapping) if err != nil { return fmt.Errorf("failed to create fresh index: %w", err) } s.index = index // Now index all files and collections in a batch batch := s.index.NewBatch() // Index all files for _, file := range files { file.Type = "file" if err := batch.Index(file.ID, file); err != nil { s.logger.Error("Failed to batch index file", zap.String("id", file.ID), zap.Error(err)) } } // Index all collections for _, collection := range collections { collection.Type = "collection" if err := batch.Index(collection.ID, collection); err != nil { s.logger.Error("Failed to batch index collection", zap.String("id", collection.ID), zap.Error(err)) } } // Execute batch if err := s.index.Batch(batch); err != nil { return fmt.Errorf("failed to execute batch index: %w", err) } finalCount, _ := s.index.DocCount() s.logger.Info("Search index rebuilt successfully", zap.Uint64("documents", finalCount), zap.Int("files_indexed", len(files)), zap.Int("collections_indexed", len(collections))) return nil } // GetIndexSize returns the size of the search index in bytes func (s *searchService) GetIndexSize() (int64, error) { if s.index == nil { return 0, fmt.Errorf("search index not initialized") } // Note: Bleve doesn't provide a direct way to get index size // We return the document count as a proxy for size // For actual disk usage, you would need to walk the index directory count, err := s.index.DocCount() if err != nil { return 0, err } return int64(count), nil } // GetDocumentCount returns the number of documents in the index func (s *searchService) GetDocumentCount() (uint64, error) { if s.index == nil { return 0, fmt.Errorf("search index not initialized") } count, err := s.index.DocCount() if err != nil { return 0, err } return count, nil } // buildIndexMapping creates the Bleve index mapping for files and collections. // // Field types: // - Text fields (filename, description, name, tags): Analyzed with "standard" analyzer // for good tokenization without stemming (better for substring matching) // - Keyword fields (collection_id, type): Exact match only, no analysis // - Numeric fields (size, file_count): Stored as numbers for range queries // - Date fields (created_at): Stored as datetime for date-based queries func buildIndexMapping() mapping.IndexMapping { indexMapping := bleve.NewIndexMapping() // Use standard analyzer (not English) for better substring matching. // The English analyzer applies stemming which can interfere with partial matches. textFieldMapping := bleve.NewTextFieldMapping() textFieldMapping.Analyzer = "standard" // Create keyword field mapping (no analysis) keywordFieldMapping := bleve.NewKeywordFieldMapping() // Create numeric field mapping numericFieldMapping := bleve.NewNumericFieldMapping() // Create datetime field mapping dateFieldMapping := bleve.NewDateTimeFieldMapping() // File document mapping fileMapping := bleve.NewDocumentMapping() fileMapping.AddFieldMappingsAt("filename", textFieldMapping) fileMapping.AddFieldMappingsAt("description", textFieldMapping) fileMapping.AddFieldMappingsAt("collection_name", textFieldMapping) fileMapping.AddFieldMappingsAt("tags", textFieldMapping) fileMapping.AddFieldMappingsAt("collection_id", keywordFieldMapping) fileMapping.AddFieldMappingsAt("size", numericFieldMapping) fileMapping.AddFieldMappingsAt("created_at", dateFieldMapping) fileMapping.AddFieldMappingsAt("type", keywordFieldMapping) // Collection document mapping collectionMapping := bleve.NewDocumentMapping() collectionMapping.AddFieldMappingsAt("name", textFieldMapping) collectionMapping.AddFieldMappingsAt("description", textFieldMapping) collectionMapping.AddFieldMappingsAt("tags", textFieldMapping) collectionMapping.AddFieldMappingsAt("file_count", numericFieldMapping) collectionMapping.AddFieldMappingsAt("created_at", dateFieldMapping) collectionMapping.AddFieldMappingsAt("type", keywordFieldMapping) indexMapping.AddDocumentMapping("file", fileMapping) indexMapping.AddDocumentMapping("collection", collectionMapping) return indexMapping } // Helper functions to extract fields from search results func getStringField(fields map[string]interface{}, key string) string { if val, ok := fields[key].(string); ok { return val } return "" } func getStringArrayField(fields map[string]interface{}, key string) []string { if val, ok := fields[key].([]interface{}); ok { result := make([]string, 0, len(val)) for _, v := range val { if str, ok := v.(string); ok { result = append(result, str) } } return result } return []string{} } func getIntField(fields map[string]interface{}, key string) int { if val, ok := fields[key].(float64); ok { return int(val) } return 0 } func getInt64Field(fields map[string]interface{}, key string) int64 { if val, ok := fields[key].(float64); ok { return int64(val) } return 0 }