512 lines
16 KiB
Go
512 lines
16 KiB
Go
// Package search provides full-text search functionality using Bleve.
|
|
//
|
|
// This package implements a local full-text search index for files and collections
|
|
// using the Bleve search library (https://blevesearch.com/). The search index is
|
|
// stored per-user in their local application data directory.
|
|
//
|
|
// Key features:
|
|
// - Case-insensitive substring matching (e.g., "mesh" matches "meshtastic")
|
|
// - Support for Bleve query syntax (+, -, "", *, ?)
|
|
// - Deduplication of search results by document ID
|
|
// - Batch indexing for efficient rebuilds
|
|
// - User-isolated indexes (each user has their own search index)
|
|
//
|
|
// Location: monorepo/native/desktop/maplefile/internal/service/search/search.go
|
|
package search
|
|
|
|
import (
	"context"
	"errors"
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/mapping"
	"go.uber.org/zap"

	"codeberg.org/mapleopentech/monorepo/native/desktop/maplefile/internal/config"
)
|
|
|
|
// SearchService provides full-text search capabilities over a user's files
// and collections, backed by a per-user Bleve index stored on disk.
//
// Lifecycle: call Initialize before any other method and Close when done.
// All other methods return an error while no index is open.
type SearchService interface {
	// Initialize opens or creates the search index for the specified user email.
	Initialize(ctx context.Context, userEmail string) error

	// Close closes the search index. Safe to call when no index is open.
	Close() error

	// IndexFile adds or updates a file in the search index.
	IndexFile(file *FileDocument) error

	// IndexCollection adds or updates a collection in the search index.
	IndexCollection(collection *CollectionDocument) error

	// DeleteFile removes a file from the search index.
	DeleteFile(fileID string) error

	// DeleteCollection removes a collection from the search index.
	DeleteCollection(collectionID string) error

	// Search performs a full-text search. The limit is clamped: values
	// outside (0, 100] fall back to a default of 50.
	Search(query string, limit int) (*SearchResult, error)

	// RebuildIndex rebuilds the entire search index from scratch by
	// deleting and re-creating the on-disk index, then batch-indexing
	// the provided documents.
	RebuildIndex(userEmail string, files []*FileDocument, collections []*CollectionDocument) error

	// GetIndexSize returns the size of the search index in bytes.
	// Note: the current implementation returns the document count as a
	// proxy, since Bleve exposes no direct byte-size API.
	GetIndexSize() (int64, error)

	// GetDocumentCount returns the number of documents in the index.
	GetDocumentCount() (uint64, error)
}
|
|
|
|
// FileDocument represents a file document in the search index.
//
// CollectionName is denormalized from the owning collection so a file can
// be found by its collection's name without a lookup at query time.
// Type is always "file"; the indexing methods force it so search hits can
// be classified back into files vs. collections.
type FileDocument struct {
	ID             string    `json:"id"`
	Filename       string    `json:"filename"`
	Description    string    `json:"description"`
	CollectionID   string    `json:"collection_id"`
	CollectionName string    `json:"collection_name"` // Denormalized for search
	Tags           []string  `json:"tags"`
	Size           int64     `json:"size"`
	CreatedAt      time.Time `json:"created_at"`
	Type           string    `json:"type"` // "file"
}
|
|
|
|
// CollectionDocument represents a collection document in the search index.
//
// Type is always "collection"; the indexing methods force it so search hits
// can be classified back into files vs. collections.
type CollectionDocument struct {
	ID          string    `json:"id"`
	Name        string    `json:"name"`
	Description string    `json:"description"`
	Tags        []string  `json:"tags"`
	FileCount   int       `json:"file_count"`
	CreatedAt   time.Time `json:"created_at"`
	Type        string    `json:"type"` // "collection"
}
|
|
|
|
// SearchResult contains the search results for a single Search call.
//
// TotalFiles and TotalCollections count the deduplicated documents actually
// returned, while TotalHits is the raw hit total reported by Bleve (which
// may exceed the returned counts when results are truncated by the limit).
type SearchResult struct {
	Files            []*FileDocument       `json:"files"`
	Collections      []*CollectionDocument `json:"collections"`
	TotalFiles       int                   `json:"total_files"`
	TotalCollections int                   `json:"total_collections"`
	TotalHits        uint64                `json:"total_hits"`
	MaxScore         float64               `json:"max_score"`
	Took             time.Duration         `json:"took"`
	Query            string                `json:"query"` // the caller's original, unmodified query
}
|
|
|
|
// searchService implements SearchService.
//
// index is nil until Initialize succeeds and after Close; every method
// guards on that. NOTE(review): access to index is not synchronized here —
// presumably callers serialize Initialize/Close against other calls;
// confirm before using from multiple goroutines.
type searchService struct {
	index         bleve.Index          // open Bleve index, or nil when not initialized
	configService config.ConfigService // resolves the per-user index directory
	logger        *zap.Logger
}
|
|
|
|
// New creates a new search service
|
|
func New(configService config.ConfigService, logger *zap.Logger) SearchService {
|
|
return &searchService{
|
|
configService: configService,
|
|
logger: logger,
|
|
}
|
|
}
|
|
|
|
// Initialize opens or creates the search index for the specified user
|
|
func (s *searchService) Initialize(ctx context.Context, userEmail string) error {
|
|
if userEmail == "" {
|
|
return fmt.Errorf("user email is required")
|
|
}
|
|
|
|
// Get search index path
|
|
indexPath, err := s.configService.GetUserSearchIndexDir(ctx, userEmail)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get search index path: %w", err)
|
|
}
|
|
|
|
if indexPath == "" {
|
|
return fmt.Errorf("search index path is empty")
|
|
}
|
|
|
|
s.logger.Info("Initializing search index", zap.String("path", indexPath))
|
|
|
|
// Try to open existing index
|
|
index, err := bleve.Open(indexPath)
|
|
if err == bleve.ErrorIndexPathDoesNotExist {
|
|
// Create new index
|
|
s.logger.Info("Creating new search index")
|
|
indexMapping := buildIndexMapping()
|
|
index, err = bleve.New(indexPath, indexMapping)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create search index: %w", err)
|
|
}
|
|
} else if err != nil {
|
|
return fmt.Errorf("failed to open search index: %w", err)
|
|
}
|
|
|
|
s.index = index
|
|
s.logger.Info("Search index initialized successfully")
|
|
|
|
return nil
|
|
}
|
|
|
|
// Close closes the search index
|
|
func (s *searchService) Close() error {
|
|
if s.index != nil {
|
|
err := s.index.Close()
|
|
s.index = nil
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// IndexFile adds or updates a file in the search index
|
|
func (s *searchService) IndexFile(file *FileDocument) error {
|
|
if s.index == nil {
|
|
return fmt.Errorf("search index not initialized")
|
|
}
|
|
|
|
file.Type = "file"
|
|
return s.index.Index(file.ID, file)
|
|
}
|
|
|
|
// IndexCollection adds or updates a collection in the search index
|
|
func (s *searchService) IndexCollection(collection *CollectionDocument) error {
|
|
if s.index == nil {
|
|
return fmt.Errorf("search index not initialized")
|
|
}
|
|
|
|
collection.Type = "collection"
|
|
return s.index.Index(collection.ID, collection)
|
|
}
|
|
|
|
// DeleteFile removes a file from the search index
|
|
func (s *searchService) DeleteFile(fileID string) error {
|
|
if s.index == nil {
|
|
return fmt.Errorf("search index not initialized")
|
|
}
|
|
|
|
return s.index.Delete(fileID)
|
|
}
|
|
|
|
// DeleteCollection removes a collection from the search index
|
|
func (s *searchService) DeleteCollection(collectionID string) error {
|
|
if s.index == nil {
|
|
return fmt.Errorf("search index not initialized")
|
|
}
|
|
|
|
return s.index.Delete(collectionID)
|
|
}
|
|
|
|
// Search performs a full-text search across files and collections.
|
|
//
|
|
// The search supports:
|
|
// - Simple queries: automatically wrapped with wildcards for substring matching
|
|
// - Advanced queries: use Bleve query syntax directly (+, -, "", *, ?)
|
|
//
|
|
// Examples:
|
|
// - "mesh" → matches "meshtastic", "mesh_config", etc.
|
|
// - "\"exact phrase\"" → matches exact phrase only
|
|
// - "+required -excluded" → must contain "required", must not contain "excluded"
|
|
func (s *searchService) Search(query string, limit int) (*SearchResult, error) {
|
|
if s.index == nil {
|
|
return nil, fmt.Errorf("search index not initialized")
|
|
}
|
|
|
|
if limit <= 0 || limit > 100 {
|
|
limit = 50
|
|
}
|
|
|
|
// Convert to lowercase for case-insensitive search
|
|
searchQueryStr := strings.ToLower(query)
|
|
|
|
// For simple queries (no operators), wrap with wildcards to enable substring matching.
|
|
// This allows "mesh" to match "meshtastic_antenna.png".
|
|
// If the user provides operators or wildcards, use their query as-is.
|
|
if !strings.Contains(searchQueryStr, "*") && !strings.Contains(searchQueryStr, "?") &&
|
|
!strings.Contains(searchQueryStr, "+") && !strings.Contains(searchQueryStr, "-") &&
|
|
!strings.Contains(searchQueryStr, "\"") {
|
|
searchQueryStr = "*" + searchQueryStr + "*"
|
|
}
|
|
|
|
searchQuery := bleve.NewQueryStringQuery(searchQueryStr)
|
|
searchRequest := bleve.NewSearchRequest(searchQuery)
|
|
searchRequest.Size = limit
|
|
searchRequest.Fields = []string{"*"}
|
|
searchRequest.Highlight = bleve.NewHighlight()
|
|
|
|
// Execute search
|
|
searchResults, err := s.index.Search(searchRequest)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("search failed: %w", err)
|
|
}
|
|
|
|
// Parse results with deduplication
|
|
result := &SearchResult{
|
|
Files: make([]*FileDocument, 0),
|
|
Collections: make([]*CollectionDocument, 0),
|
|
TotalHits: searchResults.Total,
|
|
MaxScore: searchResults.MaxScore,
|
|
Took: searchResults.Took,
|
|
Query: query,
|
|
}
|
|
|
|
// Use maps to deduplicate by ID
|
|
seenFileIDs := make(map[string]bool)
|
|
seenCollectionIDs := make(map[string]bool)
|
|
|
|
for _, hit := range searchResults.Hits {
|
|
docType, ok := hit.Fields["type"].(string)
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
if docType == "file" {
|
|
// Skip if we've already seen this file ID
|
|
if seenFileIDs[hit.ID] {
|
|
s.logger.Warn("Duplicate file in search results", zap.String("id", hit.ID))
|
|
continue
|
|
}
|
|
seenFileIDs[hit.ID] = true
|
|
|
|
file := &FileDocument{
|
|
ID: hit.ID,
|
|
Filename: getStringField(hit.Fields, "filename"),
|
|
Description: getStringField(hit.Fields, "description"),
|
|
CollectionID: getStringField(hit.Fields, "collection_id"),
|
|
CollectionName: getStringField(hit.Fields, "collection_name"),
|
|
Tags: getStringArrayField(hit.Fields, "tags"),
|
|
Size: getInt64Field(hit.Fields, "size"),
|
|
}
|
|
if createdAt, ok := hit.Fields["created_at"].(string); ok {
|
|
file.CreatedAt, _ = time.Parse(time.RFC3339, createdAt)
|
|
}
|
|
result.Files = append(result.Files, file)
|
|
} else if docType == "collection" {
|
|
// Skip if we've already seen this collection ID
|
|
if seenCollectionIDs[hit.ID] {
|
|
s.logger.Warn("Duplicate collection in search results", zap.String("id", hit.ID))
|
|
continue
|
|
}
|
|
seenCollectionIDs[hit.ID] = true
|
|
|
|
collection := &CollectionDocument{
|
|
ID: hit.ID,
|
|
Name: getStringField(hit.Fields, "name"),
|
|
Description: getStringField(hit.Fields, "description"),
|
|
Tags: getStringArrayField(hit.Fields, "tags"),
|
|
FileCount: getIntField(hit.Fields, "file_count"),
|
|
}
|
|
if createdAt, ok := hit.Fields["created_at"].(string); ok {
|
|
collection.CreatedAt, _ = time.Parse(time.RFC3339, createdAt)
|
|
}
|
|
result.Collections = append(result.Collections, collection)
|
|
}
|
|
}
|
|
|
|
result.TotalFiles = len(result.Files)
|
|
result.TotalCollections = len(result.Collections)
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// RebuildIndex rebuilds the entire search index from scratch.
|
|
//
|
|
// This method:
|
|
// 1. Closes the existing index (if any)
|
|
// 2. Deletes the index directory completely
|
|
// 3. Creates a fresh new index
|
|
// 4. Batch-indexes all provided files and collections
|
|
//
|
|
// This approach ensures no stale or duplicate documents remain in the index.
|
|
// The userEmail is required to locate the user-specific index directory.
|
|
func (s *searchService) RebuildIndex(userEmail string, files []*FileDocument, collections []*CollectionDocument) error {
|
|
s.logger.Info("Rebuilding search index from scratch",
|
|
zap.Int("files", len(files)),
|
|
zap.Int("collections", len(collections)))
|
|
|
|
if userEmail == "" {
|
|
return fmt.Errorf("user email is required for rebuild")
|
|
}
|
|
|
|
// Close the current index
|
|
if s.index != nil {
|
|
s.logger.Info("Closing current index before rebuild")
|
|
if err := s.index.Close(); err != nil {
|
|
s.logger.Warn("Error closing index before rebuild", zap.Error(err))
|
|
}
|
|
s.index = nil
|
|
}
|
|
|
|
// Get the index path from config
|
|
ctx := context.Background()
|
|
indexPath, err := s.configService.GetUserSearchIndexDir(ctx, userEmail)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get search index path: %w", err)
|
|
}
|
|
|
|
// Delete the existing index directory
|
|
s.logger.Info("Deleting existing index", zap.String("path", indexPath))
|
|
// We don't check for error here because the directory might not exist
|
|
// and that's okay - we're about to create it
|
|
os.RemoveAll(indexPath)
|
|
|
|
// Create a fresh index
|
|
s.logger.Info("Creating fresh index", zap.String("path", indexPath))
|
|
indexMapping := buildIndexMapping()
|
|
index, err := bleve.New(indexPath, indexMapping)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create fresh index: %w", err)
|
|
}
|
|
|
|
s.index = index
|
|
|
|
// Now index all files and collections in a batch
|
|
batch := s.index.NewBatch()
|
|
|
|
// Index all files
|
|
for _, file := range files {
|
|
file.Type = "file"
|
|
if err := batch.Index(file.ID, file); err != nil {
|
|
s.logger.Error("Failed to batch index file", zap.String("id", file.ID), zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// Index all collections
|
|
for _, collection := range collections {
|
|
collection.Type = "collection"
|
|
if err := batch.Index(collection.ID, collection); err != nil {
|
|
s.logger.Error("Failed to batch index collection", zap.String("id", collection.ID), zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// Execute batch
|
|
if err := s.index.Batch(batch); err != nil {
|
|
return fmt.Errorf("failed to execute batch index: %w", err)
|
|
}
|
|
|
|
finalCount, _ := s.index.DocCount()
|
|
s.logger.Info("Search index rebuilt successfully",
|
|
zap.Uint64("documents", finalCount),
|
|
zap.Int("files_indexed", len(files)),
|
|
zap.Int("collections_indexed", len(collections)))
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetIndexSize returns the size of the search index in bytes
|
|
func (s *searchService) GetIndexSize() (int64, error) {
|
|
if s.index == nil {
|
|
return 0, fmt.Errorf("search index not initialized")
|
|
}
|
|
|
|
// Note: Bleve doesn't provide a direct way to get index size
|
|
// We return the document count as a proxy for size
|
|
// For actual disk usage, you would need to walk the index directory
|
|
count, err := s.index.DocCount()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return int64(count), nil
|
|
}
|
|
|
|
// GetDocumentCount returns the number of documents in the index
|
|
func (s *searchService) GetDocumentCount() (uint64, error) {
|
|
if s.index == nil {
|
|
return 0, fmt.Errorf("search index not initialized")
|
|
}
|
|
|
|
count, err := s.index.DocCount()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
return count, nil
|
|
}
|
|
|
|
// buildIndexMapping creates the Bleve index mapping for files and collections.
|
|
//
|
|
// Field types:
|
|
// - Text fields (filename, description, name, tags): Analyzed with "standard" analyzer
|
|
// for good tokenization without stemming (better for substring matching)
|
|
// - Keyword fields (collection_id, type): Exact match only, no analysis
|
|
// - Numeric fields (size, file_count): Stored as numbers for range queries
|
|
// - Date fields (created_at): Stored as datetime for date-based queries
|
|
func buildIndexMapping() mapping.IndexMapping {
|
|
indexMapping := bleve.NewIndexMapping()
|
|
|
|
// Use standard analyzer (not English) for better substring matching.
|
|
// The English analyzer applies stemming which can interfere with partial matches.
|
|
textFieldMapping := bleve.NewTextFieldMapping()
|
|
textFieldMapping.Analyzer = "standard"
|
|
|
|
// Create keyword field mapping (no analysis)
|
|
keywordFieldMapping := bleve.NewKeywordFieldMapping()
|
|
|
|
// Create numeric field mapping
|
|
numericFieldMapping := bleve.NewNumericFieldMapping()
|
|
|
|
// Create datetime field mapping
|
|
dateFieldMapping := bleve.NewDateTimeFieldMapping()
|
|
|
|
// File document mapping
|
|
fileMapping := bleve.NewDocumentMapping()
|
|
fileMapping.AddFieldMappingsAt("filename", textFieldMapping)
|
|
fileMapping.AddFieldMappingsAt("description", textFieldMapping)
|
|
fileMapping.AddFieldMappingsAt("collection_name", textFieldMapping)
|
|
fileMapping.AddFieldMappingsAt("tags", textFieldMapping)
|
|
fileMapping.AddFieldMappingsAt("collection_id", keywordFieldMapping)
|
|
fileMapping.AddFieldMappingsAt("size", numericFieldMapping)
|
|
fileMapping.AddFieldMappingsAt("created_at", dateFieldMapping)
|
|
fileMapping.AddFieldMappingsAt("type", keywordFieldMapping)
|
|
|
|
// Collection document mapping
|
|
collectionMapping := bleve.NewDocumentMapping()
|
|
collectionMapping.AddFieldMappingsAt("name", textFieldMapping)
|
|
collectionMapping.AddFieldMappingsAt("description", textFieldMapping)
|
|
collectionMapping.AddFieldMappingsAt("tags", textFieldMapping)
|
|
collectionMapping.AddFieldMappingsAt("file_count", numericFieldMapping)
|
|
collectionMapping.AddFieldMappingsAt("created_at", dateFieldMapping)
|
|
collectionMapping.AddFieldMappingsAt("type", keywordFieldMapping)
|
|
|
|
indexMapping.AddDocumentMapping("file", fileMapping)
|
|
indexMapping.AddDocumentMapping("collection", collectionMapping)
|
|
|
|
return indexMapping
|
|
}
|
|
|
|
// Helper functions to extract fields from search results
|
|
|
|
// getStringField returns the string stored under key, or "" when the key is
// absent or holds a non-string value.
func getStringField(fields map[string]interface{}, key string) string {
	val, ok := fields[key].(string)
	if !ok {
		return ""
	}
	return val
}
|
|
|
|
// getStringArrayField returns the string slice stored under key.
//
// Bleve flattens stored array fields: a multi-valued field comes back as
// []interface{}, but a field that held exactly one value is returned as a
// bare string. Both shapes are handled (the original code dropped the
// single-value case, losing e.g. a document's only tag); anything else
// yields an empty slice. Non-string elements inside an array are skipped.
func getStringArrayField(fields map[string]interface{}, key string) []string {
	switch val := fields[key].(type) {
	case []interface{}:
		result := make([]string, 0, len(val))
		for _, v := range val {
			if str, ok := v.(string); ok {
				result = append(result, str)
			}
		}
		return result
	case string:
		// Single-element field stored as a scalar.
		return []string{val}
	default:
		return []string{}
	}
}
|
|
|
|
// getIntField returns the numeric value stored under key as an int.
// Stored numbers arrive as float64, so that is the only type checked;
// missing or non-numeric values yield 0.
func getIntField(fields map[string]interface{}, key string) int {
	val, ok := fields[key].(float64)
	if !ok {
		return 0
	}
	return int(val)
}
|
|
|
|
// getInt64Field returns the numeric value stored under key as an int64.
// Stored numbers arrive as float64, so that is the only type checked;
// missing or non-numeric values yield 0.
func getInt64Field(fields map[string]interface{}, key string) int64 {
	val, ok := fields[key].(float64)
	if !ok {
		return 0
	}
	return int64(val)
}
|