Initial commit: Open sourcing all of the Maple Open Technologies code.
This commit is contained in:
commit
755d54a99d
2010 changed files with 448675 additions and 0 deletions
512
native/desktop/maplefile/internal/service/search/search.go
Normal file
512
native/desktop/maplefile/internal/service/search/search.go
Normal file
|
|
@ -0,0 +1,512 @@
|
|||
// Package search provides full-text search functionality using Bleve.
|
||||
//
|
||||
// This package implements a local full-text search index for files and collections
|
||||
// using the Bleve search library (https://blevesearch.com/). The search index is
|
||||
// stored per-user in their local application data directory.
|
||||
//
|
||||
// Key features:
|
||||
// - Case-insensitive substring matching (e.g., "mesh" matches "meshtastic")
|
||||
// - Support for Bleve query syntax (+, -, "", *, ?)
|
||||
// - Deduplication of search results by document ID
|
||||
// - Batch indexing for efficient rebuilds
|
||||
// - User-isolated indexes (each user has their own search index)
|
||||
//
|
||||
// Location: monorepo/native/desktop/maplefile/internal/service/search/search.go
|
||||
package search
|
||||
|
||||
import (
	"context"
	"errors"
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/mapping"
	"go.uber.org/zap"

	"codeberg.org/mapleopentech/monorepo/native/desktop/maplefile/internal/config"
)
|
||||
|
||||
// SearchService provides full-text search capabilities over files and
// collections, backed by a local, per-user Bleve index.
type SearchService interface {
	// Initialize opens or creates the search index for the specified user email.
	Initialize(ctx context.Context, userEmail string) error

	// Close closes the search index and releases its resources.
	Close() error

	// IndexFile adds or updates a file in the search index.
	IndexFile(file *FileDocument) error

	// IndexCollection adds or updates a collection in the search index.
	IndexCollection(collection *CollectionDocument) error

	// DeleteFile removes a file from the search index by its document ID.
	DeleteFile(fileID string) error

	// DeleteCollection removes a collection from the search index by its document ID.
	DeleteCollection(collectionID string) error

	// Search performs a full-text search across files and collections.
	// limit caps the number of hits requested from the index.
	Search(query string, limit int) (*SearchResult, error)

	// RebuildIndex rebuilds the entire search index from scratch using the
	// provided documents.
	RebuildIndex(userEmail string, files []*FileDocument, collections []*CollectionDocument) error

	// GetIndexSize returns the size of the search index in bytes.
	// NOTE(review): the current implementation returns the document count
	// as a proxy, not actual on-disk bytes.
	GetIndexSize() (int64, error)

	// GetDocumentCount returns the number of documents in the index.
	GetDocumentCount() (uint64, error)
}
|
||||
|
||||
// FileDocument represents a file document in the search index.
//
// CollectionName is denormalized from the owning collection so file hits
// can match on the collection name without a lookup. Type is always set
// to "file" by the indexing code and is used to route search results.
type FileDocument struct {
	ID             string    `json:"id"`
	Filename       string    `json:"filename"`
	Description    string    `json:"description"`
	CollectionID   string    `json:"collection_id"`
	CollectionName string    `json:"collection_name"` // Denormalized for search
	Tags           []string  `json:"tags"`
	Size           int64     `json:"size"` // presumably bytes — TODO confirm with callers
	CreatedAt      time.Time `json:"created_at"`
	Type           string    `json:"type"` // always "file"; set by IndexFile/RebuildIndex
}
|
||||
|
||||
// CollectionDocument represents a collection document in the search index.
//
// Type is always set to "collection" by the indexing code and is used to
// route search results.
type CollectionDocument struct {
	ID          string    `json:"id"`
	Name        string    `json:"name"`
	Description string    `json:"description"`
	Tags        []string  `json:"tags"`
	FileCount   int       `json:"file_count"` // number of files in the collection
	CreatedAt   time.Time `json:"created_at"`
	Type        string    `json:"type"` // always "collection"; set by IndexCollection/RebuildIndex
}
|
||||
|
||||
// SearchResult contains the parsed, deduplicated results of a search.
type SearchResult struct {
	Files            []*FileDocument       `json:"files"`             // deduplicated file hits
	Collections      []*CollectionDocument `json:"collections"`       // deduplicated collection hits
	TotalFiles       int                   `json:"total_files"`       // len(Files)
	TotalCollections int                   `json:"total_collections"` // len(Collections)
	TotalHits        uint64                `json:"total_hits"`        // raw hit count reported by Bleve (before dedup)
	MaxScore         float64               `json:"max_score"`         // highest relevance score among hits
	Took             time.Duration         `json:"took"`              // time Bleve spent executing the search
	Query            string                `json:"query"`             // the original (unmodified) query string
}
|
||||
|
||||
// searchService is the Bleve-backed implementation of SearchService.
type searchService struct {
	index         bleve.Index          // nil until Initialize succeeds; nil again after Close
	configService config.ConfigService // resolves the per-user index directory
	logger        *zap.Logger
}
|
||||
|
||||
// New creates a new search service
|
||||
func New(configService config.ConfigService, logger *zap.Logger) SearchService {
|
||||
return &searchService{
|
||||
configService: configService,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize opens or creates the search index for the specified user
|
||||
func (s *searchService) Initialize(ctx context.Context, userEmail string) error {
|
||||
if userEmail == "" {
|
||||
return fmt.Errorf("user email is required")
|
||||
}
|
||||
|
||||
// Get search index path
|
||||
indexPath, err := s.configService.GetUserSearchIndexDir(ctx, userEmail)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get search index path: %w", err)
|
||||
}
|
||||
|
||||
if indexPath == "" {
|
||||
return fmt.Errorf("search index path is empty")
|
||||
}
|
||||
|
||||
s.logger.Info("Initializing search index", zap.String("path", indexPath))
|
||||
|
||||
// Try to open existing index
|
||||
index, err := bleve.Open(indexPath)
|
||||
if err == bleve.ErrorIndexPathDoesNotExist {
|
||||
// Create new index
|
||||
s.logger.Info("Creating new search index")
|
||||
indexMapping := buildIndexMapping()
|
||||
index, err = bleve.New(indexPath, indexMapping)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create search index: %w", err)
|
||||
}
|
||||
} else if err != nil {
|
||||
return fmt.Errorf("failed to open search index: %w", err)
|
||||
}
|
||||
|
||||
s.index = index
|
||||
s.logger.Info("Search index initialized successfully")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close closes the search index
|
||||
func (s *searchService) Close() error {
|
||||
if s.index != nil {
|
||||
err := s.index.Close()
|
||||
s.index = nil
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IndexFile adds or updates a file in the search index
|
||||
func (s *searchService) IndexFile(file *FileDocument) error {
|
||||
if s.index == nil {
|
||||
return fmt.Errorf("search index not initialized")
|
||||
}
|
||||
|
||||
file.Type = "file"
|
||||
return s.index.Index(file.ID, file)
|
||||
}
|
||||
|
||||
// IndexCollection adds or updates a collection in the search index
|
||||
func (s *searchService) IndexCollection(collection *CollectionDocument) error {
|
||||
if s.index == nil {
|
||||
return fmt.Errorf("search index not initialized")
|
||||
}
|
||||
|
||||
collection.Type = "collection"
|
||||
return s.index.Index(collection.ID, collection)
|
||||
}
|
||||
|
||||
// DeleteFile removes a file from the search index
|
||||
func (s *searchService) DeleteFile(fileID string) error {
|
||||
if s.index == nil {
|
||||
return fmt.Errorf("search index not initialized")
|
||||
}
|
||||
|
||||
return s.index.Delete(fileID)
|
||||
}
|
||||
|
||||
// DeleteCollection removes a collection from the search index
|
||||
func (s *searchService) DeleteCollection(collectionID string) error {
|
||||
if s.index == nil {
|
||||
return fmt.Errorf("search index not initialized")
|
||||
}
|
||||
|
||||
return s.index.Delete(collectionID)
|
||||
}
|
||||
|
||||
// Search performs a full-text search across files and collections.
|
||||
//
|
||||
// The search supports:
|
||||
// - Simple queries: automatically wrapped with wildcards for substring matching
|
||||
// - Advanced queries: use Bleve query syntax directly (+, -, "", *, ?)
|
||||
//
|
||||
// Examples:
|
||||
// - "mesh" → matches "meshtastic", "mesh_config", etc.
|
||||
// - "\"exact phrase\"" → matches exact phrase only
|
||||
// - "+required -excluded" → must contain "required", must not contain "excluded"
|
||||
func (s *searchService) Search(query string, limit int) (*SearchResult, error) {
|
||||
if s.index == nil {
|
||||
return nil, fmt.Errorf("search index not initialized")
|
||||
}
|
||||
|
||||
if limit <= 0 || limit > 100 {
|
||||
limit = 50
|
||||
}
|
||||
|
||||
// Convert to lowercase for case-insensitive search
|
||||
searchQueryStr := strings.ToLower(query)
|
||||
|
||||
// For simple queries (no operators), wrap with wildcards to enable substring matching.
|
||||
// This allows "mesh" to match "meshtastic_antenna.png".
|
||||
// If the user provides operators or wildcards, use their query as-is.
|
||||
if !strings.Contains(searchQueryStr, "*") && !strings.Contains(searchQueryStr, "?") &&
|
||||
!strings.Contains(searchQueryStr, "+") && !strings.Contains(searchQueryStr, "-") &&
|
||||
!strings.Contains(searchQueryStr, "\"") {
|
||||
searchQueryStr = "*" + searchQueryStr + "*"
|
||||
}
|
||||
|
||||
searchQuery := bleve.NewQueryStringQuery(searchQueryStr)
|
||||
searchRequest := bleve.NewSearchRequest(searchQuery)
|
||||
searchRequest.Size = limit
|
||||
searchRequest.Fields = []string{"*"}
|
||||
searchRequest.Highlight = bleve.NewHighlight()
|
||||
|
||||
// Execute search
|
||||
searchResults, err := s.index.Search(searchRequest)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("search failed: %w", err)
|
||||
}
|
||||
|
||||
// Parse results with deduplication
|
||||
result := &SearchResult{
|
||||
Files: make([]*FileDocument, 0),
|
||||
Collections: make([]*CollectionDocument, 0),
|
||||
TotalHits: searchResults.Total,
|
||||
MaxScore: searchResults.MaxScore,
|
||||
Took: searchResults.Took,
|
||||
Query: query,
|
||||
}
|
||||
|
||||
// Use maps to deduplicate by ID
|
||||
seenFileIDs := make(map[string]bool)
|
||||
seenCollectionIDs := make(map[string]bool)
|
||||
|
||||
for _, hit := range searchResults.Hits {
|
||||
docType, ok := hit.Fields["type"].(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if docType == "file" {
|
||||
// Skip if we've already seen this file ID
|
||||
if seenFileIDs[hit.ID] {
|
||||
s.logger.Warn("Duplicate file in search results", zap.String("id", hit.ID))
|
||||
continue
|
||||
}
|
||||
seenFileIDs[hit.ID] = true
|
||||
|
||||
file := &FileDocument{
|
||||
ID: hit.ID,
|
||||
Filename: getStringField(hit.Fields, "filename"),
|
||||
Description: getStringField(hit.Fields, "description"),
|
||||
CollectionID: getStringField(hit.Fields, "collection_id"),
|
||||
CollectionName: getStringField(hit.Fields, "collection_name"),
|
||||
Tags: getStringArrayField(hit.Fields, "tags"),
|
||||
Size: getInt64Field(hit.Fields, "size"),
|
||||
}
|
||||
if createdAt, ok := hit.Fields["created_at"].(string); ok {
|
||||
file.CreatedAt, _ = time.Parse(time.RFC3339, createdAt)
|
||||
}
|
||||
result.Files = append(result.Files, file)
|
||||
} else if docType == "collection" {
|
||||
// Skip if we've already seen this collection ID
|
||||
if seenCollectionIDs[hit.ID] {
|
||||
s.logger.Warn("Duplicate collection in search results", zap.String("id", hit.ID))
|
||||
continue
|
||||
}
|
||||
seenCollectionIDs[hit.ID] = true
|
||||
|
||||
collection := &CollectionDocument{
|
||||
ID: hit.ID,
|
||||
Name: getStringField(hit.Fields, "name"),
|
||||
Description: getStringField(hit.Fields, "description"),
|
||||
Tags: getStringArrayField(hit.Fields, "tags"),
|
||||
FileCount: getIntField(hit.Fields, "file_count"),
|
||||
}
|
||||
if createdAt, ok := hit.Fields["created_at"].(string); ok {
|
||||
collection.CreatedAt, _ = time.Parse(time.RFC3339, createdAt)
|
||||
}
|
||||
result.Collections = append(result.Collections, collection)
|
||||
}
|
||||
}
|
||||
|
||||
result.TotalFiles = len(result.Files)
|
||||
result.TotalCollections = len(result.Collections)
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// RebuildIndex rebuilds the entire search index from scratch.
|
||||
//
|
||||
// This method:
|
||||
// 1. Closes the existing index (if any)
|
||||
// 2. Deletes the index directory completely
|
||||
// 3. Creates a fresh new index
|
||||
// 4. Batch-indexes all provided files and collections
|
||||
//
|
||||
// This approach ensures no stale or duplicate documents remain in the index.
|
||||
// The userEmail is required to locate the user-specific index directory.
|
||||
func (s *searchService) RebuildIndex(userEmail string, files []*FileDocument, collections []*CollectionDocument) error {
|
||||
s.logger.Info("Rebuilding search index from scratch",
|
||||
zap.Int("files", len(files)),
|
||||
zap.Int("collections", len(collections)))
|
||||
|
||||
if userEmail == "" {
|
||||
return fmt.Errorf("user email is required for rebuild")
|
||||
}
|
||||
|
||||
// Close the current index
|
||||
if s.index != nil {
|
||||
s.logger.Info("Closing current index before rebuild")
|
||||
if err := s.index.Close(); err != nil {
|
||||
s.logger.Warn("Error closing index before rebuild", zap.Error(err))
|
||||
}
|
||||
s.index = nil
|
||||
}
|
||||
|
||||
// Get the index path from config
|
||||
ctx := context.Background()
|
||||
indexPath, err := s.configService.GetUserSearchIndexDir(ctx, userEmail)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get search index path: %w", err)
|
||||
}
|
||||
|
||||
// Delete the existing index directory
|
||||
s.logger.Info("Deleting existing index", zap.String("path", indexPath))
|
||||
// We don't check for error here because the directory might not exist
|
||||
// and that's okay - we're about to create it
|
||||
os.RemoveAll(indexPath)
|
||||
|
||||
// Create a fresh index
|
||||
s.logger.Info("Creating fresh index", zap.String("path", indexPath))
|
||||
indexMapping := buildIndexMapping()
|
||||
index, err := bleve.New(indexPath, indexMapping)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create fresh index: %w", err)
|
||||
}
|
||||
|
||||
s.index = index
|
||||
|
||||
// Now index all files and collections in a batch
|
||||
batch := s.index.NewBatch()
|
||||
|
||||
// Index all files
|
||||
for _, file := range files {
|
||||
file.Type = "file"
|
||||
if err := batch.Index(file.ID, file); err != nil {
|
||||
s.logger.Error("Failed to batch index file", zap.String("id", file.ID), zap.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
// Index all collections
|
||||
for _, collection := range collections {
|
||||
collection.Type = "collection"
|
||||
if err := batch.Index(collection.ID, collection); err != nil {
|
||||
s.logger.Error("Failed to batch index collection", zap.String("id", collection.ID), zap.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
// Execute batch
|
||||
if err := s.index.Batch(batch); err != nil {
|
||||
return fmt.Errorf("failed to execute batch index: %w", err)
|
||||
}
|
||||
|
||||
finalCount, _ := s.index.DocCount()
|
||||
s.logger.Info("Search index rebuilt successfully",
|
||||
zap.Uint64("documents", finalCount),
|
||||
zap.Int("files_indexed", len(files)),
|
||||
zap.Int("collections_indexed", len(collections)))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetIndexSize returns the size of the search index in bytes
|
||||
func (s *searchService) GetIndexSize() (int64, error) {
|
||||
if s.index == nil {
|
||||
return 0, fmt.Errorf("search index not initialized")
|
||||
}
|
||||
|
||||
// Note: Bleve doesn't provide a direct way to get index size
|
||||
// We return the document count as a proxy for size
|
||||
// For actual disk usage, you would need to walk the index directory
|
||||
count, err := s.index.DocCount()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return int64(count), nil
|
||||
}
|
||||
|
||||
// GetDocumentCount returns the number of documents in the index
|
||||
func (s *searchService) GetDocumentCount() (uint64, error) {
|
||||
if s.index == nil {
|
||||
return 0, fmt.Errorf("search index not initialized")
|
||||
}
|
||||
|
||||
count, err := s.index.DocCount()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// buildIndexMapping creates the Bleve index mapping for files and collections.
|
||||
//
|
||||
// Field types:
|
||||
// - Text fields (filename, description, name, tags): Analyzed with "standard" analyzer
|
||||
// for good tokenization without stemming (better for substring matching)
|
||||
// - Keyword fields (collection_id, type): Exact match only, no analysis
|
||||
// - Numeric fields (size, file_count): Stored as numbers for range queries
|
||||
// - Date fields (created_at): Stored as datetime for date-based queries
|
||||
func buildIndexMapping() mapping.IndexMapping {
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
// Use standard analyzer (not English) for better substring matching.
|
||||
// The English analyzer applies stemming which can interfere with partial matches.
|
||||
textFieldMapping := bleve.NewTextFieldMapping()
|
||||
textFieldMapping.Analyzer = "standard"
|
||||
|
||||
// Create keyword field mapping (no analysis)
|
||||
keywordFieldMapping := bleve.NewKeywordFieldMapping()
|
||||
|
||||
// Create numeric field mapping
|
||||
numericFieldMapping := bleve.NewNumericFieldMapping()
|
||||
|
||||
// Create datetime field mapping
|
||||
dateFieldMapping := bleve.NewDateTimeFieldMapping()
|
||||
|
||||
// File document mapping
|
||||
fileMapping := bleve.NewDocumentMapping()
|
||||
fileMapping.AddFieldMappingsAt("filename", textFieldMapping)
|
||||
fileMapping.AddFieldMappingsAt("description", textFieldMapping)
|
||||
fileMapping.AddFieldMappingsAt("collection_name", textFieldMapping)
|
||||
fileMapping.AddFieldMappingsAt("tags", textFieldMapping)
|
||||
fileMapping.AddFieldMappingsAt("collection_id", keywordFieldMapping)
|
||||
fileMapping.AddFieldMappingsAt("size", numericFieldMapping)
|
||||
fileMapping.AddFieldMappingsAt("created_at", dateFieldMapping)
|
||||
fileMapping.AddFieldMappingsAt("type", keywordFieldMapping)
|
||||
|
||||
// Collection document mapping
|
||||
collectionMapping := bleve.NewDocumentMapping()
|
||||
collectionMapping.AddFieldMappingsAt("name", textFieldMapping)
|
||||
collectionMapping.AddFieldMappingsAt("description", textFieldMapping)
|
||||
collectionMapping.AddFieldMappingsAt("tags", textFieldMapping)
|
||||
collectionMapping.AddFieldMappingsAt("file_count", numericFieldMapping)
|
||||
collectionMapping.AddFieldMappingsAt("created_at", dateFieldMapping)
|
||||
collectionMapping.AddFieldMappingsAt("type", keywordFieldMapping)
|
||||
|
||||
indexMapping.AddDocumentMapping("file", fileMapping)
|
||||
indexMapping.AddDocumentMapping("collection", collectionMapping)
|
||||
|
||||
return indexMapping
|
||||
}
|
||||
|
||||
// Helper functions to extract fields from search results
|
||||
|
||||
// getStringField extracts a string value from a Bleve hit's stored
// fields, returning "" when the key is absent or not a string.
func getStringField(fields map[string]interface{}, key string) string {
	val, ok := fields[key].(string)
	if !ok {
		return ""
	}
	return val
}
|
||||
|
||||
// getStringArrayField extracts a []string from a Bleve hit's stored
// fields. Non-string elements are skipped; a missing or mistyped value
// yields an empty (non-nil) slice.
func getStringArrayField(fields map[string]interface{}, key string) []string {
	raw, ok := fields[key].([]interface{})
	if !ok {
		return []string{}
	}

	out := make([]string, 0, len(raw))
	for _, item := range raw {
		if str, isStr := item.(string); isStr {
			out = append(out, str)
		}
	}
	return out
}
|
||||
|
||||
// getIntField extracts a numeric field stored as float64 (Bleve's JSON
// number representation) and truncates it to int; returns 0 when the key
// is absent or not a float64.
func getIntField(fields map[string]interface{}, key string) int {
	num, ok := fields[key].(float64)
	if !ok {
		return 0
	}
	return int(num)
}
|
||||
|
||||
// getInt64Field extracts a numeric field stored as float64 (Bleve's JSON
// number representation) and truncates it to int64; returns 0 when the
// key is absent or not a float64.
func getInt64Field(fields map[string]interface{}, key string) int64 {
	num, ok := fields[key].(float64)
	if !ok {
		return 0
	}
	return int64(num)
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue