Initial commit: Open sourcing all of the Maple Open Technologies code.

This commit is contained in:
Bartlomiej Mika 2025-12-02 14:33:08 -05:00
commit 755d54a99d
2010 changed files with 448675 additions and 0 deletions

View file

@ -0,0 +1,13 @@
// monorepo/cloud/backend/internal/maplefile/domain/file/constants.go
package file
// Lifecycle states a file record can be in. The values are plain strings so
// they can be stored and compared directly.
const (
	// FileStatePending is where a file starts: the record exists but the
	// upload has not happened yet.
	FileStatePending = "pending"

	// FileStateActive means the upload is complete and the file can be used.
	FileStateActive = "active"

	// FileStateDeleted means the file has been marked as deleted. It remains
	// accessible for a period and is eventually removed permanently.
	FileStateDeleted = "deleted"

	// FileStateArchived means the file is no longer accessible.
	FileStateArchived = "archived"
)

View file

@ -0,0 +1,95 @@
// monorepo/cloud/backend/internal/maplefile/domain/file/interface.go
package file
import (
"context"
"time"
"github.com/gocql/gocql"
)
// FileMetadataRepository defines the interface for interacting with file metadata storage.
// It handles operations related to storing, retrieving, updating, and deleting file information (metadata).
//
// The basic CRUD and state methods take no context.Context; the listing, counting,
// storage-size, anonymization and tag methods do. No implementation is visible from
// this declaration, so notes marked "presumably" below should be confirmed against
// the concrete repository.
type FileMetadataRepository interface {
// Create saves a single File metadata record to the storage.
Create(file *File) error
// CreateMany saves multiple File metadata records to the storage.
CreateMany(files []*File) error
// Get retrieves a single File metadata record (regardless of its state) by its unique identifier (ID).
Get(id gocql.UUID) (*File, error)
// GetByIDs retrieves multiple File metadata records by their unique identifiers (IDs).
GetByIDs(ids []gocql.UUID) ([]*File, error)
// GetByCollection retrieves all File metadata records associated with a specific collection ID.
GetByCollection(collectionID gocql.UUID) ([]*File, error)
// Update modifies an existing File metadata record in the storage.
Update(file *File) error
// SoftDelete removes a single File metadata record by its unique identifier (ID) by setting its state to deleted.
SoftDelete(id gocql.UUID) error
// HardDelete permanently removes a file metadata record.
HardDelete(id gocql.UUID) error
// SoftDeleteMany removes multiple File metadata records by their unique identifiers (IDs) by setting their state to deleted.
SoftDeleteMany(ids []gocql.UUID) error
// HardDeleteMany permanently removes multiple file metadata records.
HardDeleteMany(ids []gocql.UUID) error
// CheckIfExistsByID verifies if a File metadata record with the given ID exists in the storage.
CheckIfExistsByID(id gocql.UUID) (bool, error)
// CheckIfUserHasAccess determines if a specific user (userID) has access permissions for a given file (fileID).
CheckIfUserHasAccess(fileID gocql.UUID, userID gocql.UUID) (bool, error)
// GetByCreatedByUserID retrieves File metadata records for the given creating user
// (presumably matched against File.CreatedByUserID — confirm against the implementation).
GetByCreatedByUserID(createdByUserID gocql.UUID) ([]*File, error)
// GetByOwnerID retrieves File metadata records for the given owner
// (presumably matched against File.OwnerID — confirm against the implementation).
GetByOwnerID(ownerID gocql.UUID) ([]*File, error)
// State management operations
// Archive changes the state of the file with the given ID (presumably to the archived state — confirm).
Archive(id gocql.UUID) error
// Restore changes the state of the file with the given ID back to a usable state
// (presumably active; which source states are restorable is not visible here — confirm).
Restore(id gocql.UUID) error
// RestoreMany is the multi-ID counterpart of Restore.
RestoreMany(ids []gocql.UUID) error
// ListSyncData retrieves file sync data with pagination for the specified user and accessible collections
ListSyncData(ctx context.Context, userID gocql.UUID, cursor *FileSyncCursor, limit int64, accessibleCollectionIDs []gocql.UUID) (*FileSyncResponse, error)
// ListRecentFiles retrieves recent files with pagination for the specified user and accessible collections
ListRecentFiles(ctx context.Context, userID gocql.UUID, cursor *RecentFilesCursor, limit int64, accessibleCollectionIDs []gocql.UUID) (*RecentFilesResponse, error)
// CountFilesByUser counts all active files accessible to the user
CountFilesByUser(ctx context.Context, userID gocql.UUID, accessibleCollectionIDs []gocql.UUID) (int, error)
// CountFilesByCollection counts active files in a specific collection
CountFilesByCollection(ctx context.Context, collectionID gocql.UUID) (int, error)
// Storage size calculation methods
// Each returns a total as int64 (presumably bytes, summed from the encrypted size
// fields on File — confirm which fields and which file states are included).
GetTotalStorageSizeByOwner(ctx context.Context, ownerID gocql.UUID) (int64, error)
GetTotalStorageSizeByUser(ctx context.Context, userID gocql.UUID, accessibleCollectionIDs []gocql.UUID) (int64, error)
GetTotalStorageSizeByCollection(ctx context.Context, collectionID gocql.UUID) (int64, error)
// IP Anonymization for GDPR compliance
// Both methods return an int alongside the error (presumably the number of records
// that were anonymized — confirm). The IP fields themselves are not part of the
// File struct shown in this package's model.
AnonymizeOldIPs(ctx context.Context, cutoffDate time.Time) (int, error)
AnonymizeFileIPsByOwner(ctx context.Context, ownerID gocql.UUID) (int, error) // For GDPR right-to-be-forgotten
// Tag-related operations
// ListByTagID retrieves all files that have the specified tag assigned
// Used for tag update propagation (updating embedded tag data across all files)
ListByTagID(ctx context.Context, tagID gocql.UUID) ([]*File, error)
}
// FileObjectStorageRepository defines the interface for interacting with the actual encrypted file data storage.
// It handles operations related to storing, retrieving, deleting, and generating access URLs for encrypted data.
//
// Objects are addressed by a storage path string. StoreEncryptedData and
// GetEncryptedData exchange the whole payload as a single []byte, so callers
// hold the full object in memory; the presigned-URL methods exist for direct
// client transfers instead. None of the methods take a context.Context.
type FileObjectStorageRepository interface {
// StoreEncryptedData saves encrypted file data to the storage system. It takes the owner's ID,
// the file's ID (metadata ID), and the encrypted byte slice. It returns the storage path
// where the data was saved, or an error.
StoreEncryptedData(ownerID string, fileID string, encryptedData []byte) (string, error)
// GetEncryptedData retrieves encrypted file data from the storage system using its storage path.
// It returns the encrypted data as a byte slice, or an error.
GetEncryptedData(storagePath string) ([]byte, error)
// DeleteEncryptedData removes encrypted file data from the storage system using its storage path.
// It returns an error if the removal fails.
DeleteEncryptedData(storagePath string) error
// GeneratePresignedDownloadURL creates a temporary, time-limited URL that allows direct download
// of the file data located at the given storage path, with proper content disposition headers.
// duration is how long the URL should remain valid. It returns the URL, or an error.
GeneratePresignedDownloadURL(storagePath string, duration time.Duration) (string, error)
// GeneratePresignedUploadURL creates a temporary, time-limited URL that allows clients to upload
// encrypted file data directly to the storage system at the specified storage path.
// duration is how long the URL should remain valid. It returns the URL, or an error.
GeneratePresignedUploadURL(storagePath string, duration time.Duration) (string, error)
// VerifyObjectExists checks if an object exists at the given storage path.
// It returns true when the object is present; the error reports a failed check.
VerifyObjectExists(storagePath string) (bool, error)
// GetObjectSize returns the size in bytes of the object at the given storage path.
GetObjectSize(storagePath string) (int64, error)
}

View file

@ -0,0 +1,136 @@
// monorepo/cloud/backend/internal/maplefile/domain/file/model.go
package file
import (
"time"
"github.com/gocql/gocql"
"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/internal/domain/crypto"
"codeberg.org/mapleopentech/monorepo/cloud/maplefile-backend/internal/domain/tag"
)
// File represents an encrypted file entity stored in the backend database (documented as MongoDB).
// NOTE(review): the fields carry bson tags, but every identifier is a gocql.UUID (gocql is a
// Cassandra driver) — confirm which database actually backs this entity and update this comment.
// This entity holds metadata and pointers to the actual file content and thumbnail,
// which are stored separately in S3. All sensitive file metadata and the file itself
// are encrypted client-side before being uploaded. The backend stores only encrypted
// data and necessary non-sensitive identifiers or sizes for management.
type File struct {
// Identifiers
// Unique identifier for this specific file entity.
ID gocql.UUID `bson:"_id" json:"id"`
// Identifier of the collection this file belongs to. Used for grouping and key management.
CollectionID gocql.UUID `bson:"collection_id" json:"collection_id"`
// Identifier of the user who owns this file.
OwnerID gocql.UUID `bson:"owner_id" json:"owner_id"`
// Encryption and Content Details
// Client-side encrypted JSON blob containing file-specific metadata like the original file name,
// MIME type, size of the *unencrypted* data, etc. Encrypted by the client using the file key.
EncryptedMetadata string `bson:"encrypted_metadata" json:"encrypted_metadata"`
// The file-specific data encryption key (DEK) used to encrypt the file content and metadata.
// This key is encrypted by the client using the collection's key (a KEK). The backend
// stores this encrypted key; only a user with access to the KEK can decrypt it.
EncryptedFileKey crypto.EncryptedFileKey `bson:"encrypted_file_key" json:"encrypted_file_key"`
// Version identifier for the encryption scheme or client application version used to
// encrypt this file. Useful for migration or compatibility checks.
EncryptionVersion string `bson:"encryption_version" json:"encryption_version"`
// Cryptographic hash of the *encrypted* file content stored in S3. Used for integrity
// verification upon download *before* decryption.
EncryptedHash string `bson:"encrypted_hash" json:"encrypted_hash"`
// File Storage Object Details
// The unique key or path within the S3 bucket where the main encrypted file content is stored.
// This is an internal backend detail and is not exposed to the client API (json:"-").
EncryptedFileObjectKey string `bson:"encrypted_file_object_key" json:"-"`
// The size of the *encrypted* file content stored in S3, in bytes. This size is not sensitive
// and is used by the backend for storage accounting, billing, and transfer management.
EncryptedFileSizeInBytes int64 `bson:"encrypted_file_size_in_bytes" json:"encrypted_file_size_in_bytes"`
// Thumbnail Storage Object Details (Optional)
// The unique key or path within the S3 bucket where the encrypted thumbnail image (if generated
// and uploaded) is stored. Internal backend detail, not exposed to the client API (json:"-").
EncryptedThumbnailObjectKey string `bson:"encrypted_thumbnail_object_key" json:"-"`
// The size of the *encrypted* thumbnail image stored in S3, in bytes. Used for accounting.
// Value will be 0 if no thumbnail exists.
EncryptedThumbnailSizeInBytes int64 `bson:"encrypted_thumbnail_size_in_bytes" json:"encrypted_thumbnail_size_in_bytes"`
// DEPRECATED: Replaced by Tags field below
// TagIDs []gocql.UUID `bson:"tag_ids,omitempty" json:"tag_ids,omitempty"`
// Tags stores full embedded tag data (eliminates frontend API lookups)
// Stored as JSON text in database, marshaled/unmarshaled automatically
// Omitted from JSON/BSON output when empty (omitempty).
Tags []tag.EmbeddedTag `bson:"tags,omitempty" json:"tags,omitempty"`
// Timestamps and conflict resolution
// Timestamp when this file entity was created/uploaded.
CreatedAt time.Time `bson:"created_at" json:"created_at"`
// CreatedByUserID is the ID of the user who created this file.
CreatedByUserID gocql.UUID `bson:"created_by_user_id" json:"created_by_user_id"`
// Timestamp when this file entity's metadata or content was last modified.
ModifiedAt time.Time `bson:"modified_at" json:"modified_at"`
// ModifiedByUserID is the ID of the user who last modified this file.
ModifiedByUserID gocql.UUID `bson:"modified_by_user_id" json:"modified_by_user_id"`
// The current version of the file.
Version uint64 `bson:"version" json:"version"` // Every mutation (create, update, delete) is a versioned operation, keep track of the version number with this variable
// State management.
// State holds one of the FileState* string values.
State string `bson:"state" json:"state"` // pending, active, deleted, archived
TombstoneVersion uint64 `bson:"tombstone_version" json:"tombstone_version"` // The `version` number that this file was deleted at.
// TombstoneExpiry is a timestamp associated with the tombstone (presumably when a
// deleted file becomes eligible for permanent removal — confirm against the delete logic).
TombstoneExpiry time.Time `bson:"tombstone_expiry" json:"tombstone_expiry"`
}
// FileSyncCursor represents cursor-based pagination for sync operations.
// It is passed to FileMetadataRepository.ListSyncData and returned as
// FileSyncResponse.NextCursor.
type FileSyncCursor struct {
// LastModified and LastID together mark a position in the listing (presumably the
// modification time and ID of the last item on the previous page — confirm ordering
// against the repository implementation).
LastModified time.Time `json:"last_modified" bson:"last_modified"`
LastID gocql.UUID `json:"last_id" bson:"last_id"`
}
// FileSyncItem represents minimal file data for sync operations.
// It carries identifiers, versioning, state, tombstone data and the encrypted
// size, and omits the encrypted metadata, keys and hash that File carries.
type FileSyncItem struct {
// ID of the file and of the collection it belongs to.
ID gocql.UUID `json:"id" bson:"_id"`
CollectionID gocql.UUID `json:"collection_id" bson:"collection_id"`
// Version number and last-modified timestamp of the file.
Version uint64 `json:"version" bson:"version"`
ModifiedAt time.Time `json:"modified_at" bson:"modified_at"`
// State holds one of the FileState* string values.
State string `json:"state" bson:"state"`
// Tombstone fields use the same names and tags as on File.
TombstoneVersion uint64 `bson:"tombstone_version" json:"tombstone_version"`
TombstoneExpiry time.Time `bson:"tombstone_expiry" json:"tombstone_expiry"`
// Size of the encrypted file content, in bytes.
EncryptedFileSizeInBytes int64 `bson:"encrypted_file_size_in_bytes" json:"encrypted_file_size_in_bytes"`
}
// FileSyncResponse represents the response for file sync data.
// It is the result type of FileMetadataRepository.ListSyncData.
type FileSyncResponse struct {
// Files is the page of sync items.
Files []FileSyncItem `json:"files"`
// NextCursor is omitted from JSON when nil (presumably nil when there is no further page — confirm).
NextCursor *FileSyncCursor `json:"next_cursor,omitempty"`
// HasMore indicates whether more items remain after this page.
HasMore bool `json:"has_more"`
}
// RecentFilesCursor represents cursor-based pagination for recent files.
// It is passed to FileMetadataRepository.ListRecentFiles and returned as
// RecentFilesResponse.NextCursor. It has the same fields as FileSyncCursor.
type RecentFilesCursor struct {
// LastModified and LastID together mark a position in the listing (presumably the
// modification time and ID of the last item on the previous page — confirm ordering
// against the repository implementation).
LastModified time.Time `json:"last_modified" bson:"last_modified"`
LastID gocql.UUID `json:"last_id" bson:"last_id"`
}
// RecentFilesItem represents a file item for recent files listing.
// It has no fields for the storage object keys, the created-by/modified-by user
// IDs or the tombstone data that File carries.
type RecentFilesItem struct {
// Identifiers of the file, its collection and its owner.
ID gocql.UUID `json:"id" bson:"_id"`
CollectionID gocql.UUID `json:"collection_id" bson:"collection_id"`
OwnerID gocql.UUID `json:"owner_id" bson:"owner_id"`
// Client-side encrypted metadata blob.
EncryptedMetadata string `json:"encrypted_metadata" bson:"encrypted_metadata"`
// NOTE(review): this is a plain string here, whereas File.EncryptedFileKey is a
// crypto.EncryptedFileKey — presumably a serialized form of it; confirm the encoding.
EncryptedFileKey string `json:"encrypted_file_key" bson:"encrypted_file_key"`
EncryptionVersion string `json:"encryption_version" bson:"encryption_version"`
EncryptedHash string `json:"encrypted_hash" bson:"encrypted_hash"`
// Sizes of the encrypted content and encrypted thumbnail, in bytes.
EncryptedFileSizeInBytes int64 `json:"encrypted_file_size_in_bytes" bson:"encrypted_file_size_in_bytes"`
EncryptedThumbnailSizeInBytes int64 `json:"encrypted_thumbnail_size_in_bytes" bson:"encrypted_thumbnail_size_in_bytes"`
// Embedded tag data; omitted from output when empty (omitempty).
Tags []tag.EmbeddedTag `json:"tags,omitempty" bson:"tags,omitempty"`
// Creation and last-modification timestamps.
CreatedAt time.Time `json:"created_at" bson:"created_at"`
ModifiedAt time.Time `json:"modified_at" bson:"modified_at"`
// Version number of the file.
Version uint64 `json:"version" bson:"version"`
// State holds one of the FileState* string values.
State string `json:"state" bson:"state"`
}
// RecentFilesResponse represents the response for recent files listing.
// It is the result type of FileMetadataRepository.ListRecentFiles.
type RecentFilesResponse struct {
// Files is the page of recent-file items.
Files []RecentFilesItem `json:"files"`
// NextCursor is omitted from JSON when nil (presumably nil when there is no further page — confirm).
NextCursor *RecentFilesCursor `json:"next_cursor,omitempty"`
// HasMore indicates whether more items remain after this page.
HasMore bool `json:"has_more"`
}

View file

@ -0,0 +1,45 @@
// monorepo/cloud/backend/internal/maplefile/domain/file/state_validator.go
package file
import "errors"
// StateTransition describes a file state change as an ordered pair of state
// names. It holds data only and performs no validation itself. Both fields are
// strings, so values of this type are comparable and usable as map keys.
type StateTransition struct {
// From is the state the file is currently in.
From string
// To is the state the file would move to.
To string
}
// IsValidStateTransition checks if a file state transition is allowed
func IsValidStateTransition(from, to string) error {
validTransitions := map[StateTransition]bool{
// From pending
{FileStatePending, FileStateActive}: true,
{FileStatePending, FileStateDeleted}: true,
{FileStatePending, FileStateArchived}: false,
// From active
{FileStateActive, FileStatePending}: false,
{FileStateActive, FileStateDeleted}: true,
{FileStateActive, FileStateArchived}: true,
// From deleted (cannot be restored nor archived)
{FileStateDeleted, FileStatePending}: false,
{FileStateDeleted, FileStateActive}: false,
{FileStateDeleted, FileStateArchived}: false,
// From archived (can only be restored to active)
{FileStateArchived, FileStateActive}: true,
// Same state transitions (no-op)
{FileStatePending, FileStatePending}: true,
{FileStateActive, FileStateActive}: true,
{FileStateDeleted, FileStateDeleted}: true,
{FileStateArchived, FileStateArchived}: true,
}
if !validTransitions[StateTransition{from, to}] {
return errors.New("invalid state transition from " + from + " to " + to)
}
return nil
}