Skip to content

Migration Guide

This guide helps you upgrade from basic collections to enhanced collections in Harneet. The enhanced collections provide better performance, additional features, and improved APIs while maintaining backward compatibility where possible.

Overview of Changes

What's New in Enhanced Collections

  1. Performance Improvements: Better memory allocation, optimized algorithms
  2. New Data Structures: Set, LinkedList, BinarySearchTree
  3. Enhanced Existing Types: Improved Array and Map with additional methods
  4. Concurrent Collections: Thread-safe versions of all data structures
  5. Functional Programming: Map, filter, reduce operations
  6. Better Memory Management: Pre-allocation, automatic resizing, memory pooling

Backward Compatibility

Most existing code will continue to work without changes:

Backward Compatibility
// This code works in both old and new versions
var stack = collections.new_stack()
stack.push(42)
var item = stack.pop()

Migration Steps

Step 1: Update Import Statements

No changes needed - the collections module remains the same:

Import Statement
import collections  // Same as before

Step 2: Replace Basic Collections

Stack Migration

Stack Migration
// Old code (still works)
var stack = collections.new_stack()
stack.push(1)
stack.push(2)
var top = stack.pop()

// Enhanced features now available
stack.reserve(1000)        // NEW: Pre-allocate capacity
var array = stack.to_array()  // NEW: Convert to array
stack.clear()              // NEW: Clear all elements

// NEW: Batch operations
var items = [1, 2, 3, 4, 5]
for item in items {
    stack.push(item)
}

Queue Migration

Queue Migration
// Old code (still works)
var queue = collections.new_queue()
queue.enqueue("first")
queue.enqueue("second")
var item = queue.dequeue()

// Enhanced features
queue.reserve(500)         // NEW: Pre-allocate circular buffer
var size = queue.size()    // Improved performance
var array = queue.to_array()  // NEW: Convert to array

// NEW: Iterator support
var iter = queue.iterator()
for iter.has_next() {
    var item = iter.next()
    fmt.Println(item)
}

Step 3: Migrate to New Data Structures

Replace Array-based Unique Collections with Set

Replace with Set
// Old approach: manual uniqueness with arrays
var unique_items = []

function add_unique_old(item) {
    for existing in unique_items {
        if existing == item {
            return  // Already exists
        }
    }
    unique_items.append(item)
}

function contains_old(item) {
    for existing in unique_items {
        if existing == item {
            return true
        }
    }
    return false
}

// New approach: use Set
var unique_items = collections.new_set()

function add_unique_new(item) {
    unique_items.add(item)  // Automatically handles uniqueness
}

function contains_new(item) {
    return unique_items.contains(item)  // O(1) average case
}

Replace Manual Linked Structures with LinkedList

Replace with LinkedList
// Old approach: manual linked list implementation
struct Node {
    value: any
    next: Node
}

struct ManualList {
    head: Node
    size: int
}

function (ml *ManualList) add_first(value) {
    var new_node = Node{value: value, next: ml.head}
    ml.head = new_node
    ml.size = ml.size + 1
}

// New approach: use LinkedList
var list = collections.new_linked_list()

list.add_first(value)      // Built-in, optimized
list.add_last(value)       // O(1) tail operations
list.add_at(index, value)  // Insert at any position

Replace Sorted Arrays with BinarySearchTree

Replace with BST
// Old approach: maintaining sorted arrays
var sorted_data = []

function insert_sorted_old(value) {
    var index = 0
    for i in range(sorted_data.size()) {
        if sorted_data[i] > value {
            index = i
            break
        }
        index = i + 1
    }
    sorted_data.insert_at(index, value)  // O(n) operation
}

function search_old(value) {
    // Binary search implementation
    var left = 0
    var right = sorted_data.size() - 1

    for left <= right {
        var mid = (left + right) / 2
        if sorted_data[mid] == value {
            return true
        } else if sorted_data[mid] < value {
            left = mid + 1
        } else {
            right = mid - 1
        }
    }
    return false
}

// New approach: use BinarySearchTree
var tree = collections.new_bst()

tree.insert(value)         // O(log n) average case
var found = tree.contains(value)  // O(log n) average case
var sorted = tree.in_order_traversal()  // Get sorted data

Step 4: Enhance Existing Array and Map Usage

Array Enhancements

Array Enhancements
// Old array usage
var array = collections.new_array()
for i in range(1000) {
    array.append(i)  // May cause multiple reallocations
}

// Enhanced array usage
var array = collections.new_array()
array.reserve(1000)        // NEW: Pre-allocate space

for i in range(1000) {
    array.append(i)        // No reallocations needed
}

// NEW: Functional programming methods
var evens = array.filter((n) => n % 2 == 0)
var doubled = array.map((n) => n * 2)
var sum = array.reduce((acc, n) => acc + n, 0)

// NEW: Enhanced operations
var index = array.index_of(500)
array.reverse()
array.sort()

Map Enhancements

Map Enhancements
// Old map usage
var map = collections.new_map()
map.set("key1", "value1")
map.set("key2", "value2")

// Enhanced map usage
var map = collections.new_map()
map.reserve(100)           // NEW: Pre-allocate buckets

map.set("key1", "value1")
map.set("key2", "value2")

// NEW: Enhanced methods
var has_key = map.contains_key("key1")
var has_value = map.contains_value("value1")
var keys = map.keys()      // Get all keys as array
var values = map.values()  // Get all values as array
var entries = map.entries()  // Get key-value pairs

// NEW: Functional operations
var filtered = map.filter((key, value) => key.starts_with("user_"))

Step 5: Add Concurrent Support

Identify Shared Collections

Identify Shared Collections
// Old code: manual synchronization
var shared_data = collections.new_map()
var mutex = sync.new_mutex()

function thread_safe_set(key, value) {
    mutex.lock()
    shared_data.set(key, value)
    mutex.unlock()
}

function thread_safe_get(key) {
    mutex.lock()
    var value = shared_data.get(key)
    mutex.unlock()
    return value
}

// New code: concurrent collections
var shared_data = collections.new_concurrent_map()

function thread_safe_set(key, value) {
    shared_data.set(key, value)  // Automatically thread-safe
}

function thread_safe_get(key) {
    return shared_data.get(key)  // Automatically thread-safe
}

Convert to Concurrent Collections

Concurrent Collections
// Replace regular collections with concurrent versions
var shared_stack = collections.new_concurrent_stack()
var shared_queue = collections.new_concurrent_queue()
var shared_set = collections.new_concurrent_set()
var shared_map = collections.new_concurrent_map()
var shared_list = collections.new_concurrent_linked_list()
var shared_tree = collections.new_concurrent_bst()

// All operations are now thread-safe
do {
    for i in range(1000) {
        shared_stack.push(i)
    }
}

do {
    for i in range(500) {
        if !shared_stack.is_empty() {
            var item = shared_stack.pop()
        }
    }
}

Common Migration Patterns

Pattern 1: Cache Implementation

Cache Migration
// Old cache implementation
struct OldCache {
    data: Map
    max_size: int
    access_order: Array  // Track access for LRU
}

function (oc *OldCache) get(key) {
    var value = oc.data.get(key)
    if value != null {
        // Update access order
        var index = oc.access_order.index_of(key)
        if index != -1 {
            oc.access_order.remove_at(index)
        }
        oc.access_order.append(key)
    }
    return value
}

function (oc *OldCache) set(key, value) {
    // Evict if necessary
    if oc.data.size() >= oc.max_size && !oc.data.contains_key(key) {
        var oldest = oc.access_order.get(0)
        oc.data.remove(oldest)
        oc.access_order.remove_at(0)
    }

    oc.data.set(key, value)
    oc.access_order.append(key)
}

// New cache implementation
struct NewCache {
    data: ConcurrentMap
    access_order: ConcurrentLinkedList
    max_size: int
}

function (nc *NewCache) get(key) {
    var value = nc.data.get(key)
    if value != null {
        // Efficiently update access order
        nc.access_order.remove(key)  // O(1) if we track nodes
        nc.access_order.add_last(key)
    }
    return value
}

function (nc *NewCache) set(key, value) {
    // Thread-safe eviction
    if nc.data.size() >= nc.max_size && !nc.data.contains_key(key) {
        var oldest = nc.access_order.remove_first()
        nc.data.remove(oldest)
    }

    nc.data.set(key, value)
    nc.access_order.add_last(key)
}

Pattern 2: Event Processing

Event Processing Migration
// Old event processing
struct OldEventProcessor {
    events: Array
    handlers: Map
}

function (oep *OldEventProcessor) add_event(event) {
    oep.events.append(event)
}

function (oep *OldEventProcessor) process_events() {
    for event in oep.events {
        var handler = oep.handlers.get(event.type)
        if handler != null {
            handler(event)
        }
    }
    oep.events.clear()
}

// New event processing
struct NewEventProcessor {
    events: ConcurrentQueue
    handlers: ConcurrentMap
    processed_events: ConcurrentSet  // Track processed event IDs
}

function (nep *NewEventProcessor) add_event(event) {
    nep.events.enqueue(event)
}

function (nep *NewEventProcessor) process_events() {
    for !nep.events.is_empty() {
        var event = nep.events.dequeue()

        // Avoid duplicate processing
        if nep.processed_events.contains(event.id) {
            continue
        }

        var handler = nep.handlers.get(event.type)
        if handler != null {
            handler(event)
            nep.processed_events.add(event.id)
        }
    }
}

// Multiple workers can process concurrently
function (nep *NewEventProcessor) start_workers(worker_count) {
    for i in range(worker_count) {
        do nep.process_events()
    }
}

Pattern 3: Graph Algorithms

Graph Migration
// Old graph representation
struct OldGraph {
    vertices: Array
    edges: Map  // vertex -> Array of neighbors
}

function (og *OldGraph) add_edge(from, to) {
    if !og.vertices.contains(from) {
        og.vertices.append(from)
        og.edges.set(from, [])
    }
    if !og.vertices.contains(to) {
        og.vertices.append(to)
        og.edges.set(to, [])
    }

    var neighbors = og.edges.get(from)
    if !neighbors.contains(to) {
        neighbors.append(to)
    }
}

function (og *OldGraph) bfs(start) {
    var visited = []
    var queue = collections.new_queue()

    queue.enqueue(start)
    visited.append(start)

    for !queue.is_empty() {
        var vertex = queue.dequeue()
        var neighbors = og.edges.get(vertex) || []

        for neighbor in neighbors {
            if !visited.contains(neighbor) {  // O(n) check
                visited.append(neighbor)
                queue.enqueue(neighbor)
            }
        }
    }

    return visited
}

// New graph representation
struct NewGraph {
    vertices: Set
    edges: Map  // vertex -> Set of neighbors
}

function (ng *NewGraph) add_edge(from, to) {
    ng.vertices.add(from)
    ng.vertices.add(to)

    if !ng.edges.contains_key(from) {
        ng.edges.set(from, collections.new_set())
    }
    if !ng.edges.contains_key(to) {
        ng.edges.set(to, collections.new_set())
    }

    ng.edges.get(from).add(to)
}

function (ng *NewGraph) bfs(start) {
    var visited = collections.new_set()
    var queue = collections.new_queue()

    queue.enqueue(start)
    visited.add(start)

    for !queue.is_empty() {
        var vertex = queue.dequeue()
        var neighbors = ng.edges.get(vertex) || collections.new_set()

        for neighbor in neighbors {
            if !visited.contains(neighbor) {  // O(1) check
                visited.add(neighbor)
                queue.enqueue(neighbor)
            }
        }
    }

    return visited.to_array()
}

Performance Migration Benefits

Before and After Comparisons

Performance Comparison
function compare_migration_performance() {
    var size = 50000

    // Old approach: array-based unique collection
    benchmark_operation("Old unique collection", () => {
        var unique_items = []

        for i in range(size) {
            var found = false
            for existing in unique_items {
                if existing == i {
                    found = true
                    break
                }
            }
            if !found {
                unique_items.append(i)
            }
        }
    })

    // New approach: Set-based unique collection
    benchmark_operation("New Set collection", () => {
        var unique_items = collections.new_set()

        for i in range(size) {
            unique_items.add(i)
        }
    })

    // Old approach: manual sorted insertion
    benchmark_operation("Old sorted insertion", () => {
        var sorted_data = []

        for i in range(size) {
            var value = random_int(0, size * 2)

            // Find insertion point
            var index = 0
            for j in range(sorted_data.size()) {
                if sorted_data[j] > value {
                    index = j
                    break
                }
                index = j + 1
            }
            sorted_data.insert_at(index, value)
        }
    })

    // New approach: BinarySearchTree
    benchmark_operation("New BST insertion", () => {
        var tree = collections.new_bst()

        for i in range(size) {
            var value = random_int(0, size * 2)
            tree.insert(value)
        }
    })
}

compare_migration_performance()

Troubleshooting Migration Issues

Common Issues and Solutions

Issue 1: Performance Regression

Performance Regression Fix
// Problem: Using LinkedList for random access
var list = collections.new_linked_list()
for i in range(10000) {
    list.add_last(i)
}

// This is slow: O(n) for each access
for i in range(100) {
    var value = list.get(random_int(0, list.size()))  // Slow!
}

// Solution: Use Array for random access
var array = collections.new_array()
for i in range(10000) {
    array.append(i)
}

// This is fast: O(1) for each access
for i in range(100) {
    var value = array.get(random_int(0, array.size()))  // Fast!
}

Issue 2: Memory Usage Increase

Memory Usage Fix
// Problem: Not pre-allocating collections
var map = collections.new_map()
for i in range(100000) {
    map.set("key_" + i.to_string(), i)  // Multiple reallocations
}

// Solution: Pre-allocate expected capacity
var map = collections.new_map()
map.reserve(100000)  // Pre-allocate buckets
for i in range(100000) {
    map.set("key_" + i.to_string(), i)  // No reallocations
}

Issue 3: Thread Safety Issues

Thread Safety Fix
// Problem: Using regular collections in concurrent code
var shared_data = collections.new_map()  // Not thread-safe

do {
    for i in range(1000) {
        shared_data.set("key_" + i.to_string(), i)  // Race condition!
    }
}

do {
    for i in range(1000) {
        var value = shared_data.get("key_" + i.to_string())  // Race condition!
    }
}

// Solution: Use concurrent collections
var shared_data = collections.new_concurrent_map()  // Thread-safe

do {
    for i in range(1000) {
        shared_data.set("key_" + i.to_string(), i)  // Safe
    }
}

do {
    for i in range(1000) {
        var value = shared_data.get("key_" + i.to_string())  // Safe
    }
}

Migration Checklist

Pre-Migration Assessment

  • [ ] Identify all collection usage in your codebase
  • [ ] Determine which collections are accessed by multiple threads
  • [ ] Measure current performance baselines
  • [ ] Identify collections that maintain uniqueness manually
  • [ ] Find sorted data structures implemented with arrays

Migration Steps

  • [ ] Update to enhanced collections module
  • [ ] Replace manual unique collections with Set
  • [ ] Replace manual linked structures with LinkedList
  • [ ] Replace sorted arrays with BinarySearchTree
  • [ ] Add pre-allocation where collection sizes are known
  • [ ] Convert shared collections to concurrent versions
  • [ ] Add functional programming methods where beneficial
  • [ ] Update error handling for new return types

Post-Migration Validation

  • [ ] Run performance benchmarks to verify improvements
  • [ ] Test concurrent access patterns
  • [ ] Verify memory usage is within expected ranges
  • [ ] Ensure all functionality works as expected
  • [ ] Update documentation and examples

Performance Validation

Performance Validation
function validate_migration_performance() {
    fmt.Println("Migration Performance Validation")
    fmt.Println("================================")

    // Test 1: Set vs Array for uniqueness
    var unique_test_size = 10000

    var set_time = benchmark_operation("Set uniqueness", () => {
        var set = collections.new_set()
        for i in range(unique_test_size) {
            set.add(i % 1000)  // Many duplicates
        }
    })

    var array_time = benchmark_operation("Array uniqueness", () => {
        var array = []
        for i in range(unique_test_size) {
            var value = i % 1000
            if !array.contains(value) {
                array.append(value)
            }
        }
    })

    var set_improvement = (array_time - set_time) / array_time * 100
    fmt.Println("Set improvement:", set_improvement.to_int(), "%")

    // Test 2: BST vs sorted array
    var sorted_test_size = 5000

    var bst_time = benchmark_operation("BST insertion", () => {
        var tree = collections.new_bst()
        for i in range(sorted_test_size) {
            tree.insert(random_int(0, sorted_test_size * 2))
        }
    })

    var sorted_array_time = benchmark_operation("Sorted array insertion", () => {
        var array = []
        for i in range(sorted_test_size) {
            var value = random_int(0, sorted_test_size * 2)
            // Find insertion point and insert
            var index = 0
            for j in range(array.size()) {
                if array[j] > value {
                    index = j
                    break
                }
                index = j + 1
            }
            array.insert_at(index, value)
        }
    })

    var bst_improvement = (sorted_array_time - bst_time) / sorted_array_time * 100
    fmt.Println("BST improvement:", bst_improvement.to_int(), "%")

    fmt.Println()
    fmt.Println("Migration validation complete!")
}

validate_migration_performance()

This migration guide provides a comprehensive path for upgrading to enhanced collections while maintaining functionality and improving performance. Follow the steps systematically and validate each change to ensure a successful migration.