This guide covers performance characteristics, optimization techniques, and benchmarking for Harneet's enhanced collections. Understanding these concepts will help you choose the right data structure and optimize your applications.
Time Complexity Summary
| Data Structure | Access | Search | Insert | Delete | Space |
|---|---|---|---|---|---|
| Array | O(1) | O(n) | O(n)* | O(n)* | O(n) |
| Stack | O(1)† | O(n) | O(1) | O(1) | O(n) |
| Queue | O(1)† | O(n) | O(1) | O(1) | O(n) |
| Set | N/A | O(1)‡ | O(1)‡ | O(1)‡ | O(n) |
| Map | O(1)‡ | O(1)‡ | O(1)‡ | O(1)‡ | O(n) |
| LinkedList | O(n) | O(n) | O(1)§ | O(1)§ | O(n) |
| BinarySearchTree | N/A | O(log n)¶ | O(log n)¶ | O(log n)¶ | O(n) |
*Amortized for dynamic arrays
†Top/front element only
‡Average case, O(n) worst case
§Head/tail operations only
¶Balanced tree; degrades to O(n) worst case when the tree becomes skewed
Memory Overhead Comparison
| Data Structure | Memory Overhead | Notes |
|---|---|---|
| Array | ~0-50% | Growth factor dependent |
| Stack | ~0-50% | Pre-allocated capacity |
| Queue | ~25-50% | Circular buffer overhead |
| Set | ~25-75% | Hash table load factor |
| Map | ~25-75% | Hash table load factor |
| LinkedList | ~100-200% | Pointer overhead per node |
| BinarySearchTree | ~100-200% | Pointer overhead per node |
Benchmarking and Measurement
| Performance Testing |
|---|
| import collections
import time
function benchmark_operation(name string, operation function()) int {
var start_time = time.now_millis()
operation()
var end_time = time.now_millis()
var duration = end_time - start_time
fmt.Println(name, "took", duration, "ms")
return duration
}
function benchmark_insertions() {
var size = 100000
// Array insertions
benchmark_operation("Array append", () => {
var array = collections.new_array()
for i in range(size) {
array.append(i)
}
})
// Stack insertions
benchmark_operation("Stack push", () => {
var stack = collections.new_stack()
for i in range(size) {
stack.push(i)
}
})
// Set insertions
benchmark_operation("Set add", () => {
var set = collections.new_set()
for i in range(size) {
set.add(i)
}
})
// LinkedList insertions
benchmark_operation("LinkedList add_last", () => {
var list = collections.new_linked_list()
for i in range(size) {
list.add_last(i)
}
})
}
benchmark_insertions()
|
Memory Usage Profiling
| Memory Profiling |
|---|
| function profile_memory_usage() {
var sizes = [1000, 10000, 100000, 1000000]
for size in sizes {
fmt.Println("Size:", size)
// Array memory usage
var array = collections.new_array()
array.reserve(size)
var array_memory = get_memory_usage()
for i in range(size) {
array.append(i)
}
var array_filled_memory = get_memory_usage()
fmt.Println(" Array reserved:", array_memory, "bytes")
fmt.Println(" Array filled:", array_filled_memory, "bytes")
fmt.Println(" Array overhead:", (array_filled_memory - array_memory) / size, "bytes per element")
// LinkedList memory usage
var list = collections.new_linked_list()
var list_start_memory = get_memory_usage()
for i in range(size) {
list.add_last(i)
}
var list_filled_memory = get_memory_usage()
fmt.Println(" LinkedList:", list_filled_memory - list_start_memory, "bytes")
fmt.Println(" LinkedList overhead:", (list_filled_memory - list_start_memory) / size, "bytes per element")
fmt.Println()
}
}
|
| Concurrent Performance |
|---|
| function benchmark_concurrent_operations() {
var iterations = 10000
var thread_count = 4
// Regular map (not thread-safe)
var regular_map = collections.new_map()
// Concurrent map
var concurrent_map = collections.new_concurrent_map()
// Benchmark concurrent writes
benchmark_operation("Concurrent map writes", () => {
var workers = []
var per_worker = iterations / thread_count
for i in range(thread_count) {
workers.append(do {
for j in range(per_worker) {
// Offset by per_worker so each worker writes a disjoint key range
var key = "key_" + (i * per_worker + j).to_string()
concurrent_map.set(key, j)
}
})
}
// Wait for all workers
for worker in workers {
await worker
}
})
// Benchmark concurrent reads
benchmark_operation("Concurrent map reads", () => {
var workers = []
var per_worker = iterations / thread_count
for i in range(thread_count) {
workers.append(do {
for j in range(per_worker) {
// Read back the same disjoint key range written above
var key = "key_" + (i * per_worker + j).to_string()
var value = concurrent_map.get(key)
}
})
}
for worker in workers {
await worker
}
})
}
|
Optimization Techniques
1. Pre-allocation and Capacity Management
| Pre-allocation |
|---|
| // Bad: frequent reallocations
function inefficient_array_building() {
var array = collections.new_array() // Starts small
for i in range(100000) {
array.append(i) // May trigger multiple reallocations
}
return array
}
// Good: pre-allocate known capacity
function efficient_array_building() {
var array = collections.new_array()
array.reserve(100000) // Pre-allocate space
for i in range(100000) {
array.append(i) // No reallocations needed
}
return array
}
// Benchmark the difference
benchmark_operation("Without pre-allocation", inefficient_array_building)
benchmark_operation("With pre-allocation", efficient_array_building)
|
2. Batch Operations
| Batch Operations |
|---|
| // Inefficient: individual operations
function individual_operations(set, items) {
for item in items {
set.add(item) // Individual hash computation and insertion
}
}
// Efficient: batch operations
function batch_operations(set, items) {
set.add_all(items) // Optimized batch insertion
}
var large_dataset = range(50000)
var set1 = collections.new_set()
var set2 = collections.new_set()
benchmark_operation("Individual adds", () => individual_operations(set1, large_dataset))
benchmark_operation("Batch add", () => batch_operations(set2, large_dataset))
|
3. Iterator vs Index Access
| Iterator Performance |
|---|
| var large_list = collections.new_linked_list()
for i in range(10000) {
large_list.add_last(i)
}
// Inefficient: index-based access (O(n²) total)
benchmark_operation("Index-based access", () => {
var sum = 0
for i in range(large_list.size()) {
sum += large_list.get(i) // O(n) for each access
}
})
// Efficient: iterator-based access (O(n) total)
benchmark_operation("Iterator access", () => {
var sum = 0
var iter = large_list.iterator()
for iter.has_next() {
sum += iter.next() // O(1) for each access
}
})
|
4. Choosing the Right Data Structure
| Data Structure Selection |
|---|
| function compare_lookup_performance() {
var size = 100000
var lookup_count = 10000
// Prepare data
var array = collections.new_array()
var set = collections.new_set()
var tree = collections.new_bst()
for i in range(size) {
array.append(i)
set.add(i)
tree.insert(i)
}
var random_keys = []
for i in range(lookup_count) {
random_keys.append(random_int(0, size))
}
// Array linear search: O(n) per lookup
benchmark_operation("Array contains", () => {
for key in random_keys {
array.contains(key)
}
})
// Set hash lookup: O(1) average per lookup
benchmark_operation("Set contains", () => {
for key in random_keys {
set.contains(key)
}
})
// Tree binary search: O(log n) per lookup
benchmark_operation("Tree contains", () => {
for key in random_keys {
tree.contains(key)
}
})
}
compare_lookup_performance()
|
5. Memory Pool Optimization
| Memory Pool |
|---|
| // Custom memory pool for frequent allocations
struct ObjectPool {
available: Stack
create_func: function()
reset_func: function(obj)
}
function new_object_pool(create_func, reset_func, initial_size) {
var pool = ObjectPool{
available: collections.new_stack(),
create_func: create_func,
reset_func: reset_func
}
// Pre-populate pool
for i in range(initial_size) {
pool.available.push(create_func())
}
return pool
}
function (pool *ObjectPool) acquire() {
if !pool.available.is_empty() {
return pool.available.pop()
}
return pool.create_func()
}
function (pool *ObjectPool) release(obj) {
pool.reset_func(obj)
pool.available.push(obj)
}
// Usage example
var list_pool = new_object_pool(
() => collections.new_linked_list(), // Create function
(list) => list.clear(), // Reset function
10 // Initial pool size
)
function process_with_pool() {
var temp_list = list_pool.acquire()
// Use the list
for i in range(100) {
temp_list.add_last(i)
}
// Process data...
// Return to pool
list_pool.release(temp_list)
}
|
Good Patterns
1. Cache-Friendly Access Patterns
| Cache-Friendly Access |
|---|
| // Good: sequential access (cache-friendly)
function sequential_sum(array) {
var sum = 0
for i in range(array.size()) {
sum += array.get(i)
}
return sum
}
// Bad: random access (cache-unfriendly)
function random_sum(array) {
var sum = 0
var indices = range(array.size()).shuffle()
for i in indices {
sum += array.get(i)
}
return sum
}
|
2. Minimize Object Creation
| Object Reuse |
|---|
| // Good: reuse objects
function efficient_string_building(words) {
var builder = collections.new_array()
builder.reserve(words.size())
for word in words {
builder.append(word)
}
return builder.join("")
}
// Bad: create many temporary objects
function inefficient_string_building(words) {
var result = ""
for word in words {
result = result + word // Creates new string each time
}
return result
}
|
3. Lazy Evaluation
| Lazy Evaluation |
|---|
| // Good: lazy evaluation for large datasets
function lazy_filter_map(collection, filter_func, map_func) {
return LazyIterator{
source: collection.iterator(),
filter: filter_func,
mapper: map_func
}
}
// Bad: eager evaluation
function eager_filter_map(collection, filter_func, map_func) {
var filtered = collection.filter(filter_func) // Full pass
var mapped = filtered.map(map_func) // Another full pass
return mapped
}
|
Anti-patterns to Avoid
1. Inappropriate Data Structure Choice
| Data Structure Anti-pattern |
|---|
| // Anti-pattern: using LinkedList for random access
function bad_random_access() {
var list = collections.new_linked_list()
for i in range(10000) {
list.add_last(i)
}
// This is O(n²) - very slow!
for i in range(100) {
var random_index = random_int(0, list.size())
var value = list.get(random_index) // O(n) each time
}
}
// Better: use Array for random access
function good_random_access() {
var array = collections.new_array()
for i in range(10000) {
array.append(i)
}
// This is O(1) per access
for i in range(100) {
var random_index = random_int(0, array.size())
var value = array.get(random_index) // O(1) each time
}
}
|
2. Excessive Memory Allocation
| Memory Anti-pattern |
|---|
| // Anti-pattern: creating many temporary collections
function wasteful_processing(data) {
var temp1 = collections.new_array()
var temp2 = collections.new_array()
var temp3 = collections.new_array()
// Multiple passes with temporary collections
for item in data {
if condition1(item) {
temp1.append(item)
}
}
for item in temp1 {
if condition2(item) {
temp2.append(transform(item))
}
}
for item in temp2 {
if condition3(item) {
temp3.append(finalize(item))
}
}
return temp3
}
// Better: single pass with streaming
function efficient_processing(data) {
var result = collections.new_array()
for item in data {
if condition1(item) && condition2(item) && condition3(item) {
result.append(finalize(transform(item)))
}
}
return result
}
|
3. Ignoring Amortized Costs
| Amortized Cost Anti-pattern |
|---|
| // Anti-pattern: frequent size changes
function bad_dynamic_sizing() {
var array = collections.new_array()
for i in range(1000) {
// Grow
for j in range(100) {
array.append(j)
}
// Shrink
for j in range(50) {
array.pop()
}
}
}
// Better: stable sizing or pre-allocation
function good_sizing() {
var array = collections.new_array()
array.reserve(5000) // Pre-allocate expected maximum
for i in range(1000) {
// Operations within pre-allocated space
var start_size = array.size()
for j in range(100) {
array.append(j)
}
// Shrink back to original size
array.resize(start_size)
}
}
|
Real-World Examples
1. Web Server Request Cache
| Request Cache |
|---|
| struct RequestCache {
cache: ConcurrentMap
max_size: int
hit_count: int
miss_count: int
}
function new_request_cache(max_size) {
return RequestCache{
cache: collections.new_concurrent_map(),
max_size: max_size,
hit_count: 0,
miss_count: 0
}
}
function (rc *RequestCache) get(key) {
var value = rc.cache.get(key)
if value != null {
rc.hit_count = rc.hit_count + 1
return value
}
rc.miss_count = rc.miss_count + 1
return null
}
function (rc *RequestCache) put(key, value) {
// Simple LRU eviction when full
if rc.cache.size() >= rc.max_size {
// Remove oldest entries (simplified)
var keys = rc.cache.keys()
var to_remove = keys.slice(0, rc.max_size / 4) // Remove 25%
for key in to_remove {
rc.cache.remove(key)
}
}
rc.cache.set(key, value)
}
function (rc *RequestCache) get_hit_rate() {
var total = rc.hit_count + rc.miss_count
if total == 0 {
return 0.0
}
return rc.hit_count.to_float() / total.to_float()
}
// Performance testing
function test_cache_performance() {
var cache = new_request_cache(1000)
var request_count = 10000
benchmark_operation("Cache operations", () => {
for i in range(request_count) {
var key = "request_" + (i % 500).to_string() // 500 distinct keys; after warm-up nearly all requests hit
var cached = cache.get(key)
if cached == null {
var response = "response_for_" + key
cache.put(key, response)
}
}
})
fmt.Println("Cache hit rate:", cache.get_hit_rate())
}
|
2. Log Processing Pipeline
| Log Processor |
|---|
| struct LogProcessor {
buffer: ConcurrentQueue
processed_count: int
error_count: int
}
function new_log_processor() {
return LogProcessor{
buffer: collections.new_concurrent_queue(),
processed_count: 0,
error_count: 0
}
}
function (lp *LogProcessor) add_log_entry(entry) {
lp.buffer.enqueue(entry)
}
function (lp *LogProcessor) process_batch(batch_size) {
var batch = []
// Collect batch
for i in range(batch_size) {
if lp.buffer.is_empty() {
break
}
batch.append(lp.buffer.dequeue())
}
if batch.size() == 0 {
return
}
// Process batch efficiently
benchmark_operation("Process batch of " + batch.size().to_string(), () => {
for entry in batch {
if process_log_entry(entry) {
lp.processed_count = lp.processed_count + 1
} else {
lp.error_count = lp.error_count + 1
}
}
})
}
function process_log_entry(entry) {
// Simulate log processing
return entry.length() > 0
}
// Performance test
function test_log_processing() {
var processor = new_log_processor()
// Generate test logs
for i in range(100000) {
processor.add_log_entry("log entry " + i.to_string())
}
// Process in batches for better performance
benchmark_operation("Batch processing", () => {
for processor.buffer.size() > 0 {
processor.process_batch(1000) // Process 1000 at a time
}
})
fmt.Println("Processed:", processor.processed_count)
fmt.Println("Errors:", processor.error_count)
}
|
| Performance Metrics |
|---|
| struct CollectionMetrics {
operation_counts: Map
operation_times: Map
memory_usage: int
}
function new_collection_metrics() {
return CollectionMetrics{
operation_counts: collections.new_map(),
operation_times: collections.new_map(),
memory_usage: 0
}
}
function (cm *CollectionMetrics) record_operation(operation, duration) {
// Count operations
var current_count = cm.operation_counts.get(operation) || 0
cm.operation_counts.set(operation, current_count + 1)
// Track timing
var times = cm.operation_times.get(operation) || []
times.append(duration)
cm.operation_times.set(operation, times)
}
function (cm *CollectionMetrics) get_average_time(operation) {
var times = cm.operation_times.get(operation)
if times == null || times.size() == 0 {
return 0
}
var sum = times.reduce((acc, time) => acc + time, 0)
return sum / times.size()
}
function (cm *CollectionMetrics) print_report() {
fmt.Println("Collection Performance Report")
fmt.Println("============================")
var operations = cm.operation_counts.keys()
for op in operations {
var count = cm.operation_counts.get(op)
var avg_time = cm.get_average_time(op)
fmt.Println(op + ":", count, "operations, avg", avg_time, "ms")
}
}
// Instrumented collection wrapper
struct InstrumentedMap {
map: Map
metrics: CollectionMetrics
}
function new_instrumented_map() {
return InstrumentedMap{
map: collections.new_map(),
metrics: new_collection_metrics()
}
}
function (im *InstrumentedMap) set(key, value) {
var start = time.now_millis()
im.map.set(key, value)
var duration = time.now_millis() - start
im.metrics.record_operation("set", duration)
}
function (im *InstrumentedMap) get(key) {
var start = time.now_millis()
var result = im.map.get(key)
var duration = time.now_millis() - start
im.metrics.record_operation("get", duration)
return result
}
|
| Regression Testing |
|---|
| struct PerformanceBenchmark {
name: string
setup_func: function()
test_func: function()
expected_max_time: int // milliseconds
}
function run_performance_suite(benchmarks) {
fmt.Println("Running Performance Regression Tests")
fmt.Println("===================================")
var results = []
for benchmark in benchmarks {
fmt.Println("Running:", benchmark.name)
// Setup
benchmark.setup_func()
// Run test
var start_time = time.now_millis()
benchmark.test_func()
var duration = time.now_millis() - start_time
// Check against expected performance
var passed = duration <= benchmark.expected_max_time
var status = if passed { "PASS" } else { "FAIL" }
fmt.Println(" Result:", status, "-", duration, "ms (expected <=", benchmark.expected_max_time, "ms)")
results.append({
name: benchmark.name,
duration: duration,
expected: benchmark.expected_max_time,
passed: passed
})
}
// Summary
var passed_count = results.filter((r) => r.passed).size()
fmt.Println()
fmt.Println("Summary:", passed_count, "/", results.size(), "tests passed")
return results
}
// Define benchmarks
var performance_benchmarks = [
PerformanceBenchmark{
name: "Array 100k insertions",
setup_func: () => {},
test_func: () => {
var array = collections.new_array()
array.reserve(100000)
for i in range(100000) {
array.append(i)
}
},
expected_max_time: 100
},
PerformanceBenchmark{
name: "Set 50k unique insertions",
setup_func: () => {},
test_func: () => {
var set = collections.new_set()
for i in range(50000) {
set.add(i)
}
},
expected_max_time: 200
},
PerformanceBenchmark{
name: "Map 50k key-value pairs",
setup_func: () => {},
test_func: () => {
var map = collections.new_map()
for i in range(50000) {
map.set("key_" + i.to_string(), i)
}
},
expected_max_time: 250
}
]
// Run the performance suite
run_performance_suite(performance_benchmarks)
|
This comprehensive performance guide provides the tools and knowledge needed to optimize collection usage in Harneet applications, from basic benchmarking to advanced performance monitoring and regression testing.