Reshape complex data

Real-world data is rarely flat. It contains nested structures, arrays of objects, and deeply hierarchical information. This guide shows advanced techniques for reshaping complex data structures to meet your analysis needs.

Flatten nested structures

Transform deeply nested data into flat structures for easier analysis.

Basic flattening

Start with simple nested objects:

from {
  user: {
    id: 123,
    profile: {
      name: "Alice",
      contact: {
        email: "alice@example.com",
        phone: "+1-555-0123"
      }
    }
  }
}
flat_user = {
  id: user.id,
  name: user.profile.name,
  email: user.profile.contact.email,
  phone: user.profile.contact.phone
}

{
  user: {...},
  flat_user: {
    id: 123,
    name: "Alice",
    email: "alice@example.com",
    phone: "+1-555-0123",
  },
}

Flatten with prefixes

Preserve context when flattening:

from {
  event: {
    id: "evt-001",
    user: {name: "Alice", role: "admin"},
    system: {name: "web-server", version: "2.1"}
  }
}
flattened = {
  event_id: event.id,
  user_name: event.user.name,
  user_role: event.user.role,
  system_name: event.system.name,
  system_version: event.system.version
}

{
  event: {...},
  flattened: {
    event_id: "evt-001",
    user_name: "Alice",
    user_role: "admin",
    system_name: "web-server",
    system_version: "2.1",
  }
}

Unflatten data

Reconstruct hierarchical structures from flattened data using unflatten():

Basic unflattening

Convert dotted field names back to nested structures:

from {
  flattened: {
    "user.name": "Alice",
    "user.email": "alice@example.com",
    "user.address.city": "NYC",
    "user.address.zip": "10001",
    "status": "active"
  }
}
nested = flattened.unflatten()

{
  flattened: {
    "user.name": "Alice",
    "user.email": "alice@example.com",
    "user.address.city": "NYC",
    "user.address.zip": "10001",
    status: "active",
  },
  nested: {
    user: {
      name: "Alice",
      email: "alice@example.com",
      address: {
        city: "NYC",
        zip: "10001",
      },
    },
    status: "active",
  },
}

Custom separator

Use a different separator for field paths:

from {
  metrics: {
    cpu_usage_percent: 45,
    memory_used_gb: 8,
    memory_total_gb: 16,
    disk_root_used: 100,
    disk_root_total: 500
  }
}
structured = metrics.unflatten("_")

{
  metrics: {
    cpu_usage_percent: 45,
    memory_used_gb: 8,
    memory_total_gb: 16,
    disk_root_used: 100,
    disk_root_total: 500,
  },
  structured: {
    cpu: {
      usage: {
        percent: 45,
      },
    },
    memory: {
      used: {
        gb: 8,
      },
      total: {
        gb: 16,
      },
    },
    disk: {
      root: {
        used: 100,
        total: 500,
      },
    },
  },
}

Normalize denormalized data

Convert wide data to long format for better analysis.

Wide to long transformation

from {
  metrics: {
    timestamp: "2024-01-15T10:00:00",
    cpu_usage: 45,
    memory_usage: 62,
    disk_usage: 78
  }
}
long_format = [
  {timestamp: metrics.timestamp, metric: "cpu", value: metrics.cpu_usage},
  {timestamp: metrics.timestamp, metric: "memory", value: metrics.memory_usage},
  {timestamp: metrics.timestamp, metric: "disk", value: metrics.disk_usage}
]

{
  metrics: {
    timestamp: "2024-01-15T10:00:00",
    cpu_usage: 45,
    memory_usage: 62,
    disk_usage: 78,
  },
  long_format: [
    {
      timestamp: "2024-01-15T10:00:00",
      metric: "cpu",
      value: 45,
    },
    {
      timestamp: "2024-01-15T10:00:00",
      metric: "memory",
      value: 62,
    },
    {
      timestamp: "2024-01-15T10:00:00",
      metric: "disk",
      value: 78,
    },
  ],
}

Transform to event stream

Convert arrays to event streams for processing:

from {
  readings: [
    {sensor: "temp", location: "room1", value: 72},
    {sensor: "humidity", location: "room1", value: 45},
    {sensor: "temp", location: "room2", value: 68},
    {sensor: "humidity", location: "room2", value: 50}
  ]
}
unroll readings
select sensor=readings.sensor,
       location=readings.location,
      value=readings.value

{sensor: "temp", location: "room1", value: 72}
{sensor: "humidity", location: "room1", value: 45}
{sensor: "temp", location: "room2", value: 68}
{sensor: "humidity", location: "room2", value: 50}

Extract arrays from objects

Transform objects with numbered keys into proper arrays.

from {
  response: {
    item_0: {name: "Widget", price: 9.99},
    item_1: {name: "Gadget", price: 19.99},
    item_2: {name: "Tool", price: 14.99},
    total_items: 3
  }
}
// Extract items manually when keys are known
items = [
  response.item_0,
  response.item_1,
  response.item_2
]

{
  response: {...},
  items: [
    {name: "Widget", price: 9.99},
    {name: "Gadget", price: 19.99},
    {name: "Tool", price: 14.99},
  ]
}

Build hierarchical structures

Create nested structures from flat data.

Extract path components

from {
  paths: [
    "/home/user/docs/report.pdf",
    "/home/user/docs/summary.txt",
    "/home/user/images/photo.jpg",
    "/var/log/system.log"
  ]
}
path_info = paths.map(path => {
  full_path: path,
  directory: path.parent_dir(),
  filename: path.file_name(),
  parts: path.split("/").where(p => p.length_bytes() > 0)
})

{
  paths: [...],
  path_info: [
    {
      full_path: "/home/user/docs/report.pdf",
      directory: "/home/user/docs",
      filename: "report.pdf",
      parts: ["home", "user", "docs", "report.pdf"],
    },
    {
      full_path: "/home/user/docs/summary.txt",
      directory: "/home/user/docs",
      filename: "summary.txt",
      parts: ["home", "user", "docs", "summary.txt"],
    },
    {
      full_path: "/home/user/images/photo.jpg",
      directory: "/home/user/images",
      filename: "photo.jpg",
      parts: ["home", "user", "images", "photo.jpg"],
    },
    {
      full_path: "/var/log/system.log",
      directory: "/var/log",
      filename: "system.log",
      parts: ["var", "log", "system.log"],
    },
  ],
}

Merge fragmented data

Combine data split across multiple records.

Merge records with spread operator

from {
  user: {id: 1, name: "Alice"},
  profile: {email: "alice@example.com", dept: "Engineering"},
  metrics: {logins: 45, actions: 234}
}
merged = {
  ...user,
  ...profile,
  ...metrics
}

{
  user: {
    id: 1,
    name: "Alice",
  },
  profile: {
    email: "alice@example.com",
    dept: "Engineering",
  },
  metrics: {
    logins: 45,
    actions: 234,
  },
  merged: {
    id: 1,
    name: "Alice",
    email: "alice@example.com",
    dept: "Engineering",
    logins: 45,
    actions: 234,
  },
}

Handle dynamic schemas

Work with data that has varying structures.

Normalize inconsistent records

from {
  events: [
    {type: "user", data: {name: "Alice", email: "alice@example.com"}},
    {type: "system", data: {cpu: 45, memory: 1024}},
    {type: "error", message: "Connection failed", code: 500}
  ]
}
select normalized = events.map(e => {
  event_type: e.type,
  timestamp: now(),
  // Use conditional assignment for type-specific fields
  user_name: e.data.name if e.type == "user",
  user_email: e.data.email if e.type == "user",
  system_cpu: e.data.cpu if e.type == "system",
  system_memory: e.data.memory if e.type == "system",
  error_message: e.message if e.type == "error",
  error_code: e.code if e.type == "error"
})

{
  normalized: [
    {
      event_type: "user",
      timestamp: 2025-07-21T18:54:09.307412Z,
      user_name: "Alice",
      user_email: "alice@example.com",
      system_cpu: null,
      system_memory: null,
      error_message: null,
      error_code: null,
    },
    {
      event_type: "system",
      timestamp: 2025-07-21T18:54:09.307412Z,
      user_name: null,
      user_email: null,
      system_cpu: 45,
      system_memory: 1024,
      error_message: null,
      error_code: null,
    },
    {
      event_type: "error",
      timestamp: 2025-07-21T18:54:09.307412Z,
      user_name: null,
      user_email: null,
      system_cpu: null,
      system_memory: null,
      error_message: "Connection failed",
      error_code: 500,
    },
  ],
}

Working with aggregated events

Unroll arrays to individual events

Some data sources aggregate multiple events into a single record. Use unroll to expand these into individual events:

// DHCP logs may contain multiple message types in one record
from {
  ts: 2024-01-15T10:00:00,
  uid: "C123abc",
  msg_types: ["DISCOVER", "OFFER", "REQUEST", "ACK"],
  client_addr: 192.168.1.100
}
unroll msg_types
// Now we have 4 separate events, one per message type
activity_name = msg_types.to_title()

{ts: 2024-01-15T10:00:00, uid: "C123abc", msg_types: "DISCOVER", client_addr: 192.168.1.100, activity_name: "Discover"}
{ts: 2024-01-15T10:00:00, uid: "C123abc", msg_types: "OFFER", client_addr: 192.168.1.100, activity_name: "Offer"}
{ts: 2024-01-15T10:00:00, uid: "C123abc", msg_types: "REQUEST", client_addr: 192.168.1.100, activity_name: "Request"}
{ts: 2024-01-15T10:00:00, uid: "C123abc", msg_types: "ACK", client_addr: 192.168.1.100, activity_name: "Ack"}

This pattern is essential when:

Converting aggregated logs to event-per-row formats
Normalizing data for OCSF or other schemas that expect individual events
Processing batched API responses

Safe arithmetic with optional fields

Dynamic field computation with null safety

When computing metrics from fields that might be null, use the else keyword for safe fallbacks:

from {
  packets_sent: 1000,
  packets_received: 950,
  duration: 10s,
  bytes_sent: null,  // Missing data
  bytes_received: 5000
}

// Safe division with fallback
packets_per_second = packets_sent / duration.count_seconds() else 0

// Handle missing values in arithmetic
loss_rate = (packets_sent - packets_received) / packets_sent else 0

// Compute only when both values exist
throughput = (bytes_sent + bytes_received) / duration.count_seconds() if bytes_sent != null else null

// Complex calculation with multiple fallbacks
efficiency = (bytes_received / bytes_sent) * 100 if bytes_sent != null else
              100 if bytes_received > 0 else 0

{
  packets_sent: 1000,
  packets_received: 950,
  duration: 10s,
  bytes_sent: null,
  bytes_received: 5000,
  packets_per_second: 100.0,
  loss_rate: 0.05,
  throughput: null,
  efficiency: 100
}

Advanced transformations

Recursive flattening

Flatten arbitrarily nested structures:

from {
  data: {
    level1: {
      level2: {
        level3: {
          value: "deep",
          items: [1, 2, 3]
        }
      },
      other: "value"
    }
  }
}
// Use flatten function for automatic recursive flattening
select flattened = data.flatten()

{
  flattened: {
    "level1.level2.level3.value": "deep",
    "level1.level2.level3.items": [
      1,
      2,
      3,
    ],
    "level1.other": "value",
  },
}

Extract fields by prefix

Use field access to extract specific configurations:

from {
  config: {
    env_DATABASE_HOST: "db.example.com",
    env_DATABASE_PORT: 5432,
    env_API_KEY: "secret123",
    app_name: "MyApp",
    app_version: "1.0"
  }
}
// Extract specific fields directly
select env_vars = {
    DATABASE_HOST: config.env_DATABASE_HOST,
    DATABASE_PORT: config.env_DATABASE_PORT,
    API_KEY: config.env_API_KEY
  },
  app_config = {
    name: config.app_name,
    version: config.app_version
  }

{
  env_vars: {
    DATABASE_HOST: "db.example.com",
    DATABASE_PORT: 5432,
    API_KEY: "secret123",
  },
  app_config: {
    name: "MyApp",
    version: "1.0",
  },
}

Practical examples

Process nested API responses

from {
  api_response: {
    status: "success",
    data: {
      user: {
        id: 123,
        profile: {
          personal: {name: "Alice", age: 30},
          professional: {title: "Engineer", company: "TechCorp"}
        }
      },
      metadata: {
        request_id: "req-001",
        timestamp: "2024-01-15T10:00:00"
      }
    }
  }
}
// Extract and reshape for database storage
select user_record = {
  user_id: api_response.data.user.id,
  name: api_response.data.user.profile.personal.name,
  age: api_response.data.user.profile.personal.age,
  job_title: api_response.data.user.profile.professional.title,
  company: api_response.data.user.profile.professional.company,
  last_updated: api_response.data.metadata.timestamp.parse_time("%Y-%m-%dT%H:%M:%S")
}

{
  user_record: {
    user_id: 123,
    name: "Alice",
    age: 30,
    job_title: "Engineer",
    company: "TechCorp",
    last_updated: 2024-01-15T10:00:00Z,
  },
}

Transform log aggregations

from {
  log_stats: {
    "2024-01-15": {
      "/api/users": {GET: 150, POST: 20},
      "/api/orders": {GET: 200, POST: 50, DELETE: 5}
    },
    "2024-01-16": {
      "/api/users": {GET: 180, POST: 25},
      "/api/orders": {GET: 220, POST: 60}
    }
  }
}
// Flatten nested structure into individual events
select flattened = log_stats.flatten()

{
  flattened: {
    "2024-01-15./api/users.GET": 150,
    "2024-01-15./api/users.POST": 20,
    "2024-01-15./api/orders.GET": 200,
    "2024-01-15./api/orders.POST": 50,
    "2024-01-15./api/orders.DELETE": 5,
    "2024-01-16./api/users.GET": 180,
    "2024-01-16./api/users.POST": 25,
    "2024-01-16./api/orders.GET": 220,
    "2024-01-16./api/orders.POST": 60,
  },
}