
Reshape complex data

Real-world data is rarely flat. It contains nested structures, arrays of objects, and deeply hierarchical information. This guide shows advanced techniques for reshaping complex data structures to meet your analysis needs.

Transform deeply nested data into flat structures for easier analysis.

Start with simple nested objects:

from {
user: {
id: 123,
profile: {
name: "Alice",
contact: {
email: "alice@example.com",
phone: "+1-555-0123"
}
}
}
}
flat_user = {
id: user.id,
name: user.profile.name,
email: user.profile.contact.email,
phone: user.profile.contact.phone
}
{
user: {...},
flat_user: {
id: 123,
name: "Alice",
email: "alice@example.com",
phone: "+1-555-0123",
},
}

Preserve context when flattening:

from {
event: {
id: "evt-001",
user: {name: "Alice", role: "admin"},
system: {name: "web-server", version: "2.1"}
}
}
flattened = {
event_id: event.id,
user_name: event.user.name,
user_role: event.user.role,
system_name: event.system.name,
system_version: event.system.version
}
{
event: {...},
flattened: {
event_id: "evt-001",
user_name: "Alice",
user_role: "admin",
system_name: "web-server",
system_version: "2.1",
}
}

Reconstruct hierarchical structures from flattened data using unflatten():

Convert dotted field names back to nested structures:

from {
flattened: {
"user.name": "Alice",
"user.email": "alice@example.com",
"user.address.city": "NYC",
"user.address.zip": "10001",
"status": "active"
}
}
nested = flattened.unflatten()
{
flattened: {
"user.name": "Alice",
"user.email": "alice@example.com",
"user.address.city": "NYC",
"user.address.zip": "10001",
status: "active",
},
nested: {
user: {
name: "Alice",
email: "alice@example.com",
address: {
city: "NYC",
zip: "10001",
},
},
status: "active",
},
}

Use a different separator for field paths:

from {
metrics: {
cpu_usage_percent: 45,
memory_used_gb: 8,
memory_total_gb: 16,
disk_root_used: 100,
disk_root_total: 500
}
}
structured = metrics.unflatten("_")
{
metrics: {
cpu_usage_percent: 45,
memory_used_gb: 8,
memory_total_gb: 16,
disk_root_used: 100,
disk_root_total: 500,
},
structured: {
cpu: {
usage: {
percent: 45,
},
},
memory: {
used: {
gb: 8,
},
total: {
gb: 16,
},
},
disk: {
root: {
used: 100,
total: 500,
},
},
},
}

Convert wide data to long format for better analysis.

from {
metrics: {
timestamp: "2024-01-15T10:00:00",
cpu_usage: 45,
memory_usage: 62,
disk_usage: 78
}
}
long_format = [
{timestamp: metrics.timestamp, metric: "cpu", value: metrics.cpu_usage},
{timestamp: metrics.timestamp, metric: "memory", value: metrics.memory_usage},
{timestamp: metrics.timestamp, metric: "disk", value: metrics.disk_usage}
]
{
metrics: {
timestamp: "2024-01-15T10:00:00",
cpu_usage: 45,
memory_usage: 62,
disk_usage: 78,
},
long_format: [
{
timestamp: "2024-01-15T10:00:00",
metric: "cpu",
value: 45,
},
{
timestamp: "2024-01-15T10:00:00",
metric: "memory",
value: 62,
},
{
timestamp: "2024-01-15T10:00:00",
metric: "disk",
value: 78,
},
],
}

Convert arrays to event streams for processing:

from {
readings: [
{sensor: "temp", location: "room1", value: 72},
{sensor: "humidity", location: "room1", value: 45},
{sensor: "temp", location: "room2", value: 68},
{sensor: "humidity", location: "room2", value: 50}
]
}
unroll readings
select sensor=readings.sensor,
location=readings.location,
value=readings.value
{sensor: "temp", location: "room1", value: 72}
{sensor: "humidity", location: "room1", value: 45}
{sensor: "temp", location: "room2", value: 68}
{sensor: "humidity", location: "room2", value: 50}

Transform objects with numbered keys into proper arrays.

from {
response: {
item_0: {name: "Widget", price: 9.99},
item_1: {name: "Gadget", price: 19.99},
item_2: {name: "Tool", price: 14.99},
total_items: 3
}
}
// Extract items manually when keys are known
items = [
response.item_0,
response.item_1,
response.item_2
]
{
response: {...},
items: [
{name: "Widget", price: 9.99},
{name: "Gadget", price: 19.99},
{name: "Tool", price: 14.99},
]
}

Create nested structures from flat data.

Use the summarize operator to group data:

from {
records: [
{dept: "Engineering", team: "Backend", member: "Alice"},
{dept: "Engineering", team: "Backend", member: "Bob"},
{dept: "Engineering", team: "Frontend", member: "Charlie"},
{dept: "Sales", team: "Direct", member: "David"}
]
}
unroll records
summarize dept=records.dept, team=records.team, members=collect(records.member)
{
dept: "Sales",
team: "Direct",
members: [
"David",
],
}
{
dept: "Engineering",
team: "Frontend",
members: [
"Charlie",
],
}
{
dept: "Engineering",
team: "Backend",
members: [
"Alice",
"Bob",
],
}
Parse file-path strings into structured components:

from {
paths: [
"/home/user/docs/report.pdf",
"/home/user/docs/summary.txt",
"/home/user/images/photo.jpg",
"/var/log/system.log"
]
}
path_info = paths.map(path => {
full_path: path,
directory: path.parent_dir(),
filename: path.file_name(),
parts: path.split("/").where(p => p.length_bytes() > 0)
})
{
paths: [...],
path_info: [
{
full_path: "/home/user/docs/report.pdf",
directory: "/home/user/docs",
filename: "report.pdf",
parts: ["home", "user", "docs", "report.pdf"],
},
{
full_path: "/home/user/docs/summary.txt",
directory: "/home/user/docs",
filename: "summary.txt",
parts: ["home", "user", "docs", "summary.txt"],
},
{
full_path: "/home/user/images/photo.jpg",
directory: "/home/user/images",
filename: "photo.jpg",
parts: ["home", "user", "images", "photo.jpg"],
},
{
full_path: "/var/log/system.log",
directory: "/var/log",
filename: "system.log",
parts: ["var", "log", "system.log"],
},
],
}

Combine data split across multiple records.

from {
user: {id: 1, name: "Alice"},
profile: {email: "alice@example.com", dept: "Engineering"},
metrics: {logins: 45, actions: 234}
}
merged = {
...user,
...profile,
...metrics
}
{
user: {
id: 1,
name: "Alice",
},
profile: {
email: "alice@example.com",
dept: "Engineering",
},
metrics: {
logins: 45,
actions: 234,
},
merged: {
id: 1,
name: "Alice",
email: "alice@example.com",
dept: "Engineering",
logins: 45,
actions: 234,
},
}

Work with data that has varying structures.

from {
events: [
{type: "user", data: {name: "Alice", email: "alice@example.com"}},
{type: "system", data: {cpu: 45, memory: 1024}},
{type: "error", message: "Connection failed", code: 500}
]
}
select normalized = events.map(e => {
event_type: e.type,
timestamp: now(),
// Use conditional assignment for type-specific fields
user_name: e.data.name if e.type == "user",
user_email: e.data.email if e.type == "user",
system_cpu: e.data.cpu if e.type == "system",
system_memory: e.data.memory if e.type == "system",
error_message: e.message if e.type == "error",
error_code: e.code if e.type == "error"
})
{
normalized: [
{
event_type: "user",
timestamp: 2025-07-21T18:54:09.307412Z,
user_name: "Alice",
user_email: "alice@example.com",
system_cpu: null,
system_memory: null,
error_message: null,
error_code: null,
},
{
event_type: "system",
timestamp: 2025-07-21T18:54:09.307412Z,
user_name: null,
user_email: null,
system_cpu: 45,
system_memory: 1024,
error_message: null,
error_code: null,
},
{
event_type: "error",
timestamp: 2025-07-21T18:54:09.307412Z,
user_name: null,
user_email: null,
system_cpu: null,
system_memory: null,
error_message: "Connection failed",
error_code: 500,
},
],
}

Flatten arbitrarily nested structures:

from {
data: {
level1: {
level2: {
level3: {
value: "deep",
items: [1, 2, 3]
}
},
other: "value"
}
}
}
// Use flatten function for automatic recursive flattening
select flattened = flatten(data)
{
flattened: {
"level1.level2.level3.value": "deep",
"level1.level2.level3.items": [
1,
2,
3,
],
"level1.other": "value",
},
}

Use field access to extract specific configurations:

from {
config: {
env_DATABASE_HOST: "db.example.com",
env_DATABASE_PORT: 5432,
env_API_KEY: "secret123",
app_name: "MyApp",
app_version: "1.0"
}
}
// Extract specific fields directly
select env_vars = {
DATABASE_HOST: config.env_DATABASE_HOST,
DATABASE_PORT: config.env_DATABASE_PORT,
API_KEY: config.env_API_KEY
},
app_config = {
name: config.app_name,
version: config.app_version
}
{
env_vars: {
DATABASE_HOST: "db.example.com",
DATABASE_PORT: 5432,
API_KEY: "secret123",
},
app_config: {
name: "MyApp",
version: "1.0",
},
}
Reshape a deeply nested API response into a flat record:

from {
api_response: {
status: "success",
data: {
user: {
id: 123,
profile: {
personal: {name: "Alice", age: 30},
professional: {title: "Engineer", company: "TechCorp"}
}
},
metadata: {
request_id: "req-001",
timestamp: "2024-01-15T10:00:00"
}
}
}
}
// Extract and reshape for database storage
select user_record = {
user_id: api_response.data.user.id,
name: api_response.data.user.profile.personal.name,
age: api_response.data.user.profile.personal.age,
job_title: api_response.data.user.profile.professional.title,
company: api_response.data.user.profile.professional.company,
last_updated: api_response.data.metadata.timestamp.parse_time("%Y-%m-%dT%H:%M:%S")
}
{
user_record: {
user_id: 123,
name: "Alice",
age: 30,
job_title: "Engineer",
company: "TechCorp",
last_updated: 2024-01-15T10:00:00Z,
},
}
Flatten multi-level log statistics into dotted keys:

from {
log_stats: {
"2024-01-15": {
"/api/users": {GET: 150, POST: 20},
"/api/orders": {GET: 200, POST: 50, DELETE: 5}
},
"2024-01-16": {
"/api/users": {GET: 180, POST: 25},
"/api/orders": {GET: 220, POST: 60}
}
}
}
// Flatten nested structure into individual events
select flattened = flatten(log_stats)
{
flattened: {
"2024-01-15./api/users.GET": 150,
"2024-01-15./api/users.POST": 20,
"2024-01-15./api/orders.GET": 200,
"2024-01-15./api/orders.POST": 50,
"2024-01-15./api/orders.DELETE": 5,
"2024-01-16./api/users.GET": 180,
"2024-01-16./api/users.POST": 25,
"2024-01-16./api/orders.GET": 220,
"2024-01-16./api/orders.POST": 60,
},
}
Follow these best practices when reshaping complex data:

  1. Plan your structure: Design the target schema before transforming
  2. Handle missing fields: Use conditional logic or defaults for optional data
  3. Preserve information: Don’t lose data during transformation unless intentional
  4. Test edge cases: Verify transformations work with incomplete or unusual data
  5. Document complex logic: Add comments explaining non-obvious transformations
