Skip to content

VOC Analysis

Volatile Organic Compounds (VOC) analysis workflow, including OFP and SOAP calculation.

Data Preparation

from datetime import datetime
from pathlib import Path
from AeroViz import RawDataReader
from AeroViz.dataProcess import DataProcess

# Read VOC data
voc = RawDataReader(
    instrument='VOC',
    path=Path('./data/voc'),
    start=datetime(2024, 1, 1),
    end=datetime(2024, 3, 31),
    mean_freq='1h'
)

# View available species
print(voc.columns.tolist())
# ['Ethane', 'Propane', 'Butane', 'Benzene', 'Toluene', ...]

OFP/SOAP Calculation

Basic Calculation

dp = DataProcess('VOC', Path('./output'))

# Calculate ozone formation potential
result = dp.potential(voc)

# OFP for each species
df_ofp = result['OFP']
print(df_ofp.columns.tolist())
# ['Ethane_OFP', 'Propane_OFP', 'Benzene_OFP', 'Toluene_OFP', ...]

# SOAP for each species
df_soap = result['SOAP']

# Total OFP/SOAP
df_total = result['total']
print(f"Mean total OFP: {df_total['OFP'].mean():.1f} ug O3/m3")
print(f"Mean total SOAP: {df_total['SOAP'].mean():.2f}")

Species Contribution Analysis

Top 10 Contributing Species

# Calculate mean OFP contribution for each species
mean_ofp = df_ofp.mean().sort_values(ascending=False)

# Top 10
top10 = mean_ofp.head(10)
print("Top 10 OFP contributors:")
for species, value in top10.items():
    print(f"  {species}: {value:.1f} ug O3/m3")

Species Contribution Ratio

import matplotlib.pyplot as plt

# Calculate contribution ratio
total_ofp = df_ofp.sum(axis=1)
contribution = df_ofp.div(total_ofp, axis=0) * 100

# Mean contribution ratio
mean_contrib = contribution.mean().sort_values(ascending=False)

# Plot pie chart (Top 8 + Others)
top8 = mean_contrib.head(8)
others = mean_contrib[8:].sum()
plot_data = pd.concat([top8, pd.Series({'Others': others})])

fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(plot_data, labels=plot_data.index, autopct='%1.1f%%')
ax.set_title('OFP Contribution by Species')
plt.show()

Analysis by Chemical Category

# Define chemical categories
categories = {
    'Alkanes': ['Ethane', 'Propane', 'Butane', 'Pentane', 'Hexane'],
    'Alkenes': ['Ethene', 'Propene', 'Butene', 'Isoprene'],
    'Aromatics': ['Benzene', 'Toluene', 'Ethylbenzene', 'Xylene'],
    'OVOCs': ['Formaldehyde', 'Acetaldehyde', 'Acetone']
}

# Calculate OFP for each category
category_ofp = {}
for cat, species in categories.items():
    # Find matching columns
    cols = [c for c in df_ofp.columns if any(s in c for s in species)]
    if cols:
        category_ofp[cat] = df_ofp[cols].sum(axis=1)

df_cat_ofp = pd.DataFrame(category_ofp)

# Plot stacked bar chart
fig, ax = plt.subplots(figsize=(12, 6))
df_cat_ofp.resample('D').mean().plot(kind='bar', stacked=True, ax=ax)
ax.set_ylabel('OFP (ug O3/m3)')
ax.set_title('Daily OFP by Chemical Category')
plt.tight_layout()
plt.show()

Temporal Variation Analysis

Diurnal Variation

# Calculate diurnal variation of total OFP
hourly = df_total['OFP'].groupby(df_total.index.hour).agg(['mean', 'std'])

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(hourly.index, hourly['mean'], 'b-o')
ax.fill_between(hourly.index,
                hourly['mean'] - hourly['std'],
                hourly['mean'] + hourly['std'],
                alpha=0.3)
ax.set_xlabel('Hour')
ax.set_ylabel('OFP (ug O3/m3)')
ax.set_title('Diurnal Variation of Total OFP')
ax.set_xticks(range(0, 24, 3))
plt.show()

Seasonal Variation

# Monthly average
monthly = df_total['OFP'].resample('M').mean()

fig, ax = plt.subplots(figsize=(10, 5))
monthly.plot(kind='bar', ax=ax)
ax.set_ylabel('OFP (ug O3/m3)')
ax.set_title('Monthly Average OFP')
plt.show()

OFP vs SOAP Relationship

fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(df_total['OFP'], df_total['SOAP'], alpha=0.5)
ax.set_xlabel('Total OFP (ug O3/m3)')
ax.set_ylabel('Total SOAP')
ax.set_title('OFP vs SOAP Relationship')

# Correlation
from scipy import stats
r, p = stats.pearsonr(df_total['OFP'].dropna(), df_total['SOAP'].dropna())
ax.text(0.05, 0.95, f'r = {r:.3f}\np < 0.001' if p < 0.001 else f'r = {r:.3f}\np = {p:.3f}',
        transform=ax.transAxes, va='top')
plt.show()

Complete Analysis Script

from datetime import datetime
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
from AeroViz import RawDataReader
from AeroViz.dataProcess import DataProcess

# 1. Read VOC data
voc = RawDataReader('VOC', Path('./data/voc'),
                    datetime(2024, 1, 1), datetime(2024, 3, 31))

# 2. Calculate OFP/SOAP
dp = DataProcess('VOC', Path('./output'))
result = dp.potential(voc)

df_ofp = result['OFP']
df_soap = result['SOAP']
df_total = result['total']

# 3. Species contribution analysis
mean_ofp = df_ofp.mean().sort_values(ascending=False)
print("=== Top 5 OFP Contributors ===")
for i, (species, value) in enumerate(mean_ofp.head(5).items(), 1):
    pct = value / df_total['OFP'].mean() * 100
    print(f"{i}. {species}: {value:.1f} ug O3/m3 ({pct:.1f}%)")

# 4. Output results summary
print("\n=== VOC Analysis Summary ===")
print(f"Total OFP: {df_total['OFP'].mean():.1f} +/- {df_total['OFP'].std():.1f} ug O3/m3")
print(f"Total SOAP: {df_total['SOAP'].mean():.2f} +/- {df_total['SOAP'].std():.2f}")

# 5. Plot contribution chart
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# OFP contribution
top10_ofp = mean_ofp.head(10)
axes[0].barh(range(10), top10_ofp.values)
axes[0].set_yticks(range(10))
axes[0].set_yticklabels([s.replace('_OFP', '') for s in top10_ofp.index])
axes[0].set_xlabel('OFP (ug O3/m3)')
axes[0].set_title('Top 10 OFP Contributors')

# Diurnal variation
hourly = df_total['OFP'].groupby(df_total.index.hour).mean()
axes[1].plot(hourly.index, hourly.values, 'b-o')
axes[1].set_xlabel('Hour')
axes[1].set_ylabel('OFP (ug O3/m3)')
axes[1].set_title('Diurnal Pattern')

plt.tight_layout()
plt.savefig('voc_analysis.png', dpi=300)