Hi, I am trying to slice a MiniSEED file of almost 4 GB into daily (24-hour) files, but the error below keeps coming back.
import obspy
from obspy import UTCDateTime
import os
# Path to your MiniSEED file
file_path = "/home/TSIMP_Data_2022/D110_JAN_MAR.mseed"
# Base output directory
base_output_dir = "/home/TSIMP_Data_2022/"
# Read the MiniSEED file
try:
    mseed_file = obspy.read(file_path)
except Exception as e:
    print(f"Error reading MiniSEED file: {e}")
    raise
# Get the start and end times
start = min(tr.stats.starttime for tr in mseed_file)
end = max(tr.stats.endtime for tr in mseed_file)
# Create a function to format and write each daily segment
def write_daily_segment(start_time, end_time, traces, output_dir):
    try:
        # Slice and write each trace
        for tr in traces:
            trace_slice = tr.slice(starttime=start_time, endtime=end_time)
            # Construct filename
            network = trace_slice.stats.network
            station = trace_slice.stats.station
            location = trace_slice.stats.location or "--"
            channel = trace_slice.stats.channel
            year = start_time.year
            julian_day = start_time.julday
            filename = f"{network}.{station}.{location}.{channel}.{year}.{julian_day:03d}.mseed"
            output_path = os.path.join(output_dir, filename)
            # Ensure output directory exists
            os.makedirs(output_dir, exist_ok=True)
            # Write the slice to a file
            trace_slice.write(output_path, format='MSEED')
            print(f"Saved: {output_path}")
    except Exception as e:
        print(f"Error writing daily segment: {e}")
# Initialize the start of the first day
doy = start.julday
year = start.year
t1 = UTCDateTime(f'{year:04d}-{doy:03d}T00:00:00')
while t1 < end:
    # Calculate the end of the current day
    t2 = UTCDateTime(f'{year:04d}-{doy:03d}T23:59:59')
    # Ensure t2 does not exceed the actual end time
    if t2 > end:
        t2 = end
    # Define output directory
    day_output_dir = os.path.join(base_output_dir, f"{year}_{doy:03d}")
    # Write the daily segment
    write_daily_segment(t1, t2, mseed_file, day_output_dir)
    # Move to the next day; deriving year and julian day from t1 handles
    # year transitions and leap years automatically
    t1 = UTCDateTime(f'{year:04d}-{doy:03d}T00:00:00') + 86400  # 24 hours later
    year = t1.year
    doy = t1.julday
---------------------------------------------------------------------------
ObsPyMSEEDFilesizeTooLargeError Traceback (most recent call last)
Cell In[2], line 13
11 # Read the MiniSEED file
12 try:
---> 13 mseed_file = obspy.read(file_path)
14 except Exception as e:
15 print(f"Error reading MiniSEED file: {e}")
File ~/.conda/envs/wmsan/lib/python3.12/site-packages/decorator.py:232, in decorate.<locals>.fun(*args, **kw)
230 if not kwsyntax:
231 args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)
File ~/.conda/envs/wmsan/lib/python3.12/site-packages/obspy/core/util/decorator.py:297, in map_example_filename.<locals>._map_example_filename(func, *args, **kwargs)
295 except IOError:
296 pass
--> 297 return func(*args, **kwargs)
File ~/.conda/envs/wmsan/lib/python3.12/site-packages/obspy/core/stream.py:208, in read(pathname_or_url, format, headonly, starttime, endtime, nearest_sample, dtype, apply_calib, check_compression, **kwargs)
206 st = _create_example_stream(headonly=headonly)
207 else:
--> 208 st = _generic_reader(pathname_or_url, _read, **kwargs)
210 if len(st) == 0:
211 if isinstance(pathname_or_url, Path):
...
--> 280 raise ObsPyMSEEDFilesizeTooLargeError(msg)
282 info = util.get_record_information(mseed_object, endian=bo)
284 # Map the encoding to a readable string value.
ObsPyMSEEDFilesizeTooLargeError: ObsPy can currently not directly read mini-SEED files that are larger than 2^31 bytes (2048 MiB). To still read it, please read the file in chunks as documented here: https://github.com/obspy/obspy/pull/1419#issuecomment-221582369
I have also tried reading the file in chunks, as suggested in the error message, but then the output is not sliced into exact 24-hour files. Can you help me figure out where the issue is? My chunked attempt is below.
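One thing I am not sure about is whether RECORD_LENGTH = 512 in the script below actually matches the file. If it matters, the record length can presumably be checked with obspy.io.mseed.util.get_record_information (the same helper that shows up in the traceback above); this is only a quick sketch, and I am assuming the returned dictionary carries a 'record_length' entry:

from obspy.io.mseed.util import get_record_information

# Only inspects record headers, so it does not load the whole 4 GB file
info = get_record_information(file_path)
print(info.get("record_length"))  # e.g. 512 or 4096
print(info.get("filesize"))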
import os
import io
import obspy
from obspy import UTCDateTime
# Path to your large MiniSEED file
file_path = "/home/TSIMP_Data_2022/D110_JAN_MAR.mseed"
# Base output directory
base_output_dir = "/home/TSIMP_Data_2022/D100/"
# Define the record length and chunk size (e.g., 50 MB)
RECORD_LENGTH = 512
CHUNK_SIZE_IN_MB = 50
CHUNK_SIZE = (CHUNK_SIZE_IN_MB * 1024 * 1024) // RECORD_LENGTH * RECORD_LENGTH # Ensure it's multiple of record length
# Keep track of processed time windows to avoid duplicates
processed_segments = set()
with io.open(file_path, "rb") as fh:
    while True:
        # Read a chunk from the file
        data = fh.read(CHUNK_SIZE)
        if not data:
            break  # End of file reached
        # Use BytesIO to read from the chunk
        with io.BytesIO(data) as buf:
            try:
                # Read the stream from the buffer
                st = obspy.read(buf)
            except Exception as e:
                print(f"Error reading chunk: {e}")
                continue  # Skip this chunk on error
        # Process each trace in the stream
        for trace in st:
            # Extract trace metadata
            network = trace.stats.network
            station = trace.stats.station
            location = trace.stats.location or "--"
            channel = trace.stats.channel
            start_time = trace.stats.starttime
            end_time = trace.stats.endtime
            # Define station-specific output directory
            station_output_dir = os.path.join(base_output_dir, station)
            os.makedirs(station_output_dir, exist_ok=True)
            # Calculate the start of the first 24-hour period
            day_start = UTCDateTime(year=start_time.year, julday=start_time.julday)
            # Iterate over 24-hour periods within the trace time span
            current_start = day_start
            while current_start < end_time:
                current_end = current_start + 86400  # 24 hours later
                # Determine the overlap between trace and current 24-hour period
                slice_start = max(current_start, start_time)
                slice_end = min(current_end, end_time)
                # Skip if no overlap
                if slice_start >= slice_end:
                    current_start += 86400
                    continue
                # Create a unique identifier for this segment to avoid duplicates
                segment_id = (
                    network, station, location, channel,
                    slice_start.strftime("%Y.%j"), slice_end.strftime("%Y.%j")
                )
                if segment_id in processed_segments:
                    current_start += 86400
                    continue  # Skip already processed segments
                processed_segments.add(segment_id)
                # Slice the trace for the current 24-hour period
                trace_slice = trace.slice(starttime=slice_start, endtime=slice_end)
                # Construct filename
                year = slice_start.year
                julian_day = slice_start.julday
                filename = f"{network}.{station}.{location}.{channel}.{year}.{julian_day:03d}.mseed"
                output_path = os.path.join(station_output_dir, filename)
                # Write the sliced trace to file
                try:
                    trace_slice.write(output_path, format='MSEED')
                    print(f"Saved: {output_path}")
                except Exception as e:
                    print(f"Error saving {output_path}: {e}")
                # Move to the next 24-hour period
                current_start += 86400
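My suspicion is that a single day can span two chunks, in which case the second chunk overwrites the day file written from the first chunk (and, with the processed_segments set, the second piece is skipped entirely). The direction I was thinking of is to merge each new slice into the existing day file instead of overwriting it. This is only a rough sketch of that idea, not something taken from the ObsPy docs; the helper name append_day_slice is mine, and I am assuming re-reading the small per-day file on every write is acceptable:

def append_day_slice(trace_slice, output_path):
    """Add a new slice to an existing day file instead of overwriting it."""
    if os.path.exists(output_path):
        # Pieces of this day written from earlier chunks
        day_stream = obspy.read(output_path)
    else:
        day_stream = obspy.Stream()
    # Add the piece from the current chunk; no merge is forced here, since
    # MiniSEED can hold several traces and a later read()/merge() can
    # reassemble them
    day_stream += trace_slice
    day_stream.write(output_path, format="MSEED")

# Usage inside the chunk loop, replacing trace_slice.write(...):
# append_day_slice(trace_slice, output_path)

If I went this way, I think the processed_segments check would have to be dropped (otherwise the second half of a day that spans two chunks is skipped), and the pieces would simply accumulate in the per-day file. Does this look like a reasonable direction, or is there a cleaner way to get exact 24-hour files out of a file this large?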