{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [], "source": [ "!test -f aircraft_small.bufr || wget https://get.ecmwf.int/repository/test-data/pdbufr/test-data/aircraft_small.bufr\n", "!test -f temp.bufr || wget https://get.ecmwf.int/repository/test-data/pdbufr/test-data/temp.bufr" ] }, { "cell_type": "markdown", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "# Flat: overview" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [], "source": [ "import pdbufr" ] }, { "cell_type": "raw", "metadata": { "editable": true, "raw_mimetype": "text/restructuredtext", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "The :ref:`flat reader ` is activated with the ``reader=\"flat\"`` option in :ref:`read_bufr() `. With this option, messages/subsets are extracted as a whole, preserving the column order (see exceptions below). \n", "\n", "Since the results contain a large number of columns with very long names, the **transpose** of each DataFrame is shown in all the examples below to make better use of the available space." ] }, { "cell_type": "markdown", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "### Options\n", "\n", "By default, all the header and data keys are extracted:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
edition3333333333
masterTableNumber0000000000
bufrHeaderSubCentre0000000000
bufrHeaderCentre98989898989898989898
updateSequenceNumber0000000000
.................................
#1#dewpointTemperatureNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
#1#relativeHumidityNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
#1#airframeIcingNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
#1#centre98989898989898989898
#1#generatingApplication1111111111
\n", "

81 rows × 10 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 \\\n", "edition 3 3 3 3 3 3 3 3 \n", "masterTableNumber 0 0 0 0 0 0 0 0 \n", "bufrHeaderSubCentre 0 0 0 0 0 0 0 0 \n", "bufrHeaderCentre 98 98 98 98 98 98 98 98 \n", "updateSequenceNumber 0 0 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... ... \n", "#1#dewpointTemperature None None None None None None None None \n", "#1#relativeHumidity None None None None None None None None \n", "#1#airframeIcing None None None None None None None None \n", "#1#centre 98 98 98 98 98 98 98 98 \n", "#1#generatingApplication 1 1 1 1 1 1 1 1 \n", "\n", " 8 9 \n", "edition 3 3 \n", "masterTableNumber 0 0 \n", "bufrHeaderSubCentre 0 0 \n", "bufrHeaderCentre 98 98 \n", "updateSequenceNumber 0 0 \n", "... ... ... \n", "#1#dewpointTemperature None None \n", "#1#relativeHumidity None None \n", "#1#airframeIcing None None \n", "#1#centre 98 98 \n", "#1#generatingApplication 1 1 \n", "\n", "[81 rows x 10 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pdbufr.read_bufr(\"aircraft_small.bufr\", reader=\"flat\")\n", "df.T" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "However, we can extract only the **header keys**:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
edition3333333333
masterTableNumber0000000000
bufrHeaderSubCentre0000000000
bufrHeaderCentre98989898989898989898
updateSequenceNumber0000000000
dataCategory4444444444
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9\n", "edition 3 3 3 3 3 3 3 3 3 3\n", "masterTableNumber 0 0 0 0 0 0 0 0 0 0\n", "bufrHeaderSubCentre 0 0 0 0 0 0 0 0 0 0\n", "bufrHeaderCentre 98 98 98 98 98 98 98 98 98 98\n", "updateSequenceNumber 0 0 0 0 0 0 0 0 0 0\n", "dataCategory 4 4 4 4 4 4 4 4 4 4" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pdbufr.read_bufr(\"aircraft_small.bufr\", columns=\"header\", reader=\"flat\")\n", "df.T[:6]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "or only the **data keys**:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
subsetNumber1111111111
#1#aircraftFlightNumberQGOBTRRAQGOBTRRAUOZDOZ2SUOZDOZ2SUOZDOZ2SUOZDOZ2SVUVTEWZQ4IPASOZAWSSASKBAWSSASKBA
#1#aircraftRegistrationNumberOrOtherIdentificationHGSKJFBAHGSKJFBAO2RYR4JAO2RYR4JAO2RYR4JAO2RYR4JA4NK13QZA0IKWU1JAP4MAWDZAP4MAWDZA
#1#aircraftNavigationalSystemNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
#1#aircraftDataRelaySystemType3333333333
#1#instrumentationForWindMeasurement4444444444
#1#temperatureObservationPrecision0.10.10.250.250.250.250.250.250.250.25
#1#originalSpecificationOfLatitudeOrLongitude11101010101011010
#1#aircraftRollAngleNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
#1#stationType0000000000
#1#year2009200920092009200920092009200920092009
#1#month1111111111
#1#day23232323232323232323
#1#hour13131212131313131313
#1#minute015658022200
#1#latitude35.135.0741.4541.5241.5941.6738.2619.6242.3142.29
#1#longitude-89.97-89.97-75.43-75.63-75.87-76.16-78.5773.75-70.7-70.67
#1#phaseOfAircraftFlight6.06.0NaNNaNNaNNaN3.0NaN5.05.0
\n", "
" ], "text/plain": [ " 0 1 \\\n", "subsetNumber 1 1 \n", "#1#aircraftFlightNumber QGOBTRRA QGOBTRRA \n", "#1#aircraftRegistrationNumberOrOtherIdentification HGSKJFBA HGSKJFBA \n", "#1#aircraftNavigationalSystem None None \n", "#1#aircraftDataRelaySystemType 3 3 \n", "#1#instrumentationForWindMeasurement 4 4 \n", "#1#temperatureObservationPrecision 0.1 0.1 \n", "#1#originalSpecificationOfLatitudeOrLongitude 1 1 \n", "#1#aircraftRollAngle None None \n", "#1#stationType 0 0 \n", "#1#year 2009 2009 \n", "#1#month 1 1 \n", "#1#day 23 23 \n", "#1#hour 13 13 \n", "#1#minute 0 1 \n", "#1#latitude 35.1 35.07 \n", "#1#longitude -89.97 -89.97 \n", "#1#phaseOfAircraftFlight 6.0 6.0 \n", "\n", " 2 3 \\\n", "subsetNumber 1 1 \n", "#1#aircraftFlightNumber UOZDOZ2S UOZDOZ2S \n", "#1#aircraftRegistrationNumberOrOtherIdentification O2RYR4JA O2RYR4JA \n", "#1#aircraftNavigationalSystem None None \n", "#1#aircraftDataRelaySystemType 3 3 \n", "#1#instrumentationForWindMeasurement 4 4 \n", "#1#temperatureObservationPrecision 0.25 0.25 \n", "#1#originalSpecificationOfLatitudeOrLongitude 10 10 \n", "#1#aircraftRollAngle None None \n", "#1#stationType 0 0 \n", "#1#year 2009 2009 \n", "#1#month 1 1 \n", "#1#day 23 23 \n", "#1#hour 12 12 \n", "#1#minute 56 58 \n", "#1#latitude 41.45 41.52 \n", "#1#longitude -75.43 -75.63 \n", "#1#phaseOfAircraftFlight NaN NaN \n", "\n", " 4 5 \\\n", "subsetNumber 1 1 \n", "#1#aircraftFlightNumber UOZDOZ2S UOZDOZ2S \n", "#1#aircraftRegistrationNumberOrOtherIdentification O2RYR4JA O2RYR4JA \n", "#1#aircraftNavigationalSystem None None \n", "#1#aircraftDataRelaySystemType 3 3 \n", "#1#instrumentationForWindMeasurement 4 4 \n", "#1#temperatureObservationPrecision 0.25 0.25 \n", "#1#originalSpecificationOfLatitudeOrLongitude 10 10 \n", "#1#aircraftRollAngle None None \n", "#1#stationType 0 0 \n", "#1#year 2009 2009 \n", "#1#month 1 1 \n", "#1#day 23 23 \n", "#1#hour 13 13 \n", "#1#minute 0 2 \n", "#1#latitude 41.59 41.67 \n", "#1#longitude -75.87 -76.16 \n", 
"#1#phaseOfAircraftFlight NaN NaN \n", "\n", " 6 7 \\\n", "subsetNumber 1 1 \n", "#1#aircraftFlightNumber VUVTEWZQ 4IPASOZA \n", "#1#aircraftRegistrationNumberOrOtherIdentification 4NK13QZA 0IKWU1JA \n", "#1#aircraftNavigationalSystem None None \n", "#1#aircraftDataRelaySystemType 3 3 \n", "#1#instrumentationForWindMeasurement 4 4 \n", "#1#temperatureObservationPrecision 0.25 0.25 \n", "#1#originalSpecificationOfLatitudeOrLongitude 10 1 \n", "#1#aircraftRollAngle None None \n", "#1#stationType 0 0 \n", "#1#year 2009 2009 \n", "#1#month 1 1 \n", "#1#day 23 23 \n", "#1#hour 13 13 \n", "#1#minute 2 2 \n", "#1#latitude 38.26 19.62 \n", "#1#longitude -78.57 73.75 \n", "#1#phaseOfAircraftFlight 3.0 NaN \n", "\n", " 8 9 \n", "subsetNumber 1 1 \n", "#1#aircraftFlightNumber WSSASKBA WSSASKBA \n", "#1#aircraftRegistrationNumberOrOtherIdentification P4MAWDZA P4MAWDZA \n", "#1#aircraftNavigationalSystem None None \n", "#1#aircraftDataRelaySystemType 3 3 \n", "#1#instrumentationForWindMeasurement 4 4 \n", "#1#temperatureObservationPrecision 0.25 0.25 \n", "#1#originalSpecificationOfLatitudeOrLongitude 10 10 \n", "#1#aircraftRollAngle None None \n", "#1#stationType 0 0 \n", "#1#year 2009 2009 \n", "#1#month 1 1 \n", "#1#day 23 23 \n", "#1#hour 13 13 \n", "#1#minute 0 0 \n", "#1#latitude 42.31 42.29 \n", "#1#longitude -70.7 -70.67 \n", "#1#phaseOfAircraftFlight 5.0 5.0 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pdbufr.read_bufr(\"aircraft_small.bufr\", columns=\"data\", reader=\"flat\")\n", "df.T[:18]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Filtering works similarly to the hierarchical (i.e. non-flat) mode:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
subsetNumber1111
#1#aircraftFlightNumberUOZDOZ2SUOZDOZ2SUOZDOZ2SUOZDOZ2S
#1#aircraftRegistrationNumberOrOtherIdentificationO2RYR4JAO2RYR4JAO2RYR4JAO2RYR4JA
#1#aircraftNavigationalSystemNoneNoneNoneNone
#1#aircraftDataRelaySystemType3333
#1#instrumentationForWindMeasurement4444
#1#temperatureObservationPrecision0.250.250.250.25
#1#originalSpecificationOfLatitudeOrLongitude10101010
#1#aircraftRollAngleNoneNoneNoneNone
#1#stationType0000
#1#year2009200920092009
#1#month1111
#1#day23232323
#1#hour12121313
#1#minute565802
#1#latitude41.4541.5241.5941.67
#1#longitude-75.43-75.63-75.87-76.16
#1#phaseOfAircraftFlightNoneNoneNoneNone
\n", "
" ], "text/plain": [ " 0 1 \\\n", "subsetNumber 1 1 \n", "#1#aircraftFlightNumber UOZDOZ2S UOZDOZ2S \n", "#1#aircraftRegistrationNumberOrOtherIdentification O2RYR4JA O2RYR4JA \n", "#1#aircraftNavigationalSystem None None \n", "#1#aircraftDataRelaySystemType 3 3 \n", "#1#instrumentationForWindMeasurement 4 4 \n", "#1#temperatureObservationPrecision 0.25 0.25 \n", "#1#originalSpecificationOfLatitudeOrLongitude 10 10 \n", "#1#aircraftRollAngle None None \n", "#1#stationType 0 0 \n", "#1#year 2009 2009 \n", "#1#month 1 1 \n", "#1#day 23 23 \n", "#1#hour 12 12 \n", "#1#minute 56 58 \n", "#1#latitude 41.45 41.52 \n", "#1#longitude -75.43 -75.63 \n", "#1#phaseOfAircraftFlight None None \n", "\n", " 2 3 \n", "subsetNumber 1 1 \n", "#1#aircraftFlightNumber UOZDOZ2S UOZDOZ2S \n", "#1#aircraftRegistrationNumberOrOtherIdentification O2RYR4JA O2RYR4JA \n", "#1#aircraftNavigationalSystem None None \n", "#1#aircraftDataRelaySystemType 3 3 \n", "#1#instrumentationForWindMeasurement 4 4 \n", "#1#temperatureObservationPrecision 0.25 0.25 \n", "#1#originalSpecificationOfLatitudeOrLongitude 10 10 \n", "#1#aircraftRollAngle None None \n", "#1#stationType 0 0 \n", "#1#year 2009 2009 \n", "#1#month 1 1 \n", "#1#day 23 23 \n", "#1#hour 13 13 \n", "#1#minute 0 2 \n", "#1#latitude 41.59 41.67 \n", "#1#longitude -75.87 -76.16 \n", "#1#phaseOfAircraftFlight None None " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pdbufr.read_bufr(\"aircraft_small.bufr\", \n", " columns=\"data\", \n", " filters={\"aircraftFlightNumber\": \"UOZDOZ2S\"}, \n", " reader=\"flat\")\n", "df.T.iloc[:18]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Column alignment\n", "\n", "The aircraft messages we have examined so far had identical structure; each message contained the very same keys in the very same order. 
The result was always a nicely aligned DataFrame.\n", "\n", "However, in a BUFR file each message can have a different structure and the alignment is not guaranteed at all. We will demonstrate it with a BUFR file containing radiosonde data.\n", "\n", "First, we extract the first message only. From the output we can see it contains 24 pressure level blocks." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
#23#pressure26300.0
#23#verticalSoundingSignificance4
#23#nonCoordinateGeopotential89290.0
#23#airTemperature218.5
#23#dewpointTemperature198.5
#23#windDirectionNone
#23#windSpeedNone
#24#pressure25800.0
#24#verticalSoundingSignificance4
#24#nonCoordinateGeopotential90490.0
#24#airTemperature218.5
#24#dewpointTemperature196.5
#24#windDirectionNone
#24#windSpeedNone
#1#centre98
#1#generatingApplication1
\n", "
" ], "text/plain": [ " 0\n", "#23#pressure 26300.0\n", "#23#verticalSoundingSignificance 4\n", "#23#nonCoordinateGeopotential 89290.0\n", "#23#airTemperature 218.5\n", "#23#dewpointTemperature 198.5\n", "#23#windDirection None\n", "#23#windSpeed None\n", "#24#pressure 25800.0\n", "#24#verticalSoundingSignificance 4\n", "#24#nonCoordinateGeopotential 90490.0\n", "#24#airTemperature 218.5\n", "#24#dewpointTemperature 196.5\n", "#24#windDirection None\n", "#24#windSpeed None\n", "#1#centre 98\n", "#1#generatingApplication 1" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pdbufr.read_bufr(\"temp.bufr\", columns=\"data\", filters={\"count\": 1}, reader=\"flat\")\n", "df.T.iloc[-16:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next, we extract the second message. This message contains one more block (25 in total):" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
#24#pressure23200.0
#24#verticalSoundingSignificance4
#24#nonCoordinateGeopotential98410.0
#24#airTemperature223.1
#24#dewpointTemperature192.1
#24#windDirectionNone
#24#windSpeedNone
#25#pressure20500.0
#25#verticalSoundingSignificance4
#25#nonCoordinateGeopotential106300.0
#25#airTemperature221.5
#25#dewpointTemperature191.5
#25#windDirectionNone
#25#windSpeedNone
#1#centre98
#1#generatingApplication1
\n", "
" ], "text/plain": [ " 0\n", "#24#pressure 23200.0\n", "#24#verticalSoundingSignificance 4\n", "#24#nonCoordinateGeopotential 98410.0\n", "#24#airTemperature 223.1\n", "#24#dewpointTemperature 192.1\n", "#24#windDirection None\n", "#24#windSpeed None\n", "#25#pressure 20500.0\n", "#25#verticalSoundingSignificance 4\n", "#25#nonCoordinateGeopotential 106300.0\n", "#25#airTemperature 221.5\n", "#25#dewpointTemperature 191.5\n", "#25#windDirection None\n", "#25#windSpeed None\n", "#1#centre 98\n", "#1#generatingApplication 1" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pdbufr.read_bufr(\"temp.bufr\", columns=\"data\", filters={\"count\": 2}, reader=\"flat\")\n", "df.T.iloc[-16:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, if we extract these messages together the columns will not be aligned:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Warning: not all BUFR messages/subsets have the same structure in the input file. Non-overlapping columns (starting with column[{column_info.first_count-1}] =#1#generatingApplication) were added to end of the resulting dataframealtering the original column order for these messages.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
#24#pressure25800.023200.0
#24#verticalSoundingSignificance44
#24#nonCoordinateGeopotential90490.098410.0
#24#airTemperature218.5223.1
#24#dewpointTemperature196.5192.1
#24#windDirectionNoneNone
#24#windSpeedNoneNone
#1#centre9898
#1#generatingApplication11
#25#pressureNaN20500.0
#25#verticalSoundingSignificanceNaN4.0
#25#nonCoordinateGeopotentialNaN106300.0
#25#airTemperatureNaN221.5
#25#dewpointTemperatureNaN191.5
#25#windDirectionNaNNaN
#25#windSpeedNaNNaN
\n", "
" ], "text/plain": [ " 0 1\n", "#24#pressure 25800.0 23200.0\n", "#24#verticalSoundingSignificance 4 4\n", "#24#nonCoordinateGeopotential 90490.0 98410.0\n", "#24#airTemperature 218.5 223.1\n", "#24#dewpointTemperature 196.5 192.1\n", "#24#windDirection None None\n", "#24#windSpeed None None\n", "#1#centre 98 98\n", "#1#generatingApplication 1 1\n", "#25#pressure NaN 20500.0\n", "#25#verticalSoundingSignificance NaN 4.0\n", "#25#nonCoordinateGeopotential NaN 106300.0\n", "#25#airTemperature NaN 221.5\n", "#25#dewpointTemperature NaN 191.5\n", "#25#windDirection NaN NaN\n", "#25#windSpeed NaN NaN" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pdbufr.read_bufr(\"temp.bufr\", columns=\"data\", filters={\"count\": [1,2]}, reader=\"flat\")\n", "df.T.iloc[-16:]" ] }, { "cell_type": "raw", "metadata": { "editable": true, "raw_mimetype": "text/restructuredtext", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "So what happened here? The resulting DataFrame was built message by message, and columns not yet present were automatically appended to the end by Pandas. We can see that this happened to block #25 from the second message. It changed the original column order because \"#1#centre\" and \"#1#generatingApplication\" now come before and not after block #25. While this is probably a harmless change in this case, it can pose a significant challenge for more complex message types.\n", "\n", "As a safety measure, when messages are not fully aligned, :ref:`read_bufr() ` prints a warning message to stderr.\n", "\n", "To disable the warning message, use the **warnings** module as shown below:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
#24#pressure25800.023200.0
#24#verticalSoundingSignificance44
#24#nonCoordinateGeopotential90490.098410.0
#24#airTemperature218.5223.1
#24#dewpointTemperature196.5192.1
#24#windDirectionNoneNone
#24#windSpeedNoneNone
#1#centre9898
#1#generatingApplication11
#25#pressureNaN20500.0
#25#verticalSoundingSignificanceNaN4.0
#25#nonCoordinateGeopotentialNaN106300.0
#25#airTemperatureNaN221.5
#25#dewpointTemperatureNaN191.5
#25#windDirectionNaNNaN
#25#windSpeedNaNNaN
\n", "
" ], "text/plain": [ " 0 1\n", "#24#pressure 25800.0 23200.0\n", "#24#verticalSoundingSignificance 4 4\n", "#24#nonCoordinateGeopotential 90490.0 98410.0\n", "#24#airTemperature 218.5 223.1\n", "#24#dewpointTemperature 196.5 192.1\n", "#24#windDirection None None\n", "#24#windSpeed None None\n", "#1#centre 98 98\n", "#1#generatingApplication 1 1\n", "#25#pressure NaN 20500.0\n", "#25#verticalSoundingSignificance NaN 4.0\n", "#25#nonCoordinateGeopotential NaN 106300.0\n", "#25#airTemperature NaN 221.5\n", "#25#dewpointTemperature NaN 191.5\n", "#25#windDirection NaN NaN\n", "#25#windSpeed NaN NaN" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\", module=\"pdbufr\")\n", "\n", "df = pdbufr.read_bufr(\"temp.bufr\", columns=\"data\", filters={\"count\": [1,2]}, reader=\"flat\")\n", "df.T.iloc[-16:]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.12" }, "vscode": { "interpreter": { "hash": "22dc05efe0944894879e71a134ce5db002aedecbcd8b98acee6e3c2217e44519" } } }, "nbformat": 4, "nbformat_minor": 4 }