Check HEPData BaBar 2012 $\sigma(e^+e^- \rightarrow \pi^+\pi^- (\gamma))$

HEPData check BaBar 2012 $\sigma(e^+e^- \to \pi^+ \pi^- (\gamma))$

  • go to the HEPData web page for this submission
  • select "Download All" in top-center-left of page
    • select "YAML with resource files"
  • mkdir -p ./hepdata-babar-2012-pip-pim/submission
  • unzip the downloaded zip file into ./hepdata-babar-2012-pip-pim/submission
In [1]:
import re
import os
from pathlib import Path
from array import array
from pprint import pprint
import urllib.request
from requests.utils import requote_uri
from math import *
import json
import yaml
import numpy as np
import pandas as pd
In [2]:
##
## globals
##

## folder holding the unzipped HEPData submission files
myfolder = Path("hepdata-babar-2012-pip-pim/submission")
## Path.mkdir with exist_ok avoids the check-then-create race of the
## os.path.exists()/os.makedirs() pair and matches the pathlib style
## already used for myfolder
myfolder.mkdir(parents=True, exist_ok=True)
In [3]:
##
## procedures
##
In [4]:
def json_load_url(url):
  """Fetch *url* and parse the response body as JSON.

  The response is closed deterministically via a context manager
  (the original leaked the open connection until garbage collection).
  """
  with urllib.request.urlopen(url) as response:
    return json.load(response)

def yaml_load_url(url):
  """Fetch *url* and parse the response body as YAML (safe loader).

  As with json_load_url, the response is closed via a context manager
  instead of being left to the garbage collector.
  """
  with urllib.request.urlopen(url) as response:
    return yaml.safe_load(response)
In [5]:
##
## main code
##
In [6]:
##--- list submission folder content
##--- (sanity check that the unzipped HEPData YAML files are present)
pprint(os.listdir(myfolder))
['submission.yaml',
 'bare_cross-section.yaml',
 'bare_cross-section_systematic_uncertainties.yaml',
 'bare_cross-section_statistical_covariance.yaml']
In [7]:
##
## read bare cross-section value and total uncertainty
##
## myfolder is a Path, so use the "/" operator instead of os.path.join
with open(myfolder / "bare_cross-section.yaml", 'r') as stream:
  hd_115140_table1 = yaml.safe_load(stream)

##
## create data frame from dict with column names and values
##
## hoist the repeated deep indexing: independent variables hold the
## energy-bin edges, dependent variables hold sigma with one symmetric
## total uncertainty per bin
_t1_indep = hd_115140_table1["independent_variables"][0]["values"]
_t1_dep = hd_115140_table1["dependent_variables"][0]["values"]
hd_115140_table1_df = pd.DataFrame(
  {
    'E_lo':      [float(var["low"]) for var in _t1_indep],
    'E_hi':      [float(var["high"]) for var in _t1_indep],
    "sigma val": [float(var["value"]) for var in _t1_dep],
    "sigma unc": [float(var["errors"][0]["symerror"]) for var in _t1_dep],
  }
)

hd_115140_table1_df
Out[7]:
E_lo E_hi sigma val sigma unc
0 0.30 0.31 25.490000 2.699400
1 0.31 0.32 35.480000 2.914600
2 0.32 0.33 45.486000 3.046700
3 0.33 0.34 51.782000 3.133600
4 0.34 0.35 64.416000 3.499500
... ... ... ... ...
332 2.50 2.60 0.047650 0.018634
333 2.60 2.70 0.024211 0.013667
334 2.70 2.80 0.013945 0.014118
335 2.80 2.90 0.009181 0.013260
336 2.90 3.00 0.010228 0.012373

337 rows × 4 columns

In [9]:
##
## read systematics table
##
with open(os.path.join(myfolder, "bare_cross-section_systematic_uncertainties.yaml"), 'r') as stream:
  hd_115140_table2 = yaml.safe_load(stream)

##
## convert YAML data to data frame of systematic unc per energy bin
##
##--- energy-bin edges (independent variables) come first ...
_t2_columns = {
  'E_lo': [float(var["low"]) for var in hd_115140_table2["independent_variables"][0]["values"]],
  'E_hi': [float(var["high"]) for var in hd_115140_table2["independent_variables"][0]["values"]],
}
##--- ... followed by one column per systematic contribution, named
##--- after the dependent-variable header, with values for all bins
for depvar in hd_115140_table2["dependent_variables"]:
  _t2_columns[depvar["header"]["name"]] = [float(delem["value"]) for delem in depvar["values"]]

hd_115140_table2_df = pd.DataFrame(_t2_columns)

hd_115140_table2_df
Out[9]:
E_lo E_hi total trigger / filter tracking pi-ID background acceptance kinematic fit chi2 cut correlated mu mu ID loss non cancellation of HO ISR in pi pi gamma/mu mu gamma ratio unfolding ISR luminosity from mu mu gamma process
0 0.3 0.4 1.38 0.53 0.38 1.01 0.35 0.16 0.09 0.30 0.27 0.10 0.34
1 0.4 0.5 0.81 0.27 0.21 0.25 0.43 0.16 0.09 0.20 0.14 0.27 0.34
2 0.5 0.6 1.02 0.19 0.21 0.62 0.52 0.10 0.03 0.30 0.16 0.27 0.34
3 0.6 0.9 0.50 0.10 0.11 0.24 0.10 0.10 0.03 0.13 0.11 0.10 0.34
4 0.9 1.2 0.65 0.05 0.17 0.42 0.30 0.16 0.09 0.20 0.13 0.13 0.34
5 1.2 1.4 1.39 0.04 0.31 1.01 0.70 0.16 0.09 0.30 0.27 0.10 0.34
6 1.4 2.0 1.98 0.03 0.31 1.01 1.20 0.16 0.09 1.00 0.51 0.10 0.34
7 2.0 3.0 5.24 0.03 0.31 1.01 5.00 0.16 0.09 1.00 0.51 0.10 0.34
In [10]:
##
## read statistical covariance
##
## BaseLoader leaves every scalar as a string (the float() calls below do
## the conversion) and is much faster than full resolution for this
## ~113k-entry table.  yaml.CBaseLoader only exists when PyYAML is built
## against libyaml, so fall back to the pure-Python BaseLoader.
_t3_loader = getattr(yaml, "CBaseLoader", yaml.BaseLoader)
with open(myfolder / "bare_cross-section_statistical_covariance.yaml", 'r') as stream:
  hd_115140_table3 = yaml.load(stream, Loader=_t3_loader)

##
## create data frame using dict of named columns
##
## hoist the repeated deep indexing: bins i and j are the two independent
## variables, the covariance value is the single dependent variable
_t3_indep_i = hd_115140_table3["independent_variables"][0]["values"]
_t3_indep_j = hd_115140_table3["independent_variables"][1]["values"]
_t3_dep = hd_115140_table3["dependent_variables"][0]["values"]
hd_115140_table3_df = pd.DataFrame(
  {
    'E_lo_i': [float(var["low"]) for var in _t3_indep_i],
    'E_hi_i': [float(var["high"]) for var in _t3_indep_i],
    'E_lo_j': [float(var["low"]) for var in _t3_indep_j],
    'E_hi_j': [float(var["high"]) for var in _t3_indep_j],
    'cov':    [float(var["value"]) for var in _t3_dep]
  }
)

hd_115140_table3_df
Out[10]:
E_lo_i E_hi_i E_lo_j E_hi_j cov
0 0.30 0.31 0.3 0.31 7.164100e+00
1 0.31 0.32 0.3 0.31 8.112100e-01
2 0.32 0.33 0.3 0.31 2.924900e-02
3 0.33 0.34 0.3 0.31 2.160700e-01
4 0.34 0.35 0.3 0.31 5.264100e-02
... ... ... ... ... ...
113564 2.50 2.60 2.9 3.00 1.793600e-13
113565 2.60 2.70 2.9 3.00 1.110300e-13
113566 2.70 2.80 2.9 3.00 1.148900e-13
113567 2.80 2.90 2.9 3.00 4.000000e-06
113568 2.90 3.00 2.9 3.00 1.530000e-04

113569 rows × 5 columns

In [ ]: