Submit HEPData BaBar 2012 $\sigma(e^+e^- \rightarrow \pi^+\pi^- (\gamma))$

HEPData submit BaBar 2012 $\sigma(e^+e^- \to \pi^+ \pi^- (\gamma))$

Paper

HEPData documentation for submissions

Requirements

  • hepdata_lib python3 library
  • ROOT with Python3 libraries
  • ImageMagick
  • Make sure that you have ROOT in your $PYTHONPATH and that the convert command is available by adding its location to your $PATH if needed.

Notes

In the supplementary material, numbers are printed into strings. We do not convert these strings to numeric format when reading the supplementary material, since the hepdata_lib code works with strings as well.

In [1]:
import re
import io
import os
from pathlib import Path
import tempfile
from array import array
import math
import numpy as np
import pandas as pd
import urllib.request
from requests.utils import requote_uri
import json
import yaml
from contextlib import contextmanager
In [2]:
## import hepdata_lib
from hepdata_lib import Submission
from hepdata_lib import Table
from hepdata_lib import Variable, Uncertainty
## from hepdata_lib import RootFileReader
## from hepdata_lib import root_utils
Welcome to JupyROOT 6.24/06
In [3]:
##
## globals
##

##--- output folder for all HEPData submission files
myfolder = Path("hepdata-babar-2012-pip-pim")
## mkdir(exist_ok=True) is atomic and avoids the check-then-create race
## of the former os.path.exists + os.makedirs pair
myfolder.mkdir(parents=True, exist_ok=True)

##--- file name of the paper supplementary material with the numeric data
suppmat_fname = "BABAR_ISR2pi_EPAPS.txt"
In [4]:
##
## procedures
##

@contextmanager
def cd(newdir):
  """Context manager: execute the enclosed block with `newdir` (with "~"
  expanded) as working directory, then restore the previous working
  directory, also when an exception is raised."""
  saved_dir = os.getcwd()
  os.chdir(os.path.expanduser(newdir))
  try:
    yield
  finally:
    os.chdir(saved_dir)
In [5]:
##
## abstract
##
paper_abstract = r"""A precise measurement of the cross section of the process $e^+e^- \to
\pi^+\pi^-(\gamma)$ from threshold to an energy of $3$ GeV is obtained with the
initial-state radiation (ISR) method using $232$ fb$^{-1}$ of data collected
with the BaBar detector at $e^+e^-$ center-of-mass energies near $10.6$ GeV.
The ISR luminosity is determined from a study of the leptonic process
$e^+e^-\to\mu^+\mu^-(\gamma)\gamma_{\rm ISR}$, which is found to agree with the
next-to-leading-order QED prediction to within 1.1\%. The cross section for the
process $e^+e^-\to\pi^+\pi^-(\gamma)$ is obtained with a systematic uncertainty
of 0.5\% in the dominant $\rho$ resonance region. The leading-order hadronic
contribution to the muon magnetic anomaly calculated using the measured
$\pi\pi$ cross section from threshold to $1.8$ GeV is $(514.1 \pm 2.2({\rm
stat}) \pm 3.1({\rm syst}))\times 10^{-10}$."""
##--- collapse the hard-wrapped literal above onto a single line
paper_abstract = paper_abstract.replace("\n", " ")
In [6]:
##
## download BaBar Phys. Rev. D 86 (2012) 032013 suppl. mat.
##
## was available without authentication until end 2021
## but recently became unavailable
##
# babar_data_url = """\
# http://ftp.aip.org/\
# epaps/phys_rev_lett/E-PRLTAO-103-045950/BABAR_ISR2pi_EPAPS.txt\
# """
# tmpfile, headers = urllib.request.urlretrieve(babar_data_url)
# tmpfile
In [7]:
##
## download with authentication from PRD supplementary material archive
## in https://journals.aps.org/prd/supplemental/10.1103/PhysRevD.86.032013
## and place the file in the location in "tmpfile"
##
##--- URL used below as the data location and as a reference link in the submission
babar_data_url = "https://journals.aps.org/prd/supplemental/10.1103/PhysRevD.86.032013"
##--- expected local path of the manually downloaded supplementary material file
tmpfile = myfolder / suppmat_fname
In [8]:
##
## read cross-section values and uncertainties by energy bins
## 337 rows of "<E_lo> : <E_hi> <sigma> <unc>" after 29 header lines
##
sigma_df = pd.read_csv(
  tmpfile,
  header=None,
  names=["E_lo", "colon", "E_hi", "sigma_val", "sigma_unc"],
  skiprows=29, nrows=337, sep=r"\s+"  # raw string: "\s" is an invalid escape in a plain string
)
##--- the separator token between the bin edges carries no information
sigma_df = sigma_df.drop(columns="colon")
sigma_df
Out[8]:
E_lo E_hi sigma_val sigma_unc
0 0.30 0.31 25.490436 2.699430
1 0.31 0.32 35.480116 2.914640
2 0.32 0.33 45.485797 3.046690
3 0.33 0.34 51.782467 3.133550
4 0.34 0.35 64.415646 3.499530
... ... ... ... ...
332 2.50 2.60 0.047650 0.018634
333 2.60 2.70 0.024211 0.013667
334 2.70 2.80 0.013945 0.014118
335 2.80 2.90 0.009181 0.013260
336 2.90 3.00 0.010228 0.012373

337 rows × 4 columns

In [9]:
##
## read systematics
##

##
## BaBar data format
##
#              Energy Intervals (GeV)
#          
#    0.3-0.4  0.4-0.5  0.5-0.6  0.6-0.9  0.9-1.2  1.2-1.4  1.4-2.0  2.0-3.0
#
# 1    5.3      2.7      1.9      1.0      0.5      0.4      0.3      0.3 
# 2    3.8      2.1      2.1      1.1      1.7      3.1      3.1      3.1 
# 3   10.1      2.5      6.2      2.4      4.2     10.1     10.1     10.1
# 4    3.5      4.3      5.2      1.0      3.0      7.0     12.0     50.0
# 5    1.6      1.6      1.0      1.0      1.6      1.6      1.6      1.6
# 6    0.9      0.9      0.3      0.3      0.9      0.9      0.9      0.9
# 7    3.0      2.0      3.0      1.3      2.0      3.0     10.0     10.0
# 8    2.7      1.4      1.6      1.1      1.3      2.7      5.1      5.1
# 9    1.0      2.7      2.7      1.0      1.3      1.0      1.0      1.0
# 10   3.4      3.4      3.4      3.4      3.4      3.4      3.4      3.4
#
#    (13.8)    (8.1)   (10.2)    (5.0)    (6.5)   (13.9)   (19.8)   (52.4) 
#

##
## read edges of E for systematics, in a single line
## format is a space-separated sequence of "<El>-<Eh>"
##
tmp_df = pd.read_csv(
  tmpfile,
  header=None,
  skiprows=387, nrows=1, sep=r"\s+"  # raw string avoids the invalid "\s" escape
)

##
## - get first row of tmp_df
## - join first row values in a series of lines separated with newline
## - interpret each line as two numbers separated by "-"
## - output is data frame with columns E_lo, E_hi
##
sigma_syst_df = pd.read_csv(
  io.StringIO("\n".join(tmp_df.iloc[0])),
  header=None,
  names=["E_lo", "E_hi"],
  sep="-"
)

##
## read total systematics for E bins in permille
## sequence of numbers in round brackets
##
tmp_df = pd.read_csv(
  tmpfile,
  header=None,
  skiprows=400, nrows=1, sep=r"\s+"
)
##--- strip the round brackets, then read the column with the total syst unc
tmp_df = pd.read_csv(
  io.StringIO(re.sub(r"[()]", "", "\n".join(tmp_df.iloc[0]))),
  header=None
)

##
## - convert total unc in 1st column from permille to percent
## - add to data frame as 3rd column after E_lo, E_hi
##
sigma_syst_df = sigma_syst_df.assign(unc_syst_perc_tot = tmp_df[0]/10)

##
## read systematics contributions table in permille
## each row is a systematics contribution
## each column corresponds to an energy bin
##
tmp_df = pd.read_csv(
  tmpfile,
  header=None,
  skiprows=389, nrows=10, sep=r"\s+",
)

##
## - drop first column with number of systematic contribution
## - transpose to get energy bins in rows, contributions in columns
##
tmp_df = tmp_df.drop(0, axis=1).transpose()
##--- name columns of the 10 syst contributions
tmp_df.columns = ["unc_%02d" % i for i in range(1, 10+1)]
##
## concat columnwise
## - data frame with energy bins and total syst unc
## - data frame with syst unc contributions, converted from permille to percent
##
sigma_syst_df = pd.concat(
  [
    sigma_syst_df.reset_index(drop=True),
    ## element-wise division, equivalent to the former row-wise apply but simpler
    (tmp_df / 10).reset_index(drop=True)
  ],
  axis=1
)

sigma_syst_df
Out[9]:
E_lo E_hi unc_syst_perc_tot unc_01 unc_02 unc_03 unc_04 unc_05 unc_06 unc_07 unc_08 unc_09 unc_10
0 0.3 0.4 1.38 0.53 0.38 1.01 0.35 0.16 0.09 0.30 0.27 0.10 0.34
1 0.4 0.5 0.81 0.27 0.21 0.25 0.43 0.16 0.09 0.20 0.14 0.27 0.34
2 0.5 0.6 1.02 0.19 0.21 0.62 0.52 0.10 0.03 0.30 0.16 0.27 0.34
3 0.6 0.9 0.50 0.10 0.11 0.24 0.10 0.10 0.03 0.13 0.11 0.10 0.34
4 0.9 1.2 0.65 0.05 0.17 0.42 0.30 0.16 0.09 0.20 0.13 0.13 0.34
5 1.2 1.4 1.39 0.04 0.31 1.01 0.70 0.16 0.09 0.30 0.27 0.10 0.34
6 1.4 2.0 1.98 0.03 0.31 1.01 1.20 0.16 0.09 1.00 0.51 0.10 0.34
7 2.0 3.0 5.24 0.03 0.31 1.01 5.00 0.16 0.09 1.00 0.51 0.10 0.34
In [10]:
##
## get stat unc covariance
## it is a sequence of 337*337 values
##

sigma_stat_cov_df = pd.read_csv(
  tmpfile,
  header=None,
  skiprows=405, nrows=337*337, sep=r"\s+",  # raw string avoids the invalid "\s" escape
  names=["cov"]
)

##
## insert data frame columns with lo-hi row energy bin, lo-hi column energy bin
## (very verbose inefficient format but prepared for HEPData format)
## the "i" bins cycle fastest (np.tile) and the "j" bins slowest (np.repeat),
## matching the order of the covariance values in the file
##
sigma_stat_cov_df.insert(0, "E_lo_i", np.tile(sigma_df.E_lo.values, len(sigma_df.E_lo)))
sigma_stat_cov_df.insert(1, "E_hi_i", np.tile(sigma_df.E_hi.values, len(sigma_df.E_hi)))
sigma_stat_cov_df.insert(2, "E_lo_j", np.repeat(sigma_df.E_lo.values, len(sigma_df.E_lo)))
sigma_stat_cov_df.insert(3, "E_hi_j", np.repeat(sigma_df.E_hi.values, len(sigma_df.E_hi)))

sigma_stat_cov_df
Out[10]:
E_lo_i E_hi_i E_lo_j E_hi_j cov
0 0.30 0.31 0.3 0.31 7.164117e+00
1 0.31 0.32 0.3 0.31 8.112126e-01
2 0.32 0.33 0.3 0.31 2.924933e-02
3 0.33 0.34 0.3 0.31 2.160734e-01
4 0.34 0.35 0.3 0.31 5.264122e-02
... ... ... ... ... ...
113564 2.50 2.60 2.9 3.00 1.793598e-13
113565 2.60 2.70 2.9 3.00 1.110301e-13
113566 2.70 2.80 2.9 3.00 1.148924e-13
113567 2.80 2.90 2.9 3.00 4.000000e-06
113568 2.90 3.00 2.9 3.00 1.530000e-04

113569 rows × 5 columns

In [11]:
##
## HEPData information that is common across tables
##

keyw_reaction1 = "E+ E- --> PI+ PI-"

##
## configure properties common to all tables
##
def table_common_config(table):
  """Set the location and the standard HEPData keywords shared by every table."""
  table.location = f"Data from {babar_data_url}"
  table.keywords.update({
    "observables": ["SIG"],
    "cmenergies": ["0.3-3.0"],
    "reactions": [keyw_reaction1],
    "phrases": [
      "Exclusive",
      "E+E- Scattering",
      "Integrated Cross Section",
      "Cross Section",
    ],
  })

##
## configure properties common to all dep vars
##
def var_dep_common_config(var):
  """Attach the reaction and sqrt(s) qualifiers common to all dependent variables."""
  for qualifier in (("RE", keyw_reaction1), ("SQRT(S)", "0.3-3.0", "GeV")):
    var.add_qualifier(*qualifier)
In [12]:
##
## bare cross section value and total uncertainty
##

table_val = Table("Bare cross-section")
table_val.description = r"Bare cross-section $e^+e^-\rightarrow\pi^+\pi^-(\gamma)$"
table_common_config(table_val)

##--- independent variable: energy bins
val_x = Variable("SQRT(S)", is_independent=True, is_binned=True, units="GeV")
val_x.values = np.column_stack((sigma_df["E_lo"], sigma_df["E_hi"]))

##--- dependent variable: bare cross-section vs. energy bin
## raw string keeps the LaTeX backslashes literal ("\s", "\p", "\g" are invalid escapes)
val_y = Variable(r"$\sigma_{\pi^+\pi^-(\gamma)}$",  is_independent=False, is_binned=False, units="nb")
val_y.values = sigma_df["sigma_val"]
var_dep_common_config(val_y)

##--- uncertainty of above variable
val_y_unc = Uncertainty("total", is_symmetric=True)
val_y_unc.values = sigma_df["sigma_unc"]
val_y.add_uncertainty(val_y_unc)

##--- assemble HEPDATA table
table_val.add_variable(val_x)
table_val.add_variable(val_y)
## table_val.add_image("image.eps")
In [13]:
##
## systematic uncertainty
##

table_syst_unc = Table("Bare cross-section systematic uncertainties")
table_syst_unc.description = """\
Bare cross-section $e^+e^-\\rightarrow\\pi^+\\pi^-(\\gamma)$ \
systematic uncertainties
all systematics contributions are each 100% correlated in all energy bins\
"""
table_common_config(table_syst_unc)

##--- independent variable: energy bins
syst_unc_x = Variable("SQRT(S) [GeV]", is_independent=True, is_binned=True)
syst_unc_x.values = np.column_stack((sigma_syst_df["E_lo"], sigma_syst_df["E_hi"]))

##
## systematics contributions
## (the first column of sigma_syst_df after the bin edges is the total unc)
##
syst_contr_descr = [
  "trigger / filter",
  "tracking",
  "pi-ID",
  "background",
  "acceptance",
  "kinematic fit chi2 cut",
  "correlated mu mu ID loss",
  "non cancellation of HO ISR in pi pi gamma/mu mu gamma ratio",
  "unfolding",
  "ISR luminosity from mu mu gamma process",
]

##
## create a HEPData var for the syst unc total and for each contribution
## they are in sigma_syst_df columns 2 .. ncol-1 (0-based; column 2 is the total)
##
dep_vars = []  # renamed from "vars" to avoid shadowing the builtin
for descr, icol in zip(
  ["total"] + syst_contr_descr,
  range(2, sigma_syst_df.shape[1])
):
  syst_unc_y = Variable(
    descr,
    is_independent=False, is_binned=False, units="%"
  )
  syst_unc_y.values = sigma_syst_df.iloc[:, icol]
  var_dep_common_config(syst_unc_y)
  dep_vars.append(syst_unc_y)

##--- add all defined vars to the table
for var in [syst_unc_x] + dep_vars:
  table_syst_unc.add_variable(var)
In [14]:
##
## statistical covariance
##

table_stat_cov = Table("Bare cross-section statistical covariance")
table_stat_cov.description = """\
Bare cross-section $e^+e^-\\rightarrow\\pi^+\\pi^-(\\gamma)$ \
statistical covariance\
"""
table_common_config(table_stat_cov)

##--- independent variables: row and column energy bins
stat_cov_x = Variable("SQRT(S) [GeV]", is_independent=True, is_binned=True)
stat_cov_x.values = np.column_stack((sigma_stat_cov_df["E_lo_i"], sigma_stat_cov_df["E_hi_i"]))
stat_cov_y = Variable("SQRT(S) [GeV]", is_independent=True, is_binned=True)
stat_cov_y.values = np.column_stack((sigma_stat_cov_df["E_lo_j"], sigma_stat_cov_df["E_hi_j"]))

##--- covariance of cross section for energy bin i and energy bin j
## raw string keeps the LaTeX backslashes literal (avoids invalid-escape warnings)
stat_cov_z = Variable(
  r"$\sigma_{\pi^+\pi^-(\gamma)}$ statistical covariance",
  is_independent=False, is_binned=False, units="nb^2"
)
stat_cov_z.values = sigma_stat_cov_df["cov"]
var_dep_common_config(stat_cov_z)

for var in [stat_cov_x, stat_cov_y, stat_cov_z]:
  table_stat_cov.add_variable(var)
In [15]:
##
## assembly and submission
##

##--- all output will go here
with cd(myfolder):
  print("creating submission in " + str(myfolder / "submission.tar.gz"))
  hd_subm = Submission()

  ##--- general info
  hd_subm.add_record_id(1114155, "inspire")
  hd_subm.add_link("arXiv", "https://arxiv.org/abs/1205.2228")
  hd_subm.add_link("Webpage with data files", babar_data_url)

  ##--- hepdata_lib reads the abstract from a file, so stage it in a temp file
  tmp = tempfile.NamedTemporaryFile(mode='w+t', delete=False)
  try:
    ## "with" guarantees the file is flushed and closed before it is read back
    with tmp:
      tmp.write(paper_abstract)
    hd_subm.read_abstract(tmp.name)

    ##--- add tables
    hd_subm.add_table(table_val)
    hd_subm.add_table(table_syst_unc)
    hd_subm.add_table(table_stat_cov)

    ##--- create submission
    outdir = "submission"
    hd_subm.create_files(outdir, remove_old=True)
  finally:
    ## remove the temp abstract file also when reading it or creating the
    ## submission fails (the original only cleaned up on success)
    os.unlink(tmp.name)

print("submission created")
creating submission in hepdata-babar-2012-pip-pim/submission.tar.gz
submission created
In [ ]: